{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 27.825396825396826,
  "eval_steps": 500,
  "global_step": 4396,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.031746031746031744,
      "grad_norm": 0.5545095205307007,
      "learning_rate": 5.307855626326963e-07,
      "loss": 3.7162,
      "step": 5
    },
    {
      "epoch": 0.06349206349206349,
      "grad_norm": 0.6163601279258728,
      "learning_rate": 1.0615711252653927e-06,
      "loss": 3.9388,
      "step": 10
    },
    {
      "epoch": 0.09523809523809523,
      "grad_norm": 0.5541530847549438,
      "learning_rate": 1.5923566878980892e-06,
      "loss": 3.9165,
      "step": 15
    },
    {
      "epoch": 0.12698412698412698,
      "grad_norm": 0.457332044839859,
      "learning_rate": 2.1231422505307854e-06,
      "loss": 3.7326,
      "step": 20
    },
    {
      "epoch": 0.15873015873015872,
      "grad_norm": 0.5335279107093811,
      "learning_rate": 2.653927813163482e-06,
      "loss": 3.8251,
      "step": 25
    },
    {
      "epoch": 0.19047619047619047,
      "grad_norm": 0.7080379724502563,
      "learning_rate": 3.1847133757961785e-06,
      "loss": 3.7534,
      "step": 30
    },
    {
      "epoch": 0.2222222222222222,
      "grad_norm": 0.520993709564209,
      "learning_rate": 3.715498938428875e-06,
      "loss": 3.898,
      "step": 35
    },
    {
      "epoch": 0.25396825396825395,
      "grad_norm": 0.5451405644416809,
      "learning_rate": 4.246284501061571e-06,
      "loss": 3.8951,
      "step": 40
    },
    {
      "epoch": 0.2857142857142857,
      "grad_norm": 0.6205154657363892,
      "learning_rate": 4.777070063694268e-06,
      "loss": 3.7666,
      "step": 45
    },
    {
      "epoch": 0.31746031746031744,
      "grad_norm": 0.7404439449310303,
      "learning_rate": 5.307855626326964e-06,
      "loss": 4.0258,
      "step": 50
    },
    {
      "epoch": 0.3492063492063492,
      "grad_norm": 0.6272220015525818,
      "learning_rate": 5.838641188959661e-06,
      "loss": 3.8464,
      "step": 55
    },
    {
      "epoch": 0.38095238095238093,
      "grad_norm": 0.7744691967964172,
      "learning_rate": 6.369426751592357e-06,
      "loss": 3.7299,
      "step": 60
    },
    {
      "epoch": 0.4126984126984127,
      "grad_norm": 0.8805738687515259,
      "learning_rate": 6.900212314225053e-06,
      "loss": 3.5008,
      "step": 65
    },
    {
      "epoch": 0.4444444444444444,
      "grad_norm": 1.0740723609924316,
      "learning_rate": 7.43099787685775e-06,
      "loss": 3.7552,
      "step": 70
    },
    {
      "epoch": 0.47619047619047616,
      "grad_norm": 0.965708315372467,
      "learning_rate": 7.961783439490445e-06,
      "loss": 3.5516,
      "step": 75
    },
    {
      "epoch": 0.5079365079365079,
      "grad_norm": 0.9812778234481812,
      "learning_rate": 8.492569002123141e-06,
      "loss": 3.6003,
      "step": 80
    },
    {
      "epoch": 0.5396825396825397,
      "grad_norm": 0.8831024169921875,
      "learning_rate": 9.023354564755838e-06,
      "loss": 3.613,
      "step": 85
    },
    {
      "epoch": 0.5714285714285714,
      "grad_norm": 0.8358364105224609,
      "learning_rate": 9.554140127388536e-06,
      "loss": 3.1858,
      "step": 90
    },
    {
      "epoch": 0.6031746031746031,
      "grad_norm": 1.0740444660186768,
      "learning_rate": 1.0084925690021232e-05,
      "loss": 3.0937,
      "step": 95
    },
    {
      "epoch": 0.6349206349206349,
      "grad_norm": 1.0987530946731567,
      "learning_rate": 1.0615711252653929e-05,
      "loss": 3.154,
      "step": 100
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 1.2300925254821777,
      "learning_rate": 1.1146496815286625e-05,
      "loss": 2.9414,
      "step": 105
    },
    {
      "epoch": 0.6984126984126984,
      "grad_norm": 1.2214170694351196,
      "learning_rate": 1.1677282377919321e-05,
      "loss": 2.9464,
      "step": 110
    },
    {
      "epoch": 0.7301587301587301,
      "grad_norm": 1.2803975343704224,
      "learning_rate": 1.2208067940552018e-05,
      "loss": 2.8921,
      "step": 115
    },
    {
      "epoch": 0.7619047619047619,
      "grad_norm": 1.2232719659805298,
      "learning_rate": 1.2738853503184714e-05,
      "loss": 2.5252,
      "step": 120
    },
    {
      "epoch": 0.7936507936507936,
      "grad_norm": 1.204835295677185,
      "learning_rate": 1.326963906581741e-05,
      "loss": 2.5215,
      "step": 125
    },
    {
      "epoch": 0.8253968253968254,
      "grad_norm": 1.4095579385757446,
      "learning_rate": 1.3800424628450107e-05,
      "loss": 2.136,
      "step": 130
    },
    {
      "epoch": 0.8571428571428571,
      "grad_norm": 1.4166598320007324,
      "learning_rate": 1.4331210191082803e-05,
      "loss": 2.2653,
      "step": 135
    },
    {
      "epoch": 0.8888888888888888,
      "grad_norm": 1.3040446043014526,
      "learning_rate": 1.48619957537155e-05,
      "loss": 2.0193,
      "step": 140
    },
    {
      "epoch": 0.9206349206349206,
      "grad_norm": 1.4114688634872437,
      "learning_rate": 1.5392781316348196e-05,
      "loss": 1.7935,
      "step": 145
    },
    {
      "epoch": 0.9523809523809523,
      "grad_norm": 1.8066726922988892,
      "learning_rate": 1.592356687898089e-05,
      "loss": 1.5731,
      "step": 150
    },
    {
      "epoch": 0.9841269841269841,
      "grad_norm": 1.4303158521652222,
      "learning_rate": 1.6454352441613588e-05,
      "loss": 1.6552,
      "step": 155
    },
    {
      "epoch": 1.0126984126984127,
      "grad_norm": 1.6671762466430664,
      "learning_rate": 1.6985138004246283e-05,
      "loss": 1.6973,
      "step": 160
    },
    {
      "epoch": 1.0444444444444445,
      "grad_norm": 1.5719650983810425,
      "learning_rate": 1.751592356687898e-05,
      "loss": 1.312,
      "step": 165
    },
    {
      "epoch": 1.0761904761904761,
      "grad_norm": 1.4845054149627686,
      "learning_rate": 1.8046709129511676e-05,
      "loss": 1.3601,
      "step": 170
    },
    {
      "epoch": 1.107936507936508,
      "grad_norm": 1.1172235012054443,
      "learning_rate": 1.8577494692144374e-05,
      "loss": 1.3137,
      "step": 175
    },
    {
      "epoch": 1.1396825396825396,
      "grad_norm": 1.9621731042861938,
      "learning_rate": 1.910828025477707e-05,
      "loss": 1.1778,
      "step": 180
    },
    {
      "epoch": 1.1714285714285715,
      "grad_norm": 1.7722721099853516,
      "learning_rate": 1.963906581740977e-05,
      "loss": 1.4534,
      "step": 185
    },
    {
      "epoch": 1.2031746031746031,
      "grad_norm": 1.3677467107772827,
      "learning_rate": 2.0169851380042464e-05,
      "loss": 1.3356,
      "step": 190
    },
    {
      "epoch": 1.234920634920635,
      "grad_norm": 1.3260482549667358,
      "learning_rate": 2.0700636942675162e-05,
      "loss": 1.0876,
      "step": 195
    },
    {
      "epoch": 1.2666666666666666,
      "grad_norm": 1.5176818370819092,
      "learning_rate": 2.1231422505307857e-05,
      "loss": 1.1602,
      "step": 200
    },
    {
      "epoch": 1.2984126984126985,
      "grad_norm": 1.2793077230453491,
      "learning_rate": 2.1762208067940555e-05,
      "loss": 1.1505,
      "step": 205
    },
    {
      "epoch": 1.33015873015873,
      "grad_norm": 1.196784257888794,
      "learning_rate": 2.229299363057325e-05,
      "loss": 1.0664,
      "step": 210
    },
    {
      "epoch": 1.361904761904762,
      "grad_norm": 1.303207516670227,
      "learning_rate": 2.2823779193205948e-05,
      "loss": 1.2557,
      "step": 215
    },
    {
      "epoch": 1.3936507936507936,
      "grad_norm": 1.2853388786315918,
      "learning_rate": 2.3354564755838642e-05,
      "loss": 1.0704,
      "step": 220
    },
    {
      "epoch": 1.4253968253968254,
      "grad_norm": 1.381369948387146,
      "learning_rate": 2.388535031847134e-05,
      "loss": 1.1371,
      "step": 225
    },
    {
      "epoch": 1.457142857142857,
      "grad_norm": 1.8012712001800537,
      "learning_rate": 2.4416135881104035e-05,
      "loss": 1.248,
      "step": 230
    },
    {
      "epoch": 1.488888888888889,
      "grad_norm": 1.7397032976150513,
      "learning_rate": 2.4946921443736733e-05,
      "loss": 1.2782,
      "step": 235
    },
    {
      "epoch": 1.5206349206349206,
      "grad_norm": 1.4026210308074951,
      "learning_rate": 2.5477707006369428e-05,
      "loss": 1.154,
      "step": 240
    },
    {
      "epoch": 1.5523809523809524,
      "grad_norm": 1.2906067371368408,
      "learning_rate": 2.6008492569002126e-05,
      "loss": 0.9141,
      "step": 245
    },
    {
      "epoch": 1.5841269841269843,
      "grad_norm": 1.265598177909851,
      "learning_rate": 2.653927813163482e-05,
      "loss": 1.0625,
      "step": 250
    },
    {
      "epoch": 1.615873015873016,
      "grad_norm": 1.6044715642929077,
      "learning_rate": 2.707006369426752e-05,
      "loss": 0.9624,
      "step": 255
    },
    {
      "epoch": 1.6476190476190475,
      "grad_norm": 1.4612747430801392,
      "learning_rate": 2.7600849256900213e-05,
      "loss": 1.0413,
      "step": 260
    },
    {
      "epoch": 1.6793650793650794,
      "grad_norm": 1.6222745180130005,
      "learning_rate": 2.8131634819532908e-05,
      "loss": 1.0929,
      "step": 265
    },
    {
      "epoch": 1.7111111111111112,
      "grad_norm": 1.1456222534179688,
      "learning_rate": 2.8662420382165606e-05,
      "loss": 0.9957,
      "step": 270
    },
    {
      "epoch": 1.7428571428571429,
      "grad_norm": 1.5746041536331177,
      "learning_rate": 2.91932059447983e-05,
      "loss": 1.0274,
      "step": 275
    },
    {
      "epoch": 1.7746031746031745,
      "grad_norm": 1.3407832384109497,
      "learning_rate": 2.9723991507431e-05,
      "loss": 0.9487,
      "step": 280
    },
    {
      "epoch": 1.8063492063492064,
      "grad_norm": 1.6232194900512695,
      "learning_rate": 3.0254777070063693e-05,
      "loss": 1.0966,
      "step": 285
    },
    {
      "epoch": 1.8380952380952382,
      "grad_norm": 1.4920552968978882,
      "learning_rate": 3.078556263269639e-05,
      "loss": 0.9099,
      "step": 290
    },
    {
      "epoch": 1.8698412698412699,
      "grad_norm": 1.2123301029205322,
      "learning_rate": 3.1316348195329086e-05,
      "loss": 1.0902,
      "step": 295
    },
    {
      "epoch": 1.9015873015873015,
      "grad_norm": 1.2080968618392944,
      "learning_rate": 3.184713375796178e-05,
      "loss": 0.943,
      "step": 300
    },
    {
      "epoch": 1.9333333333333333,
      "grad_norm": 1.190319299697876,
      "learning_rate": 3.237791932059448e-05,
      "loss": 0.7893,
      "step": 305
    },
    {
      "epoch": 1.9650793650793652,
      "grad_norm": 1.5929204225540161,
      "learning_rate": 3.2908704883227177e-05,
      "loss": 1.0232,
      "step": 310
    },
    {
      "epoch": 1.9968253968253968,
      "grad_norm": 1.0138347148895264,
      "learning_rate": 3.343949044585987e-05,
      "loss": 0.6693,
      "step": 315
    },
    {
      "epoch": 2.0253968253968253,
      "grad_norm": 1.3012847900390625,
      "learning_rate": 3.3970276008492566e-05,
      "loss": 0.8355,
      "step": 320
    },
    {
      "epoch": 2.057142857142857,
      "grad_norm": 1.2264782190322876,
      "learning_rate": 3.450106157112527e-05,
      "loss": 0.9872,
      "step": 325
    },
    {
      "epoch": 2.088888888888889,
      "grad_norm": 1.139275312423706,
      "learning_rate": 3.503184713375796e-05,
      "loss": 0.8662,
      "step": 330
    },
    {
      "epoch": 2.1206349206349207,
      "grad_norm": 1.3836581707000732,
      "learning_rate": 3.5562632696390657e-05,
      "loss": 0.9549,
      "step": 335
    },
    {
      "epoch": 2.1523809523809523,
      "grad_norm": 1.368600845336914,
      "learning_rate": 3.609341825902335e-05,
      "loss": 0.9195,
      "step": 340
    },
    {
      "epoch": 2.1841269841269844,
      "grad_norm": 1.8793011903762817,
      "learning_rate": 3.662420382165605e-05,
      "loss": 0.8505,
      "step": 345
    },
    {
      "epoch": 2.215873015873016,
      "grad_norm": 1.305284023284912,
      "learning_rate": 3.715498938428875e-05,
      "loss": 0.7755,
      "step": 350
    },
    {
      "epoch": 2.2476190476190476,
      "grad_norm": 1.7851749658584595,
      "learning_rate": 3.768577494692145e-05,
      "loss": 0.9242,
      "step": 355
    },
    {
      "epoch": 2.2793650793650793,
      "grad_norm": 1.4341535568237305,
      "learning_rate": 3.821656050955414e-05,
      "loss": 0.8221,
      "step": 360
    },
    {
      "epoch": 2.311111111111111,
      "grad_norm": 1.39107346534729,
      "learning_rate": 3.874734607218684e-05,
      "loss": 0.6999,
      "step": 365
    },
    {
      "epoch": 2.342857142857143,
      "grad_norm": 1.2304264307022095,
      "learning_rate": 3.927813163481954e-05,
      "loss": 0.8362,
      "step": 370
    },
    {
      "epoch": 2.3746031746031746,
      "grad_norm": 1.8470840454101562,
      "learning_rate": 3.9808917197452234e-05,
      "loss": 0.9398,
      "step": 375
    },
    {
      "epoch": 2.4063492063492062,
      "grad_norm": 1.2533882856369019,
      "learning_rate": 4.033970276008493e-05,
      "loss": 0.7754,
      "step": 380
    },
    {
      "epoch": 2.4380952380952383,
      "grad_norm": 1.5335006713867188,
      "learning_rate": 4.087048832271762e-05,
      "loss": 1.1124,
      "step": 385
    },
    {
      "epoch": 2.46984126984127,
      "grad_norm": 1.5298357009887695,
      "learning_rate": 4.1401273885350325e-05,
      "loss": 1.017,
      "step": 390
    },
    {
      "epoch": 2.5015873015873016,
      "grad_norm": 1.4403260946273804,
      "learning_rate": 4.193205944798302e-05,
      "loss": 0.8831,
      "step": 395
    },
    {
      "epoch": 2.533333333333333,
      "grad_norm": 1.1528433561325073,
      "learning_rate": 4.2462845010615714e-05,
      "loss": 0.801,
      "step": 400
    },
    {
      "epoch": 2.565079365079365,
      "grad_norm": 1.3371326923370361,
      "learning_rate": 4.299363057324841e-05,
      "loss": 0.8692,
      "step": 405
    },
    {
      "epoch": 2.596825396825397,
      "grad_norm": 1.4064775705337524,
      "learning_rate": 4.352441613588111e-05,
      "loss": 0.9059,
      "step": 410
    },
    {
      "epoch": 2.6285714285714286,
      "grad_norm": 1.4531422853469849,
      "learning_rate": 4.4055201698513805e-05,
      "loss": 0.7344,
      "step": 415
    },
    {
      "epoch": 2.66031746031746,
      "grad_norm": 1.7043890953063965,
      "learning_rate": 4.45859872611465e-05,
      "loss": 0.8298,
      "step": 420
    },
    {
      "epoch": 2.6920634920634923,
      "grad_norm": 1.5105586051940918,
      "learning_rate": 4.5116772823779194e-05,
      "loss": 0.7768,
      "step": 425
    },
    {
      "epoch": 2.723809523809524,
      "grad_norm": 1.8101528882980347,
      "learning_rate": 4.5647558386411895e-05,
      "loss": 0.733,
      "step": 430
    },
    {
      "epoch": 2.7555555555555555,
      "grad_norm": 1.6365174055099487,
      "learning_rate": 4.617834394904459e-05,
      "loss": 0.8061,
      "step": 435
    },
    {
      "epoch": 2.787301587301587,
      "grad_norm": 1.7808202505111694,
      "learning_rate": 4.6709129511677285e-05,
      "loss": 0.8333,
      "step": 440
    },
    {
      "epoch": 2.819047619047619,
      "grad_norm": 1.5223265886306763,
      "learning_rate": 4.723991507430998e-05,
      "loss": 0.7557,
      "step": 445
    },
    {
      "epoch": 2.850793650793651,
      "grad_norm": 1.3064416646957397,
      "learning_rate": 4.777070063694268e-05,
      "loss": 0.8041,
      "step": 450
    },
    {
      "epoch": 2.8825396825396825,
      "grad_norm": 1.8025637865066528,
      "learning_rate": 4.8301486199575375e-05,
      "loss": 0.9534,
      "step": 455
    },
    {
      "epoch": 2.914285714285714,
      "grad_norm": 1.924846887588501,
      "learning_rate": 4.883227176220807e-05,
      "loss": 0.9066,
      "step": 460
    },
    {
      "epoch": 2.9460317460317462,
      "grad_norm": 1.9862899780273438,
      "learning_rate": 4.9363057324840765e-05,
      "loss": 0.7994,
      "step": 465
    },
    {
      "epoch": 2.977777777777778,
      "grad_norm": 1.9615916013717651,
      "learning_rate": 4.9893842887473466e-05,
      "loss": 0.7045,
      "step": 470
    },
    {
      "epoch": 3.0063492063492063,
      "grad_norm": 1.519852876663208,
      "learning_rate": 4.999989014936042e-05,
      "loss": 0.7212,
      "step": 475
    },
    {
      "epoch": 3.038095238095238,
      "grad_norm": 1.9328887462615967,
      "learning_rate": 4.999944388279162e-05,
      "loss": 0.6598,
      "step": 480
    },
    {
      "epoch": 3.06984126984127,
      "grad_norm": 2.0340709686279297,
      "learning_rate": 4.999865434075176e-05,
      "loss": 0.6829,
      "step": 485
    },
    {
      "epoch": 3.1015873015873017,
      "grad_norm": 1.8775280714035034,
      "learning_rate": 4.999752153408229e-05,
      "loss": 0.6664,
      "step": 490
    },
    {
      "epoch": 3.1333333333333333,
      "grad_norm": 2.385218381881714,
      "learning_rate": 4.999604547833814e-05,
      "loss": 0.6836,
      "step": 495
    },
    {
      "epoch": 3.165079365079365,
      "grad_norm": 2.1743783950805664,
      "learning_rate": 4.999422619378752e-05,
      "loss": 0.7,
      "step": 500
    },
    {
      "epoch": 3.196825396825397,
      "grad_norm": 2.20786452293396,
      "learning_rate": 4.999206370541162e-05,
      "loss": 0.7253,
      "step": 505
    },
    {
      "epoch": 3.2285714285714286,
      "grad_norm": 1.8182263374328613,
      "learning_rate": 4.998955804290425e-05,
      "loss": 0.6824,
      "step": 510
    },
    {
      "epoch": 3.2603174603174603,
      "grad_norm": 2.2959372997283936,
      "learning_rate": 4.9986709240671495e-05,
      "loss": 0.601,
      "step": 515
    },
    {
      "epoch": 3.292063492063492,
      "grad_norm": 2.385838031768799,
      "learning_rate": 4.998351733783116e-05,
      "loss": 0.7417,
      "step": 520
    },
    {
      "epoch": 3.323809523809524,
      "grad_norm": 2.0416879653930664,
      "learning_rate": 4.997998237821233e-05,
      "loss": 0.6463,
      "step": 525
    },
    {
      "epoch": 3.3555555555555556,
      "grad_norm": 2.2781031131744385,
      "learning_rate": 4.9976104410354654e-05,
      "loss": 0.6998,
      "step": 530
    },
    {
      "epoch": 3.3873015873015873,
      "grad_norm": 2.146778106689453,
      "learning_rate": 4.9971883487507775e-05,
      "loss": 0.7694,
      "step": 535
    },
    {
      "epoch": 3.419047619047619,
      "grad_norm": 2.1369104385375977,
      "learning_rate": 4.9967319667630567e-05,
      "loss": 0.6615,
      "step": 540
    },
    {
      "epoch": 3.450793650793651,
      "grad_norm": 2.4529733657836914,
      "learning_rate": 4.996241301339029e-05,
      "loss": 0.6109,
      "step": 545
    },
    {
      "epoch": 3.4825396825396826,
      "grad_norm": 2.07030987739563,
      "learning_rate": 4.995716359216183e-05,
      "loss": 0.7611,
      "step": 550
    },
    {
      "epoch": 3.5142857142857142,
      "grad_norm": 2.4329919815063477,
      "learning_rate": 4.995157147602669e-05,
      "loss": 0.7515,
      "step": 555
    },
    {
      "epoch": 3.546031746031746,
      "grad_norm": 2.056351900100708,
      "learning_rate": 4.994563674177202e-05,
      "loss": 0.6885,
      "step": 560
    },
    {
      "epoch": 3.5777777777777775,
      "grad_norm": 2.3665318489074707,
      "learning_rate": 4.993935947088958e-05,
      "loss": 0.6271,
      "step": 565
    },
    {
      "epoch": 3.6095238095238096,
      "grad_norm": 2.677706480026245,
      "learning_rate": 4.993273974957463e-05,
      "loss": 0.5586,
      "step": 570
    },
    {
      "epoch": 3.641269841269841,
      "grad_norm": 3.422136068344116,
      "learning_rate": 4.9925777668724685e-05,
      "loss": 0.7552,
      "step": 575
    },
    {
      "epoch": 3.6730158730158733,
      "grad_norm": 2.4525184631347656,
      "learning_rate": 4.991847332393835e-05,
      "loss": 0.7367,
      "step": 580
    },
    {
      "epoch": 3.704761904761905,
      "grad_norm": 2.4242067337036133,
      "learning_rate": 4.991082681551396e-05,
      "loss": 0.7044,
      "step": 585
    },
    {
      "epoch": 3.7365079365079366,
      "grad_norm": 1.8419867753982544,
      "learning_rate": 4.9902838248448184e-05,
      "loss": 0.5966,
      "step": 590
    },
    {
      "epoch": 3.768253968253968,
      "grad_norm": 2.1394360065460205,
      "learning_rate": 4.989450773243463e-05,
      "loss": 0.6736,
      "step": 595
    },
    {
      "epoch": 3.8,
      "grad_norm": 1.285447597503662,
      "learning_rate": 4.9885835381862326e-05,
      "loss": 0.5021,
      "step": 600
    },
    {
      "epoch": 3.831746031746032,
      "grad_norm": 2.724978446960449,
      "learning_rate": 4.987682131581413e-05,
      "loss": 0.6128,
      "step": 605
    },
    {
      "epoch": 3.8634920634920635,
      "grad_norm": 2.239682912826538,
      "learning_rate": 4.986746565806508e-05,
      "loss": 0.5457,
      "step": 610
    },
    {
      "epoch": 3.895238095238095,
      "grad_norm": 2.48944091796875,
      "learning_rate": 4.9857768537080784e-05,
      "loss": 0.6927,
      "step": 615
    },
    {
      "epoch": 3.9269841269841272,
      "grad_norm": 2.4086852073669434,
      "learning_rate": 4.9847730086015534e-05,
      "loss": 0.5963,
      "step": 620
    },
    {
      "epoch": 3.958730158730159,
      "grad_norm": 2.0070106983184814,
      "learning_rate": 4.9837350442710553e-05,
      "loss": 0.5856,
      "step": 625
    },
    {
      "epoch": 3.9904761904761905,
      "grad_norm": 1.9726545810699463,
      "learning_rate": 4.98266297496921e-05,
      "loss": 0.6208,
      "step": 630
    },
    {
      "epoch": 4.019047619047619,
      "grad_norm": 2.6137828826904297,
      "learning_rate": 4.981556815416948e-05,
      "loss": 0.6319,
      "step": 635
    },
    {
      "epoch": 4.050793650793651,
      "grad_norm": 2.3489890098571777,
      "learning_rate": 4.9804165808033054e-05,
      "loss": 0.5887,
      "step": 640
    },
    {
      "epoch": 4.082539682539682,
      "grad_norm": 2.8010590076446533,
      "learning_rate": 4.979242286785214e-05,
      "loss": 0.5257,
      "step": 645
    },
    {
      "epoch": 4.114285714285714,
      "grad_norm": 2.993411064147949,
      "learning_rate": 4.978033949487284e-05,
      "loss": 0.4545,
      "step": 650
    },
    {
      "epoch": 4.146031746031746,
      "grad_norm": 2.669935703277588,
      "learning_rate": 4.976791585501588e-05,
      "loss": 0.5989,
      "step": 655
    },
    {
      "epoch": 4.177777777777778,
      "grad_norm": 3.084409236907959,
      "learning_rate": 4.9755152118874294e-05,
      "loss": 0.528,
      "step": 660
    },
    {
      "epoch": 4.20952380952381,
      "grad_norm": 2.797873020172119,
      "learning_rate": 4.974204846171106e-05,
      "loss": 0.5249,
      "step": 665
    },
    {
      "epoch": 4.241269841269841,
      "grad_norm": 3.667867422103882,
      "learning_rate": 4.9728605063456765e-05,
      "loss": 0.5838,
      "step": 670
    },
    {
      "epoch": 4.273015873015873,
      "grad_norm": 2.6918869018554688,
      "learning_rate": 4.971482210870706e-05,
      "loss": 0.5143,
      "step": 675
    },
    {
      "epoch": 4.304761904761905,
      "grad_norm": 2.1545379161834717,
      "learning_rate": 4.970069978672017e-05,
      "loss": 0.5317,
      "step": 680
    },
    {
      "epoch": 4.336507936507936,
      "grad_norm": 2.1043529510498047,
      "learning_rate": 4.9686238291414275e-05,
      "loss": 0.4815,
      "step": 685
    },
    {
      "epoch": 4.368253968253969,
      "grad_norm": 2.1359753608703613,
      "learning_rate": 4.9671437821364855e-05,
      "loss": 0.4935,
      "step": 690
    },
    {
      "epoch": 4.4,
      "grad_norm": 3.092057228088379,
      "learning_rate": 4.965629857980197e-05,
      "loss": 0.6831,
      "step": 695
    },
    {
      "epoch": 4.431746031746032,
      "grad_norm": 2.5296835899353027,
      "learning_rate": 4.964082077460745e-05,
      "loss": 0.5323,
      "step": 700
    },
    {
      "epoch": 4.463492063492064,
      "grad_norm": 1.6655627489089966,
      "learning_rate": 4.962500461831207e-05,
      "loss": 0.4553,
      "step": 705
    },
    {
      "epoch": 4.495238095238095,
      "grad_norm": 2.6663475036621094,
      "learning_rate": 4.9608850328092576e-05,
      "loss": 0.463,
      "step": 710
    },
    {
      "epoch": 4.526984126984127,
      "grad_norm": 2.3763060569763184,
      "learning_rate": 4.959235812576879e-05,
      "loss": 0.4861,
      "step": 715
    },
    {
      "epoch": 4.5587301587301585,
      "grad_norm": 2.2217962741851807,
      "learning_rate": 4.957552823780047e-05,
      "loss": 0.468,
      "step": 720
    },
    {
      "epoch": 4.59047619047619,
      "grad_norm": 2.8885600566864014,
      "learning_rate": 4.9558360895284295e-05,
      "loss": 0.4588,
      "step": 725
    },
    {
      "epoch": 4.622222222222222,
      "grad_norm": 2.5661261081695557,
      "learning_rate": 4.954085633395058e-05,
      "loss": 0.4926,
      "step": 730
    },
    {
      "epoch": 4.653968253968254,
      "grad_norm": 2.304365396499634,
      "learning_rate": 4.952301479416015e-05,
      "loss": 0.494,
      "step": 735
    },
    {
      "epoch": 4.685714285714286,
      "grad_norm": 2.690577983856201,
      "learning_rate": 4.9504836520900976e-05,
      "loss": 0.5814,
      "step": 740
    },
    {
      "epoch": 4.717460317460318,
      "grad_norm": 2.7180025577545166,
      "learning_rate": 4.948632176378481e-05,
      "loss": 0.5329,
      "step": 745
    },
    {
      "epoch": 4.749206349206349,
      "grad_norm": 2.716587543487549,
      "learning_rate": 4.9467470777043806e-05,
      "loss": 0.5264,
      "step": 750
    },
    {
      "epoch": 4.780952380952381,
      "grad_norm": 2.315419912338257,
      "learning_rate": 4.9448283819526954e-05,
      "loss": 0.4756,
      "step": 755
    },
    {
      "epoch": 4.8126984126984125,
      "grad_norm": 2.1679515838623047,
      "learning_rate": 4.9428761154696605e-05,
      "loss": 0.4819,
      "step": 760
    },
    {
      "epoch": 4.844444444444444,
      "grad_norm": 3.389266014099121,
      "learning_rate": 4.9408903050624796e-05,
      "loss": 0.5121,
      "step": 765
    },
    {
      "epoch": 4.876190476190477,
      "grad_norm": 3.4317383766174316,
      "learning_rate": 4.938870977998959e-05,
      "loss": 0.4535,
      "step": 770
    },
    {
      "epoch": 4.907936507936508,
      "grad_norm": 2.9491918087005615,
      "learning_rate": 4.9368181620071344e-05,
      "loss": 0.5333,
      "step": 775
    },
    {
      "epoch": 4.93968253968254,
      "grad_norm": 2.516798496246338,
      "learning_rate": 4.934731885274887e-05,
      "loss": 0.5367,
      "step": 780
    },
    {
      "epoch": 4.9714285714285715,
      "grad_norm": 3.0031046867370605,
      "learning_rate": 4.9326121764495596e-05,
      "loss": 0.4957,
      "step": 785
    },
    {
      "epoch": 5.0,
      "grad_norm": 3.334085702896118,
      "learning_rate": 4.9304590646375614e-05,
      "loss": 0.5287,
      "step": 790
    },
    {
      "epoch": 5.031746031746032,
      "grad_norm": 1.9608453512191772,
      "learning_rate": 4.928272579403969e-05,
      "loss": 0.36,
      "step": 795
    },
    {
      "epoch": 5.063492063492063,
      "grad_norm": 2.328850746154785,
      "learning_rate": 4.92605275077212e-05,
      "loss": 0.3628,
      "step": 800
    },
    {
      "epoch": 5.095238095238095,
      "grad_norm": 2.3446412086486816,
      "learning_rate": 4.923799609223202e-05,
      "loss": 0.3327,
      "step": 805
    },
    {
      "epoch": 5.1269841269841265,
      "grad_norm": 2.476181745529175,
      "learning_rate": 4.921513185695831e-05,
      "loss": 0.4246,
      "step": 810
    },
    {
      "epoch": 5.158730158730159,
      "grad_norm": 3.1026763916015625,
      "learning_rate": 4.91919351158563e-05,
      "loss": 0.5048,
      "step": 815
    },
    {
      "epoch": 5.190476190476191,
      "grad_norm": 2.8165297508239746,
      "learning_rate": 4.916840618744798e-05,
      "loss": 0.4361,
      "step": 820
    },
    {
      "epoch": 5.222222222222222,
      "grad_norm": 1.8732138872146606,
      "learning_rate": 4.9144545394816687e-05,
      "loss": 0.4693,
      "step": 825
    },
    {
      "epoch": 5.253968253968254,
      "grad_norm": 1.7250264883041382,
      "learning_rate": 4.91203530656027e-05,
      "loss": 0.4076,
      "step": 830
    },
    {
      "epoch": 5.285714285714286,
      "grad_norm": 2.105459690093994,
      "learning_rate": 4.9095829531998725e-05,
      "loss": 0.3589,
      "step": 835
    },
    {
      "epoch": 5.317460317460317,
      "grad_norm": 3.6825687885284424,
      "learning_rate": 4.9070975130745387e-05,
      "loss": 0.5263,
      "step": 840
    },
    {
      "epoch": 5.349206349206349,
      "grad_norm": 2.947052001953125,
      "learning_rate": 4.90457902031265e-05,
      "loss": 0.4632,
      "step": 845
    },
    {
      "epoch": 5.380952380952381,
      "grad_norm": 1.9546104669570923,
      "learning_rate": 4.902027509496448e-05,
      "loss": 0.4348,
      "step": 850
    },
    {
      "epoch": 5.412698412698413,
      "grad_norm": 2.4471983909606934,
      "learning_rate": 4.899443015661557e-05,
      "loss": 0.4209,
      "step": 855
    },
    {
      "epoch": 5.444444444444445,
      "grad_norm": 1.827124834060669,
      "learning_rate": 4.8968255742964975e-05,
      "loss": 0.413,
      "step": 860
    },
    {
      "epoch": 5.476190476190476,
      "grad_norm": 2.654707431793213,
      "learning_rate": 4.894175221342207e-05,
      "loss": 0.432,
      "step": 865
    },
    {
      "epoch": 5.507936507936508,
      "grad_norm": 2.648967981338501,
      "learning_rate": 4.8914919931915407e-05,
      "loss": 0.4339,
      "step": 870
    },
    {
      "epoch": 5.5396825396825395,
      "grad_norm": 2.874075412750244,
      "learning_rate": 4.888775926688775e-05,
      "loss": 0.4392,
      "step": 875
    },
    {
      "epoch": 5.571428571428571,
      "grad_norm": 2.9674830436706543,
      "learning_rate": 4.8860270591291e-05,
      "loss": 0.4459,
      "step": 880
    },
    {
      "epoch": 5.603174603174603,
      "grad_norm": 2.054748296737671,
      "learning_rate": 4.883245428258107e-05,
      "loss": 0.4313,
      "step": 885
    },
    {
      "epoch": 5.634920634920634,
      "grad_norm": 1.9174392223358154,
      "learning_rate": 4.880431072271272e-05,
      "loss": 0.3906,
      "step": 890
    },
    {
      "epoch": 5.666666666666667,
      "grad_norm": 2.5257787704467773,
      "learning_rate": 4.87758402981343e-05,
      "loss": 0.4219,
      "step": 895
    },
    {
      "epoch": 5.698412698412699,
      "grad_norm": 2.6365532875061035,
      "learning_rate": 4.8747043399782424e-05,
      "loss": 0.3978,
      "step": 900
    },
    {
      "epoch": 5.73015873015873,
      "grad_norm": 2.0583746433258057,
      "learning_rate": 4.871792042307667e-05,
      "loss": 0.4847,
      "step": 905
    },
    {
      "epoch": 5.761904761904762,
      "grad_norm": 2.035872459411621,
      "learning_rate": 4.868847176791406e-05,
      "loss": 0.4675,
      "step": 910
    },
    {
      "epoch": 5.7936507936507935,
      "grad_norm": 2.3722939491271973,
      "learning_rate": 4.8658697838663625e-05,
      "loss": 0.4586,
      "step": 915
    },
    {
      "epoch": 5.825396825396825,
      "grad_norm": 1.2609732151031494,
      "learning_rate": 4.862859904416085e-05,
      "loss": 0.3274,
      "step": 920
    },
    {
      "epoch": 5.857142857142857,
      "grad_norm": 2.3673977851867676,
      "learning_rate": 4.8598175797702036e-05,
      "loss": 0.4685,
      "step": 925
    },
    {
      "epoch": 5.888888888888889,
      "grad_norm": 2.8414175510406494,
      "learning_rate": 4.856742851703866e-05,
      "loss": 0.4762,
      "step": 930
    },
    {
      "epoch": 5.920634920634921,
      "grad_norm": 2.4126765727996826,
      "learning_rate": 4.853635762437159e-05,
      "loss": 0.4075,
      "step": 935
    },
    {
      "epoch": 5.9523809523809526,
      "grad_norm": 1.8691045045852661,
      "learning_rate": 4.8504963546345334e-05,
      "loss": 0.4865,
      "step": 940
    },
    {
      "epoch": 5.984126984126984,
      "grad_norm": 3.5297420024871826,
      "learning_rate": 4.8473246714042155e-05,
      "loss": 0.4623,
      "step": 945
    },
    {
      "epoch": 6.012698412698413,
      "grad_norm": 2.059169054031372,
      "learning_rate": 4.844120756297617e-05,
      "loss": 0.4164,
      "step": 950
    },
    {
      "epoch": 6.044444444444444,
      "grad_norm": 2.4746127128601074,
      "learning_rate": 4.840884653308735e-05,
      "loss": 0.3552,
      "step": 955
    },
    {
      "epoch": 6.076190476190476,
      "grad_norm": 2.504425287246704,
      "learning_rate": 4.8376164068735485e-05,
      "loss": 0.3368,
      "step": 960
    },
    {
      "epoch": 6.1079365079365076,
      "grad_norm": 2.062577486038208,
      "learning_rate": 4.83431606186941e-05,
      "loss": 0.3139,
      "step": 965
    },
    {
      "epoch": 6.13968253968254,
      "grad_norm": 2.4934544563293457,
      "learning_rate": 4.830983663614427e-05,
      "loss": 0.3777,
      "step": 970
    },
    {
      "epoch": 6.171428571428572,
      "grad_norm": 2.5747485160827637,
      "learning_rate": 4.827619257866839e-05,
      "loss": 0.373,
      "step": 975
    },
    {
      "epoch": 6.203174603174603,
      "grad_norm": 2.449357271194458,
      "learning_rate": 4.8242228908243946e-05,
      "loss": 0.3936,
      "step": 980
    },
    {
      "epoch": 6.234920634920635,
      "grad_norm": 2.952680826187134,
      "learning_rate": 4.82079460912371e-05,
      "loss": 0.407,
      "step": 985
    },
    {
      "epoch": 6.266666666666667,
      "grad_norm": 2.1754496097564697,
      "learning_rate": 4.817334459839633e-05,
      "loss": 0.3189,
      "step": 990
    },
    {
      "epoch": 6.298412698412698,
      "grad_norm": 2.8406214714050293,
      "learning_rate": 4.8138424904845947e-05,
      "loss": 0.3883,
      "step": 995
    },
    {
      "epoch": 6.33015873015873,
      "grad_norm": 1.7533257007598877,
      "learning_rate": 4.8103187490079604e-05,
      "loss": 0.3131,
      "step": 1000
    },
    {
      "epoch": 6.3619047619047615,
      "grad_norm": 2.4574601650238037,
      "learning_rate": 4.806763283795366e-05,
      "loss": 0.3606,
      "step": 1005
    },
    {
      "epoch": 6.393650793650794,
      "grad_norm": 2.002281427383423,
      "learning_rate": 4.8031761436680575e-05,
      "loss": 0.37,
      "step": 1010
    },
    {
      "epoch": 6.425396825396826,
      "grad_norm": 2.823315143585205,
      "learning_rate": 4.79955737788222e-05,
      "loss": 0.3791,
      "step": 1015
    },
    {
      "epoch": 6.457142857142857,
      "grad_norm": 2.7891204357147217,
      "learning_rate": 4.795907036128299e-05,
      "loss": 0.3556,
      "step": 1020
    },
    {
      "epoch": 6.488888888888889,
      "grad_norm": 2.2387146949768066,
      "learning_rate": 4.7922251685303213e-05,
      "loss": 0.3929,
      "step": 1025
    },
    {
      "epoch": 6.520634920634921,
      "grad_norm": 2.5023891925811768,
      "learning_rate": 4.788511825645205e-05,
      "loss": 0.379,
      "step": 1030
    },
    {
      "epoch": 6.552380952380952,
      "grad_norm": 2.2654805183410645,
      "learning_rate": 4.7847670584620653e-05,
      "loss": 0.3435,
      "step": 1035
    },
    {
      "epoch": 6.584126984126984,
      "grad_norm": 3.3823065757751465,
      "learning_rate": 4.7809909184015146e-05,
      "loss": 0.4109,
      "step": 1040
    },
    {
      "epoch": 6.6158730158730155,
      "grad_norm": 2.6096551418304443,
      "learning_rate": 4.7771834573149576e-05,
      "loss": 0.4233,
      "step": 1045
    },
    {
      "epoch": 6.647619047619048,
      "grad_norm": 2.3933897018432617,
      "learning_rate": 4.773344727483876e-05,
      "loss": 0.3709,
      "step": 1050
    },
    {
      "epoch": 6.67936507936508,
      "grad_norm": 2.189544916152954,
      "learning_rate": 4.769474781619114e-05,
      "loss": 0.3287,
      "step": 1055
    },
    {
      "epoch": 6.711111111111111,
      "grad_norm": 2.450892686843872,
      "learning_rate": 4.765573672860154e-05,
      "loss": 0.4022,
      "step": 1060
    },
    {
      "epoch": 6.742857142857143,
      "grad_norm": 2.4342429637908936,
      "learning_rate": 4.761641454774386e-05,
      "loss": 0.4029,
      "step": 1065
    },
    {
      "epoch": 6.7746031746031745,
      "grad_norm": 2.2122364044189453,
      "learning_rate": 4.75767818135637e-05,
      "loss": 0.3322,
      "step": 1070
    },
    {
      "epoch": 6.806349206349206,
      "grad_norm": 3.968445301055908,
      "learning_rate": 4.7536839070271e-05,
      "loss": 0.3836,
      "step": 1075
    },
    {
      "epoch": 6.838095238095238,
      "grad_norm": 3.529158353805542,
      "learning_rate": 4.749658686633251e-05,
      "loss": 0.4745,
      "step": 1080
    },
    {
      "epoch": 6.86984126984127,
      "grad_norm": 2.430727243423462,
      "learning_rate": 4.7456025754464304e-05,
      "loss": 0.3664,
      "step": 1085
    },
    {
      "epoch": 6.901587301587302,
      "grad_norm": 2.6552302837371826,
      "learning_rate": 4.7415156291624166e-05,
      "loss": 0.4359,
      "step": 1090
    },
    {
      "epoch": 6.933333333333334,
      "grad_norm": 2.134822130203247,
      "learning_rate": 4.737397903900393e-05,
      "loss": 0.3969,
      "step": 1095
    },
    {
      "epoch": 6.965079365079365,
      "grad_norm": 2.5052947998046875,
      "learning_rate": 4.7332494562021815e-05,
      "loss": 0.4069,
      "step": 1100
    },
    {
      "epoch": 6.996825396825397,
      "grad_norm": 2.1377065181732178,
      "learning_rate": 4.729070343031463e-05,
      "loss": 0.3853,
      "step": 1105
    },
    {
      "epoch": 7.025396825396825,
      "grad_norm": 1.9704042673110962,
      "learning_rate": 4.724860621772995e-05,
      "loss": 0.3283,
      "step": 1110
    },
    {
      "epoch": 7.057142857142857,
      "grad_norm": 2.476968765258789,
      "learning_rate": 4.7206203502318256e-05,
      "loss": 0.3325,
      "step": 1115
    },
    {
      "epoch": 7.088888888888889,
      "grad_norm": 1.9231969118118286,
      "learning_rate": 4.716349586632499e-05,
      "loss": 0.2876,
      "step": 1120
    },
    {
      "epoch": 7.12063492063492,
      "grad_norm": 2.6444814205169678,
      "learning_rate": 4.712048389618254e-05,
      "loss": 0.3005,
      "step": 1125
    },
    {
      "epoch": 7.152380952380953,
      "grad_norm": 3.2589964866638184,
      "learning_rate": 4.7077168182502216e-05,
      "loss": 0.4023,
      "step": 1130
    },
    {
      "epoch": 7.184126984126984,
      "grad_norm": 2.5481936931610107,
      "learning_rate": 4.703354932006615e-05,
      "loss": 0.3302,
      "step": 1135
    },
    {
      "epoch": 7.215873015873016,
      "grad_norm": 1.7125908136367798,
      "learning_rate": 4.698962790781906e-05,
      "loss": 0.3329,
      "step": 1140
    },
    {
      "epoch": 7.247619047619048,
      "grad_norm": 2.2756667137145996,
      "learning_rate": 4.6945404548860115e-05,
      "loss": 0.3369,
      "step": 1145
    },
    {
      "epoch": 7.279365079365079,
      "grad_norm": 2.9158453941345215,
      "learning_rate": 4.6900879850434604e-05,
      "loss": 0.3339,
      "step": 1150
    },
    {
      "epoch": 7.311111111111111,
      "grad_norm": 2.3047537803649902,
      "learning_rate": 4.685605442392559e-05,
      "loss": 0.3915,
      "step": 1155
    },
    {
      "epoch": 7.3428571428571425,
      "grad_norm": 2.7815029621124268,
      "learning_rate": 4.681092888484554e-05,
      "loss": 0.3317,
      "step": 1160
    },
    {
      "epoch": 7.374603174603175,
      "grad_norm": 2.2644097805023193,
      "learning_rate": 4.676550385282787e-05,
      "loss": 0.3314,
      "step": 1165
    },
    {
      "epoch": 7.406349206349207,
      "grad_norm": 2.5144474506378174,
      "learning_rate": 4.671977995161843e-05,
      "loss": 0.3188,
      "step": 1170
    },
    {
      "epoch": 7.438095238095238,
      "grad_norm": 3.120821714401245,
      "learning_rate": 4.667375780906693e-05,
      "loss": 0.3523,
      "step": 1175
    },
    {
      "epoch": 7.46984126984127,
      "grad_norm": 4.47842264175415,
      "learning_rate": 4.662743805711832e-05,
      "loss": 0.3611,
      "step": 1180
    },
    {
      "epoch": 7.501587301587302,
      "grad_norm": 1.9228928089141846,
      "learning_rate": 4.658082133180416e-05,
      "loss": 0.3612,
      "step": 1185
    },
    {
      "epoch": 7.533333333333333,
      "grad_norm": 2.1507537364959717,
      "learning_rate": 4.6533908273233815e-05,
      "loss": 0.3321,
      "step": 1190
    },
    {
      "epoch": 7.565079365079365,
      "grad_norm": 2.1849119663238525,
      "learning_rate": 4.64866995255857e-05,
      "loss": 0.2943,
      "step": 1195
    },
    {
      "epoch": 7.5968253968253965,
      "grad_norm": 2.1777775287628174,
      "learning_rate": 4.643919573709843e-05,
      "loss": 0.353,
      "step": 1200
    },
    {
      "epoch": 7.628571428571428,
      "grad_norm": 2.5231118202209473,
      "learning_rate": 4.639139756006195e-05,
      "loss": 0.3571,
      "step": 1205
    },
    {
      "epoch": 7.660317460317461,
      "grad_norm": 1.8409479856491089,
      "learning_rate": 4.6343305650808516e-05,
      "loss": 0.3691,
      "step": 1210
    },
    {
      "epoch": 7.692063492063492,
      "grad_norm": 1.7940895557403564,
      "learning_rate": 4.629492066970373e-05,
      "loss": 0.3738,
      "step": 1215
    },
    {
      "epoch": 7.723809523809524,
      "grad_norm": 2.014902114868164,
      "learning_rate": 4.6246243281137474e-05,
      "loss": 0.361,
      "step": 1220
    },
    {
      "epoch": 7.7555555555555555,
      "grad_norm": 3.4182560443878174,
      "learning_rate": 4.6197274153514735e-05,
      "loss": 0.3663,
      "step": 1225
    },
    {
      "epoch": 7.787301587301587,
      "grad_norm": 2.518728256225586,
      "learning_rate": 4.614801395924649e-05,
      "loss": 0.3646,
      "step": 1230
    },
    {
      "epoch": 7.819047619047619,
      "grad_norm": 2.154189109802246,
      "learning_rate": 4.6098463374740466e-05,
      "loss": 0.3331,
      "step": 1235
    },
    {
      "epoch": 7.85079365079365,
      "grad_norm": 2.536081075668335,
      "learning_rate": 4.604862308039177e-05,
      "loss": 0.3742,
      "step": 1240
    },
    {
      "epoch": 7.882539682539683,
      "grad_norm": 2.340764045715332,
      "learning_rate": 4.599849376057366e-05,
      "loss": 0.3352,
      "step": 1245
    },
    {
      "epoch": 7.914285714285715,
      "grad_norm": 3.5488364696502686,
      "learning_rate": 4.5948076103628094e-05,
      "loss": 0.3663,
      "step": 1250
    },
    {
      "epoch": 7.946031746031746,
      "grad_norm": 2.779360294342041,
      "learning_rate": 4.589737080185625e-05,
      "loss": 0.3362,
      "step": 1255
    },
    {
      "epoch": 7.977777777777778,
      "grad_norm": 1.8792667388916016,
      "learning_rate": 4.5846378551509097e-05,
      "loss": 0.346,
      "step": 1260
    },
    {
      "epoch": 8.006349206349206,
      "grad_norm": 2.453295946121216,
      "learning_rate": 4.579510005277774e-05,
      "loss": 0.3509,
      "step": 1265
    },
    {
      "epoch": 8.038095238095238,
      "grad_norm": 1.9493130445480347,
      "learning_rate": 4.574353600978388e-05,
      "loss": 0.3062,
      "step": 1270
    },
    {
      "epoch": 8.06984126984127,
      "grad_norm": 1.9360930919647217,
      "learning_rate": 4.56916871305701e-05,
      "loss": 0.3056,
      "step": 1275
    },
    {
      "epoch": 8.101587301587301,
      "grad_norm": 1.5592070817947388,
      "learning_rate": 4.563955412709021e-05,
      "loss": 0.2785,
      "step": 1280
    },
    {
      "epoch": 8.133333333333333,
      "grad_norm": 1.8093425035476685,
      "learning_rate": 4.5587137715199354e-05,
      "loss": 0.308,
      "step": 1285
    },
    {
      "epoch": 8.165079365079364,
      "grad_norm": 2.2939181327819824,
      "learning_rate": 4.5534438614644294e-05,
      "loss": 0.3038,
      "step": 1290
    },
    {
      "epoch": 8.196825396825396,
      "grad_norm": 2.4204866886138916,
      "learning_rate": 4.548145754905346e-05,
      "loss": 0.3375,
      "step": 1295
    },
    {
      "epoch": 8.228571428571428,
      "grad_norm": 1.725534439086914,
      "learning_rate": 4.5428195245927064e-05,
      "loss": 0.3101,
      "step": 1300
    },
    {
      "epoch": 8.260317460317461,
      "grad_norm": 1.637730360031128,
      "learning_rate": 4.537465243662704e-05,
      "loss": 0.2931,
      "step": 1305
    },
    {
      "epoch": 8.292063492063493,
      "grad_norm": 1.3372169733047485,
      "learning_rate": 4.532082985636709e-05,
      "loss": 0.2763,
      "step": 1310
    },
    {
      "epoch": 8.323809523809524,
      "grad_norm": 2.5993168354034424,
      "learning_rate": 4.5266728244202494e-05,
      "loss": 0.3458,
      "step": 1315
    },
    {
      "epoch": 8.355555555555556,
      "grad_norm": 2.461862564086914,
      "learning_rate": 4.521234834302006e-05,
      "loss": 0.3693,
      "step": 1320
    },
    {
      "epoch": 8.387301587301588,
      "grad_norm": 1.8519413471221924,
      "learning_rate": 4.5157690899527816e-05,
      "loss": 0.3327,
      "step": 1325
    },
    {
      "epoch": 8.41904761904762,
      "grad_norm": 2.1535580158233643,
      "learning_rate": 4.510275666424487e-05,
      "loss": 0.3229,
      "step": 1330
    },
    {
      "epoch": 8.450793650793651,
      "grad_norm": 1.6819690465927124,
      "learning_rate": 4.5047546391491e-05,
      "loss": 0.2925,
      "step": 1335
    },
    {
      "epoch": 8.482539682539683,
      "grad_norm": 1.6538281440734863,
      "learning_rate": 4.499206083937638e-05,
      "loss": 0.3218,
      "step": 1340
    },
    {
      "epoch": 8.514285714285714,
      "grad_norm": 1.8956862688064575,
      "learning_rate": 4.493630076979112e-05,
      "loss": 0.3423,
      "step": 1345
    },
    {
      "epoch": 8.546031746031746,
      "grad_norm": 2.274681806564331,
      "learning_rate": 4.48802669483948e-05,
      "loss": 0.3152,
      "step": 1350
    },
    {
      "epoch": 8.577777777777778,
      "grad_norm": 2.2956337928771973,
      "learning_rate": 4.4823960144606014e-05,
      "loss": 0.3417,
      "step": 1355
    },
    {
      "epoch": 8.60952380952381,
      "grad_norm": 1.8650286197662354,
      "learning_rate": 4.4767381131591734e-05,
      "loss": 0.2896,
      "step": 1360
    },
    {
      "epoch": 8.64126984126984,
      "grad_norm": 1.3998652696609497,
      "learning_rate": 4.471053068625674e-05,
      "loss": 0.3372,
      "step": 1365
    },
    {
      "epoch": 8.673015873015872,
      "grad_norm": 2.855074167251587,
      "learning_rate": 4.465340958923293e-05,
      "loss": 0.332,
      "step": 1370
    },
    {
      "epoch": 8.704761904761904,
      "grad_norm": 1.6865357160568237,
      "learning_rate": 4.459601862486862e-05,
      "loss": 0.3053,
      "step": 1375
    },
    {
      "epoch": 8.736507936507937,
      "grad_norm": 2.501856803894043,
      "learning_rate": 4.453835858121773e-05,
      "loss": 0.3119,
      "step": 1380
    },
    {
      "epoch": 8.768253968253969,
      "grad_norm": 2.4325456619262695,
      "learning_rate": 4.4480430250029046e-05,
      "loss": 0.3395,
      "step": 1385
    },
    {
      "epoch": 8.8,
      "grad_norm": 1.4845948219299316,
      "learning_rate": 4.4422234426735256e-05,
      "loss": 0.3237,
      "step": 1390
    },
    {
      "epoch": 8.831746031746032,
      "grad_norm": 1.3553249835968018,
      "learning_rate": 4.436377191044208e-05,
      "loss": 0.3387,
      "step": 1395
    },
    {
      "epoch": 8.863492063492064,
      "grad_norm": 1.8338890075683594,
      "learning_rate": 4.430504350391729e-05,
      "loss": 0.3618,
      "step": 1400
    },
    {
      "epoch": 8.895238095238096,
      "grad_norm": 2.291538953781128,
      "learning_rate": 4.4246050013579686e-05,
      "loss": 0.3608,
      "step": 1405
    },
    {
      "epoch": 8.926984126984127,
      "grad_norm": 1.3809788227081299,
      "learning_rate": 4.4186792249488005e-05,
      "loss": 0.3077,
      "step": 1410
    },
    {
      "epoch": 8.958730158730159,
      "grad_norm": 1.5944230556488037,
      "learning_rate": 4.412727102532983e-05,
      "loss": 0.3307,
      "step": 1415
    },
    {
      "epoch": 8.99047619047619,
      "grad_norm": 2.2244362831115723,
      "learning_rate": 4.4067487158410396e-05,
      "loss": 0.3469,
      "step": 1420
    },
    {
      "epoch": 9.019047619047619,
      "grad_norm": 1.444221019744873,
      "learning_rate": 4.400744146964136e-05,
      "loss": 0.3049,
      "step": 1425
    },
    {
      "epoch": 9.05079365079365,
      "grad_norm": 1.5847752094268799,
      "learning_rate": 4.394713478352955e-05,
      "loss": 0.2715,
      "step": 1430
    },
    {
      "epoch": 9.082539682539682,
      "grad_norm": 1.6062681674957275,
      "learning_rate": 4.388656792816562e-05,
      "loss": 0.2487,
      "step": 1435
    },
    {
      "epoch": 9.114285714285714,
      "grad_norm": 2.099787712097168,
      "learning_rate": 4.382574173521272e-05,
      "loss": 0.2866,
      "step": 1440
    },
    {
      "epoch": 9.146031746031746,
      "grad_norm": 1.0997334718704224,
      "learning_rate": 4.376465703989502e-05,
      "loss": 0.3052,
      "step": 1445
    },
    {
      "epoch": 9.177777777777777,
      "grad_norm": 2.4327454566955566,
      "learning_rate": 4.370331468098628e-05,
      "loss": 0.3212,
      "step": 1450
    },
    {
      "epoch": 9.209523809523809,
      "grad_norm": 1.4816385507583618,
      "learning_rate": 4.364171550079833e-05,
      "loss": 0.3046,
      "step": 1455
    },
    {
      "epoch": 9.24126984126984,
      "grad_norm": 2.039186716079712,
      "learning_rate": 4.357986034516947e-05,
      "loss": 0.3165,
      "step": 1460
    },
    {
      "epoch": 9.273015873015874,
      "grad_norm": 1.437852382659912,
      "learning_rate": 4.3517750063452934e-05,
      "loss": 0.3037,
      "step": 1465
    },
    {
      "epoch": 9.304761904761905,
      "grad_norm": 1.818982720375061,
      "learning_rate": 4.345538550850512e-05,
      "loss": 0.3122,
      "step": 1470
    },
    {
      "epoch": 9.336507936507937,
      "grad_norm": 1.12025785446167,
      "learning_rate": 4.339276753667395e-05,
      "loss": 0.2909,
      "step": 1475
    },
    {
      "epoch": 9.368253968253969,
      "grad_norm": 1.6094844341278076,
      "learning_rate": 4.3329897007787125e-05,
      "loss": 0.2823,
      "step": 1480
    },
    {
      "epoch": 9.4,
      "grad_norm": 1.916200041770935,
      "learning_rate": 4.326677478514024e-05,
      "loss": 0.2939,
      "step": 1485
    },
    {
      "epoch": 9.431746031746032,
      "grad_norm": 1.97919499874115,
      "learning_rate": 4.320340173548503e-05,
      "loss": 0.2826,
      "step": 1490
    },
    {
      "epoch": 9.463492063492064,
      "grad_norm": 2.0238938331604004,
      "learning_rate": 4.313977872901737e-05,
      "loss": 0.3273,
      "step": 1495
    },
    {
      "epoch": 9.495238095238095,
      "grad_norm": 2.5840957164764404,
      "learning_rate": 4.307590663936541e-05,
      "loss": 0.2889,
      "step": 1500
    },
    {
      "epoch": 9.526984126984127,
      "grad_norm": 2.3503904342651367,
      "learning_rate": 4.30117863435775e-05,
      "loss": 0.3012,
      "step": 1505
    },
    {
      "epoch": 9.558730158730159,
      "grad_norm": 2.019792318344116,
      "learning_rate": 4.294741872211024e-05,
      "loss": 0.3267,
      "step": 1510
    },
    {
      "epoch": 9.59047619047619,
      "grad_norm": 2.2713353633880615,
      "learning_rate": 4.288280465881632e-05,
      "loss": 0.3096,
      "step": 1515
    },
    {
      "epoch": 9.622222222222222,
      "grad_norm": 2.4236693382263184,
      "learning_rate": 4.281794504093237e-05,
      "loss": 0.3291,
      "step": 1520
    },
    {
      "epoch": 9.653968253968253,
      "grad_norm": 1.772703766822815,
      "learning_rate": 4.275284075906686e-05,
      "loss": 0.3117,
      "step": 1525
    },
    {
      "epoch": 9.685714285714285,
      "grad_norm": 1.9665186405181885,
      "learning_rate": 4.268749270718778e-05,
      "loss": 0.326,
      "step": 1530
    },
    {
      "epoch": 9.717460317460317,
      "grad_norm": 1.9472782611846924,
      "learning_rate": 4.262190178261044e-05,
      "loss": 0.2683,
      "step": 1535
    },
    {
      "epoch": 9.74920634920635,
      "grad_norm": 2.0638089179992676,
      "learning_rate": 4.255606888598508e-05,
      "loss": 0.314,
      "step": 1540
    },
    {
      "epoch": 9.780952380952382,
      "grad_norm": 2.1349925994873047,
      "learning_rate": 4.248999492128456e-05,
      "loss": 0.2897,
      "step": 1545
    },
    {
      "epoch": 9.812698412698413,
      "grad_norm": 2.112536907196045,
      "learning_rate": 4.242368079579192e-05,
      "loss": 0.31,
      "step": 1550
    },
    {
      "epoch": 9.844444444444445,
      "grad_norm": 1.6859878301620483,
      "learning_rate": 4.2357127420087917e-05,
      "loss": 0.3412,
      "step": 1555
    },
    {
      "epoch": 9.876190476190477,
      "grad_norm": 1.9178651571273804,
      "learning_rate": 4.229033570803853e-05,
      "loss": 0.334,
      "step": 1560
    },
    {
      "epoch": 9.907936507936508,
      "grad_norm": 2.562436103820801,
      "learning_rate": 4.2223306576782426e-05,
      "loss": 0.3379,
      "step": 1565
    },
    {
      "epoch": 9.93968253968254,
      "grad_norm": 1.8472412824630737,
      "learning_rate": 4.215604094671835e-05,
      "loss": 0.3415,
      "step": 1570
    },
    {
      "epoch": 9.971428571428572,
      "grad_norm": 1.9416279792785645,
      "learning_rate": 4.208853974149246e-05,
      "loss": 0.3085,
      "step": 1575
    },
    {
      "epoch": 10.0,
      "grad_norm": 2.0056397914886475,
      "learning_rate": 4.202080388798571e-05,
      "loss": 0.3263,
      "step": 1580
    },
    {
      "epoch": 10.031746031746032,
      "grad_norm": 2.195781946182251,
      "learning_rate": 4.1952834316301065e-05,
      "loss": 0.2867,
      "step": 1585
    },
    {
      "epoch": 10.063492063492063,
      "grad_norm": 1.7489805221557617,
      "learning_rate": 4.1884631959750766e-05,
      "loss": 0.2589,
      "step": 1590
    },
    {
      "epoch": 10.095238095238095,
      "grad_norm": 1.9361369609832764,
      "learning_rate": 4.181619775484348e-05,
      "loss": 0.2722,
      "step": 1595
    },
    {
      "epoch": 10.126984126984127,
      "grad_norm": 2.24322509765625,
      "learning_rate": 4.174753264127147e-05,
      "loss": 0.2534,
      "step": 1600
    },
    {
      "epoch": 10.158730158730158,
      "grad_norm": 2.4550466537475586,
      "learning_rate": 4.167863756189767e-05,
      "loss": 0.2777,
      "step": 1605
    },
    {
      "epoch": 10.19047619047619,
      "grad_norm": 1.9439811706542969,
      "learning_rate": 4.160951346274278e-05,
      "loss": 0.2864,
      "step": 1610
    },
    {
      "epoch": 10.222222222222221,
      "grad_norm": 1.633494257926941,
      "learning_rate": 4.154016129297219e-05,
      "loss": 0.2858,
      "step": 1615
    },
    {
      "epoch": 10.253968253968253,
      "grad_norm": 1.69782292842865,
      "learning_rate": 4.147058200488305e-05,
      "loss": 0.2942,
      "step": 1620
    },
    {
      "epoch": 10.285714285714286,
      "grad_norm": 1.613031268119812,
      "learning_rate": 4.140077655389113e-05,
      "loss": 0.2632,
      "step": 1625
    },
    {
      "epoch": 10.317460317460318,
      "grad_norm": 2.0266177654266357,
      "learning_rate": 4.1330745898517714e-05,
      "loss": 0.3011,
      "step": 1630
    },
    {
      "epoch": 10.34920634920635,
      "grad_norm": 1.8945387601852417,
      "learning_rate": 4.1260491000376446e-05,
      "loss": 0.2832,
      "step": 1635
    },
    {
      "epoch": 10.380952380952381,
      "grad_norm": 1.7012510299682617,
      "learning_rate": 4.119001282416009e-05,
      "loss": 0.2718,
      "step": 1640
    },
    {
      "epoch": 10.412698412698413,
      "grad_norm": 1.5538525581359863,
      "learning_rate": 4.111931233762738e-05,
      "loss": 0.3232,
      "step": 1645
    },
    {
      "epoch": 10.444444444444445,
      "grad_norm": 2.3083150386810303,
      "learning_rate": 4.1048390511589595e-05,
      "loss": 0.3057,
      "step": 1650
    },
    {
      "epoch": 10.476190476190476,
      "grad_norm": 1.293314814567566,
      "learning_rate": 4.097724831989733e-05,
      "loss": 0.2523,
      "step": 1655
    },
    {
      "epoch": 10.507936507936508,
      "grad_norm": 2.517212152481079,
      "learning_rate": 4.09058867394271e-05,
      "loss": 0.3269,
      "step": 1660
    },
    {
      "epoch": 10.53968253968254,
      "grad_norm": 2.057063102722168,
      "learning_rate": 4.083430675006791e-05,
      "loss": 0.2844,
      "step": 1665
    },
    {
      "epoch": 10.571428571428571,
      "grad_norm": 1.5663833618164062,
      "learning_rate": 4.0762509334707786e-05,
      "loss": 0.3005,
      "step": 1670
    },
    {
      "epoch": 10.603174603174603,
      "grad_norm": 2.5423505306243896,
      "learning_rate": 4.069049547922035e-05,
      "loss": 0.2802,
      "step": 1675
    },
    {
      "epoch": 10.634920634920634,
      "grad_norm": 1.578316569328308,
      "learning_rate": 4.061826617245119e-05,
      "loss": 0.2667,
      "step": 1680
    },
    {
      "epoch": 10.666666666666666,
      "grad_norm": 1.502928376197815,
      "learning_rate": 4.0545822406204334e-05,
      "loss": 0.3059,
      "step": 1685
    },
    {
      "epoch": 10.698412698412698,
      "grad_norm": 1.2470905780792236,
      "learning_rate": 4.047316517522864e-05,
      "loss": 0.2879,
      "step": 1690
    },
    {
      "epoch": 10.73015873015873,
      "grad_norm": 1.8238775730133057,
      "learning_rate": 4.0400295477204105e-05,
      "loss": 0.2923,
      "step": 1695
    },
    {
      "epoch": 10.761904761904763,
      "grad_norm": 2.0516586303710938,
      "learning_rate": 4.032721431272819e-05,
      "loss": 0.3086,
      "step": 1700
    },
    {
      "epoch": 10.793650793650794,
      "grad_norm": 1.3188791275024414,
      "learning_rate": 4.0253922685302046e-05,
      "loss": 0.2893,
      "step": 1705
    },
    {
| "epoch": 10.825396825396826, | |
| "grad_norm": 1.7352266311645508, | |
| "learning_rate": 4.01804216013168e-05, | |
| "loss": 0.2981, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 10.857142857142858, | |
| "grad_norm": 1.3449515104293823, | |
| "learning_rate": 4.0106712070039656e-05, | |
| "loss": 0.2841, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 10.88888888888889, | |
| "grad_norm": 2.505431890487671, | |
| "learning_rate": 4.00327951036001e-05, | |
| "loss": 0.3034, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 10.920634920634921, | |
| "grad_norm": 1.8870325088500977, | |
| "learning_rate": 3.9958671716975966e-05, | |
| "loss": 0.305, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 10.952380952380953, | |
| "grad_norm": 2.913130044937134, | |
| "learning_rate": 3.988434292797951e-05, | |
| "loss": 0.3212, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 10.984126984126984, | |
| "grad_norm": 1.7870115041732788, | |
| "learning_rate": 3.980980975724344e-05, | |
| "loss": 0.3108, | |
| "step": 1735 | |
| }, | |
| { | |
| "epoch": 11.012698412698413, | |
| "grad_norm": 3.050985336303711, | |
| "learning_rate": 3.9735073228206896e-05, | |
| "loss": 0.3043, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 11.044444444444444, | |
| "grad_norm": 1.5993611812591553, | |
| "learning_rate": 3.96601343671014e-05, | |
| "loss": 0.2465, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 11.076190476190476, | |
| "grad_norm": 1.626888632774353, | |
| "learning_rate": 3.9584994202936746e-05, | |
| "loss": 0.2688, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 11.107936507936508, | |
| "grad_norm": 1.7132880687713623, | |
| "learning_rate": 3.950965376748689e-05, | |
| "loss": 0.2458, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 11.13968253968254, | |
| "grad_norm": 1.7764930725097656, | |
| "learning_rate": 3.94341140952758e-05, | |
| "loss": 0.2189, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 11.17142857142857, | |
| "grad_norm": 2.5560712814331055, | |
| "learning_rate": 3.9358376223563206e-05, | |
| "loss": 0.2866, | |
| "step": 1765 | |
| }, | |
| { | |
| "epoch": 11.203174603174602, | |
| "grad_norm": 1.1177359819412231, | |
| "learning_rate": 3.928244119233038e-05, | |
| "loss": 0.233, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 11.234920634920634, | |
| "grad_norm": 1.584670901298523, | |
| "learning_rate": 3.9206310044265866e-05, | |
| "loss": 0.273, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 11.266666666666667, | |
| "grad_norm": 1.6278687715530396, | |
| "learning_rate": 3.912998382475115e-05, | |
| "loss": 0.2746, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 11.2984126984127, | |
| "grad_norm": 1.657038688659668, | |
| "learning_rate": 3.905346358184629e-05, | |
| "loss": 0.2885, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 11.33015873015873, | |
| "grad_norm": 1.2840272188186646, | |
| "learning_rate": 3.897675036627557e-05, | |
| "loss": 0.2932, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 11.361904761904762, | |
| "grad_norm": 1.5678766965866089, | |
| "learning_rate": 3.8899845231413026e-05, | |
| "loss": 0.2945, | |
| "step": 1795 | |
| }, | |
| { | |
| "epoch": 11.393650793650794, | |
| "grad_norm": 1.788948655128479, | |
| "learning_rate": 3.8822749233268006e-05, | |
| "loss": 0.3013, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 11.425396825396826, | |
| "grad_norm": 1.2259769439697266, | |
| "learning_rate": 3.8745463430470664e-05, | |
| "loss": 0.2582, | |
| "step": 1805 | |
| }, | |
| { | |
| "epoch": 11.457142857142857, | |
| "grad_norm": 1.5430735349655151, | |
| "learning_rate": 3.866798888425741e-05, | |
| "loss": 0.275, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 11.488888888888889, | |
| "grad_norm": 1.9102168083190918, | |
| "learning_rate": 3.8590326658456376e-05, | |
| "loss": 0.2909, | |
| "step": 1815 | |
| }, | |
| { | |
| "epoch": 11.52063492063492, | |
| "grad_norm": 1.6118320226669312, | |
| "learning_rate": 3.851247781947277e-05, | |
| "loss": 0.2922, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 11.552380952380952, | |
| "grad_norm": 1.393646478652954, | |
| "learning_rate": 3.843444343627424e-05, | |
| "loss": 0.2783, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 11.584126984126984, | |
| "grad_norm": 2.522909641265869, | |
| "learning_rate": 3.83562245803762e-05, | |
| "loss": 0.2933, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 11.615873015873015, | |
| "grad_norm": 2.2534332275390625, | |
| "learning_rate": 3.827782232582714e-05, | |
| "loss": 0.3081, | |
| "step": 1835 | |
| }, | |
| { | |
| "epoch": 11.647619047619047, | |
| "grad_norm": 2.4088056087493896, | |
| "learning_rate": 3.819923774919383e-05, | |
| "loss": 0.276, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 11.679365079365079, | |
| "grad_norm": 1.7626562118530273, | |
| "learning_rate": 3.8120471929546576e-05, | |
| "loss": 0.2697, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 11.71111111111111, | |
| "grad_norm": 1.8656691312789917, | |
| "learning_rate": 3.8041525948444414e-05, | |
| "loss": 0.2979, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 11.742857142857144, | |
| "grad_norm": 1.6537258625030518, | |
| "learning_rate": 3.7962400889920185e-05, | |
| "loss": 0.3042, | |
| "step": 1855 | |
| }, | |
| { | |
| "epoch": 11.774603174603175, | |
| "grad_norm": 1.696975827217102, | |
| "learning_rate": 3.788309784046574e-05, | |
| "loss": 0.2984, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 11.806349206349207, | |
| "grad_norm": 1.976236343383789, | |
| "learning_rate": 3.780361788901696e-05, | |
| "loss": 0.2711, | |
| "step": 1865 | |
| }, | |
| { | |
| "epoch": 11.838095238095239, | |
| "grad_norm": 1.7781676054000854, | |
| "learning_rate": 3.772396212693885e-05, | |
| "loss": 0.3116, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 11.86984126984127, | |
| "grad_norm": 1.6252080202102661, | |
| "learning_rate": 3.7644131648010494e-05, | |
| "loss": 0.2879, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 11.901587301587302, | |
| "grad_norm": 1.7511012554168701, | |
| "learning_rate": 3.75641275484101e-05, | |
| "loss": 0.3106, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 11.933333333333334, | |
| "grad_norm": 1.5861046314239502, | |
| "learning_rate": 3.7483950926699885e-05, | |
| "loss": 0.2703, | |
| "step": 1885 | |
| }, | |
| { | |
| "epoch": 11.965079365079365, | |
| "grad_norm": 1.903902530670166, | |
| "learning_rate": 3.740360288381105e-05, | |
| "loss": 0.2808, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 11.996825396825397, | |
| "grad_norm": 1.644740104675293, | |
| "learning_rate": 3.732308452302864e-05, | |
| "loss": 0.2883, | |
| "step": 1895 | |
| }, | |
| { | |
| "epoch": 12.025396825396825, | |
| "grad_norm": 1.3671797513961792, | |
| "learning_rate": 3.724239694997637e-05, | |
| "loss": 0.2661, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 12.057142857142857, | |
| "grad_norm": 1.1910501718521118, | |
| "learning_rate": 3.716154127260147e-05, | |
| "loss": 0.2352, | |
| "step": 1905 | |
| }, | |
| { | |
| "epoch": 12.088888888888889, | |
| "grad_norm": 1.45137619972229, | |
| "learning_rate": 3.708051860115947e-05, | |
| "loss": 0.2703, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 12.12063492063492, | |
| "grad_norm": 2.625089645385742, | |
| "learning_rate": 3.699933004819895e-05, | |
| "loss": 0.2524, | |
| "step": 1915 | |
| }, | |
| { | |
| "epoch": 12.152380952380952, | |
| "grad_norm": 1.731602430343628, | |
| "learning_rate": 3.691797672854625e-05, | |
| "loss": 0.2533, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 12.184126984126983, | |
| "grad_norm": 2.1266143321990967, | |
| "learning_rate": 3.683645975929019e-05, | |
| "loss": 0.2666, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 12.215873015873015, | |
| "grad_norm": 1.7163398265838623, | |
| "learning_rate": 3.675478025976671e-05, | |
| "loss": 0.2838, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 12.247619047619047, | |
| "grad_norm": 1.8726729154586792, | |
| "learning_rate": 3.66729393515435e-05, | |
| "loss": 0.2646, | |
| "step": 1935 | |
| }, | |
| { | |
| "epoch": 12.27936507936508, | |
| "grad_norm": 1.2229722738265991, | |
| "learning_rate": 3.659093815840462e-05, | |
| "loss": 0.267, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 12.311111111111112, | |
| "grad_norm": 1.341997504234314, | |
| "learning_rate": 3.650877780633505e-05, | |
| "loss": 0.2464, | |
| "step": 1945 | |
| }, | |
| { | |
| "epoch": 12.342857142857143, | |
| "grad_norm": 1.3466174602508545, | |
| "learning_rate": 3.6426459423505214e-05, | |
| "loss": 0.2521, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 12.374603174603175, | |
| "grad_norm": 1.7022134065628052, | |
| "learning_rate": 3.6343984140255516e-05, | |
| "loss": 0.2662, | |
| "step": 1955 | |
| }, | |
| { | |
| "epoch": 12.406349206349207, | |
| "grad_norm": 2.5058987140655518, | |
| "learning_rate": 3.626135308908084e-05, | |
| "loss": 0.2745, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 12.438095238095238, | |
| "grad_norm": 1.7667903900146484, | |
| "learning_rate": 3.6178567404614936e-05, | |
| "loss": 0.2752, | |
| "step": 1965 | |
| }, | |
| { | |
| "epoch": 12.46984126984127, | |
| "grad_norm": 1.7334171533584595, | |
| "learning_rate": 3.609562822361487e-05, | |
| "loss": 0.2667, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 12.501587301587302, | |
| "grad_norm": 1.7093929052352905, | |
| "learning_rate": 3.601253668494546e-05, | |
| "loss": 0.2829, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 12.533333333333333, | |
| "grad_norm": 1.8272080421447754, | |
| "learning_rate": 3.592929392956355e-05, | |
| "loss": 0.2583, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 12.565079365079365, | |
| "grad_norm": 1.5226343870162964, | |
| "learning_rate": 3.584590110050241e-05, | |
| "loss": 0.2652, | |
| "step": 1985 | |
| }, | |
| { | |
| "epoch": 12.596825396825396, | |
| "grad_norm": 1.7395323514938354, | |
| "learning_rate": 3.5762359342856036e-05, | |
| "loss": 0.2585, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 12.628571428571428, | |
| "grad_norm": 2.278144359588623, | |
| "learning_rate": 3.567866980376337e-05, | |
| "loss": 0.2862, | |
| "step": 1995 | |
| }, | |
| { | |
| "epoch": 12.66031746031746, | |
| "grad_norm": 1.9335429668426514, | |
| "learning_rate": 3.559483363239262e-05, | |
| "loss": 0.2945, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 12.692063492063491, | |
| "grad_norm": 1.3866902589797974, | |
| "learning_rate": 3.551085197992545e-05, | |
| "loss": 0.2675, | |
| "step": 2005 | |
| }, | |
| { | |
| "epoch": 12.723809523809523, | |
| "grad_norm": 1.7110604047775269, | |
| "learning_rate": 3.5426725999541174e-05, | |
| "loss": 0.2705, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 12.755555555555556, | |
| "grad_norm": 1.6411776542663574, | |
| "learning_rate": 3.534245684640089e-05, | |
| "loss": 0.2482, | |
| "step": 2015 | |
| }, | |
| { | |
| "epoch": 12.787301587301588, | |
| "grad_norm": 1.4663981199264526, | |
| "learning_rate": 3.525804567763167e-05, | |
| "loss": 0.2838, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 12.81904761904762, | |
| "grad_norm": 1.8676140308380127, | |
| "learning_rate": 3.517349365231065e-05, | |
| "loss": 0.2906, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 12.850793650793651, | |
| "grad_norm": 1.6150639057159424, | |
| "learning_rate": 3.508880193144911e-05, | |
| "loss": 0.3011, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 12.882539682539683, | |
| "grad_norm": 2.5578598976135254, | |
| "learning_rate": 3.500397167797654e-05, | |
| "loss": 0.2846, | |
| "step": 2035 | |
| }, | |
| { | |
| "epoch": 12.914285714285715, | |
| "grad_norm": 1.3131941556930542, | |
| "learning_rate": 3.491900405672466e-05, | |
| "loss": 0.2509, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 12.946031746031746, | |
| "grad_norm": 1.5713857412338257, | |
| "learning_rate": 3.483390023441141e-05, | |
| "loss": 0.2824, | |
| "step": 2045 | |
| }, | |
| { | |
| "epoch": 12.977777777777778, | |
| "grad_norm": 1.788149356842041, | |
| "learning_rate": 3.4748661379625e-05, | |
| "loss": 0.2932, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 13.006349206349206, | |
| "grad_norm": 1.080249547958374, | |
| "learning_rate": 3.466328866280778e-05, | |
| "loss": 0.2817, | |
| "step": 2055 | |
| }, | |
| { | |
| "epoch": 13.038095238095238, | |
| "grad_norm": 1.1128981113433838, | |
| "learning_rate": 3.45777832562402e-05, | |
| "loss": 0.2141, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 13.06984126984127, | |
| "grad_norm": 1.6394922733306885, | |
| "learning_rate": 3.449214633402473e-05, | |
| "loss": 0.216, | |
| "step": 2065 | |
| }, | |
| { | |
| "epoch": 13.101587301587301, | |
| "grad_norm": 2.198416233062744, | |
| "learning_rate": 3.440637907206973e-05, | |
| "loss": 0.258, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 13.133333333333333, | |
| "grad_norm": 1.5107051134109497, | |
| "learning_rate": 3.4320482648073285e-05, | |
| "loss": 0.2422, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 13.165079365079364, | |
| "grad_norm": 1.7206802368164062, | |
| "learning_rate": 3.4234458241507034e-05, | |
| "loss": 0.2524, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 13.196825396825396, | |
| "grad_norm": 1.4832013845443726, | |
| "learning_rate": 3.4148307033600014e-05, | |
| "loss": 0.27, | |
| "step": 2085 | |
| }, | |
| { | |
| "epoch": 13.228571428571428, | |
| "grad_norm": 1.592344880104065, | |
| "learning_rate": 3.4062030207322395e-05, | |
| "loss": 0.2486, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 13.260317460317461, | |
| "grad_norm": 1.7398722171783447, | |
| "learning_rate": 3.397562894736926e-05, | |
| "loss": 0.2605, | |
| "step": 2095 | |
| }, | |
| { | |
| "epoch": 13.292063492063493, | |
| "grad_norm": 2.0896050930023193, | |
| "learning_rate": 3.388910444014432e-05, | |
| "loss": 0.2562, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 13.323809523809524, | |
| "grad_norm": 2.7245311737060547, | |
| "learning_rate": 3.380245787374365e-05, | |
| "loss": 0.2573, | |
| "step": 2105 | |
| }, | |
| { | |
| "epoch": 13.355555555555556, | |
| "grad_norm": 1.1951723098754883, | |
| "learning_rate": 3.371569043793936e-05, | |
| "loss": 0.2709, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 13.387301587301588, | |
| "grad_norm": 1.697736144065857, | |
| "learning_rate": 3.3628803324163236e-05, | |
| "loss": 0.2413, | |
| "step": 2115 | |
| }, | |
| { | |
| "epoch": 13.41904761904762, | |
| "grad_norm": 1.447302222251892, | |
| "learning_rate": 3.354179772549042e-05, | |
| "loss": 0.2685, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 13.450793650793651, | |
| "grad_norm": 1.232428789138794, | |
| "learning_rate": 3.3454674836623e-05, | |
| "loss": 0.2546, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 13.482539682539683, | |
| "grad_norm": 1.889103889465332, | |
| "learning_rate": 3.336743585387362e-05, | |
| "loss": 0.2412, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 13.514285714285714, | |
| "grad_norm": 1.888352632522583, | |
| "learning_rate": 3.328008197514903e-05, | |
| "loss": 0.2737, | |
| "step": 2135 | |
| }, | |
| { | |
| "epoch": 13.546031746031746, | |
| "grad_norm": 1.4205390214920044, | |
| "learning_rate": 3.319261439993369e-05, | |
| "loss": 0.2727, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 13.577777777777778, | |
| "grad_norm": 1.442429542541504, | |
| "learning_rate": 3.310503432927322e-05, | |
| "loss": 0.2894, | |
| "step": 2145 | |
| }, | |
| { | |
| "epoch": 13.60952380952381, | |
| "grad_norm": 1.413885474205017, | |
| "learning_rate": 3.3017342965757986e-05, | |
| "loss": 0.2928, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 13.64126984126984, | |
| "grad_norm": 1.0464482307434082, | |
| "learning_rate": 3.2929541513506524e-05, | |
| "loss": 0.252, | |
| "step": 2155 | |
| }, | |
| { | |
| "epoch": 13.673015873015872, | |
| "grad_norm": 1.6716376543045044, | |
| "learning_rate": 3.284163117814906e-05, | |
| "loss": 0.2614, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 13.704761904761904, | |
| "grad_norm": 1.9366419315338135, | |
| "learning_rate": 3.2753613166810914e-05, | |
| "loss": 0.2963, | |
| "step": 2165 | |
| }, | |
| { | |
| "epoch": 13.736507936507937, | |
| "grad_norm": 1.4641704559326172, | |
| "learning_rate": 3.2665488688095944e-05, | |
| "loss": 0.2654, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 13.768253968253969, | |
| "grad_norm": 1.3404383659362793, | |
| "learning_rate": 3.2577258952069934e-05, | |
| "loss": 0.2601, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 13.8, | |
| "grad_norm": 1.6471940279006958, | |
| "learning_rate": 3.2488925170244015e-05, | |
| "loss": 0.2775, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 13.831746031746032, | |
| "grad_norm": 1.3665790557861328, | |
| "learning_rate": 3.240048855555798e-05, | |
| "loss": 0.2602, | |
| "step": 2185 | |
| }, | |
| { | |
| "epoch": 13.863492063492064, | |
| "grad_norm": 1.1486542224884033, | |
| "learning_rate": 3.2311950322363685e-05, | |
| "loss": 0.2603, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 13.895238095238096, | |
| "grad_norm": 1.45487642288208, | |
| "learning_rate": 3.2223311686408304e-05, | |
| "loss": 0.2794, | |
| "step": 2195 | |
| }, | |
| { | |
| "epoch": 13.926984126984127, | |
| "grad_norm": 1.6884818077087402, | |
| "learning_rate": 3.213457386481769e-05, | |
| "loss": 0.2682, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 13.958730158730159, | |
| "grad_norm": 1.9160594940185547, | |
| "learning_rate": 3.204573807607967e-05, | |
| "loss": 0.2619, | |
| "step": 2205 | |
| }, | |
| { | |
| "epoch": 13.99047619047619, | |
| "grad_norm": 1.9444315433502197, | |
| "learning_rate": 3.1956805540027244e-05, | |
| "loss": 0.3029, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 14.019047619047619, | |
| "grad_norm": 1.8739337921142578, | |
| "learning_rate": 3.1867777477821915e-05, | |
| "loss": 0.2298, | |
| "step": 2215 | |
| }, | |
| { | |
| "epoch": 14.05079365079365, | |
| "grad_norm": 1.3971226215362549, | |
| "learning_rate": 3.1778655111936866e-05, | |
| "loss": 0.2351, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 14.082539682539682, | |
| "grad_norm": 1.288387417793274, | |
| "learning_rate": 3.168943966614022e-05, | |
| "loss": 0.2331, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 14.114285714285714, | |
| "grad_norm": 1.2136940956115723, | |
| "learning_rate": 3.1600132365478186e-05, | |
| "loss": 0.2406, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 14.146031746031746, | |
| "grad_norm": 1.8233654499053955, | |
| "learning_rate": 3.151073443625828e-05, | |
| "loss": 0.2496, | |
| "step": 2235 | |
| }, | |
| { | |
| "epoch": 14.177777777777777, | |
| "grad_norm": 1.7613590955734253, | |
| "learning_rate": 3.142124710603245e-05, | |
| "loss": 0.2523, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 14.209523809523809, | |
| "grad_norm": 1.8933606147766113, | |
| "learning_rate": 3.133167160358024e-05, | |
| "loss": 0.2646, | |
| "step": 2245 | |
| }, | |
| { | |
| "epoch": 14.24126984126984, | |
| "grad_norm": 2.7858712673187256, | |
| "learning_rate": 3.124200915889195e-05, | |
| "loss": 0.2818, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 14.273015873015874, | |
| "grad_norm": 1.5656853914260864, | |
| "learning_rate": 3.115226100315166e-05, | |
| "loss": 0.2389, | |
| "step": 2255 | |
| }, | |
| { | |
| "epoch": 14.304761904761905, | |
| "grad_norm": 1.606392741203308, | |
| "learning_rate": 3.106242836872041e-05, | |
| "loss": 0.2302, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 14.336507936507937, | |
| "grad_norm": 2.1553008556365967, | |
| "learning_rate": 3.097251248911922e-05, | |
| "loss": 0.2484, | |
| "step": 2265 | |
| }, | |
| { | |
| "epoch": 14.368253968253969, | |
| "grad_norm": 1.7176517248153687, | |
| "learning_rate": 3.088251459901222e-05, | |
| "loss": 0.268, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 14.4, | |
| "grad_norm": 1.3635174036026, | |
| "learning_rate": 3.0792435934189604e-05, | |
| "loss": 0.2491, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 14.431746031746032, | |
| "grad_norm": 1.4257878065109253, | |
| "learning_rate": 3.070227773155074e-05, | |
| "loss": 0.2884, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 14.463492063492064, | |
| "grad_norm": 1.1561063528060913, | |
| "learning_rate": 3.0612041229087145e-05, | |
| "loss": 0.247, | |
| "step": 2285 | |
| }, | |
| { | |
| "epoch": 14.495238095238095, | |
| "grad_norm": 1.6131694316864014, | |
| "learning_rate": 3.0521727665865485e-05, | |
| "loss": 0.2708, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 14.526984126984127, | |
| "grad_norm": 1.7133935689926147, | |
| "learning_rate": 3.0431338282010606e-05, | |
| "loss": 0.2384, | |
| "step": 2295 | |
| }, | |
| { | |
| "epoch": 14.558730158730159, | |
| "grad_norm": 1.3232437372207642, | |
| "learning_rate": 3.0340874318688433e-05, | |
| "loss": 0.2503, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 14.59047619047619, | |
| "grad_norm": 1.5612751245498657, | |
| "learning_rate": 3.0250337018088994e-05, | |
| "loss": 0.2618, | |
| "step": 2305 | |
| }, | |
| { | |
| "epoch": 14.622222222222222, | |
| "grad_norm": 1.279241681098938, | |
| "learning_rate": 3.0159727623409313e-05, | |
| "loss": 0.2535, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 14.653968253968253, | |
| "grad_norm": 3.142040729522705, | |
| "learning_rate": 3.0069047378836375e-05, | |
| "loss": 0.2745, | |
| "step": 2315 | |
| }, | |
| { | |
| "epoch": 14.685714285714285, | |
| "grad_norm": 1.3240234851837158, | |
| "learning_rate": 2.9978297529530036e-05, | |
| "loss": 0.2591, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 14.717460317460317, | |
| "grad_norm": 1.121077537536621, | |
| "learning_rate": 2.9887479321605895e-05, | |
| "loss": 0.2453, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 14.74920634920635, | |
| "grad_norm": 2.3831980228424072, | |
| "learning_rate": 2.9796594002118218e-05, | |
| "loss": 0.257, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 14.780952380952382, | |
| "grad_norm": 2.175352096557617, | |
| "learning_rate": 2.9705642819042805e-05, | |
| "loss": 0.2434, | |
| "step": 2335 | |
| }, | |
| { | |
| "epoch": 14.812698412698413, | |
| "grad_norm": 1.5452053546905518, | |
| "learning_rate": 2.9614627021259846e-05, | |
| "loss": 0.2683, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 14.844444444444445, | |
| "grad_norm": 1.6765031814575195, | |
| "learning_rate": 2.952354785853677e-05, | |
| "loss": 0.2484, | |
| "step": 2345 | |
| }, | |
| { | |
| "epoch": 14.876190476190477, | |
| "grad_norm": 1.3790463209152222, | |
| "learning_rate": 2.9432406581511096e-05, | |
| "loss": 0.2636, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 14.907936507936508, | |
| "grad_norm": 1.2849820852279663, | |
| "learning_rate": 2.9341204441673266e-05, | |
| "loss": 0.2734, | |
| "step": 2355 | |
| }, | |
| { | |
| "epoch": 14.93968253968254, | |
| "grad_norm": 1.5512354373931885, | |
| "learning_rate": 2.924994269134942e-05, | |
| "loss": 0.2627, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 14.971428571428572, | |
| "grad_norm": 1.2973589897155762, | |
| "learning_rate": 2.9158622583684264e-05, | |
| "loss": 0.2464, | |
| "step": 2365 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "grad_norm": 2.749181032180786, | |
| "learning_rate": 2.906724537262381e-05, | |
| "loss": 0.2622, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 15.031746031746032, | |
| "grad_norm": 1.4415572881698608, | |
| "learning_rate": 2.8975812312898152e-05, | |
| "loss": 0.2395, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 15.063492063492063, | |
| "grad_norm": 1.41774320602417, | |
| "learning_rate": 2.8884324660004325e-05, | |
| "loss": 0.2213, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 15.095238095238095, | |
| "grad_norm": 1.375585675239563, | |
| "learning_rate": 2.8792783670188927e-05, | |
| "loss": 0.2319, | |
| "step": 2385 | |
| }, | |
| { | |
| "epoch": 15.126984126984127, | |
| "grad_norm": 1.2285529375076294, | |
| "learning_rate": 2.870119060043099e-05, | |
| "loss": 0.2317, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 15.158730158730158, | |
| "grad_norm": 2.5581822395324707, | |
| "learning_rate": 2.8609546708424645e-05, | |
| "loss": 0.2567, | |
| "step": 2395 | |
| }, | |
| { | |
| "epoch": 15.19047619047619, | |
| "grad_norm": 2.0000274181365967, | |
| "learning_rate": 2.8517853252561906e-05, | |
| "loss": 0.2265, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 15.222222222222221, | |
| "grad_norm": 1.6797828674316406, | |
| "learning_rate": 2.8426111491915342e-05, | |
| "loss": 0.243, | |
| "step": 2405 | |
| }, | |
| { | |
| "epoch": 15.253968253968253, | |
| "grad_norm": 1.8280093669891357, | |
| "learning_rate": 2.8334322686220826e-05, | |
| "loss": 0.2442, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 15.285714285714286, | |
| "grad_norm": 1.6335142850875854, | |
| "learning_rate": 2.824248809586021e-05, | |
| "loss": 0.229, | |
| "step": 2415 | |
| }, | |
| { | |
| "epoch": 15.317460317460318, | |
| "grad_norm": 2.1388628482818604, | |
| "learning_rate": 2.8150608981844034e-05, | |
| "loss": 0.247, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 15.34920634920635, | |
| "grad_norm": 1.5680638551712036, | |
| "learning_rate": 2.805868660579422e-05, | |
| "loss": 0.2269, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 15.380952380952381, | |
| "grad_norm": 1.6333919763565063, | |
| "learning_rate": 2.7966722229926712e-05, | |
| "loss": 0.2426, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 15.412698412698413, | |
| "grad_norm": 1.4023231267929077, | |
| "learning_rate": 2.787471711703419e-05, | |
| "loss": 0.2466, | |
| "step": 2435 | |
| }, | |
| { | |
| "epoch": 15.444444444444445, | |
| "grad_norm": 1.5598480701446533, | |
| "learning_rate": 2.7782672530468672e-05, | |
| "loss": 0.2504, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 15.476190476190476, | |
| "grad_norm": 1.3230295181274414, | |
| "learning_rate": 2.769058973412424e-05, | |
| "loss": 0.2478, | |
| "step": 2445 | |
| }, | |
| { | |
| "epoch": 15.507936507936508, | |
| "grad_norm": 2.0796375274658203, | |
| "learning_rate": 2.7598469992419646e-05, | |
| "loss": 0.2382, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 15.53968253968254, | |
| "grad_norm": 1.6772873401641846, | |
| "learning_rate": 2.7506314570280916e-05, | |
| "loss": 0.2716, | |
| "step": 2455 | |
| }, | |
| { | |
| "epoch": 15.571428571428571, | |
| "grad_norm": 1.9949897527694702, | |
| "learning_rate": 2.7414124733124046e-05, | |
| "loss": 0.2541, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 15.603174603174603, | |
| "grad_norm": 1.339775562286377, | |
| "learning_rate": 2.7321901746837585e-05, | |
| "loss": 0.238, | |
| "step": 2465 | |
| }, | |
| { | |
| "epoch": 15.634920634920634, | |
| "grad_norm": 1.3870205879211426, | |
| "learning_rate": 2.7229646877765264e-05, | |
| "loss": 0.2377, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 15.666666666666666, | |
| "grad_norm": 2.1485989093780518, | |
| "learning_rate": 2.7137361392688613e-05, | |
| "loss": 0.2748, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 15.698412698412698, | |
| "grad_norm": 1.3165838718414307, | |
| "learning_rate": 2.704504655880955e-05, | |
| "loss": 0.2592, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 15.73015873015873, | |
| "grad_norm": 1.5217639207839966, | |
| "learning_rate": 2.6952703643732983e-05, | |
| "loss": 0.2359, | |
| "step": 2485 | |
| }, | |
| { | |
| "epoch": 15.761904761904763, | |
| "grad_norm": 1.6285779476165771, | |
| "learning_rate": 2.686033391544945e-05, | |
| "loss": 0.2653, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 15.793650793650794, | |
| "grad_norm": 1.6619691848754883, | |
| "learning_rate": 2.676793864231763e-05, | |
| "loss": 0.2572, | |
| "step": 2495 | |
| }, | |
| { | |
| "epoch": 15.825396825396826, | |
| "grad_norm": 1.6636338233947754, | |
| "learning_rate": 2.6675519093046987e-05, | |
| "loss": 0.2702, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 15.857142857142858, | |
| "grad_norm": 1.6794034242630005, | |
| "learning_rate": 2.6583076536680323e-05, | |
| "loss": 0.2508, | |
| "step": 2505 | |
| }, | |
| { | |
| "epoch": 15.88888888888889, | |
| "grad_norm": 1.669232964515686, | |
| "learning_rate": 2.6490612242576344e-05, | |
| "loss": 0.2601, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 15.920634920634921, | |
| "grad_norm": 1.4551194906234741, | |
| "learning_rate": 2.639812748039228e-05, | |
| "loss": 0.2514, | |
| "step": 2515 | |
| }, | |
| { | |
| "epoch": 15.952380952380953, | |
| "grad_norm": 1.8457551002502441, | |
| "learning_rate": 2.6305623520066382e-05, | |
| "loss": 0.2682, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 15.984126984126984, | |
| "grad_norm": 1.318781852722168, | |
| "learning_rate": 2.6213101631800524e-05, | |
| "loss": 0.2452, | |
| "step": 2525 | |
| }, | |
| { | |
| "epoch": 16.012698412698413, | |
| "grad_norm": 1.4368094205856323, | |
| "learning_rate": 2.6120563086042753e-05, | |
| "loss": 0.2398, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 16.044444444444444, | |
| "grad_norm": 1.314927101135254, | |
| "learning_rate": 2.602800915346986e-05, | |
| "loss": 0.2321, | |
| "step": 2535 | |
| }, | |
| { | |
| "epoch": 16.076190476190476, | |
| "grad_norm": 1.501521110534668, | |
| "learning_rate": 2.5935441104969914e-05, | |
| "loss": 0.2329, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 16.107936507936508, | |
| "grad_norm": 1.3708770275115967, | |
| "learning_rate": 2.5842860211624802e-05, | |
| "loss": 0.2272, | |
| "step": 2545 | |
| }, | |
| { | |
| "epoch": 16.13968253968254, | |
| "grad_norm": 1.6570358276367188, | |
| "learning_rate": 2.5750267744692786e-05, | |
| "loss": 0.2323, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 16.17142857142857, | |
| "grad_norm": 1.4252843856811523, | |
| "learning_rate": 2.5657664975591063e-05, | |
| "loss": 0.2308, | |
| "step": 2555 | |
| }, | |
| { | |
| "epoch": 16.203174603174602, | |
| "grad_norm": 1.9588584899902344, | |
| "learning_rate": 2.5565053175878285e-05, | |
| "loss": 0.243, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 16.234920634920634, | |
| "grad_norm": 1.7678033113479614, | |
| "learning_rate": 2.5472433617237107e-05, | |
| "loss": 0.2304, | |
| "step": 2565 | |
| }, | |
| { | |
| "epoch": 16.266666666666666, | |
| "grad_norm": 1.45453679561615, | |
| "learning_rate": 2.5379807571456727e-05, | |
| "loss": 0.231, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 16.298412698412697, | |
| "grad_norm": 1.313704490661621, | |
| "learning_rate": 2.52871763104154e-05, | |
| "loss": 0.2434, | |
| "step": 2575 | |
| }, | |
| { | |
| "epoch": 16.33015873015873, | |
| "grad_norm": 1.331613302230835, | |
| "learning_rate": 2.519454110606304e-05, | |
| "loss": 0.2477, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 16.36190476190476, | |
| "grad_norm": 1.5135482549667358, | |
| "learning_rate": 2.510190323040366e-05, | |
| "loss": 0.2302, | |
| "step": 2585 | |
| }, | |
| { | |
| "epoch": 16.393650793650792, | |
| "grad_norm": 1.025403618812561, | |
| "learning_rate": 2.5009263955477986e-05, | |
| "loss": 0.2145, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 16.425396825396824, | |
| "grad_norm": 1.3128396272659302, | |
| "learning_rate": 2.491662455334592e-05, | |
| "loss": 0.237, | |
| "step": 2595 | |
| }, | |
| { | |
| "epoch": 16.457142857142856, | |
| "grad_norm": 1.9846177101135254, | |
| "learning_rate": 2.482398629606916e-05, | |
| "loss": 0.2227, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 16.488888888888887, | |
| "grad_norm": 1.3919912576675415, | |
| "learning_rate": 2.473135045569365e-05, | |
| "loss": 0.2542, | |
| "step": 2605 | |
| }, | |
| { | |
| "epoch": 16.520634920634922, | |
| "grad_norm": 1.4371297359466553, | |
| "learning_rate": 2.463871830423215e-05, | |
| "loss": 0.2533, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 16.552380952380954, | |
| "grad_norm": 1.2560791969299316, | |
| "learning_rate": 2.4546091113646774e-05, | |
| "loss": 0.2425, | |
| "step": 2615 | |
| }, | |
| { | |
| "epoch": 16.584126984126986, | |
| "grad_norm": 1.1686952114105225, | |
| "learning_rate": 2.445347015583151e-05, | |
| "loss": 0.2196, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 16.615873015873017, | |
| "grad_norm": 1.4156274795532227, | |
| "learning_rate": 2.436085670259479e-05, | |
| "loss": 0.2514, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 16.64761904761905, | |
| "grad_norm": 1.7230234146118164, | |
| "learning_rate": 2.426825202564194e-05, | |
| "loss": 0.24, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 16.67936507936508, | |
| "grad_norm": 1.6709225177764893, | |
| "learning_rate": 2.417565739655783e-05, | |
| "loss": 0.2523, | |
| "step": 2635 | |
| }, | |
| { | |
| "epoch": 16.711111111111112, | |
| "grad_norm": 1.4194560050964355, | |
| "learning_rate": 2.4083074086789332e-05, | |
| "loss": 0.2464, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 16.742857142857144, | |
| "grad_norm": 1.3121566772460938, | |
| "learning_rate": 2.3990503367627896e-05, | |
| "loss": 0.234, | |
| "step": 2645 | |
| }, | |
| { | |
| "epoch": 16.774603174603175, | |
| "grad_norm": 1.841577410697937, | |
| "learning_rate": 2.3897946510192104e-05, | |
| "loss": 0.2494, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 16.806349206349207, | |
| "grad_norm": 1.262974739074707, | |
| "learning_rate": 2.3805404785410157e-05, | |
| "loss": 0.2648, | |
| "step": 2655 | |
| }, | |
| { | |
| "epoch": 16.83809523809524, | |
| "grad_norm": 2.1865732669830322, | |
| "learning_rate": 2.3712879464002504e-05, | |
| "loss": 0.2486, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 16.86984126984127, | |
| "grad_norm": 1.3114222288131714, | |
| "learning_rate": 2.362037181646433e-05, | |
| "loss": 0.2617, | |
| "step": 2665 | |
| }, | |
| { | |
| "epoch": 16.901587301587302, | |
| "grad_norm": 1.6503785848617554, | |
| "learning_rate": 2.3527883113048154e-05, | |
| "loss": 0.2588, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 16.933333333333334, | |
| "grad_norm": 1.4067734479904175, | |
| "learning_rate": 2.343541462374634e-05, | |
| "loss": 0.2449, | |
| "step": 2675 | |
| }, | |
| { | |
| "epoch": 16.965079365079365, | |
| "grad_norm": 1.4102448225021362, | |
| "learning_rate": 2.3342967618273712e-05, | |
| "loss": 0.2418, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 16.996825396825397, | |
| "grad_norm": 1.4610909223556519, | |
| "learning_rate": 2.3250543366050074e-05, | |
| "loss": 0.2445, | |
| "step": 2685 | |
| }, | |
| { | |
| "epoch": 17.025396825396825, | |
| "grad_norm": 1.2868993282318115, | |
| "learning_rate": 2.3158143136182812e-05, | |
| "loss": 0.231, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 17.057142857142857, | |
| "grad_norm": 1.2340950965881348, | |
| "learning_rate": 2.3065768197449456e-05, | |
| "loss": 0.226, | |
| "step": 2695 | |
| }, | |
| { | |
| "epoch": 17.08888888888889, | |
| "grad_norm": 1.2958239316940308, | |
| "learning_rate": 2.2973419818280225e-05, | |
| "loss": 0.2279, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 17.12063492063492, | |
| "grad_norm": 1.271338701248169, | |
| "learning_rate": 2.288109926674068e-05, | |
| "loss": 0.2216, | |
| "step": 2705 | |
| }, | |
| { | |
| "epoch": 17.152380952380952, | |
| "grad_norm": 1.2706894874572754, | |
| "learning_rate": 2.2788807810514237e-05, | |
| "loss": 0.2261, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 17.184126984126983, | |
| "grad_norm": 1.331217646598816, | |
| "learning_rate": 2.2696546716884835e-05, | |
| "loss": 0.2149, | |
| "step": 2715 | |
| }, | |
| { | |
| "epoch": 17.215873015873015, | |
| "grad_norm": 1.74607515335083, | |
| "learning_rate": 2.260431725271944e-05, | |
| "loss": 0.245, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 17.247619047619047, | |
| "grad_norm": 1.0970765352249146, | |
| "learning_rate": 2.2512120684450745e-05, | |
| "loss": 0.2187, | |
| "step": 2725 | |
| }, | |
| { | |
| "epoch": 17.27936507936508, | |
| "grad_norm": 1.699119210243225, | |
| "learning_rate": 2.241995827805974e-05, | |
| "loss": 0.2111, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 17.31111111111111, | |
| "grad_norm": 2.3342185020446777, | |
| "learning_rate": 2.23278312990583e-05, | |
| "loss": 0.2292, | |
| "step": 2735 | |
| }, | |
| { | |
| "epoch": 17.34285714285714, | |
| "grad_norm": 1.6536673307418823, | |
| "learning_rate": 2.2235741012471866e-05, | |
| "loss": 0.23, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 17.374603174603173, | |
| "grad_norm": 2.414911985397339, | |
| "learning_rate": 2.2143688682822e-05, | |
| "loss": 0.2433, | |
| "step": 2745 | |
| }, | |
| { | |
| "epoch": 17.406349206349205, | |
| "grad_norm": 1.3330169916152954, | |
| "learning_rate": 2.205167557410912e-05, | |
| "loss": 0.224, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 17.438095238095237, | |
| "grad_norm": 1.5514572858810425, | |
| "learning_rate": 2.195970294979504e-05, | |
| "loss": 0.2501, | |
| "step": 2755 | |
| }, | |
| { | |
| "epoch": 17.469841269841268, | |
| "grad_norm": 1.316927433013916, | |
| "learning_rate": 2.1867772072785708e-05, | |
| "loss": 0.2165, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 17.5015873015873, | |
| "grad_norm": 1.425007700920105, | |
| "learning_rate": 2.1775884205413793e-05, | |
| "loss": 0.253, | |
| "step": 2765 | |
| }, | |
| { | |
| "epoch": 17.533333333333335, | |
| "grad_norm": 2.77079701423645, | |
| "learning_rate": 2.168404060942141e-05, | |
| "loss": 0.2637, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 17.565079365079367, | |
| "grad_norm": 1.365783452987671, | |
| "learning_rate": 2.1592242545942755e-05, | |
| "loss": 0.2303, | |
| "step": 2775 | |
| }, | |
| { | |
| "epoch": 17.5968253968254, | |
| "grad_norm": 1.606326699256897, | |
| "learning_rate": 2.150049127548681e-05, | |
| "loss": 0.2419, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 17.62857142857143, | |
| "grad_norm": 1.3090287446975708, | |
| "learning_rate": 2.1408788057920033e-05, | |
| "loss": 0.2346, | |
| "step": 2785 | |
| }, | |
| { | |
| "epoch": 17.66031746031746, | |
| "grad_norm": 1.6885017156600952, | |
| "learning_rate": 2.131713415244902e-05, | |
| "loss": 0.2238, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 17.692063492063493, | |
| "grad_norm": 1.3142653703689575, | |
| "learning_rate": 2.122553081760329e-05, | |
| "loss": 0.2423, | |
| "step": 2795 | |
| }, | |
| { | |
| "epoch": 17.723809523809525, | |
| "grad_norm": 1.4189752340316772, | |
| "learning_rate": 2.1133979311217923e-05, | |
| "loss": 0.2411, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 17.755555555555556, | |
| "grad_norm": 1.598166584968567, | |
| "learning_rate": 2.1042480890416368e-05, | |
| "loss": 0.2538, | |
| "step": 2805 | |
| }, | |
| { | |
| "epoch": 17.787301587301588, | |
| "grad_norm": 1.8780243396759033, | |
| "learning_rate": 2.0951036811593087e-05, | |
| "loss": 0.2284, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 17.81904761904762, | |
| "grad_norm": 1.5214310884475708, | |
| "learning_rate": 2.0859648330396386e-05, | |
| "loss": 0.236, | |
| "step": 2815 | |
| }, | |
| { | |
| "epoch": 17.85079365079365, | |
| "grad_norm": 1.4409775733947754, | |
| "learning_rate": 2.0768316701711153e-05, | |
| "loss": 0.2438, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 17.882539682539683, | |
| "grad_norm": 1.4400851726531982, | |
| "learning_rate": 2.067704317964158e-05, | |
| "loss": 0.2594, | |
| "step": 2825 | |
| }, | |
| { | |
| "epoch": 17.914285714285715, | |
| "grad_norm": 1.6254900693893433, | |
| "learning_rate": 2.0585829017494017e-05, | |
| "loss": 0.2506, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 17.946031746031746, | |
| "grad_norm": 1.6710137128829956, | |
| "learning_rate": 2.049467546775968e-05, | |
| "loss": 0.2627, | |
| "step": 2835 | |
| }, | |
| { | |
| "epoch": 17.977777777777778, | |
| "grad_norm": 1.5946308374404907, | |
| "learning_rate": 2.0403583782097547e-05, | |
| "loss": 0.246, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 18.006349206349206, | |
| "grad_norm": 1.2540661096572876, | |
| "learning_rate": 2.0312555211317074e-05, | |
| "loss": 0.2539, | |
| "step": 2845 | |
| }, | |
| { | |
| "epoch": 18.038095238095238, | |
| "grad_norm": 1.0493476390838623, | |
| "learning_rate": 2.0221591005361104e-05, | |
| "loss": 0.226, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 18.06984126984127, | |
| "grad_norm": 1.243975043296814, | |
| "learning_rate": 2.0130692413288627e-05, | |
| "loss": 0.2185, | |
| "step": 2855 | |
| }, | |
| { | |
| "epoch": 18.1015873015873, | |
| "grad_norm": 1.2368860244750977, | |
| "learning_rate": 2.00398606832577e-05, | |
| "loss": 0.2237, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 18.133333333333333, | |
| "grad_norm": 1.3286014795303345, | |
| "learning_rate": 1.9949097062508267e-05, | |
| "loss": 0.2216, | |
| "step": 2865 | |
| }, | |
| { | |
| "epoch": 18.165079365079364, | |
| "grad_norm": 1.6492300033569336, | |
| "learning_rate": 1.985840279734503e-05, | |
| "loss": 0.2363, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 18.196825396825396, | |
| "grad_norm": 1.306085228919983, | |
| "learning_rate": 1.976777913312037e-05, | |
| "loss": 0.2175, | |
| "step": 2875 | |
| }, | |
| { | |
| "epoch": 18.228571428571428, | |
| "grad_norm": 1.6214182376861572, | |
| "learning_rate": 1.9677227314217188e-05, | |
| "loss": 0.2118, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 18.26031746031746, | |
| "grad_norm": 1.2816709280014038, | |
| "learning_rate": 1.9586748584031895e-05, | |
| "loss": 0.2067, | |
| "step": 2885 | |
| }, | |
| { | |
| "epoch": 18.29206349206349, | |
| "grad_norm": 2.00748610496521, | |
| "learning_rate": 1.949634418495726e-05, | |
| "loss": 0.2097, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 18.323809523809523, | |
| "grad_norm": 1.5324851274490356, | |
| "learning_rate": 1.940601535836542e-05, | |
| "loss": 0.2364, | |
| "step": 2895 | |
| }, | |
| { | |
| "epoch": 18.355555555555554, | |
| "grad_norm": 1.6603834629058838, | |
| "learning_rate": 1.9315763344590765e-05, | |
| "loss": 0.2382, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 18.387301587301586, | |
| "grad_norm": 1.5497424602508545, | |
| "learning_rate": 1.9225589382912978e-05, | |
| "loss": 0.2385, | |
| "step": 2905 | |
| }, | |
| { | |
| "epoch": 18.419047619047618, | |
| "grad_norm": 1.3442565202713013, | |
| "learning_rate": 1.9135494711539975e-05, | |
| "loss": 0.2324, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 18.45079365079365, | |
| "grad_norm": 1.5078067779541016, | |
| "learning_rate": 1.9045480567590894e-05, | |
| "loss": 0.229, | |
| "step": 2915 | |
| }, | |
| { | |
| "epoch": 18.48253968253968, | |
| "grad_norm": 1.402087926864624, | |
| "learning_rate": 1.895554818707915e-05, | |
| "loss": 0.2173, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 18.514285714285712, | |
| "grad_norm": 1.2340173721313477, | |
| "learning_rate": 1.88656988048954e-05, | |
| "loss": 0.2435, | |
| "step": 2925 | |
| }, | |
| { | |
| "epoch": 18.546031746031748, | |
| "grad_norm": 1.2508577108383179, | |
| "learning_rate": 1.8775933654790673e-05, | |
| "loss": 0.2316, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 18.57777777777778, | |
| "grad_norm": 1.0763332843780518, | |
| "learning_rate": 1.8686253969359334e-05, | |
| "loss": 0.2208, | |
| "step": 2935 | |
| }, | |
| { | |
| "epoch": 18.60952380952381, | |
| "grad_norm": 1.2881734371185303, | |
| "learning_rate": 1.8596660980022258e-05, | |
| "loss": 0.2163, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 18.641269841269843, | |
| "grad_norm": 1.675923466682434, | |
| "learning_rate": 1.8507155917009793e-05, | |
| "loss": 0.2536, | |
| "step": 2945 | |
| }, | |
| { | |
| "epoch": 18.673015873015874, | |
| "grad_norm": 1.1991617679595947, | |
| "learning_rate": 1.8417740009345017e-05, | |
| "loss": 0.2406, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 18.704761904761906, | |
| "grad_norm": 1.3702925443649292, | |
| "learning_rate": 1.8328414484826745e-05, | |
| "loss": 0.2415, | |
| "step": 2955 | |
| }, | |
| { | |
| "epoch": 18.736507936507937, | |
| "grad_norm": 1.0503740310668945, | |
| "learning_rate": 1.8239180570012718e-05, | |
| "loss": 0.222, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 18.76825396825397, | |
| "grad_norm": 1.5632826089859009, | |
| "learning_rate": 1.8150039490202772e-05, | |
| "loss": 0.2494, | |
| "step": 2965 | |
| }, | |
| { | |
| "epoch": 18.8, | |
| "grad_norm": 1.1418730020523071, | |
| "learning_rate": 1.806099246942196e-05, | |
| "loss": 0.2481, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 18.831746031746032, | |
| "grad_norm": 1.4492175579071045, | |
| "learning_rate": 1.7972040730403818e-05, | |
| "loss": 0.2314, | |
| "step": 2975 | |
| }, | |
| { | |
| "epoch": 18.863492063492064, | |
| "grad_norm": 1.4045511484146118, | |
| "learning_rate": 1.7883185494573513e-05, | |
| "loss": 0.2473, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 18.895238095238096, | |
| "grad_norm": 1.394841194152832, | |
| "learning_rate": 1.7794427982031104e-05, | |
| "loss": 0.2307, | |
| "step": 2985 | |
| }, | |
| { | |
| "epoch": 18.926984126984127, | |
| "grad_norm": 1.3118870258331299, | |
| "learning_rate": 1.7705769411534794e-05, | |
| "loss": 0.2373, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 18.95873015873016, | |
| "grad_norm": 1.7034424543380737, | |
| "learning_rate": 1.7617211000484142e-05, | |
| "loss": 0.2396, | |
| "step": 2995 | |
| }, | |
| { | |
| "epoch": 18.99047619047619, | |
| "grad_norm": 1.27837336063385, | |
| "learning_rate": 1.7528753964903422e-05, | |
| "loss": 0.2447, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 19.01904761904762, | |
| "grad_norm": 0.9308731555938721, | |
| "learning_rate": 1.7440399519424866e-05, | |
| "loss": 0.2329, | |
| "step": 3005 | |
| }, | |
| { | |
| "epoch": 19.05079365079365, | |
| "grad_norm": 1.1904468536376953, | |
| "learning_rate": 1.735214887727204e-05, | |
| "loss": 0.2075, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 19.082539682539682, | |
| "grad_norm": 1.3396952152252197, | |
| "learning_rate": 1.7264003250243102e-05, | |
| "loss": 0.2144, | |
| "step": 3015 | |
| }, | |
| { | |
| "epoch": 19.114285714285714, | |
| "grad_norm": 0.9192984700202942, | |
| "learning_rate": 1.717596384869426e-05, | |
| "loss": 0.2147, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 19.146031746031746, | |
| "grad_norm": 1.3901828527450562, | |
| "learning_rate": 1.7088031881523076e-05, | |
| "loss": 0.2055, | |
| "step": 3025 | |
| }, | |
| { | |
| "epoch": 19.177777777777777, | |
| "grad_norm": 1.410027265548706, | |
| "learning_rate": 1.7000208556151915e-05, | |
| "loss": 0.2185, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 19.20952380952381, | |
| "grad_norm": 1.863762378692627, | |
| "learning_rate": 1.6912495078511342e-05, | |
| "loss": 0.2365, | |
| "step": 3035 | |
| }, | |
| { | |
| "epoch": 19.24126984126984, | |
| "grad_norm": 1.281455636024475, | |
| "learning_rate": 1.682489265302355e-05, | |
| "loss": 0.2189, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 19.273015873015872, | |
| "grad_norm": 1.5454927682876587, | |
| "learning_rate": 1.6737402482585863e-05, | |
| "loss": 0.2234, | |
| "step": 3045 | |
| }, | |
| { | |
| "epoch": 19.304761904761904, | |
| "grad_norm": 1.396296501159668, | |
| "learning_rate": 1.6650025768554172e-05, | |
| "loss": 0.2196, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 19.336507936507935, | |
| "grad_norm": 1.1868641376495361, | |
| "learning_rate": 1.6562763710726493e-05, | |
| "loss": 0.215, | |
| "step": 3055 | |
| }, | |
| { | |
| "epoch": 19.368253968253967, | |
| "grad_norm": 1.503721833229065, | |
| "learning_rate": 1.6475617507326418e-05, | |
| "loss": 0.2164, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 19.4, | |
| "grad_norm": 1.7851678133010864, | |
| "learning_rate": 1.638858835498674e-05, | |
| "loss": 0.2489, | |
| "step": 3065 | |
| }, | |
| { | |
| "epoch": 19.43174603174603, | |
| "grad_norm": 1.8271689414978027, | |
| "learning_rate": 1.6301677448732967e-05, | |
| "loss": 0.2379, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 19.463492063492062, | |
| "grad_norm": 1.2250396013259888, | |
| "learning_rate": 1.6214885981966937e-05, | |
| "loss": 0.2355, | |
| "step": 3075 | |
| }, | |
| { | |
| "epoch": 19.495238095238093, | |
| "grad_norm": 1.5733988285064697, | |
| "learning_rate": 1.6128215146450438e-05, | |
| "loss": 0.2168, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 19.52698412698413, | |
| "grad_norm": 1.329305648803711, | |
| "learning_rate": 1.60416661322888e-05, | |
| "loss": 0.2241, | |
| "step": 3085 | |
| }, | |
| { | |
| "epoch": 19.55873015873016, | |
| "grad_norm": 1.8745321035385132, | |
| "learning_rate": 1.5955240127914618e-05, | |
| "loss": 0.2346, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 19.590476190476192, | |
| "grad_norm": 1.7080330848693848, | |
| "learning_rate": 1.5868938320071376e-05, | |
| "loss": 0.2414, | |
| "step": 3095 | |
| }, | |
| { | |
| "epoch": 19.622222222222224, | |
| "grad_norm": 1.6266529560089111, | |
| "learning_rate": 1.5782761893797197e-05, | |
| "loss": 0.2379, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 19.653968253968255, | |
| "grad_norm": 1.3323780298233032, | |
| "learning_rate": 1.569671203240852e-05, | |
| "loss": 0.2557, | |
| "step": 3105 | |
| }, | |
| { | |
| "epoch": 19.685714285714287, | |
| "grad_norm": 1.2912445068359375, | |
| "learning_rate": 1.561078991748391e-05, | |
| "loss": 0.2481, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 19.71746031746032, | |
| "grad_norm": 1.3497467041015625, | |
| "learning_rate": 1.552499672884778e-05, | |
| "loss": 0.2264, | |
| "step": 3115 | |
| }, | |
| { | |
| "epoch": 19.74920634920635, | |
| "grad_norm": 1.301652431488037, | |
| "learning_rate": 1.5439333644554227e-05, | |
| "loss": 0.2155, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 19.78095238095238, | |
| "grad_norm": 1.579917311668396, | |
| "learning_rate": 1.5353801840870862e-05, | |
| "loss": 0.2211, | |
| "step": 3125 | |
| }, | |
| { | |
| "epoch": 19.812698412698413, | |
| "grad_norm": 1.1716071367263794, | |
| "learning_rate": 1.5268402492262586e-05, | |
| "loss": 0.2202, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 19.844444444444445, | |
| "grad_norm": 1.1521406173706055, | |
| "learning_rate": 1.5183136771375579e-05, | |
| "loss": 0.2097, | |
| "step": 3135 | |
| }, | |
| { | |
| "epoch": 19.876190476190477, | |
| "grad_norm": 2.2419068813323975, | |
| "learning_rate": 1.509800584902108e-05, | |
| "loss": 0.2342, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 19.90793650793651, | |
| "grad_norm": 1.8932515382766724, | |
| "learning_rate": 1.5013010894159415e-05, | |
| "loss": 0.2294, | |
| "step": 3145 | |
| }, | |
| { | |
| "epoch": 19.93968253968254, | |
| "grad_norm": 1.2340586185455322, | |
| "learning_rate": 1.4928153073883843e-05, | |
| "loss": 0.2099, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 19.97142857142857, | |
| "grad_norm": 1.4277369976043701, | |
| "learning_rate": 1.4843433553404649e-05, | |
| "loss": 0.2561, | |
| "step": 3155 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 1.2759459018707275, | |
| "learning_rate": 1.4758853496033009e-05, | |
| "loss": 0.2302, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 20.03174603174603, | |
| "grad_norm": 1.4725241661071777, | |
| "learning_rate": 1.4674414063165137e-05, | |
| "loss": 0.195, | |
| "step": 3165 | |
| }, | |
| { | |
| "epoch": 20.063492063492063, | |
| "grad_norm": 1.3785438537597656, | |
| "learning_rate": 1.4590116414266276e-05, | |
| "loss": 0.2214, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 20.095238095238095, | |
| "grad_norm": 1.3412481546401978, | |
| "learning_rate": 1.4505961706854754e-05, | |
| "loss": 0.2176, | |
| "step": 3175 | |
| }, | |
| { | |
| "epoch": 20.126984126984127, | |
| "grad_norm": 1.5986071825027466, | |
| "learning_rate": 1.4421951096486171e-05, | |
| "loss": 0.2151, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 20.158730158730158, | |
| "grad_norm": 1.5248156785964966, | |
| "learning_rate": 1.4338085736737462e-05, | |
| "loss": 0.2219, | |
| "step": 3185 | |
| }, | |
| { | |
| "epoch": 20.19047619047619, | |
| "grad_norm": 1.3911489248275757, | |
| "learning_rate": 1.4254366779191067e-05, | |
| "loss": 0.2111, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 20.22222222222222, | |
| "grad_norm": 1.5443161725997925, | |
| "learning_rate": 1.4170795373419148e-05, | |
| "loss": 0.2127, | |
| "step": 3195 | |
| }, | |
| { | |
| "epoch": 20.253968253968253, | |
| "grad_norm": 1.4181116819381714, | |
| "learning_rate": 1.4087372666967768e-05, | |
| "loss": 0.2053, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 20.285714285714285, | |
| "grad_norm": 1.094240665435791, | |
| "learning_rate": 1.4004099805341187e-05, | |
| "loss": 0.2088, | |
| "step": 3205 | |
| }, | |
| { | |
| "epoch": 20.317460317460316, | |
| "grad_norm": 1.1608216762542725, | |
| "learning_rate": 1.392097793198605e-05, | |
| "loss": 0.1988, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 20.349206349206348, | |
| "grad_norm": 1.3101439476013184, | |
| "learning_rate": 1.383800818827577e-05, | |
| "loss": 0.2301, | |
| "step": 3215 | |
| }, | |
| { | |
| "epoch": 20.38095238095238, | |
| "grad_norm": 1.244240403175354, | |
| "learning_rate": 1.3755191713494797e-05, | |
| "loss": 0.2194, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 20.41269841269841, | |
| "grad_norm": 2.0739810466766357, | |
| "learning_rate": 1.3672529644823004e-05, | |
| "loss": 0.2298, | |
| "step": 3225 | |
| }, | |
| { | |
| "epoch": 20.444444444444443, | |
| "grad_norm": 1.6282612085342407, | |
| "learning_rate": 1.359002311732005e-05, | |
| "loss": 0.2387, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 20.476190476190474, | |
| "grad_norm": 1.4091988801956177, | |
| "learning_rate": 1.3507673263909856e-05, | |
| "loss": 0.202, | |
| "step": 3235 | |
| }, | |
| { | |
| "epoch": 20.507936507936506, | |
| "grad_norm": 1.544467806816101, | |
| "learning_rate": 1.3425481215364922e-05, | |
| "loss": 0.2422, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 20.53968253968254, | |
| "grad_norm": 1.7039313316345215, | |
| "learning_rate": 1.334344810029095e-05, | |
| "loss": 0.1986, | |
| "step": 3245 | |
| }, | |
| { | |
| "epoch": 20.571428571428573, | |
| "grad_norm": 1.4984982013702393, | |
| "learning_rate": 1.3261575045111235e-05, | |
| "loss": 0.2303, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 20.603174603174605, | |
| "grad_norm": 1.1875499486923218, | |
| "learning_rate": 1.3179863174051238e-05, | |
| "loss": 0.2273, | |
| "step": 3255 | |
| }, | |
| { | |
| "epoch": 20.634920634920636, | |
| "grad_norm": 1.856332778930664, | |
| "learning_rate": 1.3098313609123192e-05, | |
| "loss": 0.2323, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 20.666666666666668, | |
| "grad_norm": 1.6271690130233765, | |
| "learning_rate": 1.301692747011058e-05, | |
| "loss": 0.2387, | |
| "step": 3265 | |
| }, | |
| { | |
| "epoch": 20.6984126984127, | |
| "grad_norm": 1.8108351230621338, | |
| "learning_rate": 1.2935705874552894e-05, | |
| "loss": 0.2224, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 20.73015873015873, | |
| "grad_norm": 1.3620928525924683, | |
| "learning_rate": 1.2854649937730196e-05, | |
| "loss": 0.2237, | |
| "step": 3275 | |
| }, | |
| { | |
| "epoch": 20.761904761904763, | |
| "grad_norm": 1.5440479516983032, | |
| "learning_rate": 1.2773760772647839e-05, | |
| "loss": 0.2319, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 20.793650793650794, | |
| "grad_norm": 1.2190790176391602, | |
| "learning_rate": 1.269303949002118e-05, | |
| "loss": 0.2169, | |
| "step": 3285 | |
| }, | |
| { | |
| "epoch": 20.825396825396826, | |
| "grad_norm": 1.42155122756958, | |
| "learning_rate": 1.261248719826032e-05, | |
| "loss": 0.2319, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 20.857142857142858, | |
| "grad_norm": 1.7695163488388062, | |
| "learning_rate": 1.2532105003454919e-05, | |
| "loss": 0.2421, | |
| "step": 3295 | |
| }, | |
| { | |
| "epoch": 20.88888888888889, | |
| "grad_norm": 1.3969519138336182, | |
| "learning_rate": 1.245189400935895e-05, | |
| "loss": 0.225, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 20.92063492063492, | |
| "grad_norm": 1.1847434043884277, | |
| "learning_rate": 1.2371855317375583e-05, | |
| "loss": 0.215, | |
| "step": 3305 | |
| }, | |
| { | |
| "epoch": 20.952380952380953, | |
| "grad_norm": 1.7256789207458496, | |
| "learning_rate": 1.2291990026542058e-05, | |
| "loss": 0.2446, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 20.984126984126984, | |
| "grad_norm": 1.1966052055358887, | |
| "learning_rate": 1.2212299233514582e-05, | |
| "loss": 0.2108, | |
| "step": 3315 | |
| }, | |
| { | |
| "epoch": 21.012698412698413, | |
| "grad_norm": 1.0815582275390625, | |
| "learning_rate": 1.2132784032553263e-05, | |
| "loss": 0.2431, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 21.044444444444444, | |
| "grad_norm": 1.4132349491119385, | |
| "learning_rate": 1.2053445515507147e-05, | |
| "loss": 0.2186, | |
| "step": 3325 | |
| }, | |
| { | |
| "epoch": 21.076190476190476, | |
| "grad_norm": 1.0495672225952148, | |
| "learning_rate": 1.1974284771799096e-05, | |
| "loss": 0.197, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 21.107936507936508, | |
| "grad_norm": 1.072950005531311, | |
| "learning_rate": 1.1895302888410986e-05, | |
| "loss": 0.1922, | |
| "step": 3335 | |
| }, | |
| { | |
| "epoch": 21.13968253968254, | |
| "grad_norm": 1.2509602308273315, | |
| "learning_rate": 1.1816500949868659e-05, | |
| "loss": 0.2067, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 21.17142857142857, | |
| "grad_norm": 1.460114598274231, | |
| "learning_rate": 1.1737880038227082e-05, | |
| "loss": 0.2136, | |
| "step": 3345 | |
| }, | |
| { | |
| "epoch": 21.203174603174602, | |
| "grad_norm": 1.2986267805099487, | |
| "learning_rate": 1.1659441233055513e-05, | |
| "loss": 0.2126, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 21.234920634920634, | |
| "grad_norm": 1.159629464149475, | |
| "learning_rate": 1.1581185611422585e-05, | |
| "loss": 0.2057, | |
| "step": 3355 | |
| }, | |
| { | |
| "epoch": 21.266666666666666, | |
| "grad_norm": 1.652693748474121, | |
| "learning_rate": 1.1503114247881648e-05, | |
| "loss": 0.2304, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 21.298412698412697, | |
| "grad_norm": 1.2332572937011719, | |
| "learning_rate": 1.14252282144559e-05, | |
| "loss": 0.2093, | |
| "step": 3365 | |
| }, | |
| { | |
| "epoch": 21.33015873015873, | |
| "grad_norm": 1.5902972221374512, | |
| "learning_rate": 1.1347528580623726e-05, | |
| "loss": 0.235, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 21.36190476190476, | |
| "grad_norm": 1.738487720489502, | |
| "learning_rate": 1.1270016413303997e-05, | |
| "loss": 0.2146, | |
| "step": 3375 | |
| }, | |
| { | |
| "epoch": 21.393650793650792, | |
| "grad_norm": 1.8629635572433472, | |
| "learning_rate": 1.1192692776841404e-05, | |
| "loss": 0.2254, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 21.425396825396824, | |
| "grad_norm": 1.3286288976669312, | |
| "learning_rate": 1.1115558732991898e-05, | |
| "loss": 0.2119, | |
| "step": 3385 | |
| }, | |
| { | |
| "epoch": 21.457142857142856, | |
| "grad_norm": 1.8470025062561035, | |
| "learning_rate": 1.103861534090804e-05, | |
| "loss": 0.2332, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 21.488888888888887, | |
| "grad_norm": 1.7377171516418457, | |
| "learning_rate": 1.0961863657124496e-05, | |
| "loss": 0.2281, | |
| "step": 3395 | |
| }, | |
| { | |
| "epoch": 21.520634920634922, | |
| "grad_norm": 1.1695598363876343, | |
| "learning_rate": 1.0885304735543528e-05, | |
| "loss": 0.2147, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 21.552380952380954, | |
| "grad_norm": 1.0073562860488892, | |
| "learning_rate": 1.0808939627420514e-05, | |
| "loss": 0.222, | |
| "step": 3405 | |
| }, | |
| { | |
| "epoch": 21.584126984126986, | |
| "grad_norm": 1.742859125137329, | |
| "learning_rate": 1.0732769381349508e-05, | |
| "loss": 0.2205, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 21.615873015873017, | |
| "grad_norm": 1.1256352663040161, | |
| "learning_rate": 1.0656795043248885e-05, | |
| "loss": 0.2205, | |
| "step": 3415 | |
| }, | |
| { | |
| "epoch": 21.64761904761905, | |
| "grad_norm": 1.3917202949523926, | |
| "learning_rate": 1.0581017656346904e-05, | |
| "loss": 0.2242, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 21.67936507936508, | |
| "grad_norm": 1.3438773155212402, | |
| "learning_rate": 1.0505438261167439e-05, | |
| "loss": 0.2101, | |
| "step": 3425 | |
| }, | |
| { | |
| "epoch": 21.711111111111112, | |
| "grad_norm": 1.573519229888916, | |
| "learning_rate": 1.0430057895515678e-05, | |
| "loss": 0.2339, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 21.742857142857144, | |
| "grad_norm": 1.392638087272644, | |
| "learning_rate": 1.0354877594463852e-05, | |
| "loss": 0.2101, | |
| "step": 3435 | |
| }, | |
| { | |
| "epoch": 21.774603174603175, | |
| "grad_norm": 1.6524523496627808, | |
| "learning_rate": 1.027989839033709e-05, | |
| "loss": 0.2178, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 21.806349206349207, | |
| "grad_norm": 1.2898451089859009, | |
| "learning_rate": 1.0205121312699117e-05, | |
| "loss": 0.2267, | |
| "step": 3445 | |
| }, | |
| { | |
| "epoch": 21.83809523809524, | |
| "grad_norm": 1.5442014932632446, | |
| "learning_rate": 1.0130547388338268e-05, | |
| "loss": 0.2191, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 21.86984126984127, | |
| "grad_norm": 1.7914522886276245, | |
| "learning_rate": 1.0056177641253266e-05, | |
| "loss": 0.2292, | |
| "step": 3455 | |
| }, | |
| { | |
| "epoch": 21.901587301587302, | |
| "grad_norm": 1.4891278743743896, | |
| "learning_rate": 9.982013092639225e-06, | |
| "loss": 0.2378, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 21.933333333333334, | |
| "grad_norm": 1.6321289539337158, | |
| "learning_rate": 9.908054760873633e-06, | |
| "loss": 0.235, | |
| "step": 3465 | |
| }, | |
| { | |
| "epoch": 21.965079365079365, | |
| "grad_norm": 1.5167237520217896, | |
| "learning_rate": 9.834303661502294e-06, | |
| "loss": 0.2181, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 21.996825396825397, | |
| "grad_norm": 1.4538085460662842, | |
| "learning_rate": 9.7607608072255e-06, | |
| "loss": 0.2126, | |
| "step": 3475 | |
| }, | |
| { | |
| "epoch": 22.025396825396825, | |
| "grad_norm": 1.2877874374389648, | |
| "learning_rate": 9.687427207884017e-06, | |
| "loss": 0.213, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 22.057142857142857, | |
| "grad_norm": 1.4707602262496948, | |
| "learning_rate": 9.614303870445287e-06, | |
| "loss": 0.2183, | |
| "step": 3485 | |
| }, | |
| { | |
| "epoch": 22.08888888888889, | |
| "grad_norm": 0.9957223534584045, | |
| "learning_rate": 9.541391798989562e-06, | |
| "loss": 0.188, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 22.12063492063492, | |
| "grad_norm": 1.6289327144622803, | |
| "learning_rate": 9.468691994696147e-06, | |
| "loss": 0.2183, | |
| "step": 3495 | |
| }, | |
| { | |
| "epoch": 22.152380952380952, | |
| "grad_norm": 1.2597798109054565, | |
| "learning_rate": 9.396205455829615e-06, | |
| "loss": 0.2266, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 22.184126984126983, | |
| "grad_norm": 1.234129548072815, | |
| "learning_rate": 9.323933177726163e-06, | |
| "loss": 0.2117, | |
| "step": 3505 | |
| }, | |
| { | |
| "epoch": 22.215873015873015, | |
| "grad_norm": 1.3455181121826172, | |
| "learning_rate": 9.251876152779863e-06, | |
| "loss": 0.2159, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 22.247619047619047, | |
| "grad_norm": 1.6391496658325195, | |
| "learning_rate": 9.180035370429096e-06, | |
| "loss": 0.2142, | |
| "step": 3515 | |
| }, | |
| { | |
| "epoch": 22.27936507936508, | |
| "grad_norm": 1.925265908241272, | |
| "learning_rate": 9.108411817142943e-06, | |
| "loss": 0.2173, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 22.31111111111111, | |
| "grad_norm": 1.912740707397461, | |
| "learning_rate": 9.037006476407628e-06, | |
| "loss": 0.2129, | |
| "step": 3525 | |
| }, | |
| { | |
| "epoch": 22.34285714285714, | |
| "grad_norm": 1.6799534559249878, | |
| "learning_rate": 8.965820328713071e-06, | |
| "loss": 0.2207, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 22.374603174603173, | |
| "grad_norm": 1.5147929191589355, | |
| "learning_rate": 8.894854351539311e-06, | |
| "loss": 0.2179, | |
| "step": 3535 | |
| }, | |
| { | |
| "epoch": 22.406349206349205, | |
| "grad_norm": 1.78584623336792, | |
| "learning_rate": 8.824109519343227e-06, | |
| "loss": 0.2105, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 22.438095238095237, | |
| "grad_norm": 1.2689015865325928, | |
| "learning_rate": 8.753586803545041e-06, | |
| "loss": 0.2106, | |
| "step": 3545 | |
| }, | |
| { | |
| "epoch": 22.469841269841268, | |
| "grad_norm": 1.0263334512710571, | |
| "learning_rate": 8.683287172515022e-06, | |
| "loss": 0.2134, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 22.5015873015873, | |
| "grad_norm": 1.3135781288146973, | |
| "learning_rate": 8.61321159156023e-06, | |
| "loss": 0.1915, | |
| "step": 3555 | |
| }, | |
| { | |
| "epoch": 22.533333333333335, | |
| "grad_norm": 1.3863434791564941, | |
| "learning_rate": 8.543361022911156e-06, | |
| "loss": 0.211, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 22.565079365079367, | |
| "grad_norm": 1.4205353260040283, | |
| "learning_rate": 8.473736425708637e-06, | |
| "loss": 0.2155, | |
| "step": 3565 | |
| }, | |
| { | |
| "epoch": 22.5968253968254, | |
| "grad_norm": 1.7269377708435059, | |
| "learning_rate": 8.404338755990587e-06, | |
| "loss": 0.2239, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 22.62857142857143, | |
| "grad_norm": 1.5212743282318115, | |
| "learning_rate": 8.335168966678913e-06, | |
| "loss": 0.2188, | |
| "step": 3575 | |
| }, | |
| { | |
| "epoch": 22.66031746031746, | |
| "grad_norm": 1.6680513620376587, | |
| "learning_rate": 8.266228007566426e-06, | |
| "loss": 0.2295, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 22.692063492063493, | |
| "grad_norm": 1.8336963653564453, | |
| "learning_rate": 8.197516825303792e-06, | |
| "loss": 0.209, | |
| "step": 3585 | |
| }, | |
| { | |
| "epoch": 22.723809523809525, | |
| "grad_norm": 1.3112249374389648, | |
| "learning_rate": 8.129036363386524e-06, | |
| "loss": 0.2091, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 22.755555555555556, | |
| "grad_norm": 1.7051877975463867, | |
| "learning_rate": 8.060787562142072e-06, | |
| "loss": 0.2216, | |
| "step": 3595 | |
| }, | |
| { | |
| "epoch": 22.787301587301588, | |
| "grad_norm": 1.1862343549728394, | |
| "learning_rate": 7.992771358716852e-06, | |
| "loss": 0.2059, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 22.81904761904762, | |
| "grad_norm": 1.2786476612091064, | |
| "learning_rate": 7.92498868706341e-06, | |
| "loss": 0.2009, | |
| "step": 3605 | |
| }, | |
| { | |
| "epoch": 22.85079365079365, | |
| "grad_norm": 1.6637458801269531, | |
| "learning_rate": 7.857440477927588e-06, | |
| "loss": 0.2168, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 22.882539682539683, | |
| "grad_norm": 1.3283144235610962, | |
| "learning_rate": 7.790127658835747e-06, | |
| "loss": 0.2254, | |
| "step": 3615 | |
| }, | |
| { | |
| "epoch": 22.914285714285715, | |
| "grad_norm": 1.3773967027664185, | |
| "learning_rate": 7.723051154082059e-06, | |
| "loss": 0.2135, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 22.946031746031746, | |
| "grad_norm": 1.679351806640625, | |
| "learning_rate": 7.656211884715728e-06, | |
| "loss": 0.2267, | |
| "step": 3625 | |
| }, | |
| { | |
| "epoch": 22.977777777777778, | |
| "grad_norm": 1.2563788890838623, | |
| "learning_rate": 7.58961076852846e-06, | |
| "loss": 0.245, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 23.006349206349206, | |
| "grad_norm": 1.3150761127471924, | |
| "learning_rate": 7.523248720041778e-06, | |
| "loss": 0.2309, | |
| "step": 3635 | |
| }, | |
| { | |
| "epoch": 23.038095238095238, | |
| "grad_norm": 1.3584800958633423, | |
| "learning_rate": 7.457126650494478e-06, | |
| "loss": 0.2081, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 23.06984126984127, | |
| "grad_norm": 1.5431658029556274, | |
| "learning_rate": 7.391245467830163e-06, | |
| "loss": 0.2191, | |
| "step": 3645 | |
| }, | |
| { | |
| "epoch": 23.1015873015873, | |
| "grad_norm": 1.5318843126296997, | |
| "learning_rate": 7.325606076684688e-06, | |
| "loss": 0.2175, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 23.133333333333333, | |
| "grad_norm": 1.3879364728927612, | |
| "learning_rate": 7.260209378373842e-06, | |
| "loss": 0.2008, | |
| "step": 3655 | |
| }, | |
| { | |
| "epoch": 23.165079365079364, | |
| "grad_norm": 1.4865491390228271, | |
| "learning_rate": 7.195056270880887e-06, | |
| "loss": 0.2231, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 23.196825396825396, | |
| "grad_norm": 1.495409369468689, | |
| "learning_rate": 7.1301476488442705e-06, | |
| "loss": 0.2079, | |
| "step": 3665 | |
| }, | |
| { | |
| "epoch": 23.228571428571428, | |
| "grad_norm": 1.1792328357696533, | |
| "learning_rate": 7.065484403545328e-06, | |
| "loss": 0.2102, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 23.26031746031746, | |
| "grad_norm": 1.2667046785354614, | |
| "learning_rate": 7.001067422896063e-06, | |
| "loss": 0.1998, | |
| "step": 3675 | |
| }, | |
| { | |
| "epoch": 23.29206349206349, | |
| "grad_norm": 2.0349957942962646, | |
| "learning_rate": 6.936897591426916e-06, | |
| "loss": 0.2117, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 23.323809523809523, | |
| "grad_norm": 1.3733776807785034, | |
| "learning_rate": 6.872975790274658e-06, | |
| "loss": 0.2222, | |
| "step": 3685 | |
| }, | |
| { | |
| "epoch": 23.355555555555554, | |
| "grad_norm": 1.6063858270645142, | |
| "learning_rate": 6.809302897170266e-06, | |
| "loss": 0.2026, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 23.387301587301586, | |
| "grad_norm": 1.2825813293457031, | |
| "learning_rate": 6.7458797864268786e-06, | |
| "loss": 0.2246, | |
| "step": 3695 | |
| }, | |
| { | |
| "epoch": 23.419047619047618, | |
| "grad_norm": 1.3881940841674805, | |
| "learning_rate": 6.68270732892782e-06, | |
| "loss": 0.2086, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 23.45079365079365, | |
| "grad_norm": 1.478417158126831, | |
| "learning_rate": 6.619786392114557e-06, | |
| "loss": 0.2055, | |
| "step": 3705 | |
| }, | |
| { | |
| "epoch": 23.48253968253968, | |
| "grad_norm": 1.8440154790878296, | |
| "learning_rate": 6.557117839974902e-06, | |
| "loss": 0.2077, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 23.514285714285712, | |
| "grad_norm": 1.771545648574829, | |
| "learning_rate": 6.494702533031064e-06, | |
| "loss": 0.2204, | |
| "step": 3715 | |
| }, | |
| { | |
| "epoch": 23.546031746031748, | |
| "grad_norm": 1.4290744066238403, | |
| "learning_rate": 6.432541328327848e-06, | |
| "loss": 0.2161, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 23.57777777777778, | |
| "grad_norm": 1.8629150390625, | |
| "learning_rate": 6.370635079420942e-06, | |
| "loss": 0.2194, | |
| "step": 3725 | |
| }, | |
| { | |
| "epoch": 23.60952380952381, | |
| "grad_norm": 1.5219929218292236, | |
| "learning_rate": 6.308984636365092e-06, | |
| "loss": 0.2091, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 23.641269841269843, | |
| "grad_norm": 1.8589943647384644, | |
| "learning_rate": 6.247590845702553e-06, | |
| "loss": 0.2114, | |
| "step": 3735 | |
| }, | |
| { | |
| "epoch": 23.673015873015874, | |
| "grad_norm": 1.3381997346878052, | |
| "learning_rate": 6.186454550451359e-06, | |
| "loss": 0.2076, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 23.704761904761906, | |
| "grad_norm": 1.4127261638641357, | |
| "learning_rate": 6.1255765900938126e-06, | |
| "loss": 0.2039, | |
| "step": 3745 | |
| }, | |
| { | |
| "epoch": 23.736507936507937, | |
| "grad_norm": 1.8508237600326538, | |
| "learning_rate": 6.064957800564924e-06, | |
| "loss": 0.2193, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 23.76825396825397, | |
| "grad_norm": 1.652459979057312, | |
| "learning_rate": 6.004599014240952e-06, | |
| "loss": 0.2124, | |
| "step": 3755 | |
| }, | |
| { | |
| "epoch": 23.8, | |
| "grad_norm": 1.456562876701355, | |
| "learning_rate": 5.944501059927962e-06, | |
| "loss": 0.2199, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 23.831746031746032, | |
| "grad_norm": 1.305023193359375, | |
| "learning_rate": 5.884664762850467e-06, | |
| "loss": 0.1943, | |
| "step": 3765 | |
| }, | |
| { | |
| "epoch": 23.863492063492064, | |
| "grad_norm": 1.977409839630127, | |
| "learning_rate": 5.825090944640055e-06, | |
| "loss": 0.2137, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 23.895238095238096, | |
| "grad_norm": 1.4465928077697754, | |
| "learning_rate": 5.7657804233241465e-06, | |
| "loss": 0.2099, | |
| "step": 3775 | |
| }, | |
| { | |
| "epoch": 23.926984126984127, | |
| "grad_norm": 1.4489303827285767, | |
| "learning_rate": 5.706734013314746e-06, | |
| "loss": 0.2229, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 23.95873015873016, | |
| "grad_norm": 1.493857979774475, | |
| "learning_rate": 5.647952525397246e-06, | |
| "loss": 0.2101, | |
| "step": 3785 | |
| }, | |
| { | |
| "epoch": 23.99047619047619, | |
| "grad_norm": 1.41466224193573, | |
| "learning_rate": 5.589436766719342e-06, | |
| "loss": 0.2145, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 24.01904761904762, | |
| "grad_norm": 1.3483541011810303, | |
| "learning_rate": 5.531187540779864e-06, | |
| "loss": 0.2092, | |
| "step": 3795 | |
| }, | |
| { | |
| "epoch": 24.05079365079365, | |
| "grad_norm": 1.5213457345962524, | |
| "learning_rate": 5.4732056474178475e-06, | |
| "loss": 0.2004, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 24.082539682539682, | |
| "grad_norm": 1.5124719142913818, | |
| "learning_rate": 5.415491882801463e-06, | |
| "loss": 0.2066, | |
| "step": 3805 | |
| }, | |
| { | |
| "epoch": 24.114285714285714, | |
| "grad_norm": 1.1013808250427246, | |
| "learning_rate": 5.358047039417122e-06, | |
| "loss": 0.2047, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 24.146031746031746, | |
| "grad_norm": 1.0998328924179077, | |
| "learning_rate": 5.300871906058619e-06, | |
| "loss": 0.1935, | |
| "step": 3815 | |
| }, | |
| { | |
| "epoch": 24.177777777777777, | |
| "grad_norm": 1.4137228727340698, | |
| "learning_rate": 5.243967267816225e-06, | |
| "loss": 0.2153, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 24.20952380952381, | |
| "grad_norm": 1.3818597793579102, | |
| "learning_rate": 5.187333906065999e-06, | |
| "loss": 0.2121, | |
| "step": 3825 | |
| }, | |
| { | |
| "epoch": 24.24126984126984, | |
| "grad_norm": 1.4308501482009888, | |
| "learning_rate": 5.1309725984589966e-06, | |
| "loss": 0.1923, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 24.273015873015872, | |
| "grad_norm": 1.8435720205307007, | |
| "learning_rate": 5.0748841189106024e-06, | |
| "loss": 0.2014, | |
| "step": 3835 | |
| }, | |
| { | |
| "epoch": 24.304761904761904, | |
| "grad_norm": 1.149243950843811, | |
| "learning_rate": 5.019069237589921e-06, | |
| "loss": 0.1993, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 24.336507936507935, | |
| "grad_norm": 1.1767566204071045, | |
| "learning_rate": 4.963528720909192e-06, | |
| "loss": 0.2073, | |
| "step": 3845 | |
| }, | |
| { | |
| "epoch": 24.368253968253967, | |
| "grad_norm": 1.269392967224121, | |
| "learning_rate": 4.908263331513246e-06, | |
| "loss": 0.2054, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 24.4, | |
| "grad_norm": 1.4679259061813354, | |
| "learning_rate": 4.853273828269089e-06, | |
| "loss": 0.2159, | |
| "step": 3855 | |
| }, | |
| { | |
| "epoch": 24.43174603174603, | |
| "grad_norm": 1.4625903367996216, | |
| "learning_rate": 4.798560966255411e-06, | |
| "loss": 0.2054, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 24.463492063492062, | |
| "grad_norm": 1.7532594203948975, | |
| "learning_rate": 4.744125496752264e-06, | |
| "loss": 0.2126, | |
| "step": 3865 | |
| }, | |
| { | |
| "epoch": 24.495238095238093, | |
| "grad_norm": 1.8333017826080322, | |
| "learning_rate": 4.6899681672307346e-06, | |
| "loss": 0.1991, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 24.52698412698413, | |
| "grad_norm": 1.4345580339431763, | |
| "learning_rate": 4.636089721342668e-06, | |
| "loss": 0.206, | |
| "step": 3875 | |
| }, | |
| { | |
| "epoch": 24.55873015873016, | |
| "grad_norm": 1.5848816633224487, | |
| "learning_rate": 4.582490898910499e-06, | |
| "loss": 0.2335, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 24.590476190476192, | |
| "grad_norm": 1.1518175601959229, | |
| "learning_rate": 4.529172435917012e-06, | |
| "loss": 0.2007, | |
| "step": 3885 | |
| }, | |
| { | |
| "epoch": 24.622222222222224, | |
| "grad_norm": 1.582337498664856, | |
| "learning_rate": 4.476135064495335e-06, | |
| "loss": 0.2242, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 24.653968253968255, | |
| "grad_norm": 1.1616642475128174, | |
| "learning_rate": 4.423379512918807e-06, | |
| "loss": 0.2091, | |
| "step": 3895 | |
| }, | |
| { | |
| "epoch": 24.685714285714287, | |
| "grad_norm": 1.5428528785705566, | |
| "learning_rate": 4.370906505591007e-06, | |
| "loss": 0.2076, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 24.71746031746032, | |
| "grad_norm": 1.3769774436950684, | |
| "learning_rate": 4.318716763035835e-06, | |
| "loss": 0.2298, | |
| "step": 3905 | |
| }, | |
| { | |
| "epoch": 24.74920634920635, | |
| "grad_norm": 1.6571112871170044, | |
| "learning_rate": 4.2668110018875465e-06, | |
| "loss": 0.2131, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 24.78095238095238, | |
| "grad_norm": 1.7125977277755737, | |
| "learning_rate": 4.215189934881001e-06, | |
| "loss": 0.2197, | |
| "step": 3915 | |
| }, | |
| { | |
| "epoch": 24.812698412698413, | |
| "grad_norm": 1.543136715888977, | |
| "learning_rate": 4.163854270841797e-06, | |
| "loss": 0.2095, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 24.844444444444445, | |
| "grad_norm": 1.397762417793274, | |
| "learning_rate": 4.112804714676594e-06, | |
| "loss": 0.227, | |
| "step": 3925 | |
| }, | |
| { | |
| "epoch": 24.876190476190477, | |
| "grad_norm": 2.147937059402466, | |
| "learning_rate": 4.062041967363395e-06, | |
| "loss": 0.2072, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 24.90793650793651, | |
| "grad_norm": 1.2324985265731812, | |
| "learning_rate": 4.011566725941942e-06, | |
| "loss": 0.2116, | |
| "step": 3935 | |
| }, | |
| { | |
| "epoch": 24.93968253968254, | |
| "grad_norm": 1.3976517915725708, | |
| "learning_rate": 3.961379683504157e-06, | |
| "loss": 0.2276, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 24.97142857142857, | |
| "grad_norm": 1.4638493061065674, | |
| "learning_rate": 3.911481529184588e-06, | |
| "loss": 0.2074, | |
| "step": 3945 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "grad_norm": 2.1439075469970703, | |
| "learning_rate": 3.861872948150974e-06, | |
| "loss": 0.1959, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 25.03174603174603, | |
| "grad_norm": 1.3343623876571655, | |
| "learning_rate": 3.812554621594827e-06, | |
| "loss": 0.1867, | |
| "step": 3955 | |
| }, | |
| { | |
| "epoch": 25.063492063492063, | |
| "grad_norm": 1.3594505786895752, | |
| "learning_rate": 3.7635272267220858e-06, | |
| "loss": 0.2108, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 25.095238095238095, | |
| "grad_norm": 1.4568570852279663, | |
| "learning_rate": 3.7147914367437977e-06, | |
| "loss": 0.1973, | |
| "step": 3965 | |
| }, | |
| { | |
| "epoch": 25.126984126984127, | |
| "grad_norm": 1.5874037742614746, | |
| "learning_rate": 3.6663479208669223e-06, | |
| "loss": 0.2169, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 25.158730158730158, | |
| "grad_norm": 1.4729677438735962, | |
| "learning_rate": 3.6181973442850597e-06, | |
| "loss": 0.2117, | |
| "step": 3975 | |
| }, | |
| { | |
| "epoch": 25.19047619047619, | |
| "grad_norm": 1.549204707145691, | |
| "learning_rate": 3.5703403681694115e-06, | |
| "loss": 0.2142, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 25.22222222222222, | |
| "grad_norm": 1.3857277631759644, | |
| "learning_rate": 3.5227776496596283e-06, | |
| "loss": 0.2166, | |
| "step": 3985 | |
| }, | |
| { | |
| "epoch": 25.253968253968253, | |
| "grad_norm": 1.282907247543335, | |
| "learning_rate": 3.4755098418548155e-06, | |
| "loss": 0.2137, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 25.285714285714285, | |
| "grad_norm": 1.4106628894805908, | |
| "learning_rate": 3.42853759380459e-06, | |
| "loss": 0.1962, | |
| "step": 3995 | |
| }, | |
| { | |
| "epoch": 25.317460317460316, | |
| "grad_norm": 1.1567012071609497, | |
| "learning_rate": 3.3818615505000982e-06, | |
| "loss": 0.184, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 25.349206349206348, | |
| "grad_norm": 1.5095430612564087, | |
| "learning_rate": 3.3354823528652463e-06, | |
| "loss": 0.2084, | |
| "step": 4005 | |
| }, | |
| { | |
| "epoch": 25.38095238095238, | |
| "grad_norm": 1.6294128894805908, | |
| "learning_rate": 3.289400637747833e-06, | |
| "loss": 0.2137, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 25.41269841269841, | |
| "grad_norm": 1.0825536251068115, | |
| "learning_rate": 3.243617037910826e-06, | |
| "loss": 0.1874, | |
| "step": 4015 | |
| }, | |
| { | |
| "epoch": 25.444444444444443, | |
| "grad_norm": 1.886170744895935, | |
| "learning_rate": 3.1981321820236885e-06, | |
| "loss": 0.2096, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 25.476190476190474, | |
| "grad_norm": 1.4639593362808228, | |
| "learning_rate": 3.152946694653719e-06, | |
| "loss": 0.2162, | |
| "step": 4025 | |
| }, | |
| { | |
| "epoch": 25.507936507936506, | |
| "grad_norm": 1.3187980651855469, | |
| "learning_rate": 3.1080611962575084e-06, | |
| "loss": 0.2045, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 25.53968253968254, | |
| "grad_norm": 1.6470718383789062, | |
| "learning_rate": 3.0634763031723882e-06, | |
| "loss": 0.2056, | |
| "step": 4035 | |
| }, | |
| { | |
| "epoch": 25.571428571428573, | |
| "grad_norm": 1.702276587486267, | |
| "learning_rate": 3.0191926276079806e-06, | |
| "loss": 0.2215, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 25.603174603174605, | |
| "grad_norm": 1.4153060913085938, | |
| "learning_rate": 2.9752107776377963e-06, | |
| "loss": 0.2112, | |
| "step": 4045 | |
| }, | |
| { | |
| "epoch": 25.634920634920636, | |
| "grad_norm": 1.5753271579742432, | |
| "learning_rate": 2.931531357190881e-06, | |
| "loss": 0.2069, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 25.666666666666668, | |
| "grad_norm": 1.399776577949524, | |
| "learning_rate": 2.888154966043516e-06, | |
| "loss": 0.2175, | |
| "step": 4055 | |
| }, | |
| { | |
| "epoch": 25.6984126984127, | |
| "grad_norm": 1.9482572078704834, | |
| "learning_rate": 2.845082199811011e-06, | |
| "loss": 0.1971, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 25.73015873015873, | |
| "grad_norm": 1.8254753351211548, | |
| "learning_rate": 2.802313649939467e-06, | |
| "loss": 0.2204, | |
| "step": 4065 | |
| }, | |
| { | |
| "epoch": 25.761904761904763, | |
| "grad_norm": 2.1758205890655518, | |
| "learning_rate": 2.7598499036977253e-06, | |
| "loss": 0.2213, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 25.793650793650794, | |
| "grad_norm": 1.556915044784546, | |
| "learning_rate": 2.717691544169254e-06, | |
| "loss": 0.209, | |
| "step": 4075 | |
| }, | |
| { | |
| "epoch": 25.825396825396826, | |
| "grad_norm": 1.5348148345947266, | |
| "learning_rate": 2.675839150244153e-06, | |
| "loss": 0.203, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 25.857142857142858, | |
| "grad_norm": 1.1136623620986938, | |
| "learning_rate": 2.634293296611229e-06, | |
| "loss": 0.1943, | |
| "step": 4085 | |
| }, | |
| { | |
| "epoch": 25.88888888888889, | |
| "grad_norm": 1.4910120964050293, | |
| "learning_rate": 2.5930545537500515e-06, | |
| "loss": 0.1954, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 25.92063492063492, | |
| "grad_norm": 1.253962755203247, | |
| "learning_rate": 2.5521234879231887e-06, | |
| "loss": 0.2049, | |
| "step": 4095 | |
| }, | |
| { | |
| "epoch": 25.952380952380953, | |
| "grad_norm": 1.3038915395736694, | |
| "learning_rate": 2.5115006611683755e-06, | |
| "loss": 0.208, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 25.984126984126984, | |
| "grad_norm": 1.5659282207489014, | |
| "learning_rate": 2.4711866312908174e-06, | |
| "loss": 0.2146, | |
| "step": 4105 | |
| }, | |
| { | |
| "epoch": 26.012698412698413, | |
| "grad_norm": 1.5508460998535156, | |
| "learning_rate": 2.4311819518555295e-06, | |
| "loss": 0.1946, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 26.044444444444444, | |
| "grad_norm": 1.6189981698989868, | |
| "learning_rate": 2.391487172179738e-06, | |
| "loss": 0.2284, | |
| "step": 4115 | |
| }, | |
| { | |
| "epoch": 26.076190476190476, | |
| "grad_norm": 1.6239879131317139, | |
| "learning_rate": 2.3521028373253457e-06, | |
| "loss": 0.2087, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 26.107936507936508, | |
| "grad_norm": 1.3496952056884766, | |
| "learning_rate": 2.3130294880914173e-06, | |
| "loss": 0.1975, | |
| "step": 4125 | |
| }, | |
| { | |
| "epoch": 26.13968253968254, | |
| "grad_norm": 1.1777641773223877, | |
| "learning_rate": 2.2742676610067846e-06, | |
| "loss": 0.1851, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 26.17142857142857, | |
| "grad_norm": 1.4527008533477783, | |
| "learning_rate": 2.235817888322664e-06, | |
| "loss": 0.1768, | |
| "step": 4135 | |
| }, | |
| { | |
| "epoch": 26.203174603174602, | |
| "grad_norm": 1.4031193256378174, | |
| "learning_rate": 2.1976806980053556e-06, | |
| "loss": 0.2125, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 26.234920634920634, | |
| "grad_norm": 1.6796773672103882, | |
| "learning_rate": 2.1598566137289806e-06, | |
| "loss": 0.2175, | |
| "step": 4145 | |
| }, | |
| { | |
| "epoch": 26.266666666666666, | |
| "grad_norm": 1.3337997198104858, | |
| "learning_rate": 2.1223461548683184e-06, | |
| "loss": 0.2081, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 26.298412698412697, | |
| "grad_norm": 1.5123558044433594, | |
| "learning_rate": 2.0851498364916345e-06, | |
| "loss": 0.2022, | |
| "step": 4155 | |
| }, | |
| { | |
| "epoch": 26.33015873015873, | |
| "grad_norm": 1.5714031457901, | |
| "learning_rate": 2.04826816935364e-06, | |
| "loss": 0.2024, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 26.36190476190476, | |
| "grad_norm": 1.4283126592636108, | |
| "learning_rate": 2.0117016598884765e-06, | |
| "loss": 0.1725, | |
| "step": 4165 | |
| }, | |
| { | |
| "epoch": 26.393650793650792, | |
| "grad_norm": 1.5229535102844238, | |
| "learning_rate": 1.975450810202725e-06, | |
| "loss": 0.2028, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 26.425396825396824, | |
| "grad_norm": 1.0714068412780762, | |
| "learning_rate": 1.9395161180685796e-06, | |
| "loss": 0.1764, | |
| "step": 4175 | |
| }, | |
| { | |
| "epoch": 26.457142857142856, | |
| "grad_norm": 1.3268301486968994, | |
| "learning_rate": 1.9038980769169323e-06, | |
| "loss": 0.1962, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 26.488888888888887, | |
| "grad_norm": 1.5213900804519653, | |
| "learning_rate": 1.8685971758306691e-06, | |
| "loss": 0.2068, | |
| "step": 4185 | |
| }, | |
| { | |
| "epoch": 26.520634920634922, | |
| "grad_norm": 1.774190902709961, | |
| "learning_rate": 1.833613899537906e-06, | |
| "loss": 0.1951, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 26.552380952380954, | |
| "grad_norm": 1.5567339658737183, | |
| "learning_rate": 1.798948728405353e-06, | |
| "loss": 0.1967, | |
| "step": 4195 | |
| }, | |
| { | |
| "epoch": 26.584126984126986, | |
| "grad_norm": 1.5419505834579468, | |
| "learning_rate": 1.7646021384317201e-06, | |
| "loss": 0.2065, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 26.615873015873017, | |
| "grad_norm": 1.3854479789733887, | |
| "learning_rate": 1.730574601241175e-06, | |
| "loss": 0.2121, | |
| "step": 4205 | |
| }, | |
| { | |
| "epoch": 26.64761904761905, | |
| "grad_norm": 1.505738615989685, | |
| "learning_rate": 1.6968665840768588e-06, | |
| "loss": 0.21, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 26.67936507936508, | |
| "grad_norm": 1.7908788919448853, | |
| "learning_rate": 1.6634785497944922e-06, | |
| "loss": 0.2267, | |
| "step": 4215 | |
| }, | |
| { | |
| "epoch": 26.711111111111112, | |
| "grad_norm": 1.1521767377853394, | |
| "learning_rate": 1.630410956855996e-06, | |
| "loss": 0.2066, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 26.742857142857144, | |
| "grad_norm": 2.14155650138855, | |
| "learning_rate": 1.5976642593232145e-06, | |
| "loss": 0.2179, | |
| "step": 4225 | |
| }, | |
| { | |
| "epoch": 26.774603174603175, | |
| "grad_norm": 1.6598525047302246, | |
| "learning_rate": 1.5652389068516765e-06, | |
| "loss": 0.2235, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 26.806349206349207, | |
| "grad_norm": 1.3785622119903564, | |
| "learning_rate": 1.5331353446844027e-06, | |
| "loss": 0.2074, | |
| "step": 4235 | |
| }, | |
| { | |
| "epoch": 26.83809523809524, | |
| "grad_norm": 1.594824194908142, | |
| "learning_rate": 1.5013540136458227e-06, | |
| "loss": 0.2138, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 26.86984126984127, | |
| "grad_norm": 1.6035126447677612, | |
| "learning_rate": 1.4698953501356972e-06, | |
| "loss": 0.2203, | |
| "step": 4245 | |
| }, | |
| { | |
| "epoch": 26.901587301587302, | |
| "grad_norm": 1.6783777475357056, | |
| "learning_rate": 1.438759786123134e-06, | |
| "loss": 0.1916, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 26.933333333333334, | |
| "grad_norm": 1.8125184774398804, | |
| "learning_rate": 1.4079477491406701e-06, | |
| "loss": 0.216, | |
| "step": 4255 | |
| }, | |
| { | |
| "epoch": 26.965079365079365, | |
| "grad_norm": 1.9151347875595093, | |
| "learning_rate": 1.3774596622783604e-06, | |
| "loss": 0.2108, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 26.996825396825397, | |
| "grad_norm": 1.611247181892395, | |
| "learning_rate": 1.3472959441780286e-06, | |
| "loss": 0.203, | |
| "step": 4265 | |
| }, | |
| { | |
| "epoch": 27.025396825396825, | |
| "grad_norm": 1.6793745756149292, | |
| "learning_rate": 1.3174570090274618e-06, | |
| "loss": 0.2025, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 27.057142857142857, | |
| "grad_norm": 1.7114115953445435, | |
| "learning_rate": 1.2879432665547558e-06, | |
| "loss": 0.2021, | |
| "step": 4275 | |
| }, | |
| { | |
| "epoch": 27.08888888888889, | |
| "grad_norm": 1.5666910409927368, | |
| "learning_rate": 1.2587551220226812e-06, | |
| "loss": 0.2035, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 27.12063492063492, | |
| "grad_norm": 1.398741602897644, | |
| "learning_rate": 1.2298929762231098e-06, | |
| "loss": 0.2006, | |
| "step": 4285 | |
| }, | |
| { | |
| "epoch": 27.152380952380952, | |
| "grad_norm": 1.455268144607544, | |
| "learning_rate": 1.201357225471536e-06, | |
| "loss": 0.2049, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 27.184126984126983, | |
| "grad_norm": 1.4031116962432861, | |
| "learning_rate": 1.1731482616016004e-06, | |
| "loss": 0.2034, | |
| "step": 4295 | |
| }, | |
| { | |
| "epoch": 27.215873015873015, | |
| "grad_norm": 1.7742083072662354, | |
| "learning_rate": 1.1452664719597357e-06, | |
| "loss": 0.2065, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 27.247619047619047, | |
| "grad_norm": 1.729286551475525, | |
| "learning_rate": 1.1177122393998374e-06, | |
| "loss": 0.2125, | |
| "step": 4305 | |
| }, | |
| { | |
| "epoch": 27.27936507936508, | |
| "grad_norm": 1.7302000522613525, | |
| "learning_rate": 1.0904859422780079e-06, | |
| "loss": 0.2179, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 27.31111111111111, | |
| "grad_norm": 1.6988170146942139, | |
| "learning_rate": 1.0635879544473592e-06, | |
| "loss": 0.2039, | |
| "step": 4315 | |
| }, | |
| { | |
| "epoch": 27.34285714285714, | |
| "grad_norm": 1.6711211204528809, | |
| "learning_rate": 1.0370186452528935e-06, | |
| "loss": 0.2083, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 27.374603174603173, | |
| "grad_norm": 1.3255887031555176, | |
| "learning_rate": 1.0107783795264002e-06, | |
| "loss": 0.2027, | |
| "step": 4325 | |
| }, | |
| { | |
| "epoch": 27.406349206349205, | |
| "grad_norm": 1.5709261894226074, | |
| "learning_rate": 9.848675175814865e-07, | |
| "loss": 0.2083, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 27.438095238095237, | |
| "grad_norm": 1.546505093574524, | |
| "learning_rate": 9.592864152085963e-07, | |
| "loss": 0.2096, | |
| "step": 4335 | |
| }, | |
| { | |
| "epoch": 27.469841269841268, | |
| "grad_norm": 1.689866304397583, | |
| "learning_rate": 9.340354236701404e-07, | |
| "loss": 0.2065, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 27.5015873015873, | |
| "grad_norm": 1.6024067401885986, | |
| "learning_rate": 9.091148896956836e-07, | |
| "loss": 0.1989, | |
| "step": 4345 | |
| }, | |
| { | |
| "epoch": 27.533333333333335, | |
| "grad_norm": 1.393102765083313, | |
| "learning_rate": 8.845251554771422e-07, | |
| "loss": 0.1883, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 27.565079365079367, | |
| "grad_norm": 1.697074055671692, | |
| "learning_rate": 8.602665586641411e-07, | |
| "loss": 0.2219, | |
| "step": 4355 | |
| }, | |
| { | |
| "epoch": 27.5968253968254, | |
| "grad_norm": 1.2590184211730957, | |
| "learning_rate": 8.363394323593427e-07, | |
| "loss": 0.1884, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 27.62857142857143, | |
| "grad_norm": 1.6093244552612305, | |
| "learning_rate": 8.127441051138662e-07, | |
| "loss": 0.2005, | |
| "step": 4365 | |
| }, | |
| { | |
| "epoch": 27.66031746031746, | |
| "grad_norm": 1.4474371671676636, | |
| "learning_rate": 7.894809009228177e-07, | |
| "loss": 0.2055, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 27.692063492063493, | |
| "grad_norm": 2.0293591022491455, | |
| "learning_rate": 7.665501392207813e-07, | |
| "loss": 0.1919, | |
| "step": 4375 | |
| }, | |
| { | |
| "epoch": 27.723809523809525, | |
| "grad_norm": 1.2634299993515015, | |
| "learning_rate": 7.439521348774959e-07, | |
| "loss": 0.19, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 27.755555555555556, | |
| "grad_norm": 1.4110088348388672, | |
| "learning_rate": 7.216871981934797e-07, | |
| "loss": 0.1945, | |
| "step": 4385 | |
| }, | |
| { | |
| "epoch": 27.787301587301588, | |
| "grad_norm": 1.2176947593688965, | |
| "learning_rate": 6.997556348958018e-07, | |
| "loss": 0.1926, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 27.81904761904762, | |
| "grad_norm": 1.592971920967102, | |
| "learning_rate": 6.781577461338673e-07, | |
| "loss": 0.2062, | |
| "step": 4395 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 4710, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 30, | |
| "save_steps": 157, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 9.441544276803584e+16, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |