|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 31.44607329842932, |
|
"global_step": 15000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0020942408376963353, |
|
"learning_rate": 1.0206207261596573e-07, |
|
"loss": 42.81952667236328, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.020942408376963352, |
|
"learning_rate": 1.0206207261596575e-06, |
|
"loss": 43.49555460611979, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.041884816753926704, |
|
"learning_rate": 2.041241452319315e-06, |
|
"loss": 43.81195983886719, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06282722513089005, |
|
"learning_rate": 3.0618621784789722e-06, |
|
"loss": 43.1638671875, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08376963350785341, |
|
"learning_rate": 4.08248290463863e-06, |
|
"loss": 43.10018615722656, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.10471204188481675, |
|
"learning_rate": 5.103103630798286e-06, |
|
"loss": 42.361767578125, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1256544502617801, |
|
"learning_rate": 6.1237243569579445e-06, |
|
"loss": 41.58636474609375, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.14659685863874344, |
|
"learning_rate": 7.144345083117603e-06, |
|
"loss": 40.42512817382813, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16753926701570682, |
|
"learning_rate": 8.16496580927726e-06, |
|
"loss": 40.1504150390625, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.18848167539267016, |
|
"learning_rate": 9.185586535436916e-06, |
|
"loss": 39.03293151855469, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.2094240837696335, |
|
"learning_rate": 1.0206207261596573e-05, |
|
"loss": 38.355224609375, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23036649214659685, |
|
"learning_rate": 1.1226827987756233e-05, |
|
"loss": 37.3426025390625, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2513089005235602, |
|
"learning_rate": 1.2247448713915889e-05, |
|
"loss": 36.23554992675781, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.27225130890052357, |
|
"learning_rate": 1.3268069440075545e-05, |
|
"loss": 35.76961975097656, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2931937172774869, |
|
"learning_rate": 1.4288690166235205e-05, |
|
"loss": 34.53816528320313, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.31413612565445026, |
|
"learning_rate": 1.530931089239486e-05, |
|
"loss": 33.968804931640626, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33507853403141363, |
|
"learning_rate": 1.632993161855452e-05, |
|
"loss": 32.835089111328124, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.35602094240837695, |
|
"learning_rate": 1.7350552344714174e-05, |
|
"loss": 32.344024658203125, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.3769633507853403, |
|
"learning_rate": 1.8371173070873833e-05, |
|
"loss": 31.358078002929688, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.39790575916230364, |
|
"learning_rate": 1.939179379703349e-05, |
|
"loss": 30.222982788085936, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.418848167539267, |
|
"learning_rate": 2.0412414523193145e-05, |
|
"loss": 29.279229736328126, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4397905759162304, |
|
"learning_rate": 2.1433035249352804e-05, |
|
"loss": 28.632891845703124, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.4607329842931937, |
|
"learning_rate": 2.2453655975512465e-05, |
|
"loss": 27.667315673828124, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.4816753926701571, |
|
"learning_rate": 2.347427670167212e-05, |
|
"loss": 26.753375244140624, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5026178010471204, |
|
"learning_rate": 2.4494897427831778e-05, |
|
"loss": 26.086724853515626, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5235602094240838, |
|
"learning_rate": 2.5515518153991436e-05, |
|
"loss": 24.617787170410157, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5445026178010471, |
|
"learning_rate": 2.653613888015109e-05, |
|
"loss": 23.82097473144531, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5654450261780105, |
|
"learning_rate": 2.755675960631075e-05, |
|
"loss": 22.989166259765625, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5863874345549738, |
|
"learning_rate": 2.857738033247041e-05, |
|
"loss": 22.07659454345703, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6073298429319371, |
|
"learning_rate": 2.9598001058630065e-05, |
|
"loss": 21.30054168701172, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6282722513089005, |
|
"learning_rate": 3.061862178478972e-05, |
|
"loss": 20.448097229003906, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6492146596858639, |
|
"learning_rate": 3.163924251094938e-05, |
|
"loss": 19.719020080566406, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6701570680628273, |
|
"learning_rate": 3.265986323710904e-05, |
|
"loss": 19.040341186523438, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6910994764397905, |
|
"learning_rate": 3.3680483963268694e-05, |
|
"loss": 18.16093292236328, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7120418848167539, |
|
"learning_rate": 3.470110468942835e-05, |
|
"loss": 17.382850646972656, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7329842931937173, |
|
"learning_rate": 3.5721725415588004e-05, |
|
"loss": 16.651702880859375, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7539267015706806, |
|
"learning_rate": 3.6742346141747665e-05, |
|
"loss": 16.079180908203124, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.774869109947644, |
|
"learning_rate": 3.7762966867907327e-05, |
|
"loss": 15.322096252441407, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7958115183246073, |
|
"learning_rate": 3.878358759406698e-05, |
|
"loss": 14.650254821777343, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8167539267015707, |
|
"learning_rate": 3.980420832022664e-05, |
|
"loss": 14.0874267578125, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.837696335078534, |
|
"learning_rate": 4.082482904638629e-05, |
|
"loss": 13.5120849609375, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8586387434554974, |
|
"learning_rate": 4.184544977254595e-05, |
|
"loss": 12.803053283691407, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8795811518324608, |
|
"learning_rate": 4.286607049870561e-05, |
|
"loss": 12.207872009277343, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.900523560209424, |
|
"learning_rate": 4.388669122486527e-05, |
|
"loss": 11.640280151367188, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9214659685863874, |
|
"learning_rate": 4.490731195102493e-05, |
|
"loss": 11.116693878173828, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9424083769633508, |
|
"learning_rate": 4.5927932677184585e-05, |
|
"loss": 10.586290740966797, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9633507853403142, |
|
"learning_rate": 4.694855340334424e-05, |
|
"loss": 10.197176361083985, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9842931937172775, |
|
"learning_rate": 4.7969174129503894e-05, |
|
"loss": 9.696656036376954, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.9989528795811519, |
|
"eval_loss": 7.631092548370361, |
|
"eval_runtime": 73.7635, |
|
"eval_samples_per_second": 148.529, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 1.006282722513089, |
|
"learning_rate": 4.8989794855663556e-05, |
|
"loss": 9.649329376220702, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.0272251308900524, |
|
"learning_rate": 5.001041558182322e-05, |
|
"loss": 8.800138092041015, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.0481675392670158, |
|
"learning_rate": 5.103103630798287e-05, |
|
"loss": 8.46273422241211, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.0691099476439792, |
|
"learning_rate": 5.205165703414253e-05, |
|
"loss": 8.093246459960938, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.0900523560209425, |
|
"learning_rate": 5.307227776030218e-05, |
|
"loss": 7.800753021240235, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.1109947643979057, |
|
"learning_rate": 5.409289848646184e-05, |
|
"loss": 7.580593872070312, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.131937172774869, |
|
"learning_rate": 5.51135192126215e-05, |
|
"loss": 7.297437286376953, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.1528795811518324, |
|
"learning_rate": 5.613413993878116e-05, |
|
"loss": 7.115350341796875, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.1738219895287958, |
|
"learning_rate": 5.715476066494082e-05, |
|
"loss": 6.890144348144531, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.1947643979057592, |
|
"learning_rate": 5.817538139110047e-05, |
|
"loss": 6.647792053222656, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.2157068062827225, |
|
"learning_rate": 5.919600211726013e-05, |
|
"loss": 6.418707275390625, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.236649214659686, |
|
"learning_rate": 6.0216622843419785e-05, |
|
"loss": 6.1961822509765625, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.2575916230366493, |
|
"learning_rate": 6.123724356957945e-05, |
|
"loss": 6.020335388183594, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.2785340314136127, |
|
"learning_rate": 6.22578642957391e-05, |
|
"loss": 5.866229248046875, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.2994764397905758, |
|
"learning_rate": 6.327848502189876e-05, |
|
"loss": 5.687960433959961, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.3204188481675394, |
|
"learning_rate": 6.429910574805841e-05, |
|
"loss": 5.516669082641601, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.3413612565445026, |
|
"learning_rate": 6.531972647421808e-05, |
|
"loss": 5.365422058105469, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.362303664921466, |
|
"learning_rate": 6.634034720037773e-05, |
|
"loss": 5.2304027557373045, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.3832460732984293, |
|
"learning_rate": 6.736096792653739e-05, |
|
"loss": 5.1493980407714846, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.4041884816753927, |
|
"learning_rate": 6.838158865269704e-05, |
|
"loss": 5.069922256469726, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.425130890052356, |
|
"learning_rate": 6.94022093788567e-05, |
|
"loss": 4.946885299682617, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.4460732984293194, |
|
"learning_rate": 7.042283010501637e-05, |
|
"loss": 4.852196502685547, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.4670157068062828, |
|
"learning_rate": 7.144345083117601e-05, |
|
"loss": 4.79791488647461, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.487958115183246, |
|
"learning_rate": 7.246407155733568e-05, |
|
"loss": 4.701091766357422, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.5089005235602095, |
|
"learning_rate": 7.348469228349533e-05, |
|
"loss": 4.629792404174805, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.5298429319371727, |
|
"learning_rate": 7.450531300965498e-05, |
|
"loss": 4.491447067260742, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.5507853403141363, |
|
"learning_rate": 7.552593373581465e-05, |
|
"loss": 4.365177917480469, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.5717277486910994, |
|
"learning_rate": 7.654655446197431e-05, |
|
"loss": 4.266152572631836, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.5926701570680628, |
|
"learning_rate": 7.756717518813396e-05, |
|
"loss": 4.203376770019531, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.6136125654450262, |
|
"learning_rate": 7.858779591429362e-05, |
|
"loss": 4.128662872314453, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.6345549738219896, |
|
"learning_rate": 7.960841664045329e-05, |
|
"loss": 4.131737899780274, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.655497382198953, |
|
"learning_rate": 8.062903736661294e-05, |
|
"loss": 4.008557891845703, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.676439790575916, |
|
"learning_rate": 8.164965809277258e-05, |
|
"loss": 3.9548309326171873, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.6973821989528797, |
|
"learning_rate": 8.267027881893225e-05, |
|
"loss": 3.903990936279297, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.7183246073298428, |
|
"learning_rate": 8.36908995450919e-05, |
|
"loss": 3.8517215728759764, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.7392670157068064, |
|
"learning_rate": 8.471152027125156e-05, |
|
"loss": 3.815013122558594, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.7602094240837696, |
|
"learning_rate": 8.573214099741121e-05, |
|
"loss": 3.762827682495117, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.781151832460733, |
|
"learning_rate": 8.675276172357088e-05, |
|
"loss": 3.739139938354492, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.8020942408376963, |
|
"learning_rate": 8.777338244973054e-05, |
|
"loss": 3.6771942138671876, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.8230366492146597, |
|
"learning_rate": 8.879400317589019e-05, |
|
"loss": 3.671974945068359, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.843979057591623, |
|
"learning_rate": 8.981462390204986e-05, |
|
"loss": 3.6025531768798826, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.8649214659685864, |
|
"learning_rate": 9.083524462820951e-05, |
|
"loss": 3.580000305175781, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.8858638743455498, |
|
"learning_rate": 9.185586535436917e-05, |
|
"loss": 3.570189666748047, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.906806282722513, |
|
"learning_rate": 9.287648608052881e-05, |
|
"loss": 3.5345611572265625, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.9277486910994766, |
|
"learning_rate": 9.389710680668848e-05, |
|
"loss": 3.4658973693847654, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.9486910994764397, |
|
"learning_rate": 9.491772753284813e-05, |
|
"loss": 3.4885902404785156, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.9696335078534033, |
|
"learning_rate": 9.593834825900779e-05, |
|
"loss": 3.438787078857422, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.9905759162303664, |
|
"learning_rate": 9.695896898516746e-05, |
|
"loss": 3.434320831298828, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.998952879581152, |
|
"eval_loss": 3.1282973289489746, |
|
"eval_runtime": 73.33, |
|
"eval_samples_per_second": 149.407, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 2.012565445026178, |
|
"learning_rate": 9.797958971132711e-05, |
|
"loss": 3.5866859436035154, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.033507853403141, |
|
"learning_rate": 9.900021043748677e-05, |
|
"loss": 3.4268508911132813, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.054450261780105, |
|
"learning_rate": 0.00010002083116364643, |
|
"loss": 3.3977298736572266, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.075392670157068, |
|
"learning_rate": 0.00010104145188980609, |
|
"loss": 3.349309539794922, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.0963350785340316, |
|
"learning_rate": 0.00010206207261596574, |
|
"loss": 3.4084583282470704, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.1172774869109947, |
|
"learning_rate": 0.0001030826933421254, |
|
"loss": 3.3497646331787108, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.1382198952879583, |
|
"learning_rate": 0.00010410331406828505, |
|
"loss": 3.2944507598876953, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.1591623036649215, |
|
"learning_rate": 0.00010512393479444471, |
|
"loss": 3.312997055053711, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.180104712041885, |
|
"learning_rate": 0.00010614455552060436, |
|
"loss": 3.3428993225097656, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.201047120418848, |
|
"learning_rate": 0.00010716517624676403, |
|
"loss": 3.292295455932617, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.2219895287958114, |
|
"learning_rate": 0.00010818579697292369, |
|
"loss": 3.291975402832031, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 2.242931937172775, |
|
"learning_rate": 0.00010920641769908334, |
|
"loss": 3.2534629821777346, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 2.263874345549738, |
|
"learning_rate": 0.000110227038425243, |
|
"loss": 3.247083282470703, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 2.2848167539267017, |
|
"learning_rate": 0.00011124765915140266, |
|
"loss": 3.253615951538086, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 2.305759162303665, |
|
"learning_rate": 0.00011226827987756232, |
|
"loss": 3.2301055908203127, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.3267015706806284, |
|
"learning_rate": 0.00011328890060372197, |
|
"loss": 3.241224670410156, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 2.3476439790575916, |
|
"learning_rate": 0.00011430952132988164, |
|
"loss": 3.210051345825195, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 2.368586387434555, |
|
"learning_rate": 0.00011533014205604128, |
|
"loss": 3.225263214111328, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 2.3895287958115183, |
|
"learning_rate": 0.00011635076278220094, |
|
"loss": 3.1904216766357423, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 2.4104712041884815, |
|
"learning_rate": 0.00011737138350836059, |
|
"loss": 3.230540466308594, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.431413612565445, |
|
"learning_rate": 0.00011839200423452026, |
|
"loss": 3.191972351074219, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 2.4523560209424082, |
|
"learning_rate": 0.00011941262496067991, |
|
"loss": 3.191108512878418, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 2.473298429319372, |
|
"learning_rate": 0.00012043324568683957, |
|
"loss": 3.192665863037109, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 2.494240837696335, |
|
"learning_rate": 0.00012145386641299924, |
|
"loss": 3.1610179901123048, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 2.5151832460732986, |
|
"learning_rate": 0.0001224744871391589, |
|
"loss": 3.1794748306274414, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.5361256544502617, |
|
"learning_rate": 0.00012349510786531856, |
|
"loss": 3.2144695281982423, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.5570680628272253, |
|
"learning_rate": 0.0001245157285914782, |
|
"loss": 3.147447204589844, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 2.5780104712041885, |
|
"learning_rate": 0.00012553634931763784, |
|
"loss": 3.1747854232788084, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.5989528795811516, |
|
"learning_rate": 0.0001265569700437975, |
|
"loss": 3.137411880493164, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.619895287958115, |
|
"learning_rate": 0.00012757759076995718, |
|
"loss": 3.157614326477051, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.640837696335079, |
|
"learning_rate": 0.00012859821149611682, |
|
"loss": 3.1284500122070313, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.661780104712042, |
|
"learning_rate": 0.0001296188322222765, |
|
"loss": 3.168661117553711, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.682722513089005, |
|
"learning_rate": 0.00013063945294843616, |
|
"loss": 3.1208589553833006, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.7036649214659687, |
|
"learning_rate": 0.0001316600736745958, |
|
"loss": 3.177284049987793, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.724607329842932, |
|
"learning_rate": 0.00013268069440075547, |
|
"loss": 3.129215431213379, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.7455497382198955, |
|
"learning_rate": 0.00013370131512691514, |
|
"loss": 3.1385051727294924, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 2.7664921465968586, |
|
"learning_rate": 0.00013472193585307478, |
|
"loss": 3.093943977355957, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.7874345549738218, |
|
"learning_rate": 0.00013574255657923444, |
|
"loss": 3.125334358215332, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 2.8083769633507853, |
|
"learning_rate": 0.00013676317730539409, |
|
"loss": 3.1063247680664063, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 2.829319371727749, |
|
"learning_rate": 0.00013778379803155375, |
|
"loss": 3.153512382507324, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.850261780104712, |
|
"learning_rate": 0.0001388044187577134, |
|
"loss": 3.0612106323242188, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 2.8712041884816752, |
|
"learning_rate": 0.00013982503948387306, |
|
"loss": 3.0893680572509767, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 2.892146596858639, |
|
"learning_rate": 0.00014084566021003273, |
|
"loss": 3.0948190689086914, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.913089005235602, |
|
"learning_rate": 0.00014186628093619237, |
|
"loss": 3.052564811706543, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 2.9340314136125656, |
|
"learning_rate": 0.00014288690166235201, |
|
"loss": 3.061579132080078, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.9549738219895287, |
|
"learning_rate": 0.00014390752238851168, |
|
"loss": 3.0893646240234376, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 2.975916230366492, |
|
"learning_rate": 0.00014492814311467135, |
|
"loss": 3.0637126922607423, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 2.9968586387434555, |
|
"learning_rate": 0.000145948763840831, |
|
"loss": 3.063129425048828, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 2.998952879581152, |
|
"eval_loss": 2.8699653148651123, |
|
"eval_runtime": 73.0344, |
|
"eval_samples_per_second": 150.012, |
|
"step": 1431 |
|
}, |
|
{ |
|
"epoch": 3.018848167539267, |
|
"learning_rate": 0.00014696938456699066, |
|
"loss": 3.175088310241699, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 3.0397905759162303, |
|
"learning_rate": 0.00014799000529315033, |
|
"loss": 3.0708381652832033, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 3.060732984293194, |
|
"learning_rate": 0.00014901062601930997, |
|
"loss": 3.043408012390137, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 3.081675392670157, |
|
"learning_rate": 0.00015003124674546964, |
|
"loss": 3.039161491394043, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 3.1026178010471206, |
|
"learning_rate": 0.0001510518674716293, |
|
"loss": 3.0207361221313476, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 3.1235602094240837, |
|
"learning_rate": 0.00015207248819778895, |
|
"loss": 3.0527109146118163, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 3.144502617801047, |
|
"learning_rate": 0.00015309310892394862, |
|
"loss": 3.045629692077637, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.1654450261780105, |
|
"learning_rate": 0.00015411372965010828, |
|
"loss": 3.0113618850708006, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 3.1863874345549736, |
|
"learning_rate": 0.00015513435037626793, |
|
"loss": 3.0372419357299805, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 3.2073298429319372, |
|
"learning_rate": 0.0001561549711024276, |
|
"loss": 3.0219789505004884, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 3.2282722513089004, |
|
"learning_rate": 0.00015717559182858723, |
|
"loss": 3.0140264511108397, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 3.249214659685864, |
|
"learning_rate": 0.0001581962125547469, |
|
"loss": 3.039451789855957, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 3.270157068062827, |
|
"learning_rate": 0.00015921683328090657, |
|
"loss": 3.0277042388916016, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 3.2910994764397907, |
|
"learning_rate": 0.0001602374540070662, |
|
"loss": 3.0352230072021484, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 3.312041884816754, |
|
"learning_rate": 0.00016125807473322588, |
|
"loss": 3.0115480422973633, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 3.332984293193717, |
|
"learning_rate": 0.00016227869545938555, |
|
"loss": 3.008187103271484, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 3.3539267015706806, |
|
"learning_rate": 0.00016329931618554516, |
|
"loss": 3.02030086517334, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.374869109947644, |
|
"learning_rate": 0.00016431993691170483, |
|
"loss": 3.0034799575805664, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 3.3958115183246074, |
|
"learning_rate": 0.0001653405576378645, |
|
"loss": 3.0058149337768554, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 3.4167539267015705, |
|
"learning_rate": 0.00016636117836402414, |
|
"loss": 2.9678651809692385, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 3.437696335078534, |
|
"learning_rate": 0.0001673817990901838, |
|
"loss": 2.999662399291992, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 3.4586387434554973, |
|
"learning_rate": 0.00016840241981634345, |
|
"loss": 2.9900545120239257, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 3.479581151832461, |
|
"learning_rate": 0.00016942304054250312, |
|
"loss": 2.967659759521484, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 3.500523560209424, |
|
"learning_rate": 0.0001704436612686628, |
|
"loss": 2.9933372497558595, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 3.521465968586387, |
|
"learning_rate": 0.00017146428199482243, |
|
"loss": 2.9620410919189455, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 3.5424083769633508, |
|
"learning_rate": 0.0001724849027209821, |
|
"loss": 2.973393440246582, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 3.5633507853403144, |
|
"learning_rate": 0.00017350552344714176, |
|
"loss": 2.9914371490478517, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 3.5842931937172775, |
|
"learning_rate": 0.0001745261441733014, |
|
"loss": 2.959955596923828, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 3.6052356020942407, |
|
"learning_rate": 0.00017554676489946107, |
|
"loss": 2.947012519836426, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 3.6261780104712042, |
|
"learning_rate": 0.00017656738562562074, |
|
"loss": 2.9780450820922852, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 3.6471204188481674, |
|
"learning_rate": 0.00017758800635178038, |
|
"loss": 2.9911376953125, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 3.668062827225131, |
|
"learning_rate": 0.00017860862707794005, |
|
"loss": 2.936799430847168, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 3.689005235602094, |
|
"learning_rate": 0.00017962924780409972, |
|
"loss": 2.9443117141723634, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 3.7099476439790577, |
|
"learning_rate": 0.00018064986853025936, |
|
"loss": 2.9714879989624023, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 3.730890052356021, |
|
"learning_rate": 0.00018167048925641903, |
|
"loss": 2.9554422378540037, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 3.7518324607329845, |
|
"learning_rate": 0.0001826911099825787, |
|
"loss": 2.971892738342285, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 3.7727748691099476, |
|
"learning_rate": 0.00018371173070873834, |
|
"loss": 2.9589488983154295, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.793717277486911, |
|
"learning_rate": 0.000184732351434898, |
|
"loss": 2.944401741027832, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 3.8146596858638744, |
|
"learning_rate": 0.00018575297216105762, |
|
"loss": 2.926609420776367, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 3.8356020942408375, |
|
"learning_rate": 0.0001867735928872173, |
|
"loss": 2.906996726989746, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 3.856544502617801, |
|
"learning_rate": 0.00018779421361337696, |
|
"loss": 2.9651628494262696, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 3.8774869109947643, |
|
"learning_rate": 0.0001888148343395366, |
|
"loss": 2.94600887298584, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 3.898429319371728, |
|
"learning_rate": 0.00018983545506569627, |
|
"loss": 2.915750503540039, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 3.919371727748691, |
|
"learning_rate": 0.00019085607579185594, |
|
"loss": 2.942264747619629, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 3.9403141361256546, |
|
"learning_rate": 0.00019187669651801558, |
|
"loss": 2.9150556564331054, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 3.9612565445026178, |
|
"learning_rate": 0.00019289731724417525, |
|
"loss": 2.9057634353637694, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 3.982198952879581, |
|
"learning_rate": 0.0001939179379703349, |
|
"loss": 2.934947967529297, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 3.998952879581152, |
|
"eval_loss": 2.7904012203216553, |
|
"eval_runtime": 73.0809, |
|
"eval_samples_per_second": 149.916, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 4.004188481675393, |
|
"learning_rate": 0.00019493855869649455, |
|
"loss": 3.0482250213623048, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 4.025130890052356, |
|
"learning_rate": 0.00019595917942265422, |
|
"loss": 2.9126008987426757, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 4.046073298429319, |
|
"learning_rate": 0.0001969798001488139, |
|
"loss": 2.903646469116211, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 4.067015706806282, |
|
"learning_rate": 0.00019800042087497353, |
|
"loss": 2.9102720260620116, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 4.0879581151832465, |
|
"learning_rate": 0.0001990210416011332, |
|
"loss": 2.9236400604248045, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 4.10890052356021, |
|
"learning_rate": 0.00020004166232729287, |
|
"loss": 2.8818483352661133, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 4.129842931937173, |
|
"learning_rate": 0.0002010622830534525, |
|
"loss": 2.9328163146972654, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 4.150785340314136, |
|
"learning_rate": 0.00020208290377961218, |
|
"loss": 2.896937370300293, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 4.171727748691099, |
|
"learning_rate": 0.00020310352450577185, |
|
"loss": 2.9203187942504885, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 4.192670157068063, |
|
"learning_rate": 0.0002041241452319315, |
|
"loss": 2.887624740600586, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.213612565445026, |
|
"learning_rate": 0.00020514476595809116, |
|
"loss": 2.89230899810791, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 4.234554973821989, |
|
"learning_rate": 0.0002061653866842508, |
|
"loss": 2.8896648406982424, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 4.255497382198953, |
|
"learning_rate": 0.00020718600741041044, |
|
"loss": 2.922181510925293, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 4.276439790575917, |
|
"learning_rate": 0.0002082066281365701, |
|
"loss": 2.888766860961914, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 4.29738219895288, |
|
"learning_rate": 0.00020922724886272975, |
|
"loss": 2.8884586334228515, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 4.318324607329843, |
|
"learning_rate": 0.00021024786958888942, |
|
"loss": 2.8894960403442385, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 4.339267015706806, |
|
"learning_rate": 0.00021126849031504908, |
|
"loss": 2.88952522277832, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 4.36020942408377, |
|
"learning_rate": 0.00021228911104120873, |
|
"loss": 2.8968246459960936, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 4.381151832460733, |
|
"learning_rate": 0.0002133097317673684, |
|
"loss": 2.8720794677734376, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 4.402094240837696, |
|
"learning_rate": 0.00021433035249352806, |
|
"loss": 2.9035682678222656, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 4.4230366492146596, |
|
"learning_rate": 0.0002153509732196877, |
|
"loss": 2.8975749969482423, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 4.443979057591623, |
|
"learning_rate": 0.00021637159394584737, |
|
"loss": 2.8942371368408204, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 4.464921465968587, |
|
"learning_rate": 0.00021739221467200704, |
|
"loss": 2.8582950592041017, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 4.48586387434555, |
|
"learning_rate": 0.00021841283539816668, |
|
"loss": 2.8642202377319337, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 4.506806282722513, |
|
"learning_rate": 0.00021943345612432635, |
|
"loss": 2.8771089553833007, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 4.527748691099476, |
|
"learning_rate": 0.000220454076850486, |
|
"loss": 2.8463533401489256, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 4.548691099476439, |
|
"learning_rate": 0.00022147469757664566, |
|
"loss": 2.865732765197754, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 4.569633507853403, |
|
"learning_rate": 0.00022249531830280533, |
|
"loss": 2.8657468795776366, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 4.5905759162303665, |
|
"learning_rate": 0.00022351593902896497, |
|
"loss": 2.8737346649169924, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 4.61151832460733, |
|
"learning_rate": 0.00022453655975512464, |
|
"loss": 2.906464385986328, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 4.632460732984293, |
|
"learning_rate": 0.0002255571804812843, |
|
"loss": 2.8711727142333983, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 4.653403141361257, |
|
"learning_rate": 0.00022657780120744395, |
|
"loss": 2.866417121887207, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 4.67434554973822, |
|
"learning_rate": 0.00022759842193360361, |
|
"loss": 2.873806190490723, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 4.695287958115183, |
|
"learning_rate": 0.00022861904265976328, |
|
"loss": 2.867740249633789, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 4.716230366492146, |
|
"learning_rate": 0.0002296396633859229, |
|
"loss": 2.848478317260742, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 4.73717277486911, |
|
"learning_rate": 0.00023066028411208256, |
|
"loss": 2.865742492675781, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 4.7581151832460735, |
|
"learning_rate": 0.00023168090483824223, |
|
"loss": 2.8338348388671877, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 4.779057591623037, |
|
"learning_rate": 0.00023270152556440187, |
|
"loss": 2.848302459716797, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 0.00023372214629056154, |
|
"loss": 2.847154235839844, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 4.820942408376963, |
|
"learning_rate": 0.00023474276701672118, |
|
"loss": 2.889314651489258, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 4.841884816753927, |
|
"learning_rate": 0.00023576338774288085, |
|
"loss": 2.8715303421020506, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 4.86282722513089, |
|
"learning_rate": 0.00023678400846904052, |
|
"loss": 2.8583951950073243, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 4.883769633507853, |
|
"learning_rate": 0.00023780462919520016, |
|
"loss": 2.865638542175293, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 4.9047120418848165, |
|
"learning_rate": 0.00023882524992135983, |
|
"loss": 2.8511993408203127, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 4.92565445026178, |
|
"learning_rate": 0.0002398458706475195, |
|
"loss": 2.8662994384765623, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 4.946596858638744, |
|
"learning_rate": 0.00024086649137367914, |
|
"loss": 2.829090690612793, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 4.967539267015707, |
|
"learning_rate": 0.0002418871120998388, |
|
"loss": 2.8659574508666994, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 4.98848167539267, |
|
"learning_rate": 0.00024290773282599848, |
|
"loss": 2.8302743911743162, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 4.998952879581152, |
|
"eval_loss": 2.730773687362671, |
|
"eval_runtime": 73.1511, |
|
"eval_samples_per_second": 149.772, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 5.010471204188482, |
|
"learning_rate": 0.00024392835355215812, |
|
"loss": 2.994700050354004, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 5.031413612565445, |
|
"learning_rate": 0.0002449489742783178, |
|
"loss": 2.8189886093139647, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 5.052356020942408, |
|
"learning_rate": 0.00024596959500447745, |
|
"loss": 2.818514823913574, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 5.0732984293193715, |
|
"learning_rate": 0.0002469902157306371, |
|
"loss": 2.8165609359741213, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 5.0942408376963355, |
|
"learning_rate": 0.00024801083645679674, |
|
"loss": 2.8114208221435546, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 5.115183246073299, |
|
"learning_rate": 0.0002490314571829564, |
|
"loss": 2.841958236694336, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 5.136125654450262, |
|
"learning_rate": 0.00025005207790911607, |
|
"loss": 2.8192333221435546, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 5.157068062827225, |
|
"learning_rate": 0.0002510726986352757, |
|
"loss": 2.842849540710449, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 5.178010471204188, |
|
"learning_rate": 0.00025209331936143535, |
|
"loss": 2.797623062133789, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 5.198952879581152, |
|
"learning_rate": 0.000253113940087595, |
|
"loss": 2.8256582260131835, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 5.219895287958115, |
|
"learning_rate": 0.0002541345608137547, |
|
"loss": 2.853096771240234, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 5.2408376963350785, |
|
"learning_rate": 0.00025515518153991436, |
|
"loss": 2.8135236740112304, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 5.261780104712042, |
|
"learning_rate": 0.00025617580226607403, |
|
"loss": 2.826374053955078, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 5.282722513089006, |
|
"learning_rate": 0.00025719642299223364, |
|
"loss": 2.8227916717529298, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 5.303664921465969, |
|
"learning_rate": 0.0002582170437183933, |
|
"loss": 2.800633430480957, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 5.324607329842932, |
|
"learning_rate": 0.000259237664444553, |
|
"loss": 2.8282939910888674, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 5.345549738219895, |
|
"learning_rate": 0.00026025828517071265, |
|
"loss": 2.789654350280762, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 5.366492146596858, |
|
"learning_rate": 0.0002612789058968723, |
|
"loss": 2.832943916320801, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 5.387434554973822, |
|
"learning_rate": 0.00026229952662303193, |
|
"loss": 2.7946260452270506, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 5.408376963350785, |
|
"learning_rate": 0.0002633201473491916, |
|
"loss": 2.8106937408447266, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 5.429319371727749, |
|
"learning_rate": 0.00026434076807535127, |
|
"loss": 2.807754135131836, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 5.450261780104712, |
|
"learning_rate": 0.00026536138880151093, |
|
"loss": 2.809930992126465, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 5.471204188481676, |
|
"learning_rate": 0.0002663820095276706, |
|
"loss": 2.824372100830078, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 5.492146596858639, |
|
"learning_rate": 0.00026740263025383027, |
|
"loss": 2.797639846801758, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 5.513089005235602, |
|
"learning_rate": 0.0002684232509799899, |
|
"loss": 2.7917612075805662, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 5.534031413612565, |
|
"learning_rate": 0.00026944387170614955, |
|
"loss": 2.818395233154297, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 5.554973821989529, |
|
"learning_rate": 0.0002704644924323092, |
|
"loss": 2.7933046340942385, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 5.575916230366492, |
|
"learning_rate": 0.0002714851131584689, |
|
"loss": 2.794571876525879, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 5.596858638743456, |
|
"learning_rate": 0.0002725057338846285, |
|
"loss": 2.8182382583618164, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 5.617801047120419, |
|
"learning_rate": 0.00027352635461078817, |
|
"loss": 2.80462703704834, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 5.638743455497382, |
|
"learning_rate": 0.00027454697533694784, |
|
"loss": 2.793869400024414, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 5.659685863874346, |
|
"learning_rate": 0.0002755675960631075, |
|
"loss": 2.7867889404296875, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 5.680628272251309, |
|
"learning_rate": 0.0002765882167892671, |
|
"loss": 2.759464073181152, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 5.701570680628272, |
|
"learning_rate": 0.0002776088375154268, |
|
"loss": 2.7763525009155274, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 5.722513089005235, |
|
"learning_rate": 0.00027862945824158646, |
|
"loss": 2.7857263565063475, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 5.7434554973821985, |
|
"learning_rate": 0.00027965007896774613, |
|
"loss": 2.793513298034668, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 5.7643979057591626, |
|
"learning_rate": 0.0002806706996939058, |
|
"loss": 2.780983543395996, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 5.785340314136126, |
|
"learning_rate": 0.00028169132042006546, |
|
"loss": 2.762991714477539, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 5.806282722513089, |
|
"learning_rate": 0.00028271194114622513, |
|
"loss": 2.7711687088012695, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 5.827225130890052, |
|
"learning_rate": 0.00028373256187238475, |
|
"loss": 2.7514936447143556, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 5.848167539267016, |
|
"learning_rate": 0.0002847531825985444, |
|
"loss": 2.7642467498779295, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 5.869109947643979, |
|
"learning_rate": 0.00028577380332470403, |
|
"loss": 2.766267776489258, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 5.890052356020942, |
|
"learning_rate": 0.0002867944240508637, |
|
"loss": 2.7898386001586912, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 5.9109947643979055, |
|
"learning_rate": 0.00028781504477702337, |
|
"loss": 2.7557825088500976, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 5.9319371727748695, |
|
"learning_rate": 0.00028883566550318303, |
|
"loss": 2.7799331665039064, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 5.952879581151833, |
|
"learning_rate": 0.0002898562862293427, |
|
"loss": 2.7844886779785156, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 5.973821989528796, |
|
"learning_rate": 0.0002908769069555023, |
|
"loss": 2.791143608093262, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 5.994764397905759, |
|
"learning_rate": 0.000291897527681662, |
|
"loss": 2.7879209518432617, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 5.998952879581152, |
|
"eval_loss": 2.704967737197876, |
|
"eval_runtime": 73.9106, |
|
"eval_samples_per_second": 148.233, |
|
"step": 2862 |
|
}, |
|
{ |
|
"epoch": 6.016753926701571, |
|
"learning_rate": 0.00029291814840782165, |
|
"loss": 2.8958906173706054, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 6.037696335078534, |
|
"learning_rate": 0.0002939387691339813, |
|
"loss": 2.76577205657959, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 6.058638743455497, |
|
"learning_rate": 0.000294959389860141, |
|
"loss": 2.792632484436035, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 6.0795811518324605, |
|
"learning_rate": 0.00029598001058630066, |
|
"loss": 2.7835086822509765, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 6.100523560209424, |
|
"learning_rate": 0.00029700063131246027, |
|
"loss": 2.761422538757324, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 6.121465968586388, |
|
"learning_rate": 0.00029802125203861994, |
|
"loss": 2.763009452819824, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 6.142408376963351, |
|
"learning_rate": 0.0002990418727647796, |
|
"loss": 2.763312339782715, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 6.163350785340314, |
|
"learning_rate": 0.0003000624934909393, |
|
"loss": 2.764869499206543, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 6.184293193717277, |
|
"learning_rate": 0.00030108311421709894, |
|
"loss": 2.741547393798828, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 6.205235602094241, |
|
"learning_rate": 0.0003021037349432586, |
|
"loss": 2.7309391021728517, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 6.226178010471204, |
|
"learning_rate": 0.0003031243556694182, |
|
"loss": 2.7382484436035157, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 6.2471204188481675, |
|
"learning_rate": 0.0003041449763955779, |
|
"loss": 2.73870849609375, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 6.268062827225131, |
|
"learning_rate": 0.00030516559712173756, |
|
"loss": 2.7489850997924803, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 6.289005235602094, |
|
"learning_rate": 0.00030618621784789723, |
|
"loss": 2.720503807067871, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 6.309947643979058, |
|
"learning_rate": 0.0003072068385740569, |
|
"loss": 2.770734977722168, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 6.330890052356021, |
|
"learning_rate": 0.00030822745930021657, |
|
"loss": 2.7358495712280275, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 6.351832460732984, |
|
"learning_rate": 0.0003092480800263762, |
|
"loss": 2.7683557510375976, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 6.372774869109947, |
|
"learning_rate": 0.00031026870075253585, |
|
"loss": 2.7386125564575194, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 6.393717277486911, |
|
"learning_rate": 0.0003112893214786955, |
|
"loss": 2.730208396911621, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 6.4146596858638745, |
|
"learning_rate": 0.0003123099422048552, |
|
"loss": 2.753367042541504, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 6.435602094240838, |
|
"learning_rate": 0.00031333056293101486, |
|
"loss": 2.738628959655762, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 6.456544502617801, |
|
"learning_rate": 0.00031435118365717447, |
|
"loss": 2.7470897674560546, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 6.477486910994765, |
|
"learning_rate": 0.00031537180438333414, |
|
"loss": 2.7530258178710936, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 6.498429319371728, |
|
"learning_rate": 0.0003163924251094938, |
|
"loss": 2.7581613540649412, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 6.519371727748691, |
|
"learning_rate": 0.0003174130458356535, |
|
"loss": 2.719674301147461, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 6.540314136125654, |
|
"learning_rate": 0.00031843366656181314, |
|
"loss": 2.7147310256958006, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 6.561256544502617, |
|
"learning_rate": 0.0003194542872879728, |
|
"loss": 2.731971549987793, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 6.5821989528795815, |
|
"learning_rate": 0.0003204749080141324, |
|
"loss": 2.73608512878418, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 6.603141361256545, |
|
"learning_rate": 0.0003214955287402921, |
|
"loss": 2.7456966400146485, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 6.624083769633508, |
|
"learning_rate": 0.00032251614946645176, |
|
"loss": 2.7154998779296875, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 6.645026178010471, |
|
"learning_rate": 0.00032353677019261143, |
|
"loss": 2.737691116333008, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 6.665968586387434, |
|
"learning_rate": 0.0003245573909187711, |
|
"loss": 2.734482192993164, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 6.686910994764398, |
|
"learning_rate": 0.00032557801164493077, |
|
"loss": 2.74371395111084, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 6.707853403141361, |
|
"learning_rate": 0.0003265986323710903, |
|
"loss": 2.7436079025268554, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 6.728795811518324, |
|
"learning_rate": 0.00032761925309725, |
|
"loss": 2.7301549911499023, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 6.749738219895288, |
|
"learning_rate": 0.00032863987382340966, |
|
"loss": 2.7386409759521486, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 6.770680628272252, |
|
"learning_rate": 0.00032966049454956933, |
|
"loss": 2.718589973449707, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 6.791623036649215, |
|
"learning_rate": 0.000330681115275729, |
|
"loss": 2.7293943405151366, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 6.812565445026178, |
|
"learning_rate": 0.0003317017360018886, |
|
"loss": 2.730001449584961, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 6.833507853403141, |
|
"learning_rate": 0.0003327223567280483, |
|
"loss": 2.7539945602416993, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 6.854450261780105, |
|
"learning_rate": 0.00033374297745420795, |
|
"loss": 2.7157943725585936, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 6.875392670157068, |
|
"learning_rate": 0.0003347635981803676, |
|
"loss": 2.7142717361450197, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 6.896335078534031, |
|
"learning_rate": 0.0003357842189065273, |
|
"loss": 2.723209190368652, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 6.9172774869109945, |
|
"learning_rate": 0.0003368048396326869, |
|
"loss": 2.725255012512207, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 6.938219895287958, |
|
"learning_rate": 0.00033782546035884657, |
|
"loss": 2.7175876617431642, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 6.959162303664922, |
|
"learning_rate": 0.00033884608108500624, |
|
"loss": 2.7198652267456054, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 6.980104712041885, |
|
"learning_rate": 0.0003398667018111659, |
|
"loss": 2.719003105163574, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 6.998952879581152, |
|
"eval_loss": 2.657553195953369, |
|
"eval_runtime": 73.1108, |
|
"eval_samples_per_second": 149.855, |
|
"step": 3339 |
|
}, |
|
{ |
|
"epoch": 7.002094240837696, |
|
"learning_rate": 0.0003408873225373256, |
|
"loss": 2.8445356369018553, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 7.02303664921466, |
|
"learning_rate": 0.00034190794326348524, |
|
"loss": 2.6978481292724608, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 7.043979057591623, |
|
"learning_rate": 0.00034292856398964486, |
|
"loss": 2.6844539642333984, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 7.064921465968586, |
|
"learning_rate": 0.0003439491847158045, |
|
"loss": 2.7076372146606444, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 7.0858638743455495, |
|
"learning_rate": 0.0003449698054419642, |
|
"loss": 2.7051311492919923, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 7.106806282722513, |
|
"learning_rate": 0.00034599042616812386, |
|
"loss": 2.700316619873047, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 7.127748691099477, |
|
"learning_rate": 0.00034701104689428353, |
|
"loss": 2.732998085021973, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 7.14869109947644, |
|
"learning_rate": 0.0003480316676204432, |
|
"loss": 2.7185394287109377, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 7.169633507853403, |
|
"learning_rate": 0.0003490522883466028, |
|
"loss": 2.705458068847656, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 7.190575916230366, |
|
"learning_rate": 0.0003500729090727625, |
|
"loss": 2.708370590209961, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 7.21151832460733, |
|
"learning_rate": 0.00035109352979892215, |
|
"loss": 2.7156093597412108, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 7.232460732984293, |
|
"learning_rate": 0.0003521141505250818, |
|
"loss": 2.6954971313476563, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 7.2534031413612565, |
|
"learning_rate": 0.0003531347712512415, |
|
"loss": 2.7061573028564454, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 7.27434554973822, |
|
"learning_rate": 0.00035415539197740115, |
|
"loss": 2.7047218322753905, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 7.295287958115184, |
|
"learning_rate": 0.00035517601270356077, |
|
"loss": 2.6945539474487306, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 7.316230366492147, |
|
"learning_rate": 0.00035619663342972044, |
|
"loss": 2.67620735168457, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 7.33717277486911, |
|
"learning_rate": 0.0003572172541558801, |
|
"loss": 2.6955190658569337, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 7.358115183246073, |
|
"learning_rate": 0.00035823787488203977, |
|
"loss": 2.684027671813965, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 7.379057591623036, |
|
"learning_rate": 0.00035925849560819944, |
|
"loss": 2.698904800415039, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 0.0003602791163343591, |
|
"loss": 2.695516014099121, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 7.4209424083769635, |
|
"learning_rate": 0.0003612997370605187, |
|
"loss": 2.7022026062011717, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 7.441884816753927, |
|
"learning_rate": 0.0003623203577866784, |
|
"loss": 2.703862762451172, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 7.46282722513089, |
|
"learning_rate": 0.00036334097851283806, |
|
"loss": 2.6929235458374023, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 7.483769633507853, |
|
"learning_rate": 0.00036436159923899773, |
|
"loss": 2.6659242630004885, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 7.504712041884817, |
|
"learning_rate": 0.0003653822199651574, |
|
"loss": 2.660002899169922, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 7.52565445026178, |
|
"learning_rate": 0.000366402840691317, |
|
"loss": 2.699945831298828, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 7.546596858638743, |
|
"learning_rate": 0.0003674234614174767, |
|
"loss": 2.677934455871582, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 7.5675392670157065, |
|
"learning_rate": 0.00036844408214363635, |
|
"loss": 2.6725204467773436, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 7.5884816753926705, |
|
"learning_rate": 0.000369464702869796, |
|
"loss": 2.701091003417969, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 7.609424083769634, |
|
"learning_rate": 0.00037048532359595563, |
|
"loss": 2.6974748611450194, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 7.630366492146597, |
|
"learning_rate": 0.00037150594432211524, |
|
"loss": 2.6910377502441407, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 7.65130890052356, |
|
"learning_rate": 0.0003725265650482749, |
|
"loss": 2.6975467681884764, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 7.672251308900524, |
|
"learning_rate": 0.0003735471857744346, |
|
"loss": 2.6613983154296874, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 7.693193717277487, |
|
"learning_rate": 0.00037456780650059425, |
|
"loss": 2.6739892959594727, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 7.71413612565445, |
|
"learning_rate": 0.0003755884272267539, |
|
"loss": 2.684556770324707, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 7.735078534031413, |
|
"learning_rate": 0.0003766090479529136, |
|
"loss": 2.662580680847168, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 7.756020942408377, |
|
"learning_rate": 0.0003776296686790732, |
|
"loss": 2.671764373779297, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 7.776963350785341, |
|
"learning_rate": 0.00037865028940523287, |
|
"loss": 2.687736701965332, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 7.797905759162304, |
|
"learning_rate": 0.00037967091013139253, |
|
"loss": 2.676908493041992, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 7.818848167539267, |
|
"learning_rate": 0.0003806915308575522, |
|
"loss": 2.6953929901123046, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 7.83979057591623, |
|
"learning_rate": 0.00038171215158371187, |
|
"loss": 2.7138477325439454, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 7.860732984293193, |
|
"learning_rate": 0.00038273277230987154, |
|
"loss": 2.658716583251953, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 7.881675392670157, |
|
"learning_rate": 0.00038375339303603115, |
|
"loss": 2.6610225677490233, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 7.90261780104712, |
|
"learning_rate": 0.0003847740137621908, |
|
"loss": 2.6816684722900392, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 7.923560209424084, |
|
"learning_rate": 0.0003857946344883505, |
|
"loss": 2.69453125, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 7.944502617801048, |
|
"learning_rate": 0.00038681525521451016, |
|
"loss": 2.6690832138061524, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 7.965445026178011, |
|
"learning_rate": 0.0003878358759406698, |
|
"loss": 2.6719383239746093, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 7.986387434554974, |
|
"learning_rate": 0.00038885649666682944, |
|
"loss": 2.6821807861328124, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 7.998952879581152, |
|
"eval_loss": 2.6321663856506348, |
|
"eval_runtime": 73.4258, |
|
"eval_samples_per_second": 149.212, |
|
"step": 3816 |
|
}, |
|
{ |
|
"epoch": 8.008376963350786, |
|
"learning_rate": 0.0003898771173929891, |
|
"loss": 2.792423057556152, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 8.029319371727748, |
|
"learning_rate": 0.0003908977381191488, |
|
"loss": 2.639652061462402, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 8.050261780104712, |
|
"learning_rate": 0.00039191835884530845, |
|
"loss": 2.658628463745117, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 8.071204188481675, |
|
"learning_rate": 0.0003929389795714681, |
|
"loss": 2.6759145736694334, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 8.092146596858639, |
|
"learning_rate": 0.0003939596002976278, |
|
"loss": 2.7123003005981445, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 8.113089005235603, |
|
"learning_rate": 0.0003949802210237874, |
|
"loss": 2.654216766357422, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 8.134031413612565, |
|
"learning_rate": 0.00039600084174994706, |
|
"loss": 2.653605651855469, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 8.154973821989529, |
|
"learning_rate": 0.00039702146247610673, |
|
"loss": 2.6602855682373048, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 8.175916230366493, |
|
"learning_rate": 0.0003980420832022664, |
|
"loss": 2.664006805419922, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 8.196858638743455, |
|
"learning_rate": 0.00039906270392842607, |
|
"loss": 2.6484548568725588, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 8.21780104712042, |
|
"learning_rate": 0.00040008332465458574, |
|
"loss": 2.6500553131103515, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 8.238743455497382, |
|
"learning_rate": 0.00040110394538074535, |
|
"loss": 2.6387815475463867, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 8.259685863874346, |
|
"learning_rate": 0.000402124566106905, |
|
"loss": 2.666366958618164, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 8.28062827225131, |
|
"learning_rate": 0.0004031451868330647, |
|
"loss": 2.69473876953125, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 8.301570680628272, |
|
"learning_rate": 0.00040416580755922436, |
|
"loss": 2.649556350708008, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 8.322513089005236, |
|
"learning_rate": 0.000405186428285384, |
|
"loss": 2.661054801940918, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 8.343455497382198, |
|
"learning_rate": 0.0004062070490115437, |
|
"loss": 2.651872253417969, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 8.364397905759162, |
|
"learning_rate": 0.0004072276697377033, |
|
"loss": 2.657582473754883, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 8.385340314136126, |
|
"learning_rate": 0.000408248290463863, |
|
"loss": 2.647386932373047, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 8.406282722513089, |
|
"learning_rate": 0.00040926891119002264, |
|
"loss": 2.622752380371094, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 8.427225130890053, |
|
"learning_rate": 0.0004102895319161823, |
|
"loss": 2.6471187591552736, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 8.448167539267015, |
|
"learning_rate": 0.000411310152642342, |
|
"loss": 2.6503711700439454, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 8.469109947643979, |
|
"learning_rate": 0.0004123307733685016, |
|
"loss": 2.662638854980469, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 8.490052356020943, |
|
"learning_rate": 0.00041335139409466126, |
|
"loss": 2.6614749908447264, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 8.510994764397905, |
|
"learning_rate": 0.0004143720148208209, |
|
"loss": 2.651297760009766, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 8.53193717277487, |
|
"learning_rate": 0.00041539263554698055, |
|
"loss": 2.654410552978516, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 8.552879581151833, |
|
"learning_rate": 0.0004164132562731402, |
|
"loss": 2.6440593719482424, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 8.573821989528795, |
|
"learning_rate": 0.00041743387699929983, |
|
"loss": 2.6485244750976564, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 8.59476439790576, |
|
"learning_rate": 0.0004184544977254595, |
|
"loss": 2.645181655883789, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 8.615706806282722, |
|
"learning_rate": 0.00041947511845161916, |
|
"loss": 2.6527120590209963, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 8.636649214659686, |
|
"learning_rate": 0.00042049573917777883, |
|
"loss": 2.644283676147461, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 8.65759162303665, |
|
"learning_rate": 0.0004215163599039385, |
|
"loss": 2.6589195251464846, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 8.678534031413612, |
|
"learning_rate": 0.00042253698063009817, |
|
"loss": 2.638910675048828, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 8.699476439790576, |
|
"learning_rate": 0.0004235576013562578, |
|
"loss": 2.6332365036010743, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 8.72041884816754, |
|
"learning_rate": 0.00042457822208241745, |
|
"loss": 2.6593414306640626, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 8.741361256544502, |
|
"learning_rate": 0.0004255988428085771, |
|
"loss": 2.6320539474487306, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 8.762303664921467, |
|
"learning_rate": 0.0004266194635347368, |
|
"loss": 2.6423057556152343, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 8.783246073298429, |
|
"learning_rate": 0.00042764008426089646, |
|
"loss": 2.6288238525390626, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 8.804188481675393, |
|
"learning_rate": 0.0004286607049870561, |
|
"loss": 2.624134635925293, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 8.825130890052357, |
|
"learning_rate": 0.00042968132571321574, |
|
"loss": 2.6502635955810545, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 8.846073298429319, |
|
"learning_rate": 0.0004307019464393754, |
|
"loss": 2.630023384094238, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 8.867015706806283, |
|
"learning_rate": 0.0004317225671655351, |
|
"loss": 2.6346521377563477, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 8.887958115183245, |
|
"learning_rate": 0.00043274318789169474, |
|
"loss": 2.612893295288086, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 8.90890052356021, |
|
"learning_rate": 0.0004337638086178544, |
|
"loss": 2.605606460571289, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 8.929842931937173, |
|
"learning_rate": 0.0004347844293440141, |
|
"loss": 2.6086076736450194, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 8.950785340314136, |
|
"learning_rate": 0.0004358050500701737, |
|
"loss": 2.6492921829223635, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 8.9717277486911, |
|
"learning_rate": 0.00043682567079633336, |
|
"loss": 2.60416202545166, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 8.992670157068062, |
|
"learning_rate": 0.00043784629152249303, |
|
"loss": 2.639349937438965, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 8.998952879581152, |
|
"eval_loss": 2.6156058311462402, |
|
"eval_runtime": 73.0048, |
|
"eval_samples_per_second": 150.072, |
|
"step": 4293 |
|
}, |
|
{ |
|
"epoch": 9.014659685863874, |
|
"learning_rate": 0.0004388669122486527, |
|
"loss": 2.7455169677734377, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 9.035602094240838, |
|
"learning_rate": 0.00043988753297481237, |
|
"loss": 2.632405471801758, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 9.0565445026178, |
|
"learning_rate": 0.000440908153700972, |
|
"loss": 2.644683074951172, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 9.077486910994764, |
|
"learning_rate": 0.00044192877442713165, |
|
"loss": 2.620805358886719, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 9.098429319371728, |
|
"learning_rate": 0.0004429493951532913, |
|
"loss": 2.6189605712890627, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 9.11937172774869, |
|
"learning_rate": 0.000443970015879451, |
|
"loss": 2.614165687561035, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 9.140314136125655, |
|
"learning_rate": 0.00044499063660561065, |
|
"loss": 2.615524673461914, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 9.161256544502617, |
|
"learning_rate": 0.0004460112573317703, |
|
"loss": 2.623036003112793, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 9.182198952879581, |
|
"learning_rate": 0.00044703187805792994, |
|
"loss": 2.636097526550293, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 9.203141361256545, |
|
"learning_rate": 0.0004480524987840896, |
|
"loss": 2.5902896881103517, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 9.224083769633507, |
|
"learning_rate": 0.0004490731195102493, |
|
"loss": 2.596613121032715, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 9.245026178010471, |
|
"learning_rate": 0.00045009374023640894, |
|
"loss": 2.6126483917236327, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 9.265968586387434, |
|
"learning_rate": 0.0004511143609625686, |
|
"loss": 2.6270191192626955, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 9.286910994764398, |
|
"learning_rate": 0.0004521349816887283, |
|
"loss": 2.603664779663086, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 9.307853403141362, |
|
"learning_rate": 0.0004531556024148879, |
|
"loss": 2.618840980529785, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 9.328795811518324, |
|
"learning_rate": 0.00045417622314104756, |
|
"loss": 2.6355594635009765, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 9.349738219895288, |
|
"learning_rate": 0.00045519684386720723, |
|
"loss": 2.6463857650756837, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 9.370680628272252, |
|
"learning_rate": 0.0004562174645933669, |
|
"loss": 2.6071035385131838, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 9.391623036649214, |
|
"learning_rate": 0.00045723808531952657, |
|
"loss": 2.6336111068725585, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 9.412565445026178, |
|
"learning_rate": 0.0004582587060456861, |
|
"loss": 2.6231353759765623, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 9.43350785340314, |
|
"learning_rate": 0.0004592793267718458, |
|
"loss": 2.6107151031494142, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 9.454450261780105, |
|
"learning_rate": 0.00046029994749800546, |
|
"loss": 2.6213560104370117, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 9.475392670157069, |
|
"learning_rate": 0.00046132056822416513, |
|
"loss": 2.584638214111328, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 9.496335078534031, |
|
"learning_rate": 0.0004623411889503248, |
|
"loss": 2.6262628555297853, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 9.517277486910995, |
|
"learning_rate": 0.00046336180967648447, |
|
"loss": 2.5897647857666017, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 9.538219895287957, |
|
"learning_rate": 0.0004643824304026441, |
|
"loss": 2.6060354232788088, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 9.559162303664921, |
|
"learning_rate": 0.00046540305112880375, |
|
"loss": 2.6086732864379885, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 9.580104712041885, |
|
"learning_rate": 0.0004664236718549634, |
|
"loss": 2.5964023590087892, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 9.601047120418848, |
|
"learning_rate": 0.0004674442925811231, |
|
"loss": 2.622255325317383, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 9.621989528795812, |
|
"learning_rate": 0.00046846491330728275, |
|
"loss": 2.5982023239135743, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 9.642931937172776, |
|
"learning_rate": 0.00046948553403344237, |
|
"loss": 2.6015974044799806, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 9.663874345549738, |
|
"learning_rate": 0.00047050615475960204, |
|
"loss": 2.617095184326172, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 9.684816753926702, |
|
"learning_rate": 0.0004715267754857617, |
|
"loss": 2.5803293228149413, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 9.705759162303664, |
|
"learning_rate": 0.00047254739621192137, |
|
"loss": 2.615343475341797, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 9.726701570680628, |
|
"learning_rate": 0.00047356801693808104, |
|
"loss": 2.5800424575805665, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 9.747643979057592, |
|
"learning_rate": 0.0004745886376642407, |
|
"loss": 2.598635673522949, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 9.768586387434555, |
|
"learning_rate": 0.0004756092583904003, |
|
"loss": 2.615553283691406, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 9.789528795811519, |
|
"learning_rate": 0.00047662987911656, |
|
"loss": 2.582602882385254, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 9.810471204188481, |
|
"learning_rate": 0.00047765049984271966, |
|
"loss": 2.5848695755004885, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 9.831413612565445, |
|
"learning_rate": 0.00047867112056887933, |
|
"loss": 2.6057199478149413, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 9.852356020942409, |
|
"learning_rate": 0.000479691741295039, |
|
"loss": 2.6193204879760743, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 9.873298429319371, |
|
"learning_rate": 0.00048071236202119866, |
|
"loss": 2.6137775421142577, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 9.894240837696335, |
|
"learning_rate": 0.0004817329827473583, |
|
"loss": 2.613667678833008, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 9.915183246073298, |
|
"learning_rate": 0.00048275360347351795, |
|
"loss": 2.599857711791992, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 9.936125654450262, |
|
"learning_rate": 0.0004837742241996776, |
|
"loss": 2.579629325866699, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 9.957068062827226, |
|
"learning_rate": 0.0004847948449258373, |
|
"loss": 2.6212156295776365, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 9.978010471204188, |
|
"learning_rate": 0.00048581546565199695, |
|
"loss": 2.5806146621704102, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 9.998952879581152, |
|
"learning_rate": 0.0004868360863781566, |
|
"loss": 2.5807775497436523, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 9.998952879581152, |
|
"eval_loss": 2.604253053665161, |
|
"eval_runtime": 74.0972, |
|
"eval_samples_per_second": 147.86, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 10.020942408376964, |
|
"learning_rate": 0.00048785670710431623, |
|
"loss": 2.721482849121094, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 10.041884816753926, |
|
"learning_rate": 0.000488877327830476, |
|
"loss": 2.5954622268676757, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 10.06282722513089, |
|
"learning_rate": 0.0004898979485566356, |
|
"loss": 2.6108915328979494, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 10.083769633507853, |
|
"learning_rate": 0.0004909185692827952, |
|
"loss": 2.607802391052246, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 10.104712041884817, |
|
"learning_rate": 0.0004919391900089549, |
|
"loss": 2.594425010681152, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 10.12565445026178, |
|
"learning_rate": 0.0004929598107351145, |
|
"loss": 2.6012176513671874, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 10.146596858638743, |
|
"learning_rate": 0.0004939804314612742, |
|
"loss": 2.5987337112426756, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 10.167539267015707, |
|
"learning_rate": 0.0004950010521874339, |
|
"loss": 2.5843479156494142, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 10.188481675392671, |
|
"learning_rate": 0.0004960216729135935, |
|
"loss": 2.612710380554199, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 10.209424083769633, |
|
"learning_rate": 0.0004970422936397532, |
|
"loss": 2.608279991149902, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 10.230366492146597, |
|
"learning_rate": 0.0004980629143659128, |
|
"loss": 2.599015998840332, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 10.25130890052356, |
|
"learning_rate": 0.0004990835350920725, |
|
"loss": 2.6169193267822264, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 10.272251308900524, |
|
"learning_rate": 0.0005001041558182321, |
|
"loss": 2.5874061584472656, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 10.293193717277488, |
|
"learning_rate": 0.0005011247765443918, |
|
"loss": 2.5972221374511717, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 10.31413612565445, |
|
"learning_rate": 0.0005021453972705514, |
|
"loss": 2.592855453491211, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 10.335078534031414, |
|
"learning_rate": 0.0005031660179967111, |
|
"loss": 2.5813924789428713, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 10.356020942408376, |
|
"learning_rate": 0.0005041866387228707, |
|
"loss": 2.5748531341552736, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 10.37696335078534, |
|
"learning_rate": 0.0005052072594490304, |
|
"loss": 2.5954639434814455, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 10.397905759162304, |
|
"learning_rate": 0.00050622788017519, |
|
"loss": 2.5695329666137696, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 10.418848167539267, |
|
"learning_rate": 0.0005072485009013497, |
|
"loss": 2.5873296737670897, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 10.43979057591623, |
|
"learning_rate": 0.0005082691216275094, |
|
"loss": 2.6035577774047853, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 10.460732984293193, |
|
"learning_rate": 0.000509289742353669, |
|
"loss": 2.5872554779052734, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 10.481675392670157, |
|
"learning_rate": 0.0005103103630798287, |
|
"loss": 2.548787498474121, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 10.502617801047121, |
|
"learning_rate": 0.0005098008169087462, |
|
"loss": 2.5832122802734374, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 10.523560209424083, |
|
"learning_rate": 0.0005092927940452339, |
|
"loss": 2.5651966094970704, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 10.544502617801047, |
|
"learning_rate": 0.0005087862869144114, |
|
"loss": 2.5844688415527344, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 10.565445026178011, |
|
"learning_rate": 0.0005082812879940277, |
|
"loss": 2.5721431732177735, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 10.586387434554974, |
|
"learning_rate": 0.0005077777898139921, |
|
"loss": 2.59671630859375, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 10.607329842931938, |
|
"learning_rate": 0.0005072757849559103, |
|
"loss": 2.5834468841552733, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 10.6282722513089, |
|
"learning_rate": 0.0005067752660526248, |
|
"loss": 2.566558074951172, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 10.649214659685864, |
|
"learning_rate": 0.0005062762257877613, |
|
"loss": 2.579891395568848, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 10.670157068062828, |
|
"learning_rate": 0.0005057786568952791, |
|
"loss": 2.5753833770751955, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 10.69109947643979, |
|
"learning_rate": 0.000505282552159027, |
|
"loss": 2.5887382507324217, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 10.712041884816754, |
|
"learning_rate": 0.000504787904412304, |
|
"loss": 2.578693962097168, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 10.732984293193716, |
|
"learning_rate": 0.000504294706537424, |
|
"loss": 2.5702871322631835, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 10.75392670157068, |
|
"learning_rate": 0.0005038029514652858, |
|
"loss": 2.579792594909668, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 10.774869109947645, |
|
"learning_rate": 0.0005033126321749477, |
|
"loss": 2.5686906814575194, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 10.795811518324607, |
|
"learning_rate": 0.000502823741693206, |
|
"loss": 2.5612104415893553, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 10.81675392670157, |
|
"learning_rate": 0.0005023362730941793, |
|
"loss": 2.5499547958374023, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 10.837696335078533, |
|
"learning_rate": 0.0005018502194988955, |
|
"loss": 2.5822380065917967, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 10.858638743455497, |
|
"learning_rate": 0.0005013655740748848, |
|
"loss": 2.5612071990966796, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 10.879581151832461, |
|
"learning_rate": 0.0005008823300357761, |
|
"loss": 2.5559200286865233, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 10.900523560209423, |
|
"learning_rate": 0.0005004004806408972, |
|
"loss": 2.566044807434082, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 10.921465968586388, |
|
"learning_rate": 0.0004999200191948814, |
|
"loss": 2.5504446029663086, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 10.942408376963352, |
|
"learning_rate": 0.0004994409390472751, |
|
"loss": 2.5642301559448244, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 10.963350785340314, |
|
"learning_rate": 0.0004989632335921523, |
|
"loss": 2.560255241394043, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 10.984293193717278, |
|
"learning_rate": 0.0004984868962677315, |
|
"loss": 2.5622379302978517, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 10.998952879581152, |
|
"eval_loss": 2.5793886184692383, |
|
"eval_runtime": 73.6792, |
|
"eval_samples_per_second": 148.699, |
|
"step": 5247 |
|
}, |
|
{ |
|
"epoch": 11.006282722513088, |
|
"learning_rate": 0.0004980119205559973, |
|
"loss": 2.682263946533203, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 11.027225130890052, |
|
"learning_rate": 0.0004975382999823259, |
|
"loss": 2.581485557556152, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 11.048167539267016, |
|
"learning_rate": 0.0004970660281151141, |
|
"loss": 2.5775730133056642, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 11.069109947643978, |
|
"learning_rate": 0.0004965950985654126, |
|
"loss": 2.587117385864258, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 11.090052356020943, |
|
"learning_rate": 0.0004961255049865635, |
|
"loss": 2.5482534408569335, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 11.110994764397907, |
|
"learning_rate": 0.0004956572410738401, |
|
"loss": 2.5464542388916014, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 11.131937172774869, |
|
"learning_rate": 0.000495190300564092, |
|
"loss": 2.557751274108887, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 11.152879581151833, |
|
"learning_rate": 0.0004947246772353933, |
|
"loss": 2.54296932220459, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 11.173821989528795, |
|
"learning_rate": 0.0004942603649066942, |
|
"loss": 2.576173782348633, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 11.19476439790576, |
|
"learning_rate": 0.0004937973574374762, |
|
"loss": 2.535029411315918, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 11.215706806282723, |
|
"learning_rate": 0.0004933356487274114, |
|
"loss": 2.551316833496094, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 11.236649214659685, |
|
"learning_rate": 0.0004928752327160248, |
|
"loss": 2.551384925842285, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 11.25759162303665, |
|
"learning_rate": 0.0004924161033823598, |
|
"loss": 2.5437076568603514, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 11.278534031413612, |
|
"learning_rate": 0.0004919582547446482, |
|
"loss": 2.5423688888549805, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 11.299476439790576, |
|
"learning_rate": 0.0004915016808599824, |
|
"loss": 2.5350723266601562, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 11.32041884816754, |
|
"learning_rate": 0.0004910463758239914, |
|
"loss": 2.5370588302612305, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 11.341361256544502, |
|
"learning_rate": 0.0004905923337705201, |
|
"loss": 2.5458969116210937, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 11.362303664921466, |
|
"learning_rate": 0.0004901395488713123, |
|
"loss": 2.5638805389404298, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 11.38324607329843, |
|
"learning_rate": 0.0004896880153356963, |
|
"loss": 2.5516336441040037, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 11.404188481675392, |
|
"learning_rate": 0.000489237727410273, |
|
"loss": 2.559841346740723, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 11.425130890052356, |
|
"learning_rate": 0.0004887886793786093, |
|
"loss": 2.559991645812988, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 11.446073298429319, |
|
"learning_rate": 0.0004883408655609327, |
|
"loss": 2.5499288558959963, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 11.467015706806283, |
|
"learning_rate": 0.0004878942803138293, |
|
"loss": 2.544484519958496, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 11.487958115183247, |
|
"learning_rate": 0.0004874489180299454, |
|
"loss": 2.537228584289551, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 11.508900523560209, |
|
"learning_rate": 0.00048700477313769213, |
|
"loss": 2.5507528305053713, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 11.529842931937173, |
|
"learning_rate": 0.00048656184010095185, |
|
"loss": 2.5349197387695312, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 11.550785340314135, |
|
"learning_rate": 0.00048612011341878916, |
|
"loss": 2.5785711288452147, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 11.5717277486911, |
|
"learning_rate": 0.0004856795876251634, |
|
"loss": 2.5618894577026365, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 11.592670157068063, |
|
"learning_rate": 0.00048524025728864493, |
|
"loss": 2.553698921203613, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 11.613612565445026, |
|
"learning_rate": 0.0004848021170121335, |
|
"loss": 2.563484954833984, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 11.63455497382199, |
|
"learning_rate": 0.0004843651614325803, |
|
"loss": 2.5655393600463867, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 11.655497382198952, |
|
"learning_rate": 0.00048392938522071163, |
|
"loss": 2.550769233703613, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 11.676439790575916, |
|
"learning_rate": 0.0004834947830807563, |
|
"loss": 2.549762725830078, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 11.69738219895288, |
|
"learning_rate": 0.00048306134975017523, |
|
"loss": 2.5488073348999025, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 11.718324607329842, |
|
"learning_rate": 0.0004826290799993939, |
|
"loss": 2.554892158508301, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 11.739267015706806, |
|
"learning_rate": 0.0004821979686315372, |
|
"loss": 2.519801902770996, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 11.76020942408377, |
|
"learning_rate": 0.00048176801048216693, |
|
"loss": 2.541176414489746, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 11.781151832460733, |
|
"learning_rate": 0.0004813392004190223, |
|
"loss": 2.52908878326416, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 11.802094240837697, |
|
"learning_rate": 0.00048091153334176224, |
|
"loss": 2.5230037689208986, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 11.823036649214659, |
|
"learning_rate": 0.00048048500418171097, |
|
"loss": 2.5191682815551757, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 11.843979057591623, |
|
"learning_rate": 0.0004800596079016053, |
|
"loss": 2.553547668457031, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 11.864921465968587, |
|
"learning_rate": 0.0004796353394953452, |
|
"loss": 2.5445688247680662, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 11.88586387434555, |
|
"learning_rate": 0.0004792121939877459, |
|
"loss": 2.554265022277832, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 11.906806282722513, |
|
"learning_rate": 0.00047879016643429336, |
|
"loss": 2.5312326431274412, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 11.927748691099476, |
|
"learning_rate": 0.00047836925192090116, |
|
"loss": 2.54235897064209, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 11.94869109947644, |
|
"learning_rate": 0.0004779494455636703, |
|
"loss": 2.5591432571411135, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 11.969633507853404, |
|
"learning_rate": 0.00047753074250865145, |
|
"loss": 2.5049566268920898, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 11.990575916230366, |
|
"learning_rate": 0.00047711313793160877, |
|
"loss": 2.5459238052368165, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 11.998952879581152, |
|
"eval_loss": 2.5662965774536133, |
|
"eval_runtime": 74.2325, |
|
"eval_samples_per_second": 147.59, |
|
"step": 5724 |
|
}, |
|
{ |
|
"epoch": 12.012565445026178, |
|
"learning_rate": 0.000476696627037787, |
|
"loss": 2.6624425888061523, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 12.033507853403142, |
|
"learning_rate": 0.0004762812050616797, |
|
"loss": 2.5242809295654296, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 12.054450261780104, |
|
"learning_rate": 0.0004758668672668006, |
|
"loss": 2.541863441467285, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 12.075392670157068, |
|
"learning_rate": 0.00047545360894545664, |
|
"loss": 2.5424705505371095, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 12.09633507853403, |
|
"learning_rate": 0.0004750414254185235, |
|
"loss": 2.5307668685913085, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 12.117277486910995, |
|
"learning_rate": 0.0004746303120352226, |
|
"loss": 2.5350624084472657, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 12.138219895287959, |
|
"learning_rate": 0.00047422026417290146, |
|
"loss": 2.5554269790649413, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 12.159162303664921, |
|
"learning_rate": 0.0004738112772368146, |
|
"loss": 2.5168834686279298, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 12.180104712041885, |
|
"learning_rate": 0.00047340334665990787, |
|
"loss": 2.531605529785156, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 12.201047120418847, |
|
"learning_rate": 0.0004729964679026039, |
|
"loss": 2.515584373474121, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 12.221989528795811, |
|
"learning_rate": 0.0004725906364525903, |
|
"loss": 2.522596549987793, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 12.242931937172775, |
|
"learning_rate": 0.0004721858478246089, |
|
"loss": 2.5171236038208007, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 12.263874345549738, |
|
"learning_rate": 0.0004717820975602482, |
|
"loss": 2.529332160949707, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 12.284816753926702, |
|
"learning_rate": 0.0004713793812277367, |
|
"loss": 2.5047964096069335, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 12.305759162303666, |
|
"learning_rate": 0.00047097769442173856, |
|
"loss": 2.518666458129883, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 12.326701570680628, |
|
"learning_rate": 0.00047057703276315164, |
|
"loss": 2.5095588684082033, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 12.347643979057592, |
|
"learning_rate": 0.0004701773918989065, |
|
"loss": 2.5557069778442383, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 12.368586387434554, |
|
"learning_rate": 0.00046977876750176805, |
|
"loss": 2.5204561233520506, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 12.389528795811518, |
|
"learning_rate": 0.0004693811552701385, |
|
"loss": 2.5440658569335937, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 12.410471204188482, |
|
"learning_rate": 0.0004689845509278626, |
|
"loss": 2.5195499420166017, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 12.431413612565445, |
|
"learning_rate": 0.00046858895022403474, |
|
"loss": 2.506319999694824, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 12.452356020942409, |
|
"learning_rate": 0.000468194348932807, |
|
"loss": 2.507068061828613, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 12.473298429319371, |
|
"learning_rate": 0.00046780074285319984, |
|
"loss": 2.559153938293457, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 12.494240837696335, |
|
"learning_rate": 0.0004674081278089144, |
|
"loss": 2.5483341217041016, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 12.515183246073299, |
|
"learning_rate": 0.00046701649964814616, |
|
"loss": 2.523490333557129, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 12.536125654450261, |
|
"learning_rate": 0.0004666258542434007, |
|
"loss": 2.511086654663086, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 12.557068062827225, |
|
"learning_rate": 0.000466236187491311, |
|
"loss": 2.525220489501953, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 12.578010471204188, |
|
"learning_rate": 0.00046584749531245617, |
|
"loss": 2.5341968536376953, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 12.598952879581152, |
|
"learning_rate": 0.0004654597736511823, |
|
"loss": 2.517439842224121, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 12.619895287958116, |
|
"learning_rate": 0.0004650730184754247, |
|
"loss": 2.535861778259277, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 12.640837696335078, |
|
"learning_rate": 0.0004646872257765318, |
|
"loss": 2.5128170013427735, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 12.661780104712042, |
|
"learning_rate": 0.00046430239156909045, |
|
"loss": 2.5013412475585937, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 12.682722513089006, |
|
"learning_rate": 0.00046391851189075343, |
|
"loss": 2.492375373840332, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 12.703664921465968, |
|
"learning_rate": 0.00046353558280206746, |
|
"loss": 2.533987045288086, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 12.724607329842932, |
|
"learning_rate": 0.00046315360038630404, |
|
"loss": 2.5339818954467774, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 12.745549738219895, |
|
"learning_rate": 0.0004627725607492909, |
|
"loss": 2.5307100296020506, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 12.766492146596859, |
|
"learning_rate": 0.00046239246001924503, |
|
"loss": 2.5334211349487306, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 12.787434554973823, |
|
"learning_rate": 0.000462013294346608, |
|
"loss": 2.524607849121094, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 12.808376963350785, |
|
"learning_rate": 0.00046163505990388167, |
|
"loss": 2.5250701904296875, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 12.829319371727749, |
|
"learning_rate": 0.00046125775288546623, |
|
"loss": 2.514480400085449, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 12.850261780104713, |
|
"learning_rate": 0.00046088136950749937, |
|
"loss": 2.506093215942383, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 12.871204188481675, |
|
"learning_rate": 0.0004605059060076967, |
|
"loss": 2.5329927444458007, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 12.89214659685864, |
|
"learning_rate": 0.0004601313586451939, |
|
"loss": 2.5250947952270506, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 12.913089005235602, |
|
"learning_rate": 0.00045975772370039034, |
|
"loss": 2.535073471069336, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 12.934031413612566, |
|
"learning_rate": 0.0004593849974747937, |
|
"loss": 2.524639892578125, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 12.95497382198953, |
|
"learning_rate": 0.0004590131762908664, |
|
"loss": 2.509628486633301, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 12.975916230366492, |
|
"learning_rate": 0.00045864225649187287, |
|
"loss": 2.5401321411132813, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 12.996858638743456, |
|
"learning_rate": 0.000458272234441729, |
|
"loss": 2.512648582458496, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 12.998952879581152, |
|
"eval_loss": 2.558600902557373, |
|
"eval_runtime": 73.5975, |
|
"eval_samples_per_second": 148.864, |
|
"step": 6201 |
|
}, |
|
{ |
|
"epoch": 13.018848167539266, |
|
"learning_rate": 0.00045790310652485205, |
|
"loss": 2.6353900909423826, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 13.03979057591623, |
|
"learning_rate": 0.0004575348691460124, |
|
"loss": 2.519637870788574, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 13.060732984293194, |
|
"learning_rate": 0.00045716751873018654, |
|
"loss": 2.515974426269531, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 13.081675392670157, |
|
"learning_rate": 0.00045680105172241103, |
|
"loss": 2.5275392532348633, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 13.10261780104712, |
|
"learning_rate": 0.0004564354645876384, |
|
"loss": 2.518478012084961, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 13.123560209424085, |
|
"learning_rate": 0.00045607075381059363, |
|
"loss": 2.506203460693359, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 13.144502617801047, |
|
"learning_rate": 0.00045570691589563234, |
|
"loss": 2.501953125, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 13.165445026178011, |
|
"learning_rate": 0.0004553439473666, |
|
"loss": 2.536935234069824, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 13.186387434554973, |
|
"learning_rate": 0.0004549818447666924, |
|
"loss": 2.500376892089844, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 13.207329842931937, |
|
"learning_rate": 0.00045462060465831743, |
|
"loss": 2.507547950744629, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 13.228272251308901, |
|
"learning_rate": 0.0004542602236229581, |
|
"loss": 2.50396842956543, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 13.249214659685864, |
|
"learning_rate": 0.00045390069826103653, |
|
"loss": 2.4975730895996096, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 13.270157068062828, |
|
"learning_rate": 0.00045354202519177925, |
|
"loss": 2.5031005859375, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 13.29109947643979, |
|
"learning_rate": 0.0004531842010530839, |
|
"loss": 2.5102792739868165, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 13.312041884816754, |
|
"learning_rate": 0.0004528272225013865, |
|
"loss": 2.510196876525879, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 13.332984293193718, |
|
"learning_rate": 0.00045247108621153056, |
|
"loss": 2.529274559020996, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 13.35392670157068, |
|
"learning_rate": 0.0004521157888766368, |
|
"loss": 2.5169746398925783, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 13.374869109947644, |
|
"learning_rate": 0.00045176132720797443, |
|
"loss": 2.4974170684814454, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 13.395811518324606, |
|
"learning_rate": 0.0004514076979348328, |
|
"loss": 2.4937871932983398, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 13.41675392670157, |
|
"learning_rate": 0.0004510548978043951, |
|
"loss": 2.4988937377929688, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 13.437696335078535, |
|
"learning_rate": 0.00045070292358161265, |
|
"loss": 2.5192642211914062, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 13.458638743455497, |
|
"learning_rate": 0.0004503517720490801, |
|
"loss": 2.515308380126953, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 13.47958115183246, |
|
"learning_rate": 0.000450001440006912, |
|
"loss": 2.4873653411865235, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 13.500523560209425, |
|
"learning_rate": 0.00044965192427262043, |
|
"loss": 2.4860763549804688, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 13.521465968586387, |
|
"learning_rate": 0.0004493032216809934, |
|
"loss": 2.4914045333862305, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 13.542408376963351, |
|
"learning_rate": 0.00044895532908397455, |
|
"loss": 2.505444145202637, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 13.563350785340313, |
|
"learning_rate": 0.00044860824335054384, |
|
"loss": 2.484037971496582, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 13.584293193717278, |
|
"learning_rate": 0.00044826196136659916, |
|
"loss": 2.4801618576049806, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 13.605235602094242, |
|
"learning_rate": 0.00044791648003483884, |
|
"loss": 2.497146415710449, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 13.626178010471204, |
|
"learning_rate": 0.0004475717962746455, |
|
"loss": 2.510635757446289, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 13.647120418848168, |
|
"learning_rate": 0.0004472279070219706, |
|
"loss": 2.5058326721191406, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 13.66806282722513, |
|
"learning_rate": 0.00044688480922922, |
|
"loss": 2.490641975402832, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 13.689005235602094, |
|
"learning_rate": 0.00044654249986514057, |
|
"loss": 2.4954011917114256, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 13.709947643979058, |
|
"learning_rate": 0.0004462009759147076, |
|
"loss": 2.496523857116699, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 13.73089005235602, |
|
"learning_rate": 0.0004458602343790135, |
|
"loss": 2.4896028518676756, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 13.751832460732984, |
|
"learning_rate": 0.00044552027227515704, |
|
"loss": 2.490574836730957, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 13.772774869109949, |
|
"learning_rate": 0.00044518108663613355, |
|
"loss": 2.4956533432006838, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 13.79371727748691, |
|
"learning_rate": 0.00044484267451072644, |
|
"loss": 2.5057823181152346, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 13.814659685863875, |
|
"learning_rate": 0.0004445050329633992, |
|
"loss": 2.493949127197266, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 13.835602094240837, |
|
"learning_rate": 0.0004441681590741884, |
|
"loss": 2.514782524108887, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 13.856544502617801, |
|
"learning_rate": 0.0004438320499385977, |
|
"loss": 2.463920783996582, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 13.877486910994765, |
|
"learning_rate": 0.00044349670266749286, |
|
"loss": 2.494730567932129, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 13.898429319371727, |
|
"learning_rate": 0.0004431621143869969, |
|
"loss": 2.4927881240844725, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 13.919371727748691, |
|
"learning_rate": 0.00044282828223838727, |
|
"loss": 2.5001829147338865, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 13.940314136125654, |
|
"learning_rate": 0.0004424952033779929, |
|
"loss": 2.485161018371582, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 13.961256544502618, |
|
"learning_rate": 0.00044216287497709253, |
|
"loss": 2.4990121841430666, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 13.982198952879582, |
|
"learning_rate": 0.0004418312942218139, |
|
"loss": 2.478795051574707, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 13.998952879581152, |
|
"eval_loss": 2.551051616668701, |
|
"eval_runtime": 73.8418, |
|
"eval_samples_per_second": 148.371, |
|
"step": 6678 |
|
}, |
|
{ |
|
"epoch": 14.004188481675392, |
|
"learning_rate": 0.0004415004583130336, |
|
"loss": 2.631510925292969, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 14.025130890052356, |
|
"learning_rate": 0.0004411703644662778, |
|
"loss": 2.46261043548584, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 14.04607329842932, |
|
"learning_rate": 0.00044084100991162385, |
|
"loss": 2.488113212585449, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 14.067015706806282, |
|
"learning_rate": 0.00044051239189360286, |
|
"loss": 2.4580398559570313, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 14.087958115183246, |
|
"learning_rate": 0.00044018450767110235, |
|
"loss": 2.482432174682617, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 14.108900523560209, |
|
"learning_rate": 0.0004398573545172709, |
|
"loss": 2.4827293395996093, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 14.129842931937173, |
|
"learning_rate": 0.0004395309297194223, |
|
"loss": 2.472520637512207, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 14.150785340314137, |
|
"learning_rate": 0.0004392052305789416, |
|
"loss": 2.47951602935791, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 14.171727748691099, |
|
"learning_rate": 0.0004388802544111908, |
|
"loss": 2.4616981506347657, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 14.192670157068063, |
|
"learning_rate": 0.0004385559985454165, |
|
"loss": 2.4829242706298826, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 14.213612565445025, |
|
"learning_rate": 0.0004382324603246575, |
|
"loss": 2.478873634338379, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 14.23455497382199, |
|
"learning_rate": 0.0004379096371056532, |
|
"loss": 2.4993722915649412, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 14.255497382198953, |
|
"learning_rate": 0.000437587526258753, |
|
"loss": 2.474994659423828, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 14.276439790575916, |
|
"learning_rate": 0.0004372661251678265, |
|
"loss": 2.495197296142578, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 14.29738219895288, |
|
"learning_rate": 0.00043694543123017407, |
|
"loss": 2.476504325866699, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 14.318324607329842, |
|
"learning_rate": 0.0004366254418564382, |
|
"loss": 2.5161060333251952, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 14.339267015706806, |
|
"learning_rate": 0.0004363061544705161, |
|
"loss": 2.4868789672851563, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 14.36020942408377, |
|
"learning_rate": 0.0004359875665094723, |
|
"loss": 2.4974212646484375, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 14.381151832460732, |
|
"learning_rate": 0.00043566967542345227, |
|
"loss": 2.4662216186523436, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 14.402094240837696, |
|
"learning_rate": 0.00043535247867559673, |
|
"loss": 2.469373321533203, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 14.42303664921466, |
|
"learning_rate": 0.00043503597374195665, |
|
"loss": 2.483184242248535, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 14.443979057591623, |
|
"learning_rate": 0.0004347201581114088, |
|
"loss": 2.4689809799194338, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 14.464921465968587, |
|
"learning_rate": 0.0004344050292855724, |
|
"loss": 2.439427375793457, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 14.485863874345549, |
|
"learning_rate": 0.00043409058477872554, |
|
"loss": 2.47011775970459, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 14.506806282722513, |
|
"learning_rate": 0.00043377682211772343, |
|
"loss": 2.4866916656494142, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 14.527748691099477, |
|
"learning_rate": 0.0004334637388419161, |
|
"loss": 2.4834897994995115, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 14.54869109947644, |
|
"learning_rate": 0.0004331513325030681, |
|
"loss": 2.5011289596557615, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 14.569633507853403, |
|
"learning_rate": 0.0004328396006652773, |
|
"loss": 2.478676986694336, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 14.590575916230367, |
|
"learning_rate": 0.00043252854090489564, |
|
"loss": 2.4692920684814452, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 14.61151832460733, |
|
"learning_rate": 0.00043221815081044985, |
|
"loss": 2.492611122131348, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 14.632460732984294, |
|
"learning_rate": 0.00043190842798256285, |
|
"loss": 2.4726083755493162, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 14.653403141361256, |
|
"learning_rate": 0.00043159937003387584, |
|
"loss": 2.491672706604004, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 14.67434554973822, |
|
"learning_rate": 0.00043129097458897135, |
|
"loss": 2.474324417114258, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 14.695287958115184, |
|
"learning_rate": 0.000430983239284296, |
|
"loss": 2.4726449966430666, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 14.716230366492146, |
|
"learning_rate": 0.0004306761617680849, |
|
"loss": 2.456452178955078, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 14.73717277486911, |
|
"learning_rate": 0.00043036973970028583, |
|
"loss": 2.4777704238891602, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 14.758115183246073, |
|
"learning_rate": 0.00043006397075248464, |
|
"loss": 2.492514801025391, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 14.779057591623037, |
|
"learning_rate": 0.00042975885260783056, |
|
"loss": 2.464923095703125, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 14.8, |
|
"learning_rate": 0.00042945438296096303, |
|
"loss": 2.4520200729370116, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 14.820942408376963, |
|
"learning_rate": 0.0004291505595179379, |
|
"loss": 2.461465072631836, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 14.841884816753927, |
|
"learning_rate": 0.0004288473799961553, |
|
"loss": 2.474461555480957, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 14.86282722513089, |
|
"learning_rate": 0.0004285448421242875, |
|
"loss": 2.474432945251465, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 14.883769633507853, |
|
"learning_rate": 0.00042824294364220724, |
|
"loss": 2.506844329833984, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 14.904712041884817, |
|
"learning_rate": 0.0004279416823009172, |
|
"loss": 2.466670036315918, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 14.92565445026178, |
|
"learning_rate": 0.0004276410558624791, |
|
"loss": 2.4866743087768555, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 14.946596858638744, |
|
"learning_rate": 0.0004273410620999446, |
|
"loss": 2.4524404525756838, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 14.967539267015706, |
|
"learning_rate": 0.0004270416987972853, |
|
"loss": 2.4684980392456053, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 14.98848167539267, |
|
"learning_rate": 0.00042674296374932424, |
|
"loss": 2.469831848144531, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 14.998952879581152, |
|
"eval_loss": 2.5442276000976562, |
|
"eval_runtime": 73.9909, |
|
"eval_samples_per_second": 148.072, |
|
"step": 7155 |
|
}, |
|
{ |
|
"epoch": 15.010471204188482, |
|
"learning_rate": 0.0004264448547616681, |
|
"loss": 2.5812490463256834, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 15.031413612565444, |
|
"learning_rate": 0.00042614736965063864, |
|
"loss": 2.4873594284057616, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 15.052356020942408, |
|
"learning_rate": 0.0004258505062432064, |
|
"loss": 2.4659671783447266, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 15.073298429319372, |
|
"learning_rate": 0.0004255542623769234, |
|
"loss": 2.4558393478393556, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 15.094240837696335, |
|
"learning_rate": 0.00042525863589985727, |
|
"loss": 2.4745227813720705, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 15.115183246073299, |
|
"learning_rate": 0.00042496362467052564, |
|
"loss": 2.4763622283935547, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 15.136125654450261, |
|
"learning_rate": 0.00042466922655783073, |
|
"loss": 2.4713407516479493, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 15.157068062827225, |
|
"learning_rate": 0.00042437543944099504, |
|
"loss": 2.4634868621826174, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 15.178010471204189, |
|
"learning_rate": 0.00042408226120949674, |
|
"loss": 2.476248931884766, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 15.198952879581151, |
|
"learning_rate": 0.00042378968976300647, |
|
"loss": 2.4730270385742186, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 15.219895287958115, |
|
"learning_rate": 0.00042349772301132377, |
|
"loss": 2.476571273803711, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 15.24083769633508, |
|
"learning_rate": 0.0004232063588743146, |
|
"loss": 2.4510690689086916, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 15.261780104712042, |
|
"learning_rate": 0.00042291559528184904, |
|
"loss": 2.465399742126465, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 15.282722513089006, |
|
"learning_rate": 0.0004226254301737393, |
|
"loss": 2.4773502349853516, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 15.303664921465968, |
|
"learning_rate": 0.0004223358614996787, |
|
"loss": 2.43621711730957, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 15.324607329842932, |
|
"learning_rate": 0.00042204688721918075, |
|
"loss": 2.456114959716797, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 15.345549738219896, |
|
"learning_rate": 0.0004217585053015187, |
|
"loss": 2.468073844909668, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 15.366492146596858, |
|
"learning_rate": 0.0004214707137256656, |
|
"loss": 2.471833419799805, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 15.387434554973822, |
|
"learning_rate": 0.0004211835104802349, |
|
"loss": 2.499461364746094, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 15.408376963350785, |
|
"learning_rate": 0.00042089689356342115, |
|
"loss": 2.4492721557617188, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 15.429319371727749, |
|
"learning_rate": 0.0004206108609829418, |
|
"loss": 2.4671262741088866, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 15.450261780104713, |
|
"learning_rate": 0.00042032541075597875, |
|
"loss": 2.465005111694336, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 15.471204188481675, |
|
"learning_rate": 0.0004200405409091207, |
|
"loss": 2.4648488998413085, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 15.492146596858639, |
|
"learning_rate": 0.00041975624947830593, |
|
"loss": 2.463612174987793, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 15.513089005235603, |
|
"learning_rate": 0.00041947253450876515, |
|
"loss": 2.4697538375854493, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 15.534031413612565, |
|
"learning_rate": 0.00041918939405496546, |
|
"loss": 2.45694637298584, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 15.55497382198953, |
|
"learning_rate": 0.00041890682618055396, |
|
"loss": 2.4443153381347655, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 15.575916230366492, |
|
"learning_rate": 0.0004186248289583023, |
|
"loss": 2.445983123779297, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 15.596858638743456, |
|
"learning_rate": 0.00041834340047005144, |
|
"loss": 2.489885711669922, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 15.61780104712042, |
|
"learning_rate": 0.0004180625388066569, |
|
"loss": 2.4711660385131835, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 15.638743455497382, |
|
"learning_rate": 0.00041778224206793433, |
|
"loss": 2.4884315490722657, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 15.659685863874346, |
|
"learning_rate": 0.00041750250836260536, |
|
"loss": 2.477284240722656, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 15.680628272251308, |
|
"learning_rate": 0.0004172233358082443, |
|
"loss": 2.475067901611328, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 15.701570680628272, |
|
"learning_rate": 0.00041694472253122467, |
|
"loss": 2.482602119445801, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 15.722513089005236, |
|
"learning_rate": 0.00041666666666666664, |
|
"loss": 2.442608642578125, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 15.743455497382199, |
|
"learning_rate": 0.0004163891663583843, |
|
"loss": 2.468288040161133, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 15.764397905759163, |
|
"learning_rate": 0.00041611221975883396, |
|
"loss": 2.4465059280395507, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 15.785340314136125, |
|
"learning_rate": 0.00041583582502906203, |
|
"loss": 2.4614633560180663, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 15.806282722513089, |
|
"learning_rate": 0.0004155599803386543, |
|
"loss": 2.4629968643188476, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 15.827225130890053, |
|
"learning_rate": 0.0004152846838656846, |
|
"loss": 2.454400062561035, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 15.848167539267015, |
|
"learning_rate": 0.00041500993379666443, |
|
"loss": 2.467230224609375, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 15.86910994764398, |
|
"learning_rate": 0.0004147357283264927, |
|
"loss": 2.442008209228516, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 15.890052356020943, |
|
"learning_rate": 0.000414462065658406, |
|
"loss": 2.4506603240966798, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 15.910994764397905, |
|
"learning_rate": 0.0004141889440039292, |
|
"loss": 2.4443122863769533, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 15.93193717277487, |
|
"learning_rate": 0.00041391636158282614, |
|
"loss": 2.4457521438598633, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 15.952879581151832, |
|
"learning_rate": 0.00041364431662305114, |
|
"loss": 2.457781982421875, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 15.973821989528796, |
|
"learning_rate": 0.0004133728073607005, |
|
"loss": 2.440464210510254, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 15.99476439790576, |
|
"learning_rate": 0.00041310183203996446, |
|
"loss": 2.4534429550170898, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 15.998952879581152, |
|
"eval_loss": 2.5449907779693604, |
|
"eval_runtime": 73.6337, |
|
"eval_samples_per_second": 148.791, |
|
"step": 7632 |
|
}, |
|
{ |
|
"epoch": 16.016753926701572, |
|
"learning_rate": 0.0004128313889130795, |
|
"loss": 2.5835424423217774, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 16.037696335078532, |
|
"learning_rate": 0.0004125614762402809, |
|
"loss": 2.458993148803711, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 16.058638743455496, |
|
"learning_rate": 0.00041229209228975627, |
|
"loss": 2.471218299865723, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 16.07958115183246, |
|
"learning_rate": 0.000412023235337598, |
|
"loss": 2.464751052856445, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 16.100523560209425, |
|
"learning_rate": 0.00041175490366775766, |
|
"loss": 2.4599708557128905, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 16.12146596858639, |
|
"learning_rate": 0.0004114870955719997, |
|
"loss": 2.4355844497680663, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 16.14240837696335, |
|
"learning_rate": 0.00041121980934985563, |
|
"loss": 2.4272241592407227, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 16.163350785340313, |
|
"learning_rate": 0.000410953043308579, |
|
"loss": 2.4612340927124023, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 16.184293193717277, |
|
"learning_rate": 0.0004106867957631001, |
|
"loss": 2.465089797973633, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 16.20523560209424, |
|
"learning_rate": 0.00041042106503598165, |
|
"loss": 2.451694297790527, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 16.226178010471205, |
|
"learning_rate": 0.0004101558494573738, |
|
"loss": 2.464099884033203, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 16.24712041884817, |
|
"learning_rate": 0.0004098911473649706, |
|
"loss": 2.448426055908203, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 16.26806282722513, |
|
"learning_rate": 0.0004096269571039658, |
|
"loss": 2.455006217956543, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 16.289005235602094, |
|
"learning_rate": 0.00040936327702701005, |
|
"loss": 2.453194808959961, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 16.309947643979058, |
|
"learning_rate": 0.00040910010549416687, |
|
"loss": 2.4759195327758787, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 16.330890052356022, |
|
"learning_rate": 0.0004088374408728706, |
|
"loss": 2.4628747940063476, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 16.351832460732986, |
|
"learning_rate": 0.0004085752815378834, |
|
"loss": 2.446619415283203, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 16.372774869109946, |
|
"learning_rate": 0.0004083136258712532, |
|
"loss": 2.4754364013671877, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 16.39371727748691, |
|
"learning_rate": 0.0004080524722622717, |
|
"loss": 2.4566783905029297, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 16.414659685863874, |
|
"learning_rate": 0.00040779181910743294, |
|
"loss": 2.426336479187012, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 16.43560209424084, |
|
"learning_rate": 0.0004075316648103914, |
|
"loss": 2.460182762145996, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 16.456544502617803, |
|
"learning_rate": 0.0004072720077819216, |
|
"loss": 2.454692268371582, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 16.477486910994763, |
|
"learning_rate": 0.0004070128464398768, |
|
"loss": 2.4589263916015627, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 16.498429319371727, |
|
"learning_rate": 0.0004067541792091489, |
|
"loss": 2.472345161437988, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 16.51937172774869, |
|
"learning_rate": 0.0004064960045216279, |
|
"loss": 2.427416229248047, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 16.540314136125655, |
|
"learning_rate": 0.0004062383208161624, |
|
"loss": 2.454151725769043, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 16.56125654450262, |
|
"learning_rate": 0.0004059811265385193, |
|
"loss": 2.4490371704101563, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 16.58219895287958, |
|
"learning_rate": 0.00040572442014134516, |
|
"loss": 2.479467010498047, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 16.603141361256544, |
|
"learning_rate": 0.00040546820008412654, |
|
"loss": 2.4391218185424806, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 16.624083769633508, |
|
"learning_rate": 0.0004052124648331515, |
|
"loss": 2.455718421936035, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 16.645026178010472, |
|
"learning_rate": 0.00040495721286147086, |
|
"loss": 2.4620994567871093, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 16.665968586387436, |
|
"learning_rate": 0.00040470244264886006, |
|
"loss": 2.448670196533203, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 16.686910994764396, |
|
"learning_rate": 0.00040444815268178097, |
|
"loss": 2.426989936828613, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 16.70785340314136, |
|
"learning_rate": 0.00040419434145334414, |
|
"loss": 2.447972869873047, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 16.728795811518324, |
|
"learning_rate": 0.00040394100746327154, |
|
"loss": 2.457029342651367, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 16.74973821989529, |
|
"learning_rate": 0.0004036881492178589, |
|
"loss": 2.4539730072021486, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 16.770680628272252, |
|
"learning_rate": 0.00040343576522993926, |
|
"loss": 2.4607629776000977, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 16.791623036649213, |
|
"learning_rate": 0.00040318385401884554, |
|
"loss": 2.43496036529541, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 16.812565445026177, |
|
"learning_rate": 0.00040293241411037484, |
|
"loss": 2.423869323730469, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 16.83350785340314, |
|
"learning_rate": 0.00040268144403675154, |
|
"loss": 2.4423187255859373, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 16.854450261780105, |
|
"learning_rate": 0.0004024309423365915, |
|
"loss": 2.4698711395263673, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 16.87539267015707, |
|
"learning_rate": 0.0004021809075548668, |
|
"loss": 2.423082160949707, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 16.89633507853403, |
|
"learning_rate": 0.0004019313382428694, |
|
"loss": 2.443895149230957, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 16.917277486910994, |
|
"learning_rate": 0.00040168223295817656, |
|
"loss": 2.455313301086426, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 16.938219895287958, |
|
"learning_rate": 0.00040143359026461554, |
|
"loss": 2.415020751953125, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 16.95916230366492, |
|
"learning_rate": 0.000401185408732229, |
|
"loss": 2.462967109680176, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 16.980104712041886, |
|
"learning_rate": 0.0004009376869372401, |
|
"loss": 2.416962242126465, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 16.99895287958115, |
|
"eval_loss": 2.5404856204986572, |
|
"eval_runtime": 73.757, |
|
"eval_samples_per_second": 148.542, |
|
"step": 8109 |
|
}, |
|
{ |
|
"epoch": 17.002094240837696, |
|
"learning_rate": 0.00040069042346201864, |
|
"loss": 2.549093818664551, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 17.02303664921466, |
|
"learning_rate": 0.00040044361689504655, |
|
"loss": 2.452895736694336, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 17.043979057591624, |
|
"learning_rate": 0.0004001972658308847, |
|
"loss": 2.460617446899414, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 17.064921465968588, |
|
"learning_rate": 0.0003999513688701383, |
|
"loss": 2.4506912231445312, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 17.08586387434555, |
|
"learning_rate": 0.00039970592461942457, |
|
"loss": 2.440316581726074, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 17.106806282722513, |
|
"learning_rate": 0.00039946093169133874, |
|
"loss": 2.4249364852905275, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 17.127748691099477, |
|
"learning_rate": 0.0003992163887044217, |
|
"loss": 2.4489822387695312, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 17.14869109947644, |
|
"learning_rate": 0.0003989722942831268, |
|
"loss": 2.455015754699707, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 17.169633507853405, |
|
"learning_rate": 0.0003987286470577879, |
|
"loss": 2.4564001083374025, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 17.190575916230365, |
|
"learning_rate": 0.0003984854456645864, |
|
"loss": 2.4481569290161134, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 17.21151832460733, |
|
"learning_rate": 0.0003982426887455199, |
|
"loss": 2.4090858459472657, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 17.232460732984293, |
|
"learning_rate": 0.00039800037494836985, |
|
"loss": 2.4279315948486326, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 17.253403141361257, |
|
"learning_rate": 0.00039775850292667005, |
|
"loss": 2.4328563690185545, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 17.27434554973822, |
|
"learning_rate": 0.0003975170713396753, |
|
"loss": 2.426299476623535, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 17.295287958115182, |
|
"learning_rate": 0.0003972760788523301, |
|
"loss": 2.424925994873047, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 17.316230366492146, |
|
"learning_rate": 0.0003970355241352378, |
|
"loss": 2.4173357009887697, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 17.33717277486911, |
|
"learning_rate": 0.00039679540586462953, |
|
"loss": 2.4525693893432616, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 17.358115183246074, |
|
"learning_rate": 0.00039655572272233384, |
|
"loss": 2.473075103759766, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 17.379057591623038, |
|
"learning_rate": 0.0003963164733957462, |
|
"loss": 2.422397232055664, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 17.4, |
|
"learning_rate": 0.00039607765657779864, |
|
"loss": 2.441000556945801, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 17.420942408376963, |
|
"learning_rate": 0.0003958392709669304, |
|
"loss": 2.4337257385253905, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 17.441884816753927, |
|
"learning_rate": 0.00039560131526705723, |
|
"loss": 2.4255434036254884, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 17.46282722513089, |
|
"learning_rate": 0.0003953637881875425, |
|
"loss": 2.4355316162109375, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 17.483769633507855, |
|
"learning_rate": 0.0003951266884431675, |
|
"loss": 2.428698921203613, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 17.504712041884815, |
|
"learning_rate": 0.00039489001475410214, |
|
"loss": 2.4530813217163088, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 17.52565445026178, |
|
"learning_rate": 0.00039465376584587626, |
|
"loss": 2.4496335983276367, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 17.546596858638743, |
|
"learning_rate": 0.00039441794044935054, |
|
"loss": 2.425421142578125, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 17.567539267015707, |
|
"learning_rate": 0.00039418253730068797, |
|
"loss": 2.4285154342651367, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 17.58848167539267, |
|
"learning_rate": 0.0003939475551413253, |
|
"loss": 2.4037647247314453, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 17.609424083769632, |
|
"learning_rate": 0.000393712992717945, |
|
"loss": 2.423297119140625, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 17.630366492146596, |
|
"learning_rate": 0.0003934788487824469, |
|
"loss": 2.47174186706543, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 17.65130890052356, |
|
"learning_rate": 0.0003932451220919205, |
|
"loss": 2.441014289855957, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 17.672251308900524, |
|
"learning_rate": 0.0003930118114086172, |
|
"loss": 2.4317821502685546, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 17.693193717277488, |
|
"learning_rate": 0.00039277891549992266, |
|
"loss": 2.4377744674682615, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 17.71413612565445, |
|
"learning_rate": 0.0003925464331383298, |
|
"loss": 2.4461442947387697, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 17.735078534031413, |
|
"learning_rate": 0.00039231436310141113, |
|
"loss": 2.466485595703125, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 17.756020942408377, |
|
"learning_rate": 0.00039208270417179214, |
|
"loss": 2.4318614959716798, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 17.77696335078534, |
|
"learning_rate": 0.0003918514551371243, |
|
"loss": 2.431291389465332, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 17.797905759162305, |
|
"learning_rate": 0.0003916206147900585, |
|
"loss": 2.43109130859375, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 17.81884816753927, |
|
"learning_rate": 0.00039139018192821845, |
|
"loss": 2.4323259353637696, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 17.83979057591623, |
|
"learning_rate": 0.00039116015535417445, |
|
"loss": 2.439468193054199, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 17.860732984293193, |
|
"learning_rate": 0.00039093053387541745, |
|
"loss": 2.4567943572998048, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 17.881675392670157, |
|
"learning_rate": 0.00039070131630433274, |
|
"loss": 2.4346736907958983, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 17.90261780104712, |
|
"learning_rate": 0.00039047250145817424, |
|
"loss": 2.441089630126953, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 17.923560209424085, |
|
"learning_rate": 0.00039024408815903914, |
|
"loss": 2.4277088165283205, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 17.944502617801046, |
|
"learning_rate": 0.0003900160752338421, |
|
"loss": 2.4228445053100587, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 17.96544502617801, |
|
"learning_rate": 0.00038978846151429, |
|
"loss": 2.399433708190918, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 17.986387434554974, |
|
"learning_rate": 0.0003895612458368572, |
|
"loss": 2.4009246826171875, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 17.99895287958115, |
|
"eval_loss": 2.5318360328674316, |
|
"eval_runtime": 74.0596, |
|
"eval_samples_per_second": 147.935, |
|
"step": 8586 |
|
}, |
|
{ |
|
"epoch": 18.008376963350784, |
|
"learning_rate": 0.00038933442704275974, |
|
"loss": 2.537807655334473, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 18.02931937172775, |
|
"learning_rate": 0.0003891080039779314, |
|
"loss": 2.446313667297363, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 18.050261780104712, |
|
"learning_rate": 0.0003888819754929986, |
|
"loss": 2.4345209121704103, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 18.071204188481676, |
|
"learning_rate": 0.0003886563404432558, |
|
"loss": 2.437006187438965, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 18.09214659685864, |
|
"learning_rate": 0.0003884310976886414, |
|
"loss": 2.420798110961914, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 18.1130890052356, |
|
"learning_rate": 0.0003882062460937135, |
|
"loss": 2.4323537826538084, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 18.134031413612565, |
|
"learning_rate": 0.0003879817845276255, |
|
"loss": 2.442038345336914, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 18.15497382198953, |
|
"learning_rate": 0.0003877577118641029, |
|
"loss": 2.433667755126953, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 18.175916230366493, |
|
"learning_rate": 0.00038753402698141903, |
|
"loss": 2.424707221984863, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 18.196858638743457, |
|
"learning_rate": 0.0003873107287623715, |
|
"loss": 2.4348966598510744, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 18.217801047120417, |
|
"learning_rate": 0.00038708781609425905, |
|
"loss": 2.404917907714844, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 18.23874345549738, |
|
"learning_rate": 0.000386865287868858, |
|
"loss": 2.4346105575561525, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 18.259685863874346, |
|
"learning_rate": 0.0003866431429823993, |
|
"loss": 2.442304992675781, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 18.28062827225131, |
|
"learning_rate": 0.00038642138033554525, |
|
"loss": 2.42406005859375, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 18.301570680628274, |
|
"learning_rate": 0.00038619999883336703, |
|
"loss": 2.440979766845703, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 18.322513089005234, |
|
"learning_rate": 0.0003859789973853217, |
|
"loss": 2.440751075744629, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 18.343455497382198, |
|
"learning_rate": 0.0003857583749052298, |
|
"loss": 2.4363412857055664, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 18.364397905759162, |
|
"learning_rate": 0.0003855381303112527, |
|
"loss": 2.4235382080078125, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 18.385340314136126, |
|
"learning_rate": 0.0003853182625258708, |
|
"loss": 2.4238630294799806, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 18.40628272251309, |
|
"learning_rate": 0.0003850987704758608, |
|
"loss": 2.426643943786621, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 18.42722513089005, |
|
"learning_rate": 0.00038487965309227413, |
|
"loss": 2.438970947265625, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 18.448167539267015, |
|
"learning_rate": 0.0003846609093104148, |
|
"loss": 2.423859786987305, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 18.46910994764398, |
|
"learning_rate": 0.00038444253806981784, |
|
"loss": 2.4040243148803713, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 18.490052356020943, |
|
"learning_rate": 0.00038422453831422784, |
|
"loss": 2.420393371582031, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 18.510994764397907, |
|
"learning_rate": 0.0003840069089915771, |
|
"loss": 2.427932929992676, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 18.531937172774867, |
|
"learning_rate": 0.00038378964905396454, |
|
"loss": 2.419098663330078, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 18.55287958115183, |
|
"learning_rate": 0.00038357275745763475, |
|
"loss": 2.4184850692749023, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 18.573821989528795, |
|
"learning_rate": 0.0003833562331629563, |
|
"loss": 2.402060127258301, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 18.59476439790576, |
|
"learning_rate": 0.0003831400751344014, |
|
"loss": 2.403904914855957, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 18.615706806282724, |
|
"learning_rate": 0.00038292428234052486, |
|
"loss": 2.4094032287597655, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 18.636649214659684, |
|
"learning_rate": 0.0003827088537539434, |
|
"loss": 2.3887189865112304, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 18.657591623036648, |
|
"learning_rate": 0.00038249378835131535, |
|
"loss": 2.4003849029541016, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 18.678534031413612, |
|
"learning_rate": 0.0003822790851133196, |
|
"loss": 2.4100620269775392, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 18.699476439790576, |
|
"learning_rate": 0.00038206474302463617, |
|
"loss": 2.4087665557861326, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 18.72041884816754, |
|
"learning_rate": 0.00038185076107392544, |
|
"loss": 2.4067865371704102, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 18.741361256544504, |
|
"learning_rate": 0.0003816371382538082, |
|
"loss": 2.3902347564697264, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 18.762303664921465, |
|
"learning_rate": 0.0003814238735608459, |
|
"loss": 2.4000757217407225, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 18.78324607329843, |
|
"learning_rate": 0.0003812109659955207, |
|
"loss": 2.4148767471313475, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 18.804188481675393, |
|
"learning_rate": 0.00038099841456221617, |
|
"loss": 2.414336395263672, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 18.825130890052357, |
|
"learning_rate": 0.0003807862182691969, |
|
"loss": 2.440867042541504, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 18.84607329842932, |
|
"learning_rate": 0.00038057437612859003, |
|
"loss": 2.4532596588134767, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 18.86701570680628, |
|
"learning_rate": 0.0003803628871563653, |
|
"loss": 2.4020782470703126, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 18.887958115183245, |
|
"learning_rate": 0.0003801517503723161, |
|
"loss": 2.430096435546875, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 18.90890052356021, |
|
"learning_rate": 0.00037994096480004037, |
|
"loss": 2.419812774658203, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 18.929842931937173, |
|
"learning_rate": 0.0003797305294669214, |
|
"loss": 2.4075344085693358, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 18.950785340314138, |
|
"learning_rate": 0.00037952044340410954, |
|
"loss": 2.4337480545043944, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 18.971727748691098, |
|
"learning_rate": 0.00037931070564650276, |
|
"loss": 2.4088159561157227, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 18.992670157068062, |
|
"learning_rate": 0.0003791013152327286, |
|
"loss": 2.4013919830322266, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 18.99895287958115, |
|
"eval_loss": 2.529340982437134, |
|
"eval_runtime": 73.527, |
|
"eval_samples_per_second": 149.007, |
|
"step": 9063 |
|
}, |
|
{ |
|
"epoch": 19.014659685863876, |
|
"learning_rate": 0.00037889227120512545, |
|
"loss": 2.5226316452026367, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 19.035602094240836, |
|
"learning_rate": 0.0003786835726097239, |
|
"loss": 2.4024560928344725, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 19.0565445026178, |
|
"learning_rate": 0.00037847521849622895, |
|
"loss": 2.4342859268188475, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 19.077486910994764, |
|
"learning_rate": 0.0003782672079180015, |
|
"loss": 2.4292444229125976, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 19.09842931937173, |
|
"learning_rate": 0.0003780595399320404, |
|
"loss": 2.4289926528930663, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 19.119371727748693, |
|
"learning_rate": 0.00037785221359896444, |
|
"loss": 2.440321159362793, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 19.140314136125653, |
|
"learning_rate": 0.00037764522798299443, |
|
"loss": 2.4353168487548826, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 19.161256544502617, |
|
"learning_rate": 0.0003774385821519358, |
|
"loss": 2.389999008178711, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 19.18219895287958, |
|
"learning_rate": 0.0003772322751771605, |
|
"loss": 2.4299448013305662, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 19.203141361256545, |
|
"learning_rate": 0.00037702630613358986, |
|
"loss": 2.4094564437866213, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 19.22408376963351, |
|
"learning_rate": 0.0003768206740996769, |
|
"loss": 2.416705322265625, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 19.24502617801047, |
|
"learning_rate": 0.00037661537815738915, |
|
"loss": 2.4174514770507813, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 19.265968586387434, |
|
"learning_rate": 0.00037641041739219143, |
|
"loss": 2.4112581253051757, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 19.286910994764398, |
|
"learning_rate": 0.00037620579089302876, |
|
"loss": 2.4125255584716796, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 19.307853403141362, |
|
"learning_rate": 0.0003760014977523091, |
|
"loss": 2.41434268951416, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 19.328795811518326, |
|
"learning_rate": 0.00037579753706588697, |
|
"loss": 2.419674301147461, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 19.349738219895286, |
|
"learning_rate": 0.00037559390793304604, |
|
"loss": 2.3896152496337892, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 19.37068062827225, |
|
"learning_rate": 0.00037539060945648286, |
|
"loss": 2.4006847381591796, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 19.391623036649214, |
|
"learning_rate": 0.00037518764074229014, |
|
"loss": 2.4157575607299804, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 19.41256544502618, |
|
"learning_rate": 0.00037498500089994, |
|
"loss": 2.398466873168945, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 19.433507853403142, |
|
"learning_rate": 0.00037478268904226795, |
|
"loss": 2.4136272430419923, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 19.454450261780103, |
|
"learning_rate": 0.00037458070428545635, |
|
"loss": 2.412180709838867, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 19.475392670157067, |
|
"learning_rate": 0.00037437904574901817, |
|
"loss": 2.417103385925293, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 19.49633507853403, |
|
"learning_rate": 0.00037417771255578104, |
|
"loss": 2.3830541610717773, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 19.517277486910995, |
|
"learning_rate": 0.00037397670383187097, |
|
"loss": 2.4057411193847655, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 19.53821989528796, |
|
"learning_rate": 0.0003737760187066967, |
|
"loss": 2.429146957397461, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 19.559162303664923, |
|
"learning_rate": 0.00037357565631293365, |
|
"loss": 2.4229619979858397, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 19.580104712041884, |
|
"learning_rate": 0.00037337561578650833, |
|
"loss": 2.4030439376831056, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 19.601047120418848, |
|
"learning_rate": 0.00037317589626658255, |
|
"loss": 2.4122753143310547, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 19.62198952879581, |
|
"learning_rate": 0.0003729764968955379, |
|
"loss": 2.420066070556641, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 19.642931937172776, |
|
"learning_rate": 0.00037277741681896045, |
|
"loss": 2.4116867065429686, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 19.66387434554974, |
|
"learning_rate": 0.0003725786551856251, |
|
"loss": 2.4222272872924804, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 19.6848167539267, |
|
"learning_rate": 0.0003723802111474804, |
|
"loss": 2.398889350891113, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 19.705759162303664, |
|
"learning_rate": 0.0003721820838596335, |
|
"loss": 2.3947797775268556, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 19.72670157068063, |
|
"learning_rate": 0.00037198427248033485, |
|
"loss": 2.39971981048584, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 19.747643979057592, |
|
"learning_rate": 0.00037178677617096337, |
|
"loss": 2.3918169021606444, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 19.768586387434556, |
|
"learning_rate": 0.0003715895940960111, |
|
"loss": 2.393696975708008, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 19.789528795811517, |
|
"learning_rate": 0.000371392725423069, |
|
"loss": 2.403204345703125, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 19.81047120418848, |
|
"learning_rate": 0.00037119616932281165, |
|
"loss": 2.3984851837158203, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 19.831413612565445, |
|
"learning_rate": 0.00037099992496898276, |
|
"loss": 2.442034149169922, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 19.85235602094241, |
|
"learning_rate": 0.00037080399153838065, |
|
"loss": 2.3905046463012694, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 19.873298429319373, |
|
"learning_rate": 0.00037060836821084373, |
|
"loss": 2.4114078521728515, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 19.894240837696334, |
|
"learning_rate": 0.00037041305416923604, |
|
"loss": 2.393054962158203, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 19.915183246073298, |
|
"learning_rate": 0.0003702180485994327, |
|
"loss": 2.388008689880371, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 19.93612565445026, |
|
"learning_rate": 0.00037002335069030614, |
|
"loss": 2.4009252548217774, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 19.957068062827226, |
|
"learning_rate": 0.0003698289596337116, |
|
"loss": 2.3996566772460937, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 19.97801047120419, |
|
"learning_rate": 0.00036963487462447303, |
|
"loss": 2.3795480728149414, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 19.99895287958115, |
|
"learning_rate": 0.0003694410948603691, |
|
"loss": 2.4031463623046876, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 19.99895287958115, |
|
"eval_loss": 2.5357587337493896, |
|
"eval_runtime": 73.9758, |
|
"eval_samples_per_second": 148.102, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 20.020942408376964, |
|
"learning_rate": 0.00036924761954211944, |
|
"loss": 2.5228919982910156, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 20.041884816753928, |
|
"learning_rate": 0.0003690544478733707, |
|
"loss": 2.3785959243774415, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 20.06282722513089, |
|
"learning_rate": 0.0003688615790606828, |
|
"loss": 2.4037866592407227, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 20.083769633507853, |
|
"learning_rate": 0.000368669012313515, |
|
"loss": 2.3924365997314454, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 20.104712041884817, |
|
"learning_rate": 0.0003684767468442126, |
|
"loss": 2.4029878616333007, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 20.12565445026178, |
|
"learning_rate": 0.0003682847818679935, |
|
"loss": 2.387605094909668, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 20.146596858638745, |
|
"learning_rate": 0.0003680931166029342, |
|
"loss": 2.417312431335449, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 20.167539267015705, |
|
"learning_rate": 0.000367901750269957, |
|
"loss": 2.381046485900879, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 20.18848167539267, |
|
"learning_rate": 0.00036771068209281657, |
|
"loss": 2.376552963256836, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 20.209424083769633, |
|
"learning_rate": 0.0003675199112980863, |
|
"loss": 2.4089908599853516, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 20.230366492146597, |
|
"learning_rate": 0.0003673294371151458, |
|
"loss": 2.401862907409668, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 20.25130890052356, |
|
"learning_rate": 0.0003671392587761674, |
|
"loss": 2.406145477294922, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 20.272251308900522, |
|
"learning_rate": 0.0003669493755161031, |
|
"loss": 2.414588737487793, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 20.293193717277486, |
|
"learning_rate": 0.00036675978657267204, |
|
"loss": 2.4057403564453126, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 20.31413612565445, |
|
"learning_rate": 0.00036657049118634733, |
|
"loss": 2.404916000366211, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 20.335078534031414, |
|
"learning_rate": 0.0003663814886003432, |
|
"loss": 2.4110477447509764, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 20.356020942408378, |
|
"learning_rate": 0.00036619277806060276, |
|
"loss": 2.402661895751953, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 20.376963350785342, |
|
"learning_rate": 0.0003660043588157846, |
|
"loss": 2.404218864440918, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 20.397905759162303, |
|
"learning_rate": 0.00036581623011725114, |
|
"loss": 2.4069591522216798, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 20.418848167539267, |
|
"learning_rate": 0.0003656283912190554, |
|
"loss": 2.4185781478881836, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 20.43979057591623, |
|
"learning_rate": 0.00036544084137792883, |
|
"loss": 2.3999982833862306, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 20.460732984293195, |
|
"learning_rate": 0.00036525357985326903, |
|
"loss": 2.3702335357666016, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 20.48167539267016, |
|
"learning_rate": 0.0003650666059071275, |
|
"loss": 2.3878076553344725, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 20.50261780104712, |
|
"learning_rate": 0.00036487991880419725, |
|
"loss": 2.3818979263305664, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 20.523560209424083, |
|
"learning_rate": 0.00036469351781180073, |
|
"loss": 2.363344192504883, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 20.544502617801047, |
|
"learning_rate": 0.00036450740219987765, |
|
"loss": 2.3810457229614257, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 20.56544502617801, |
|
"learning_rate": 0.0003643215712409734, |
|
"loss": 2.411943054199219, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 20.586387434554975, |
|
"learning_rate": 0.00036413602421022653, |
|
"loss": 2.4092056274414064, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 20.607329842931936, |
|
"learning_rate": 0.0003639507603853572, |
|
"loss": 2.3944089889526365, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 20.6282722513089, |
|
"learning_rate": 0.00036376577904665525, |
|
"loss": 2.3668121337890624, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 20.649214659685864, |
|
"learning_rate": 0.00036358107947696876, |
|
"loss": 2.391695022583008, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 20.670157068062828, |
|
"learning_rate": 0.0003633966609616919, |
|
"loss": 2.380820083618164, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 20.691099476439792, |
|
"learning_rate": 0.00036321252278875344, |
|
"loss": 2.372467041015625, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 20.712041884816752, |
|
"learning_rate": 0.00036302866424860566, |
|
"loss": 2.3973648071289064, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 20.732984293193716, |
|
"learning_rate": 0.00036284508463421217, |
|
"loss": 2.3995847702026367, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 20.75392670157068, |
|
"learning_rate": 0.0003626617832410371, |
|
"loss": 2.3931917190551757, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 20.774869109947645, |
|
"learning_rate": 0.00036247875936703335, |
|
"loss": 2.4107311248779295, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 20.79581151832461, |
|
"learning_rate": 0.00036229601231263145, |
|
"loss": 2.367414855957031, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 20.81675392670157, |
|
"learning_rate": 0.0003621135413807282, |
|
"loss": 2.405007171630859, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 20.837696335078533, |
|
"learning_rate": 0.0003619313458766758, |
|
"loss": 2.364247512817383, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 20.858638743455497, |
|
"learning_rate": 0.0003617494251082704, |
|
"loss": 2.3823482513427736, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 20.87958115183246, |
|
"learning_rate": 0.0003615677783857413, |
|
"loss": 2.393014144897461, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 20.900523560209425, |
|
"learning_rate": 0.0003613864050217397, |
|
"loss": 2.3839509963989256, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 20.921465968586386, |
|
"learning_rate": 0.0003612053043313283, |
|
"loss": 2.378824806213379, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 20.94240837696335, |
|
"learning_rate": 0.0003610244756319697, |
|
"loss": 2.3893613815307617, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 20.963350785340314, |
|
"learning_rate": 0.00036084391824351607, |
|
"loss": 2.37738151550293, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 20.984293193717278, |
|
"learning_rate": 0.00036066363148819854, |
|
"loss": 2.389986038208008, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 20.99895287958115, |
|
"eval_loss": 2.5336103439331055, |
|
"eval_runtime": 73.4935, |
|
"eval_samples_per_second": 149.074, |
|
"step": 10017 |
|
}, |
|
{ |
|
"epoch": 21.006282722513088, |
|
"learning_rate": 0.000360483614690616, |
|
"loss": 2.528822135925293, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 21.027225130890052, |
|
"learning_rate": 0.00036030386717772494, |
|
"loss": 2.3780399322509767, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 21.048167539267016, |
|
"learning_rate": 0.0003601243882788286, |
|
"loss": 2.3978437423706054, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 21.06910994764398, |
|
"learning_rate": 0.0003599451773255667, |
|
"loss": 2.382208061218262, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 21.09005235602094, |
|
"learning_rate": 0.00035976623365190465, |
|
"loss": 2.375508499145508, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 21.110994764397905, |
|
"learning_rate": 0.0003595875565941235, |
|
"loss": 2.422568511962891, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 21.13193717277487, |
|
"learning_rate": 0.00035940914549080944, |
|
"loss": 2.4112144470214845, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 21.152879581151833, |
|
"learning_rate": 0.0003592309996828435, |
|
"loss": 2.400478172302246, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 21.173821989528797, |
|
"learning_rate": 0.0003590531185133913, |
|
"loss": 2.403495216369629, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 21.194764397905757, |
|
"learning_rate": 0.0003588755013278929, |
|
"loss": 2.375596046447754, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 21.21570680628272, |
|
"learning_rate": 0.00035869814747405306, |
|
"loss": 2.3807771682739256, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 21.236649214659685, |
|
"learning_rate": 0.00035852105630183027, |
|
"loss": 2.40921630859375, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 21.25759162303665, |
|
"learning_rate": 0.0003583442271634278, |
|
"loss": 2.398925018310547, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 21.278534031413614, |
|
"learning_rate": 0.000358167659413283, |
|
"loss": 2.373432731628418, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 21.299476439790578, |
|
"learning_rate": 0.00035799135240805765, |
|
"loss": 2.4216379165649413, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 21.320418848167538, |
|
"learning_rate": 0.0003578153055066282, |
|
"loss": 2.3817609786987304, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 21.341361256544502, |
|
"learning_rate": 0.00035763951807007597, |
|
"loss": 2.4331357955932615, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 21.362303664921466, |
|
"learning_rate": 0.0003574639894616771, |
|
"loss": 2.4034128189086914, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 21.38324607329843, |
|
"learning_rate": 0.0003572887190468934, |
|
"loss": 2.3663650512695313, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 21.404188481675394, |
|
"learning_rate": 0.00035711370619336214, |
|
"loss": 2.3921630859375, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 21.425130890052355, |
|
"learning_rate": 0.00035693895027088694, |
|
"loss": 2.3804367065429686, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 21.44607329842932, |
|
"learning_rate": 0.00035676445065142793, |
|
"loss": 2.4022769927978516, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 21.467015706806283, |
|
"learning_rate": 0.0003565902067090925, |
|
"loss": 2.3611806869506835, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 21.487958115183247, |
|
"learning_rate": 0.0003564162178201257, |
|
"loss": 2.383506202697754, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 21.50890052356021, |
|
"learning_rate": 0.0003562424833629007, |
|
"loss": 2.385580062866211, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 21.52984293193717, |
|
"learning_rate": 0.0003560690027179101, |
|
"loss": 2.3934825897216796, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 21.550785340314135, |
|
"learning_rate": 0.00035589577526775603, |
|
"loss": 2.385503387451172, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 21.5717277486911, |
|
"learning_rate": 0.000355722800397141, |
|
"loss": 2.344258499145508, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 21.592670157068063, |
|
"learning_rate": 0.00035555007749285897, |
|
"loss": 2.4048336029052733, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 21.613612565445028, |
|
"learning_rate": 0.00035537760594378607, |
|
"loss": 2.3891706466674805, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 21.634554973821988, |
|
"learning_rate": 0.00035520538514087155, |
|
"loss": 2.397173309326172, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 21.655497382198952, |
|
"learning_rate": 0.0003550334144771289, |
|
"loss": 2.3752115249633787, |
|
"step": 10330 |
|
}, |
|
{ |
|
"epoch": 21.676439790575916, |
|
"learning_rate": 0.00035486169334762637, |
|
"loss": 2.3773225784301757, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 21.69738219895288, |
|
"learning_rate": 0.00035469022114947857, |
|
"loss": 2.410744476318359, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 21.718324607329844, |
|
"learning_rate": 0.00035451899728183736, |
|
"loss": 2.4002202987670898, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 21.739267015706805, |
|
"learning_rate": 0.00035434802114588305, |
|
"loss": 2.371893119812012, |
|
"step": 10370 |
|
}, |
|
{ |
|
"epoch": 21.76020942408377, |
|
"learning_rate": 0.00035417729214481556, |
|
"loss": 2.412856674194336, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 21.781151832460733, |
|
"learning_rate": 0.0003540068096838456, |
|
"loss": 2.414295959472656, |
|
"step": 10390 |
|
}, |
|
{ |
|
"epoch": 21.802094240837697, |
|
"learning_rate": 0.0003538365731701862, |
|
"loss": 2.3874536514282227, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 21.82303664921466, |
|
"learning_rate": 0.0003536665820130437, |
|
"loss": 2.408889389038086, |
|
"step": 10410 |
|
}, |
|
{ |
|
"epoch": 21.843979057591625, |
|
"learning_rate": 0.00035349683562360966, |
|
"loss": 2.4029043197631834, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 21.864921465968585, |
|
"learning_rate": 0.0003533273334150517, |
|
"loss": 2.3872053146362306, |
|
"step": 10430 |
|
}, |
|
{ |
|
"epoch": 21.88586387434555, |
|
"learning_rate": 0.0003531580748025054, |
|
"loss": 2.373563766479492, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 21.906806282722513, |
|
"learning_rate": 0.00035298905920306563, |
|
"loss": 2.3822809219360352, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 21.927748691099477, |
|
"learning_rate": 0.00035282028603577823, |
|
"loss": 2.4076284408569335, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 21.94869109947644, |
|
"learning_rate": 0.0003526517547216315, |
|
"loss": 2.3945655822753906, |
|
"step": 10470 |
|
}, |
|
{ |
|
"epoch": 21.969633507853402, |
|
"learning_rate": 0.000352483464683548, |
|
"loss": 2.360683059692383, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 21.990575916230366, |
|
"learning_rate": 0.0003523154153463761, |
|
"loss": 2.371842956542969, |
|
"step": 10490 |
|
}, |
|
{ |
|
"epoch": 21.99895287958115, |
|
"eval_loss": 2.536722183227539, |
|
"eval_runtime": 79.9983, |
|
"eval_samples_per_second": 136.953, |
|
"step": 10494 |
|
}, |
|
{ |
|
"epoch": 22.012565445026176, |
|
"learning_rate": 0.00035214760613688187, |
|
"loss": 2.498021697998047, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 22.03350785340314, |
|
"learning_rate": 0.0003519800364837407, |
|
"loss": 2.377554702758789, |
|
"step": 10510 |
|
}, |
|
{ |
|
"epoch": 22.054450261780104, |
|
"learning_rate": 0.0003518127058175293, |
|
"loss": 2.380527687072754, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 22.07539267015707, |
|
"learning_rate": 0.00035164561357071755, |
|
"loss": 2.3838827133178713, |
|
"step": 10530 |
|
}, |
|
{ |
|
"epoch": 22.096335078534032, |
|
"learning_rate": 0.0003514787591776602, |
|
"loss": 2.3740776062011717, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 22.117277486910996, |
|
"learning_rate": 0.0003513121420745892, |
|
"loss": 2.374008560180664, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 22.138219895287957, |
|
"learning_rate": 0.0003511457616996052, |
|
"loss": 2.373431587219238, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 22.15916230366492, |
|
"learning_rate": 0.0003509796174926703, |
|
"loss": 2.403927803039551, |
|
"step": 10570 |
|
}, |
|
{ |
|
"epoch": 22.180104712041885, |
|
"learning_rate": 0.00035081370889559934, |
|
"loss": 2.4006370544433593, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 22.20104712041885, |
|
"learning_rate": 0.0003506480353520526, |
|
"loss": 2.3758676528930662, |
|
"step": 10590 |
|
}, |
|
{ |
|
"epoch": 22.221989528795813, |
|
"learning_rate": 0.0003504825963075276, |
|
"loss": 2.417715644836426, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 22.242931937172774, |
|
"learning_rate": 0.00035031739120935175, |
|
"loss": 2.3941156387329103, |
|
"step": 10610 |
|
}, |
|
{ |
|
"epoch": 22.263874345549738, |
|
"learning_rate": 0.0003501524195066741, |
|
"loss": 2.3949649810791014, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 22.2848167539267, |
|
"learning_rate": 0.0003499876806504578, |
|
"loss": 2.4047883987426757, |
|
"step": 10630 |
|
}, |
|
{ |
|
"epoch": 22.305759162303666, |
|
"learning_rate": 0.00034982317409347263, |
|
"loss": 2.3971155166625975, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 22.32670157068063, |
|
"learning_rate": 0.00034965889929028707, |
|
"loss": 2.404866027832031, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 22.34764397905759, |
|
"learning_rate": 0.000349494855697261, |
|
"loss": 2.3915122985839843, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 22.368586387434554, |
|
"learning_rate": 0.0003493310427725377, |
|
"loss": 2.39025936126709, |
|
"step": 10670 |
|
}, |
|
{ |
|
"epoch": 22.38952879581152, |
|
"learning_rate": 0.0003491674599760369, |
|
"loss": 2.399850273132324, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 22.410471204188482, |
|
"learning_rate": 0.0003490041067694469, |
|
"loss": 2.3945247650146486, |
|
"step": 10690 |
|
}, |
|
{ |
|
"epoch": 22.431413612565446, |
|
"learning_rate": 0.00034884098261621724, |
|
"loss": 2.397679901123047, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 22.452356020942407, |
|
"learning_rate": 0.00034867808698155125, |
|
"loss": 2.355159568786621, |
|
"step": 10710 |
|
}, |
|
{ |
|
"epoch": 22.47329842931937, |
|
"learning_rate": 0.0003485154193323988, |
|
"loss": 2.3898927688598635, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 22.494240837696335, |
|
"learning_rate": 0.00034835297913744903, |
|
"loss": 2.367123031616211, |
|
"step": 10730 |
|
}, |
|
{ |
|
"epoch": 22.5151832460733, |
|
"learning_rate": 0.0003481907658671227, |
|
"loss": 2.3852542877197265, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 22.536125654450263, |
|
"learning_rate": 0.0003480287789935653, |
|
"loss": 2.4065229415893556, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 22.557068062827224, |
|
"learning_rate": 0.00034786701799063976, |
|
"loss": 2.3588846206665037, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 22.578010471204188, |
|
"learning_rate": 0.00034770548233391924, |
|
"loss": 2.390997123718262, |
|
"step": 10770 |
|
}, |
|
{ |
|
"epoch": 22.59895287958115, |
|
"learning_rate": 0.0003475441715006799, |
|
"loss": 2.3878786087036135, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 22.619895287958116, |
|
"learning_rate": 0.0003473830849698938, |
|
"loss": 2.398370552062988, |
|
"step": 10790 |
|
}, |
|
{ |
|
"epoch": 22.64083769633508, |
|
"learning_rate": 0.0003472222222222222, |
|
"loss": 2.3805349349975584, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 22.66178010471204, |
|
"learning_rate": 0.00034706158274000796, |
|
"loss": 2.3977741241455077, |
|
"step": 10810 |
|
}, |
|
{ |
|
"epoch": 22.682722513089004, |
|
"learning_rate": 0.00034690116600726885, |
|
"loss": 2.387373924255371, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 22.70366492146597, |
|
"learning_rate": 0.0003467409715096907, |
|
"loss": 2.3700994491577148, |
|
"step": 10830 |
|
}, |
|
{ |
|
"epoch": 22.724607329842932, |
|
"learning_rate": 0.00034658099873462027, |
|
"loss": 2.3671117782592774, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 22.745549738219896, |
|
"learning_rate": 0.0003464212471710583, |
|
"loss": 2.377743148803711, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 22.76649214659686, |
|
"learning_rate": 0.0003462617163096529, |
|
"loss": 2.386002540588379, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 22.78743455497382, |
|
"learning_rate": 0.00034610240564269265, |
|
"loss": 2.3687204360961913, |
|
"step": 10870 |
|
}, |
|
{ |
|
"epoch": 22.808376963350785, |
|
"learning_rate": 0.0003459433146640997, |
|
"loss": 2.3671218872070314, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 22.82931937172775, |
|
"learning_rate": 0.00034578444286942307, |
|
"loss": 2.3793460845947267, |
|
"step": 10890 |
|
}, |
|
{ |
|
"epoch": 22.850261780104713, |
|
"learning_rate": 0.00034562578975583187, |
|
"loss": 2.374790382385254, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 22.871204188481677, |
|
"learning_rate": 0.00034546735482210894, |
|
"loss": 2.356049728393555, |
|
"step": 10910 |
|
}, |
|
{ |
|
"epoch": 22.892146596858638, |
|
"learning_rate": 0.0003453091375686437, |
|
"loss": 2.361851119995117, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 22.9130890052356, |
|
"learning_rate": 0.00034515113749742586, |
|
"loss": 2.3911083221435545, |
|
"step": 10930 |
|
}, |
|
{ |
|
"epoch": 22.934031413612566, |
|
"learning_rate": 0.00034499335411203894, |
|
"loss": 2.353407096862793, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 22.95497382198953, |
|
"learning_rate": 0.00034483578691765326, |
|
"loss": 2.3763240814208983, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 22.975916230366494, |
|
"learning_rate": 0.00034467843542102, |
|
"loss": 2.3745288848876953, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 22.996858638743454, |
|
"learning_rate": 0.0003445212991304641, |
|
"loss": 2.367890167236328, |
|
"step": 10970 |
|
}, |
|
{ |
|
"epoch": 22.99895287958115, |
|
"eval_loss": 2.542264461517334, |
|
"eval_runtime": 74.647, |
|
"eval_samples_per_second": 146.771, |
|
"step": 10971 |
|
}, |
|
{ |
|
"epoch": 23.018848167539268, |
|
"learning_rate": 0.00034436437755587827, |
|
"loss": 2.4732553482055666, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 23.039790575916232, |
|
"learning_rate": 0.00034420767020871656, |
|
"loss": 2.3789663314819336, |
|
"step": 10990 |
|
}, |
|
{ |
|
"epoch": 23.060732984293193, |
|
"learning_rate": 0.00034405117660198765, |
|
"loss": 2.387537384033203, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 23.081675392670157, |
|
"learning_rate": 0.00034389489625024885, |
|
"loss": 2.3760297775268553, |
|
"step": 11010 |
|
}, |
|
{ |
|
"epoch": 23.10261780104712, |
|
"learning_rate": 0.00034373882866959936, |
|
"loss": 2.3746875762939452, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 23.123560209424085, |
|
"learning_rate": 0.0003435829733776745, |
|
"loss": 2.4071685791015627, |
|
"step": 11030 |
|
}, |
|
{ |
|
"epoch": 23.14450261780105, |
|
"learning_rate": 0.00034342732989363903, |
|
"loss": 2.3595859527587892, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 23.16544502617801, |
|
"learning_rate": 0.0003432718977381811, |
|
"loss": 2.356878662109375, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 23.186387434554973, |
|
"learning_rate": 0.0003431166764335058, |
|
"loss": 2.3806716918945314, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 23.207329842931937, |
|
"learning_rate": 0.0003429616655033297, |
|
"loss": 2.365432929992676, |
|
"step": 11070 |
|
}, |
|
{ |
|
"epoch": 23.2282722513089, |
|
"learning_rate": 0.00034280686447287373, |
|
"loss": 2.3477930068969726, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 23.249214659685865, |
|
"learning_rate": 0.00034265227286885776, |
|
"loss": 2.359480094909668, |
|
"step": 11090 |
|
}, |
|
{ |
|
"epoch": 23.270157068062826, |
|
"learning_rate": 0.00034249789021949435, |
|
"loss": 2.3736724853515625, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 23.29109947643979, |
|
"learning_rate": 0.0003423437160544826, |
|
"loss": 2.3853965759277345, |
|
"step": 11110 |
|
}, |
|
{ |
|
"epoch": 23.312041884816754, |
|
"learning_rate": 0.0003421897499050022, |
|
"loss": 2.3594213485717774, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 23.332984293193718, |
|
"learning_rate": 0.0003420359913037075, |
|
"loss": 2.3540416717529298, |
|
"step": 11130 |
|
}, |
|
{ |
|
"epoch": 23.353926701570682, |
|
"learning_rate": 0.0003418824397847216, |
|
"loss": 2.371465301513672, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 23.374869109947642, |
|
"learning_rate": 0.00034172909488363007, |
|
"loss": 2.360518455505371, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 23.395811518324606, |
|
"learning_rate": 0.00034157595613747545, |
|
"loss": 2.347417640686035, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 23.41675392670157, |
|
"learning_rate": 0.00034142302308475133, |
|
"loss": 2.388157081604004, |
|
"step": 11170 |
|
}, |
|
{ |
|
"epoch": 23.437696335078535, |
|
"learning_rate": 0.0003412702952653962, |
|
"loss": 2.348739433288574, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 23.4586387434555, |
|
"learning_rate": 0.00034111777222078796, |
|
"loss": 2.3871492385864257, |
|
"step": 11190 |
|
}, |
|
{ |
|
"epoch": 23.47958115183246, |
|
"learning_rate": 0.00034096545349373804, |
|
"loss": 2.3624570846557615, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 23.500523560209423, |
|
"learning_rate": 0.0003408133386284857, |
|
"loss": 2.337727165222168, |
|
"step": 11210 |
|
}, |
|
{ |
|
"epoch": 23.521465968586387, |
|
"learning_rate": 0.0003406614271706919, |
|
"loss": 2.3544214248657225, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 23.54240837696335, |
|
"learning_rate": 0.0003405097186674344, |
|
"loss": 2.3601694107055664, |
|
"step": 11230 |
|
}, |
|
{ |
|
"epoch": 23.563350785340315, |
|
"learning_rate": 0.00034035821266720136, |
|
"loss": 2.3869655609130858, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 23.58429319371728, |
|
"learning_rate": 0.0003402069087198858, |
|
"loss": 2.3417810440063476, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 23.60523560209424, |
|
"learning_rate": 0.00034005580637678053, |
|
"loss": 2.3512496948242188, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 23.626178010471204, |
|
"learning_rate": 0.00033990490519057183, |
|
"loss": 2.3688682556152343, |
|
"step": 11270 |
|
}, |
|
{ |
|
"epoch": 23.647120418848168, |
|
"learning_rate": 0.0003397542047153345, |
|
"loss": 2.3577795028686523, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 23.668062827225132, |
|
"learning_rate": 0.0003396037045065257, |
|
"loss": 2.380731201171875, |
|
"step": 11290 |
|
}, |
|
{ |
|
"epoch": 23.689005235602096, |
|
"learning_rate": 0.0003394534041209802, |
|
"loss": 2.349542427062988, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 23.709947643979056, |
|
"learning_rate": 0.0003393033031169043, |
|
"loss": 2.3590700149536135, |
|
"step": 11310 |
|
}, |
|
{ |
|
"epoch": 23.73089005235602, |
|
"learning_rate": 0.0003391534010538705, |
|
"loss": 2.392327880859375, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 23.751832460732984, |
|
"learning_rate": 0.00033900369749281225, |
|
"loss": 2.3760391235351563, |
|
"step": 11330 |
|
}, |
|
{ |
|
"epoch": 23.77277486910995, |
|
"learning_rate": 0.00033885419199601845, |
|
"loss": 2.355258560180664, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 23.793717277486913, |
|
"learning_rate": 0.000338704884127128, |
|
"loss": 2.369922065734863, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 23.814659685863873, |
|
"learning_rate": 0.00033855577345112453, |
|
"loss": 2.410330390930176, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 23.835602094240837, |
|
"learning_rate": 0.0003384068595343312, |
|
"loss": 2.374154472351074, |
|
"step": 11370 |
|
}, |
|
{ |
|
"epoch": 23.8565445026178, |
|
"learning_rate": 0.00033825814194440504, |
|
"loss": 2.360888671875, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 23.877486910994765, |
|
"learning_rate": 0.0003381096202503321, |
|
"loss": 2.372193145751953, |
|
"step": 11390 |
|
}, |
|
{ |
|
"epoch": 23.89842931937173, |
|
"learning_rate": 0.00033796129402242193, |
|
"loss": 2.3628297805786134, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 23.91937172774869, |
|
"learning_rate": 0.0003378131628323024, |
|
"loss": 2.365167999267578, |
|
"step": 11410 |
|
}, |
|
{ |
|
"epoch": 23.940314136125654, |
|
"learning_rate": 0.0003376652262529146, |
|
"loss": 2.385006332397461, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 23.961256544502618, |
|
"learning_rate": 0.00033751748385850753, |
|
"loss": 2.3524898529052733, |
|
"step": 11430 |
|
}, |
|
{ |
|
"epoch": 23.982198952879582, |
|
"learning_rate": 0.00033736993522463316, |
|
"loss": 2.374051094055176, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 23.99895287958115, |
|
"eval_loss": 2.532155990600586, |
|
"eval_runtime": 73.6953, |
|
"eval_samples_per_second": 148.666, |
|
"step": 11448 |
|
}, |
|
{ |
|
"epoch": 24.004188481675392, |
|
"learning_rate": 0.00033722257992814113, |
|
"loss": 2.4828319549560547, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 24.025130890052356, |
|
"learning_rate": 0.0003370754175471737, |
|
"loss": 2.360254669189453, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 24.04607329842932, |
|
"learning_rate": 0.0003369284476611607, |
|
"loss": 2.349439811706543, |
|
"step": 11470 |
|
}, |
|
{ |
|
"epoch": 24.067015706806284, |
|
"learning_rate": 0.00033678166985081433, |
|
"loss": 2.36633415222168, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 24.087958115183245, |
|
"learning_rate": 0.0003366350836981245, |
|
"loss": 2.365359306335449, |
|
"step": 11490 |
|
}, |
|
{ |
|
"epoch": 24.10890052356021, |
|
"learning_rate": 0.0003364886887863534, |
|
"loss": 2.362344169616699, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 24.129842931937173, |
|
"learning_rate": 0.0003363424847000309, |
|
"loss": 2.357081985473633, |
|
"step": 11510 |
|
}, |
|
{ |
|
"epoch": 24.150785340314137, |
|
"learning_rate": 0.0003361964710249494, |
|
"loss": 2.3507287979125975, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 24.1717277486911, |
|
"learning_rate": 0.00033605064734815865, |
|
"loss": 2.3537702560424805, |
|
"step": 11530 |
|
}, |
|
{ |
|
"epoch": 24.19267015706806, |
|
"learning_rate": 0.0003359050132579615, |
|
"loss": 2.37689151763916, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 24.213612565445025, |
|
"learning_rate": 0.00033575956834390843, |
|
"loss": 2.348763847351074, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 24.23455497382199, |
|
"learning_rate": 0.00033561431219679297, |
|
"loss": 2.3715591430664062, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 24.255497382198953, |
|
"learning_rate": 0.00033546924440864666, |
|
"loss": 2.3734716415405273, |
|
"step": 11570 |
|
}, |
|
{ |
|
"epoch": 24.276439790575917, |
|
"learning_rate": 0.0003353243645727346, |
|
"loss": 2.3519350051879884, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 24.297382198952878, |
|
"learning_rate": 0.0003351796722835502, |
|
"loss": 2.355198287963867, |
|
"step": 11590 |
|
}, |
|
{ |
|
"epoch": 24.318324607329842, |
|
"learning_rate": 0.00033503516713681087, |
|
"loss": 2.3704608917236327, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 24.339267015706806, |
|
"learning_rate": 0.00033489084872945283, |
|
"loss": 2.3960491180419923, |
|
"step": 11610 |
|
}, |
|
{ |
|
"epoch": 24.36020942408377, |
|
"learning_rate": 0.0003347467166596268, |
|
"loss": 2.3748762130737306, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 24.381151832460734, |
|
"learning_rate": 0.0003346027705266929, |
|
"loss": 2.358123016357422, |
|
"step": 11630 |
|
}, |
|
{ |
|
"epoch": 24.402094240837695, |
|
"learning_rate": 0.0003344590099312164, |
|
"loss": 2.345402717590332, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 24.42303664921466, |
|
"learning_rate": 0.00033431543447496275, |
|
"loss": 2.331704330444336, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 24.443979057591623, |
|
"learning_rate": 0.000334172043760893, |
|
"loss": 2.377284812927246, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 24.464921465968587, |
|
"learning_rate": 0.0003340288373931593, |
|
"loss": 2.354692268371582, |
|
"step": 11670 |
|
}, |
|
{ |
|
"epoch": 24.48586387434555, |
|
"learning_rate": 0.0003338858149771002, |
|
"loss": 2.3740156173706053, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 24.506806282722515, |
|
"learning_rate": 0.0003337429761192361, |
|
"loss": 2.367665672302246, |
|
"step": 11690 |
|
}, |
|
{ |
|
"epoch": 24.527748691099475, |
|
"learning_rate": 0.00033360032042726483, |
|
"loss": 2.362037467956543, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 24.54869109947644, |
|
"learning_rate": 0.000333457847510057, |
|
"loss": 2.371419334411621, |
|
"step": 11710 |
|
}, |
|
{ |
|
"epoch": 24.569633507853403, |
|
"learning_rate": 0.0003333155569776514, |
|
"loss": 2.33715934753418, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 24.590575916230367, |
|
"learning_rate": 0.00033317344844125064, |
|
"loss": 2.3361494064331056, |
|
"step": 11730 |
|
}, |
|
{ |
|
"epoch": 24.61151832460733, |
|
"learning_rate": 0.00033303152151321696, |
|
"loss": 2.346495819091797, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 24.632460732984292, |
|
"learning_rate": 0.00033288977580706714, |
|
"loss": 2.3827404022216796, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 24.653403141361256, |
|
"learning_rate": 0.0003327482109374687, |
|
"loss": 2.3554365158081056, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 24.67434554973822, |
|
"learning_rate": 0.00033260682652023517, |
|
"loss": 2.3607540130615234, |
|
"step": 11770 |
|
}, |
|
{ |
|
"epoch": 24.695287958115184, |
|
"learning_rate": 0.0003324656221723217, |
|
"loss": 2.3806394577026366, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 24.716230366492148, |
|
"learning_rate": 0.000332324597511821, |
|
"loss": 2.3277612686157227, |
|
"step": 11790 |
|
}, |
|
{ |
|
"epoch": 24.73717277486911, |
|
"learning_rate": 0.00033218375215795864, |
|
"loss": 2.3573076248168947, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 24.758115183246073, |
|
"learning_rate": 0.00033204308573108897, |
|
"loss": 2.3293807983398436, |
|
"step": 11810 |
|
}, |
|
{ |
|
"epoch": 24.779057591623037, |
|
"learning_rate": 0.00033190259785269066, |
|
"loss": 2.3627220153808595, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 24.8, |
|
"learning_rate": 0.0003317622881453626, |
|
"loss": 2.3504779815673826, |
|
"step": 11830 |
|
}, |
|
{ |
|
"epoch": 24.820942408376965, |
|
"learning_rate": 0.0003316221562328194, |
|
"loss": 2.3526493072509767, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 24.841884816753925, |
|
"learning_rate": 0.0003314822017398875, |
|
"loss": 2.367503547668457, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 24.86282722513089, |
|
"learning_rate": 0.00033134242429250053, |
|
"loss": 2.364429473876953, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 24.883769633507853, |
|
"learning_rate": 0.00033120282351769556, |
|
"loss": 2.3418235778808594, |
|
"step": 11870 |
|
}, |
|
{ |
|
"epoch": 24.904712041884817, |
|
"learning_rate": 0.0003310633990436084, |
|
"loss": 2.361065483093262, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 24.92565445026178, |
|
"learning_rate": 0.00033092415049947006, |
|
"loss": 2.3631685256958006, |
|
"step": 11890 |
|
}, |
|
{ |
|
"epoch": 24.946596858638742, |
|
"learning_rate": 0.00033078507751560195, |
|
"loss": 2.346321868896484, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 24.967539267015706, |
|
"learning_rate": 0.00033064617972341235, |
|
"loss": 2.3589923858642576, |
|
"step": 11910 |
|
}, |
|
{ |
|
"epoch": 24.98848167539267, |
|
"learning_rate": 0.0003305074567553919, |
|
"loss": 2.3485301971435546, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 24.99895287958115, |
|
"eval_loss": 2.5436818599700928, |
|
"eval_runtime": 73.6581, |
|
"eval_samples_per_second": 148.741, |
|
"step": 11925 |
|
}, |
|
{ |
|
"epoch": 25.01047120418848, |
|
"learning_rate": 0.0003303689082451096, |
|
"loss": 2.483962059020996, |
|
"step": 11930 |
|
}, |
|
{ |
|
"epoch": 25.031413612565444, |
|
"learning_rate": 0.00033023053382720904, |
|
"loss": 2.352615547180176, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 25.05235602094241, |
|
"learning_rate": 0.0003300923331374039, |
|
"loss": 2.379102325439453, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 25.073298429319372, |
|
"learning_rate": 0.00032995430581247417, |
|
"loss": 2.3579853057861326, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 25.094240837696336, |
|
"learning_rate": 0.0003298164514902622, |
|
"loss": 2.3461565017700194, |
|
"step": 11970 |
|
}, |
|
{ |
|
"epoch": 25.115183246073297, |
|
"learning_rate": 0.0003296787698096686, |
|
"loss": 2.328052520751953, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 25.13612565445026, |
|
"learning_rate": 0.0003295412604106482, |
|
"loss": 2.337063026428223, |
|
"step": 11990 |
|
}, |
|
{ |
|
"epoch": 25.157068062827225, |
|
"learning_rate": 0.00032940392293420614, |
|
"loss": 2.3367223739624023, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 25.17801047120419, |
|
"learning_rate": 0.00032926675702239425, |
|
"loss": 2.365107536315918, |
|
"step": 12010 |
|
}, |
|
{ |
|
"epoch": 25.198952879581153, |
|
"learning_rate": 0.00032912976231830646, |
|
"loss": 2.3596302032470704, |
|
"step": 12020 |
|
}, |
|
{ |
|
"epoch": 25.219895287958114, |
|
"learning_rate": 0.0003289929384660757, |
|
"loss": 2.336884307861328, |
|
"step": 12030 |
|
}, |
|
{ |
|
"epoch": 25.240837696335078, |
|
"learning_rate": 0.0003288562851108693, |
|
"loss": 2.3663518905639647, |
|
"step": 12040 |
|
}, |
|
{ |
|
"epoch": 25.26178010471204, |
|
"learning_rate": 0.0003287198018988856, |
|
"loss": 2.3383811950683593, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 25.282722513089006, |
|
"learning_rate": 0.00032858348847734985, |
|
"loss": 2.3640661239624023, |
|
"step": 12060 |
|
}, |
|
{ |
|
"epoch": 25.30366492146597, |
|
"learning_rate": 0.00032844734449451055, |
|
"loss": 2.3613861083984373, |
|
"step": 12070 |
|
}, |
|
{ |
|
"epoch": 25.324607329842934, |
|
"learning_rate": 0.00032831136959963553, |
|
"loss": 2.3227806091308594, |
|
"step": 12080 |
|
}, |
|
{ |
|
"epoch": 25.345549738219894, |
|
"learning_rate": 0.00032817556344300823, |
|
"loss": 2.328192710876465, |
|
"step": 12090 |
|
}, |
|
{ |
|
"epoch": 25.36649214659686, |
|
"learning_rate": 0.0003280399256759237, |
|
"loss": 2.3461523056030273, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 25.387434554973822, |
|
"learning_rate": 0.0003279044559506852, |
|
"loss": 2.3762447357177736, |
|
"step": 12110 |
|
}, |
|
{ |
|
"epoch": 25.408376963350786, |
|
"learning_rate": 0.0003277691539206003, |
|
"loss": 2.325837326049805, |
|
"step": 12120 |
|
}, |
|
{ |
|
"epoch": 25.42931937172775, |
|
"learning_rate": 0.0003276340192399769, |
|
"loss": 2.3660905838012694, |
|
"step": 12130 |
|
}, |
|
{ |
|
"epoch": 25.45026178010471, |
|
"learning_rate": 0.00032749905156412, |
|
"loss": 2.360948181152344, |
|
"step": 12140 |
|
}, |
|
{ |
|
"epoch": 25.471204188481675, |
|
"learning_rate": 0.0003273642505493275, |
|
"loss": 2.3165866851806642, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 25.49214659685864, |
|
"learning_rate": 0.0003272296158528871, |
|
"loss": 2.3487401962280274, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 25.513089005235603, |
|
"learning_rate": 0.000327095147133072, |
|
"loss": 2.351056671142578, |
|
"step": 12170 |
|
}, |
|
{ |
|
"epoch": 25.534031413612567, |
|
"learning_rate": 0.00032696084404913777, |
|
"loss": 2.3396501541137695, |
|
"step": 12180 |
|
}, |
|
{ |
|
"epoch": 25.554973821989527, |
|
"learning_rate": 0.00032682670626131837, |
|
"loss": 2.3343048095703125, |
|
"step": 12190 |
|
}, |
|
{ |
|
"epoch": 25.57591623036649, |
|
"learning_rate": 0.0003266927334308229, |
|
"loss": 2.3392221450805666, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 25.596858638743456, |
|
"learning_rate": 0.0003265589252198317, |
|
"loss": 2.339245414733887, |
|
"step": 12210 |
|
}, |
|
{ |
|
"epoch": 25.61780104712042, |
|
"learning_rate": 0.0003264252812914928, |
|
"loss": 2.343129539489746, |
|
"step": 12220 |
|
}, |
|
{ |
|
"epoch": 25.638743455497384, |
|
"learning_rate": 0.0003262918013099186, |
|
"loss": 2.344712829589844, |
|
"step": 12230 |
|
}, |
|
{ |
|
"epoch": 25.659685863874344, |
|
"learning_rate": 0.00032615848494018204, |
|
"loss": 2.364294242858887, |
|
"step": 12240 |
|
}, |
|
{ |
|
"epoch": 25.680628272251308, |
|
"learning_rate": 0.0003260253318483131, |
|
"loss": 2.3588529586791993, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 25.701570680628272, |
|
"learning_rate": 0.0003258923417012957, |
|
"loss": 2.3558927536010743, |
|
"step": 12260 |
|
}, |
|
{ |
|
"epoch": 25.722513089005236, |
|
"learning_rate": 0.00032575951416706354, |
|
"loss": 2.370713996887207, |
|
"step": 12270 |
|
}, |
|
{ |
|
"epoch": 25.7434554973822, |
|
"learning_rate": 0.0003256268489144972, |
|
"loss": 2.3426084518432617, |
|
"step": 12280 |
|
}, |
|
{ |
|
"epoch": 25.76439790575916, |
|
"learning_rate": 0.0003254943456134202, |
|
"loss": 2.3299545288085937, |
|
"step": 12290 |
|
}, |
|
{ |
|
"epoch": 25.785340314136125, |
|
"learning_rate": 0.0003253620039345959, |
|
"loss": 2.343545913696289, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 25.80628272251309, |
|
"learning_rate": 0.0003252298235497241, |
|
"loss": 2.3348289489746095, |
|
"step": 12310 |
|
}, |
|
{ |
|
"epoch": 25.827225130890053, |
|
"learning_rate": 0.0003250978041314371, |
|
"loss": 2.3712085723876952, |
|
"step": 12320 |
|
}, |
|
{ |
|
"epoch": 25.848167539267017, |
|
"learning_rate": 0.000324965945353297, |
|
"loss": 2.347680854797363, |
|
"step": 12330 |
|
}, |
|
{ |
|
"epoch": 25.869109947643977, |
|
"learning_rate": 0.0003248342468897917, |
|
"loss": 2.342079925537109, |
|
"step": 12340 |
|
}, |
|
{ |
|
"epoch": 25.89005235602094, |
|
"learning_rate": 0.00032470270841633195, |
|
"loss": 2.376851272583008, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 25.910994764397905, |
|
"learning_rate": 0.00032457132960924783, |
|
"loss": 2.3613746643066404, |
|
"step": 12360 |
|
}, |
|
{ |
|
"epoch": 25.93193717277487, |
|
"learning_rate": 0.00032444011014578535, |
|
"loss": 2.3406829833984375, |
|
"step": 12370 |
|
}, |
|
{ |
|
"epoch": 25.952879581151834, |
|
"learning_rate": 0.00032430904970410314, |
|
"loss": 2.328056526184082, |
|
"step": 12380 |
|
}, |
|
{ |
|
"epoch": 25.973821989528794, |
|
"learning_rate": 0.0003241781479632693, |
|
"loss": 2.3438344955444337, |
|
"step": 12390 |
|
}, |
|
{ |
|
"epoch": 25.994764397905758, |
|
"learning_rate": 0.0003240474046032579, |
|
"loss": 2.349610137939453, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 25.99895287958115, |
|
"eval_loss": 2.533395767211914, |
|
"eval_runtime": 73.6578, |
|
"eval_samples_per_second": 148.742, |
|
"step": 12402 |
|
}, |
|
{ |
|
"epoch": 26.016753926701572, |
|
"learning_rate": 0.00032391681930494566, |
|
"loss": 2.4640811920166015, |
|
"step": 12410 |
|
}, |
|
{ |
|
"epoch": 26.037696335078532, |
|
"learning_rate": 0.000323786391750109, |
|
"loss": 2.3338626861572265, |
|
"step": 12420 |
|
}, |
|
{ |
|
"epoch": 26.058638743455496, |
|
"learning_rate": 0.0003236561216214202, |
|
"loss": 2.342071533203125, |
|
"step": 12430 |
|
}, |
|
{ |
|
"epoch": 26.07958115183246, |
|
"learning_rate": 0.000323526008602445, |
|
"loss": 2.374074172973633, |
|
"step": 12440 |
|
}, |
|
{ |
|
"epoch": 26.100523560209425, |
|
"learning_rate": 0.0003233960523776387, |
|
"loss": 2.3421449661254883, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 26.12146596858639, |
|
"learning_rate": 0.0003232662526323429, |
|
"loss": 2.3614429473876952, |
|
"step": 12460 |
|
}, |
|
{ |
|
"epoch": 26.14240837696335, |
|
"learning_rate": 0.0003231366090527828, |
|
"loss": 2.319747543334961, |
|
"step": 12470 |
|
}, |
|
{ |
|
"epoch": 26.163350785340313, |
|
"learning_rate": 0.00032300712132606366, |
|
"loss": 2.3622182846069335, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 26.184293193717277, |
|
"learning_rate": 0.0003228777891401678, |
|
"loss": 2.359231185913086, |
|
"step": 12490 |
|
}, |
|
{ |
|
"epoch": 26.20523560209424, |
|
"learning_rate": 0.0003227486121839514, |
|
"loss": 2.355366516113281, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 26.226178010471205, |
|
"learning_rate": 0.00032261959014714107, |
|
"loss": 2.3299293518066406, |
|
"step": 12510 |
|
}, |
|
{ |
|
"epoch": 26.24712041884817, |
|
"learning_rate": 0.0003224907227203312, |
|
"loss": 2.3555164337158203, |
|
"step": 12520 |
|
}, |
|
{ |
|
"epoch": 26.26806282722513, |
|
"learning_rate": 0.0003223620095949806, |
|
"loss": 2.314861869812012, |
|
"step": 12530 |
|
}, |
|
{ |
|
"epoch": 26.289005235602094, |
|
"learning_rate": 0.00032223345046340936, |
|
"loss": 2.3362022399902345, |
|
"step": 12540 |
|
}, |
|
{ |
|
"epoch": 26.309947643979058, |
|
"learning_rate": 0.00032210504501879576, |
|
"loss": 2.3406482696533204, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 26.330890052356022, |
|
"learning_rate": 0.0003219767929551733, |
|
"loss": 2.30753231048584, |
|
"step": 12560 |
|
}, |
|
{ |
|
"epoch": 26.351832460732986, |
|
"learning_rate": 0.00032184869396742754, |
|
"loss": 2.351367950439453, |
|
"step": 12570 |
|
}, |
|
{ |
|
"epoch": 26.372774869109946, |
|
"learning_rate": 0.00032172074775129323, |
|
"loss": 2.3465883255004885, |
|
"step": 12580 |
|
}, |
|
{ |
|
"epoch": 26.39371727748691, |
|
"learning_rate": 0.00032159295400335114, |
|
"loss": 2.3782730102539062, |
|
"step": 12590 |
|
}, |
|
{ |
|
"epoch": 26.414659685863874, |
|
"learning_rate": 0.00032146531242102476, |
|
"loss": 2.3480430603027345, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 26.43560209424084, |
|
"learning_rate": 0.0003213378227025779, |
|
"loss": 2.370161437988281, |
|
"step": 12610 |
|
}, |
|
{ |
|
"epoch": 26.456544502617803, |
|
"learning_rate": 0.00032121048454711114, |
|
"loss": 2.3542537689208984, |
|
"step": 12620 |
|
}, |
|
{ |
|
"epoch": 26.477486910994763, |
|
"learning_rate": 0.00032108329765455926, |
|
"loss": 2.3564731597900392, |
|
"step": 12630 |
|
}, |
|
{ |
|
"epoch": 26.498429319371727, |
|
"learning_rate": 0.00032095626172568784, |
|
"loss": 2.333011817932129, |
|
"step": 12640 |
|
}, |
|
{ |
|
"epoch": 26.51937172774869, |
|
"learning_rate": 0.00032082937646209084, |
|
"loss": 2.3433643341064454, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 26.540314136125655, |
|
"learning_rate": 0.0003207026415661871, |
|
"loss": 2.3346595764160156, |
|
"step": 12660 |
|
}, |
|
{ |
|
"epoch": 26.56125654450262, |
|
"learning_rate": 0.0003205760567412178, |
|
"loss": 2.339708709716797, |
|
"step": 12670 |
|
}, |
|
{ |
|
"epoch": 26.58219895287958, |
|
"learning_rate": 0.00032044962169124335, |
|
"loss": 2.3501649856567384, |
|
"step": 12680 |
|
}, |
|
{ |
|
"epoch": 26.603141361256544, |
|
"learning_rate": 0.0003203233361211406, |
|
"loss": 2.3222862243652345, |
|
"step": 12690 |
|
}, |
|
{ |
|
"epoch": 26.624083769633508, |
|
"learning_rate": 0.00032019719973659996, |
|
"loss": 2.3362213134765626, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 26.645026178010472, |
|
"learning_rate": 0.00032007121224412224, |
|
"loss": 2.311092567443848, |
|
"step": 12710 |
|
}, |
|
{ |
|
"epoch": 26.665968586387436, |
|
"learning_rate": 0.0003199453733510162, |
|
"loss": 2.332124137878418, |
|
"step": 12720 |
|
}, |
|
{ |
|
"epoch": 26.686910994764396, |
|
"learning_rate": 0.00031981968276539543, |
|
"loss": 2.3406246185302733, |
|
"step": 12730 |
|
}, |
|
{ |
|
"epoch": 26.70785340314136, |
|
"learning_rate": 0.0003196941401961754, |
|
"loss": 2.3419260025024413, |
|
"step": 12740 |
|
}, |
|
{ |
|
"epoch": 26.728795811518324, |
|
"learning_rate": 0.000319568745353071, |
|
"loss": 2.344953727722168, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 26.74973821989529, |
|
"learning_rate": 0.0003194434979465935, |
|
"loss": 2.3517208099365234, |
|
"step": 12760 |
|
}, |
|
{ |
|
"epoch": 26.770680628272252, |
|
"learning_rate": 0.0003193183976880476, |
|
"loss": 2.3811822891235352, |
|
"step": 12770 |
|
}, |
|
{ |
|
"epoch": 26.791623036649213, |
|
"learning_rate": 0.00031919344428952895, |
|
"loss": 2.3604736328125, |
|
"step": 12780 |
|
}, |
|
{ |
|
"epoch": 26.812565445026177, |
|
"learning_rate": 0.0003190686374639211, |
|
"loss": 2.3444387435913088, |
|
"step": 12790 |
|
}, |
|
{ |
|
"epoch": 26.83350785340314, |
|
"learning_rate": 0.00031894397692489295, |
|
"loss": 2.340729331970215, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 26.854450261780105, |
|
"learning_rate": 0.0003188194623868958, |
|
"loss": 2.340890121459961, |
|
"step": 12810 |
|
}, |
|
{ |
|
"epoch": 26.87539267015707, |
|
"learning_rate": 0.00031869509356516063, |
|
"loss": 2.3352834701538088, |
|
"step": 12820 |
|
}, |
|
{ |
|
"epoch": 26.89633507853403, |
|
"learning_rate": 0.00031857087017569556, |
|
"loss": 2.3224008560180662, |
|
"step": 12830 |
|
}, |
|
{ |
|
"epoch": 26.917277486910994, |
|
"learning_rate": 0.0003184467919352828, |
|
"loss": 2.3237512588500975, |
|
"step": 12840 |
|
}, |
|
{ |
|
"epoch": 26.938219895287958, |
|
"learning_rate": 0.0003183228585614763, |
|
"loss": 2.3366432189941406, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 26.95916230366492, |
|
"learning_rate": 0.0003181990697725988, |
|
"loss": 2.3389394760131834, |
|
"step": 12860 |
|
}, |
|
{ |
|
"epoch": 26.980104712041886, |
|
"learning_rate": 0.0003180754252877392, |
|
"loss": 2.2963605880737306, |
|
"step": 12870 |
|
}, |
|
{ |
|
"epoch": 26.99895287958115, |
|
"eval_loss": 2.5350682735443115, |
|
"eval_runtime": 73.272, |
|
"eval_samples_per_second": 149.525, |
|
"step": 12879 |
|
}, |
|
{ |
|
"epoch": 27.002094240837696, |
|
"learning_rate": 0.0003179519248267498, |
|
"loss": 2.4346525192260744, |
|
"step": 12880 |
|
}, |
|
{ |
|
"epoch": 27.02303664921466, |
|
"learning_rate": 0.000317828568110244, |
|
"loss": 2.3435186386108398, |
|
"step": 12890 |
|
}, |
|
{ |
|
"epoch": 27.043979057591624, |
|
"learning_rate": 0.000317705354859593, |
|
"loss": 2.351651191711426, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 27.064921465968588, |
|
"learning_rate": 0.0003175822847969239, |
|
"loss": 2.3490814208984374, |
|
"step": 12910 |
|
}, |
|
{ |
|
"epoch": 27.08586387434555, |
|
"learning_rate": 0.00031745935764511645, |
|
"loss": 2.3329612731933596, |
|
"step": 12920 |
|
}, |
|
{ |
|
"epoch": 27.106806282722513, |
|
"learning_rate": 0.0003173365731278007, |
|
"loss": 2.321672248840332, |
|
"step": 12930 |
|
}, |
|
{ |
|
"epoch": 27.127748691099477, |
|
"learning_rate": 0.00031721393096935445, |
|
"loss": 2.3357425689697267, |
|
"step": 12940 |
|
}, |
|
{ |
|
"epoch": 27.14869109947644, |
|
"learning_rate": 0.00031709143089490063, |
|
"loss": 2.3383440017700194, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 27.169633507853405, |
|
"learning_rate": 0.00031696907263030445, |
|
"loss": 2.3266096115112305, |
|
"step": 12960 |
|
}, |
|
{ |
|
"epoch": 27.190575916230365, |
|
"learning_rate": 0.00031684685590217115, |
|
"loss": 2.3512828826904295, |
|
"step": 12970 |
|
}, |
|
{ |
|
"epoch": 27.21151832460733, |
|
"learning_rate": 0.00031672478043784336, |
|
"loss": 2.329998016357422, |
|
"step": 12980 |
|
}, |
|
{ |
|
"epoch": 27.232460732984293, |
|
"learning_rate": 0.0003166028459653984, |
|
"loss": 2.353693962097168, |
|
"step": 12990 |
|
}, |
|
{ |
|
"epoch": 27.253403141361257, |
|
"learning_rate": 0.0003164810522136458, |
|
"loss": 2.3388673782348635, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 27.27434554973822, |
|
"learning_rate": 0.0003163593989121249, |
|
"loss": 2.34061222076416, |
|
"step": 13010 |
|
}, |
|
{ |
|
"epoch": 27.295287958115182, |
|
"learning_rate": 0.0003162378857911022, |
|
"loss": 2.3279703140258787, |
|
"step": 13020 |
|
}, |
|
{ |
|
"epoch": 27.316230366492146, |
|
"learning_rate": 0.00031611651258156884, |
|
"loss": 2.3643896102905275, |
|
"step": 13030 |
|
}, |
|
{ |
|
"epoch": 27.33717277486911, |
|
"learning_rate": 0.0003159952790152381, |
|
"loss": 2.326703643798828, |
|
"step": 13040 |
|
}, |
|
{ |
|
"epoch": 27.358115183246074, |
|
"learning_rate": 0.0003158741848245431, |
|
"loss": 2.3338809967041017, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 27.379057591623038, |
|
"learning_rate": 0.0003157532297426339, |
|
"loss": 2.318799591064453, |
|
"step": 13060 |
|
}, |
|
{ |
|
"epoch": 27.4, |
|
"learning_rate": 0.00031563241350337546, |
|
"loss": 2.3162815093994142, |
|
"step": 13070 |
|
}, |
|
{ |
|
"epoch": 27.420942408376963, |
|
"learning_rate": 0.00031551173584134514, |
|
"loss": 2.354751968383789, |
|
"step": 13080 |
|
}, |
|
{ |
|
"epoch": 27.441884816753927, |
|
"learning_rate": 0.0003153911964918298, |
|
"loss": 2.3353591918945313, |
|
"step": 13090 |
|
}, |
|
{ |
|
"epoch": 27.46282722513089, |
|
"learning_rate": 0.0003152707951908239, |
|
"loss": 2.3257909774780274, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 27.483769633507855, |
|
"learning_rate": 0.0003151505316750269, |
|
"loss": 2.336490821838379, |
|
"step": 13110 |
|
}, |
|
{ |
|
"epoch": 27.504712041884815, |
|
"learning_rate": 0.0003150304056818405, |
|
"loss": 2.32800350189209, |
|
"step": 13120 |
|
}, |
|
{ |
|
"epoch": 27.52565445026178, |
|
"learning_rate": 0.00031491041694936697, |
|
"loss": 2.3223346710205077, |
|
"step": 13130 |
|
}, |
|
{ |
|
"epoch": 27.546596858638743, |
|
"learning_rate": 0.000314790565216406, |
|
"loss": 2.348642921447754, |
|
"step": 13140 |
|
}, |
|
{ |
|
"epoch": 27.567539267015707, |
|
"learning_rate": 0.0003146708502224526, |
|
"loss": 2.3384424209594727, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 27.58848167539267, |
|
"learning_rate": 0.0003145512717076948, |
|
"loss": 2.301900863647461, |
|
"step": 13160 |
|
}, |
|
{ |
|
"epoch": 27.609424083769632, |
|
"learning_rate": 0.00031443182941301147, |
|
"loss": 2.3309160232543946, |
|
"step": 13170 |
|
}, |
|
{ |
|
"epoch": 27.630366492146596, |
|
"learning_rate": 0.0003143125230799694, |
|
"loss": 2.358192253112793, |
|
"step": 13180 |
|
}, |
|
{ |
|
"epoch": 27.65130890052356, |
|
"learning_rate": 0.00031419335245082134, |
|
"loss": 2.347599220275879, |
|
"step": 13190 |
|
}, |
|
{ |
|
"epoch": 27.672251308900524, |
|
"learning_rate": 0.00031407431726850375, |
|
"loss": 2.330830764770508, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 27.693193717277488, |
|
"learning_rate": 0.00031395541727663413, |
|
"loss": 2.33847599029541, |
|
"step": 13210 |
|
}, |
|
{ |
|
"epoch": 27.71413612565445, |
|
"learning_rate": 0.0003138366522195088, |
|
"loss": 2.3454364776611327, |
|
"step": 13220 |
|
}, |
|
{ |
|
"epoch": 27.735078534031413, |
|
"learning_rate": 0.0003137180218421011, |
|
"loss": 2.3458301544189455, |
|
"step": 13230 |
|
}, |
|
{ |
|
"epoch": 27.756020942408377, |
|
"learning_rate": 0.0003135995258900582, |
|
"loss": 2.2951147079467775, |
|
"step": 13240 |
|
}, |
|
{ |
|
"epoch": 27.77696335078534, |
|
"learning_rate": 0.0003134811641096994, |
|
"loss": 2.324018096923828, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 27.797905759162305, |
|
"learning_rate": 0.00031336293624801393, |
|
"loss": 2.320078468322754, |
|
"step": 13260 |
|
}, |
|
{ |
|
"epoch": 27.81884816753927, |
|
"learning_rate": 0.00031324484205265824, |
|
"loss": 2.3213479995727537, |
|
"step": 13270 |
|
}, |
|
{ |
|
"epoch": 27.83979057591623, |
|
"learning_rate": 0.000313126881271954, |
|
"loss": 2.352939224243164, |
|
"step": 13280 |
|
}, |
|
{ |
|
"epoch": 27.860732984293193, |
|
"learning_rate": 0.0003130090536548859, |
|
"loss": 2.3275819778442384, |
|
"step": 13290 |
|
}, |
|
{ |
|
"epoch": 27.881675392670157, |
|
"learning_rate": 0.00031289135895109924, |
|
"loss": 2.341213607788086, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 27.90261780104712, |
|
"learning_rate": 0.00031277379691089786, |
|
"loss": 2.352794647216797, |
|
"step": 13310 |
|
}, |
|
{ |
|
"epoch": 27.923560209424085, |
|
"learning_rate": 0.00031265636728524174, |
|
"loss": 2.329135513305664, |
|
"step": 13320 |
|
}, |
|
{ |
|
"epoch": 27.944502617801046, |
|
"learning_rate": 0.000312539069825745, |
|
"loss": 2.3414382934570312, |
|
"step": 13330 |
|
}, |
|
{ |
|
"epoch": 27.96544502617801, |
|
"learning_rate": 0.00031242190428467325, |
|
"loss": 2.3638214111328124, |
|
"step": 13340 |
|
}, |
|
{ |
|
"epoch": 27.986387434554974, |
|
"learning_rate": 0.0003123048704149423, |
|
"loss": 2.326797294616699, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 27.99895287958115, |
|
"eval_loss": 2.532017469406128, |
|
"eval_runtime": 73.734, |
|
"eval_samples_per_second": 148.588, |
|
"step": 13356 |
|
}, |
|
{ |
|
"epoch": 28.008376963350784, |
|
"learning_rate": 0.0003121879679701147, |
|
"loss": 2.463714599609375, |
|
"step": 13360 |
|
}, |
|
{ |
|
"epoch": 28.02931937172775, |
|
"learning_rate": 0.00031207119670439884, |
|
"loss": 2.3355535507202148, |
|
"step": 13370 |
|
}, |
|
{ |
|
"epoch": 28.050261780104712, |
|
"learning_rate": 0.00031195455637264574, |
|
"loss": 2.3194732666015625, |
|
"step": 13380 |
|
}, |
|
{ |
|
"epoch": 28.071204188481676, |
|
"learning_rate": 0.00031183804673034756, |
|
"loss": 2.3297607421875, |
|
"step": 13390 |
|
}, |
|
{ |
|
"epoch": 28.09214659685864, |
|
"learning_rate": 0.0003117216675336353, |
|
"loss": 2.33233642578125, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 28.1130890052356, |
|
"learning_rate": 0.00031160541853927627, |
|
"loss": 2.3335954666137697, |
|
"step": 13410 |
|
}, |
|
{ |
|
"epoch": 28.134031413612565, |
|
"learning_rate": 0.0003114892995046725, |
|
"loss": 2.3236547470092774, |
|
"step": 13420 |
|
}, |
|
{ |
|
"epoch": 28.15497382198953, |
|
"learning_rate": 0.00031137331018785835, |
|
"loss": 2.3411203384399415, |
|
"step": 13430 |
|
}, |
|
{ |
|
"epoch": 28.175916230366493, |
|
"learning_rate": 0.00031125745034749834, |
|
"loss": 2.343415451049805, |
|
"step": 13440 |
|
}, |
|
{ |
|
"epoch": 28.196858638743457, |
|
"learning_rate": 0.00031114171974288516, |
|
"loss": 2.305185890197754, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 28.217801047120417, |
|
"learning_rate": 0.00031102611813393753, |
|
"loss": 2.3106929779052736, |
|
"step": 13460 |
|
}, |
|
{ |
|
"epoch": 28.23874345549738, |
|
"learning_rate": 0.0003109106452811981, |
|
"loss": 2.342930221557617, |
|
"step": 13470 |
|
}, |
|
{ |
|
"epoch": 28.259685863874346, |
|
"learning_rate": 0.00031079530094583135, |
|
"loss": 2.3201034545898436, |
|
"step": 13480 |
|
}, |
|
{ |
|
"epoch": 28.28062827225131, |
|
"learning_rate": 0.0003106800848896216, |
|
"loss": 2.353871154785156, |
|
"step": 13490 |
|
}, |
|
{ |
|
"epoch": 28.301570680628274, |
|
"learning_rate": 0.0003105649968749708, |
|
"loss": 2.3587244033813475, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 28.322513089005234, |
|
"learning_rate": 0.0003104500366648965, |
|
"loss": 2.334798812866211, |
|
"step": 13510 |
|
}, |
|
{ |
|
"epoch": 28.343455497382198, |
|
"learning_rate": 0.0003103352040230302, |
|
"loss": 2.3387428283691407, |
|
"step": 13520 |
|
}, |
|
{ |
|
"epoch": 28.364397905759162, |
|
"learning_rate": 0.00031022049871361445, |
|
"loss": 2.35083065032959, |
|
"step": 13530 |
|
}, |
|
{ |
|
"epoch": 28.385340314136126, |
|
"learning_rate": 0.0003101059205015017, |
|
"loss": 2.329609680175781, |
|
"step": 13540 |
|
}, |
|
{ |
|
"epoch": 28.40628272251309, |
|
"learning_rate": 0.0003099914691521518, |
|
"loss": 2.3420963287353516, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 28.42722513089005, |
|
"learning_rate": 0.00030987714443163, |
|
"loss": 2.3433679580688476, |
|
"step": 13560 |
|
}, |
|
{ |
|
"epoch": 28.448167539267015, |
|
"learning_rate": 0.00030976294610660516, |
|
"loss": 2.3441110610961915, |
|
"step": 13570 |
|
}, |
|
{ |
|
"epoch": 28.46910994764398, |
|
"learning_rate": 0.00030964887394434754, |
|
"loss": 2.338638687133789, |
|
"step": 13580 |
|
}, |
|
{ |
|
"epoch": 28.490052356020943, |
|
"learning_rate": 0.000309534927712727, |
|
"loss": 2.3203834533691405, |
|
"step": 13590 |
|
}, |
|
{ |
|
"epoch": 28.510994764397907, |
|
"learning_rate": 0.0003094211071802107, |
|
"loss": 2.3022727966308594, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 28.531937172774867, |
|
"learning_rate": 0.00030930741211586155, |
|
"loss": 2.3490713119506834, |
|
"step": 13610 |
|
}, |
|
{ |
|
"epoch": 28.55287958115183, |
|
"learning_rate": 0.0003091938422893358, |
|
"loss": 2.3286787033081056, |
|
"step": 13620 |
|
}, |
|
{ |
|
"epoch": 28.573821989528795, |
|
"learning_rate": 0.00030908039747088155, |
|
"loss": 2.305118942260742, |
|
"step": 13630 |
|
}, |
|
{ |
|
"epoch": 28.59476439790576, |
|
"learning_rate": 0.00030896707743133635, |
|
"loss": 2.3220989227294924, |
|
"step": 13640 |
|
}, |
|
{ |
|
"epoch": 28.615706806282724, |
|
"learning_rate": 0.0003088538819421255, |
|
"loss": 2.3236154556274413, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 28.636649214659684, |
|
"learning_rate": 0.00030874081077526003, |
|
"loss": 2.323534393310547, |
|
"step": 13660 |
|
}, |
|
{ |
|
"epoch": 28.657591623036648, |
|
"learning_rate": 0.00030862786370333505, |
|
"loss": 2.3269046783447265, |
|
"step": 13670 |
|
}, |
|
{ |
|
"epoch": 28.678534031413612, |
|
"learning_rate": 0.00030851504049952727, |
|
"loss": 2.3261356353759766, |
|
"step": 13680 |
|
}, |
|
{ |
|
"epoch": 28.699476439790576, |
|
"learning_rate": 0.00030840234093759347, |
|
"loss": 2.3454893112182615, |
|
"step": 13690 |
|
}, |
|
{ |
|
"epoch": 28.72041884816754, |
|
"learning_rate": 0.0003082897647918688, |
|
"loss": 2.3275333404541017, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 28.741361256544504, |
|
"learning_rate": 0.0003081773118372642, |
|
"loss": 2.3333641052246095, |
|
"step": 13710 |
|
}, |
|
{ |
|
"epoch": 28.762303664921465, |
|
"learning_rate": 0.00030806498184926523, |
|
"loss": 2.3693473815917967, |
|
"step": 13720 |
|
}, |
|
{ |
|
"epoch": 28.78324607329843, |
|
"learning_rate": 0.0003079527746039298, |
|
"loss": 2.3141483306884765, |
|
"step": 13730 |
|
}, |
|
{ |
|
"epoch": 28.804188481675393, |
|
"learning_rate": 0.00030784068987788624, |
|
"loss": 2.353886032104492, |
|
"step": 13740 |
|
}, |
|
{ |
|
"epoch": 28.825130890052357, |
|
"learning_rate": 0.00030772872744833183, |
|
"loss": 2.3143518447875975, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 28.84607329842932, |
|
"learning_rate": 0.00030761688709303036, |
|
"loss": 2.317976379394531, |
|
"step": 13760 |
|
}, |
|
{ |
|
"epoch": 28.86701570680628, |
|
"learning_rate": 0.0003075051685903109, |
|
"loss": 2.331821060180664, |
|
"step": 13770 |
|
}, |
|
{ |
|
"epoch": 28.887958115183245, |
|
"learning_rate": 0.00030739357171906536, |
|
"loss": 2.3297216415405275, |
|
"step": 13780 |
|
}, |
|
{ |
|
"epoch": 28.90890052356021, |
|
"learning_rate": 0.0003072820962587471, |
|
"loss": 2.3354673385620117, |
|
"step": 13790 |
|
}, |
|
{ |
|
"epoch": 28.929842931937173, |
|
"learning_rate": 0.00030717074198936904, |
|
"loss": 2.308320999145508, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 28.950785340314138, |
|
"learning_rate": 0.0003070595086915015, |
|
"loss": 2.312677192687988, |
|
"step": 13810 |
|
}, |
|
{ |
|
"epoch": 28.971727748691098, |
|
"learning_rate": 0.00030694839614627076, |
|
"loss": 2.3000450134277344, |
|
"step": 13820 |
|
}, |
|
{ |
|
"epoch": 28.992670157068062, |
|
"learning_rate": 0.0003068374041353571, |
|
"loss": 2.333408737182617, |
|
"step": 13830 |
|
}, |
|
{ |
|
"epoch": 28.99895287958115, |
|
"eval_loss": 2.541404962539673, |
|
"eval_runtime": 73.9823, |
|
"eval_samples_per_second": 148.09, |
|
"step": 13833 |
|
}, |
|
{ |
|
"epoch": 29.014659685863876, |
|
"learning_rate": 0.000306726532440993, |
|
"loss": 2.4384193420410156, |
|
"step": 13840 |
|
}, |
|
{ |
|
"epoch": 29.035602094240836, |
|
"learning_rate": 0.0003066157808459613, |
|
"loss": 2.3120851516723633, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 29.0565445026178, |
|
"learning_rate": 0.0003065051491335936, |
|
"loss": 2.333901596069336, |
|
"step": 13860 |
|
}, |
|
{ |
|
"epoch": 29.077486910994764, |
|
"learning_rate": 0.0003063946370877681, |
|
"loss": 2.311614227294922, |
|
"step": 13870 |
|
}, |
|
{ |
|
"epoch": 29.09842931937173, |
|
"learning_rate": 0.0003062842444929085, |
|
"loss": 2.328507423400879, |
|
"step": 13880 |
|
}, |
|
{ |
|
"epoch": 29.119371727748693, |
|
"learning_rate": 0.00030617397113398125, |
|
"loss": 2.3186750411987305, |
|
"step": 13890 |
|
}, |
|
{ |
|
"epoch": 29.140314136125653, |
|
"learning_rate": 0.00030606381679649483, |
|
"loss": 2.3101566314697264, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 29.161256544502617, |
|
"learning_rate": 0.00030595378126649727, |
|
"loss": 2.3323139190673827, |
|
"step": 13910 |
|
}, |
|
{ |
|
"epoch": 29.18219895287958, |
|
"learning_rate": 0.0003058438643305747, |
|
"loss": 2.3120336532592773, |
|
"step": 13920 |
|
}, |
|
{ |
|
"epoch": 29.203141361256545, |
|
"learning_rate": 0.00030573406577584955, |
|
"loss": 2.3213123321533202, |
|
"step": 13930 |
|
}, |
|
{ |
|
"epoch": 29.22408376963351, |
|
"learning_rate": 0.000305624385389979, |
|
"loss": 2.3158872604370115, |
|
"step": 13940 |
|
}, |
|
{ |
|
"epoch": 29.24502617801047, |
|
"learning_rate": 0.0003055148229611527, |
|
"loss": 2.3301626205444337, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 29.265968586387434, |
|
"learning_rate": 0.00030540537827809176, |
|
"loss": 2.2979711532592773, |
|
"step": 13960 |
|
}, |
|
{ |
|
"epoch": 29.286910994764398, |
|
"learning_rate": 0.0003052960511300467, |
|
"loss": 2.337363433837891, |
|
"step": 13970 |
|
}, |
|
{ |
|
"epoch": 29.307853403141362, |
|
"learning_rate": 0.0003051868413067956, |
|
"loss": 2.3084648132324217, |
|
"step": 13980 |
|
}, |
|
{ |
|
"epoch": 29.328795811518326, |
|
"learning_rate": 0.00030507774859864277, |
|
"loss": 2.3315618515014647, |
|
"step": 13990 |
|
}, |
|
{ |
|
"epoch": 29.349738219895286, |
|
"learning_rate": 0.0003049687727964166, |
|
"loss": 2.342039680480957, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 29.37068062827225, |
|
"learning_rate": 0.00030485991369146834, |
|
"loss": 2.327268600463867, |
|
"step": 14010 |
|
}, |
|
{ |
|
"epoch": 29.391623036649214, |
|
"learning_rate": 0.00030475117107567015, |
|
"loss": 2.311885643005371, |
|
"step": 14020 |
|
}, |
|
{ |
|
"epoch": 29.41256544502618, |
|
"learning_rate": 0.0003046425447414135, |
|
"loss": 2.297453498840332, |
|
"step": 14030 |
|
}, |
|
{ |
|
"epoch": 29.433507853403142, |
|
"learning_rate": 0.0003045340344816073, |
|
"loss": 2.295667839050293, |
|
"step": 14040 |
|
}, |
|
{ |
|
"epoch": 29.454450261780103, |
|
"learning_rate": 0.0003044256400896769, |
|
"loss": 2.3093278884887694, |
|
"step": 14050 |
|
}, |
|
{ |
|
"epoch": 29.475392670157067, |
|
"learning_rate": 0.0003043173613595614, |
|
"loss": 2.3026140213012694, |
|
"step": 14060 |
|
}, |
|
{ |
|
"epoch": 29.49633507853403, |
|
"learning_rate": 0.0003042091980857131, |
|
"loss": 2.339429473876953, |
|
"step": 14070 |
|
}, |
|
{ |
|
"epoch": 29.517277486910995, |
|
"learning_rate": 0.0003041011500630949, |
|
"loss": 2.3204904556274415, |
|
"step": 14080 |
|
}, |
|
{ |
|
"epoch": 29.53821989528796, |
|
"learning_rate": 0.00030399321708717947, |
|
"loss": 2.327162170410156, |
|
"step": 14090 |
|
}, |
|
{ |
|
"epoch": 29.559162303664923, |
|
"learning_rate": 0.00030388539895394697, |
|
"loss": 2.3462697982788088, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 29.580104712041884, |
|
"learning_rate": 0.00030377769545988394, |
|
"loss": 2.2912479400634767, |
|
"step": 14110 |
|
}, |
|
{ |
|
"epoch": 29.601047120418848, |
|
"learning_rate": 0.00030367010640198143, |
|
"loss": 2.35098876953125, |
|
"step": 14120 |
|
}, |
|
{ |
|
"epoch": 29.62198952879581, |
|
"learning_rate": 0.0003035626315777333, |
|
"loss": 2.304596710205078, |
|
"step": 14130 |
|
}, |
|
{ |
|
"epoch": 29.642931937172776, |
|
"learning_rate": 0.00030345527078513493, |
|
"loss": 2.3083545684814455, |
|
"step": 14140 |
|
}, |
|
{ |
|
"epoch": 29.66387434554974, |
|
"learning_rate": 0.0003033480238226813, |
|
"loss": 2.304719924926758, |
|
"step": 14150 |
|
}, |
|
{ |
|
"epoch": 29.6848167539267, |
|
"learning_rate": 0.0003032408904893656, |
|
"loss": 2.309472846984863, |
|
"step": 14160 |
|
}, |
|
{ |
|
"epoch": 29.705759162303664, |
|
"learning_rate": 0.00030313387058467756, |
|
"loss": 2.3114566802978516, |
|
"step": 14170 |
|
}, |
|
{ |
|
"epoch": 29.72670157068063, |
|
"learning_rate": 0.0003030269639086021, |
|
"loss": 2.3168495178222654, |
|
"step": 14180 |
|
}, |
|
{ |
|
"epoch": 29.747643979057592, |
|
"learning_rate": 0.0003029201702616173, |
|
"loss": 2.3027936935424806, |
|
"step": 14190 |
|
}, |
|
{ |
|
"epoch": 29.768586387434556, |
|
"learning_rate": 0.0003028134894446933, |
|
"loss": 2.330441474914551, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 29.789528795811517, |
|
"learning_rate": 0.00030270692125929034, |
|
"loss": 2.2950525283813477, |
|
"step": 14210 |
|
}, |
|
{ |
|
"epoch": 29.81047120418848, |
|
"learning_rate": 0.00030260046550735763, |
|
"loss": 2.3066877365112304, |
|
"step": 14220 |
|
}, |
|
{ |
|
"epoch": 29.831413612565445, |
|
"learning_rate": 0.0003024941219913316, |
|
"loss": 2.3138294219970703, |
|
"step": 14230 |
|
}, |
|
{ |
|
"epoch": 29.85235602094241, |
|
"learning_rate": 0.00030238789051413416, |
|
"loss": 2.3398483276367186, |
|
"step": 14240 |
|
}, |
|
{ |
|
"epoch": 29.873298429319373, |
|
"learning_rate": 0.00030228177087917153, |
|
"loss": 2.3180753707885744, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 29.894240837696334, |
|
"learning_rate": 0.00030217576289033235, |
|
"loss": 2.293859100341797, |
|
"step": 14260 |
|
}, |
|
{ |
|
"epoch": 29.915183246073298, |
|
"learning_rate": 0.00030206986635198654, |
|
"loss": 2.315079116821289, |
|
"step": 14270 |
|
}, |
|
{ |
|
"epoch": 29.93612565445026, |
|
"learning_rate": 0.00030196408106898356, |
|
"loss": 2.3188785552978515, |
|
"step": 14280 |
|
}, |
|
{ |
|
"epoch": 29.957068062827226, |
|
"learning_rate": 0.0003018584068466507, |
|
"loss": 2.355891799926758, |
|
"step": 14290 |
|
}, |
|
{ |
|
"epoch": 29.97801047120419, |
|
"learning_rate": 0.0003017528434907922, |
|
"loss": 2.310663032531738, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 29.99895287958115, |
|
"learning_rate": 0.00030164739080768704, |
|
"loss": 2.357052803039551, |
|
"step": 14310 |
|
}, |
|
{ |
|
"epoch": 29.99895287958115, |
|
"eval_loss": 2.5363190174102783, |
|
"eval_runtime": 73.3558, |
|
"eval_samples_per_second": 149.354, |
|
"step": 14310 |
|
}, |
|
{ |
|
"epoch": 30.020942408376964, |
|
"learning_rate": 0.0003015420486040879, |
|
"loss": 2.432624626159668, |
|
"step": 14320 |
|
}, |
|
{ |
|
"epoch": 30.041884816753928, |
|
"learning_rate": 0.00030143681668721935, |
|
"loss": 2.3034442901611327, |
|
"step": 14330 |
|
}, |
|
{ |
|
"epoch": 30.06282722513089, |
|
"learning_rate": 0.00030133169486477694, |
|
"loss": 2.3489042282104493, |
|
"step": 14340 |
|
}, |
|
{ |
|
"epoch": 30.083769633507853, |
|
"learning_rate": 0.0003012266829449249, |
|
"loss": 2.3431249618530274, |
|
"step": 14350 |
|
}, |
|
{ |
|
"epoch": 30.104712041884817, |
|
"learning_rate": 0.00030112178073629544, |
|
"loss": 2.332902526855469, |
|
"step": 14360 |
|
}, |
|
{ |
|
"epoch": 30.12565445026178, |
|
"learning_rate": 0.0003010169880479867, |
|
"loss": 2.323573112487793, |
|
"step": 14370 |
|
}, |
|
{ |
|
"epoch": 30.146596858638745, |
|
"learning_rate": 0.0003009123046895618, |
|
"loss": 2.299881362915039, |
|
"step": 14380 |
|
}, |
|
{ |
|
"epoch": 30.167539267015705, |
|
"learning_rate": 0.00030080773047104687, |
|
"loss": 2.319793701171875, |
|
"step": 14390 |
|
}, |
|
{ |
|
"epoch": 30.18848167539267, |
|
"learning_rate": 0.0003007032652029301, |
|
"loss": 2.3272857666015625, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 30.209424083769633, |
|
"learning_rate": 0.00030059890869615983, |
|
"loss": 2.3354257583618163, |
|
"step": 14410 |
|
}, |
|
{ |
|
"epoch": 30.230366492146597, |
|
"learning_rate": 0.0003004946607621435, |
|
"loss": 2.3249101638793945, |
|
"step": 14420 |
|
}, |
|
{ |
|
"epoch": 30.25130890052356, |
|
"learning_rate": 0.0003003905212127461, |
|
"loss": 2.331306266784668, |
|
"step": 14430 |
|
}, |
|
{ |
|
"epoch": 30.272251308900522, |
|
"learning_rate": 0.00030028648986028843, |
|
"loss": 2.3302356719970705, |
|
"step": 14440 |
|
}, |
|
{ |
|
"epoch": 30.293193717277486, |
|
"learning_rate": 0.00030018256651754633, |
|
"loss": 2.3084732055664063, |
|
"step": 14450 |
|
}, |
|
{ |
|
"epoch": 30.31413612565445, |
|
"learning_rate": 0.00030007875099774864, |
|
"loss": 2.324197006225586, |
|
"step": 14460 |
|
}, |
|
{ |
|
"epoch": 30.335078534031414, |
|
"learning_rate": 0.0002999750431145761, |
|
"loss": 2.309644317626953, |
|
"step": 14470 |
|
}, |
|
{ |
|
"epoch": 30.356020942408378, |
|
"learning_rate": 0.0002998714426821599, |
|
"loss": 2.332279014587402, |
|
"step": 14480 |
|
}, |
|
{ |
|
"epoch": 30.376963350785342, |
|
"learning_rate": 0.00029976794951508027, |
|
"loss": 2.3015905380249024, |
|
"step": 14490 |
|
}, |
|
{ |
|
"epoch": 30.397905759162303, |
|
"learning_rate": 0.00029966456342836505, |
|
"loss": 2.3307212829589843, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 30.418848167539267, |
|
"learning_rate": 0.0002995612842374884, |
|
"loss": 2.3488508224487306, |
|
"step": 14510 |
|
}, |
|
{ |
|
"epoch": 30.43979057591623, |
|
"learning_rate": 0.0002994581117583693, |
|
"loss": 2.2981189727783202, |
|
"step": 14520 |
|
}, |
|
{ |
|
"epoch": 30.460732984293195, |
|
"learning_rate": 0.00029935504580737006, |
|
"loss": 2.292937088012695, |
|
"step": 14530 |
|
}, |
|
{ |
|
"epoch": 30.48167539267016, |
|
"learning_rate": 0.00029925208620129546, |
|
"loss": 2.329487609863281, |
|
"step": 14540 |
|
}, |
|
{ |
|
"epoch": 30.50261780104712, |
|
"learning_rate": 0.0002991492327573909, |
|
"loss": 2.275893974304199, |
|
"step": 14550 |
|
}, |
|
{ |
|
"epoch": 30.523560209424083, |
|
"learning_rate": 0.0002990464852933409, |
|
"loss": 2.289459228515625, |
|
"step": 14560 |
|
}, |
|
{ |
|
"epoch": 30.544502617801047, |
|
"learning_rate": 0.0002989438436272684, |
|
"loss": 2.2976861953735352, |
|
"step": 14570 |
|
}, |
|
{ |
|
"epoch": 30.56544502617801, |
|
"learning_rate": 0.00029884130757773275, |
|
"loss": 2.319015884399414, |
|
"step": 14580 |
|
}, |
|
{ |
|
"epoch": 30.586387434554975, |
|
"learning_rate": 0.0002987388769637288, |
|
"loss": 2.3237770080566404, |
|
"step": 14590 |
|
}, |
|
{ |
|
"epoch": 30.607329842931936, |
|
"learning_rate": 0.00029863655160468534, |
|
"loss": 2.330046844482422, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 30.6282722513089, |
|
"learning_rate": 0.0002985343313204637, |
|
"loss": 2.328061103820801, |
|
"step": 14610 |
|
}, |
|
{ |
|
"epoch": 30.649214659685864, |
|
"learning_rate": 0.0002984322159313568, |
|
"loss": 2.3274772644042967, |
|
"step": 14620 |
|
}, |
|
{ |
|
"epoch": 30.670157068062828, |
|
"learning_rate": 0.00029833020525808714, |
|
"loss": 2.325545883178711, |
|
"step": 14630 |
|
}, |
|
{ |
|
"epoch": 30.691099476439792, |
|
"learning_rate": 0.00029822829912180636, |
|
"loss": 2.3240276336669923, |
|
"step": 14640 |
|
}, |
|
{ |
|
"epoch": 30.712041884816752, |
|
"learning_rate": 0.0002981264973440931, |
|
"loss": 2.324121856689453, |
|
"step": 14650 |
|
}, |
|
{ |
|
"epoch": 30.732984293193716, |
|
"learning_rate": 0.00029802479974695223, |
|
"loss": 2.3233869552612303, |
|
"step": 14660 |
|
}, |
|
{ |
|
"epoch": 30.75392670157068, |
|
"learning_rate": 0.00029792320615281337, |
|
"loss": 2.3022382736206053, |
|
"step": 14670 |
|
}, |
|
{ |
|
"epoch": 30.774869109947645, |
|
"learning_rate": 0.00029782171638452937, |
|
"loss": 2.3219308853149414, |
|
"step": 14680 |
|
}, |
|
{ |
|
"epoch": 30.79581151832461, |
|
"learning_rate": 0.0002977203302653755, |
|
"loss": 2.3212976455688477, |
|
"step": 14690 |
|
}, |
|
{ |
|
"epoch": 30.81675392670157, |
|
"learning_rate": 0.0002976190476190476, |
|
"loss": 2.345839500427246, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 30.837696335078533, |
|
"learning_rate": 0.0002975178682696613, |
|
"loss": 2.2968841552734376, |
|
"step": 14710 |
|
}, |
|
{ |
|
"epoch": 30.858638743455497, |
|
"learning_rate": 0.0002974167920417504, |
|
"loss": 2.313581848144531, |
|
"step": 14720 |
|
}, |
|
{ |
|
"epoch": 30.87958115183246, |
|
"learning_rate": 0.00029731581876026557, |
|
"loss": 2.326977348327637, |
|
"step": 14730 |
|
}, |
|
{ |
|
"epoch": 30.900523560209425, |
|
"learning_rate": 0.00029721494825057357, |
|
"loss": 2.3257322311401367, |
|
"step": 14740 |
|
}, |
|
{ |
|
"epoch": 30.921465968586386, |
|
"learning_rate": 0.00029711418033845523, |
|
"loss": 2.285732460021973, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 30.94240837696335, |
|
"learning_rate": 0.0002970135148501047, |
|
"loss": 2.3275766372680664, |
|
"step": 14760 |
|
}, |
|
{ |
|
"epoch": 30.963350785340314, |
|
"learning_rate": 0.00029691295161212816, |
|
"loss": 2.3182727813720705, |
|
"step": 14770 |
|
}, |
|
{ |
|
"epoch": 30.984293193717278, |
|
"learning_rate": 0.0002968124904515423, |
|
"loss": 2.3104841232299806, |
|
"step": 14780 |
|
}, |
|
{ |
|
"epoch": 30.99895287958115, |
|
"eval_loss": 2.544727087020874, |
|
"eval_runtime": 73.5548, |
|
"eval_samples_per_second": 148.95, |
|
"step": 14787 |
|
}, |
|
{ |
|
"epoch": 31.006282722513088, |
|
"learning_rate": 0.00029671213119577346, |
|
"loss": 2.4215261459350588, |
|
"step": 14790 |
|
}, |
|
{ |
|
"epoch": 31.027225130890052, |
|
"learning_rate": 0.00029661187367265593, |
|
"loss": 2.3005090713500977, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 31.048167539267016, |
|
"learning_rate": 0.0002965117177104311, |
|
"loss": 2.3047313690185547, |
|
"step": 14810 |
|
}, |
|
{ |
|
"epoch": 31.06910994764398, |
|
"learning_rate": 0.0002964116631377459, |
|
"loss": 2.3039810180664064, |
|
"step": 14820 |
|
}, |
|
{ |
|
"epoch": 31.09005235602094, |
|
"learning_rate": 0.000296311709783652, |
|
"loss": 2.3020254135131837, |
|
"step": 14830 |
|
}, |
|
{ |
|
"epoch": 31.110994764397905, |
|
"learning_rate": 0.00029621185747760406, |
|
"loss": 2.3200841903686524, |
|
"step": 14840 |
|
}, |
|
{ |
|
"epoch": 31.13193717277487, |
|
"learning_rate": 0.0002961121060494589, |
|
"loss": 2.2955398559570312, |
|
"step": 14850 |
|
}, |
|
{ |
|
"epoch": 31.152879581151833, |
|
"learning_rate": 0.00029601245532947417, |
|
"loss": 2.322628974914551, |
|
"step": 14860 |
|
}, |
|
{ |
|
"epoch": 31.173821989528797, |
|
"learning_rate": 0.0002959129051483069, |
|
"loss": 2.3180873870849608, |
|
"step": 14870 |
|
}, |
|
{ |
|
"epoch": 31.194764397905757, |
|
"learning_rate": 0.00029581345533701285, |
|
"loss": 2.299137306213379, |
|
"step": 14880 |
|
}, |
|
{ |
|
"epoch": 31.21570680628272, |
|
"learning_rate": 0.0002957141057270448, |
|
"loss": 2.2992317199707033, |
|
"step": 14890 |
|
}, |
|
{ |
|
"epoch": 31.236649214659685, |
|
"learning_rate": 0.0002956148561502513, |
|
"loss": 2.3339006423950197, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 31.25759162303665, |
|
"learning_rate": 0.00029551570643887603, |
|
"loss": 2.2937063217163085, |
|
"step": 14910 |
|
}, |
|
{ |
|
"epoch": 31.278534031413614, |
|
"learning_rate": 0.00029541665642555606, |
|
"loss": 2.3086185455322266, |
|
"step": 14920 |
|
}, |
|
{ |
|
"epoch": 31.299476439790578, |
|
"learning_rate": 0.00029531770594332096, |
|
"loss": 2.3051830291748048, |
|
"step": 14930 |
|
}, |
|
{ |
|
"epoch": 31.320418848167538, |
|
"learning_rate": 0.0002952188548255915, |
|
"loss": 2.3165931701660156, |
|
"step": 14940 |
|
}, |
|
{ |
|
"epoch": 31.341361256544502, |
|
"learning_rate": 0.00029512010290617854, |
|
"loss": 2.310456657409668, |
|
"step": 14950 |
|
}, |
|
{ |
|
"epoch": 31.362303664921466, |
|
"learning_rate": 0.0002950214500192816, |
|
"loss": 2.3057369232177733, |
|
"step": 14960 |
|
}, |
|
{ |
|
"epoch": 31.38324607329843, |
|
"learning_rate": 0.00029492289599948834, |
|
"loss": 2.316122627258301, |
|
"step": 14970 |
|
}, |
|
{ |
|
"epoch": 31.404188481675394, |
|
"learning_rate": 0.0002948244406817725, |
|
"loss": 2.3287500381469726, |
|
"step": 14980 |
|
}, |
|
{ |
|
"epoch": 31.425130890052355, |
|
"learning_rate": 0.00029472608390149343, |
|
"loss": 2.309092330932617, |
|
"step": 14990 |
|
}, |
|
{ |
|
"epoch": 31.44607329842932, |
|
"learning_rate": 0.00029462782549439473, |
|
"loss": 2.331714057922363, |
|
"step": 15000 |
|
} |
|
], |
|
"max_steps": 15000, |
|
"num_train_epochs": 32, |
|
"total_flos": 4125839411805155328, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|