{ "best_metric": null, "best_model_checkpoint": null, "epoch": 31.44607329842932, "global_step": 15000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0020942408376963353, "learning_rate": 1.0206207261596573e-07, "loss": 42.81952667236328, "step": 1 }, { "epoch": 0.020942408376963352, "learning_rate": 1.0206207261596575e-06, "loss": 43.49555460611979, "step": 10 }, { "epoch": 0.041884816753926704, "learning_rate": 2.041241452319315e-06, "loss": 43.81195983886719, "step": 20 }, { "epoch": 0.06282722513089005, "learning_rate": 3.0618621784789722e-06, "loss": 43.1638671875, "step": 30 }, { "epoch": 0.08376963350785341, "learning_rate": 4.08248290463863e-06, "loss": 43.10018615722656, "step": 40 }, { "epoch": 0.10471204188481675, "learning_rate": 5.103103630798286e-06, "loss": 42.361767578125, "step": 50 }, { "epoch": 0.1256544502617801, "learning_rate": 6.1237243569579445e-06, "loss": 41.58636474609375, "step": 60 }, { "epoch": 0.14659685863874344, "learning_rate": 7.144345083117603e-06, "loss": 40.42512817382813, "step": 70 }, { "epoch": 0.16753926701570682, "learning_rate": 8.16496580927726e-06, "loss": 40.1504150390625, "step": 80 }, { "epoch": 0.18848167539267016, "learning_rate": 9.185586535436916e-06, "loss": 39.03293151855469, "step": 90 }, { "epoch": 0.2094240837696335, "learning_rate": 1.0206207261596573e-05, "loss": 38.355224609375, "step": 100 }, { "epoch": 0.23036649214659685, "learning_rate": 1.1226827987756233e-05, "loss": 37.3426025390625, "step": 110 }, { "epoch": 0.2513089005235602, "learning_rate": 1.2247448713915889e-05, "loss": 36.23554992675781, "step": 120 }, { "epoch": 0.27225130890052357, "learning_rate": 1.3268069440075545e-05, "loss": 35.76961975097656, "step": 130 }, { "epoch": 0.2931937172774869, "learning_rate": 1.4288690166235205e-05, "loss": 34.53816528320313, "step": 140 }, { "epoch": 0.31413612565445026, "learning_rate": 1.530931089239486e-05, "loss": 33.968804931640626, "step": 150 }, { "epoch": 0.33507853403141363, "learning_rate": 1.632993161855452e-05, "loss": 32.835089111328124, "step": 160 }, { "epoch": 0.35602094240837695, "learning_rate": 1.7350552344714174e-05, "loss": 32.344024658203125, "step": 170 }, { "epoch": 0.3769633507853403, "learning_rate": 1.8371173070873833e-05, "loss": 31.358078002929688, "step": 180 }, { "epoch": 0.39790575916230364, "learning_rate": 1.939179379703349e-05, "loss": 30.222982788085936, "step": 190 }, { "epoch": 0.418848167539267, "learning_rate": 2.0412414523193145e-05, "loss": 29.279229736328126, "step": 200 }, { "epoch": 0.4397905759162304, "learning_rate": 2.1433035249352804e-05, "loss": 28.632891845703124, "step": 210 }, { "epoch": 0.4607329842931937, "learning_rate": 2.2453655975512465e-05, "loss": 27.667315673828124, "step": 220 }, { "epoch": 0.4816753926701571, "learning_rate": 2.347427670167212e-05, "loss": 26.753375244140624, "step": 230 }, { "epoch": 0.5026178010471204, "learning_rate": 2.4494897427831778e-05, "loss": 26.086724853515626, "step": 240 }, { "epoch": 0.5235602094240838, "learning_rate": 2.5515518153991436e-05, "loss": 24.617787170410157, "step": 250 }, { "epoch": 0.5445026178010471, "learning_rate": 2.653613888015109e-05, "loss": 23.82097473144531, "step": 260 }, { "epoch": 0.5654450261780105, "learning_rate": 2.755675960631075e-05, "loss": 22.989166259765625, "step": 270 }, { "epoch": 0.5863874345549738, "learning_rate": 2.857738033247041e-05, "loss": 22.07659454345703, "step": 280 }, { "epoch": 0.6073298429319371, "learning_rate": 2.9598001058630065e-05, "loss": 21.30054168701172, "step": 290 }, { "epoch": 0.6282722513089005, "learning_rate": 3.061862178478972e-05, "loss": 20.448097229003906, "step": 300 }, { "epoch": 0.6492146596858639, "learning_rate": 3.163924251094938e-05, "loss": 19.719020080566406, "step": 310 }, { "epoch": 0.6701570680628273, "learning_rate": 3.265986323710904e-05, "loss": 19.040341186523438, "step": 320 }, { "epoch": 0.6910994764397905, "learning_rate": 3.3680483963268694e-05, "loss": 18.16093292236328, "step": 330 }, { "epoch": 0.7120418848167539, "learning_rate": 3.470110468942835e-05, "loss": 17.382850646972656, "step": 340 }, { "epoch": 0.7329842931937173, "learning_rate": 3.5721725415588004e-05, "loss": 16.651702880859375, "step": 350 }, { "epoch": 0.7539267015706806, "learning_rate": 3.6742346141747665e-05, "loss": 16.079180908203124, "step": 360 }, { "epoch": 0.774869109947644, "learning_rate": 3.7762966867907327e-05, "loss": 15.322096252441407, "step": 370 }, { "epoch": 0.7958115183246073, "learning_rate": 3.878358759406698e-05, "loss": 14.650254821777343, "step": 380 }, { "epoch": 0.8167539267015707, "learning_rate": 3.980420832022664e-05, "loss": 14.0874267578125, "step": 390 }, { "epoch": 0.837696335078534, "learning_rate": 4.082482904638629e-05, "loss": 13.5120849609375, "step": 400 }, { "epoch": 0.8586387434554974, "learning_rate": 4.184544977254595e-05, "loss": 12.803053283691407, "step": 410 }, { "epoch": 0.8795811518324608, "learning_rate": 4.286607049870561e-05, "loss": 12.207872009277343, "step": 420 }, { "epoch": 0.900523560209424, "learning_rate": 4.388669122486527e-05, "loss": 11.640280151367188, "step": 430 }, { "epoch": 0.9214659685863874, "learning_rate": 4.490731195102493e-05, "loss": 11.116693878173828, "step": 440 }, { "epoch": 0.9424083769633508, "learning_rate": 4.5927932677184585e-05, "loss": 10.586290740966797, "step": 450 }, { "epoch": 0.9633507853403142, "learning_rate": 4.694855340334424e-05, "loss": 10.197176361083985, "step": 460 }, { "epoch": 0.9842931937172775, "learning_rate": 4.7969174129503894e-05, "loss": 9.696656036376954, "step": 470 }, { "epoch": 0.9989528795811519, "eval_loss": 7.631092548370361, "eval_runtime": 73.7635, "eval_samples_per_second": 148.529, "step": 477 }, { "epoch": 1.006282722513089, "learning_rate": 4.8989794855663556e-05, "loss": 9.649329376220702, "step": 480 }, { "epoch": 1.0272251308900524, "learning_rate": 5.001041558182322e-05, "loss": 8.800138092041015, "step": 490 }, { "epoch": 1.0481675392670158, "learning_rate": 5.103103630798287e-05, "loss": 8.46273422241211, "step": 500 }, { "epoch": 1.0691099476439792, "learning_rate": 5.205165703414253e-05, "loss": 8.093246459960938, "step": 510 }, { "epoch": 1.0900523560209425, "learning_rate": 5.307227776030218e-05, "loss": 7.800753021240235, "step": 520 }, { "epoch": 1.1109947643979057, "learning_rate": 5.409289848646184e-05, "loss": 7.580593872070312, "step": 530 }, { "epoch": 1.131937172774869, "learning_rate": 5.51135192126215e-05, "loss": 7.297437286376953, "step": 540 }, { "epoch": 1.1528795811518324, "learning_rate": 5.613413993878116e-05, "loss": 7.115350341796875, "step": 550 }, { "epoch": 1.1738219895287958, "learning_rate": 5.715476066494082e-05, "loss": 6.890144348144531, "step": 560 }, { "epoch": 1.1947643979057592, "learning_rate": 5.817538139110047e-05, "loss": 6.647792053222656, "step": 570 }, { "epoch": 1.2157068062827225, "learning_rate": 5.919600211726013e-05, "loss": 6.418707275390625, "step": 580 }, { "epoch": 1.236649214659686, "learning_rate": 6.0216622843419785e-05, "loss": 6.1961822509765625, "step": 590 }, { "epoch": 1.2575916230366493, "learning_rate": 6.123724356957945e-05, "loss": 6.020335388183594, "step": 600 }, { "epoch": 1.2785340314136127, "learning_rate": 6.22578642957391e-05, "loss": 5.866229248046875, "step": 610 }, { "epoch": 1.2994764397905758, "learning_rate": 6.327848502189876e-05, "loss": 5.687960433959961, "step": 620 }, { "epoch": 1.3204188481675394, "learning_rate": 6.429910574805841e-05, "loss": 5.516669082641601, "step": 630 }, { "epoch": 1.3413612565445026, "learning_rate": 6.531972647421808e-05, "loss": 5.365422058105469, "step": 640 }, { "epoch": 1.362303664921466, "learning_rate": 6.634034720037773e-05, "loss": 5.2304027557373045, "step": 650 }, { "epoch": 1.3832460732984293, "learning_rate": 6.736096792653739e-05, "loss": 5.1493980407714846, "step": 660 }, { "epoch": 1.4041884816753927, "learning_rate": 6.838158865269704e-05, "loss": 5.069922256469726, "step": 670 }, { "epoch": 1.425130890052356, "learning_rate": 6.94022093788567e-05, "loss": 4.946885299682617, "step": 680 }, { "epoch": 1.4460732984293194, "learning_rate": 7.042283010501637e-05, "loss": 4.852196502685547, "step": 690 }, { "epoch": 1.4670157068062828, "learning_rate": 7.144345083117601e-05, "loss": 4.79791488647461, "step": 700 }, { "epoch": 1.487958115183246, "learning_rate": 7.246407155733568e-05, "loss": 4.701091766357422, "step": 710 }, { "epoch": 1.5089005235602095, "learning_rate": 7.348469228349533e-05, "loss": 4.629792404174805, "step": 720 }, { "epoch": 1.5298429319371727, "learning_rate": 7.450531300965498e-05, "loss": 4.491447067260742, "step": 730 }, { "epoch": 1.5507853403141363, "learning_rate": 7.552593373581465e-05, "loss": 4.365177917480469, "step": 740 }, { "epoch": 1.5717277486910994, "learning_rate": 7.654655446197431e-05, "loss": 4.266152572631836, "step": 750 }, { "epoch": 1.5926701570680628, "learning_rate": 7.756717518813396e-05, "loss": 4.203376770019531, "step": 760 }, { "epoch": 1.6136125654450262, "learning_rate": 7.858779591429362e-05, "loss": 4.128662872314453, "step": 770 }, { "epoch": 1.6345549738219896, "learning_rate": 7.960841664045329e-05, "loss": 4.131737899780274, "step": 780 }, { "epoch": 1.655497382198953, "learning_rate": 8.062903736661294e-05, "loss": 4.008557891845703, "step": 790 }, { "epoch": 1.676439790575916, "learning_rate": 8.164965809277258e-05, "loss": 3.9548309326171873, "step": 800 }, { "epoch": 1.6973821989528797, "learning_rate": 8.267027881893225e-05, "loss": 3.903990936279297, "step": 810 }, { "epoch": 1.7183246073298428, "learning_rate": 8.36908995450919e-05, "loss": 3.8517215728759764, "step": 820 }, { "epoch": 1.7392670157068064, "learning_rate": 8.471152027125156e-05, "loss": 3.815013122558594, "step": 830 }, { "epoch": 1.7602094240837696, "learning_rate": 8.573214099741121e-05, "loss": 3.762827682495117, "step": 840 }, { "epoch": 1.781151832460733, "learning_rate": 8.675276172357088e-05, "loss": 3.739139938354492, "step": 850 }, { "epoch": 1.8020942408376963, "learning_rate": 8.777338244973054e-05, "loss": 3.6771942138671876, "step": 860 }, { "epoch": 1.8230366492146597, "learning_rate": 8.879400317589019e-05, "loss": 3.671974945068359, "step": 870 }, { "epoch": 1.843979057591623, "learning_rate": 8.981462390204986e-05, "loss": 3.6025531768798826, "step": 880 }, { "epoch": 1.8649214659685864, "learning_rate": 9.083524462820951e-05, "loss": 3.580000305175781, "step": 890 }, { "epoch": 1.8858638743455498, "learning_rate": 9.185586535436917e-05, "loss": 3.570189666748047, "step": 900 }, { "epoch": 1.906806282722513, "learning_rate": 9.287648608052881e-05, "loss": 3.5345611572265625, "step": 910 }, { "epoch": 1.9277486910994766, "learning_rate": 9.389710680668848e-05, "loss": 3.4658973693847654, "step": 920 }, { "epoch": 1.9486910994764397, "learning_rate": 9.491772753284813e-05, "loss": 3.4885902404785156, "step": 930 }, { "epoch": 1.9696335078534033, "learning_rate": 9.593834825900779e-05, "loss": 3.438787078857422, "step": 940 }, { "epoch": 1.9905759162303664, "learning_rate": 9.695896898516746e-05, "loss": 3.434320831298828, "step": 950 }, { "epoch": 1.998952879581152, "eval_loss": 3.1282973289489746, "eval_runtime": 73.33, "eval_samples_per_second": 149.407, "step": 954 }, { "epoch": 2.012565445026178, "learning_rate": 9.797958971132711e-05, "loss": 3.5866859436035154, "step": 960 }, { "epoch": 2.033507853403141, "learning_rate": 9.900021043748677e-05, "loss": 3.4268508911132813, "step": 970 }, { "epoch": 2.054450261780105, "learning_rate": 0.00010002083116364643, "loss": 3.3977298736572266, "step": 980 }, { "epoch": 2.075392670157068, "learning_rate": 0.00010104145188980609, "loss": 3.349309539794922, "step": 990 }, { "epoch": 2.0963350785340316, "learning_rate": 0.00010206207261596574, "loss": 3.4084583282470704, "step": 1000 }, { "epoch": 2.1172774869109947, "learning_rate": 0.0001030826933421254, "loss": 3.3497646331787108, "step": 1010 }, { "epoch": 2.1382198952879583, "learning_rate": 0.00010410331406828505, "loss": 3.2944507598876953, "step": 1020 }, { "epoch": 2.1591623036649215, "learning_rate": 0.00010512393479444471, "loss": 3.312997055053711, "step": 1030 }, { "epoch": 2.180104712041885, "learning_rate": 0.00010614455552060436, "loss": 3.3428993225097656, "step": 1040 }, { "epoch": 2.201047120418848, "learning_rate": 0.00010716517624676403, "loss": 3.292295455932617, "step": 1050 }, { "epoch": 2.2219895287958114, "learning_rate": 0.00010818579697292369, "loss": 3.291975402832031, "step": 1060 }, { "epoch": 2.242931937172775, "learning_rate": 0.00010920641769908334, "loss": 3.2534629821777346, "step": 1070 }, { "epoch": 2.263874345549738, "learning_rate": 0.000110227038425243, "loss": 3.247083282470703, "step": 1080 }, { "epoch": 2.2848167539267017, "learning_rate": 0.00011124765915140266, "loss": 3.253615951538086, "step": 1090 }, { "epoch": 2.305759162303665, "learning_rate": 0.00011226827987756232, "loss": 3.2301055908203127, "step": 1100 }, { "epoch": 2.3267015706806284, "learning_rate": 0.00011328890060372197, "loss": 3.241224670410156, "step": 1110 }, { "epoch": 2.3476439790575916, "learning_rate": 0.00011430952132988164, "loss": 3.210051345825195, "step": 1120 }, { "epoch": 2.368586387434555, "learning_rate": 0.00011533014205604128, "loss": 3.225263214111328, "step": 1130 }, { "epoch": 2.3895287958115183, "learning_rate": 0.00011635076278220094, "loss": 3.1904216766357423, "step": 1140 }, { "epoch": 2.4104712041884815, "learning_rate": 0.00011737138350836059, "loss": 3.230540466308594, "step": 1150 }, { "epoch": 2.431413612565445, "learning_rate": 0.00011839200423452026, "loss": 3.191972351074219, "step": 1160 }, { "epoch": 2.4523560209424082, "learning_rate": 0.00011941262496067991, "loss": 3.191108512878418, "step": 1170 }, { "epoch": 2.473298429319372, "learning_rate": 0.00012043324568683957, "loss": 3.192665863037109, "step": 1180 }, { "epoch": 2.494240837696335, "learning_rate": 0.00012145386641299924, "loss": 3.1610179901123048, "step": 1190 }, { "epoch": 2.5151832460732986, "learning_rate": 0.0001224744871391589, "loss": 3.1794748306274414, "step": 1200 }, { "epoch": 2.5361256544502617, "learning_rate": 0.00012349510786531856, "loss": 3.2144695281982423, "step": 1210 }, { "epoch": 2.5570680628272253, "learning_rate": 0.0001245157285914782, "loss": 3.147447204589844, "step": 1220 }, { "epoch": 2.5780104712041885, "learning_rate": 0.00012553634931763784, "loss": 3.1747854232788084, "step": 1230 }, { "epoch": 2.5989528795811516, "learning_rate": 0.0001265569700437975, "loss": 3.137411880493164, "step": 1240 }, { "epoch": 2.619895287958115, "learning_rate": 0.00012757759076995718, "loss": 3.157614326477051, "step": 1250 }, { "epoch": 2.640837696335079, "learning_rate": 0.00012859821149611682, "loss": 3.1284500122070313, "step": 1260 }, { "epoch": 2.661780104712042, "learning_rate": 0.0001296188322222765, "loss": 3.168661117553711, "step": 1270 }, { "epoch": 2.682722513089005, "learning_rate": 0.00013063945294843616, "loss": 3.1208589553833006, "step": 1280 }, { "epoch": 2.7036649214659687, "learning_rate": 0.0001316600736745958, "loss": 3.177284049987793, "step": 1290 }, { "epoch": 2.724607329842932, "learning_rate": 0.00013268069440075547, "loss": 3.129215431213379, "step": 1300 }, { "epoch": 2.7455497382198955, "learning_rate": 0.00013370131512691514, "loss": 3.1385051727294924, "step": 1310 }, { "epoch": 2.7664921465968586, "learning_rate": 0.00013472193585307478, "loss": 3.093943977355957, "step": 1320 }, { "epoch": 2.7874345549738218, "learning_rate": 0.00013574255657923444, "loss": 3.125334358215332, "step": 1330 }, { "epoch": 2.8083769633507853, "learning_rate": 0.00013676317730539409, "loss": 3.1063247680664063, "step": 1340 }, { "epoch": 2.829319371727749, "learning_rate": 0.00013778379803155375, "loss": 3.153512382507324, "step": 1350 }, { "epoch": 2.850261780104712, "learning_rate": 0.0001388044187577134, "loss": 3.0612106323242188, "step": 1360 }, { "epoch": 2.8712041884816752, "learning_rate": 0.00013982503948387306, "loss": 3.0893680572509767, "step": 1370 }, { "epoch": 2.892146596858639, "learning_rate": 0.00014084566021003273, "loss": 3.0948190689086914, "step": 1380 }, { "epoch": 2.913089005235602, "learning_rate": 0.00014186628093619237, "loss": 3.052564811706543, "step": 1390 }, { "epoch": 2.9340314136125656, "learning_rate": 0.00014288690166235201, "loss": 3.061579132080078, "step": 1400 }, { "epoch": 2.9549738219895287, "learning_rate": 0.00014390752238851168, "loss": 3.0893646240234376, "step": 1410 }, { "epoch": 2.975916230366492, "learning_rate": 0.00014492814311467135, "loss": 3.0637126922607423, "step": 1420 }, { "epoch": 2.9968586387434555, "learning_rate": 0.000145948763840831, "loss": 3.063129425048828, "step": 1430 }, { "epoch": 2.998952879581152, "eval_loss": 2.8699653148651123, "eval_runtime": 73.0344, "eval_samples_per_second": 150.012, "step": 1431 }, { "epoch": 3.018848167539267, "learning_rate": 0.00014696938456699066, "loss": 3.175088310241699, "step": 1440 }, { "epoch": 3.0397905759162303, "learning_rate": 0.00014799000529315033, "loss": 3.0708381652832033, "step": 1450 }, { "epoch": 3.060732984293194, "learning_rate": 0.00014901062601930997, "loss": 3.043408012390137, "step": 1460 }, { "epoch": 3.081675392670157, "learning_rate": 0.00015003124674546964, "loss": 3.039161491394043, "step": 1470 }, { "epoch": 3.1026178010471206, "learning_rate": 0.0001510518674716293, "loss": 3.0207361221313476, "step": 1480 }, { "epoch": 3.1235602094240837, "learning_rate": 0.00015207248819778895, "loss": 3.0527109146118163, "step": 1490 }, { "epoch": 3.144502617801047, "learning_rate": 0.00015309310892394862, "loss": 3.045629692077637, "step": 1500 }, { "epoch": 3.1654450261780105, "learning_rate": 0.00015411372965010828, "loss": 3.0113618850708006, "step": 1510 }, { "epoch": 3.1863874345549736, "learning_rate": 0.00015513435037626793, "loss": 3.0372419357299805, "step": 1520 }, { "epoch": 3.2073298429319372, "learning_rate": 0.0001561549711024276, "loss": 3.0219789505004884, "step": 1530 }, { "epoch": 3.2282722513089004, "learning_rate": 0.00015717559182858723, "loss": 3.0140264511108397, "step": 1540 }, { "epoch": 3.249214659685864, "learning_rate": 0.0001581962125547469, "loss": 3.039451789855957, "step": 1550 }, { "epoch": 3.270157068062827, "learning_rate": 0.00015921683328090657, "loss": 3.0277042388916016, "step": 1560 }, { "epoch": 3.2910994764397907, "learning_rate": 0.0001602374540070662, "loss": 3.0352230072021484, "step": 1570 }, { "epoch": 3.312041884816754, "learning_rate": 0.00016125807473322588, "loss": 3.0115480422973633, "step": 1580 }, { "epoch": 3.332984293193717, "learning_rate": 0.00016227869545938555, "loss": 3.008187103271484, "step": 1590 }, { "epoch": 3.3539267015706806, "learning_rate": 0.00016329931618554516, "loss": 3.02030086517334, "step": 1600 }, { "epoch": 3.374869109947644, "learning_rate": 0.00016431993691170483, "loss": 3.0034799575805664, "step": 1610 }, { "epoch": 3.3958115183246074, "learning_rate": 0.0001653405576378645, "loss": 3.0058149337768554, "step": 1620 }, { "epoch": 3.4167539267015705, "learning_rate": 0.00016636117836402414, "loss": 2.9678651809692385, "step": 1630 }, { "epoch": 3.437696335078534, "learning_rate": 0.0001673817990901838, "loss": 2.999662399291992, "step": 1640 }, { "epoch": 3.4586387434554973, "learning_rate": 0.00016840241981634345, "loss": 2.9900545120239257, "step": 1650 }, { "epoch": 3.479581151832461, "learning_rate": 0.00016942304054250312, "loss": 2.967659759521484, "step": 1660 }, { "epoch": 3.500523560209424, "learning_rate": 0.0001704436612686628, "loss": 2.9933372497558595, "step": 1670 }, { "epoch": 3.521465968586387, "learning_rate": 0.00017146428199482243, "loss": 2.9620410919189455, "step": 1680 }, { "epoch": 3.5424083769633508, "learning_rate": 0.0001724849027209821, "loss": 2.973393440246582, "step": 1690 }, { "epoch": 3.5633507853403144, "learning_rate": 0.00017350552344714176, "loss": 2.9914371490478517, "step": 1700 }, { "epoch": 3.5842931937172775, "learning_rate": 0.0001745261441733014, "loss": 2.959955596923828, "step": 1710 }, { "epoch": 3.6052356020942407, "learning_rate": 0.00017554676489946107, "loss": 2.947012519836426, "step": 1720 }, { "epoch": 3.6261780104712042, "learning_rate": 0.00017656738562562074, "loss": 2.9780450820922852, "step": 1730 }, { "epoch": 3.6471204188481674, "learning_rate": 0.00017758800635178038, "loss": 2.9911376953125, "step": 1740 }, { "epoch": 3.668062827225131, "learning_rate": 0.00017860862707794005, "loss": 2.936799430847168, "step": 1750 }, { "epoch": 3.689005235602094, "learning_rate": 0.00017962924780409972, "loss": 2.9443117141723634, "step": 1760 }, { "epoch": 3.7099476439790577, "learning_rate": 0.00018064986853025936, "loss": 2.9714879989624023, "step": 1770 }, { "epoch": 3.730890052356021, "learning_rate": 0.00018167048925641903, "loss": 2.9554422378540037, "step": 1780 }, { "epoch": 3.7518324607329845, "learning_rate": 0.0001826911099825787, "loss": 2.971892738342285, "step": 1790 }, { "epoch": 3.7727748691099476, "learning_rate": 0.00018371173070873834, "loss": 2.9589488983154295, "step": 1800 }, { "epoch": 3.793717277486911, "learning_rate": 0.000184732351434898, "loss": 2.944401741027832, "step": 1810 }, { "epoch": 3.8146596858638744, "learning_rate": 0.00018575297216105762, "loss": 2.926609420776367, "step": 1820 }, { "epoch": 3.8356020942408375, "learning_rate": 0.0001867735928872173, "loss": 2.906996726989746, "step": 1830 }, { "epoch": 3.856544502617801, "learning_rate": 0.00018779421361337696, "loss": 2.9651628494262696, "step": 1840 }, { "epoch": 3.8774869109947643, "learning_rate": 0.0001888148343395366, "loss": 2.94600887298584, "step": 1850 }, { "epoch": 3.898429319371728, "learning_rate": 0.00018983545506569627, "loss": 2.915750503540039, "step": 1860 }, { "epoch": 3.919371727748691, "learning_rate": 0.00019085607579185594, "loss": 2.942264747619629, "step": 1870 }, { "epoch": 3.9403141361256546, "learning_rate": 0.00019187669651801558, "loss": 2.9150556564331054, "step": 1880 }, { "epoch": 3.9612565445026178, "learning_rate": 0.00019289731724417525, "loss": 2.9057634353637694, "step": 1890 }, { "epoch": 3.982198952879581, "learning_rate": 0.0001939179379703349, "loss": 2.934947967529297, "step": 1900 }, { "epoch": 3.998952879581152, "eval_loss": 2.7904012203216553, "eval_runtime": 73.0809, "eval_samples_per_second": 149.916, "step": 1908 }, { "epoch": 4.004188481675393, "learning_rate": 0.00019493855869649455, "loss": 3.0482250213623048, "step": 1910 }, { "epoch": 4.025130890052356, "learning_rate": 0.00019595917942265422, "loss": 2.9126008987426757, "step": 1920 }, { "epoch": 4.046073298429319, "learning_rate": 0.0001969798001488139, "loss": 2.903646469116211, "step": 1930 }, { "epoch": 4.067015706806282, "learning_rate": 0.00019800042087497353, "loss": 2.9102720260620116, "step": 1940 }, { "epoch": 4.0879581151832465, "learning_rate": 0.0001990210416011332, "loss": 2.9236400604248045, "step": 1950 }, { "epoch": 4.10890052356021, "learning_rate": 0.00020004166232729287, "loss": 2.8818483352661133, "step": 1960 }, { "epoch": 4.129842931937173, "learning_rate": 0.0002010622830534525, "loss": 2.9328163146972654, "step": 1970 }, { "epoch": 4.150785340314136, "learning_rate": 0.00020208290377961218, "loss": 2.896937370300293, "step": 1980 }, { "epoch": 4.171727748691099, "learning_rate": 0.00020310352450577185, "loss": 2.9203187942504885, "step": 1990 }, { "epoch": 4.192670157068063, "learning_rate": 0.0002041241452319315, "loss": 2.887624740600586, "step": 2000 }, { "epoch": 4.213612565445026, "learning_rate": 0.00020514476595809116, "loss": 2.89230899810791, "step": 2010 }, { "epoch": 4.234554973821989, "learning_rate": 0.0002061653866842508, "loss": 2.8896648406982424, "step": 2020 }, { "epoch": 4.255497382198953, "learning_rate": 0.00020718600741041044, "loss": 2.922181510925293, "step": 2030 }, { "epoch": 4.276439790575917, "learning_rate": 0.0002082066281365701, "loss": 2.888766860961914, "step": 2040 }, { "epoch": 4.29738219895288, "learning_rate": 0.00020922724886272975, "loss": 2.8884586334228515, "step": 2050 }, { "epoch": 4.318324607329843, "learning_rate": 0.00021024786958888942, "loss": 2.8894960403442385, "step": 2060 }, { "epoch": 4.339267015706806, "learning_rate": 0.00021126849031504908, "loss": 2.88952522277832, "step": 2070 }, { "epoch": 4.36020942408377, "learning_rate": 0.00021228911104120873, "loss": 2.8968246459960936, "step": 2080 }, { "epoch": 4.381151832460733, "learning_rate": 0.0002133097317673684, "loss": 2.8720794677734376, "step": 2090 }, { "epoch": 4.402094240837696, "learning_rate": 0.00021433035249352806, "loss": 2.9035682678222656, "step": 2100 }, { "epoch": 4.4230366492146596, "learning_rate": 0.0002153509732196877, "loss": 2.8975749969482423, "step": 2110 }, { "epoch": 4.443979057591623, "learning_rate": 0.00021637159394584737, "loss": 2.8942371368408204, "step": 2120 }, { "epoch": 4.464921465968587, "learning_rate": 0.00021739221467200704, "loss": 2.8582950592041017, "step": 2130 }, { "epoch": 4.48586387434555, "learning_rate": 0.00021841283539816668, "loss": 2.8642202377319337, "step": 2140 }, { "epoch": 4.506806282722513, "learning_rate": 0.00021943345612432635, "loss": 2.8771089553833007, "step": 2150 }, { "epoch": 4.527748691099476, "learning_rate": 0.000220454076850486, "loss": 2.8463533401489256, "step": 2160 }, { "epoch": 4.548691099476439, "learning_rate": 0.00022147469757664566, "loss": 2.865732765197754, "step": 2170 }, { "epoch": 4.569633507853403, "learning_rate": 0.00022249531830280533, "loss": 2.8657468795776366, "step": 2180 }, { "epoch": 4.5905759162303665, "learning_rate": 0.00022351593902896497, "loss": 2.8737346649169924, "step": 2190 }, { "epoch": 4.61151832460733, "learning_rate": 0.00022453655975512464, "loss": 2.906464385986328, "step": 2200 }, { "epoch": 4.632460732984293, "learning_rate": 0.0002255571804812843, "loss": 2.8711727142333983, "step": 2210 }, { "epoch": 4.653403141361257, "learning_rate": 0.00022657780120744395, "loss": 2.866417121887207, "step": 2220 }, { "epoch": 4.67434554973822, "learning_rate": 0.00022759842193360361, "loss": 2.873806190490723, "step": 2230 }, { "epoch": 4.695287958115183, "learning_rate": 0.00022861904265976328, "loss": 2.867740249633789, "step": 2240 }, { "epoch": 4.716230366492146, "learning_rate": 0.0002296396633859229, "loss": 2.848478317260742, "step": 2250 }, { "epoch": 4.73717277486911, "learning_rate": 0.00023066028411208256, "loss": 2.865742492675781, "step": 2260 }, { "epoch": 4.7581151832460735, "learning_rate": 0.00023168090483824223, "loss": 2.8338348388671877, "step": 2270 }, { "epoch": 4.779057591623037, "learning_rate": 0.00023270152556440187, "loss": 2.848302459716797, "step": 2280 }, { "epoch": 4.8, "learning_rate": 0.00023372214629056154, "loss": 2.847154235839844, "step": 2290 }, { "epoch": 4.820942408376963, "learning_rate": 0.00023474276701672118, "loss": 2.889314651489258, "step": 2300 }, { "epoch": 4.841884816753927, "learning_rate": 0.00023576338774288085, "loss": 2.8715303421020506, "step": 2310 }, { "epoch": 4.86282722513089, "learning_rate": 0.00023678400846904052, "loss": 2.8583951950073243, "step": 2320 }, { "epoch": 4.883769633507853, "learning_rate": 0.00023780462919520016, "loss": 2.865638542175293, "step": 2330 }, { "epoch": 4.9047120418848165, "learning_rate": 0.00023882524992135983, "loss": 2.8511993408203127, "step": 2340 }, { "epoch": 4.92565445026178, "learning_rate": 0.0002398458706475195, "loss": 2.8662994384765623, "step": 2350 }, { "epoch": 4.946596858638744, "learning_rate": 0.00024086649137367914, "loss": 2.829090690612793, "step": 2360 }, { "epoch": 4.967539267015707, "learning_rate": 0.0002418871120998388, "loss": 2.8659574508666994, "step": 2370 }, { "epoch": 4.98848167539267, "learning_rate": 0.00024290773282599848, "loss": 2.8302743911743162, "step": 2380 }, { "epoch": 4.998952879581152, "eval_loss": 2.730773687362671, "eval_runtime": 73.1511, "eval_samples_per_second": 149.772, "step": 2385 }, { "epoch": 5.010471204188482, "learning_rate": 0.00024392835355215812, "loss": 2.994700050354004, "step": 2390 }, { "epoch": 5.031413612565445, "learning_rate": 0.0002449489742783178, "loss": 2.8189886093139647, "step": 2400 }, { "epoch": 5.052356020942408, "learning_rate": 0.00024596959500447745, "loss": 2.818514823913574, "step": 2410 }, { "epoch": 5.0732984293193715, "learning_rate": 0.0002469902157306371, "loss": 2.8165609359741213, "step": 2420 }, { "epoch": 5.0942408376963355, "learning_rate": 0.00024801083645679674, "loss": 2.8114208221435546, "step": 2430 }, { "epoch": 5.115183246073299, "learning_rate": 0.0002490314571829564, "loss": 2.841958236694336, "step": 2440 }, { "epoch": 5.136125654450262, "learning_rate": 0.00025005207790911607, "loss": 2.8192333221435546, "step": 2450 }, { "epoch": 5.157068062827225, "learning_rate": 0.0002510726986352757, "loss": 2.842849540710449, "step": 2460 }, { "epoch": 5.178010471204188, "learning_rate": 0.00025209331936143535, "loss": 2.797623062133789, "step": 2470 }, { "epoch": 5.198952879581152, "learning_rate": 0.000253113940087595, "loss": 2.8256582260131835, "step": 2480 }, { "epoch": 5.219895287958115, "learning_rate": 0.0002541345608137547, "loss": 2.853096771240234, "step": 2490 }, { "epoch": 5.2408376963350785, "learning_rate": 0.00025515518153991436, "loss": 2.8135236740112304, "step": 2500 }, { "epoch": 5.261780104712042, "learning_rate": 0.00025617580226607403, "loss": 2.826374053955078, "step": 2510 }, { "epoch": 5.282722513089006, "learning_rate": 0.00025719642299223364, "loss": 2.8227916717529298, "step": 2520 }, { "epoch": 5.303664921465969, "learning_rate": 0.0002582170437183933, "loss": 2.800633430480957, "step": 2530 }, { "epoch": 5.324607329842932, "learning_rate": 0.000259237664444553, "loss": 2.8282939910888674, "step": 2540 }, { "epoch": 5.345549738219895, "learning_rate": 0.00026025828517071265, "loss": 2.789654350280762, "step": 2550 }, { "epoch": 5.366492146596858, "learning_rate": 0.0002612789058968723, "loss": 2.832943916320801, "step": 2560 }, { "epoch": 5.387434554973822, "learning_rate": 0.00026229952662303193, "loss": 2.7946260452270506, "step": 2570 }, { "epoch": 5.408376963350785, "learning_rate": 0.0002633201473491916, "loss": 2.8106937408447266, "step": 2580 }, { "epoch": 5.429319371727749, "learning_rate": 0.00026434076807535127, "loss": 2.807754135131836, "step": 2590 }, { "epoch": 5.450261780104712, "learning_rate": 0.00026536138880151093, "loss": 2.809930992126465, "step": 2600 }, { "epoch": 5.471204188481676, "learning_rate": 0.0002663820095276706, "loss": 2.824372100830078, "step": 2610 }, { "epoch": 5.492146596858639, "learning_rate": 0.00026740263025383027, "loss": 2.797639846801758, "step": 2620 }, { "epoch": 5.513089005235602, "learning_rate": 0.0002684232509799899, "loss": 2.7917612075805662, "step": 2630 }, { "epoch": 5.534031413612565, "learning_rate": 0.00026944387170614955, "loss": 2.818395233154297, "step": 2640 }, { "epoch": 5.554973821989529, "learning_rate": 0.0002704644924323092, "loss": 2.7933046340942385, "step": 2650 }, { "epoch": 5.575916230366492, "learning_rate": 0.0002714851131584689, "loss": 2.794571876525879, "step": 2660 }, { "epoch": 5.596858638743456, "learning_rate": 0.0002725057338846285, "loss": 2.8182382583618164, "step": 2670 }, { "epoch": 5.617801047120419, "learning_rate": 0.00027352635461078817, "loss": 2.80462703704834, "step": 2680 }, { "epoch": 5.638743455497382, "learning_rate": 0.00027454697533694784, "loss": 2.793869400024414, "step": 2690 }, { "epoch": 5.659685863874346, "learning_rate": 0.0002755675960631075, "loss": 2.7867889404296875, "step": 2700 }, { "epoch": 5.680628272251309, "learning_rate": 0.0002765882167892671, "loss": 2.759464073181152, "step": 2710 }, { "epoch": 5.701570680628272, "learning_rate": 0.0002776088375154268, "loss": 2.7763525009155274, "step": 2720 }, { "epoch": 5.722513089005235, "learning_rate": 0.00027862945824158646, "loss": 2.7857263565063475, "step": 2730 }, { "epoch": 5.7434554973821985, "learning_rate": 0.00027965007896774613, "loss": 2.793513298034668, "step": 2740 }, { "epoch": 5.7643979057591626, "learning_rate": 0.0002806706996939058, "loss": 2.780983543395996, "step": 2750 }, { "epoch": 5.785340314136126, "learning_rate": 0.00028169132042006546, "loss": 2.762991714477539, "step": 2760 }, { "epoch": 5.806282722513089, "learning_rate": 0.00028271194114622513, "loss": 2.7711687088012695, "step": 2770 }, { "epoch": 5.827225130890052, "learning_rate": 0.00028373256187238475, "loss": 2.7514936447143556, "step": 2780 }, { "epoch": 5.848167539267016, "learning_rate": 0.0002847531825985444, "loss": 2.7642467498779295, "step": 2790 }, { "epoch": 5.869109947643979, "learning_rate": 0.00028577380332470403, "loss": 2.766267776489258, "step": 2800 }, { "epoch": 5.890052356020942, "learning_rate": 0.0002867944240508637, "loss": 2.7898386001586912, "step": 2810 }, { "epoch": 5.9109947643979055, "learning_rate": 0.00028781504477702337, "loss": 2.7557825088500976, "step": 2820 }, { "epoch": 5.9319371727748695, "learning_rate": 0.00028883566550318303, "loss": 2.7799331665039064, "step": 2830 }, { "epoch": 5.952879581151833, "learning_rate": 0.0002898562862293427, "loss": 2.7844886779785156, "step": 2840 }, { "epoch": 5.973821989528796, "learning_rate": 0.0002908769069555023, "loss": 2.791143608093262, "step": 2850 }, { "epoch": 5.994764397905759, "learning_rate": 0.000291897527681662, "loss": 2.7879209518432617, "step": 2860 }, { "epoch": 5.998952879581152, "eval_loss": 2.704967737197876, "eval_runtime": 73.9106, "eval_samples_per_second": 148.233, "step": 2862 }, { "epoch": 6.016753926701571, "learning_rate": 0.00029291814840782165, "loss": 2.8958906173706054, "step": 2870 }, { "epoch": 6.037696335078534, "learning_rate": 0.0002939387691339813, "loss": 2.76577205657959, "step": 2880 }, { "epoch": 6.058638743455497, "learning_rate": 0.000294959389860141, "loss": 2.792632484436035, "step": 2890 }, { "epoch": 6.0795811518324605, "learning_rate": 0.00029598001058630066, "loss": 2.7835086822509765, "step": 2900 }, { "epoch": 6.100523560209424, "learning_rate": 0.00029700063131246027, "loss": 2.761422538757324, "step": 2910 }, { "epoch": 6.121465968586388, "learning_rate": 0.00029802125203861994, "loss": 2.763009452819824, "step": 2920 }, { "epoch": 6.142408376963351, "learning_rate": 0.0002990418727647796, "loss": 2.763312339782715, "step": 2930 }, { "epoch": 6.163350785340314, "learning_rate": 0.0003000624934909393, "loss": 2.764869499206543, "step": 2940 }, { "epoch": 6.184293193717277, "learning_rate": 0.00030108311421709894, "loss": 2.741547393798828, "step": 2950 }, { "epoch": 6.205235602094241, "learning_rate": 0.0003021037349432586, "loss": 2.7309391021728517, "step": 2960 }, { "epoch": 6.226178010471204, "learning_rate": 0.0003031243556694182, "loss": 2.7382484436035157, "step": 2970 }, { "epoch": 6.2471204188481675, "learning_rate": 0.0003041449763955779, "loss": 2.73870849609375, "step": 2980 }, { "epoch": 6.268062827225131, "learning_rate": 0.00030516559712173756, "loss": 2.7489850997924803, "step": 2990 }, { "epoch": 6.289005235602094, "learning_rate": 0.00030618621784789723, "loss": 2.720503807067871, "step": 3000 }, { "epoch": 6.309947643979058, "learning_rate": 0.0003072068385740569, "loss": 2.770734977722168, "step": 3010 }, { "epoch": 6.330890052356021, "learning_rate": 0.00030822745930021657, "loss": 2.7358495712280275, "step": 3020 }, { "epoch": 6.351832460732984, "learning_rate": 0.0003092480800263762, "loss": 2.7683557510375976, "step": 3030 }, { "epoch": 6.372774869109947, "learning_rate": 0.00031026870075253585, "loss": 2.7386125564575194, "step": 3040 }, { "epoch": 6.393717277486911, "learning_rate": 0.0003112893214786955, "loss": 2.730208396911621, "step": 3050 }, { "epoch": 6.4146596858638745, "learning_rate": 0.0003123099422048552, "loss": 2.753367042541504, "step": 3060 }, { "epoch": 6.435602094240838, "learning_rate": 0.00031333056293101486, "loss": 2.738628959655762, "step": 3070 }, { "epoch": 6.456544502617801, "learning_rate": 0.00031435118365717447, "loss": 2.7470897674560546, "step": 3080 }, { "epoch": 6.477486910994765, "learning_rate": 0.00031537180438333414, "loss": 2.7530258178710936, "step": 3090 }, { "epoch": 6.498429319371728, "learning_rate": 0.0003163924251094938, "loss": 2.7581613540649412, "step": 3100 }, { "epoch": 6.519371727748691, "learning_rate": 0.0003174130458356535, "loss": 2.719674301147461, "step": 3110 }, { "epoch": 6.540314136125654, "learning_rate": 0.00031843366656181314, "loss": 2.7147310256958006, "step": 3120 }, { "epoch": 6.561256544502617, "learning_rate": 0.0003194542872879728, "loss": 2.731971549987793, "step": 3130 }, { "epoch": 6.5821989528795815, "learning_rate": 0.0003204749080141324, "loss": 2.73608512878418, "step": 3140 }, { "epoch": 6.603141361256545, "learning_rate": 0.0003214955287402921, "loss": 2.7456966400146485, "step": 3150 }, { "epoch": 6.624083769633508, "learning_rate": 0.00032251614946645176, "loss": 2.7154998779296875, "step": 3160 }, { "epoch": 6.645026178010471, "learning_rate": 0.00032353677019261143, "loss": 2.737691116333008, "step": 3170 }, { "epoch": 6.665968586387434, "learning_rate": 0.0003245573909187711, "loss": 2.734482192993164, "step": 3180 }, { "epoch": 6.686910994764398, "learning_rate": 0.00032557801164493077, "loss": 2.74371395111084, "step": 3190 }, { "epoch": 6.707853403141361, "learning_rate": 0.0003265986323710903, "loss": 2.7436079025268554, "step": 3200 }, { "epoch": 6.728795811518324, "learning_rate": 0.00032761925309725, "loss": 2.7301549911499023, "step": 3210 }, { "epoch": 6.749738219895288, "learning_rate": 0.00032863987382340966, "loss": 2.7386409759521486, "step": 3220 }, { "epoch": 6.770680628272252, "learning_rate": 0.00032966049454956933, "loss": 2.718589973449707, "step": 3230 }, { "epoch": 6.791623036649215, "learning_rate": 0.000330681115275729, "loss": 2.7293943405151366, "step": 3240 }, { "epoch": 6.812565445026178, "learning_rate": 0.0003317017360018886, "loss": 2.730001449584961, "step": 3250 }, { "epoch": 6.833507853403141, "learning_rate": 0.0003327223567280483, "loss": 2.7539945602416993, "step": 3260 }, { "epoch": 6.854450261780105, "learning_rate": 0.00033374297745420795, "loss": 2.7157943725585936, "step": 3270 }, { "epoch": 6.875392670157068, "learning_rate": 0.0003347635981803676, "loss": 2.7142717361450197, "step": 3280 }, { "epoch": 6.896335078534031, "learning_rate": 0.0003357842189065273, "loss": 2.723209190368652, "step": 3290 }, { "epoch": 6.9172774869109945, "learning_rate": 0.0003368048396326869, "loss": 2.725255012512207, "step": 3300 }, { "epoch": 6.938219895287958, "learning_rate": 0.00033782546035884657, "loss": 2.7175876617431642, "step": 3310 }, { "epoch": 6.959162303664922, "learning_rate": 0.00033884608108500624, "loss": 2.7198652267456054, "step": 3320 }, { "epoch": 6.980104712041885, "learning_rate": 0.0003398667018111659, "loss": 2.719003105163574, "step": 3330 }, { "epoch": 6.998952879581152, "eval_loss": 2.657553195953369, "eval_runtime": 73.1108, "eval_samples_per_second": 149.855, "step": 3339 }, { "epoch": 7.002094240837696, "learning_rate": 0.0003408873225373256, "loss": 2.8445356369018553, "step": 3340 }, { "epoch": 7.02303664921466, "learning_rate": 0.00034190794326348524, "loss": 2.6978481292724608, "step": 3350 }, { "epoch": 7.043979057591623, "learning_rate": 0.00034292856398964486, "loss": 2.6844539642333984, "step": 3360 }, { "epoch": 7.064921465968586, "learning_rate": 0.0003439491847158045, "loss": 2.7076372146606444, "step": 3370 }, { "epoch": 7.0858638743455495, "learning_rate": 0.0003449698054419642, "loss": 2.7051311492919923, "step": 3380 }, { "epoch": 7.106806282722513, "learning_rate": 0.00034599042616812386, "loss": 2.700316619873047, "step": 3390 }, { "epoch": 7.127748691099477, "learning_rate": 0.00034701104689428353, "loss": 2.732998085021973, "step": 3400 }, { "epoch": 7.14869109947644, "learning_rate": 0.0003480316676204432, "loss": 2.7185394287109377, "step": 3410 }, { "epoch": 7.169633507853403, "learning_rate": 0.0003490522883466028, "loss": 2.705458068847656, "step": 3420 }, { "epoch": 7.190575916230366, "learning_rate": 0.0003500729090727625, "loss": 2.708370590209961, "step": 3430 }, { "epoch": 7.21151832460733, "learning_rate": 0.00035109352979892215, "loss": 2.7156093597412108, "step": 3440 }, { "epoch": 7.232460732984293, "learning_rate": 0.0003521141505250818, "loss": 2.6954971313476563, "step": 3450 }, { "epoch": 7.2534031413612565, "learning_rate": 0.0003531347712512415, "loss": 2.7061573028564454, "step": 3460 }, { "epoch": 7.27434554973822, "learning_rate": 0.00035415539197740115, "loss": 2.7047218322753905, "step": 3470 }, { "epoch": 7.295287958115184, "learning_rate": 0.00035517601270356077, "loss": 2.6945539474487306, "step": 3480 }, { "epoch": 7.316230366492147, "learning_rate": 0.00035619663342972044, "loss": 2.67620735168457, "step": 3490 }, { "epoch": 7.33717277486911, "learning_rate": 0.0003572172541558801, "loss": 2.6955190658569337, "step": 3500 }, { "epoch": 7.358115183246073, "learning_rate": 0.00035823787488203977, "loss": 2.684027671813965, "step": 3510 }, { "epoch": 7.379057591623036, "learning_rate": 0.00035925849560819944, "loss": 2.698904800415039, "step": 3520 }, { "epoch": 7.4, "learning_rate": 0.0003602791163343591, "loss": 2.695516014099121, "step": 3530 }, { "epoch": 7.4209424083769635, "learning_rate": 0.0003612997370605187, "loss": 2.7022026062011717, "step": 3540 }, { "epoch": 7.441884816753927, "learning_rate": 0.0003623203577866784, "loss": 2.703862762451172, "step": 3550 }, { "epoch": 7.46282722513089, "learning_rate": 0.00036334097851283806, "loss": 2.6929235458374023, "step": 3560 }, { "epoch": 7.483769633507853, "learning_rate": 0.00036436159923899773, "loss": 2.6659242630004885, "step": 3570 }, { "epoch": 7.504712041884817, "learning_rate": 0.0003653822199651574, "loss": 2.660002899169922, "step": 3580 }, { "epoch": 7.52565445026178, "learning_rate": 0.000366402840691317, "loss": 2.699945831298828, "step": 3590 }, { "epoch": 7.546596858638743, "learning_rate": 0.0003674234614174767, "loss": 2.677934455871582, "step": 3600 }, { "epoch": 7.5675392670157065, "learning_rate": 0.00036844408214363635, "loss": 2.6725204467773436, "step": 3610 }, { "epoch": 7.5884816753926705, "learning_rate": 0.000369464702869796, "loss": 2.701091003417969, "step": 3620 }, { "epoch": 7.609424083769634, "learning_rate": 0.00037048532359595563, "loss": 2.6974748611450194, "step": 3630 }, { "epoch": 7.630366492146597, "learning_rate": 0.00037150594432211524, "loss": 2.6910377502441407, "step": 3640 }, { "epoch": 7.65130890052356, "learning_rate": 0.0003725265650482749, "loss": 2.6975467681884764, "step": 3650 }, { "epoch": 7.672251308900524, "learning_rate": 0.0003735471857744346, "loss": 2.6613983154296874, "step": 3660 }, { "epoch": 7.693193717277487, "learning_rate": 0.00037456780650059425, "loss": 2.6739892959594727, "step": 3670 }, { "epoch": 7.71413612565445, "learning_rate": 0.0003755884272267539, "loss": 2.684556770324707, "step": 3680 }, { "epoch": 7.735078534031413, "learning_rate": 0.0003766090479529136, "loss": 2.662580680847168, "step": 3690 }, { "epoch": 7.756020942408377, "learning_rate": 0.0003776296686790732, "loss": 2.671764373779297, "step": 3700 }, { "epoch": 7.776963350785341, "learning_rate": 0.00037865028940523287, "loss": 2.687736701965332, "step": 3710 }, { "epoch": 7.797905759162304, "learning_rate": 0.00037967091013139253, "loss": 2.676908493041992, "step": 3720 }, { "epoch": 7.818848167539267, "learning_rate": 0.0003806915308575522, "loss": 2.6953929901123046, "step": 3730 }, { "epoch": 7.83979057591623, "learning_rate": 0.00038171215158371187, "loss": 2.7138477325439454, "step": 3740 }, { "epoch": 7.860732984293193, "learning_rate": 0.00038273277230987154, "loss": 2.658716583251953, "step": 3750 }, { "epoch": 7.881675392670157, "learning_rate": 0.00038375339303603115, "loss": 2.6610225677490233, "step": 3760 }, { "epoch": 7.90261780104712, "learning_rate": 0.0003847740137621908, "loss": 2.6816684722900392, "step": 3770 }, { "epoch": 7.923560209424084, "learning_rate": 0.0003857946344883505, "loss": 2.69453125, "step": 3780 }, { "epoch": 7.944502617801048, "learning_rate": 0.00038681525521451016, "loss": 2.6690832138061524, "step": 3790 }, { "epoch": 7.965445026178011, "learning_rate": 0.0003878358759406698, "loss": 2.6719383239746093, "step": 3800 }, { "epoch": 7.986387434554974, "learning_rate": 0.00038885649666682944, "loss": 2.6821807861328124, "step": 3810 }, { "epoch": 7.998952879581152, "eval_loss": 2.6321663856506348, "eval_runtime": 73.4258, "eval_samples_per_second": 149.212, "step": 3816 }, { "epoch": 8.008376963350786, "learning_rate": 0.0003898771173929891, "loss": 2.792423057556152, "step": 3820 }, { "epoch": 8.029319371727748, "learning_rate": 0.0003908977381191488, "loss": 2.639652061462402, "step": 3830 }, { "epoch": 8.050261780104712, "learning_rate": 0.00039191835884530845, "loss": 2.658628463745117, "step": 3840 }, { "epoch": 8.071204188481675, "learning_rate": 0.0003929389795714681, "loss": 2.6759145736694334, "step": 3850 }, { "epoch": 8.092146596858639, "learning_rate": 0.0003939596002976278, "loss": 2.7123003005981445, "step": 3860 }, { "epoch": 8.113089005235603, "learning_rate": 0.0003949802210237874, "loss": 2.654216766357422, "step": 3870 }, { "epoch": 8.134031413612565, "learning_rate": 0.00039600084174994706, "loss": 2.653605651855469, "step": 3880 }, { "epoch": 8.154973821989529, "learning_rate": 0.00039702146247610673, "loss": 2.6602855682373048, "step": 3890 }, { "epoch": 8.175916230366493, "learning_rate": 0.0003980420832022664, "loss": 2.664006805419922, "step": 3900 }, { "epoch": 8.196858638743455, "learning_rate": 0.00039906270392842607, "loss": 2.6484548568725588, "step": 3910 }, { "epoch": 8.21780104712042, "learning_rate": 0.00040008332465458574, "loss": 2.6500553131103515, "step": 3920 }, { "epoch": 8.238743455497382, "learning_rate": 0.00040110394538074535, "loss": 2.6387815475463867, "step": 3930 }, { "epoch": 8.259685863874346, "learning_rate": 0.000402124566106905, "loss": 2.666366958618164, "step": 3940 }, { "epoch": 8.28062827225131, "learning_rate": 0.0004031451868330647, "loss": 2.69473876953125, "step": 3950 }, { "epoch": 8.301570680628272, "learning_rate": 0.00040416580755922436, "loss": 2.649556350708008, "step": 3960 }, { "epoch": 8.322513089005236, "learning_rate": 0.000405186428285384, "loss": 2.661054801940918, "step": 3970 }, { "epoch": 8.343455497382198, "learning_rate": 0.0004062070490115437, "loss": 2.651872253417969, "step": 3980 }, { "epoch": 8.364397905759162, "learning_rate": 0.0004072276697377033, "loss": 2.657582473754883, "step": 3990 }, { "epoch": 8.385340314136126, "learning_rate": 0.000408248290463863, "loss": 2.647386932373047, "step": 4000 }, { "epoch": 8.406282722513089, "learning_rate": 0.00040926891119002264, "loss": 2.622752380371094, "step": 4010 }, { "epoch": 8.427225130890053, "learning_rate": 0.0004102895319161823, "loss": 2.6471187591552736, "step": 4020 }, { "epoch": 8.448167539267015, "learning_rate": 0.000411310152642342, "loss": 2.6503711700439454, "step": 4030 }, { "epoch": 8.469109947643979, "learning_rate": 0.0004123307733685016, "loss": 2.662638854980469, "step": 4040 }, { "epoch": 8.490052356020943, "learning_rate": 0.00041335139409466126, "loss": 2.6614749908447264, "step": 4050 }, { "epoch": 8.510994764397905, "learning_rate": 0.0004143720148208209, "loss": 2.651297760009766, "step": 4060 }, { "epoch": 8.53193717277487, "learning_rate": 0.00041539263554698055, "loss": 2.654410552978516, "step": 4070 }, { "epoch": 8.552879581151833, "learning_rate": 0.0004164132562731402, "loss": 2.6440593719482424, "step": 4080 }, { "epoch": 8.573821989528795, "learning_rate": 0.00041743387699929983, "loss": 2.6485244750976564, "step": 4090 }, { "epoch": 8.59476439790576, "learning_rate": 0.0004184544977254595, "loss": 2.645181655883789, "step": 4100 }, { "epoch": 8.615706806282722, "learning_rate": 0.00041947511845161916, "loss": 2.6527120590209963, "step": 4110 }, { "epoch": 8.636649214659686, "learning_rate": 0.00042049573917777883, "loss": 2.644283676147461, "step": 4120 }, { "epoch": 8.65759162303665, "learning_rate": 0.0004215163599039385, "loss": 2.6589195251464846, "step": 4130 }, { "epoch": 8.678534031413612, "learning_rate": 0.00042253698063009817, "loss": 2.638910675048828, "step": 4140 }, { "epoch": 8.699476439790576, "learning_rate": 0.0004235576013562578, "loss": 2.6332365036010743, "step": 4150 }, { "epoch": 8.72041884816754, "learning_rate": 0.00042457822208241745, "loss": 2.6593414306640626, "step": 4160 }, { "epoch": 8.741361256544502, "learning_rate": 0.0004255988428085771, "loss": 2.6320539474487306, "step": 4170 }, { "epoch": 8.762303664921467, "learning_rate": 0.0004266194635347368, "loss": 2.6423057556152343, "step": 4180 }, { "epoch": 8.783246073298429, "learning_rate": 0.00042764008426089646, "loss": 2.6288238525390626, "step": 4190 }, { "epoch": 8.804188481675393, "learning_rate": 0.0004286607049870561, "loss": 2.624134635925293, "step": 4200 }, { "epoch": 8.825130890052357, "learning_rate": 0.00042968132571321574, "loss": 2.6502635955810545, "step": 4210 }, { "epoch": 8.846073298429319, "learning_rate": 0.0004307019464393754, "loss": 2.630023384094238, "step": 4220 }, { "epoch": 8.867015706806283, "learning_rate": 0.0004317225671655351, "loss": 2.6346521377563477, "step": 4230 }, { "epoch": 8.887958115183245, "learning_rate": 0.00043274318789169474, "loss": 2.612893295288086, "step": 4240 }, { "epoch": 8.90890052356021, "learning_rate": 0.0004337638086178544, "loss": 2.605606460571289, "step": 4250 }, { "epoch": 8.929842931937173, "learning_rate": 0.0004347844293440141, "loss": 2.6086076736450194, "step": 4260 }, { "epoch": 8.950785340314136, "learning_rate": 0.0004358050500701737, "loss": 2.6492921829223635, "step": 4270 }, { "epoch": 8.9717277486911, "learning_rate": 0.00043682567079633336, "loss": 2.60416202545166, "step": 4280 }, { "epoch": 8.992670157068062, "learning_rate": 0.00043784629152249303, "loss": 2.639349937438965, "step": 4290 }, { "epoch": 8.998952879581152, "eval_loss": 2.6156058311462402, "eval_runtime": 73.0048, "eval_samples_per_second": 150.072, "step": 4293 }, { "epoch": 9.014659685863874, "learning_rate": 0.0004388669122486527, "loss": 2.7455169677734377, "step": 4300 }, { "epoch": 9.035602094240838, "learning_rate": 0.00043988753297481237, "loss": 2.632405471801758, "step": 4310 }, { "epoch": 9.0565445026178, "learning_rate": 0.000440908153700972, "loss": 2.644683074951172, "step": 4320 }, { "epoch": 9.077486910994764, "learning_rate": 0.00044192877442713165, "loss": 2.620805358886719, "step": 4330 }, { "epoch": 9.098429319371728, "learning_rate": 0.0004429493951532913, "loss": 2.6189605712890627, "step": 4340 }, { "epoch": 9.11937172774869, "learning_rate": 0.000443970015879451, "loss": 2.614165687561035, "step": 4350 }, { "epoch": 9.140314136125655, "learning_rate": 0.00044499063660561065, "loss": 2.615524673461914, "step": 4360 }, { "epoch": 9.161256544502617, "learning_rate": 0.0004460112573317703, "loss": 2.623036003112793, "step": 4370 }, { "epoch": 9.182198952879581, "learning_rate": 0.00044703187805792994, "loss": 2.636097526550293, "step": 4380 }, { "epoch": 9.203141361256545, "learning_rate": 0.0004480524987840896, "loss": 2.5902896881103517, "step": 4390 }, { "epoch": 9.224083769633507, "learning_rate": 0.0004490731195102493, "loss": 2.596613121032715, "step": 4400 }, { "epoch": 9.245026178010471, "learning_rate": 0.00045009374023640894, "loss": 2.6126483917236327, "step": 4410 }, { "epoch": 9.265968586387434, "learning_rate": 0.0004511143609625686, "loss": 2.6270191192626955, "step": 4420 }, { "epoch": 9.286910994764398, "learning_rate": 0.0004521349816887283, "loss": 2.603664779663086, "step": 4430 }, { "epoch": 9.307853403141362, "learning_rate": 0.0004531556024148879, "loss": 2.618840980529785, "step": 4440 }, { "epoch": 9.328795811518324, "learning_rate": 0.00045417622314104756, "loss": 2.6355594635009765, "step": 4450 }, { "epoch": 9.349738219895288, "learning_rate": 0.00045519684386720723, "loss": 2.6463857650756837, "step": 4460 }, { "epoch": 9.370680628272252, "learning_rate": 0.0004562174645933669, "loss": 2.6071035385131838, "step": 4470 }, { "epoch": 9.391623036649214, "learning_rate": 0.00045723808531952657, "loss": 2.6336111068725585, "step": 4480 }, { "epoch": 9.412565445026178, "learning_rate": 0.0004582587060456861, "loss": 2.6231353759765623, "step": 4490 }, { "epoch": 9.43350785340314, "learning_rate": 0.0004592793267718458, "loss": 2.6107151031494142, "step": 4500 }, { "epoch": 9.454450261780105, "learning_rate": 0.00046029994749800546, "loss": 2.6213560104370117, "step": 4510 }, { "epoch": 9.475392670157069, "learning_rate": 0.00046132056822416513, "loss": 2.584638214111328, "step": 4520 }, { "epoch": 9.496335078534031, "learning_rate": 0.0004623411889503248, "loss": 2.6262628555297853, "step": 4530 }, { "epoch": 9.517277486910995, "learning_rate": 0.00046336180967648447, "loss": 2.5897647857666017, "step": 4540 }, { "epoch": 9.538219895287957, "learning_rate": 0.0004643824304026441, "loss": 2.6060354232788088, "step": 4550 }, { "epoch": 9.559162303664921, "learning_rate": 0.00046540305112880375, "loss": 2.6086732864379885, "step": 4560 }, { "epoch": 9.580104712041885, "learning_rate": 0.0004664236718549634, "loss": 2.5964023590087892, "step": 4570 }, { "epoch": 9.601047120418848, "learning_rate": 0.0004674442925811231, "loss": 2.622255325317383, "step": 4580 }, { "epoch": 9.621989528795812, "learning_rate": 0.00046846491330728275, "loss": 2.5982023239135743, "step": 4590 }, { "epoch": 9.642931937172776, "learning_rate": 0.00046948553403344237, "loss": 2.6015974044799806, "step": 4600 }, { "epoch": 9.663874345549738, "learning_rate": 0.00047050615475960204, "loss": 2.617095184326172, "step": 4610 }, { "epoch": 9.684816753926702, "learning_rate": 0.0004715267754857617, "loss": 2.5803293228149413, "step": 4620 }, { "epoch": 9.705759162303664, "learning_rate": 0.00047254739621192137, "loss": 2.615343475341797, "step": 4630 }, { "epoch": 9.726701570680628, "learning_rate": 0.00047356801693808104, "loss": 2.5800424575805665, "step": 4640 }, { "epoch": 9.747643979057592, "learning_rate": 0.0004745886376642407, "loss": 2.598635673522949, "step": 4650 }, { "epoch": 9.768586387434555, "learning_rate": 0.0004756092583904003, "loss": 2.615553283691406, "step": 4660 }, { "epoch": 9.789528795811519, "learning_rate": 0.00047662987911656, "loss": 2.582602882385254, "step": 4670 }, { "epoch": 9.810471204188481, "learning_rate": 0.00047765049984271966, "loss": 2.5848695755004885, "step": 4680 }, { "epoch": 9.831413612565445, "learning_rate": 0.00047867112056887933, "loss": 2.6057199478149413, "step": 4690 }, { "epoch": 9.852356020942409, "learning_rate": 0.000479691741295039, "loss": 2.6193204879760743, "step": 4700 }, { "epoch": 9.873298429319371, "learning_rate": 0.00048071236202119866, "loss": 2.6137775421142577, "step": 4710 }, { "epoch": 9.894240837696335, "learning_rate": 0.0004817329827473583, "loss": 2.613667678833008, "step": 4720 }, { "epoch": 9.915183246073298, "learning_rate": 0.00048275360347351795, "loss": 2.599857711791992, "step": 4730 }, { "epoch": 9.936125654450262, "learning_rate": 0.0004837742241996776, "loss": 2.579629325866699, "step": 4740 }, { "epoch": 9.957068062827226, "learning_rate": 0.0004847948449258373, "loss": 2.6212156295776365, "step": 4750 }, { "epoch": 9.978010471204188, "learning_rate": 0.00048581546565199695, "loss": 2.5806146621704102, "step": 4760 }, { "epoch": 9.998952879581152, "learning_rate": 0.0004868360863781566, "loss": 2.5807775497436523, "step": 4770 }, { "epoch": 9.998952879581152, "eval_loss": 2.604253053665161, "eval_runtime": 74.0972, "eval_samples_per_second": 147.86, "step": 4770 }, { "epoch": 10.020942408376964, "learning_rate": 0.00048785670710431623, "loss": 2.721482849121094, "step": 4780 }, { "epoch": 10.041884816753926, "learning_rate": 0.000488877327830476, "loss": 2.5954622268676757, "step": 4790 }, { "epoch": 10.06282722513089, "learning_rate": 0.0004898979485566356, "loss": 2.6108915328979494, "step": 4800 }, { "epoch": 10.083769633507853, "learning_rate": 0.0004909185692827952, "loss": 2.607802391052246, "step": 4810 }, { "epoch": 10.104712041884817, "learning_rate": 0.0004919391900089549, "loss": 2.594425010681152, "step": 4820 }, { "epoch": 10.12565445026178, "learning_rate": 0.0004929598107351145, "loss": 2.6012176513671874, "step": 4830 }, { "epoch": 10.146596858638743, "learning_rate": 0.0004939804314612742, "loss": 2.5987337112426756, "step": 4840 }, { "epoch": 10.167539267015707, "learning_rate": 0.0004950010521874339, "loss": 2.5843479156494142, "step": 4850 }, { "epoch": 10.188481675392671, "learning_rate": 0.0004960216729135935, "loss": 2.612710380554199, "step": 4860 }, { "epoch": 10.209424083769633, "learning_rate": 0.0004970422936397532, "loss": 2.608279991149902, "step": 4870 }, { "epoch": 10.230366492146597, "learning_rate": 0.0004980629143659128, "loss": 2.599015998840332, "step": 4880 }, { "epoch": 10.25130890052356, "learning_rate": 0.0004990835350920725, "loss": 2.6169193267822264, "step": 4890 }, { "epoch": 10.272251308900524, "learning_rate": 0.0005001041558182321, "loss": 2.5874061584472656, "step": 4900 }, { "epoch": 10.293193717277488, "learning_rate": 0.0005011247765443918, "loss": 2.5972221374511717, "step": 4910 }, { "epoch": 10.31413612565445, "learning_rate": 0.0005021453972705514, "loss": 2.592855453491211, "step": 4920 }, { "epoch": 10.335078534031414, "learning_rate": 0.0005031660179967111, "loss": 2.5813924789428713, "step": 4930 }, { "epoch": 10.356020942408376, "learning_rate": 0.0005041866387228707, "loss": 2.5748531341552736, "step": 4940 }, { "epoch": 10.37696335078534, "learning_rate": 0.0005052072594490304, "loss": 2.5954639434814455, "step": 4950 }, { "epoch": 10.397905759162304, "learning_rate": 0.00050622788017519, "loss": 2.5695329666137696, "step": 4960 }, { "epoch": 10.418848167539267, "learning_rate": 0.0005072485009013497, "loss": 2.5873296737670897, "step": 4970 }, { "epoch": 10.43979057591623, "learning_rate": 0.0005082691216275094, "loss": 2.6035577774047853, "step": 4980 }, { "epoch": 10.460732984293193, "learning_rate": 0.000509289742353669, "loss": 2.5872554779052734, "step": 4990 }, { "epoch": 10.481675392670157, "learning_rate": 0.0005103103630798287, "loss": 2.548787498474121, "step": 5000 }, { "epoch": 10.502617801047121, "learning_rate": 0.0005098008169087462, "loss": 2.5832122802734374, "step": 5010 }, { "epoch": 10.523560209424083, "learning_rate": 0.0005092927940452339, "loss": 2.5651966094970704, "step": 5020 }, { "epoch": 10.544502617801047, "learning_rate": 0.0005087862869144114, "loss": 2.5844688415527344, "step": 5030 }, { "epoch": 10.565445026178011, "learning_rate": 0.0005082812879940277, "loss": 2.5721431732177735, "step": 5040 }, { "epoch": 10.586387434554974, "learning_rate": 0.0005077777898139921, "loss": 2.59671630859375, "step": 5050 }, { "epoch": 10.607329842931938, "learning_rate": 0.0005072757849559103, "loss": 2.5834468841552733, "step": 5060 }, { "epoch": 10.6282722513089, "learning_rate": 0.0005067752660526248, "loss": 2.566558074951172, "step": 5070 }, { "epoch": 10.649214659685864, "learning_rate": 0.0005062762257877613, "loss": 2.579891395568848, "step": 5080 }, { "epoch": 10.670157068062828, "learning_rate": 0.0005057786568952791, "loss": 2.5753833770751955, "step": 5090 }, { "epoch": 10.69109947643979, "learning_rate": 0.000505282552159027, "loss": 2.5887382507324217, "step": 5100 }, { "epoch": 10.712041884816754, "learning_rate": 0.000504787904412304, "loss": 2.578693962097168, "step": 5110 }, { "epoch": 10.732984293193716, "learning_rate": 0.000504294706537424, "loss": 2.5702871322631835, "step": 5120 }, { "epoch": 10.75392670157068, "learning_rate": 0.0005038029514652858, "loss": 2.579792594909668, "step": 5130 }, { "epoch": 10.774869109947645, "learning_rate": 0.0005033126321749477, "loss": 2.5686906814575194, "step": 5140 }, { "epoch": 10.795811518324607, "learning_rate": 0.000502823741693206, "loss": 2.5612104415893553, "step": 5150 }, { "epoch": 10.81675392670157, "learning_rate": 0.0005023362730941793, "loss": 2.5499547958374023, "step": 5160 }, { "epoch": 10.837696335078533, "learning_rate": 0.0005018502194988955, "loss": 2.5822380065917967, "step": 5170 }, { "epoch": 10.858638743455497, "learning_rate": 0.0005013655740748848, "loss": 2.5612071990966796, "step": 5180 }, { "epoch": 10.879581151832461, "learning_rate": 0.0005008823300357761, "loss": 2.5559200286865233, "step": 5190 }, { "epoch": 10.900523560209423, "learning_rate": 0.0005004004806408972, "loss": 2.566044807434082, "step": 5200 }, { "epoch": 10.921465968586388, "learning_rate": 0.0004999200191948814, "loss": 2.5504446029663086, "step": 5210 }, { "epoch": 10.942408376963352, "learning_rate": 0.0004994409390472751, "loss": 2.5642301559448244, "step": 5220 }, { "epoch": 10.963350785340314, "learning_rate": 0.0004989632335921523, "loss": 2.560255241394043, "step": 5230 }, { "epoch": 10.984293193717278, "learning_rate": 0.0004984868962677315, "loss": 2.5622379302978517, "step": 5240 }, { "epoch": 10.998952879581152, "eval_loss": 2.5793886184692383, "eval_runtime": 73.6792, "eval_samples_per_second": 148.699, "step": 5247 }, { "epoch": 11.006282722513088, "learning_rate": 0.0004980119205559973, "loss": 2.682263946533203, "step": 5250 }, { "epoch": 11.027225130890052, "learning_rate": 0.0004975382999823259, "loss": 2.581485557556152, "step": 5260 }, { "epoch": 11.048167539267016, "learning_rate": 0.0004970660281151141, "loss": 2.5775730133056642, "step": 5270 }, { "epoch": 11.069109947643978, "learning_rate": 0.0004965950985654126, "loss": 2.587117385864258, "step": 5280 }, { "epoch": 11.090052356020943, "learning_rate": 0.0004961255049865635, "loss": 2.5482534408569335, "step": 5290 }, { "epoch": 11.110994764397907, "learning_rate": 0.0004956572410738401, "loss": 2.5464542388916014, "step": 5300 }, { "epoch": 11.131937172774869, "learning_rate": 0.000495190300564092, "loss": 2.557751274108887, "step": 5310 }, { "epoch": 11.152879581151833, "learning_rate": 0.0004947246772353933, "loss": 2.54296932220459, "step": 5320 }, { "epoch": 11.173821989528795, "learning_rate": 0.0004942603649066942, "loss": 2.576173782348633, "step": 5330 }, { "epoch": 11.19476439790576, "learning_rate": 0.0004937973574374762, "loss": 2.535029411315918, "step": 5340 }, { "epoch": 11.215706806282723, "learning_rate": 0.0004933356487274114, "loss": 2.551316833496094, "step": 5350 }, { "epoch": 11.236649214659685, "learning_rate": 0.0004928752327160248, "loss": 2.551384925842285, "step": 5360 }, { "epoch": 11.25759162303665, "learning_rate": 0.0004924161033823598, "loss": 2.5437076568603514, "step": 5370 }, { "epoch": 11.278534031413612, "learning_rate": 0.0004919582547446482, "loss": 2.5423688888549805, "step": 5380 }, { "epoch": 11.299476439790576, "learning_rate": 0.0004915016808599824, "loss": 2.5350723266601562, "step": 5390 }, { "epoch": 11.32041884816754, "learning_rate": 0.0004910463758239914, "loss": 2.5370588302612305, "step": 5400 }, { "epoch": 11.341361256544502, "learning_rate": 0.0004905923337705201, "loss": 2.5458969116210937, "step": 5410 }, { "epoch": 11.362303664921466, "learning_rate": 0.0004901395488713123, "loss": 2.5638805389404298, "step": 5420 }, { "epoch": 11.38324607329843, "learning_rate": 0.0004896880153356963, "loss": 2.5516336441040037, "step": 5430 }, { "epoch": 11.404188481675392, "learning_rate": 0.000489237727410273, "loss": 2.559841346740723, "step": 5440 }, { "epoch": 11.425130890052356, "learning_rate": 0.0004887886793786093, "loss": 2.559991645812988, "step": 5450 }, { "epoch": 11.446073298429319, "learning_rate": 0.0004883408655609327, "loss": 2.5499288558959963, "step": 5460 }, { "epoch": 11.467015706806283, "learning_rate": 0.0004878942803138293, "loss": 2.544484519958496, "step": 5470 }, { "epoch": 11.487958115183247, "learning_rate": 0.0004874489180299454, "loss": 2.537228584289551, "step": 5480 }, { "epoch": 11.508900523560209, "learning_rate": 0.00048700477313769213, "loss": 2.5507528305053713, "step": 5490 }, { "epoch": 11.529842931937173, "learning_rate": 0.00048656184010095185, "loss": 2.5349197387695312, "step": 5500 }, { "epoch": 11.550785340314135, "learning_rate": 0.00048612011341878916, "loss": 2.5785711288452147, "step": 5510 }, { "epoch": 11.5717277486911, "learning_rate": 0.0004856795876251634, "loss": 2.5618894577026365, "step": 5520 }, { "epoch": 11.592670157068063, "learning_rate": 0.00048524025728864493, "loss": 2.553698921203613, "step": 5530 }, { "epoch": 11.613612565445026, "learning_rate": 0.0004848021170121335, "loss": 2.563484954833984, "step": 5540 }, { "epoch": 11.63455497382199, "learning_rate": 0.0004843651614325803, "loss": 2.5655393600463867, "step": 5550 }, { "epoch": 11.655497382198952, "learning_rate": 0.00048392938522071163, "loss": 2.550769233703613, "step": 5560 }, { "epoch": 11.676439790575916, "learning_rate": 0.0004834947830807563, "loss": 2.549762725830078, "step": 5570 }, { "epoch": 11.69738219895288, "learning_rate": 0.00048306134975017523, "loss": 2.5488073348999025, "step": 5580 }, { "epoch": 11.718324607329842, "learning_rate": 0.0004826290799993939, "loss": 2.554892158508301, "step": 5590 }, { "epoch": 11.739267015706806, "learning_rate": 0.0004821979686315372, "loss": 2.519801902770996, "step": 5600 }, { "epoch": 11.76020942408377, "learning_rate": 0.00048176801048216693, "loss": 2.541176414489746, "step": 5610 }, { "epoch": 11.781151832460733, "learning_rate": 0.0004813392004190223, "loss": 2.52908878326416, "step": 5620 }, { "epoch": 11.802094240837697, "learning_rate": 0.00048091153334176224, "loss": 2.5230037689208986, "step": 5630 }, { "epoch": 11.823036649214659, "learning_rate": 0.00048048500418171097, "loss": 2.5191682815551757, "step": 5640 }, { "epoch": 11.843979057591623, "learning_rate": 0.0004800596079016053, "loss": 2.553547668457031, "step": 5650 }, { "epoch": 11.864921465968587, "learning_rate": 0.0004796353394953452, "loss": 2.5445688247680662, "step": 5660 }, { "epoch": 11.88586387434555, "learning_rate": 0.0004792121939877459, "loss": 2.554265022277832, "step": 5670 }, { "epoch": 11.906806282722513, "learning_rate": 0.00047879016643429336, "loss": 2.5312326431274412, "step": 5680 }, { "epoch": 11.927748691099476, "learning_rate": 0.00047836925192090116, "loss": 2.54235897064209, "step": 5690 }, { "epoch": 11.94869109947644, "learning_rate": 0.0004779494455636703, "loss": 2.5591432571411135, "step": 5700 }, { "epoch": 11.969633507853404, "learning_rate": 0.00047753074250865145, "loss": 2.5049566268920898, "step": 5710 }, { "epoch": 11.990575916230366, "learning_rate": 0.00047711313793160877, "loss": 2.5459238052368165, "step": 5720 }, { "epoch": 11.998952879581152, "eval_loss": 2.5662965774536133, "eval_runtime": 74.2325, "eval_samples_per_second": 147.59, "step": 5724 }, { "epoch": 12.012565445026178, "learning_rate": 0.000476696627037787, "loss": 2.6624425888061523, "step": 5730 }, { "epoch": 12.033507853403142, "learning_rate": 0.0004762812050616797, "loss": 2.5242809295654296, "step": 5740 }, { "epoch": 12.054450261780104, "learning_rate": 0.0004758668672668006, "loss": 2.541863441467285, "step": 5750 }, { "epoch": 12.075392670157068, "learning_rate": 0.00047545360894545664, "loss": 2.5424705505371095, "step": 5760 }, { "epoch": 12.09633507853403, "learning_rate": 0.0004750414254185235, "loss": 2.5307668685913085, "step": 5770 }, { "epoch": 12.117277486910995, "learning_rate": 0.0004746303120352226, "loss": 2.5350624084472657, "step": 5780 }, { "epoch": 12.138219895287959, "learning_rate": 0.00047422026417290146, "loss": 2.5554269790649413, "step": 5790 }, { "epoch": 12.159162303664921, "learning_rate": 0.0004738112772368146, "loss": 2.5168834686279298, "step": 5800 }, { "epoch": 12.180104712041885, "learning_rate": 0.00047340334665990787, "loss": 2.531605529785156, "step": 5810 }, { "epoch": 12.201047120418847, "learning_rate": 0.0004729964679026039, "loss": 2.515584373474121, "step": 5820 }, { "epoch": 12.221989528795811, "learning_rate": 0.0004725906364525903, "loss": 2.522596549987793, "step": 5830 }, { "epoch": 12.242931937172775, "learning_rate": 0.0004721858478246089, "loss": 2.5171236038208007, "step": 5840 }, { "epoch": 12.263874345549738, "learning_rate": 0.0004717820975602482, "loss": 2.529332160949707, "step": 5850 }, { "epoch": 12.284816753926702, "learning_rate": 0.0004713793812277367, "loss": 2.5047964096069335, "step": 5860 }, { "epoch": 12.305759162303666, "learning_rate": 0.00047097769442173856, "loss": 2.518666458129883, "step": 5870 }, { "epoch": 12.326701570680628, "learning_rate": 0.00047057703276315164, "loss": 2.5095588684082033, "step": 5880 }, { "epoch": 12.347643979057592, "learning_rate": 0.0004701773918989065, "loss": 2.5557069778442383, "step": 5890 }, { "epoch": 12.368586387434554, "learning_rate": 0.00046977876750176805, "loss": 2.5204561233520506, "step": 5900 }, { "epoch": 12.389528795811518, "learning_rate": 0.0004693811552701385, "loss": 2.5440658569335937, "step": 5910 }, { "epoch": 12.410471204188482, "learning_rate": 0.0004689845509278626, "loss": 2.5195499420166017, "step": 5920 }, { "epoch": 12.431413612565445, "learning_rate": 0.00046858895022403474, "loss": 2.506319999694824, "step": 5930 }, { "epoch": 12.452356020942409, "learning_rate": 0.000468194348932807, "loss": 2.507068061828613, "step": 5940 }, { "epoch": 12.473298429319371, "learning_rate": 0.00046780074285319984, "loss": 2.559153938293457, "step": 5950 }, { "epoch": 12.494240837696335, "learning_rate": 0.0004674081278089144, "loss": 2.5483341217041016, "step": 5960 }, { "epoch": 12.515183246073299, "learning_rate": 0.00046701649964814616, "loss": 2.523490333557129, "step": 5970 }, { "epoch": 12.536125654450261, "learning_rate": 0.0004666258542434007, "loss": 2.511086654663086, "step": 5980 }, { "epoch": 12.557068062827225, "learning_rate": 0.000466236187491311, "loss": 2.525220489501953, "step": 5990 }, { "epoch": 12.578010471204188, "learning_rate": 0.00046584749531245617, "loss": 2.5341968536376953, "step": 6000 }, { "epoch": 12.598952879581152, "learning_rate": 0.0004654597736511823, "loss": 2.517439842224121, "step": 6010 }, { "epoch": 12.619895287958116, "learning_rate": 0.0004650730184754247, "loss": 2.535861778259277, "step": 6020 }, { "epoch": 12.640837696335078, "learning_rate": 0.0004646872257765318, "loss": 2.5128170013427735, "step": 6030 }, { "epoch": 12.661780104712042, "learning_rate": 0.00046430239156909045, "loss": 2.5013412475585937, "step": 6040 }, { "epoch": 12.682722513089006, "learning_rate": 0.00046391851189075343, "loss": 2.492375373840332, "step": 6050 }, { "epoch": 12.703664921465968, "learning_rate": 0.00046353558280206746, "loss": 2.533987045288086, "step": 6060 }, { "epoch": 12.724607329842932, "learning_rate": 0.00046315360038630404, "loss": 2.5339818954467774, "step": 6070 }, { "epoch": 12.745549738219895, "learning_rate": 0.0004627725607492909, "loss": 2.5307100296020506, "step": 6080 }, { "epoch": 12.766492146596859, "learning_rate": 0.00046239246001924503, "loss": 2.5334211349487306, "step": 6090 }, { "epoch": 12.787434554973823, "learning_rate": 0.000462013294346608, "loss": 2.524607849121094, "step": 6100 }, { "epoch": 12.808376963350785, "learning_rate": 0.00046163505990388167, "loss": 2.5250701904296875, "step": 6110 }, { "epoch": 12.829319371727749, "learning_rate": 0.00046125775288546623, "loss": 2.514480400085449, "step": 6120 }, { "epoch": 12.850261780104713, "learning_rate": 0.00046088136950749937, "loss": 2.506093215942383, "step": 6130 }, { "epoch": 12.871204188481675, "learning_rate": 0.0004605059060076967, "loss": 2.5329927444458007, "step": 6140 }, { "epoch": 12.89214659685864, "learning_rate": 0.0004601313586451939, "loss": 2.5250947952270506, "step": 6150 }, { "epoch": 12.913089005235602, "learning_rate": 0.00045975772370039034, "loss": 2.535073471069336, "step": 6160 }, { "epoch": 12.934031413612566, "learning_rate": 0.0004593849974747937, "loss": 2.524639892578125, "step": 6170 }, { "epoch": 12.95497382198953, "learning_rate": 0.0004590131762908664, "loss": 2.509628486633301, "step": 6180 }, { "epoch": 12.975916230366492, "learning_rate": 0.00045864225649187287, "loss": 2.5401321411132813, "step": 6190 }, { "epoch": 12.996858638743456, "learning_rate": 0.000458272234441729, "loss": 2.512648582458496, "step": 6200 }, { "epoch": 12.998952879581152, "eval_loss": 2.558600902557373, "eval_runtime": 73.5975, "eval_samples_per_second": 148.864, "step": 6201 }, { "epoch": 13.018848167539266, "learning_rate": 0.00045790310652485205, "loss": 2.6353900909423826, "step": 6210 }, { "epoch": 13.03979057591623, "learning_rate": 0.0004575348691460124, "loss": 2.519637870788574, "step": 6220 }, { "epoch": 13.060732984293194, "learning_rate": 0.00045716751873018654, "loss": 2.515974426269531, "step": 6230 }, { "epoch": 13.081675392670157, "learning_rate": 0.00045680105172241103, "loss": 2.5275392532348633, "step": 6240 }, { "epoch": 13.10261780104712, "learning_rate": 0.0004564354645876384, "loss": 2.518478012084961, "step": 6250 }, { "epoch": 13.123560209424085, "learning_rate": 0.00045607075381059363, "loss": 2.506203460693359, "step": 6260 }, { "epoch": 13.144502617801047, "learning_rate": 0.00045570691589563234, "loss": 2.501953125, "step": 6270 }, { "epoch": 13.165445026178011, "learning_rate": 0.0004553439473666, "loss": 2.536935234069824, "step": 6280 }, { "epoch": 13.186387434554973, "learning_rate": 0.0004549818447666924, "loss": 2.500376892089844, "step": 6290 }, { "epoch": 13.207329842931937, "learning_rate": 0.00045462060465831743, "loss": 2.507547950744629, "step": 6300 }, { "epoch": 13.228272251308901, "learning_rate": 0.0004542602236229581, "loss": 2.50396842956543, "step": 6310 }, { "epoch": 13.249214659685864, "learning_rate": 0.00045390069826103653, "loss": 2.4975730895996096, "step": 6320 }, { "epoch": 13.270157068062828, "learning_rate": 0.00045354202519177925, "loss": 2.5031005859375, "step": 6330 }, { "epoch": 13.29109947643979, "learning_rate": 0.0004531842010530839, "loss": 2.5102792739868165, "step": 6340 }, { "epoch": 13.312041884816754, "learning_rate": 0.0004528272225013865, "loss": 2.510196876525879, "step": 6350 }, { "epoch": 13.332984293193718, "learning_rate": 0.00045247108621153056, "loss": 2.529274559020996, "step": 6360 }, { "epoch": 13.35392670157068, "learning_rate": 0.0004521157888766368, "loss": 2.5169746398925783, "step": 6370 }, { "epoch": 13.374869109947644, "learning_rate": 0.00045176132720797443, "loss": 2.4974170684814454, "step": 6380 }, { "epoch": 13.395811518324606, "learning_rate": 0.0004514076979348328, "loss": 2.4937871932983398, "step": 6390 }, { "epoch": 13.41675392670157, "learning_rate": 0.0004510548978043951, "loss": 2.4988937377929688, "step": 6400 }, { "epoch": 13.437696335078535, "learning_rate": 0.00045070292358161265, "loss": 2.5192642211914062, "step": 6410 }, { "epoch": 13.458638743455497, "learning_rate": 0.0004503517720490801, "loss": 2.515308380126953, "step": 6420 }, { "epoch": 13.47958115183246, "learning_rate": 0.000450001440006912, "loss": 2.4873653411865235, "step": 6430 }, { "epoch": 13.500523560209425, "learning_rate": 0.00044965192427262043, "loss": 2.4860763549804688, "step": 6440 }, { "epoch": 13.521465968586387, "learning_rate": 0.0004493032216809934, "loss": 2.4914045333862305, "step": 6450 }, { "epoch": 13.542408376963351, "learning_rate": 0.00044895532908397455, "loss": 2.505444145202637, "step": 6460 }, { "epoch": 13.563350785340313, "learning_rate": 0.00044860824335054384, "loss": 2.484037971496582, "step": 6470 }, { "epoch": 13.584293193717278, "learning_rate": 0.00044826196136659916, "loss": 2.4801618576049806, "step": 6480 }, { "epoch": 13.605235602094242, "learning_rate": 0.00044791648003483884, "loss": 2.497146415710449, "step": 6490 }, { "epoch": 13.626178010471204, "learning_rate": 0.0004475717962746455, "loss": 2.510635757446289, "step": 6500 }, { "epoch": 13.647120418848168, "learning_rate": 0.0004472279070219706, "loss": 2.5058326721191406, "step": 6510 }, { "epoch": 13.66806282722513, "learning_rate": 0.00044688480922922, "loss": 2.490641975402832, "step": 6520 }, { "epoch": 13.689005235602094, "learning_rate": 0.00044654249986514057, "loss": 2.4954011917114256, "step": 6530 }, { "epoch": 13.709947643979058, "learning_rate": 0.0004462009759147076, "loss": 2.496523857116699, "step": 6540 }, { "epoch": 13.73089005235602, "learning_rate": 0.0004458602343790135, "loss": 2.4896028518676756, "step": 6550 }, { "epoch": 13.751832460732984, "learning_rate": 0.00044552027227515704, "loss": 2.490574836730957, "step": 6560 }, { "epoch": 13.772774869109949, "learning_rate": 0.00044518108663613355, "loss": 2.4956533432006838, "step": 6570 }, { "epoch": 13.79371727748691, "learning_rate": 0.00044484267451072644, "loss": 2.5057823181152346, "step": 6580 }, { "epoch": 13.814659685863875, "learning_rate": 0.0004445050329633992, "loss": 2.493949127197266, "step": 6590 }, { "epoch": 13.835602094240837, "learning_rate": 0.0004441681590741884, "loss": 2.514782524108887, "step": 6600 }, { "epoch": 13.856544502617801, "learning_rate": 0.0004438320499385977, "loss": 2.463920783996582, "step": 6610 }, { "epoch": 13.877486910994765, "learning_rate": 0.00044349670266749286, "loss": 2.494730567932129, "step": 6620 }, { "epoch": 13.898429319371727, "learning_rate": 0.0004431621143869969, "loss": 2.4927881240844725, "step": 6630 }, { "epoch": 13.919371727748691, "learning_rate": 0.00044282828223838727, "loss": 2.5001829147338865, "step": 6640 }, { "epoch": 13.940314136125654, "learning_rate": 0.0004424952033779929, "loss": 2.485161018371582, "step": 6650 }, { "epoch": 13.961256544502618, "learning_rate": 0.00044216287497709253, "loss": 2.4990121841430666, "step": 6660 }, { "epoch": 13.982198952879582, "learning_rate": 0.0004418312942218139, "loss": 2.478795051574707, "step": 6670 }, { "epoch": 13.998952879581152, "eval_loss": 2.551051616668701, "eval_runtime": 73.8418, "eval_samples_per_second": 148.371, "step": 6678 }, { "epoch": 14.004188481675392, "learning_rate": 0.0004415004583130336, "loss": 2.631510925292969, "step": 6680 }, { "epoch": 14.025130890052356, "learning_rate": 0.0004411703644662778, "loss": 2.46261043548584, "step": 6690 }, { "epoch": 14.04607329842932, "learning_rate": 0.00044084100991162385, "loss": 2.488113212585449, "step": 6700 }, { "epoch": 14.067015706806282, "learning_rate": 0.00044051239189360286, "loss": 2.4580398559570313, "step": 6710 }, { "epoch": 14.087958115183246, "learning_rate": 0.00044018450767110235, "loss": 2.482432174682617, "step": 6720 }, { "epoch": 14.108900523560209, "learning_rate": 0.0004398573545172709, "loss": 2.4827293395996093, "step": 6730 }, { "epoch": 14.129842931937173, "learning_rate": 0.0004395309297194223, "loss": 2.472520637512207, "step": 6740 }, { "epoch": 14.150785340314137, "learning_rate": 0.0004392052305789416, "loss": 2.47951602935791, "step": 6750 }, { "epoch": 14.171727748691099, "learning_rate": 0.0004388802544111908, "loss": 2.4616981506347657, "step": 6760 }, { "epoch": 14.192670157068063, "learning_rate": 0.0004385559985454165, "loss": 2.4829242706298826, "step": 6770 }, { "epoch": 14.213612565445025, "learning_rate": 0.0004382324603246575, "loss": 2.478873634338379, "step": 6780 }, { "epoch": 14.23455497382199, "learning_rate": 0.0004379096371056532, "loss": 2.4993722915649412, "step": 6790 }, { "epoch": 14.255497382198953, "learning_rate": 0.000437587526258753, "loss": 2.474994659423828, "step": 6800 }, { "epoch": 14.276439790575916, "learning_rate": 0.0004372661251678265, "loss": 2.495197296142578, "step": 6810 }, { "epoch": 14.29738219895288, "learning_rate": 0.00043694543123017407, "loss": 2.476504325866699, "step": 6820 }, { "epoch": 14.318324607329842, "learning_rate": 0.0004366254418564382, "loss": 2.5161060333251952, "step": 6830 }, { "epoch": 14.339267015706806, "learning_rate": 0.0004363061544705161, "loss": 2.4868789672851563, "step": 6840 }, { "epoch": 14.36020942408377, "learning_rate": 0.0004359875665094723, "loss": 2.4974212646484375, "step": 6850 }, { "epoch": 14.381151832460732, "learning_rate": 0.00043566967542345227, "loss": 2.4662216186523436, "step": 6860 }, { "epoch": 14.402094240837696, "learning_rate": 0.00043535247867559673, "loss": 2.469373321533203, "step": 6870 }, { "epoch": 14.42303664921466, "learning_rate": 0.00043503597374195665, "loss": 2.483184242248535, "step": 6880 }, { "epoch": 14.443979057591623, "learning_rate": 0.0004347201581114088, "loss": 2.4689809799194338, "step": 6890 }, { "epoch": 14.464921465968587, "learning_rate": 0.0004344050292855724, "loss": 2.439427375793457, "step": 6900 }, { "epoch": 14.485863874345549, "learning_rate": 0.00043409058477872554, "loss": 2.47011775970459, "step": 6910 }, { "epoch": 14.506806282722513, "learning_rate": 0.00043377682211772343, "loss": 2.4866916656494142, "step": 6920 }, { "epoch": 14.527748691099477, "learning_rate": 0.0004334637388419161, "loss": 2.4834897994995115, "step": 6930 }, { "epoch": 14.54869109947644, "learning_rate": 0.0004331513325030681, "loss": 2.5011289596557615, "step": 6940 }, { "epoch": 14.569633507853403, "learning_rate": 0.0004328396006652773, "loss": 2.478676986694336, "step": 6950 }, { "epoch": 14.590575916230367, "learning_rate": 0.00043252854090489564, "loss": 2.4692920684814452, "step": 6960 }, { "epoch": 14.61151832460733, "learning_rate": 0.00043221815081044985, "loss": 2.492611122131348, "step": 6970 }, { "epoch": 14.632460732984294, "learning_rate": 0.00043190842798256285, "loss": 2.4726083755493162, "step": 6980 }, { "epoch": 14.653403141361256, "learning_rate": 0.00043159937003387584, "loss": 2.491672706604004, "step": 6990 }, { "epoch": 14.67434554973822, "learning_rate": 0.00043129097458897135, "loss": 2.474324417114258, "step": 7000 }, { "epoch": 14.695287958115184, "learning_rate": 0.000430983239284296, "loss": 2.4726449966430666, "step": 7010 }, { "epoch": 14.716230366492146, "learning_rate": 0.0004306761617680849, "loss": 2.456452178955078, "step": 7020 }, { "epoch": 14.73717277486911, "learning_rate": 0.00043036973970028583, "loss": 2.4777704238891602, "step": 7030 }, { "epoch": 14.758115183246073, "learning_rate": 0.00043006397075248464, "loss": 2.492514801025391, "step": 7040 }, { "epoch": 14.779057591623037, "learning_rate": 0.00042975885260783056, "loss": 2.464923095703125, "step": 7050 }, { "epoch": 14.8, "learning_rate": 0.00042945438296096303, "loss": 2.4520200729370116, "step": 7060 }, { "epoch": 14.820942408376963, "learning_rate": 0.0004291505595179379, "loss": 2.461465072631836, "step": 7070 }, { "epoch": 14.841884816753927, "learning_rate": 0.0004288473799961553, "loss": 2.474461555480957, "step": 7080 }, { "epoch": 14.86282722513089, "learning_rate": 0.0004285448421242875, "loss": 2.474432945251465, "step": 7090 }, { "epoch": 14.883769633507853, "learning_rate": 0.00042824294364220724, "loss": 2.506844329833984, "step": 7100 }, { "epoch": 14.904712041884817, "learning_rate": 0.0004279416823009172, "loss": 2.466670036315918, "step": 7110 }, { "epoch": 14.92565445026178, "learning_rate": 0.0004276410558624791, "loss": 2.4866743087768555, "step": 7120 }, { "epoch": 14.946596858638744, "learning_rate": 0.0004273410620999446, "loss": 2.4524404525756838, "step": 7130 }, { "epoch": 14.967539267015706, "learning_rate": 0.0004270416987972853, "loss": 2.4684980392456053, "step": 7140 }, { "epoch": 14.98848167539267, "learning_rate": 0.00042674296374932424, "loss": 2.469831848144531, "step": 7150 }, { "epoch": 14.998952879581152, "eval_loss": 2.5442276000976562, "eval_runtime": 73.9909, "eval_samples_per_second": 148.072, "step": 7155 }, { "epoch": 15.010471204188482, "learning_rate": 0.0004264448547616681, "loss": 2.5812490463256834, "step": 7160 }, { "epoch": 15.031413612565444, "learning_rate": 0.00042614736965063864, "loss": 2.4873594284057616, "step": 7170 }, { "epoch": 15.052356020942408, "learning_rate": 0.0004258505062432064, "loss": 2.4659671783447266, "step": 7180 }, { "epoch": 15.073298429319372, "learning_rate": 0.0004255542623769234, "loss": 2.4558393478393556, "step": 7190 }, { "epoch": 15.094240837696335, "learning_rate": 0.00042525863589985727, "loss": 2.4745227813720705, "step": 7200 }, { "epoch": 15.115183246073299, "learning_rate": 0.00042496362467052564, "loss": 2.4763622283935547, "step": 7210 }, { "epoch": 15.136125654450261, "learning_rate": 0.00042466922655783073, "loss": 2.4713407516479493, "step": 7220 }, { "epoch": 15.157068062827225, "learning_rate": 0.00042437543944099504, "loss": 2.4634868621826174, "step": 7230 }, { "epoch": 15.178010471204189, "learning_rate": 0.00042408226120949674, "loss": 2.476248931884766, "step": 7240 }, { "epoch": 15.198952879581151, "learning_rate": 0.00042378968976300647, "loss": 2.4730270385742186, "step": 7250 }, { "epoch": 15.219895287958115, "learning_rate": 0.00042349772301132377, "loss": 2.476571273803711, "step": 7260 }, { "epoch": 15.24083769633508, "learning_rate": 0.0004232063588743146, "loss": 2.4510690689086916, "step": 7270 }, { "epoch": 15.261780104712042, "learning_rate": 0.00042291559528184904, "loss": 2.465399742126465, "step": 7280 }, { "epoch": 15.282722513089006, "learning_rate": 0.0004226254301737393, "loss": 2.4773502349853516, "step": 7290 }, { "epoch": 15.303664921465968, "learning_rate": 0.0004223358614996787, "loss": 2.43621711730957, "step": 7300 }, { "epoch": 15.324607329842932, "learning_rate": 0.00042204688721918075, "loss": 2.456114959716797, "step": 7310 }, { "epoch": 15.345549738219896, "learning_rate": 0.0004217585053015187, "loss": 2.468073844909668, "step": 7320 }, { "epoch": 15.366492146596858, "learning_rate": 0.0004214707137256656, "loss": 2.471833419799805, "step": 7330 }, { "epoch": 15.387434554973822, "learning_rate": 0.0004211835104802349, "loss": 2.499461364746094, "step": 7340 }, { "epoch": 15.408376963350785, "learning_rate": 0.00042089689356342115, "loss": 2.4492721557617188, "step": 7350 }, { "epoch": 15.429319371727749, "learning_rate": 0.0004206108609829418, "loss": 2.4671262741088866, "step": 7360 }, { "epoch": 15.450261780104713, "learning_rate": 0.00042032541075597875, "loss": 2.465005111694336, "step": 7370 }, { "epoch": 15.471204188481675, "learning_rate": 0.0004200405409091207, "loss": 2.4648488998413085, "step": 7380 }, { "epoch": 15.492146596858639, "learning_rate": 0.00041975624947830593, "loss": 2.463612174987793, "step": 7390 }, { "epoch": 15.513089005235603, "learning_rate": 0.00041947253450876515, "loss": 2.4697538375854493, "step": 7400 }, { "epoch": 15.534031413612565, "learning_rate": 0.00041918939405496546, "loss": 2.45694637298584, "step": 7410 }, { "epoch": 15.55497382198953, "learning_rate": 0.00041890682618055396, "loss": 2.4443153381347655, "step": 7420 }, { "epoch": 15.575916230366492, "learning_rate": 0.0004186248289583023, "loss": 2.445983123779297, "step": 7430 }, { "epoch": 15.596858638743456, "learning_rate": 0.00041834340047005144, "loss": 2.489885711669922, "step": 7440 }, { "epoch": 15.61780104712042, "learning_rate": 0.0004180625388066569, "loss": 2.4711660385131835, "step": 7450 }, { "epoch": 15.638743455497382, "learning_rate": 0.00041778224206793433, "loss": 2.4884315490722657, "step": 7460 }, { "epoch": 15.659685863874346, "learning_rate": 0.00041750250836260536, "loss": 2.477284240722656, "step": 7470 }, { "epoch": 15.680628272251308, "learning_rate": 0.0004172233358082443, "loss": 2.475067901611328, "step": 7480 }, { "epoch": 15.701570680628272, "learning_rate": 0.00041694472253122467, "loss": 2.482602119445801, "step": 7490 }, { "epoch": 15.722513089005236, "learning_rate": 0.00041666666666666664, "loss": 2.442608642578125, "step": 7500 }, { "epoch": 15.743455497382199, "learning_rate": 0.0004163891663583843, "loss": 2.468288040161133, "step": 7510 }, { "epoch": 15.764397905759163, "learning_rate": 0.00041611221975883396, "loss": 2.4465059280395507, "step": 7520 }, { "epoch": 15.785340314136125, "learning_rate": 0.00041583582502906203, "loss": 2.4614633560180663, "step": 7530 }, { "epoch": 15.806282722513089, "learning_rate": 0.0004155599803386543, "loss": 2.4629968643188476, "step": 7540 }, { "epoch": 15.827225130890053, "learning_rate": 0.0004152846838656846, "loss": 2.454400062561035, "step": 7550 }, { "epoch": 15.848167539267015, "learning_rate": 0.00041500993379666443, "loss": 2.467230224609375, "step": 7560 }, { "epoch": 15.86910994764398, "learning_rate": 0.0004147357283264927, "loss": 2.442008209228516, "step": 7570 }, { "epoch": 15.890052356020943, "learning_rate": 0.000414462065658406, "loss": 2.4506603240966798, "step": 7580 }, { "epoch": 15.910994764397905, "learning_rate": 0.0004141889440039292, "loss": 2.4443122863769533, "step": 7590 }, { "epoch": 15.93193717277487, "learning_rate": 0.00041391636158282614, "loss": 2.4457521438598633, "step": 7600 }, { "epoch": 15.952879581151832, "learning_rate": 0.00041364431662305114, "loss": 2.457781982421875, "step": 7610 }, { "epoch": 15.973821989528796, "learning_rate": 0.0004133728073607005, "loss": 2.440464210510254, "step": 7620 }, { "epoch": 15.99476439790576, "learning_rate": 0.00041310183203996446, "loss": 2.4534429550170898, "step": 7630 }, { "epoch": 15.998952879581152, "eval_loss": 2.5449907779693604, "eval_runtime": 73.6337, "eval_samples_per_second": 148.791, "step": 7632 }, { "epoch": 16.016753926701572, "learning_rate": 0.0004128313889130795, "loss": 2.5835424423217774, "step": 7640 }, { "epoch": 16.037696335078532, "learning_rate": 0.0004125614762402809, "loss": 2.458993148803711, "step": 7650 }, { "epoch": 16.058638743455496, "learning_rate": 0.00041229209228975627, "loss": 2.471218299865723, "step": 7660 }, { "epoch": 16.07958115183246, "learning_rate": 0.000412023235337598, "loss": 2.464751052856445, "step": 7670 }, { "epoch": 16.100523560209425, "learning_rate": 0.00041175490366775766, "loss": 2.4599708557128905, "step": 7680 }, { "epoch": 16.12146596858639, "learning_rate": 0.0004114870955719997, "loss": 2.4355844497680663, "step": 7690 }, { "epoch": 16.14240837696335, "learning_rate": 0.00041121980934985563, "loss": 2.4272241592407227, "step": 7700 }, { "epoch": 16.163350785340313, "learning_rate": 0.000410953043308579, "loss": 2.4612340927124023, "step": 7710 }, { "epoch": 16.184293193717277, "learning_rate": 0.0004106867957631001, "loss": 2.465089797973633, "step": 7720 }, { "epoch": 16.20523560209424, "learning_rate": 0.00041042106503598165, "loss": 2.451694297790527, "step": 7730 }, { "epoch": 16.226178010471205, "learning_rate": 0.0004101558494573738, "loss": 2.464099884033203, "step": 7740 }, { "epoch": 16.24712041884817, "learning_rate": 0.0004098911473649706, "loss": 2.448426055908203, "step": 7750 }, { "epoch": 16.26806282722513, "learning_rate": 0.0004096269571039658, "loss": 2.455006217956543, "step": 7760 }, { "epoch": 16.289005235602094, "learning_rate": 0.00040936327702701005, "loss": 2.453194808959961, "step": 7770 }, { "epoch": 16.309947643979058, "learning_rate": 0.00040910010549416687, "loss": 2.4759195327758787, "step": 7780 }, { "epoch": 16.330890052356022, "learning_rate": 0.0004088374408728706, "loss": 2.4628747940063476, "step": 7790 }, { "epoch": 16.351832460732986, "learning_rate": 0.0004085752815378834, "loss": 2.446619415283203, "step": 7800 }, { "epoch": 16.372774869109946, "learning_rate": 0.0004083136258712532, "loss": 2.4754364013671877, "step": 7810 }, { "epoch": 16.39371727748691, "learning_rate": 0.0004080524722622717, "loss": 2.4566783905029297, "step": 7820 }, { "epoch": 16.414659685863874, "learning_rate": 0.00040779181910743294, "loss": 2.426336479187012, "step": 7830 }, { "epoch": 16.43560209424084, "learning_rate": 0.0004075316648103914, "loss": 2.460182762145996, "step": 7840 }, { "epoch": 16.456544502617803, "learning_rate": 0.0004072720077819216, "loss": 2.454692268371582, "step": 7850 }, { "epoch": 16.477486910994763, "learning_rate": 0.0004070128464398768, "loss": 2.4589263916015627, "step": 7860 }, { "epoch": 16.498429319371727, "learning_rate": 0.0004067541792091489, "loss": 2.472345161437988, "step": 7870 }, { "epoch": 16.51937172774869, "learning_rate": 0.0004064960045216279, "loss": 2.427416229248047, "step": 7880 }, { "epoch": 16.540314136125655, "learning_rate": 0.0004062383208161624, "loss": 2.454151725769043, "step": 7890 }, { "epoch": 16.56125654450262, "learning_rate": 0.0004059811265385193, "loss": 2.4490371704101563, "step": 7900 }, { "epoch": 16.58219895287958, "learning_rate": 0.00040572442014134516, "loss": 2.479467010498047, "step": 7910 }, { "epoch": 16.603141361256544, "learning_rate": 0.00040546820008412654, "loss": 2.4391218185424806, "step": 7920 }, { "epoch": 16.624083769633508, "learning_rate": 0.0004052124648331515, "loss": 2.455718421936035, "step": 7930 }, { "epoch": 16.645026178010472, "learning_rate": 0.00040495721286147086, "loss": 2.4620994567871093, "step": 7940 }, { "epoch": 16.665968586387436, "learning_rate": 0.00040470244264886006, "loss": 2.448670196533203, "step": 7950 }, { "epoch": 16.686910994764396, "learning_rate": 0.00040444815268178097, "loss": 2.426989936828613, "step": 7960 }, { "epoch": 16.70785340314136, "learning_rate": 0.00040419434145334414, "loss": 2.447972869873047, "step": 7970 }, { "epoch": 16.728795811518324, "learning_rate": 0.00040394100746327154, "loss": 2.457029342651367, "step": 7980 }, { "epoch": 16.74973821989529, "learning_rate": 0.0004036881492178589, "loss": 2.4539730072021486, "step": 7990 }, { "epoch": 16.770680628272252, "learning_rate": 0.00040343576522993926, "loss": 2.4607629776000977, "step": 8000 }, { "epoch": 16.791623036649213, "learning_rate": 0.00040318385401884554, "loss": 2.43496036529541, "step": 8010 }, { "epoch": 16.812565445026177, "learning_rate": 0.00040293241411037484, "loss": 2.423869323730469, "step": 8020 }, { "epoch": 16.83350785340314, "learning_rate": 0.00040268144403675154, "loss": 2.4423187255859373, "step": 8030 }, { "epoch": 16.854450261780105, "learning_rate": 0.0004024309423365915, "loss": 2.4698711395263673, "step": 8040 }, { "epoch": 16.87539267015707, "learning_rate": 0.0004021809075548668, "loss": 2.423082160949707, "step": 8050 }, { "epoch": 16.89633507853403, "learning_rate": 0.0004019313382428694, "loss": 2.443895149230957, "step": 8060 }, { "epoch": 16.917277486910994, "learning_rate": 0.00040168223295817656, "loss": 2.455313301086426, "step": 8070 }, { "epoch": 16.938219895287958, "learning_rate": 0.00040143359026461554, "loss": 2.415020751953125, "step": 8080 }, { "epoch": 16.95916230366492, "learning_rate": 0.000401185408732229, "loss": 2.462967109680176, "step": 8090 }, { "epoch": 16.980104712041886, "learning_rate": 0.0004009376869372401, "loss": 2.416962242126465, "step": 8100 }, { "epoch": 16.99895287958115, "eval_loss": 2.5404856204986572, "eval_runtime": 73.757, "eval_samples_per_second": 148.542, "step": 8109 }, { "epoch": 17.002094240837696, "learning_rate": 0.00040069042346201864, "loss": 2.549093818664551, "step": 8110 }, { "epoch": 17.02303664921466, "learning_rate": 0.00040044361689504655, "loss": 2.452895736694336, "step": 8120 }, { "epoch": 17.043979057591624, "learning_rate": 0.0004001972658308847, "loss": 2.460617446899414, "step": 8130 }, { "epoch": 17.064921465968588, "learning_rate": 0.0003999513688701383, "loss": 2.4506912231445312, "step": 8140 }, { "epoch": 17.08586387434555, "learning_rate": 0.00039970592461942457, "loss": 2.440316581726074, "step": 8150 }, { "epoch": 17.106806282722513, "learning_rate": 0.00039946093169133874, "loss": 2.4249364852905275, "step": 8160 }, { "epoch": 17.127748691099477, "learning_rate": 0.0003992163887044217, "loss": 2.4489822387695312, "step": 8170 }, { "epoch": 17.14869109947644, "learning_rate": 0.0003989722942831268, "loss": 2.455015754699707, "step": 8180 }, { "epoch": 17.169633507853405, "learning_rate": 0.0003987286470577879, "loss": 2.4564001083374025, "step": 8190 }, { "epoch": 17.190575916230365, "learning_rate": 0.0003984854456645864, "loss": 2.4481569290161134, "step": 8200 }, { "epoch": 17.21151832460733, "learning_rate": 0.0003982426887455199, "loss": 2.4090858459472657, "step": 8210 }, { "epoch": 17.232460732984293, "learning_rate": 0.00039800037494836985, "loss": 2.4279315948486326, "step": 8220 }, { "epoch": 17.253403141361257, "learning_rate": 0.00039775850292667005, "loss": 2.4328563690185545, "step": 8230 }, { "epoch": 17.27434554973822, "learning_rate": 0.0003975170713396753, "loss": 2.426299476623535, "step": 8240 }, { "epoch": 17.295287958115182, "learning_rate": 0.0003972760788523301, "loss": 2.424925994873047, "step": 8250 }, { "epoch": 17.316230366492146, "learning_rate": 0.0003970355241352378, "loss": 2.4173357009887697, "step": 8260 }, { "epoch": 17.33717277486911, "learning_rate": 0.00039679540586462953, "loss": 2.4525693893432616, "step": 8270 }, { "epoch": 17.358115183246074, "learning_rate": 0.00039655572272233384, "loss": 2.473075103759766, "step": 8280 }, { "epoch": 17.379057591623038, "learning_rate": 0.0003963164733957462, "loss": 2.422397232055664, "step": 8290 }, { "epoch": 17.4, "learning_rate": 0.00039607765657779864, "loss": 2.441000556945801, "step": 8300 }, { "epoch": 17.420942408376963, "learning_rate": 0.0003958392709669304, "loss": 2.4337257385253905, "step": 8310 }, { "epoch": 17.441884816753927, "learning_rate": 0.00039560131526705723, "loss": 2.4255434036254884, "step": 8320 }, { "epoch": 17.46282722513089, "learning_rate": 0.0003953637881875425, "loss": 2.4355316162109375, "step": 8330 }, { "epoch": 17.483769633507855, "learning_rate": 0.0003951266884431675, "loss": 2.428698921203613, "step": 8340 }, { "epoch": 17.504712041884815, "learning_rate": 0.00039489001475410214, "loss": 2.4530813217163088, "step": 8350 }, { "epoch": 17.52565445026178, "learning_rate": 0.00039465376584587626, "loss": 2.4496335983276367, "step": 8360 }, { "epoch": 17.546596858638743, "learning_rate": 0.00039441794044935054, "loss": 2.425421142578125, "step": 8370 }, { "epoch": 17.567539267015707, "learning_rate": 0.00039418253730068797, "loss": 2.4285154342651367, "step": 8380 }, { "epoch": 17.58848167539267, "learning_rate": 0.0003939475551413253, "loss": 2.4037647247314453, "step": 8390 }, { "epoch": 17.609424083769632, "learning_rate": 0.000393712992717945, "loss": 2.423297119140625, "step": 8400 }, { "epoch": 17.630366492146596, "learning_rate": 0.0003934788487824469, "loss": 2.47174186706543, "step": 8410 }, { "epoch": 17.65130890052356, "learning_rate": 0.0003932451220919205, "loss": 2.441014289855957, "step": 8420 }, { "epoch": 17.672251308900524, "learning_rate": 0.0003930118114086172, "loss": 2.4317821502685546, "step": 8430 }, { "epoch": 17.693193717277488, "learning_rate": 0.00039277891549992266, "loss": 2.4377744674682615, "step": 8440 }, { "epoch": 17.71413612565445, "learning_rate": 0.0003925464331383298, "loss": 2.4461442947387697, "step": 8450 }, { "epoch": 17.735078534031413, "learning_rate": 0.00039231436310141113, "loss": 2.466485595703125, "step": 8460 }, { "epoch": 17.756020942408377, "learning_rate": 0.00039208270417179214, "loss": 2.4318614959716798, "step": 8470 }, { "epoch": 17.77696335078534, "learning_rate": 0.0003918514551371243, "loss": 2.431291389465332, "step": 8480 }, { "epoch": 17.797905759162305, "learning_rate": 0.0003916206147900585, "loss": 2.43109130859375, "step": 8490 }, { "epoch": 17.81884816753927, "learning_rate": 0.00039139018192821845, "loss": 2.4323259353637696, "step": 8500 }, { "epoch": 17.83979057591623, "learning_rate": 0.00039116015535417445, "loss": 2.439468193054199, "step": 8510 }, { "epoch": 17.860732984293193, "learning_rate": 0.00039093053387541745, "loss": 2.4567943572998048, "step": 8520 }, { "epoch": 17.881675392670157, "learning_rate": 0.00039070131630433274, "loss": 2.4346736907958983, "step": 8530 }, { "epoch": 17.90261780104712, "learning_rate": 0.00039047250145817424, "loss": 2.441089630126953, "step": 8540 }, { "epoch": 17.923560209424085, "learning_rate": 0.00039024408815903914, "loss": 2.4277088165283205, "step": 8550 }, { "epoch": 17.944502617801046, "learning_rate": 0.0003900160752338421, "loss": 2.4228445053100587, "step": 8560 }, { "epoch": 17.96544502617801, "learning_rate": 0.00038978846151429, "loss": 2.399433708190918, "step": 8570 }, { "epoch": 17.986387434554974, "learning_rate": 0.0003895612458368572, "loss": 2.4009246826171875, "step": 8580 }, { "epoch": 17.99895287958115, "eval_loss": 2.5318360328674316, "eval_runtime": 74.0596, "eval_samples_per_second": 147.935, "step": 8586 }, { "epoch": 18.008376963350784, "learning_rate": 0.00038933442704275974, "loss": 2.537807655334473, "step": 8590 }, { "epoch": 18.02931937172775, "learning_rate": 0.0003891080039779314, "loss": 2.446313667297363, "step": 8600 }, { "epoch": 18.050261780104712, "learning_rate": 0.0003888819754929986, "loss": 2.4345209121704103, "step": 8610 }, { "epoch": 18.071204188481676, "learning_rate": 0.0003886563404432558, "loss": 2.437006187438965, "step": 8620 }, { "epoch": 18.09214659685864, "learning_rate": 0.0003884310976886414, "loss": 2.420798110961914, "step": 8630 }, { "epoch": 18.1130890052356, "learning_rate": 0.0003882062460937135, "loss": 2.4323537826538084, "step": 8640 }, { "epoch": 18.134031413612565, "learning_rate": 0.0003879817845276255, "loss": 2.442038345336914, "step": 8650 }, { "epoch": 18.15497382198953, "learning_rate": 0.0003877577118641029, "loss": 2.433667755126953, "step": 8660 }, { "epoch": 18.175916230366493, "learning_rate": 0.00038753402698141903, "loss": 2.424707221984863, "step": 8670 }, { "epoch": 18.196858638743457, "learning_rate": 0.0003873107287623715, "loss": 2.4348966598510744, "step": 8680 }, { "epoch": 18.217801047120417, "learning_rate": 0.00038708781609425905, "loss": 2.404917907714844, "step": 8690 }, { "epoch": 18.23874345549738, "learning_rate": 0.000386865287868858, "loss": 2.4346105575561525, "step": 8700 }, { "epoch": 18.259685863874346, "learning_rate": 0.0003866431429823993, "loss": 2.442304992675781, "step": 8710 }, { "epoch": 18.28062827225131, "learning_rate": 0.00038642138033554525, "loss": 2.42406005859375, "step": 8720 }, { "epoch": 18.301570680628274, "learning_rate": 0.00038619999883336703, "loss": 2.440979766845703, "step": 8730 }, { "epoch": 18.322513089005234, "learning_rate": 0.0003859789973853217, "loss": 2.440751075744629, "step": 8740 }, { "epoch": 18.343455497382198, "learning_rate": 0.0003857583749052298, "loss": 2.4363412857055664, "step": 8750 }, { "epoch": 18.364397905759162, "learning_rate": 0.0003855381303112527, "loss": 2.4235382080078125, "step": 8760 }, { "epoch": 18.385340314136126, "learning_rate": 0.0003853182625258708, "loss": 2.4238630294799806, "step": 8770 }, { "epoch": 18.40628272251309, "learning_rate": 0.0003850987704758608, "loss": 2.426643943786621, "step": 8780 }, { "epoch": 18.42722513089005, "learning_rate": 0.00038487965309227413, "loss": 2.438970947265625, "step": 8790 }, { "epoch": 18.448167539267015, "learning_rate": 0.0003846609093104148, "loss": 2.423859786987305, "step": 8800 }, { "epoch": 18.46910994764398, "learning_rate": 0.00038444253806981784, "loss": 2.4040243148803713, "step": 8810 }, { "epoch": 18.490052356020943, "learning_rate": 0.00038422453831422784, "loss": 2.420393371582031, "step": 8820 }, { "epoch": 18.510994764397907, "learning_rate": 0.0003840069089915771, "loss": 2.427932929992676, "step": 8830 }, { "epoch": 18.531937172774867, "learning_rate": 0.00038378964905396454, "loss": 2.419098663330078, "step": 8840 }, { "epoch": 18.55287958115183, "learning_rate": 0.00038357275745763475, "loss": 2.4184850692749023, "step": 8850 }, { "epoch": 18.573821989528795, "learning_rate": 0.0003833562331629563, "loss": 2.402060127258301, "step": 8860 }, { "epoch": 18.59476439790576, "learning_rate": 0.0003831400751344014, "loss": 2.403904914855957, "step": 8870 }, { "epoch": 18.615706806282724, "learning_rate": 0.00038292428234052486, "loss": 2.4094032287597655, "step": 8880 }, { "epoch": 18.636649214659684, "learning_rate": 0.0003827088537539434, "loss": 2.3887189865112304, "step": 8890 }, { "epoch": 18.657591623036648, "learning_rate": 0.00038249378835131535, "loss": 2.4003849029541016, "step": 8900 }, { "epoch": 18.678534031413612, "learning_rate": 0.0003822790851133196, "loss": 2.4100620269775392, "step": 8910 }, { "epoch": 18.699476439790576, "learning_rate": 0.00038206474302463617, "loss": 2.4087665557861326, "step": 8920 }, { "epoch": 18.72041884816754, "learning_rate": 0.00038185076107392544, "loss": 2.4067865371704102, "step": 8930 }, { "epoch": 18.741361256544504, "learning_rate": 0.0003816371382538082, "loss": 2.3902347564697264, "step": 8940 }, { "epoch": 18.762303664921465, "learning_rate": 0.0003814238735608459, "loss": 2.4000757217407225, "step": 8950 }, { "epoch": 18.78324607329843, "learning_rate": 0.0003812109659955207, "loss": 2.4148767471313475, "step": 8960 }, { "epoch": 18.804188481675393, "learning_rate": 0.00038099841456221617, "loss": 2.414336395263672, "step": 8970 }, { "epoch": 18.825130890052357, "learning_rate": 0.0003807862182691969, "loss": 2.440867042541504, "step": 8980 }, { "epoch": 18.84607329842932, "learning_rate": 0.00038057437612859003, "loss": 2.4532596588134767, "step": 8990 }, { "epoch": 18.86701570680628, "learning_rate": 0.0003803628871563653, "loss": 2.4020782470703126, "step": 9000 }, { "epoch": 18.887958115183245, "learning_rate": 0.0003801517503723161, "loss": 2.430096435546875, "step": 9010 }, { "epoch": 18.90890052356021, "learning_rate": 0.00037994096480004037, "loss": 2.419812774658203, "step": 9020 }, { "epoch": 18.929842931937173, "learning_rate": 0.0003797305294669214, "loss": 2.4075344085693358, "step": 9030 }, { "epoch": 18.950785340314138, "learning_rate": 0.00037952044340410954, "loss": 2.4337480545043944, "step": 9040 }, { "epoch": 18.971727748691098, "learning_rate": 0.00037931070564650276, "loss": 2.4088159561157227, "step": 9050 }, { "epoch": 18.992670157068062, "learning_rate": 0.0003791013152327286, "loss": 2.4013919830322266, "step": 9060 }, { "epoch": 18.99895287958115, "eval_loss": 2.529340982437134, "eval_runtime": 73.527, "eval_samples_per_second": 149.007, "step": 9063 }, { "epoch": 19.014659685863876, "learning_rate": 0.00037889227120512545, "loss": 2.5226316452026367, "step": 9070 }, { "epoch": 19.035602094240836, "learning_rate": 0.0003786835726097239, "loss": 2.4024560928344725, "step": 9080 }, { "epoch": 19.0565445026178, "learning_rate": 0.00037847521849622895, "loss": 2.4342859268188475, "step": 9090 }, { "epoch": 19.077486910994764, "learning_rate": 0.0003782672079180015, "loss": 2.4292444229125976, "step": 9100 }, { "epoch": 19.09842931937173, "learning_rate": 0.0003780595399320404, "loss": 2.4289926528930663, "step": 9110 }, { "epoch": 19.119371727748693, "learning_rate": 0.00037785221359896444, "loss": 2.440321159362793, "step": 9120 }, { "epoch": 19.140314136125653, "learning_rate": 0.00037764522798299443, "loss": 2.4353168487548826, "step": 9130 }, { "epoch": 19.161256544502617, "learning_rate": 0.0003774385821519358, "loss": 2.389999008178711, "step": 9140 }, { "epoch": 19.18219895287958, "learning_rate": 0.0003772322751771605, "loss": 2.4299448013305662, "step": 9150 }, { "epoch": 19.203141361256545, "learning_rate": 0.00037702630613358986, "loss": 2.4094564437866213, "step": 9160 }, { "epoch": 19.22408376963351, "learning_rate": 0.0003768206740996769, "loss": 2.416705322265625, "step": 9170 }, { "epoch": 19.24502617801047, "learning_rate": 0.00037661537815738915, "loss": 2.4174514770507813, "step": 9180 }, { "epoch": 19.265968586387434, "learning_rate": 0.00037641041739219143, "loss": 2.4112581253051757, "step": 9190 }, { "epoch": 19.286910994764398, "learning_rate": 0.00037620579089302876, "loss": 2.4125255584716796, "step": 9200 }, { "epoch": 19.307853403141362, "learning_rate": 0.0003760014977523091, "loss": 2.41434268951416, "step": 9210 }, { "epoch": 19.328795811518326, "learning_rate": 0.00037579753706588697, "loss": 2.419674301147461, "step": 9220 }, { "epoch": 19.349738219895286, "learning_rate": 0.00037559390793304604, "loss": 2.3896152496337892, "step": 9230 }, { "epoch": 19.37068062827225, "learning_rate": 0.00037539060945648286, "loss": 2.4006847381591796, "step": 9240 }, { "epoch": 19.391623036649214, "learning_rate": 0.00037518764074229014, "loss": 2.4157575607299804, "step": 9250 }, { "epoch": 19.41256544502618, "learning_rate": 0.00037498500089994, "loss": 2.398466873168945, "step": 9260 }, { "epoch": 19.433507853403142, "learning_rate": 0.00037478268904226795, "loss": 2.4136272430419923, "step": 9270 }, { "epoch": 19.454450261780103, "learning_rate": 0.00037458070428545635, "loss": 2.412180709838867, "step": 9280 }, { "epoch": 19.475392670157067, "learning_rate": 0.00037437904574901817, "loss": 2.417103385925293, "step": 9290 }, { "epoch": 19.49633507853403, "learning_rate": 0.00037417771255578104, "loss": 2.3830541610717773, "step": 9300 }, { "epoch": 19.517277486910995, "learning_rate": 0.00037397670383187097, "loss": 2.4057411193847655, "step": 9310 }, { "epoch": 19.53821989528796, "learning_rate": 0.0003737760187066967, "loss": 2.429146957397461, "step": 9320 }, { "epoch": 19.559162303664923, "learning_rate": 0.00037357565631293365, "loss": 2.4229619979858397, "step": 9330 }, { "epoch": 19.580104712041884, "learning_rate": 0.00037337561578650833, "loss": 2.4030439376831056, "step": 9340 }, { "epoch": 19.601047120418848, "learning_rate": 0.00037317589626658255, "loss": 2.4122753143310547, "step": 9350 }, { "epoch": 19.62198952879581, "learning_rate": 0.0003729764968955379, "loss": 2.420066070556641, "step": 9360 }, { "epoch": 19.642931937172776, "learning_rate": 0.00037277741681896045, "loss": 2.4116867065429686, "step": 9370 }, { "epoch": 19.66387434554974, "learning_rate": 0.0003725786551856251, "loss": 2.4222272872924804, "step": 9380 }, { "epoch": 19.6848167539267, "learning_rate": 0.0003723802111474804, "loss": 2.398889350891113, "step": 9390 }, { "epoch": 19.705759162303664, "learning_rate": 0.0003721820838596335, "loss": 2.3947797775268556, "step": 9400 }, { "epoch": 19.72670157068063, "learning_rate": 0.00037198427248033485, "loss": 2.39971981048584, "step": 9410 }, { "epoch": 19.747643979057592, "learning_rate": 0.00037178677617096337, "loss": 2.3918169021606444, "step": 9420 }, { "epoch": 19.768586387434556, "learning_rate": 0.0003715895940960111, "loss": 2.393696975708008, "step": 9430 }, { "epoch": 19.789528795811517, "learning_rate": 0.000371392725423069, "loss": 2.403204345703125, "step": 9440 }, { "epoch": 19.81047120418848, "learning_rate": 0.00037119616932281165, "loss": 2.3984851837158203, "step": 9450 }, { "epoch": 19.831413612565445, "learning_rate": 0.00037099992496898276, "loss": 2.442034149169922, "step": 9460 }, { "epoch": 19.85235602094241, "learning_rate": 0.00037080399153838065, "loss": 2.3905046463012694, "step": 9470 }, { "epoch": 19.873298429319373, "learning_rate": 0.00037060836821084373, "loss": 2.4114078521728515, "step": 9480 }, { "epoch": 19.894240837696334, "learning_rate": 0.00037041305416923604, "loss": 2.393054962158203, "step": 9490 }, { "epoch": 19.915183246073298, "learning_rate": 0.0003702180485994327, "loss": 2.388008689880371, "step": 9500 }, { "epoch": 19.93612565445026, "learning_rate": 0.00037002335069030614, "loss": 2.4009252548217774, "step": 9510 }, { "epoch": 19.957068062827226, "learning_rate": 0.0003698289596337116, "loss": 2.3996566772460937, "step": 9520 }, { "epoch": 19.97801047120419, "learning_rate": 0.00036963487462447303, "loss": 2.3795480728149414, "step": 9530 }, { "epoch": 19.99895287958115, "learning_rate": 0.0003694410948603691, "loss": 2.4031463623046876, "step": 9540 }, { "epoch": 19.99895287958115, "eval_loss": 2.5357587337493896, "eval_runtime": 73.9758, "eval_samples_per_second": 148.102, "step": 9540 }, { "epoch": 20.020942408376964, "learning_rate": 0.00036924761954211944, "loss": 2.5228919982910156, "step": 9550 }, { "epoch": 20.041884816753928, "learning_rate": 0.0003690544478733707, "loss": 2.3785959243774415, "step": 9560 }, { "epoch": 20.06282722513089, "learning_rate": 0.0003688615790606828, "loss": 2.4037866592407227, "step": 9570 }, { "epoch": 20.083769633507853, "learning_rate": 0.000368669012313515, "loss": 2.3924365997314454, "step": 9580 }, { "epoch": 20.104712041884817, "learning_rate": 0.0003684767468442126, "loss": 2.4029878616333007, "step": 9590 }, { "epoch": 20.12565445026178, "learning_rate": 0.0003682847818679935, "loss": 2.387605094909668, "step": 9600 }, { "epoch": 20.146596858638745, "learning_rate": 0.0003680931166029342, "loss": 2.417312431335449, "step": 9610 }, { "epoch": 20.167539267015705, "learning_rate": 0.000367901750269957, "loss": 2.381046485900879, "step": 9620 }, { "epoch": 20.18848167539267, "learning_rate": 0.00036771068209281657, "loss": 2.376552963256836, "step": 9630 }, { "epoch": 20.209424083769633, "learning_rate": 0.0003675199112980863, "loss": 2.4089908599853516, "step": 9640 }, { "epoch": 20.230366492146597, "learning_rate": 0.0003673294371151458, "loss": 2.401862907409668, "step": 9650 }, { "epoch": 20.25130890052356, "learning_rate": 0.0003671392587761674, "loss": 2.406145477294922, "step": 9660 }, { "epoch": 20.272251308900522, "learning_rate": 0.0003669493755161031, "loss": 2.414588737487793, "step": 9670 }, { "epoch": 20.293193717277486, "learning_rate": 0.00036675978657267204, "loss": 2.4057403564453126, "step": 9680 }, { "epoch": 20.31413612565445, "learning_rate": 0.00036657049118634733, "loss": 2.404916000366211, "step": 9690 }, { "epoch": 20.335078534031414, "learning_rate": 0.0003663814886003432, "loss": 2.4110477447509764, "step": 9700 }, { "epoch": 20.356020942408378, "learning_rate": 0.00036619277806060276, "loss": 2.402661895751953, "step": 9710 }, { "epoch": 20.376963350785342, "learning_rate": 0.0003660043588157846, "loss": 2.404218864440918, "step": 9720 }, { "epoch": 20.397905759162303, "learning_rate": 0.00036581623011725114, "loss": 2.4069591522216798, "step": 9730 }, { "epoch": 20.418848167539267, "learning_rate": 0.0003656283912190554, "loss": 2.4185781478881836, "step": 9740 }, { "epoch": 20.43979057591623, "learning_rate": 0.00036544084137792883, "loss": 2.3999982833862306, "step": 9750 }, { "epoch": 20.460732984293195, "learning_rate": 0.00036525357985326903, "loss": 2.3702335357666016, "step": 9760 }, { "epoch": 20.48167539267016, "learning_rate": 0.0003650666059071275, "loss": 2.3878076553344725, "step": 9770 }, { "epoch": 20.50261780104712, "learning_rate": 0.00036487991880419725, "loss": 2.3818979263305664, "step": 9780 }, { "epoch": 20.523560209424083, "learning_rate": 0.00036469351781180073, "loss": 2.363344192504883, "step": 9790 }, { "epoch": 20.544502617801047, "learning_rate": 0.00036450740219987765, "loss": 2.3810457229614257, "step": 9800 }, { "epoch": 20.56544502617801, "learning_rate": 0.0003643215712409734, "loss": 2.411943054199219, "step": 9810 }, { "epoch": 20.586387434554975, "learning_rate": 0.00036413602421022653, "loss": 2.4092056274414064, "step": 9820 }, { "epoch": 20.607329842931936, "learning_rate": 0.0003639507603853572, "loss": 2.3944089889526365, "step": 9830 }, { "epoch": 20.6282722513089, "learning_rate": 0.00036376577904665525, "loss": 2.3668121337890624, "step": 9840 }, { "epoch": 20.649214659685864, "learning_rate": 0.00036358107947696876, "loss": 2.391695022583008, "step": 9850 }, { "epoch": 20.670157068062828, "learning_rate": 0.0003633966609616919, "loss": 2.380820083618164, "step": 9860 }, { "epoch": 20.691099476439792, "learning_rate": 0.00036321252278875344, "loss": 2.372467041015625, "step": 9870 }, { "epoch": 20.712041884816752, "learning_rate": 0.00036302866424860566, "loss": 2.3973648071289064, "step": 9880 }, { "epoch": 20.732984293193716, "learning_rate": 0.00036284508463421217, "loss": 2.3995847702026367, "step": 9890 }, { "epoch": 20.75392670157068, "learning_rate": 0.0003626617832410371, "loss": 2.3931917190551757, "step": 9900 }, { "epoch": 20.774869109947645, "learning_rate": 0.00036247875936703335, "loss": 2.4107311248779295, "step": 9910 }, { "epoch": 20.79581151832461, "learning_rate": 0.00036229601231263145, "loss": 2.367414855957031, "step": 9920 }, { "epoch": 20.81675392670157, "learning_rate": 0.0003621135413807282, "loss": 2.405007171630859, "step": 9930 }, { "epoch": 20.837696335078533, "learning_rate": 0.0003619313458766758, "loss": 2.364247512817383, "step": 9940 }, { "epoch": 20.858638743455497, "learning_rate": 0.0003617494251082704, "loss": 2.3823482513427736, "step": 9950 }, { "epoch": 20.87958115183246, "learning_rate": 0.0003615677783857413, "loss": 2.393014144897461, "step": 9960 }, { "epoch": 20.900523560209425, "learning_rate": 0.0003613864050217397, "loss": 2.3839509963989256, "step": 9970 }, { "epoch": 20.921465968586386, "learning_rate": 0.0003612053043313283, "loss": 2.378824806213379, "step": 9980 }, { "epoch": 20.94240837696335, "learning_rate": 0.0003610244756319697, "loss": 2.3893613815307617, "step": 9990 }, { "epoch": 20.963350785340314, "learning_rate": 0.00036084391824351607, "loss": 2.37738151550293, "step": 10000 }, { "epoch": 20.984293193717278, "learning_rate": 0.00036066363148819854, "loss": 2.389986038208008, "step": 10010 }, { "epoch": 20.99895287958115, "eval_loss": 2.5336103439331055, "eval_runtime": 73.4935, "eval_samples_per_second": 149.074, "step": 10017 }, { "epoch": 21.006282722513088, "learning_rate": 0.000360483614690616, "loss": 2.528822135925293, "step": 10020 }, { "epoch": 21.027225130890052, "learning_rate": 0.00036030386717772494, "loss": 2.3780399322509767, "step": 10030 }, { "epoch": 21.048167539267016, "learning_rate": 0.0003601243882788286, "loss": 2.3978437423706054, "step": 10040 }, { "epoch": 21.06910994764398, "learning_rate": 0.0003599451773255667, "loss": 2.382208061218262, "step": 10050 }, { "epoch": 21.09005235602094, "learning_rate": 0.00035976623365190465, "loss": 2.375508499145508, "step": 10060 }, { "epoch": 21.110994764397905, "learning_rate": 0.0003595875565941235, "loss": 2.422568511962891, "step": 10070 }, { "epoch": 21.13193717277487, "learning_rate": 0.00035940914549080944, "loss": 2.4112144470214845, "step": 10080 }, { "epoch": 21.152879581151833, "learning_rate": 0.0003592309996828435, "loss": 2.400478172302246, "step": 10090 }, { "epoch": 21.173821989528797, "learning_rate": 0.0003590531185133913, "loss": 2.403495216369629, "step": 10100 }, { "epoch": 21.194764397905757, "learning_rate": 0.0003588755013278929, "loss": 2.375596046447754, "step": 10110 }, { "epoch": 21.21570680628272, "learning_rate": 0.00035869814747405306, "loss": 2.3807771682739256, "step": 10120 }, { "epoch": 21.236649214659685, "learning_rate": 0.00035852105630183027, "loss": 2.40921630859375, "step": 10130 }, { "epoch": 21.25759162303665, "learning_rate": 0.0003583442271634278, "loss": 2.398925018310547, "step": 10140 }, { "epoch": 21.278534031413614, "learning_rate": 0.000358167659413283, "loss": 2.373432731628418, "step": 10150 }, { "epoch": 21.299476439790578, "learning_rate": 0.00035799135240805765, "loss": 2.4216379165649413, "step": 10160 }, { "epoch": 21.320418848167538, "learning_rate": 0.0003578153055066282, "loss": 2.3817609786987304, "step": 10170 }, { "epoch": 21.341361256544502, "learning_rate": 0.00035763951807007597, "loss": 2.4331357955932615, "step": 10180 }, { "epoch": 21.362303664921466, "learning_rate": 0.0003574639894616771, "loss": 2.4034128189086914, "step": 10190 }, { "epoch": 21.38324607329843, "learning_rate": 0.0003572887190468934, "loss": 2.3663650512695313, "step": 10200 }, { "epoch": 21.404188481675394, "learning_rate": 0.00035711370619336214, "loss": 2.3921630859375, "step": 10210 }, { "epoch": 21.425130890052355, "learning_rate": 0.00035693895027088694, "loss": 2.3804367065429686, "step": 10220 }, { "epoch": 21.44607329842932, "learning_rate": 0.00035676445065142793, "loss": 2.4022769927978516, "step": 10230 }, { "epoch": 21.467015706806283, "learning_rate": 0.0003565902067090925, "loss": 2.3611806869506835, "step": 10240 }, { "epoch": 21.487958115183247, "learning_rate": 0.0003564162178201257, "loss": 2.383506202697754, "step": 10250 }, { "epoch": 21.50890052356021, "learning_rate": 0.0003562424833629007, "loss": 2.385580062866211, "step": 10260 }, { "epoch": 21.52984293193717, "learning_rate": 0.0003560690027179101, "loss": 2.3934825897216796, "step": 10270 }, { "epoch": 21.550785340314135, "learning_rate": 0.00035589577526775603, "loss": 2.385503387451172, "step": 10280 }, { "epoch": 21.5717277486911, "learning_rate": 0.000355722800397141, "loss": 2.344258499145508, "step": 10290 }, { "epoch": 21.592670157068063, "learning_rate": 0.00035555007749285897, "loss": 2.4048336029052733, "step": 10300 }, { "epoch": 21.613612565445028, "learning_rate": 0.00035537760594378607, "loss": 2.3891706466674805, "step": 10310 }, { "epoch": 21.634554973821988, "learning_rate": 0.00035520538514087155, "loss": 2.397173309326172, "step": 10320 }, { "epoch": 21.655497382198952, "learning_rate": 0.0003550334144771289, "loss": 2.3752115249633787, "step": 10330 }, { "epoch": 21.676439790575916, "learning_rate": 0.00035486169334762637, "loss": 2.3773225784301757, "step": 10340 }, { "epoch": 21.69738219895288, "learning_rate": 0.00035469022114947857, "loss": 2.410744476318359, "step": 10350 }, { "epoch": 21.718324607329844, "learning_rate": 0.00035451899728183736, "loss": 2.4002202987670898, "step": 10360 }, { "epoch": 21.739267015706805, "learning_rate": 0.00035434802114588305, "loss": 2.371893119812012, "step": 10370 }, { "epoch": 21.76020942408377, "learning_rate": 0.00035417729214481556, "loss": 2.412856674194336, "step": 10380 }, { "epoch": 21.781151832460733, "learning_rate": 0.0003540068096838456, "loss": 2.414295959472656, "step": 10390 }, { "epoch": 21.802094240837697, "learning_rate": 0.0003538365731701862, "loss": 2.3874536514282227, "step": 10400 }, { "epoch": 21.82303664921466, "learning_rate": 0.0003536665820130437, "loss": 2.408889389038086, "step": 10410 }, { "epoch": 21.843979057591625, "learning_rate": 0.00035349683562360966, "loss": 2.4029043197631834, "step": 10420 }, { "epoch": 21.864921465968585, "learning_rate": 0.0003533273334150517, "loss": 2.3872053146362306, "step": 10430 }, { "epoch": 21.88586387434555, "learning_rate": 0.0003531580748025054, "loss": 2.373563766479492, "step": 10440 }, { "epoch": 21.906806282722513, "learning_rate": 0.00035298905920306563, "loss": 2.3822809219360352, "step": 10450 }, { "epoch": 21.927748691099477, "learning_rate": 0.00035282028603577823, "loss": 2.4076284408569335, "step": 10460 }, { "epoch": 21.94869109947644, "learning_rate": 0.0003526517547216315, "loss": 2.3945655822753906, "step": 10470 }, { "epoch": 21.969633507853402, "learning_rate": 0.000352483464683548, "loss": 2.360683059692383, "step": 10480 }, { "epoch": 21.990575916230366, "learning_rate": 0.0003523154153463761, "loss": 2.371842956542969, "step": 10490 }, { "epoch": 21.99895287958115, "eval_loss": 2.536722183227539, "eval_runtime": 79.9983, "eval_samples_per_second": 136.953, "step": 10494 }, { "epoch": 22.012565445026176, "learning_rate": 0.00035214760613688187, "loss": 2.498021697998047, "step": 10500 }, { "epoch": 22.03350785340314, "learning_rate": 0.0003519800364837407, "loss": 2.377554702758789, "step": 10510 }, { "epoch": 22.054450261780104, "learning_rate": 0.0003518127058175293, "loss": 2.380527687072754, "step": 10520 }, { "epoch": 22.07539267015707, "learning_rate": 0.00035164561357071755, "loss": 2.3838827133178713, "step": 10530 }, { "epoch": 22.096335078534032, "learning_rate": 0.0003514787591776602, "loss": 2.3740776062011717, "step": 10540 }, { "epoch": 22.117277486910996, "learning_rate": 0.0003513121420745892, "loss": 2.374008560180664, "step": 10550 }, { "epoch": 22.138219895287957, "learning_rate": 0.0003511457616996052, "loss": 2.373431587219238, "step": 10560 }, { "epoch": 22.15916230366492, "learning_rate": 0.0003509796174926703, "loss": 2.403927803039551, "step": 10570 }, { "epoch": 22.180104712041885, "learning_rate": 0.00035081370889559934, "loss": 2.4006370544433593, "step": 10580 }, { "epoch": 22.20104712041885, "learning_rate": 0.0003506480353520526, "loss": 2.3758676528930662, "step": 10590 }, { "epoch": 22.221989528795813, "learning_rate": 0.0003504825963075276, "loss": 2.417715644836426, "step": 10600 }, { "epoch": 22.242931937172774, "learning_rate": 0.00035031739120935175, "loss": 2.3941156387329103, "step": 10610 }, { "epoch": 22.263874345549738, "learning_rate": 0.0003501524195066741, "loss": 2.3949649810791014, "step": 10620 }, { "epoch": 22.2848167539267, "learning_rate": 0.0003499876806504578, "loss": 2.4047883987426757, "step": 10630 }, { "epoch": 22.305759162303666, "learning_rate": 0.00034982317409347263, "loss": 2.3971155166625975, "step": 10640 }, { "epoch": 22.32670157068063, "learning_rate": 0.00034965889929028707, "loss": 2.404866027832031, "step": 10650 }, { "epoch": 22.34764397905759, "learning_rate": 0.000349494855697261, "loss": 2.3915122985839843, "step": 10660 }, { "epoch": 22.368586387434554, "learning_rate": 0.0003493310427725377, "loss": 2.39025936126709, "step": 10670 }, { "epoch": 22.38952879581152, "learning_rate": 0.0003491674599760369, "loss": 2.399850273132324, "step": 10680 }, { "epoch": 22.410471204188482, "learning_rate": 0.0003490041067694469, "loss": 2.3945247650146486, "step": 10690 }, { "epoch": 22.431413612565446, "learning_rate": 0.00034884098261621724, "loss": 2.397679901123047, "step": 10700 }, { "epoch": 22.452356020942407, "learning_rate": 0.00034867808698155125, "loss": 2.355159568786621, "step": 10710 }, { "epoch": 22.47329842931937, "learning_rate": 0.0003485154193323988, "loss": 2.3898927688598635, "step": 10720 }, { "epoch": 22.494240837696335, "learning_rate": 0.00034835297913744903, "loss": 2.367123031616211, "step": 10730 }, { "epoch": 22.5151832460733, "learning_rate": 0.0003481907658671227, "loss": 2.3852542877197265, "step": 10740 }, { "epoch": 22.536125654450263, "learning_rate": 0.0003480287789935653, "loss": 2.4065229415893556, "step": 10750 }, { "epoch": 22.557068062827224, "learning_rate": 0.00034786701799063976, "loss": 2.3588846206665037, "step": 10760 }, { "epoch": 22.578010471204188, "learning_rate": 0.00034770548233391924, "loss": 2.390997123718262, "step": 10770 }, { "epoch": 22.59895287958115, "learning_rate": 0.0003475441715006799, "loss": 2.3878786087036135, "step": 10780 }, { "epoch": 22.619895287958116, "learning_rate": 0.0003473830849698938, "loss": 2.398370552062988, "step": 10790 }, { "epoch": 22.64083769633508, "learning_rate": 0.0003472222222222222, "loss": 2.3805349349975584, "step": 10800 }, { "epoch": 22.66178010471204, "learning_rate": 0.00034706158274000796, "loss": 2.3977741241455077, "step": 10810 }, { "epoch": 22.682722513089004, "learning_rate": 0.00034690116600726885, "loss": 2.387373924255371, "step": 10820 }, { "epoch": 22.70366492146597, "learning_rate": 0.0003467409715096907, "loss": 2.3700994491577148, "step": 10830 }, { "epoch": 22.724607329842932, "learning_rate": 0.00034658099873462027, "loss": 2.3671117782592774, "step": 10840 }, { "epoch": 22.745549738219896, "learning_rate": 0.0003464212471710583, "loss": 2.377743148803711, "step": 10850 }, { "epoch": 22.76649214659686, "learning_rate": 0.0003462617163096529, "loss": 2.386002540588379, "step": 10860 }, { "epoch": 22.78743455497382, "learning_rate": 0.00034610240564269265, "loss": 2.3687204360961913, "step": 10870 }, { "epoch": 22.808376963350785, "learning_rate": 0.0003459433146640997, "loss": 2.3671218872070314, "step": 10880 }, { "epoch": 22.82931937172775, "learning_rate": 0.00034578444286942307, "loss": 2.3793460845947267, "step": 10890 }, { "epoch": 22.850261780104713, "learning_rate": 0.00034562578975583187, "loss": 2.374790382385254, "step": 10900 }, { "epoch": 22.871204188481677, "learning_rate": 0.00034546735482210894, "loss": 2.356049728393555, "step": 10910 }, { "epoch": 22.892146596858638, "learning_rate": 0.0003453091375686437, "loss": 2.361851119995117, "step": 10920 }, { "epoch": 22.9130890052356, "learning_rate": 0.00034515113749742586, "loss": 2.3911083221435545, "step": 10930 }, { "epoch": 22.934031413612566, "learning_rate": 0.00034499335411203894, "loss": 2.353407096862793, "step": 10940 }, { "epoch": 22.95497382198953, "learning_rate": 0.00034483578691765326, "loss": 2.3763240814208983, "step": 10950 }, { "epoch": 22.975916230366494, "learning_rate": 0.00034467843542102, "loss": 2.3745288848876953, "step": 10960 }, { "epoch": 22.996858638743454, "learning_rate": 0.0003445212991304641, "loss": 2.367890167236328, "step": 10970 }, { "epoch": 22.99895287958115, "eval_loss": 2.542264461517334, "eval_runtime": 74.647, "eval_samples_per_second": 146.771, "step": 10971 }, { "epoch": 23.018848167539268, "learning_rate": 0.00034436437755587827, "loss": 2.4732553482055666, "step": 10980 }, { "epoch": 23.039790575916232, "learning_rate": 0.00034420767020871656, "loss": 2.3789663314819336, "step": 10990 }, { "epoch": 23.060732984293193, "learning_rate": 0.00034405117660198765, "loss": 2.387537384033203, "step": 11000 }, { "epoch": 23.081675392670157, "learning_rate": 0.00034389489625024885, "loss": 2.3760297775268553, "step": 11010 }, { "epoch": 23.10261780104712, "learning_rate": 0.00034373882866959936, "loss": 2.3746875762939452, "step": 11020 }, { "epoch": 23.123560209424085, "learning_rate": 0.0003435829733776745, "loss": 2.4071685791015627, "step": 11030 }, { "epoch": 23.14450261780105, "learning_rate": 0.00034342732989363903, "loss": 2.3595859527587892, "step": 11040 }, { "epoch": 23.16544502617801, "learning_rate": 0.0003432718977381811, "loss": 2.356878662109375, "step": 11050 }, { "epoch": 23.186387434554973, "learning_rate": 0.0003431166764335058, "loss": 2.3806716918945314, "step": 11060 }, { "epoch": 23.207329842931937, "learning_rate": 0.0003429616655033297, "loss": 2.365432929992676, "step": 11070 }, { "epoch": 23.2282722513089, "learning_rate": 0.00034280686447287373, "loss": 2.3477930068969726, "step": 11080 }, { "epoch": 23.249214659685865, "learning_rate": 0.00034265227286885776, "loss": 2.359480094909668, "step": 11090 }, { "epoch": 23.270157068062826, "learning_rate": 0.00034249789021949435, "loss": 2.3736724853515625, "step": 11100 }, { "epoch": 23.29109947643979, "learning_rate": 0.0003423437160544826, "loss": 2.3853965759277345, "step": 11110 }, { "epoch": 23.312041884816754, "learning_rate": 0.0003421897499050022, "loss": 2.3594213485717774, "step": 11120 }, { "epoch": 23.332984293193718, "learning_rate": 0.0003420359913037075, "loss": 2.3540416717529298, "step": 11130 }, { "epoch": 23.353926701570682, "learning_rate": 0.0003418824397847216, "loss": 2.371465301513672, "step": 11140 }, { "epoch": 23.374869109947642, "learning_rate": 0.00034172909488363007, "loss": 2.360518455505371, "step": 11150 }, { "epoch": 23.395811518324606, "learning_rate": 0.00034157595613747545, "loss": 2.347417640686035, "step": 11160 }, { "epoch": 23.41675392670157, "learning_rate": 0.00034142302308475133, "loss": 2.388157081604004, "step": 11170 }, { "epoch": 23.437696335078535, "learning_rate": 0.0003412702952653962, "loss": 2.348739433288574, "step": 11180 }, { "epoch": 23.4586387434555, "learning_rate": 0.00034111777222078796, "loss": 2.3871492385864257, "step": 11190 }, { "epoch": 23.47958115183246, "learning_rate": 0.00034096545349373804, "loss": 2.3624570846557615, "step": 11200 }, { "epoch": 23.500523560209423, "learning_rate": 0.0003408133386284857, "loss": 2.337727165222168, "step": 11210 }, { "epoch": 23.521465968586387, "learning_rate": 0.0003406614271706919, "loss": 2.3544214248657225, "step": 11220 }, { "epoch": 23.54240837696335, "learning_rate": 0.0003405097186674344, "loss": 2.3601694107055664, "step": 11230 }, { "epoch": 23.563350785340315, "learning_rate": 0.00034035821266720136, "loss": 2.3869655609130858, "step": 11240 }, { "epoch": 23.58429319371728, "learning_rate": 0.0003402069087198858, "loss": 2.3417810440063476, "step": 11250 }, { "epoch": 23.60523560209424, "learning_rate": 0.00034005580637678053, "loss": 2.3512496948242188, "step": 11260 }, { "epoch": 23.626178010471204, "learning_rate": 0.00033990490519057183, "loss": 2.3688682556152343, "step": 11270 }, { "epoch": 23.647120418848168, "learning_rate": 0.0003397542047153345, "loss": 2.3577795028686523, "step": 11280 }, { "epoch": 23.668062827225132, "learning_rate": 0.0003396037045065257, "loss": 2.380731201171875, "step": 11290 }, { "epoch": 23.689005235602096, "learning_rate": 0.0003394534041209802, "loss": 2.349542427062988, "step": 11300 }, { "epoch": 23.709947643979056, "learning_rate": 0.0003393033031169043, "loss": 2.3590700149536135, "step": 11310 }, { "epoch": 23.73089005235602, "learning_rate": 0.0003391534010538705, "loss": 2.392327880859375, "step": 11320 }, { "epoch": 23.751832460732984, "learning_rate": 0.00033900369749281225, "loss": 2.3760391235351563, "step": 11330 }, { "epoch": 23.77277486910995, "learning_rate": 0.00033885419199601845, "loss": 2.355258560180664, "step": 11340 }, { "epoch": 23.793717277486913, "learning_rate": 0.000338704884127128, "loss": 2.369922065734863, "step": 11350 }, { "epoch": 23.814659685863873, "learning_rate": 0.00033855577345112453, "loss": 2.410330390930176, "step": 11360 }, { "epoch": 23.835602094240837, "learning_rate": 0.0003384068595343312, "loss": 2.374154472351074, "step": 11370 }, { "epoch": 23.8565445026178, "learning_rate": 0.00033825814194440504, "loss": 2.360888671875, "step": 11380 }, { "epoch": 23.877486910994765, "learning_rate": 0.0003381096202503321, "loss": 2.372193145751953, "step": 11390 }, { "epoch": 23.89842931937173, "learning_rate": 0.00033796129402242193, "loss": 2.3628297805786134, "step": 11400 }, { "epoch": 23.91937172774869, "learning_rate": 0.0003378131628323024, "loss": 2.365167999267578, "step": 11410 }, { "epoch": 23.940314136125654, "learning_rate": 0.0003376652262529146, "loss": 2.385006332397461, "step": 11420 }, { "epoch": 23.961256544502618, "learning_rate": 0.00033751748385850753, "loss": 2.3524898529052733, "step": 11430 }, { "epoch": 23.982198952879582, "learning_rate": 0.00033736993522463316, "loss": 2.374051094055176, "step": 11440 }, { "epoch": 23.99895287958115, "eval_loss": 2.532155990600586, "eval_runtime": 73.6953, "eval_samples_per_second": 148.666, "step": 11448 }, { "epoch": 24.004188481675392, "learning_rate": 0.00033722257992814113, "loss": 2.4828319549560547, "step": 11450 }, { "epoch": 24.025130890052356, "learning_rate": 0.0003370754175471737, "loss": 2.360254669189453, "step": 11460 }, { "epoch": 24.04607329842932, "learning_rate": 0.0003369284476611607, "loss": 2.349439811706543, "step": 11470 }, { "epoch": 24.067015706806284, "learning_rate": 0.00033678166985081433, "loss": 2.36633415222168, "step": 11480 }, { "epoch": 24.087958115183245, "learning_rate": 0.0003366350836981245, "loss": 2.365359306335449, "step": 11490 }, { "epoch": 24.10890052356021, "learning_rate": 0.0003364886887863534, "loss": 2.362344169616699, "step": 11500 }, { "epoch": 24.129842931937173, "learning_rate": 0.0003363424847000309, "loss": 2.357081985473633, "step": 11510 }, { "epoch": 24.150785340314137, "learning_rate": 0.0003361964710249494, "loss": 2.3507287979125975, "step": 11520 }, { "epoch": 24.1717277486911, "learning_rate": 0.00033605064734815865, "loss": 2.3537702560424805, "step": 11530 }, { "epoch": 24.19267015706806, "learning_rate": 0.0003359050132579615, "loss": 2.37689151763916, "step": 11540 }, { "epoch": 24.213612565445025, "learning_rate": 0.00033575956834390843, "loss": 2.348763847351074, "step": 11550 }, { "epoch": 24.23455497382199, "learning_rate": 0.00033561431219679297, "loss": 2.3715591430664062, "step": 11560 }, { "epoch": 24.255497382198953, "learning_rate": 0.00033546924440864666, "loss": 2.3734716415405273, "step": 11570 }, { "epoch": 24.276439790575917, "learning_rate": 0.0003353243645727346, "loss": 2.3519350051879884, "step": 11580 }, { "epoch": 24.297382198952878, "learning_rate": 0.0003351796722835502, "loss": 2.355198287963867, "step": 11590 }, { "epoch": 24.318324607329842, "learning_rate": 0.00033503516713681087, "loss": 2.3704608917236327, "step": 11600 }, { "epoch": 24.339267015706806, "learning_rate": 0.00033489084872945283, "loss": 2.3960491180419923, "step": 11610 }, { "epoch": 24.36020942408377, "learning_rate": 0.0003347467166596268, "loss": 2.3748762130737306, "step": 11620 }, { "epoch": 24.381151832460734, "learning_rate": 0.0003346027705266929, "loss": 2.358123016357422, "step": 11630 }, { "epoch": 24.402094240837695, "learning_rate": 0.0003344590099312164, "loss": 2.345402717590332, "step": 11640 }, { "epoch": 24.42303664921466, "learning_rate": 0.00033431543447496275, "loss": 2.331704330444336, "step": 11650 }, { "epoch": 24.443979057591623, "learning_rate": 0.000334172043760893, "loss": 2.377284812927246, "step": 11660 }, { "epoch": 24.464921465968587, "learning_rate": 0.0003340288373931593, "loss": 2.354692268371582, "step": 11670 }, { "epoch": 24.48586387434555, "learning_rate": 0.0003338858149771002, "loss": 2.3740156173706053, "step": 11680 }, { "epoch": 24.506806282722515, "learning_rate": 0.0003337429761192361, "loss": 2.367665672302246, "step": 11690 }, { "epoch": 24.527748691099475, "learning_rate": 0.00033360032042726483, "loss": 2.362037467956543, "step": 11700 }, { "epoch": 24.54869109947644, "learning_rate": 0.000333457847510057, "loss": 2.371419334411621, "step": 11710 }, { "epoch": 24.569633507853403, "learning_rate": 0.0003333155569776514, "loss": 2.33715934753418, "step": 11720 }, { "epoch": 24.590575916230367, "learning_rate": 0.00033317344844125064, "loss": 2.3361494064331056, "step": 11730 }, { "epoch": 24.61151832460733, "learning_rate": 0.00033303152151321696, "loss": 2.346495819091797, "step": 11740 }, { "epoch": 24.632460732984292, "learning_rate": 0.00033288977580706714, "loss": 2.3827404022216796, "step": 11750 }, { "epoch": 24.653403141361256, "learning_rate": 0.0003327482109374687, "loss": 2.3554365158081056, "step": 11760 }, { "epoch": 24.67434554973822, "learning_rate": 0.00033260682652023517, "loss": 2.3607540130615234, "step": 11770 }, { "epoch": 24.695287958115184, "learning_rate": 0.0003324656221723217, "loss": 2.3806394577026366, "step": 11780 }, { "epoch": 24.716230366492148, "learning_rate": 0.000332324597511821, "loss": 2.3277612686157227, "step": 11790 }, { "epoch": 24.73717277486911, "learning_rate": 0.00033218375215795864, "loss": 2.3573076248168947, "step": 11800 }, { "epoch": 24.758115183246073, "learning_rate": 0.00033204308573108897, "loss": 2.3293807983398436, "step": 11810 }, { "epoch": 24.779057591623037, "learning_rate": 0.00033190259785269066, "loss": 2.3627220153808595, "step": 11820 }, { "epoch": 24.8, "learning_rate": 0.0003317622881453626, "loss": 2.3504779815673826, "step": 11830 }, { "epoch": 24.820942408376965, "learning_rate": 0.0003316221562328194, "loss": 2.3526493072509767, "step": 11840 }, { "epoch": 24.841884816753925, "learning_rate": 0.0003314822017398875, "loss": 2.367503547668457, "step": 11850 }, { "epoch": 24.86282722513089, "learning_rate": 0.00033134242429250053, "loss": 2.364429473876953, "step": 11860 }, { "epoch": 24.883769633507853, "learning_rate": 0.00033120282351769556, "loss": 2.3418235778808594, "step": 11870 }, { "epoch": 24.904712041884817, "learning_rate": 0.0003310633990436084, "loss": 2.361065483093262, "step": 11880 }, { "epoch": 24.92565445026178, "learning_rate": 0.00033092415049947006, "loss": 2.3631685256958006, "step": 11890 }, { "epoch": 24.946596858638742, "learning_rate": 0.00033078507751560195, "loss": 2.346321868896484, "step": 11900 }, { "epoch": 24.967539267015706, "learning_rate": 0.00033064617972341235, "loss": 2.3589923858642576, "step": 11910 }, { "epoch": 24.98848167539267, "learning_rate": 0.0003305074567553919, "loss": 2.3485301971435546, "step": 11920 }, { "epoch": 24.99895287958115, "eval_loss": 2.5436818599700928, "eval_runtime": 73.6581, "eval_samples_per_second": 148.741, "step": 11925 }, { "epoch": 25.01047120418848, "learning_rate": 0.0003303689082451096, "loss": 2.483962059020996, "step": 11930 }, { "epoch": 25.031413612565444, "learning_rate": 0.00033023053382720904, "loss": 2.352615547180176, "step": 11940 }, { "epoch": 25.05235602094241, "learning_rate": 0.0003300923331374039, "loss": 2.379102325439453, "step": 11950 }, { "epoch": 25.073298429319372, "learning_rate": 0.00032995430581247417, "loss": 2.3579853057861326, "step": 11960 }, { "epoch": 25.094240837696336, "learning_rate": 0.0003298164514902622, "loss": 2.3461565017700194, "step": 11970 }, { "epoch": 25.115183246073297, "learning_rate": 0.0003296787698096686, "loss": 2.328052520751953, "step": 11980 }, { "epoch": 25.13612565445026, "learning_rate": 0.0003295412604106482, "loss": 2.337063026428223, "step": 11990 }, { "epoch": 25.157068062827225, "learning_rate": 0.00032940392293420614, "loss": 2.3367223739624023, "step": 12000 }, { "epoch": 25.17801047120419, "learning_rate": 0.00032926675702239425, "loss": 2.365107536315918, "step": 12010 }, { "epoch": 25.198952879581153, "learning_rate": 0.00032912976231830646, "loss": 2.3596302032470704, "step": 12020 }, { "epoch": 25.219895287958114, "learning_rate": 0.0003289929384660757, "loss": 2.336884307861328, "step": 12030 }, { "epoch": 25.240837696335078, "learning_rate": 0.0003288562851108693, "loss": 2.3663518905639647, "step": 12040 }, { "epoch": 25.26178010471204, "learning_rate": 0.0003287198018988856, "loss": 2.3383811950683593, "step": 12050 }, { "epoch": 25.282722513089006, "learning_rate": 0.00032858348847734985, "loss": 2.3640661239624023, "step": 12060 }, { "epoch": 25.30366492146597, "learning_rate": 0.00032844734449451055, "loss": 2.3613861083984373, "step": 12070 }, { "epoch": 25.324607329842934, "learning_rate": 0.00032831136959963553, "loss": 2.3227806091308594, "step": 12080 }, { "epoch": 25.345549738219894, "learning_rate": 0.00032817556344300823, "loss": 2.328192710876465, "step": 12090 }, { "epoch": 25.36649214659686, "learning_rate": 0.0003280399256759237, "loss": 2.3461523056030273, "step": 12100 }, { "epoch": 25.387434554973822, "learning_rate": 0.0003279044559506852, "loss": 2.3762447357177736, "step": 12110 }, { "epoch": 25.408376963350786, "learning_rate": 0.0003277691539206003, "loss": 2.325837326049805, "step": 12120 }, { "epoch": 25.42931937172775, "learning_rate": 0.0003276340192399769, "loss": 2.3660905838012694, "step": 12130 }, { "epoch": 25.45026178010471, "learning_rate": 0.00032749905156412, "loss": 2.360948181152344, "step": 12140 }, { "epoch": 25.471204188481675, "learning_rate": 0.0003273642505493275, "loss": 2.3165866851806642, "step": 12150 }, { "epoch": 25.49214659685864, "learning_rate": 0.0003272296158528871, "loss": 2.3487401962280274, "step": 12160 }, { "epoch": 25.513089005235603, "learning_rate": 0.000327095147133072, "loss": 2.351056671142578, "step": 12170 }, { "epoch": 25.534031413612567, "learning_rate": 0.00032696084404913777, "loss": 2.3396501541137695, "step": 12180 }, { "epoch": 25.554973821989527, "learning_rate": 0.00032682670626131837, "loss": 2.3343048095703125, "step": 12190 }, { "epoch": 25.57591623036649, "learning_rate": 0.0003266927334308229, "loss": 2.3392221450805666, "step": 12200 }, { "epoch": 25.596858638743456, "learning_rate": 0.0003265589252198317, "loss": 2.339245414733887, "step": 12210 }, { "epoch": 25.61780104712042, "learning_rate": 0.0003264252812914928, "loss": 2.343129539489746, "step": 12220 }, { "epoch": 25.638743455497384, "learning_rate": 0.0003262918013099186, "loss": 2.344712829589844, "step": 12230 }, { "epoch": 25.659685863874344, "learning_rate": 0.00032615848494018204, "loss": 2.364294242858887, "step": 12240 }, { "epoch": 25.680628272251308, "learning_rate": 0.0003260253318483131, "loss": 2.3588529586791993, "step": 12250 }, { "epoch": 25.701570680628272, "learning_rate": 0.0003258923417012957, "loss": 2.3558927536010743, "step": 12260 }, { "epoch": 25.722513089005236, "learning_rate": 0.00032575951416706354, "loss": 2.370713996887207, "step": 12270 }, { "epoch": 25.7434554973822, "learning_rate": 0.0003256268489144972, "loss": 2.3426084518432617, "step": 12280 }, { "epoch": 25.76439790575916, "learning_rate": 0.0003254943456134202, "loss": 2.3299545288085937, "step": 12290 }, { "epoch": 25.785340314136125, "learning_rate": 0.0003253620039345959, "loss": 2.343545913696289, "step": 12300 }, { "epoch": 25.80628272251309, "learning_rate": 0.0003252298235497241, "loss": 2.3348289489746095, "step": 12310 }, { "epoch": 25.827225130890053, "learning_rate": 0.0003250978041314371, "loss": 2.3712085723876952, "step": 12320 }, { "epoch": 25.848167539267017, "learning_rate": 0.000324965945353297, "loss": 2.347680854797363, "step": 12330 }, { "epoch": 25.869109947643977, "learning_rate": 0.0003248342468897917, "loss": 2.342079925537109, "step": 12340 }, { "epoch": 25.89005235602094, "learning_rate": 0.00032470270841633195, "loss": 2.376851272583008, "step": 12350 }, { "epoch": 25.910994764397905, "learning_rate": 0.00032457132960924783, "loss": 2.3613746643066404, "step": 12360 }, { "epoch": 25.93193717277487, "learning_rate": 0.00032444011014578535, "loss": 2.3406829833984375, "step": 12370 }, { "epoch": 25.952879581151834, "learning_rate": 0.00032430904970410314, "loss": 2.328056526184082, "step": 12380 }, { "epoch": 25.973821989528794, "learning_rate": 0.0003241781479632693, "loss": 2.3438344955444337, "step": 12390 }, { "epoch": 25.994764397905758, "learning_rate": 0.0003240474046032579, "loss": 2.349610137939453, "step": 12400 }, { "epoch": 25.99895287958115, "eval_loss": 2.533395767211914, "eval_runtime": 73.6578, "eval_samples_per_second": 148.742, "step": 12402 }, { "epoch": 26.016753926701572, "learning_rate": 0.00032391681930494566, "loss": 2.4640811920166015, "step": 12410 }, { "epoch": 26.037696335078532, "learning_rate": 0.000323786391750109, "loss": 2.3338626861572265, "step": 12420 }, { "epoch": 26.058638743455496, "learning_rate": 0.0003236561216214202, "loss": 2.342071533203125, "step": 12430 }, { "epoch": 26.07958115183246, "learning_rate": 0.000323526008602445, "loss": 2.374074172973633, "step": 12440 }, { "epoch": 26.100523560209425, "learning_rate": 0.0003233960523776387, "loss": 2.3421449661254883, "step": 12450 }, { "epoch": 26.12146596858639, "learning_rate": 0.0003232662526323429, "loss": 2.3614429473876952, "step": 12460 }, { "epoch": 26.14240837696335, "learning_rate": 0.0003231366090527828, "loss": 2.319747543334961, "step": 12470 }, { "epoch": 26.163350785340313, "learning_rate": 0.00032300712132606366, "loss": 2.3622182846069335, "step": 12480 }, { "epoch": 26.184293193717277, "learning_rate": 0.0003228777891401678, "loss": 2.359231185913086, "step": 12490 }, { "epoch": 26.20523560209424, "learning_rate": 0.0003227486121839514, "loss": 2.355366516113281, "step": 12500 }, { "epoch": 26.226178010471205, "learning_rate": 0.00032261959014714107, "loss": 2.3299293518066406, "step": 12510 }, { "epoch": 26.24712041884817, "learning_rate": 0.0003224907227203312, "loss": 2.3555164337158203, "step": 12520 }, { "epoch": 26.26806282722513, "learning_rate": 0.0003223620095949806, "loss": 2.314861869812012, "step": 12530 }, { "epoch": 26.289005235602094, "learning_rate": 0.00032223345046340936, "loss": 2.3362022399902345, "step": 12540 }, { "epoch": 26.309947643979058, "learning_rate": 0.00032210504501879576, "loss": 2.3406482696533204, "step": 12550 }, { "epoch": 26.330890052356022, "learning_rate": 0.0003219767929551733, "loss": 2.30753231048584, "step": 12560 }, { "epoch": 26.351832460732986, "learning_rate": 0.00032184869396742754, "loss": 2.351367950439453, "step": 12570 }, { "epoch": 26.372774869109946, "learning_rate": 0.00032172074775129323, "loss": 2.3465883255004885, "step": 12580 }, { "epoch": 26.39371727748691, "learning_rate": 0.00032159295400335114, "loss": 2.3782730102539062, "step": 12590 }, { "epoch": 26.414659685863874, "learning_rate": 0.00032146531242102476, "loss": 2.3480430603027345, "step": 12600 }, { "epoch": 26.43560209424084, "learning_rate": 0.0003213378227025779, "loss": 2.370161437988281, "step": 12610 }, { "epoch": 26.456544502617803, "learning_rate": 0.00032121048454711114, "loss": 2.3542537689208984, "step": 12620 }, { "epoch": 26.477486910994763, "learning_rate": 0.00032108329765455926, "loss": 2.3564731597900392, "step": 12630 }, { "epoch": 26.498429319371727, "learning_rate": 0.00032095626172568784, "loss": 2.333011817932129, "step": 12640 }, { "epoch": 26.51937172774869, "learning_rate": 0.00032082937646209084, "loss": 2.3433643341064454, "step": 12650 }, { "epoch": 26.540314136125655, "learning_rate": 0.0003207026415661871, "loss": 2.3346595764160156, "step": 12660 }, { "epoch": 26.56125654450262, "learning_rate": 0.0003205760567412178, "loss": 2.339708709716797, "step": 12670 }, { "epoch": 26.58219895287958, "learning_rate": 0.00032044962169124335, "loss": 2.3501649856567384, "step": 12680 }, { "epoch": 26.603141361256544, "learning_rate": 0.0003203233361211406, "loss": 2.3222862243652345, "step": 12690 }, { "epoch": 26.624083769633508, "learning_rate": 0.00032019719973659996, "loss": 2.3362213134765626, "step": 12700 }, { "epoch": 26.645026178010472, "learning_rate": 0.00032007121224412224, "loss": 2.311092567443848, "step": 12710 }, { "epoch": 26.665968586387436, "learning_rate": 0.0003199453733510162, "loss": 2.332124137878418, "step": 12720 }, { "epoch": 26.686910994764396, "learning_rate": 0.00031981968276539543, "loss": 2.3406246185302733, "step": 12730 }, { "epoch": 26.70785340314136, "learning_rate": 0.0003196941401961754, "loss": 2.3419260025024413, "step": 12740 }, { "epoch": 26.728795811518324, "learning_rate": 0.000319568745353071, "loss": 2.344953727722168, "step": 12750 }, { "epoch": 26.74973821989529, "learning_rate": 0.0003194434979465935, "loss": 2.3517208099365234, "step": 12760 }, { "epoch": 26.770680628272252, "learning_rate": 0.0003193183976880476, "loss": 2.3811822891235352, "step": 12770 }, { "epoch": 26.791623036649213, "learning_rate": 0.00031919344428952895, "loss": 2.3604736328125, "step": 12780 }, { "epoch": 26.812565445026177, "learning_rate": 0.0003190686374639211, "loss": 2.3444387435913088, "step": 12790 }, { "epoch": 26.83350785340314, "learning_rate": 0.00031894397692489295, "loss": 2.340729331970215, "step": 12800 }, { "epoch": 26.854450261780105, "learning_rate": 0.0003188194623868958, "loss": 2.340890121459961, "step": 12810 }, { "epoch": 26.87539267015707, "learning_rate": 0.00031869509356516063, "loss": 2.3352834701538088, "step": 12820 }, { "epoch": 26.89633507853403, "learning_rate": 0.00031857087017569556, "loss": 2.3224008560180662, "step": 12830 }, { "epoch": 26.917277486910994, "learning_rate": 0.0003184467919352828, "loss": 2.3237512588500975, "step": 12840 }, { "epoch": 26.938219895287958, "learning_rate": 0.0003183228585614763, "loss": 2.3366432189941406, "step": 12850 }, { "epoch": 26.95916230366492, "learning_rate": 0.0003181990697725988, "loss": 2.3389394760131834, "step": 12860 }, { "epoch": 26.980104712041886, "learning_rate": 0.0003180754252877392, "loss": 2.2963605880737306, "step": 12870 }, { "epoch": 26.99895287958115, "eval_loss": 2.5350682735443115, "eval_runtime": 73.272, "eval_samples_per_second": 149.525, "step": 12879 }, { "epoch": 27.002094240837696, "learning_rate": 0.0003179519248267498, "loss": 2.4346525192260744, "step": 12880 }, { "epoch": 27.02303664921466, "learning_rate": 0.000317828568110244, "loss": 2.3435186386108398, "step": 12890 }, { "epoch": 27.043979057591624, "learning_rate": 0.000317705354859593, "loss": 2.351651191711426, "step": 12900 }, { "epoch": 27.064921465968588, "learning_rate": 0.0003175822847969239, "loss": 2.3490814208984374, "step": 12910 }, { "epoch": 27.08586387434555, "learning_rate": 0.00031745935764511645, "loss": 2.3329612731933596, "step": 12920 }, { "epoch": 27.106806282722513, "learning_rate": 0.0003173365731278007, "loss": 2.321672248840332, "step": 12930 }, { "epoch": 27.127748691099477, "learning_rate": 0.00031721393096935445, "loss": 2.3357425689697267, "step": 12940 }, { "epoch": 27.14869109947644, "learning_rate": 0.00031709143089490063, "loss": 2.3383440017700194, "step": 12950 }, { "epoch": 27.169633507853405, "learning_rate": 0.00031696907263030445, "loss": 2.3266096115112305, "step": 12960 }, { "epoch": 27.190575916230365, "learning_rate": 0.00031684685590217115, "loss": 2.3512828826904295, "step": 12970 }, { "epoch": 27.21151832460733, "learning_rate": 0.00031672478043784336, "loss": 2.329998016357422, "step": 12980 }, { "epoch": 27.232460732984293, "learning_rate": 0.0003166028459653984, "loss": 2.353693962097168, "step": 12990 }, { "epoch": 27.253403141361257, "learning_rate": 0.0003164810522136458, "loss": 2.3388673782348635, "step": 13000 }, { "epoch": 27.27434554973822, "learning_rate": 0.0003163593989121249, "loss": 2.34061222076416, "step": 13010 }, { "epoch": 27.295287958115182, "learning_rate": 0.0003162378857911022, "loss": 2.3279703140258787, "step": 13020 }, { "epoch": 27.316230366492146, "learning_rate": 0.00031611651258156884, "loss": 2.3643896102905275, "step": 13030 }, { "epoch": 27.33717277486911, "learning_rate": 0.0003159952790152381, "loss": 2.326703643798828, "step": 13040 }, { "epoch": 27.358115183246074, "learning_rate": 0.0003158741848245431, "loss": 2.3338809967041017, "step": 13050 }, { "epoch": 27.379057591623038, "learning_rate": 0.0003157532297426339, "loss": 2.318799591064453, "step": 13060 }, { "epoch": 27.4, "learning_rate": 0.00031563241350337546, "loss": 2.3162815093994142, "step": 13070 }, { "epoch": 27.420942408376963, "learning_rate": 0.00031551173584134514, "loss": 2.354751968383789, "step": 13080 }, { "epoch": 27.441884816753927, "learning_rate": 0.0003153911964918298, "loss": 2.3353591918945313, "step": 13090 }, { "epoch": 27.46282722513089, "learning_rate": 0.0003152707951908239, "loss": 2.3257909774780274, "step": 13100 }, { "epoch": 27.483769633507855, "learning_rate": 0.0003151505316750269, "loss": 2.336490821838379, "step": 13110 }, { "epoch": 27.504712041884815, "learning_rate": 0.0003150304056818405, "loss": 2.32800350189209, "step": 13120 }, { "epoch": 27.52565445026178, "learning_rate": 0.00031491041694936697, "loss": 2.3223346710205077, "step": 13130 }, { "epoch": 27.546596858638743, "learning_rate": 0.000314790565216406, "loss": 2.348642921447754, "step": 13140 }, { "epoch": 27.567539267015707, "learning_rate": 0.0003146708502224526, "loss": 2.3384424209594727, "step": 13150 }, { "epoch": 27.58848167539267, "learning_rate": 0.0003145512717076948, "loss": 2.301900863647461, "step": 13160 }, { "epoch": 27.609424083769632, "learning_rate": 0.00031443182941301147, "loss": 2.3309160232543946, "step": 13170 }, { "epoch": 27.630366492146596, "learning_rate": 0.0003143125230799694, "loss": 2.358192253112793, "step": 13180 }, { "epoch": 27.65130890052356, "learning_rate": 0.00031419335245082134, "loss": 2.347599220275879, "step": 13190 }, { "epoch": 27.672251308900524, "learning_rate": 0.00031407431726850375, "loss": 2.330830764770508, "step": 13200 }, { "epoch": 27.693193717277488, "learning_rate": 0.00031395541727663413, "loss": 2.33847599029541, "step": 13210 }, { "epoch": 27.71413612565445, "learning_rate": 0.0003138366522195088, "loss": 2.3454364776611327, "step": 13220 }, { "epoch": 27.735078534031413, "learning_rate": 0.0003137180218421011, "loss": 2.3458301544189455, "step": 13230 }, { "epoch": 27.756020942408377, "learning_rate": 0.0003135995258900582, "loss": 2.2951147079467775, "step": 13240 }, { "epoch": 27.77696335078534, "learning_rate": 0.0003134811641096994, "loss": 2.324018096923828, "step": 13250 }, { "epoch": 27.797905759162305, "learning_rate": 0.00031336293624801393, "loss": 2.320078468322754, "step": 13260 }, { "epoch": 27.81884816753927, "learning_rate": 0.00031324484205265824, "loss": 2.3213479995727537, "step": 13270 }, { "epoch": 27.83979057591623, "learning_rate": 0.000313126881271954, "loss": 2.352939224243164, "step": 13280 }, { "epoch": 27.860732984293193, "learning_rate": 0.0003130090536548859, "loss": 2.3275819778442384, "step": 13290 }, { "epoch": 27.881675392670157, "learning_rate": 0.00031289135895109924, "loss": 2.341213607788086, "step": 13300 }, { "epoch": 27.90261780104712, "learning_rate": 0.00031277379691089786, "loss": 2.352794647216797, "step": 13310 }, { "epoch": 27.923560209424085, "learning_rate": 0.00031265636728524174, "loss": 2.329135513305664, "step": 13320 }, { "epoch": 27.944502617801046, "learning_rate": 0.000312539069825745, "loss": 2.3414382934570312, "step": 13330 }, { "epoch": 27.96544502617801, "learning_rate": 0.00031242190428467325, "loss": 2.3638214111328124, "step": 13340 }, { "epoch": 27.986387434554974, "learning_rate": 0.0003123048704149423, "loss": 2.326797294616699, "step": 13350 }, { "epoch": 27.99895287958115, "eval_loss": 2.532017469406128, "eval_runtime": 73.734, "eval_samples_per_second": 148.588, "step": 13356 }, { "epoch": 28.008376963350784, "learning_rate": 0.0003121879679701147, "loss": 2.463714599609375, "step": 13360 }, { "epoch": 28.02931937172775, "learning_rate": 0.00031207119670439884, "loss": 2.3355535507202148, "step": 13370 }, { "epoch": 28.050261780104712, "learning_rate": 0.00031195455637264574, "loss": 2.3194732666015625, "step": 13380 }, { "epoch": 28.071204188481676, "learning_rate": 0.00031183804673034756, "loss": 2.3297607421875, "step": 13390 }, { "epoch": 28.09214659685864, "learning_rate": 0.0003117216675336353, "loss": 2.33233642578125, "step": 13400 }, { "epoch": 28.1130890052356, "learning_rate": 0.00031160541853927627, "loss": 2.3335954666137697, "step": 13410 }, { "epoch": 28.134031413612565, "learning_rate": 0.0003114892995046725, "loss": 2.3236547470092774, "step": 13420 }, { "epoch": 28.15497382198953, "learning_rate": 0.00031137331018785835, "loss": 2.3411203384399415, "step": 13430 }, { "epoch": 28.175916230366493, "learning_rate": 0.00031125745034749834, "loss": 2.343415451049805, "step": 13440 }, { "epoch": 28.196858638743457, "learning_rate": 0.00031114171974288516, "loss": 2.305185890197754, "step": 13450 }, { "epoch": 28.217801047120417, "learning_rate": 0.00031102611813393753, "loss": 2.3106929779052736, "step": 13460 }, { "epoch": 28.23874345549738, "learning_rate": 0.0003109106452811981, "loss": 2.342930221557617, "step": 13470 }, { "epoch": 28.259685863874346, "learning_rate": 0.00031079530094583135, "loss": 2.3201034545898436, "step": 13480 }, { "epoch": 28.28062827225131, "learning_rate": 0.0003106800848896216, "loss": 2.353871154785156, "step": 13490 }, { "epoch": 28.301570680628274, "learning_rate": 0.0003105649968749708, "loss": 2.3587244033813475, "step": 13500 }, { "epoch": 28.322513089005234, "learning_rate": 0.0003104500366648965, "loss": 2.334798812866211, "step": 13510 }, { "epoch": 28.343455497382198, "learning_rate": 0.0003103352040230302, "loss": 2.3387428283691407, "step": 13520 }, { "epoch": 28.364397905759162, "learning_rate": 0.00031022049871361445, "loss": 2.35083065032959, "step": 13530 }, { "epoch": 28.385340314136126, "learning_rate": 0.0003101059205015017, "loss": 2.329609680175781, "step": 13540 }, { "epoch": 28.40628272251309, "learning_rate": 0.0003099914691521518, "loss": 2.3420963287353516, "step": 13550 }, { "epoch": 28.42722513089005, "learning_rate": 0.00030987714443163, "loss": 2.3433679580688476, "step": 13560 }, { "epoch": 28.448167539267015, "learning_rate": 0.00030976294610660516, "loss": 2.3441110610961915, "step": 13570 }, { "epoch": 28.46910994764398, "learning_rate": 0.00030964887394434754, "loss": 2.338638687133789, "step": 13580 }, { "epoch": 28.490052356020943, "learning_rate": 0.000309534927712727, "loss": 2.3203834533691405, "step": 13590 }, { "epoch": 28.510994764397907, "learning_rate": 0.0003094211071802107, "loss": 2.3022727966308594, "step": 13600 }, { "epoch": 28.531937172774867, "learning_rate": 0.00030930741211586155, "loss": 2.3490713119506834, "step": 13610 }, { "epoch": 28.55287958115183, "learning_rate": 0.0003091938422893358, "loss": 2.3286787033081056, "step": 13620 }, { "epoch": 28.573821989528795, "learning_rate": 0.00030908039747088155, "loss": 2.305118942260742, "step": 13630 }, { "epoch": 28.59476439790576, "learning_rate": 0.00030896707743133635, "loss": 2.3220989227294924, "step": 13640 }, { "epoch": 28.615706806282724, "learning_rate": 0.0003088538819421255, "loss": 2.3236154556274413, "step": 13650 }, { "epoch": 28.636649214659684, "learning_rate": 0.00030874081077526003, "loss": 2.323534393310547, "step": 13660 }, { "epoch": 28.657591623036648, "learning_rate": 0.00030862786370333505, "loss": 2.3269046783447265, "step": 13670 }, { "epoch": 28.678534031413612, "learning_rate": 0.00030851504049952727, "loss": 2.3261356353759766, "step": 13680 }, { "epoch": 28.699476439790576, "learning_rate": 0.00030840234093759347, "loss": 2.3454893112182615, "step": 13690 }, { "epoch": 28.72041884816754, "learning_rate": 0.0003082897647918688, "loss": 2.3275333404541017, "step": 13700 }, { "epoch": 28.741361256544504, "learning_rate": 0.0003081773118372642, "loss": 2.3333641052246095, "step": 13710 }, { "epoch": 28.762303664921465, "learning_rate": 0.00030806498184926523, "loss": 2.3693473815917967, "step": 13720 }, { "epoch": 28.78324607329843, "learning_rate": 0.0003079527746039298, "loss": 2.3141483306884765, "step": 13730 }, { "epoch": 28.804188481675393, "learning_rate": 0.00030784068987788624, "loss": 2.353886032104492, "step": 13740 }, { "epoch": 28.825130890052357, "learning_rate": 0.00030772872744833183, "loss": 2.3143518447875975, "step": 13750 }, { "epoch": 28.84607329842932, "learning_rate": 0.00030761688709303036, "loss": 2.317976379394531, "step": 13760 }, { "epoch": 28.86701570680628, "learning_rate": 0.0003075051685903109, "loss": 2.331821060180664, "step": 13770 }, { "epoch": 28.887958115183245, "learning_rate": 0.00030739357171906536, "loss": 2.3297216415405275, "step": 13780 }, { "epoch": 28.90890052356021, "learning_rate": 0.0003072820962587471, "loss": 2.3354673385620117, "step": 13790 }, { "epoch": 28.929842931937173, "learning_rate": 0.00030717074198936904, "loss": 2.308320999145508, "step": 13800 }, { "epoch": 28.950785340314138, "learning_rate": 0.0003070595086915015, "loss": 2.312677192687988, "step": 13810 }, { "epoch": 28.971727748691098, "learning_rate": 0.00030694839614627076, "loss": 2.3000450134277344, "step": 13820 }, { "epoch": 28.992670157068062, "learning_rate": 0.0003068374041353571, "loss": 2.333408737182617, "step": 13830 }, { "epoch": 28.99895287958115, "eval_loss": 2.541404962539673, "eval_runtime": 73.9823, "eval_samples_per_second": 148.09, "step": 13833 }, { "epoch": 29.014659685863876, "learning_rate": 0.000306726532440993, "loss": 2.4384193420410156, "step": 13840 }, { "epoch": 29.035602094240836, "learning_rate": 0.0003066157808459613, "loss": 2.3120851516723633, "step": 13850 }, { "epoch": 29.0565445026178, "learning_rate": 0.0003065051491335936, "loss": 2.333901596069336, "step": 13860 }, { "epoch": 29.077486910994764, "learning_rate": 0.0003063946370877681, "loss": 2.311614227294922, "step": 13870 }, { "epoch": 29.09842931937173, "learning_rate": 0.0003062842444929085, "loss": 2.328507423400879, "step": 13880 }, { "epoch": 29.119371727748693, "learning_rate": 0.00030617397113398125, "loss": 2.3186750411987305, "step": 13890 }, { "epoch": 29.140314136125653, "learning_rate": 0.00030606381679649483, "loss": 2.3101566314697264, "step": 13900 }, { "epoch": 29.161256544502617, "learning_rate": 0.00030595378126649727, "loss": 2.3323139190673827, "step": 13910 }, { "epoch": 29.18219895287958, "learning_rate": 0.0003058438643305747, "loss": 2.3120336532592773, "step": 13920 }, { "epoch": 29.203141361256545, "learning_rate": 0.00030573406577584955, "loss": 2.3213123321533202, "step": 13930 }, { "epoch": 29.22408376963351, "learning_rate": 0.000305624385389979, "loss": 2.3158872604370115, "step": 13940 }, { "epoch": 29.24502617801047, "learning_rate": 0.0003055148229611527, "loss": 2.3301626205444337, "step": 13950 }, { "epoch": 29.265968586387434, "learning_rate": 0.00030540537827809176, "loss": 2.2979711532592773, "step": 13960 }, { "epoch": 29.286910994764398, "learning_rate": 0.0003052960511300467, "loss": 2.337363433837891, "step": 13970 }, { "epoch": 29.307853403141362, "learning_rate": 0.0003051868413067956, "loss": 2.3084648132324217, "step": 13980 }, { "epoch": 29.328795811518326, "learning_rate": 0.00030507774859864277, "loss": 2.3315618515014647, "step": 13990 }, { "epoch": 29.349738219895286, "learning_rate": 0.0003049687727964166, "loss": 2.342039680480957, "step": 14000 }, { "epoch": 29.37068062827225, "learning_rate": 0.00030485991369146834, "loss": 2.327268600463867, "step": 14010 }, { "epoch": 29.391623036649214, "learning_rate": 0.00030475117107567015, "loss": 2.311885643005371, "step": 14020 }, { "epoch": 29.41256544502618, "learning_rate": 0.0003046425447414135, "loss": 2.297453498840332, "step": 14030 }, { "epoch": 29.433507853403142, "learning_rate": 0.0003045340344816073, "loss": 2.295667839050293, "step": 14040 }, { "epoch": 29.454450261780103, "learning_rate": 0.0003044256400896769, "loss": 2.3093278884887694, "step": 14050 }, { "epoch": 29.475392670157067, "learning_rate": 0.0003043173613595614, "loss": 2.3026140213012694, "step": 14060 }, { "epoch": 29.49633507853403, "learning_rate": 0.0003042091980857131, "loss": 2.339429473876953, "step": 14070 }, { "epoch": 29.517277486910995, "learning_rate": 0.0003041011500630949, "loss": 2.3204904556274415, "step": 14080 }, { "epoch": 29.53821989528796, "learning_rate": 0.00030399321708717947, "loss": 2.327162170410156, "step": 14090 }, { "epoch": 29.559162303664923, "learning_rate": 0.00030388539895394697, "loss": 2.3462697982788088, "step": 14100 }, { "epoch": 29.580104712041884, "learning_rate": 0.00030377769545988394, "loss": 2.2912479400634767, "step": 14110 }, { "epoch": 29.601047120418848, "learning_rate": 0.00030367010640198143, "loss": 2.35098876953125, "step": 14120 }, { "epoch": 29.62198952879581, "learning_rate": 0.0003035626315777333, "loss": 2.304596710205078, "step": 14130 }, { "epoch": 29.642931937172776, "learning_rate": 0.00030345527078513493, "loss": 2.3083545684814455, "step": 14140 }, { "epoch": 29.66387434554974, "learning_rate": 0.0003033480238226813, "loss": 2.304719924926758, "step": 14150 }, { "epoch": 29.6848167539267, "learning_rate": 0.0003032408904893656, "loss": 2.309472846984863, "step": 14160 }, { "epoch": 29.705759162303664, "learning_rate": 0.00030313387058467756, "loss": 2.3114566802978516, "step": 14170 }, { "epoch": 29.72670157068063, "learning_rate": 0.0003030269639086021, "loss": 2.3168495178222654, "step": 14180 }, { "epoch": 29.747643979057592, "learning_rate": 0.0003029201702616173, "loss": 2.3027936935424806, "step": 14190 }, { "epoch": 29.768586387434556, "learning_rate": 0.0003028134894446933, "loss": 2.330441474914551, "step": 14200 }, { "epoch": 29.789528795811517, "learning_rate": 0.00030270692125929034, "loss": 2.2950525283813477, "step": 14210 }, { "epoch": 29.81047120418848, "learning_rate": 0.00030260046550735763, "loss": 2.3066877365112304, "step": 14220 }, { "epoch": 29.831413612565445, "learning_rate": 0.0003024941219913316, "loss": 2.3138294219970703, "step": 14230 }, { "epoch": 29.85235602094241, "learning_rate": 0.00030238789051413416, "loss": 2.3398483276367186, "step": 14240 }, { "epoch": 29.873298429319373, "learning_rate": 0.00030228177087917153, "loss": 2.3180753707885744, "step": 14250 }, { "epoch": 29.894240837696334, "learning_rate": 0.00030217576289033235, "loss": 2.293859100341797, "step": 14260 }, { "epoch": 29.915183246073298, "learning_rate": 0.00030206986635198654, "loss": 2.315079116821289, "step": 14270 }, { "epoch": 29.93612565445026, "learning_rate": 0.00030196408106898356, "loss": 2.3188785552978515, "step": 14280 }, { "epoch": 29.957068062827226, "learning_rate": 0.0003018584068466507, "loss": 2.355891799926758, "step": 14290 }, { "epoch": 29.97801047120419, "learning_rate": 0.0003017528434907922, "loss": 2.310663032531738, "step": 14300 }, { "epoch": 29.99895287958115, "learning_rate": 0.00030164739080768704, "loss": 2.357052803039551, "step": 14310 }, { "epoch": 29.99895287958115, "eval_loss": 2.5363190174102783, "eval_runtime": 73.3558, "eval_samples_per_second": 149.354, "step": 14310 }, { "epoch": 30.020942408376964, "learning_rate": 0.0003015420486040879, "loss": 2.432624626159668, "step": 14320 }, { "epoch": 30.041884816753928, "learning_rate": 0.00030143681668721935, "loss": 2.3034442901611327, "step": 14330 }, { "epoch": 30.06282722513089, "learning_rate": 0.00030133169486477694, "loss": 2.3489042282104493, "step": 14340 }, { "epoch": 30.083769633507853, "learning_rate": 0.0003012266829449249, "loss": 2.3431249618530274, "step": 14350 }, { "epoch": 30.104712041884817, "learning_rate": 0.00030112178073629544, "loss": 2.332902526855469, "step": 14360 }, { "epoch": 30.12565445026178, "learning_rate": 0.0003010169880479867, "loss": 2.323573112487793, "step": 14370 }, { "epoch": 30.146596858638745, "learning_rate": 0.0003009123046895618, "loss": 2.299881362915039, "step": 14380 }, { "epoch": 30.167539267015705, "learning_rate": 0.00030080773047104687, "loss": 2.319793701171875, "step": 14390 }, { "epoch": 30.18848167539267, "learning_rate": 0.0003007032652029301, "loss": 2.3272857666015625, "step": 14400 }, { "epoch": 30.209424083769633, "learning_rate": 0.00030059890869615983, "loss": 2.3354257583618163, "step": 14410 }, { "epoch": 30.230366492146597, "learning_rate": 0.0003004946607621435, "loss": 2.3249101638793945, "step": 14420 }, { "epoch": 30.25130890052356, "learning_rate": 0.0003003905212127461, "loss": 2.331306266784668, "step": 14430 }, { "epoch": 30.272251308900522, "learning_rate": 0.00030028648986028843, "loss": 2.3302356719970705, "step": 14440 }, { "epoch": 30.293193717277486, "learning_rate": 0.00030018256651754633, "loss": 2.3084732055664063, "step": 14450 }, { "epoch": 30.31413612565445, "learning_rate": 0.00030007875099774864, "loss": 2.324197006225586, "step": 14460 }, { "epoch": 30.335078534031414, "learning_rate": 0.0002999750431145761, "loss": 2.309644317626953, "step": 14470 }, { "epoch": 30.356020942408378, "learning_rate": 0.0002998714426821599, "loss": 2.332279014587402, "step": 14480 }, { "epoch": 30.376963350785342, "learning_rate": 0.00029976794951508027, "loss": 2.3015905380249024, "step": 14490 }, { "epoch": 30.397905759162303, "learning_rate": 0.00029966456342836505, "loss": 2.3307212829589843, "step": 14500 }, { "epoch": 30.418848167539267, "learning_rate": 0.0002995612842374884, "loss": 2.3488508224487306, "step": 14510 }, { "epoch": 30.43979057591623, "learning_rate": 0.0002994581117583693, "loss": 2.2981189727783202, "step": 14520 }, { "epoch": 30.460732984293195, "learning_rate": 0.00029935504580737006, "loss": 2.292937088012695, "step": 14530 }, { "epoch": 30.48167539267016, "learning_rate": 0.00029925208620129546, "loss": 2.329487609863281, "step": 14540 }, { "epoch": 30.50261780104712, "learning_rate": 0.0002991492327573909, "loss": 2.275893974304199, "step": 14550 }, { "epoch": 30.523560209424083, "learning_rate": 0.0002990464852933409, "loss": 2.289459228515625, "step": 14560 }, { "epoch": 30.544502617801047, "learning_rate": 0.0002989438436272684, "loss": 2.2976861953735352, "step": 14570 }, { "epoch": 30.56544502617801, "learning_rate": 0.00029884130757773275, "loss": 2.319015884399414, "step": 14580 }, { "epoch": 30.586387434554975, "learning_rate": 0.0002987388769637288, "loss": 2.3237770080566404, "step": 14590 }, { "epoch": 30.607329842931936, "learning_rate": 0.00029863655160468534, "loss": 2.330046844482422, "step": 14600 }, { "epoch": 30.6282722513089, "learning_rate": 0.0002985343313204637, "loss": 2.328061103820801, "step": 14610 }, { "epoch": 30.649214659685864, "learning_rate": 0.0002984322159313568, "loss": 2.3274772644042967, "step": 14620 }, { "epoch": 30.670157068062828, "learning_rate": 0.00029833020525808714, "loss": 2.325545883178711, "step": 14630 }, { "epoch": 30.691099476439792, "learning_rate": 0.00029822829912180636, "loss": 2.3240276336669923, "step": 14640 }, { "epoch": 30.712041884816752, "learning_rate": 0.0002981264973440931, "loss": 2.324121856689453, "step": 14650 }, { "epoch": 30.732984293193716, "learning_rate": 0.00029802479974695223, "loss": 2.3233869552612303, "step": 14660 }, { "epoch": 30.75392670157068, "learning_rate": 0.00029792320615281337, "loss": 2.3022382736206053, "step": 14670 }, { "epoch": 30.774869109947645, "learning_rate": 0.00029782171638452937, "loss": 2.3219308853149414, "step": 14680 }, { "epoch": 30.79581151832461, "learning_rate": 0.0002977203302653755, "loss": 2.3212976455688477, "step": 14690 }, { "epoch": 30.81675392670157, "learning_rate": 0.0002976190476190476, "loss": 2.345839500427246, "step": 14700 }, { "epoch": 30.837696335078533, "learning_rate": 0.0002975178682696613, "loss": 2.2968841552734376, "step": 14710 }, { "epoch": 30.858638743455497, "learning_rate": 0.0002974167920417504, "loss": 2.313581848144531, "step": 14720 }, { "epoch": 30.87958115183246, "learning_rate": 0.00029731581876026557, "loss": 2.326977348327637, "step": 14730 }, { "epoch": 30.900523560209425, "learning_rate": 0.00029721494825057357, "loss": 2.3257322311401367, "step": 14740 }, { "epoch": 30.921465968586386, "learning_rate": 0.00029711418033845523, "loss": 2.285732460021973, "step": 14750 }, { "epoch": 30.94240837696335, "learning_rate": 0.0002970135148501047, "loss": 2.3275766372680664, "step": 14760 }, { "epoch": 30.963350785340314, "learning_rate": 0.00029691295161212816, "loss": 2.3182727813720705, "step": 14770 }, { "epoch": 30.984293193717278, "learning_rate": 0.0002968124904515423, "loss": 2.3104841232299806, "step": 14780 }, { "epoch": 30.99895287958115, "eval_loss": 2.544727087020874, "eval_runtime": 73.5548, "eval_samples_per_second": 148.95, "step": 14787 }, { "epoch": 31.006282722513088, "learning_rate": 0.00029671213119577346, "loss": 2.4215261459350588, "step": 14790 }, { "epoch": 31.027225130890052, "learning_rate": 0.00029661187367265593, "loss": 2.3005090713500977, "step": 14800 }, { "epoch": 31.048167539267016, "learning_rate": 0.0002965117177104311, "loss": 2.3047313690185547, "step": 14810 }, { "epoch": 31.06910994764398, "learning_rate": 0.0002964116631377459, "loss": 2.3039810180664064, "step": 14820 }, { "epoch": 31.09005235602094, "learning_rate": 0.000296311709783652, "loss": 2.3020254135131837, "step": 14830 }, { "epoch": 31.110994764397905, "learning_rate": 0.00029621185747760406, "loss": 2.3200841903686524, "step": 14840 }, { "epoch": 31.13193717277487, "learning_rate": 0.0002961121060494589, "loss": 2.2955398559570312, "step": 14850 }, { "epoch": 31.152879581151833, "learning_rate": 0.00029601245532947417, "loss": 2.322628974914551, "step": 14860 }, { "epoch": 31.173821989528797, "learning_rate": 0.0002959129051483069, "loss": 2.3180873870849608, "step": 14870 }, { "epoch": 31.194764397905757, "learning_rate": 0.00029581345533701285, "loss": 2.299137306213379, "step": 14880 }, { "epoch": 31.21570680628272, "learning_rate": 0.0002957141057270448, "loss": 2.2992317199707033, "step": 14890 }, { "epoch": 31.236649214659685, "learning_rate": 0.0002956148561502513, "loss": 2.3339006423950197, "step": 14900 }, { "epoch": 31.25759162303665, "learning_rate": 0.00029551570643887603, "loss": 2.2937063217163085, "step": 14910 }, { "epoch": 31.278534031413614, "learning_rate": 0.00029541665642555606, "loss": 2.3086185455322266, "step": 14920 }, { "epoch": 31.299476439790578, "learning_rate": 0.00029531770594332096, "loss": 2.3051830291748048, "step": 14930 }, { "epoch": 31.320418848167538, "learning_rate": 0.0002952188548255915, "loss": 2.3165931701660156, "step": 14940 }, { "epoch": 31.341361256544502, "learning_rate": 0.00029512010290617854, "loss": 2.310456657409668, "step": 14950 }, { "epoch": 31.362303664921466, "learning_rate": 0.0002950214500192816, "loss": 2.3057369232177733, "step": 14960 }, { "epoch": 31.38324607329843, "learning_rate": 0.00029492289599948834, "loss": 2.316122627258301, "step": 14970 }, { "epoch": 31.404188481675394, "learning_rate": 0.0002948244406817725, "loss": 2.3287500381469726, "step": 14980 }, { "epoch": 31.425130890052355, "learning_rate": 0.00029472608390149343, "loss": 2.309092330932617, "step": 14990 }, { "epoch": 31.44607329842932, "learning_rate": 0.00029462782549439473, "loss": 2.331714057922363, "step": 15000 } ], "max_steps": 15000, "num_train_epochs": 32, "total_flos": 4125839411805155328, "trial_name": null, "trial_params": null }