cls_zh2en / trainer_state.json
Python's picture
Upload 9 files
a3d5ad9
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 31.44607329842932,
"global_step": 15000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0020942408376963353,
"learning_rate": 1.0206207261596573e-07,
"loss": 42.81952667236328,
"step": 1
},
{
"epoch": 0.020942408376963352,
"learning_rate": 1.0206207261596575e-06,
"loss": 43.49555460611979,
"step": 10
},
{
"epoch": 0.041884816753926704,
"learning_rate": 2.041241452319315e-06,
"loss": 43.81195983886719,
"step": 20
},
{
"epoch": 0.06282722513089005,
"learning_rate": 3.0618621784789722e-06,
"loss": 43.1638671875,
"step": 30
},
{
"epoch": 0.08376963350785341,
"learning_rate": 4.08248290463863e-06,
"loss": 43.10018615722656,
"step": 40
},
{
"epoch": 0.10471204188481675,
"learning_rate": 5.103103630798286e-06,
"loss": 42.361767578125,
"step": 50
},
{
"epoch": 0.1256544502617801,
"learning_rate": 6.1237243569579445e-06,
"loss": 41.58636474609375,
"step": 60
},
{
"epoch": 0.14659685863874344,
"learning_rate": 7.144345083117603e-06,
"loss": 40.42512817382813,
"step": 70
},
{
"epoch": 0.16753926701570682,
"learning_rate": 8.16496580927726e-06,
"loss": 40.1504150390625,
"step": 80
},
{
"epoch": 0.18848167539267016,
"learning_rate": 9.185586535436916e-06,
"loss": 39.03293151855469,
"step": 90
},
{
"epoch": 0.2094240837696335,
"learning_rate": 1.0206207261596573e-05,
"loss": 38.355224609375,
"step": 100
},
{
"epoch": 0.23036649214659685,
"learning_rate": 1.1226827987756233e-05,
"loss": 37.3426025390625,
"step": 110
},
{
"epoch": 0.2513089005235602,
"learning_rate": 1.2247448713915889e-05,
"loss": 36.23554992675781,
"step": 120
},
{
"epoch": 0.27225130890052357,
"learning_rate": 1.3268069440075545e-05,
"loss": 35.76961975097656,
"step": 130
},
{
"epoch": 0.2931937172774869,
"learning_rate": 1.4288690166235205e-05,
"loss": 34.53816528320313,
"step": 140
},
{
"epoch": 0.31413612565445026,
"learning_rate": 1.530931089239486e-05,
"loss": 33.968804931640626,
"step": 150
},
{
"epoch": 0.33507853403141363,
"learning_rate": 1.632993161855452e-05,
"loss": 32.835089111328124,
"step": 160
},
{
"epoch": 0.35602094240837695,
"learning_rate": 1.7350552344714174e-05,
"loss": 32.344024658203125,
"step": 170
},
{
"epoch": 0.3769633507853403,
"learning_rate": 1.8371173070873833e-05,
"loss": 31.358078002929688,
"step": 180
},
{
"epoch": 0.39790575916230364,
"learning_rate": 1.939179379703349e-05,
"loss": 30.222982788085936,
"step": 190
},
{
"epoch": 0.418848167539267,
"learning_rate": 2.0412414523193145e-05,
"loss": 29.279229736328126,
"step": 200
},
{
"epoch": 0.4397905759162304,
"learning_rate": 2.1433035249352804e-05,
"loss": 28.632891845703124,
"step": 210
},
{
"epoch": 0.4607329842931937,
"learning_rate": 2.2453655975512465e-05,
"loss": 27.667315673828124,
"step": 220
},
{
"epoch": 0.4816753926701571,
"learning_rate": 2.347427670167212e-05,
"loss": 26.753375244140624,
"step": 230
},
{
"epoch": 0.5026178010471204,
"learning_rate": 2.4494897427831778e-05,
"loss": 26.086724853515626,
"step": 240
},
{
"epoch": 0.5235602094240838,
"learning_rate": 2.5515518153991436e-05,
"loss": 24.617787170410157,
"step": 250
},
{
"epoch": 0.5445026178010471,
"learning_rate": 2.653613888015109e-05,
"loss": 23.82097473144531,
"step": 260
},
{
"epoch": 0.5654450261780105,
"learning_rate": 2.755675960631075e-05,
"loss": 22.989166259765625,
"step": 270
},
{
"epoch": 0.5863874345549738,
"learning_rate": 2.857738033247041e-05,
"loss": 22.07659454345703,
"step": 280
},
{
"epoch": 0.6073298429319371,
"learning_rate": 2.9598001058630065e-05,
"loss": 21.30054168701172,
"step": 290
},
{
"epoch": 0.6282722513089005,
"learning_rate": 3.061862178478972e-05,
"loss": 20.448097229003906,
"step": 300
},
{
"epoch": 0.6492146596858639,
"learning_rate": 3.163924251094938e-05,
"loss": 19.719020080566406,
"step": 310
},
{
"epoch": 0.6701570680628273,
"learning_rate": 3.265986323710904e-05,
"loss": 19.040341186523438,
"step": 320
},
{
"epoch": 0.6910994764397905,
"learning_rate": 3.3680483963268694e-05,
"loss": 18.16093292236328,
"step": 330
},
{
"epoch": 0.7120418848167539,
"learning_rate": 3.470110468942835e-05,
"loss": 17.382850646972656,
"step": 340
},
{
"epoch": 0.7329842931937173,
"learning_rate": 3.5721725415588004e-05,
"loss": 16.651702880859375,
"step": 350
},
{
"epoch": 0.7539267015706806,
"learning_rate": 3.6742346141747665e-05,
"loss": 16.079180908203124,
"step": 360
},
{
"epoch": 0.774869109947644,
"learning_rate": 3.7762966867907327e-05,
"loss": 15.322096252441407,
"step": 370
},
{
"epoch": 0.7958115183246073,
"learning_rate": 3.878358759406698e-05,
"loss": 14.650254821777343,
"step": 380
},
{
"epoch": 0.8167539267015707,
"learning_rate": 3.980420832022664e-05,
"loss": 14.0874267578125,
"step": 390
},
{
"epoch": 0.837696335078534,
"learning_rate": 4.082482904638629e-05,
"loss": 13.5120849609375,
"step": 400
},
{
"epoch": 0.8586387434554974,
"learning_rate": 4.184544977254595e-05,
"loss": 12.803053283691407,
"step": 410
},
{
"epoch": 0.8795811518324608,
"learning_rate": 4.286607049870561e-05,
"loss": 12.207872009277343,
"step": 420
},
{
"epoch": 0.900523560209424,
"learning_rate": 4.388669122486527e-05,
"loss": 11.640280151367188,
"step": 430
},
{
"epoch": 0.9214659685863874,
"learning_rate": 4.490731195102493e-05,
"loss": 11.116693878173828,
"step": 440
},
{
"epoch": 0.9424083769633508,
"learning_rate": 4.5927932677184585e-05,
"loss": 10.586290740966797,
"step": 450
},
{
"epoch": 0.9633507853403142,
"learning_rate": 4.694855340334424e-05,
"loss": 10.197176361083985,
"step": 460
},
{
"epoch": 0.9842931937172775,
"learning_rate": 4.7969174129503894e-05,
"loss": 9.696656036376954,
"step": 470
},
{
"epoch": 0.9989528795811519,
"eval_loss": 7.631092548370361,
"eval_runtime": 73.7635,
"eval_samples_per_second": 148.529,
"step": 477
},
{
"epoch": 1.006282722513089,
"learning_rate": 4.8989794855663556e-05,
"loss": 9.649329376220702,
"step": 480
},
{
"epoch": 1.0272251308900524,
"learning_rate": 5.001041558182322e-05,
"loss": 8.800138092041015,
"step": 490
},
{
"epoch": 1.0481675392670158,
"learning_rate": 5.103103630798287e-05,
"loss": 8.46273422241211,
"step": 500
},
{
"epoch": 1.0691099476439792,
"learning_rate": 5.205165703414253e-05,
"loss": 8.093246459960938,
"step": 510
},
{
"epoch": 1.0900523560209425,
"learning_rate": 5.307227776030218e-05,
"loss": 7.800753021240235,
"step": 520
},
{
"epoch": 1.1109947643979057,
"learning_rate": 5.409289848646184e-05,
"loss": 7.580593872070312,
"step": 530
},
{
"epoch": 1.131937172774869,
"learning_rate": 5.51135192126215e-05,
"loss": 7.297437286376953,
"step": 540
},
{
"epoch": 1.1528795811518324,
"learning_rate": 5.613413993878116e-05,
"loss": 7.115350341796875,
"step": 550
},
{
"epoch": 1.1738219895287958,
"learning_rate": 5.715476066494082e-05,
"loss": 6.890144348144531,
"step": 560
},
{
"epoch": 1.1947643979057592,
"learning_rate": 5.817538139110047e-05,
"loss": 6.647792053222656,
"step": 570
},
{
"epoch": 1.2157068062827225,
"learning_rate": 5.919600211726013e-05,
"loss": 6.418707275390625,
"step": 580
},
{
"epoch": 1.236649214659686,
"learning_rate": 6.0216622843419785e-05,
"loss": 6.1961822509765625,
"step": 590
},
{
"epoch": 1.2575916230366493,
"learning_rate": 6.123724356957945e-05,
"loss": 6.020335388183594,
"step": 600
},
{
"epoch": 1.2785340314136127,
"learning_rate": 6.22578642957391e-05,
"loss": 5.866229248046875,
"step": 610
},
{
"epoch": 1.2994764397905758,
"learning_rate": 6.327848502189876e-05,
"loss": 5.687960433959961,
"step": 620
},
{
"epoch": 1.3204188481675394,
"learning_rate": 6.429910574805841e-05,
"loss": 5.516669082641601,
"step": 630
},
{
"epoch": 1.3413612565445026,
"learning_rate": 6.531972647421808e-05,
"loss": 5.365422058105469,
"step": 640
},
{
"epoch": 1.362303664921466,
"learning_rate": 6.634034720037773e-05,
"loss": 5.2304027557373045,
"step": 650
},
{
"epoch": 1.3832460732984293,
"learning_rate": 6.736096792653739e-05,
"loss": 5.1493980407714846,
"step": 660
},
{
"epoch": 1.4041884816753927,
"learning_rate": 6.838158865269704e-05,
"loss": 5.069922256469726,
"step": 670
},
{
"epoch": 1.425130890052356,
"learning_rate": 6.94022093788567e-05,
"loss": 4.946885299682617,
"step": 680
},
{
"epoch": 1.4460732984293194,
"learning_rate": 7.042283010501637e-05,
"loss": 4.852196502685547,
"step": 690
},
{
"epoch": 1.4670157068062828,
"learning_rate": 7.144345083117601e-05,
"loss": 4.79791488647461,
"step": 700
},
{
"epoch": 1.487958115183246,
"learning_rate": 7.246407155733568e-05,
"loss": 4.701091766357422,
"step": 710
},
{
"epoch": 1.5089005235602095,
"learning_rate": 7.348469228349533e-05,
"loss": 4.629792404174805,
"step": 720
},
{
"epoch": 1.5298429319371727,
"learning_rate": 7.450531300965498e-05,
"loss": 4.491447067260742,
"step": 730
},
{
"epoch": 1.5507853403141363,
"learning_rate": 7.552593373581465e-05,
"loss": 4.365177917480469,
"step": 740
},
{
"epoch": 1.5717277486910994,
"learning_rate": 7.654655446197431e-05,
"loss": 4.266152572631836,
"step": 750
},
{
"epoch": 1.5926701570680628,
"learning_rate": 7.756717518813396e-05,
"loss": 4.203376770019531,
"step": 760
},
{
"epoch": 1.6136125654450262,
"learning_rate": 7.858779591429362e-05,
"loss": 4.128662872314453,
"step": 770
},
{
"epoch": 1.6345549738219896,
"learning_rate": 7.960841664045329e-05,
"loss": 4.131737899780274,
"step": 780
},
{
"epoch": 1.655497382198953,
"learning_rate": 8.062903736661294e-05,
"loss": 4.008557891845703,
"step": 790
},
{
"epoch": 1.676439790575916,
"learning_rate": 8.164965809277258e-05,
"loss": 3.9548309326171873,
"step": 800
},
{
"epoch": 1.6973821989528797,
"learning_rate": 8.267027881893225e-05,
"loss": 3.903990936279297,
"step": 810
},
{
"epoch": 1.7183246073298428,
"learning_rate": 8.36908995450919e-05,
"loss": 3.8517215728759764,
"step": 820
},
{
"epoch": 1.7392670157068064,
"learning_rate": 8.471152027125156e-05,
"loss": 3.815013122558594,
"step": 830
},
{
"epoch": 1.7602094240837696,
"learning_rate": 8.573214099741121e-05,
"loss": 3.762827682495117,
"step": 840
},
{
"epoch": 1.781151832460733,
"learning_rate": 8.675276172357088e-05,
"loss": 3.739139938354492,
"step": 850
},
{
"epoch": 1.8020942408376963,
"learning_rate": 8.777338244973054e-05,
"loss": 3.6771942138671876,
"step": 860
},
{
"epoch": 1.8230366492146597,
"learning_rate": 8.879400317589019e-05,
"loss": 3.671974945068359,
"step": 870
},
{
"epoch": 1.843979057591623,
"learning_rate": 8.981462390204986e-05,
"loss": 3.6025531768798826,
"step": 880
},
{
"epoch": 1.8649214659685864,
"learning_rate": 9.083524462820951e-05,
"loss": 3.580000305175781,
"step": 890
},
{
"epoch": 1.8858638743455498,
"learning_rate": 9.185586535436917e-05,
"loss": 3.570189666748047,
"step": 900
},
{
"epoch": 1.906806282722513,
"learning_rate": 9.287648608052881e-05,
"loss": 3.5345611572265625,
"step": 910
},
{
"epoch": 1.9277486910994766,
"learning_rate": 9.389710680668848e-05,
"loss": 3.4658973693847654,
"step": 920
},
{
"epoch": 1.9486910994764397,
"learning_rate": 9.491772753284813e-05,
"loss": 3.4885902404785156,
"step": 930
},
{
"epoch": 1.9696335078534033,
"learning_rate": 9.593834825900779e-05,
"loss": 3.438787078857422,
"step": 940
},
{
"epoch": 1.9905759162303664,
"learning_rate": 9.695896898516746e-05,
"loss": 3.434320831298828,
"step": 950
},
{
"epoch": 1.998952879581152,
"eval_loss": 3.1282973289489746,
"eval_runtime": 73.33,
"eval_samples_per_second": 149.407,
"step": 954
},
{
"epoch": 2.012565445026178,
"learning_rate": 9.797958971132711e-05,
"loss": 3.5866859436035154,
"step": 960
},
{
"epoch": 2.033507853403141,
"learning_rate": 9.900021043748677e-05,
"loss": 3.4268508911132813,
"step": 970
},
{
"epoch": 2.054450261780105,
"learning_rate": 0.00010002083116364643,
"loss": 3.3977298736572266,
"step": 980
},
{
"epoch": 2.075392670157068,
"learning_rate": 0.00010104145188980609,
"loss": 3.349309539794922,
"step": 990
},
{
"epoch": 2.0963350785340316,
"learning_rate": 0.00010206207261596574,
"loss": 3.4084583282470704,
"step": 1000
},
{
"epoch": 2.1172774869109947,
"learning_rate": 0.0001030826933421254,
"loss": 3.3497646331787108,
"step": 1010
},
{
"epoch": 2.1382198952879583,
"learning_rate": 0.00010410331406828505,
"loss": 3.2944507598876953,
"step": 1020
},
{
"epoch": 2.1591623036649215,
"learning_rate": 0.00010512393479444471,
"loss": 3.312997055053711,
"step": 1030
},
{
"epoch": 2.180104712041885,
"learning_rate": 0.00010614455552060436,
"loss": 3.3428993225097656,
"step": 1040
},
{
"epoch": 2.201047120418848,
"learning_rate": 0.00010716517624676403,
"loss": 3.292295455932617,
"step": 1050
},
{
"epoch": 2.2219895287958114,
"learning_rate": 0.00010818579697292369,
"loss": 3.291975402832031,
"step": 1060
},
{
"epoch": 2.242931937172775,
"learning_rate": 0.00010920641769908334,
"loss": 3.2534629821777346,
"step": 1070
},
{
"epoch": 2.263874345549738,
"learning_rate": 0.000110227038425243,
"loss": 3.247083282470703,
"step": 1080
},
{
"epoch": 2.2848167539267017,
"learning_rate": 0.00011124765915140266,
"loss": 3.253615951538086,
"step": 1090
},
{
"epoch": 2.305759162303665,
"learning_rate": 0.00011226827987756232,
"loss": 3.2301055908203127,
"step": 1100
},
{
"epoch": 2.3267015706806284,
"learning_rate": 0.00011328890060372197,
"loss": 3.241224670410156,
"step": 1110
},
{
"epoch": 2.3476439790575916,
"learning_rate": 0.00011430952132988164,
"loss": 3.210051345825195,
"step": 1120
},
{
"epoch": 2.368586387434555,
"learning_rate": 0.00011533014205604128,
"loss": 3.225263214111328,
"step": 1130
},
{
"epoch": 2.3895287958115183,
"learning_rate": 0.00011635076278220094,
"loss": 3.1904216766357423,
"step": 1140
},
{
"epoch": 2.4104712041884815,
"learning_rate": 0.00011737138350836059,
"loss": 3.230540466308594,
"step": 1150
},
{
"epoch": 2.431413612565445,
"learning_rate": 0.00011839200423452026,
"loss": 3.191972351074219,
"step": 1160
},
{
"epoch": 2.4523560209424082,
"learning_rate": 0.00011941262496067991,
"loss": 3.191108512878418,
"step": 1170
},
{
"epoch": 2.473298429319372,
"learning_rate": 0.00012043324568683957,
"loss": 3.192665863037109,
"step": 1180
},
{
"epoch": 2.494240837696335,
"learning_rate": 0.00012145386641299924,
"loss": 3.1610179901123048,
"step": 1190
},
{
"epoch": 2.5151832460732986,
"learning_rate": 0.0001224744871391589,
"loss": 3.1794748306274414,
"step": 1200
},
{
"epoch": 2.5361256544502617,
"learning_rate": 0.00012349510786531856,
"loss": 3.2144695281982423,
"step": 1210
},
{
"epoch": 2.5570680628272253,
"learning_rate": 0.0001245157285914782,
"loss": 3.147447204589844,
"step": 1220
},
{
"epoch": 2.5780104712041885,
"learning_rate": 0.00012553634931763784,
"loss": 3.1747854232788084,
"step": 1230
},
{
"epoch": 2.5989528795811516,
"learning_rate": 0.0001265569700437975,
"loss": 3.137411880493164,
"step": 1240
},
{
"epoch": 2.619895287958115,
"learning_rate": 0.00012757759076995718,
"loss": 3.157614326477051,
"step": 1250
},
{
"epoch": 2.640837696335079,
"learning_rate": 0.00012859821149611682,
"loss": 3.1284500122070313,
"step": 1260
},
{
"epoch": 2.661780104712042,
"learning_rate": 0.0001296188322222765,
"loss": 3.168661117553711,
"step": 1270
},
{
"epoch": 2.682722513089005,
"learning_rate": 0.00013063945294843616,
"loss": 3.1208589553833006,
"step": 1280
},
{
"epoch": 2.7036649214659687,
"learning_rate": 0.0001316600736745958,
"loss": 3.177284049987793,
"step": 1290
},
{
"epoch": 2.724607329842932,
"learning_rate": 0.00013268069440075547,
"loss": 3.129215431213379,
"step": 1300
},
{
"epoch": 2.7455497382198955,
"learning_rate": 0.00013370131512691514,
"loss": 3.1385051727294924,
"step": 1310
},
{
"epoch": 2.7664921465968586,
"learning_rate": 0.00013472193585307478,
"loss": 3.093943977355957,
"step": 1320
},
{
"epoch": 2.7874345549738218,
"learning_rate": 0.00013574255657923444,
"loss": 3.125334358215332,
"step": 1330
},
{
"epoch": 2.8083769633507853,
"learning_rate": 0.00013676317730539409,
"loss": 3.1063247680664063,
"step": 1340
},
{
"epoch": 2.829319371727749,
"learning_rate": 0.00013778379803155375,
"loss": 3.153512382507324,
"step": 1350
},
{
"epoch": 2.850261780104712,
"learning_rate": 0.0001388044187577134,
"loss": 3.0612106323242188,
"step": 1360
},
{
"epoch": 2.8712041884816752,
"learning_rate": 0.00013982503948387306,
"loss": 3.0893680572509767,
"step": 1370
},
{
"epoch": 2.892146596858639,
"learning_rate": 0.00014084566021003273,
"loss": 3.0948190689086914,
"step": 1380
},
{
"epoch": 2.913089005235602,
"learning_rate": 0.00014186628093619237,
"loss": 3.052564811706543,
"step": 1390
},
{
"epoch": 2.9340314136125656,
"learning_rate": 0.00014288690166235201,
"loss": 3.061579132080078,
"step": 1400
},
{
"epoch": 2.9549738219895287,
"learning_rate": 0.00014390752238851168,
"loss": 3.0893646240234376,
"step": 1410
},
{
"epoch": 2.975916230366492,
"learning_rate": 0.00014492814311467135,
"loss": 3.0637126922607423,
"step": 1420
},
{
"epoch": 2.9968586387434555,
"learning_rate": 0.000145948763840831,
"loss": 3.063129425048828,
"step": 1430
},
{
"epoch": 2.998952879581152,
"eval_loss": 2.8699653148651123,
"eval_runtime": 73.0344,
"eval_samples_per_second": 150.012,
"step": 1431
},
{
"epoch": 3.018848167539267,
"learning_rate": 0.00014696938456699066,
"loss": 3.175088310241699,
"step": 1440
},
{
"epoch": 3.0397905759162303,
"learning_rate": 0.00014799000529315033,
"loss": 3.0708381652832033,
"step": 1450
},
{
"epoch": 3.060732984293194,
"learning_rate": 0.00014901062601930997,
"loss": 3.043408012390137,
"step": 1460
},
{
"epoch": 3.081675392670157,
"learning_rate": 0.00015003124674546964,
"loss": 3.039161491394043,
"step": 1470
},
{
"epoch": 3.1026178010471206,
"learning_rate": 0.0001510518674716293,
"loss": 3.0207361221313476,
"step": 1480
},
{
"epoch": 3.1235602094240837,
"learning_rate": 0.00015207248819778895,
"loss": 3.0527109146118163,
"step": 1490
},
{
"epoch": 3.144502617801047,
"learning_rate": 0.00015309310892394862,
"loss": 3.045629692077637,
"step": 1500
},
{
"epoch": 3.1654450261780105,
"learning_rate": 0.00015411372965010828,
"loss": 3.0113618850708006,
"step": 1510
},
{
"epoch": 3.1863874345549736,
"learning_rate": 0.00015513435037626793,
"loss": 3.0372419357299805,
"step": 1520
},
{
"epoch": 3.2073298429319372,
"learning_rate": 0.0001561549711024276,
"loss": 3.0219789505004884,
"step": 1530
},
{
"epoch": 3.2282722513089004,
"learning_rate": 0.00015717559182858723,
"loss": 3.0140264511108397,
"step": 1540
},
{
"epoch": 3.249214659685864,
"learning_rate": 0.0001581962125547469,
"loss": 3.039451789855957,
"step": 1550
},
{
"epoch": 3.270157068062827,
"learning_rate": 0.00015921683328090657,
"loss": 3.0277042388916016,
"step": 1560
},
{
"epoch": 3.2910994764397907,
"learning_rate": 0.0001602374540070662,
"loss": 3.0352230072021484,
"step": 1570
},
{
"epoch": 3.312041884816754,
"learning_rate": 0.00016125807473322588,
"loss": 3.0115480422973633,
"step": 1580
},
{
"epoch": 3.332984293193717,
"learning_rate": 0.00016227869545938555,
"loss": 3.008187103271484,
"step": 1590
},
{
"epoch": 3.3539267015706806,
"learning_rate": 0.00016329931618554516,
"loss": 3.02030086517334,
"step": 1600
},
{
"epoch": 3.374869109947644,
"learning_rate": 0.00016431993691170483,
"loss": 3.0034799575805664,
"step": 1610
},
{
"epoch": 3.3958115183246074,
"learning_rate": 0.0001653405576378645,
"loss": 3.0058149337768554,
"step": 1620
},
{
"epoch": 3.4167539267015705,
"learning_rate": 0.00016636117836402414,
"loss": 2.9678651809692385,
"step": 1630
},
{
"epoch": 3.437696335078534,
"learning_rate": 0.0001673817990901838,
"loss": 2.999662399291992,
"step": 1640
},
{
"epoch": 3.4586387434554973,
"learning_rate": 0.00016840241981634345,
"loss": 2.9900545120239257,
"step": 1650
},
{
"epoch": 3.479581151832461,
"learning_rate": 0.00016942304054250312,
"loss": 2.967659759521484,
"step": 1660
},
{
"epoch": 3.500523560209424,
"learning_rate": 0.0001704436612686628,
"loss": 2.9933372497558595,
"step": 1670
},
{
"epoch": 3.521465968586387,
"learning_rate": 0.00017146428199482243,
"loss": 2.9620410919189455,
"step": 1680
},
{
"epoch": 3.5424083769633508,
"learning_rate": 0.0001724849027209821,
"loss": 2.973393440246582,
"step": 1690
},
{
"epoch": 3.5633507853403144,
"learning_rate": 0.00017350552344714176,
"loss": 2.9914371490478517,
"step": 1700
},
{
"epoch": 3.5842931937172775,
"learning_rate": 0.0001745261441733014,
"loss": 2.959955596923828,
"step": 1710
},
{
"epoch": 3.6052356020942407,
"learning_rate": 0.00017554676489946107,
"loss": 2.947012519836426,
"step": 1720
},
{
"epoch": 3.6261780104712042,
"learning_rate": 0.00017656738562562074,
"loss": 2.9780450820922852,
"step": 1730
},
{
"epoch": 3.6471204188481674,
"learning_rate": 0.00017758800635178038,
"loss": 2.9911376953125,
"step": 1740
},
{
"epoch": 3.668062827225131,
"learning_rate": 0.00017860862707794005,
"loss": 2.936799430847168,
"step": 1750
},
{
"epoch": 3.689005235602094,
"learning_rate": 0.00017962924780409972,
"loss": 2.9443117141723634,
"step": 1760
},
{
"epoch": 3.7099476439790577,
"learning_rate": 0.00018064986853025936,
"loss": 2.9714879989624023,
"step": 1770
},
{
"epoch": 3.730890052356021,
"learning_rate": 0.00018167048925641903,
"loss": 2.9554422378540037,
"step": 1780
},
{
"epoch": 3.7518324607329845,
"learning_rate": 0.0001826911099825787,
"loss": 2.971892738342285,
"step": 1790
},
{
"epoch": 3.7727748691099476,
"learning_rate": 0.00018371173070873834,
"loss": 2.9589488983154295,
"step": 1800
},
{
"epoch": 3.793717277486911,
"learning_rate": 0.000184732351434898,
"loss": 2.944401741027832,
"step": 1810
},
{
"epoch": 3.8146596858638744,
"learning_rate": 0.00018575297216105762,
"loss": 2.926609420776367,
"step": 1820
},
{
"epoch": 3.8356020942408375,
"learning_rate": 0.0001867735928872173,
"loss": 2.906996726989746,
"step": 1830
},
{
"epoch": 3.856544502617801,
"learning_rate": 0.00018779421361337696,
"loss": 2.9651628494262696,
"step": 1840
},
{
"epoch": 3.8774869109947643,
"learning_rate": 0.0001888148343395366,
"loss": 2.94600887298584,
"step": 1850
},
{
"epoch": 3.898429319371728,
"learning_rate": 0.00018983545506569627,
"loss": 2.915750503540039,
"step": 1860
},
{
"epoch": 3.919371727748691,
"learning_rate": 0.00019085607579185594,
"loss": 2.942264747619629,
"step": 1870
},
{
"epoch": 3.9403141361256546,
"learning_rate": 0.00019187669651801558,
"loss": 2.9150556564331054,
"step": 1880
},
{
"epoch": 3.9612565445026178,
"learning_rate": 0.00019289731724417525,
"loss": 2.9057634353637694,
"step": 1890
},
{
"epoch": 3.982198952879581,
"learning_rate": 0.0001939179379703349,
"loss": 2.934947967529297,
"step": 1900
},
{
"epoch": 3.998952879581152,
"eval_loss": 2.7904012203216553,
"eval_runtime": 73.0809,
"eval_samples_per_second": 149.916,
"step": 1908
},
{
"epoch": 4.004188481675393,
"learning_rate": 0.00019493855869649455,
"loss": 3.0482250213623048,
"step": 1910
},
{
"epoch": 4.025130890052356,
"learning_rate": 0.00019595917942265422,
"loss": 2.9126008987426757,
"step": 1920
},
{
"epoch": 4.046073298429319,
"learning_rate": 0.0001969798001488139,
"loss": 2.903646469116211,
"step": 1930
},
{
"epoch": 4.067015706806282,
"learning_rate": 0.00019800042087497353,
"loss": 2.9102720260620116,
"step": 1940
},
{
"epoch": 4.0879581151832465,
"learning_rate": 0.0001990210416011332,
"loss": 2.9236400604248045,
"step": 1950
},
{
"epoch": 4.10890052356021,
"learning_rate": 0.00020004166232729287,
"loss": 2.8818483352661133,
"step": 1960
},
{
"epoch": 4.129842931937173,
"learning_rate": 0.0002010622830534525,
"loss": 2.9328163146972654,
"step": 1970
},
{
"epoch": 4.150785340314136,
"learning_rate": 0.00020208290377961218,
"loss": 2.896937370300293,
"step": 1980
},
{
"epoch": 4.171727748691099,
"learning_rate": 0.00020310352450577185,
"loss": 2.9203187942504885,
"step": 1990
},
{
"epoch": 4.192670157068063,
"learning_rate": 0.0002041241452319315,
"loss": 2.887624740600586,
"step": 2000
},
{
"epoch": 4.213612565445026,
"learning_rate": 0.00020514476595809116,
"loss": 2.89230899810791,
"step": 2010
},
{
"epoch": 4.234554973821989,
"learning_rate": 0.0002061653866842508,
"loss": 2.8896648406982424,
"step": 2020
},
{
"epoch": 4.255497382198953,
"learning_rate": 0.00020718600741041044,
"loss": 2.922181510925293,
"step": 2030
},
{
"epoch": 4.276439790575917,
"learning_rate": 0.0002082066281365701,
"loss": 2.888766860961914,
"step": 2040
},
{
"epoch": 4.29738219895288,
"learning_rate": 0.00020922724886272975,
"loss": 2.8884586334228515,
"step": 2050
},
{
"epoch": 4.318324607329843,
"learning_rate": 0.00021024786958888942,
"loss": 2.8894960403442385,
"step": 2060
},
{
"epoch": 4.339267015706806,
"learning_rate": 0.00021126849031504908,
"loss": 2.88952522277832,
"step": 2070
},
{
"epoch": 4.36020942408377,
"learning_rate": 0.00021228911104120873,
"loss": 2.8968246459960936,
"step": 2080
},
{
"epoch": 4.381151832460733,
"learning_rate": 0.0002133097317673684,
"loss": 2.8720794677734376,
"step": 2090
},
{
"epoch": 4.402094240837696,
"learning_rate": 0.00021433035249352806,
"loss": 2.9035682678222656,
"step": 2100
},
{
"epoch": 4.4230366492146596,
"learning_rate": 0.0002153509732196877,
"loss": 2.8975749969482423,
"step": 2110
},
{
"epoch": 4.443979057591623,
"learning_rate": 0.00021637159394584737,
"loss": 2.8942371368408204,
"step": 2120
},
{
"epoch": 4.464921465968587,
"learning_rate": 0.00021739221467200704,
"loss": 2.8582950592041017,
"step": 2130
},
{
"epoch": 4.48586387434555,
"learning_rate": 0.00021841283539816668,
"loss": 2.8642202377319337,
"step": 2140
},
{
"epoch": 4.506806282722513,
"learning_rate": 0.00021943345612432635,
"loss": 2.8771089553833007,
"step": 2150
},
{
"epoch": 4.527748691099476,
"learning_rate": 0.000220454076850486,
"loss": 2.8463533401489256,
"step": 2160
},
{
"epoch": 4.548691099476439,
"learning_rate": 0.00022147469757664566,
"loss": 2.865732765197754,
"step": 2170
},
{
"epoch": 4.569633507853403,
"learning_rate": 0.00022249531830280533,
"loss": 2.8657468795776366,
"step": 2180
},
{
"epoch": 4.5905759162303665,
"learning_rate": 0.00022351593902896497,
"loss": 2.8737346649169924,
"step": 2190
},
{
"epoch": 4.61151832460733,
"learning_rate": 0.00022453655975512464,
"loss": 2.906464385986328,
"step": 2200
},
{
"epoch": 4.632460732984293,
"learning_rate": 0.0002255571804812843,
"loss": 2.8711727142333983,
"step": 2210
},
{
"epoch": 4.653403141361257,
"learning_rate": 0.00022657780120744395,
"loss": 2.866417121887207,
"step": 2220
},
{
"epoch": 4.67434554973822,
"learning_rate": 0.00022759842193360361,
"loss": 2.873806190490723,
"step": 2230
},
{
"epoch": 4.695287958115183,
"learning_rate": 0.00022861904265976328,
"loss": 2.867740249633789,
"step": 2240
},
{
"epoch": 4.716230366492146,
"learning_rate": 0.0002296396633859229,
"loss": 2.848478317260742,
"step": 2250
},
{
"epoch": 4.73717277486911,
"learning_rate": 0.00023066028411208256,
"loss": 2.865742492675781,
"step": 2260
},
{
"epoch": 4.7581151832460735,
"learning_rate": 0.00023168090483824223,
"loss": 2.8338348388671877,
"step": 2270
},
{
"epoch": 4.779057591623037,
"learning_rate": 0.00023270152556440187,
"loss": 2.848302459716797,
"step": 2280
},
{
"epoch": 4.8,
"learning_rate": 0.00023372214629056154,
"loss": 2.847154235839844,
"step": 2290
},
{
"epoch": 4.820942408376963,
"learning_rate": 0.00023474276701672118,
"loss": 2.889314651489258,
"step": 2300
},
{
"epoch": 4.841884816753927,
"learning_rate": 0.00023576338774288085,
"loss": 2.8715303421020506,
"step": 2310
},
{
"epoch": 4.86282722513089,
"learning_rate": 0.00023678400846904052,
"loss": 2.8583951950073243,
"step": 2320
},
{
"epoch": 4.883769633507853,
"learning_rate": 0.00023780462919520016,
"loss": 2.865638542175293,
"step": 2330
},
{
"epoch": 4.9047120418848165,
"learning_rate": 0.00023882524992135983,
"loss": 2.8511993408203127,
"step": 2340
},
{
"epoch": 4.92565445026178,
"learning_rate": 0.0002398458706475195,
"loss": 2.8662994384765623,
"step": 2350
},
{
"epoch": 4.946596858638744,
"learning_rate": 0.00024086649137367914,
"loss": 2.829090690612793,
"step": 2360
},
{
"epoch": 4.967539267015707,
"learning_rate": 0.0002418871120998388,
"loss": 2.8659574508666994,
"step": 2370
},
{
"epoch": 4.98848167539267,
"learning_rate": 0.00024290773282599848,
"loss": 2.8302743911743162,
"step": 2380
},
{
"epoch": 4.998952879581152,
"eval_loss": 2.730773687362671,
"eval_runtime": 73.1511,
"eval_samples_per_second": 149.772,
"step": 2385
},
{
"epoch": 5.010471204188482,
"learning_rate": 0.00024392835355215812,
"loss": 2.994700050354004,
"step": 2390
},
{
"epoch": 5.031413612565445,
"learning_rate": 0.0002449489742783178,
"loss": 2.8189886093139647,
"step": 2400
},
{
"epoch": 5.052356020942408,
"learning_rate": 0.00024596959500447745,
"loss": 2.818514823913574,
"step": 2410
},
{
"epoch": 5.0732984293193715,
"learning_rate": 0.0002469902157306371,
"loss": 2.8165609359741213,
"step": 2420
},
{
"epoch": 5.0942408376963355,
"learning_rate": 0.00024801083645679674,
"loss": 2.8114208221435546,
"step": 2430
},
{
"epoch": 5.115183246073299,
"learning_rate": 0.0002490314571829564,
"loss": 2.841958236694336,
"step": 2440
},
{
"epoch": 5.136125654450262,
"learning_rate": 0.00025005207790911607,
"loss": 2.8192333221435546,
"step": 2450
},
{
"epoch": 5.157068062827225,
"learning_rate": 0.0002510726986352757,
"loss": 2.842849540710449,
"step": 2460
},
{
"epoch": 5.178010471204188,
"learning_rate": 0.00025209331936143535,
"loss": 2.797623062133789,
"step": 2470
},
{
"epoch": 5.198952879581152,
"learning_rate": 0.000253113940087595,
"loss": 2.8256582260131835,
"step": 2480
},
{
"epoch": 5.219895287958115,
"learning_rate": 0.0002541345608137547,
"loss": 2.853096771240234,
"step": 2490
},
{
"epoch": 5.2408376963350785,
"learning_rate": 0.00025515518153991436,
"loss": 2.8135236740112304,
"step": 2500
},
{
"epoch": 5.261780104712042,
"learning_rate": 0.00025617580226607403,
"loss": 2.826374053955078,
"step": 2510
},
{
"epoch": 5.282722513089006,
"learning_rate": 0.00025719642299223364,
"loss": 2.8227916717529298,
"step": 2520
},
{
"epoch": 5.303664921465969,
"learning_rate": 0.0002582170437183933,
"loss": 2.800633430480957,
"step": 2530
},
{
"epoch": 5.324607329842932,
"learning_rate": 0.000259237664444553,
"loss": 2.8282939910888674,
"step": 2540
},
{
"epoch": 5.345549738219895,
"learning_rate": 0.00026025828517071265,
"loss": 2.789654350280762,
"step": 2550
},
{
"epoch": 5.366492146596858,
"learning_rate": 0.0002612789058968723,
"loss": 2.832943916320801,
"step": 2560
},
{
"epoch": 5.387434554973822,
"learning_rate": 0.00026229952662303193,
"loss": 2.7946260452270506,
"step": 2570
},
{
"epoch": 5.408376963350785,
"learning_rate": 0.0002633201473491916,
"loss": 2.8106937408447266,
"step": 2580
},
{
"epoch": 5.429319371727749,
"learning_rate": 0.00026434076807535127,
"loss": 2.807754135131836,
"step": 2590
},
{
"epoch": 5.450261780104712,
"learning_rate": 0.00026536138880151093,
"loss": 2.809930992126465,
"step": 2600
},
{
"epoch": 5.471204188481676,
"learning_rate": 0.0002663820095276706,
"loss": 2.824372100830078,
"step": 2610
},
{
"epoch": 5.492146596858639,
"learning_rate": 0.00026740263025383027,
"loss": 2.797639846801758,
"step": 2620
},
{
"epoch": 5.513089005235602,
"learning_rate": 0.0002684232509799899,
"loss": 2.7917612075805662,
"step": 2630
},
{
"epoch": 5.534031413612565,
"learning_rate": 0.00026944387170614955,
"loss": 2.818395233154297,
"step": 2640
},
{
"epoch": 5.554973821989529,
"learning_rate": 0.0002704644924323092,
"loss": 2.7933046340942385,
"step": 2650
},
{
"epoch": 5.575916230366492,
"learning_rate": 0.0002714851131584689,
"loss": 2.794571876525879,
"step": 2660
},
{
"epoch": 5.596858638743456,
"learning_rate": 0.0002725057338846285,
"loss": 2.8182382583618164,
"step": 2670
},
{
"epoch": 5.617801047120419,
"learning_rate": 0.00027352635461078817,
"loss": 2.80462703704834,
"step": 2680
},
{
"epoch": 5.638743455497382,
"learning_rate": 0.00027454697533694784,
"loss": 2.793869400024414,
"step": 2690
},
{
"epoch": 5.659685863874346,
"learning_rate": 0.0002755675960631075,
"loss": 2.7867889404296875,
"step": 2700
},
{
"epoch": 5.680628272251309,
"learning_rate": 0.0002765882167892671,
"loss": 2.759464073181152,
"step": 2710
},
{
"epoch": 5.701570680628272,
"learning_rate": 0.0002776088375154268,
"loss": 2.7763525009155274,
"step": 2720
},
{
"epoch": 5.722513089005235,
"learning_rate": 0.00027862945824158646,
"loss": 2.7857263565063475,
"step": 2730
},
{
"epoch": 5.7434554973821985,
"learning_rate": 0.00027965007896774613,
"loss": 2.793513298034668,
"step": 2740
},
{
"epoch": 5.7643979057591626,
"learning_rate": 0.0002806706996939058,
"loss": 2.780983543395996,
"step": 2750
},
{
"epoch": 5.785340314136126,
"learning_rate": 0.00028169132042006546,
"loss": 2.762991714477539,
"step": 2760
},
{
"epoch": 5.806282722513089,
"learning_rate": 0.00028271194114622513,
"loss": 2.7711687088012695,
"step": 2770
},
{
"epoch": 5.827225130890052,
"learning_rate": 0.00028373256187238475,
"loss": 2.7514936447143556,
"step": 2780
},
{
"epoch": 5.848167539267016,
"learning_rate": 0.0002847531825985444,
"loss": 2.7642467498779295,
"step": 2790
},
{
"epoch": 5.869109947643979,
"learning_rate": 0.00028577380332470403,
"loss": 2.766267776489258,
"step": 2800
},
{
"epoch": 5.890052356020942,
"learning_rate": 0.0002867944240508637,
"loss": 2.7898386001586912,
"step": 2810
},
{
"epoch": 5.9109947643979055,
"learning_rate": 0.00028781504477702337,
"loss": 2.7557825088500976,
"step": 2820
},
{
"epoch": 5.9319371727748695,
"learning_rate": 0.00028883566550318303,
"loss": 2.7799331665039064,
"step": 2830
},
{
"epoch": 5.952879581151833,
"learning_rate": 0.0002898562862293427,
"loss": 2.7844886779785156,
"step": 2840
},
{
"epoch": 5.973821989528796,
"learning_rate": 0.0002908769069555023,
"loss": 2.791143608093262,
"step": 2850
},
{
"epoch": 5.994764397905759,
"learning_rate": 0.000291897527681662,
"loss": 2.7879209518432617,
"step": 2860
},
{
"epoch": 5.998952879581152,
"eval_loss": 2.704967737197876,
"eval_runtime": 73.9106,
"eval_samples_per_second": 148.233,
"step": 2862
},
{
"epoch": 6.016753926701571,
"learning_rate": 0.00029291814840782165,
"loss": 2.8958906173706054,
"step": 2870
},
{
"epoch": 6.037696335078534,
"learning_rate": 0.0002939387691339813,
"loss": 2.76577205657959,
"step": 2880
},
{
"epoch": 6.058638743455497,
"learning_rate": 0.000294959389860141,
"loss": 2.792632484436035,
"step": 2890
},
{
"epoch": 6.0795811518324605,
"learning_rate": 0.00029598001058630066,
"loss": 2.7835086822509765,
"step": 2900
},
{
"epoch": 6.100523560209424,
"learning_rate": 0.00029700063131246027,
"loss": 2.761422538757324,
"step": 2910
},
{
"epoch": 6.121465968586388,
"learning_rate": 0.00029802125203861994,
"loss": 2.763009452819824,
"step": 2920
},
{
"epoch": 6.142408376963351,
"learning_rate": 0.0002990418727647796,
"loss": 2.763312339782715,
"step": 2930
},
{
"epoch": 6.163350785340314,
"learning_rate": 0.0003000624934909393,
"loss": 2.764869499206543,
"step": 2940
},
{
"epoch": 6.184293193717277,
"learning_rate": 0.00030108311421709894,
"loss": 2.741547393798828,
"step": 2950
},
{
"epoch": 6.205235602094241,
"learning_rate": 0.0003021037349432586,
"loss": 2.7309391021728517,
"step": 2960
},
{
"epoch": 6.226178010471204,
"learning_rate": 0.0003031243556694182,
"loss": 2.7382484436035157,
"step": 2970
},
{
"epoch": 6.2471204188481675,
"learning_rate": 0.0003041449763955779,
"loss": 2.73870849609375,
"step": 2980
},
{
"epoch": 6.268062827225131,
"learning_rate": 0.00030516559712173756,
"loss": 2.7489850997924803,
"step": 2990
},
{
"epoch": 6.289005235602094,
"learning_rate": 0.00030618621784789723,
"loss": 2.720503807067871,
"step": 3000
},
{
"epoch": 6.309947643979058,
"learning_rate": 0.0003072068385740569,
"loss": 2.770734977722168,
"step": 3010
},
{
"epoch": 6.330890052356021,
"learning_rate": 0.00030822745930021657,
"loss": 2.7358495712280275,
"step": 3020
},
{
"epoch": 6.351832460732984,
"learning_rate": 0.0003092480800263762,
"loss": 2.7683557510375976,
"step": 3030
},
{
"epoch": 6.372774869109947,
"learning_rate": 0.00031026870075253585,
"loss": 2.7386125564575194,
"step": 3040
},
{
"epoch": 6.393717277486911,
"learning_rate": 0.0003112893214786955,
"loss": 2.730208396911621,
"step": 3050
},
{
"epoch": 6.4146596858638745,
"learning_rate": 0.0003123099422048552,
"loss": 2.753367042541504,
"step": 3060
},
{
"epoch": 6.435602094240838,
"learning_rate": 0.00031333056293101486,
"loss": 2.738628959655762,
"step": 3070
},
{
"epoch": 6.456544502617801,
"learning_rate": 0.00031435118365717447,
"loss": 2.7470897674560546,
"step": 3080
},
{
"epoch": 6.477486910994765,
"learning_rate": 0.00031537180438333414,
"loss": 2.7530258178710936,
"step": 3090
},
{
"epoch": 6.498429319371728,
"learning_rate": 0.0003163924251094938,
"loss": 2.7581613540649412,
"step": 3100
},
{
"epoch": 6.519371727748691,
"learning_rate": 0.0003174130458356535,
"loss": 2.719674301147461,
"step": 3110
},
{
"epoch": 6.540314136125654,
"learning_rate": 0.00031843366656181314,
"loss": 2.7147310256958006,
"step": 3120
},
{
"epoch": 6.561256544502617,
"learning_rate": 0.0003194542872879728,
"loss": 2.731971549987793,
"step": 3130
},
{
"epoch": 6.5821989528795815,
"learning_rate": 0.0003204749080141324,
"loss": 2.73608512878418,
"step": 3140
},
{
"epoch": 6.603141361256545,
"learning_rate": 0.0003214955287402921,
"loss": 2.7456966400146485,
"step": 3150
},
{
"epoch": 6.624083769633508,
"learning_rate": 0.00032251614946645176,
"loss": 2.7154998779296875,
"step": 3160
},
{
"epoch": 6.645026178010471,
"learning_rate": 0.00032353677019261143,
"loss": 2.737691116333008,
"step": 3170
},
{
"epoch": 6.665968586387434,
"learning_rate": 0.0003245573909187711,
"loss": 2.734482192993164,
"step": 3180
},
{
"epoch": 6.686910994764398,
"learning_rate": 0.00032557801164493077,
"loss": 2.74371395111084,
"step": 3190
},
{
"epoch": 6.707853403141361,
"learning_rate": 0.0003265986323710903,
"loss": 2.7436079025268554,
"step": 3200
},
{
"epoch": 6.728795811518324,
"learning_rate": 0.00032761925309725,
"loss": 2.7301549911499023,
"step": 3210
},
{
"epoch": 6.749738219895288,
"learning_rate": 0.00032863987382340966,
"loss": 2.7386409759521486,
"step": 3220
},
{
"epoch": 6.770680628272252,
"learning_rate": 0.00032966049454956933,
"loss": 2.718589973449707,
"step": 3230
},
{
"epoch": 6.791623036649215,
"learning_rate": 0.000330681115275729,
"loss": 2.7293943405151366,
"step": 3240
},
{
"epoch": 6.812565445026178,
"learning_rate": 0.0003317017360018886,
"loss": 2.730001449584961,
"step": 3250
},
{
"epoch": 6.833507853403141,
"learning_rate": 0.0003327223567280483,
"loss": 2.7539945602416993,
"step": 3260
},
{
"epoch": 6.854450261780105,
"learning_rate": 0.00033374297745420795,
"loss": 2.7157943725585936,
"step": 3270
},
{
"epoch": 6.875392670157068,
"learning_rate": 0.0003347635981803676,
"loss": 2.7142717361450197,
"step": 3280
},
{
"epoch": 6.896335078534031,
"learning_rate": 0.0003357842189065273,
"loss": 2.723209190368652,
"step": 3290
},
{
"epoch": 6.9172774869109945,
"learning_rate": 0.0003368048396326869,
"loss": 2.725255012512207,
"step": 3300
},
{
"epoch": 6.938219895287958,
"learning_rate": 0.00033782546035884657,
"loss": 2.7175876617431642,
"step": 3310
},
{
"epoch": 6.959162303664922,
"learning_rate": 0.00033884608108500624,
"loss": 2.7198652267456054,
"step": 3320
},
{
"epoch": 6.980104712041885,
"learning_rate": 0.0003398667018111659,
"loss": 2.719003105163574,
"step": 3330
},
{
"epoch": 6.998952879581152,
"eval_loss": 2.657553195953369,
"eval_runtime": 73.1108,
"eval_samples_per_second": 149.855,
"step": 3339
},
{
"epoch": 7.002094240837696,
"learning_rate": 0.0003408873225373256,
"loss": 2.8445356369018553,
"step": 3340
},
{
"epoch": 7.02303664921466,
"learning_rate": 0.00034190794326348524,
"loss": 2.6978481292724608,
"step": 3350
},
{
"epoch": 7.043979057591623,
"learning_rate": 0.00034292856398964486,
"loss": 2.6844539642333984,
"step": 3360
},
{
"epoch": 7.064921465968586,
"learning_rate": 0.0003439491847158045,
"loss": 2.7076372146606444,
"step": 3370
},
{
"epoch": 7.0858638743455495,
"learning_rate": 0.0003449698054419642,
"loss": 2.7051311492919923,
"step": 3380
},
{
"epoch": 7.106806282722513,
"learning_rate": 0.00034599042616812386,
"loss": 2.700316619873047,
"step": 3390
},
{
"epoch": 7.127748691099477,
"learning_rate": 0.00034701104689428353,
"loss": 2.732998085021973,
"step": 3400
},
{
"epoch": 7.14869109947644,
"learning_rate": 0.0003480316676204432,
"loss": 2.7185394287109377,
"step": 3410
},
{
"epoch": 7.169633507853403,
"learning_rate": 0.0003490522883466028,
"loss": 2.705458068847656,
"step": 3420
},
{
"epoch": 7.190575916230366,
"learning_rate": 0.0003500729090727625,
"loss": 2.708370590209961,
"step": 3430
},
{
"epoch": 7.21151832460733,
"learning_rate": 0.00035109352979892215,
"loss": 2.7156093597412108,
"step": 3440
},
{
"epoch": 7.232460732984293,
"learning_rate": 0.0003521141505250818,
"loss": 2.6954971313476563,
"step": 3450
},
{
"epoch": 7.2534031413612565,
"learning_rate": 0.0003531347712512415,
"loss": 2.7061573028564454,
"step": 3460
},
{
"epoch": 7.27434554973822,
"learning_rate": 0.00035415539197740115,
"loss": 2.7047218322753905,
"step": 3470
},
{
"epoch": 7.295287958115184,
"learning_rate": 0.00035517601270356077,
"loss": 2.6945539474487306,
"step": 3480
},
{
"epoch": 7.316230366492147,
"learning_rate": 0.00035619663342972044,
"loss": 2.67620735168457,
"step": 3490
},
{
"epoch": 7.33717277486911,
"learning_rate": 0.0003572172541558801,
"loss": 2.6955190658569337,
"step": 3500
},
{
"epoch": 7.358115183246073,
"learning_rate": 0.00035823787488203977,
"loss": 2.684027671813965,
"step": 3510
},
{
"epoch": 7.379057591623036,
"learning_rate": 0.00035925849560819944,
"loss": 2.698904800415039,
"step": 3520
},
{
"epoch": 7.4,
"learning_rate": 0.0003602791163343591,
"loss": 2.695516014099121,
"step": 3530
},
{
"epoch": 7.4209424083769635,
"learning_rate": 0.0003612997370605187,
"loss": 2.7022026062011717,
"step": 3540
},
{
"epoch": 7.441884816753927,
"learning_rate": 0.0003623203577866784,
"loss": 2.703862762451172,
"step": 3550
},
{
"epoch": 7.46282722513089,
"learning_rate": 0.00036334097851283806,
"loss": 2.6929235458374023,
"step": 3560
},
{
"epoch": 7.483769633507853,
"learning_rate": 0.00036436159923899773,
"loss": 2.6659242630004885,
"step": 3570
},
{
"epoch": 7.504712041884817,
"learning_rate": 0.0003653822199651574,
"loss": 2.660002899169922,
"step": 3580
},
{
"epoch": 7.52565445026178,
"learning_rate": 0.000366402840691317,
"loss": 2.699945831298828,
"step": 3590
},
{
"epoch": 7.546596858638743,
"learning_rate": 0.0003674234614174767,
"loss": 2.677934455871582,
"step": 3600
},
{
"epoch": 7.5675392670157065,
"learning_rate": 0.00036844408214363635,
"loss": 2.6725204467773436,
"step": 3610
},
{
"epoch": 7.5884816753926705,
"learning_rate": 0.000369464702869796,
"loss": 2.701091003417969,
"step": 3620
},
{
"epoch": 7.609424083769634,
"learning_rate": 0.00037048532359595563,
"loss": 2.6974748611450194,
"step": 3630
},
{
"epoch": 7.630366492146597,
"learning_rate": 0.00037150594432211524,
"loss": 2.6910377502441407,
"step": 3640
},
{
"epoch": 7.65130890052356,
"learning_rate": 0.0003725265650482749,
"loss": 2.6975467681884764,
"step": 3650
},
{
"epoch": 7.672251308900524,
"learning_rate": 0.0003735471857744346,
"loss": 2.6613983154296874,
"step": 3660
},
{
"epoch": 7.693193717277487,
"learning_rate": 0.00037456780650059425,
"loss": 2.6739892959594727,
"step": 3670
},
{
"epoch": 7.71413612565445,
"learning_rate": 0.0003755884272267539,
"loss": 2.684556770324707,
"step": 3680
},
{
"epoch": 7.735078534031413,
"learning_rate": 0.0003766090479529136,
"loss": 2.662580680847168,
"step": 3690
},
{
"epoch": 7.756020942408377,
"learning_rate": 0.0003776296686790732,
"loss": 2.671764373779297,
"step": 3700
},
{
"epoch": 7.776963350785341,
"learning_rate": 0.00037865028940523287,
"loss": 2.687736701965332,
"step": 3710
},
{
"epoch": 7.797905759162304,
"learning_rate": 0.00037967091013139253,
"loss": 2.676908493041992,
"step": 3720
},
{
"epoch": 7.818848167539267,
"learning_rate": 0.0003806915308575522,
"loss": 2.6953929901123046,
"step": 3730
},
{
"epoch": 7.83979057591623,
"learning_rate": 0.00038171215158371187,
"loss": 2.7138477325439454,
"step": 3740
},
{
"epoch": 7.860732984293193,
"learning_rate": 0.00038273277230987154,
"loss": 2.658716583251953,
"step": 3750
},
{
"epoch": 7.881675392670157,
"learning_rate": 0.00038375339303603115,
"loss": 2.6610225677490233,
"step": 3760
},
{
"epoch": 7.90261780104712,
"learning_rate": 0.0003847740137621908,
"loss": 2.6816684722900392,
"step": 3770
},
{
"epoch": 7.923560209424084,
"learning_rate": 0.0003857946344883505,
"loss": 2.69453125,
"step": 3780
},
{
"epoch": 7.944502617801048,
"learning_rate": 0.00038681525521451016,
"loss": 2.6690832138061524,
"step": 3790
},
{
"epoch": 7.965445026178011,
"learning_rate": 0.0003878358759406698,
"loss": 2.6719383239746093,
"step": 3800
},
{
"epoch": 7.986387434554974,
"learning_rate": 0.00038885649666682944,
"loss": 2.6821807861328124,
"step": 3810
},
{
"epoch": 7.998952879581152,
"eval_loss": 2.6321663856506348,
"eval_runtime": 73.4258,
"eval_samples_per_second": 149.212,
"step": 3816
},
{
"epoch": 8.008376963350786,
"learning_rate": 0.0003898771173929891,
"loss": 2.792423057556152,
"step": 3820
},
{
"epoch": 8.029319371727748,
"learning_rate": 0.0003908977381191488,
"loss": 2.639652061462402,
"step": 3830
},
{
"epoch": 8.050261780104712,
"learning_rate": 0.00039191835884530845,
"loss": 2.658628463745117,
"step": 3840
},
{
"epoch": 8.071204188481675,
"learning_rate": 0.0003929389795714681,
"loss": 2.6759145736694334,
"step": 3850
},
{
"epoch": 8.092146596858639,
"learning_rate": 0.0003939596002976278,
"loss": 2.7123003005981445,
"step": 3860
},
{
"epoch": 8.113089005235603,
"learning_rate": 0.0003949802210237874,
"loss": 2.654216766357422,
"step": 3870
},
{
"epoch": 8.134031413612565,
"learning_rate": 0.00039600084174994706,
"loss": 2.653605651855469,
"step": 3880
},
{
"epoch": 8.154973821989529,
"learning_rate": 0.00039702146247610673,
"loss": 2.6602855682373048,
"step": 3890
},
{
"epoch": 8.175916230366493,
"learning_rate": 0.0003980420832022664,
"loss": 2.664006805419922,
"step": 3900
},
{
"epoch": 8.196858638743455,
"learning_rate": 0.00039906270392842607,
"loss": 2.6484548568725588,
"step": 3910
},
{
"epoch": 8.21780104712042,
"learning_rate": 0.00040008332465458574,
"loss": 2.6500553131103515,
"step": 3920
},
{
"epoch": 8.238743455497382,
"learning_rate": 0.00040110394538074535,
"loss": 2.6387815475463867,
"step": 3930
},
{
"epoch": 8.259685863874346,
"learning_rate": 0.000402124566106905,
"loss": 2.666366958618164,
"step": 3940
},
{
"epoch": 8.28062827225131,
"learning_rate": 0.0004031451868330647,
"loss": 2.69473876953125,
"step": 3950
},
{
"epoch": 8.301570680628272,
"learning_rate": 0.00040416580755922436,
"loss": 2.649556350708008,
"step": 3960
},
{
"epoch": 8.322513089005236,
"learning_rate": 0.000405186428285384,
"loss": 2.661054801940918,
"step": 3970
},
{
"epoch": 8.343455497382198,
"learning_rate": 0.0004062070490115437,
"loss": 2.651872253417969,
"step": 3980
},
{
"epoch": 8.364397905759162,
"learning_rate": 0.0004072276697377033,
"loss": 2.657582473754883,
"step": 3990
},
{
"epoch": 8.385340314136126,
"learning_rate": 0.000408248290463863,
"loss": 2.647386932373047,
"step": 4000
},
{
"epoch": 8.406282722513089,
"learning_rate": 0.00040926891119002264,
"loss": 2.622752380371094,
"step": 4010
},
{
"epoch": 8.427225130890053,
"learning_rate": 0.0004102895319161823,
"loss": 2.6471187591552736,
"step": 4020
},
{
"epoch": 8.448167539267015,
"learning_rate": 0.000411310152642342,
"loss": 2.6503711700439454,
"step": 4030
},
{
"epoch": 8.469109947643979,
"learning_rate": 0.0004123307733685016,
"loss": 2.662638854980469,
"step": 4040
},
{
"epoch": 8.490052356020943,
"learning_rate": 0.00041335139409466126,
"loss": 2.6614749908447264,
"step": 4050
},
{
"epoch": 8.510994764397905,
"learning_rate": 0.0004143720148208209,
"loss": 2.651297760009766,
"step": 4060
},
{
"epoch": 8.53193717277487,
"learning_rate": 0.00041539263554698055,
"loss": 2.654410552978516,
"step": 4070
},
{
"epoch": 8.552879581151833,
"learning_rate": 0.0004164132562731402,
"loss": 2.6440593719482424,
"step": 4080
},
{
"epoch": 8.573821989528795,
"learning_rate": 0.00041743387699929983,
"loss": 2.6485244750976564,
"step": 4090
},
{
"epoch": 8.59476439790576,
"learning_rate": 0.0004184544977254595,
"loss": 2.645181655883789,
"step": 4100
},
{
"epoch": 8.615706806282722,
"learning_rate": 0.00041947511845161916,
"loss": 2.6527120590209963,
"step": 4110
},
{
"epoch": 8.636649214659686,
"learning_rate": 0.00042049573917777883,
"loss": 2.644283676147461,
"step": 4120
},
{
"epoch": 8.65759162303665,
"learning_rate": 0.0004215163599039385,
"loss": 2.6589195251464846,
"step": 4130
},
{
"epoch": 8.678534031413612,
"learning_rate": 0.00042253698063009817,
"loss": 2.638910675048828,
"step": 4140
},
{
"epoch": 8.699476439790576,
"learning_rate": 0.0004235576013562578,
"loss": 2.6332365036010743,
"step": 4150
},
{
"epoch": 8.72041884816754,
"learning_rate": 0.00042457822208241745,
"loss": 2.6593414306640626,
"step": 4160
},
{
"epoch": 8.741361256544502,
"learning_rate": 0.0004255988428085771,
"loss": 2.6320539474487306,
"step": 4170
},
{
"epoch": 8.762303664921467,
"learning_rate": 0.0004266194635347368,
"loss": 2.6423057556152343,
"step": 4180
},
{
"epoch": 8.783246073298429,
"learning_rate": 0.00042764008426089646,
"loss": 2.6288238525390626,
"step": 4190
},
{
"epoch": 8.804188481675393,
"learning_rate": 0.0004286607049870561,
"loss": 2.624134635925293,
"step": 4200
},
{
"epoch": 8.825130890052357,
"learning_rate": 0.00042968132571321574,
"loss": 2.6502635955810545,
"step": 4210
},
{
"epoch": 8.846073298429319,
"learning_rate": 0.0004307019464393754,
"loss": 2.630023384094238,
"step": 4220
},
{
"epoch": 8.867015706806283,
"learning_rate": 0.0004317225671655351,
"loss": 2.6346521377563477,
"step": 4230
},
{
"epoch": 8.887958115183245,
"learning_rate": 0.00043274318789169474,
"loss": 2.612893295288086,
"step": 4240
},
{
"epoch": 8.90890052356021,
"learning_rate": 0.0004337638086178544,
"loss": 2.605606460571289,
"step": 4250
},
{
"epoch": 8.929842931937173,
"learning_rate": 0.0004347844293440141,
"loss": 2.6086076736450194,
"step": 4260
},
{
"epoch": 8.950785340314136,
"learning_rate": 0.0004358050500701737,
"loss": 2.6492921829223635,
"step": 4270
},
{
"epoch": 8.9717277486911,
"learning_rate": 0.00043682567079633336,
"loss": 2.60416202545166,
"step": 4280
},
{
"epoch": 8.992670157068062,
"learning_rate": 0.00043784629152249303,
"loss": 2.639349937438965,
"step": 4290
},
{
"epoch": 8.998952879581152,
"eval_loss": 2.6156058311462402,
"eval_runtime": 73.0048,
"eval_samples_per_second": 150.072,
"step": 4293
},
{
"epoch": 9.014659685863874,
"learning_rate": 0.0004388669122486527,
"loss": 2.7455169677734377,
"step": 4300
},
{
"epoch": 9.035602094240838,
"learning_rate": 0.00043988753297481237,
"loss": 2.632405471801758,
"step": 4310
},
{
"epoch": 9.0565445026178,
"learning_rate": 0.000440908153700972,
"loss": 2.644683074951172,
"step": 4320
},
{
"epoch": 9.077486910994764,
"learning_rate": 0.00044192877442713165,
"loss": 2.620805358886719,
"step": 4330
},
{
"epoch": 9.098429319371728,
"learning_rate": 0.0004429493951532913,
"loss": 2.6189605712890627,
"step": 4340
},
{
"epoch": 9.11937172774869,
"learning_rate": 0.000443970015879451,
"loss": 2.614165687561035,
"step": 4350
},
{
"epoch": 9.140314136125655,
"learning_rate": 0.00044499063660561065,
"loss": 2.615524673461914,
"step": 4360
},
{
"epoch": 9.161256544502617,
"learning_rate": 0.0004460112573317703,
"loss": 2.623036003112793,
"step": 4370
},
{
"epoch": 9.182198952879581,
"learning_rate": 0.00044703187805792994,
"loss": 2.636097526550293,
"step": 4380
},
{
"epoch": 9.203141361256545,
"learning_rate": 0.0004480524987840896,
"loss": 2.5902896881103517,
"step": 4390
},
{
"epoch": 9.224083769633507,
"learning_rate": 0.0004490731195102493,
"loss": 2.596613121032715,
"step": 4400
},
{
"epoch": 9.245026178010471,
"learning_rate": 0.00045009374023640894,
"loss": 2.6126483917236327,
"step": 4410
},
{
"epoch": 9.265968586387434,
"learning_rate": 0.0004511143609625686,
"loss": 2.6270191192626955,
"step": 4420
},
{
"epoch": 9.286910994764398,
"learning_rate": 0.0004521349816887283,
"loss": 2.603664779663086,
"step": 4430
},
{
"epoch": 9.307853403141362,
"learning_rate": 0.0004531556024148879,
"loss": 2.618840980529785,
"step": 4440
},
{
"epoch": 9.328795811518324,
"learning_rate": 0.00045417622314104756,
"loss": 2.6355594635009765,
"step": 4450
},
{
"epoch": 9.349738219895288,
"learning_rate": 0.00045519684386720723,
"loss": 2.6463857650756837,
"step": 4460
},
{
"epoch": 9.370680628272252,
"learning_rate": 0.0004562174645933669,
"loss": 2.6071035385131838,
"step": 4470
},
{
"epoch": 9.391623036649214,
"learning_rate": 0.00045723808531952657,
"loss": 2.6336111068725585,
"step": 4480
},
{
"epoch": 9.412565445026178,
"learning_rate": 0.0004582587060456861,
"loss": 2.6231353759765623,
"step": 4490
},
{
"epoch": 9.43350785340314,
"learning_rate": 0.0004592793267718458,
"loss": 2.6107151031494142,
"step": 4500
},
{
"epoch": 9.454450261780105,
"learning_rate": 0.00046029994749800546,
"loss": 2.6213560104370117,
"step": 4510
},
{
"epoch": 9.475392670157069,
"learning_rate": 0.00046132056822416513,
"loss": 2.584638214111328,
"step": 4520
},
{
"epoch": 9.496335078534031,
"learning_rate": 0.0004623411889503248,
"loss": 2.6262628555297853,
"step": 4530
},
{
"epoch": 9.517277486910995,
"learning_rate": 0.00046336180967648447,
"loss": 2.5897647857666017,
"step": 4540
},
{
"epoch": 9.538219895287957,
"learning_rate": 0.0004643824304026441,
"loss": 2.6060354232788088,
"step": 4550
},
{
"epoch": 9.559162303664921,
"learning_rate": 0.00046540305112880375,
"loss": 2.6086732864379885,
"step": 4560
},
{
"epoch": 9.580104712041885,
"learning_rate": 0.0004664236718549634,
"loss": 2.5964023590087892,
"step": 4570
},
{
"epoch": 9.601047120418848,
"learning_rate": 0.0004674442925811231,
"loss": 2.622255325317383,
"step": 4580
},
{
"epoch": 9.621989528795812,
"learning_rate": 0.00046846491330728275,
"loss": 2.5982023239135743,
"step": 4590
},
{
"epoch": 9.642931937172776,
"learning_rate": 0.00046948553403344237,
"loss": 2.6015974044799806,
"step": 4600
},
{
"epoch": 9.663874345549738,
"learning_rate": 0.00047050615475960204,
"loss": 2.617095184326172,
"step": 4610
},
{
"epoch": 9.684816753926702,
"learning_rate": 0.0004715267754857617,
"loss": 2.5803293228149413,
"step": 4620
},
{
"epoch": 9.705759162303664,
"learning_rate": 0.00047254739621192137,
"loss": 2.615343475341797,
"step": 4630
},
{
"epoch": 9.726701570680628,
"learning_rate": 0.00047356801693808104,
"loss": 2.5800424575805665,
"step": 4640
},
{
"epoch": 9.747643979057592,
"learning_rate": 0.0004745886376642407,
"loss": 2.598635673522949,
"step": 4650
},
{
"epoch": 9.768586387434555,
"learning_rate": 0.0004756092583904003,
"loss": 2.615553283691406,
"step": 4660
},
{
"epoch": 9.789528795811519,
"learning_rate": 0.00047662987911656,
"loss": 2.582602882385254,
"step": 4670
},
{
"epoch": 9.810471204188481,
"learning_rate": 0.00047765049984271966,
"loss": 2.5848695755004885,
"step": 4680
},
{
"epoch": 9.831413612565445,
"learning_rate": 0.00047867112056887933,
"loss": 2.6057199478149413,
"step": 4690
},
{
"epoch": 9.852356020942409,
"learning_rate": 0.000479691741295039,
"loss": 2.6193204879760743,
"step": 4700
},
{
"epoch": 9.873298429319371,
"learning_rate": 0.00048071236202119866,
"loss": 2.6137775421142577,
"step": 4710
},
{
"epoch": 9.894240837696335,
"learning_rate": 0.0004817329827473583,
"loss": 2.613667678833008,
"step": 4720
},
{
"epoch": 9.915183246073298,
"learning_rate": 0.00048275360347351795,
"loss": 2.599857711791992,
"step": 4730
},
{
"epoch": 9.936125654450262,
"learning_rate": 0.0004837742241996776,
"loss": 2.579629325866699,
"step": 4740
},
{
"epoch": 9.957068062827226,
"learning_rate": 0.0004847948449258373,
"loss": 2.6212156295776365,
"step": 4750
},
{
"epoch": 9.978010471204188,
"learning_rate": 0.00048581546565199695,
"loss": 2.5806146621704102,
"step": 4760
},
{
"epoch": 9.998952879581152,
"learning_rate": 0.0004868360863781566,
"loss": 2.5807775497436523,
"step": 4770
},
{
"epoch": 9.998952879581152,
"eval_loss": 2.604253053665161,
"eval_runtime": 74.0972,
"eval_samples_per_second": 147.86,
"step": 4770
},
{
"epoch": 10.020942408376964,
"learning_rate": 0.00048785670710431623,
"loss": 2.721482849121094,
"step": 4780
},
{
"epoch": 10.041884816753926,
"learning_rate": 0.000488877327830476,
"loss": 2.5954622268676757,
"step": 4790
},
{
"epoch": 10.06282722513089,
"learning_rate": 0.0004898979485566356,
"loss": 2.6108915328979494,
"step": 4800
},
{
"epoch": 10.083769633507853,
"learning_rate": 0.0004909185692827952,
"loss": 2.607802391052246,
"step": 4810
},
{
"epoch": 10.104712041884817,
"learning_rate": 0.0004919391900089549,
"loss": 2.594425010681152,
"step": 4820
},
{
"epoch": 10.12565445026178,
"learning_rate": 0.0004929598107351145,
"loss": 2.6012176513671874,
"step": 4830
},
{
"epoch": 10.146596858638743,
"learning_rate": 0.0004939804314612742,
"loss": 2.5987337112426756,
"step": 4840
},
{
"epoch": 10.167539267015707,
"learning_rate": 0.0004950010521874339,
"loss": 2.5843479156494142,
"step": 4850
},
{
"epoch": 10.188481675392671,
"learning_rate": 0.0004960216729135935,
"loss": 2.612710380554199,
"step": 4860
},
{
"epoch": 10.209424083769633,
"learning_rate": 0.0004970422936397532,
"loss": 2.608279991149902,
"step": 4870
},
{
"epoch": 10.230366492146597,
"learning_rate": 0.0004980629143659128,
"loss": 2.599015998840332,
"step": 4880
},
{
"epoch": 10.25130890052356,
"learning_rate": 0.0004990835350920725,
"loss": 2.6169193267822264,
"step": 4890
},
{
"epoch": 10.272251308900524,
"learning_rate": 0.0005001041558182321,
"loss": 2.5874061584472656,
"step": 4900
},
{
"epoch": 10.293193717277488,
"learning_rate": 0.0005011247765443918,
"loss": 2.5972221374511717,
"step": 4910
},
{
"epoch": 10.31413612565445,
"learning_rate": 0.0005021453972705514,
"loss": 2.592855453491211,
"step": 4920
},
{
"epoch": 10.335078534031414,
"learning_rate": 0.0005031660179967111,
"loss": 2.5813924789428713,
"step": 4930
},
{
"epoch": 10.356020942408376,
"learning_rate": 0.0005041866387228707,
"loss": 2.5748531341552736,
"step": 4940
},
{
"epoch": 10.37696335078534,
"learning_rate": 0.0005052072594490304,
"loss": 2.5954639434814455,
"step": 4950
},
{
"epoch": 10.397905759162304,
"learning_rate": 0.00050622788017519,
"loss": 2.5695329666137696,
"step": 4960
},
{
"epoch": 10.418848167539267,
"learning_rate": 0.0005072485009013497,
"loss": 2.5873296737670897,
"step": 4970
},
{
"epoch": 10.43979057591623,
"learning_rate": 0.0005082691216275094,
"loss": 2.6035577774047853,
"step": 4980
},
{
"epoch": 10.460732984293193,
"learning_rate": 0.000509289742353669,
"loss": 2.5872554779052734,
"step": 4990
},
{
"epoch": 10.481675392670157,
"learning_rate": 0.0005103103630798287,
"loss": 2.548787498474121,
"step": 5000
},
{
"epoch": 10.502617801047121,
"learning_rate": 0.0005098008169087462,
"loss": 2.5832122802734374,
"step": 5010
},
{
"epoch": 10.523560209424083,
"learning_rate": 0.0005092927940452339,
"loss": 2.5651966094970704,
"step": 5020
},
{
"epoch": 10.544502617801047,
"learning_rate": 0.0005087862869144114,
"loss": 2.5844688415527344,
"step": 5030
},
{
"epoch": 10.565445026178011,
"learning_rate": 0.0005082812879940277,
"loss": 2.5721431732177735,
"step": 5040
},
{
"epoch": 10.586387434554974,
"learning_rate": 0.0005077777898139921,
"loss": 2.59671630859375,
"step": 5050
},
{
"epoch": 10.607329842931938,
"learning_rate": 0.0005072757849559103,
"loss": 2.5834468841552733,
"step": 5060
},
{
"epoch": 10.6282722513089,
"learning_rate": 0.0005067752660526248,
"loss": 2.566558074951172,
"step": 5070
},
{
"epoch": 10.649214659685864,
"learning_rate": 0.0005062762257877613,
"loss": 2.579891395568848,
"step": 5080
},
{
"epoch": 10.670157068062828,
"learning_rate": 0.0005057786568952791,
"loss": 2.5753833770751955,
"step": 5090
},
{
"epoch": 10.69109947643979,
"learning_rate": 0.000505282552159027,
"loss": 2.5887382507324217,
"step": 5100
},
{
"epoch": 10.712041884816754,
"learning_rate": 0.000504787904412304,
"loss": 2.578693962097168,
"step": 5110
},
{
"epoch": 10.732984293193716,
"learning_rate": 0.000504294706537424,
"loss": 2.5702871322631835,
"step": 5120
},
{
"epoch": 10.75392670157068,
"learning_rate": 0.0005038029514652858,
"loss": 2.579792594909668,
"step": 5130
},
{
"epoch": 10.774869109947645,
"learning_rate": 0.0005033126321749477,
"loss": 2.5686906814575194,
"step": 5140
},
{
"epoch": 10.795811518324607,
"learning_rate": 0.000502823741693206,
"loss": 2.5612104415893553,
"step": 5150
},
{
"epoch": 10.81675392670157,
"learning_rate": 0.0005023362730941793,
"loss": 2.5499547958374023,
"step": 5160
},
{
"epoch": 10.837696335078533,
"learning_rate": 0.0005018502194988955,
"loss": 2.5822380065917967,
"step": 5170
},
{
"epoch": 10.858638743455497,
"learning_rate": 0.0005013655740748848,
"loss": 2.5612071990966796,
"step": 5180
},
{
"epoch": 10.879581151832461,
"learning_rate": 0.0005008823300357761,
"loss": 2.5559200286865233,
"step": 5190
},
{
"epoch": 10.900523560209423,
"learning_rate": 0.0005004004806408972,
"loss": 2.566044807434082,
"step": 5200
},
{
"epoch": 10.921465968586388,
"learning_rate": 0.0004999200191948814,
"loss": 2.5504446029663086,
"step": 5210
},
{
"epoch": 10.942408376963352,
"learning_rate": 0.0004994409390472751,
"loss": 2.5642301559448244,
"step": 5220
},
{
"epoch": 10.963350785340314,
"learning_rate": 0.0004989632335921523,
"loss": 2.560255241394043,
"step": 5230
},
{
"epoch": 10.984293193717278,
"learning_rate": 0.0004984868962677315,
"loss": 2.5622379302978517,
"step": 5240
},
{
"epoch": 10.998952879581152,
"eval_loss": 2.5793886184692383,
"eval_runtime": 73.6792,
"eval_samples_per_second": 148.699,
"step": 5247
},
{
"epoch": 11.006282722513088,
"learning_rate": 0.0004980119205559973,
"loss": 2.682263946533203,
"step": 5250
},
{
"epoch": 11.027225130890052,
"learning_rate": 0.0004975382999823259,
"loss": 2.581485557556152,
"step": 5260
},
{
"epoch": 11.048167539267016,
"learning_rate": 0.0004970660281151141,
"loss": 2.5775730133056642,
"step": 5270
},
{
"epoch": 11.069109947643978,
"learning_rate": 0.0004965950985654126,
"loss": 2.587117385864258,
"step": 5280
},
{
"epoch": 11.090052356020943,
"learning_rate": 0.0004961255049865635,
"loss": 2.5482534408569335,
"step": 5290
},
{
"epoch": 11.110994764397907,
"learning_rate": 0.0004956572410738401,
"loss": 2.5464542388916014,
"step": 5300
},
{
"epoch": 11.131937172774869,
"learning_rate": 0.000495190300564092,
"loss": 2.557751274108887,
"step": 5310
},
{
"epoch": 11.152879581151833,
"learning_rate": 0.0004947246772353933,
"loss": 2.54296932220459,
"step": 5320
},
{
"epoch": 11.173821989528795,
"learning_rate": 0.0004942603649066942,
"loss": 2.576173782348633,
"step": 5330
},
{
"epoch": 11.19476439790576,
"learning_rate": 0.0004937973574374762,
"loss": 2.535029411315918,
"step": 5340
},
{
"epoch": 11.215706806282723,
"learning_rate": 0.0004933356487274114,
"loss": 2.551316833496094,
"step": 5350
},
{
"epoch": 11.236649214659685,
"learning_rate": 0.0004928752327160248,
"loss": 2.551384925842285,
"step": 5360
},
{
"epoch": 11.25759162303665,
"learning_rate": 0.0004924161033823598,
"loss": 2.5437076568603514,
"step": 5370
},
{
"epoch": 11.278534031413612,
"learning_rate": 0.0004919582547446482,
"loss": 2.5423688888549805,
"step": 5380
},
{
"epoch": 11.299476439790576,
"learning_rate": 0.0004915016808599824,
"loss": 2.5350723266601562,
"step": 5390
},
{
"epoch": 11.32041884816754,
"learning_rate": 0.0004910463758239914,
"loss": 2.5370588302612305,
"step": 5400
},
{
"epoch": 11.341361256544502,
"learning_rate": 0.0004905923337705201,
"loss": 2.5458969116210937,
"step": 5410
},
{
"epoch": 11.362303664921466,
"learning_rate": 0.0004901395488713123,
"loss": 2.5638805389404298,
"step": 5420
},
{
"epoch": 11.38324607329843,
"learning_rate": 0.0004896880153356963,
"loss": 2.5516336441040037,
"step": 5430
},
{
"epoch": 11.404188481675392,
"learning_rate": 0.000489237727410273,
"loss": 2.559841346740723,
"step": 5440
},
{
"epoch": 11.425130890052356,
"learning_rate": 0.0004887886793786093,
"loss": 2.559991645812988,
"step": 5450
},
{
"epoch": 11.446073298429319,
"learning_rate": 0.0004883408655609327,
"loss": 2.5499288558959963,
"step": 5460
},
{
"epoch": 11.467015706806283,
"learning_rate": 0.0004878942803138293,
"loss": 2.544484519958496,
"step": 5470
},
{
"epoch": 11.487958115183247,
"learning_rate": 0.0004874489180299454,
"loss": 2.537228584289551,
"step": 5480
},
{
"epoch": 11.508900523560209,
"learning_rate": 0.00048700477313769213,
"loss": 2.5507528305053713,
"step": 5490
},
{
"epoch": 11.529842931937173,
"learning_rate": 0.00048656184010095185,
"loss": 2.5349197387695312,
"step": 5500
},
{
"epoch": 11.550785340314135,
"learning_rate": 0.00048612011341878916,
"loss": 2.5785711288452147,
"step": 5510
},
{
"epoch": 11.5717277486911,
"learning_rate": 0.0004856795876251634,
"loss": 2.5618894577026365,
"step": 5520
},
{
"epoch": 11.592670157068063,
"learning_rate": 0.00048524025728864493,
"loss": 2.553698921203613,
"step": 5530
},
{
"epoch": 11.613612565445026,
"learning_rate": 0.0004848021170121335,
"loss": 2.563484954833984,
"step": 5540
},
{
"epoch": 11.63455497382199,
"learning_rate": 0.0004843651614325803,
"loss": 2.5655393600463867,
"step": 5550
},
{
"epoch": 11.655497382198952,
"learning_rate": 0.00048392938522071163,
"loss": 2.550769233703613,
"step": 5560
},
{
"epoch": 11.676439790575916,
"learning_rate": 0.0004834947830807563,
"loss": 2.549762725830078,
"step": 5570
},
{
"epoch": 11.69738219895288,
"learning_rate": 0.00048306134975017523,
"loss": 2.5488073348999025,
"step": 5580
},
{
"epoch": 11.718324607329842,
"learning_rate": 0.0004826290799993939,
"loss": 2.554892158508301,
"step": 5590
},
{
"epoch": 11.739267015706806,
"learning_rate": 0.0004821979686315372,
"loss": 2.519801902770996,
"step": 5600
},
{
"epoch": 11.76020942408377,
"learning_rate": 0.00048176801048216693,
"loss": 2.541176414489746,
"step": 5610
},
{
"epoch": 11.781151832460733,
"learning_rate": 0.0004813392004190223,
"loss": 2.52908878326416,
"step": 5620
},
{
"epoch": 11.802094240837697,
"learning_rate": 0.00048091153334176224,
"loss": 2.5230037689208986,
"step": 5630
},
{
"epoch": 11.823036649214659,
"learning_rate": 0.00048048500418171097,
"loss": 2.5191682815551757,
"step": 5640
},
{
"epoch": 11.843979057591623,
"learning_rate": 0.0004800596079016053,
"loss": 2.553547668457031,
"step": 5650
},
{
"epoch": 11.864921465968587,
"learning_rate": 0.0004796353394953452,
"loss": 2.5445688247680662,
"step": 5660
},
{
"epoch": 11.88586387434555,
"learning_rate": 0.0004792121939877459,
"loss": 2.554265022277832,
"step": 5670
},
{
"epoch": 11.906806282722513,
"learning_rate": 0.00047879016643429336,
"loss": 2.5312326431274412,
"step": 5680
},
{
"epoch": 11.927748691099476,
"learning_rate": 0.00047836925192090116,
"loss": 2.54235897064209,
"step": 5690
},
{
"epoch": 11.94869109947644,
"learning_rate": 0.0004779494455636703,
"loss": 2.5591432571411135,
"step": 5700
},
{
"epoch": 11.969633507853404,
"learning_rate": 0.00047753074250865145,
"loss": 2.5049566268920898,
"step": 5710
},
{
"epoch": 11.990575916230366,
"learning_rate": 0.00047711313793160877,
"loss": 2.5459238052368165,
"step": 5720
},
{
"epoch": 11.998952879581152,
"eval_loss": 2.5662965774536133,
"eval_runtime": 74.2325,
"eval_samples_per_second": 147.59,
"step": 5724
},
{
"epoch": 12.012565445026178,
"learning_rate": 0.000476696627037787,
"loss": 2.6624425888061523,
"step": 5730
},
{
"epoch": 12.033507853403142,
"learning_rate": 0.0004762812050616797,
"loss": 2.5242809295654296,
"step": 5740
},
{
"epoch": 12.054450261780104,
"learning_rate": 0.0004758668672668006,
"loss": 2.541863441467285,
"step": 5750
},
{
"epoch": 12.075392670157068,
"learning_rate": 0.00047545360894545664,
"loss": 2.5424705505371095,
"step": 5760
},
{
"epoch": 12.09633507853403,
"learning_rate": 0.0004750414254185235,
"loss": 2.5307668685913085,
"step": 5770
},
{
"epoch": 12.117277486910995,
"learning_rate": 0.0004746303120352226,
"loss": 2.5350624084472657,
"step": 5780
},
{
"epoch": 12.138219895287959,
"learning_rate": 0.00047422026417290146,
"loss": 2.5554269790649413,
"step": 5790
},
{
"epoch": 12.159162303664921,
"learning_rate": 0.0004738112772368146,
"loss": 2.5168834686279298,
"step": 5800
},
{
"epoch": 12.180104712041885,
"learning_rate": 0.00047340334665990787,
"loss": 2.531605529785156,
"step": 5810
},
{
"epoch": 12.201047120418847,
"learning_rate": 0.0004729964679026039,
"loss": 2.515584373474121,
"step": 5820
},
{
"epoch": 12.221989528795811,
"learning_rate": 0.0004725906364525903,
"loss": 2.522596549987793,
"step": 5830
},
{
"epoch": 12.242931937172775,
"learning_rate": 0.0004721858478246089,
"loss": 2.5171236038208007,
"step": 5840
},
{
"epoch": 12.263874345549738,
"learning_rate": 0.0004717820975602482,
"loss": 2.529332160949707,
"step": 5850
},
{
"epoch": 12.284816753926702,
"learning_rate": 0.0004713793812277367,
"loss": 2.5047964096069335,
"step": 5860
},
{
"epoch": 12.305759162303666,
"learning_rate": 0.00047097769442173856,
"loss": 2.518666458129883,
"step": 5870
},
{
"epoch": 12.326701570680628,
"learning_rate": 0.00047057703276315164,
"loss": 2.5095588684082033,
"step": 5880
},
{
"epoch": 12.347643979057592,
"learning_rate": 0.0004701773918989065,
"loss": 2.5557069778442383,
"step": 5890
},
{
"epoch": 12.368586387434554,
"learning_rate": 0.00046977876750176805,
"loss": 2.5204561233520506,
"step": 5900
},
{
"epoch": 12.389528795811518,
"learning_rate": 0.0004693811552701385,
"loss": 2.5440658569335937,
"step": 5910
},
{
"epoch": 12.410471204188482,
"learning_rate": 0.0004689845509278626,
"loss": 2.5195499420166017,
"step": 5920
},
{
"epoch": 12.431413612565445,
"learning_rate": 0.00046858895022403474,
"loss": 2.506319999694824,
"step": 5930
},
{
"epoch": 12.452356020942409,
"learning_rate": 0.000468194348932807,
"loss": 2.507068061828613,
"step": 5940
},
{
"epoch": 12.473298429319371,
"learning_rate": 0.00046780074285319984,
"loss": 2.559153938293457,
"step": 5950
},
{
"epoch": 12.494240837696335,
"learning_rate": 0.0004674081278089144,
"loss": 2.5483341217041016,
"step": 5960
},
{
"epoch": 12.515183246073299,
"learning_rate": 0.00046701649964814616,
"loss": 2.523490333557129,
"step": 5970
},
{
"epoch": 12.536125654450261,
"learning_rate": 0.0004666258542434007,
"loss": 2.511086654663086,
"step": 5980
},
{
"epoch": 12.557068062827225,
"learning_rate": 0.000466236187491311,
"loss": 2.525220489501953,
"step": 5990
},
{
"epoch": 12.578010471204188,
"learning_rate": 0.00046584749531245617,
"loss": 2.5341968536376953,
"step": 6000
},
{
"epoch": 12.598952879581152,
"learning_rate": 0.0004654597736511823,
"loss": 2.517439842224121,
"step": 6010
},
{
"epoch": 12.619895287958116,
"learning_rate": 0.0004650730184754247,
"loss": 2.535861778259277,
"step": 6020
},
{
"epoch": 12.640837696335078,
"learning_rate": 0.0004646872257765318,
"loss": 2.5128170013427735,
"step": 6030
},
{
"epoch": 12.661780104712042,
"learning_rate": 0.00046430239156909045,
"loss": 2.5013412475585937,
"step": 6040
},
{
"epoch": 12.682722513089006,
"learning_rate": 0.00046391851189075343,
"loss": 2.492375373840332,
"step": 6050
},
{
"epoch": 12.703664921465968,
"learning_rate": 0.00046353558280206746,
"loss": 2.533987045288086,
"step": 6060
},
{
"epoch": 12.724607329842932,
"learning_rate": 0.00046315360038630404,
"loss": 2.5339818954467774,
"step": 6070
},
{
"epoch": 12.745549738219895,
"learning_rate": 0.0004627725607492909,
"loss": 2.5307100296020506,
"step": 6080
},
{
"epoch": 12.766492146596859,
"learning_rate": 0.00046239246001924503,
"loss": 2.5334211349487306,
"step": 6090
},
{
"epoch": 12.787434554973823,
"learning_rate": 0.000462013294346608,
"loss": 2.524607849121094,
"step": 6100
},
{
"epoch": 12.808376963350785,
"learning_rate": 0.00046163505990388167,
"loss": 2.5250701904296875,
"step": 6110
},
{
"epoch": 12.829319371727749,
"learning_rate": 0.00046125775288546623,
"loss": 2.514480400085449,
"step": 6120
},
{
"epoch": 12.850261780104713,
"learning_rate": 0.00046088136950749937,
"loss": 2.506093215942383,
"step": 6130
},
{
"epoch": 12.871204188481675,
"learning_rate": 0.0004605059060076967,
"loss": 2.5329927444458007,
"step": 6140
},
{
"epoch": 12.89214659685864,
"learning_rate": 0.0004601313586451939,
"loss": 2.5250947952270506,
"step": 6150
},
{
"epoch": 12.913089005235602,
"learning_rate": 0.00045975772370039034,
"loss": 2.535073471069336,
"step": 6160
},
{
"epoch": 12.934031413612566,
"learning_rate": 0.0004593849974747937,
"loss": 2.524639892578125,
"step": 6170
},
{
"epoch": 12.95497382198953,
"learning_rate": 0.0004590131762908664,
"loss": 2.509628486633301,
"step": 6180
},
{
"epoch": 12.975916230366492,
"learning_rate": 0.00045864225649187287,
"loss": 2.5401321411132813,
"step": 6190
},
{
"epoch": 12.996858638743456,
"learning_rate": 0.000458272234441729,
"loss": 2.512648582458496,
"step": 6200
},
{
"epoch": 12.998952879581152,
"eval_loss": 2.558600902557373,
"eval_runtime": 73.5975,
"eval_samples_per_second": 148.864,
"step": 6201
},
{
"epoch": 13.018848167539266,
"learning_rate": 0.00045790310652485205,
"loss": 2.6353900909423826,
"step": 6210
},
{
"epoch": 13.03979057591623,
"learning_rate": 0.0004575348691460124,
"loss": 2.519637870788574,
"step": 6220
},
{
"epoch": 13.060732984293194,
"learning_rate": 0.00045716751873018654,
"loss": 2.515974426269531,
"step": 6230
},
{
"epoch": 13.081675392670157,
"learning_rate": 0.00045680105172241103,
"loss": 2.5275392532348633,
"step": 6240
},
{
"epoch": 13.10261780104712,
"learning_rate": 0.0004564354645876384,
"loss": 2.518478012084961,
"step": 6250
},
{
"epoch": 13.123560209424085,
"learning_rate": 0.00045607075381059363,
"loss": 2.506203460693359,
"step": 6260
},
{
"epoch": 13.144502617801047,
"learning_rate": 0.00045570691589563234,
"loss": 2.501953125,
"step": 6270
},
{
"epoch": 13.165445026178011,
"learning_rate": 0.0004553439473666,
"loss": 2.536935234069824,
"step": 6280
},
{
"epoch": 13.186387434554973,
"learning_rate": 0.0004549818447666924,
"loss": 2.500376892089844,
"step": 6290
},
{
"epoch": 13.207329842931937,
"learning_rate": 0.00045462060465831743,
"loss": 2.507547950744629,
"step": 6300
},
{
"epoch": 13.228272251308901,
"learning_rate": 0.0004542602236229581,
"loss": 2.50396842956543,
"step": 6310
},
{
"epoch": 13.249214659685864,
"learning_rate": 0.00045390069826103653,
"loss": 2.4975730895996096,
"step": 6320
},
{
"epoch": 13.270157068062828,
"learning_rate": 0.00045354202519177925,
"loss": 2.5031005859375,
"step": 6330
},
{
"epoch": 13.29109947643979,
"learning_rate": 0.0004531842010530839,
"loss": 2.5102792739868165,
"step": 6340
},
{
"epoch": 13.312041884816754,
"learning_rate": 0.0004528272225013865,
"loss": 2.510196876525879,
"step": 6350
},
{
"epoch": 13.332984293193718,
"learning_rate": 0.00045247108621153056,
"loss": 2.529274559020996,
"step": 6360
},
{
"epoch": 13.35392670157068,
"learning_rate": 0.0004521157888766368,
"loss": 2.5169746398925783,
"step": 6370
},
{
"epoch": 13.374869109947644,
"learning_rate": 0.00045176132720797443,
"loss": 2.4974170684814454,
"step": 6380
},
{
"epoch": 13.395811518324606,
"learning_rate": 0.0004514076979348328,
"loss": 2.4937871932983398,
"step": 6390
},
{
"epoch": 13.41675392670157,
"learning_rate": 0.0004510548978043951,
"loss": 2.4988937377929688,
"step": 6400
},
{
"epoch": 13.437696335078535,
"learning_rate": 0.00045070292358161265,
"loss": 2.5192642211914062,
"step": 6410
},
{
"epoch": 13.458638743455497,
"learning_rate": 0.0004503517720490801,
"loss": 2.515308380126953,
"step": 6420
},
{
"epoch": 13.47958115183246,
"learning_rate": 0.000450001440006912,
"loss": 2.4873653411865235,
"step": 6430
},
{
"epoch": 13.500523560209425,
"learning_rate": 0.00044965192427262043,
"loss": 2.4860763549804688,
"step": 6440
},
{
"epoch": 13.521465968586387,
"learning_rate": 0.0004493032216809934,
"loss": 2.4914045333862305,
"step": 6450
},
{
"epoch": 13.542408376963351,
"learning_rate": 0.00044895532908397455,
"loss": 2.505444145202637,
"step": 6460
},
{
"epoch": 13.563350785340313,
"learning_rate": 0.00044860824335054384,
"loss": 2.484037971496582,
"step": 6470
},
{
"epoch": 13.584293193717278,
"learning_rate": 0.00044826196136659916,
"loss": 2.4801618576049806,
"step": 6480
},
{
"epoch": 13.605235602094242,
"learning_rate": 0.00044791648003483884,
"loss": 2.497146415710449,
"step": 6490
},
{
"epoch": 13.626178010471204,
"learning_rate": 0.0004475717962746455,
"loss": 2.510635757446289,
"step": 6500
},
{
"epoch": 13.647120418848168,
"learning_rate": 0.0004472279070219706,
"loss": 2.5058326721191406,
"step": 6510
},
{
"epoch": 13.66806282722513,
"learning_rate": 0.00044688480922922,
"loss": 2.490641975402832,
"step": 6520
},
{
"epoch": 13.689005235602094,
"learning_rate": 0.00044654249986514057,
"loss": 2.4954011917114256,
"step": 6530
},
{
"epoch": 13.709947643979058,
"learning_rate": 0.0004462009759147076,
"loss": 2.496523857116699,
"step": 6540
},
{
"epoch": 13.73089005235602,
"learning_rate": 0.0004458602343790135,
"loss": 2.4896028518676756,
"step": 6550
},
{
"epoch": 13.751832460732984,
"learning_rate": 0.00044552027227515704,
"loss": 2.490574836730957,
"step": 6560
},
{
"epoch": 13.772774869109949,
"learning_rate": 0.00044518108663613355,
"loss": 2.4956533432006838,
"step": 6570
},
{
"epoch": 13.79371727748691,
"learning_rate": 0.00044484267451072644,
"loss": 2.5057823181152346,
"step": 6580
},
{
"epoch": 13.814659685863875,
"learning_rate": 0.0004445050329633992,
"loss": 2.493949127197266,
"step": 6590
},
{
"epoch": 13.835602094240837,
"learning_rate": 0.0004441681590741884,
"loss": 2.514782524108887,
"step": 6600
},
{
"epoch": 13.856544502617801,
"learning_rate": 0.0004438320499385977,
"loss": 2.463920783996582,
"step": 6610
},
{
"epoch": 13.877486910994765,
"learning_rate": 0.00044349670266749286,
"loss": 2.494730567932129,
"step": 6620
},
{
"epoch": 13.898429319371727,
"learning_rate": 0.0004431621143869969,
"loss": 2.4927881240844725,
"step": 6630
},
{
"epoch": 13.919371727748691,
"learning_rate": 0.00044282828223838727,
"loss": 2.5001829147338865,
"step": 6640
},
{
"epoch": 13.940314136125654,
"learning_rate": 0.0004424952033779929,
"loss": 2.485161018371582,
"step": 6650
},
{
"epoch": 13.961256544502618,
"learning_rate": 0.00044216287497709253,
"loss": 2.4990121841430666,
"step": 6660
},
{
"epoch": 13.982198952879582,
"learning_rate": 0.0004418312942218139,
"loss": 2.478795051574707,
"step": 6670
},
{
"epoch": 13.998952879581152,
"eval_loss": 2.551051616668701,
"eval_runtime": 73.8418,
"eval_samples_per_second": 148.371,
"step": 6678
},
{
"epoch": 14.004188481675392,
"learning_rate": 0.0004415004583130336,
"loss": 2.631510925292969,
"step": 6680
},
{
"epoch": 14.025130890052356,
"learning_rate": 0.0004411703644662778,
"loss": 2.46261043548584,
"step": 6690
},
{
"epoch": 14.04607329842932,
"learning_rate": 0.00044084100991162385,
"loss": 2.488113212585449,
"step": 6700
},
{
"epoch": 14.067015706806282,
"learning_rate": 0.00044051239189360286,
"loss": 2.4580398559570313,
"step": 6710
},
{
"epoch": 14.087958115183246,
"learning_rate": 0.00044018450767110235,
"loss": 2.482432174682617,
"step": 6720
},
{
"epoch": 14.108900523560209,
"learning_rate": 0.0004398573545172709,
"loss": 2.4827293395996093,
"step": 6730
},
{
"epoch": 14.129842931937173,
"learning_rate": 0.0004395309297194223,
"loss": 2.472520637512207,
"step": 6740
},
{
"epoch": 14.150785340314137,
"learning_rate": 0.0004392052305789416,
"loss": 2.47951602935791,
"step": 6750
},
{
"epoch": 14.171727748691099,
"learning_rate": 0.0004388802544111908,
"loss": 2.4616981506347657,
"step": 6760
},
{
"epoch": 14.192670157068063,
"learning_rate": 0.0004385559985454165,
"loss": 2.4829242706298826,
"step": 6770
},
{
"epoch": 14.213612565445025,
"learning_rate": 0.0004382324603246575,
"loss": 2.478873634338379,
"step": 6780
},
{
"epoch": 14.23455497382199,
"learning_rate": 0.0004379096371056532,
"loss": 2.4993722915649412,
"step": 6790
},
{
"epoch": 14.255497382198953,
"learning_rate": 0.000437587526258753,
"loss": 2.474994659423828,
"step": 6800
},
{
"epoch": 14.276439790575916,
"learning_rate": 0.0004372661251678265,
"loss": 2.495197296142578,
"step": 6810
},
{
"epoch": 14.29738219895288,
"learning_rate": 0.00043694543123017407,
"loss": 2.476504325866699,
"step": 6820
},
{
"epoch": 14.318324607329842,
"learning_rate": 0.0004366254418564382,
"loss": 2.5161060333251952,
"step": 6830
},
{
"epoch": 14.339267015706806,
"learning_rate": 0.0004363061544705161,
"loss": 2.4868789672851563,
"step": 6840
},
{
"epoch": 14.36020942408377,
"learning_rate": 0.0004359875665094723,
"loss": 2.4974212646484375,
"step": 6850
},
{
"epoch": 14.381151832460732,
"learning_rate": 0.00043566967542345227,
"loss": 2.4662216186523436,
"step": 6860
},
{
"epoch": 14.402094240837696,
"learning_rate": 0.00043535247867559673,
"loss": 2.469373321533203,
"step": 6870
},
{
"epoch": 14.42303664921466,
"learning_rate": 0.00043503597374195665,
"loss": 2.483184242248535,
"step": 6880
},
{
"epoch": 14.443979057591623,
"learning_rate": 0.0004347201581114088,
"loss": 2.4689809799194338,
"step": 6890
},
{
"epoch": 14.464921465968587,
"learning_rate": 0.0004344050292855724,
"loss": 2.439427375793457,
"step": 6900
},
{
"epoch": 14.485863874345549,
"learning_rate": 0.00043409058477872554,
"loss": 2.47011775970459,
"step": 6910
},
{
"epoch": 14.506806282722513,
"learning_rate": 0.00043377682211772343,
"loss": 2.4866916656494142,
"step": 6920
},
{
"epoch": 14.527748691099477,
"learning_rate": 0.0004334637388419161,
"loss": 2.4834897994995115,
"step": 6930
},
{
"epoch": 14.54869109947644,
"learning_rate": 0.0004331513325030681,
"loss": 2.5011289596557615,
"step": 6940
},
{
"epoch": 14.569633507853403,
"learning_rate": 0.0004328396006652773,
"loss": 2.478676986694336,
"step": 6950
},
{
"epoch": 14.590575916230367,
"learning_rate": 0.00043252854090489564,
"loss": 2.4692920684814452,
"step": 6960
},
{
"epoch": 14.61151832460733,
"learning_rate": 0.00043221815081044985,
"loss": 2.492611122131348,
"step": 6970
},
{
"epoch": 14.632460732984294,
"learning_rate": 0.00043190842798256285,
"loss": 2.4726083755493162,
"step": 6980
},
{
"epoch": 14.653403141361256,
"learning_rate": 0.00043159937003387584,
"loss": 2.491672706604004,
"step": 6990
},
{
"epoch": 14.67434554973822,
"learning_rate": 0.00043129097458897135,
"loss": 2.474324417114258,
"step": 7000
},
{
"epoch": 14.695287958115184,
"learning_rate": 0.000430983239284296,
"loss": 2.4726449966430666,
"step": 7010
},
{
"epoch": 14.716230366492146,
"learning_rate": 0.0004306761617680849,
"loss": 2.456452178955078,
"step": 7020
},
{
"epoch": 14.73717277486911,
"learning_rate": 0.00043036973970028583,
"loss": 2.4777704238891602,
"step": 7030
},
{
"epoch": 14.758115183246073,
"learning_rate": 0.00043006397075248464,
"loss": 2.492514801025391,
"step": 7040
},
{
"epoch": 14.779057591623037,
"learning_rate": 0.00042975885260783056,
"loss": 2.464923095703125,
"step": 7050
},
{
"epoch": 14.8,
"learning_rate": 0.00042945438296096303,
"loss": 2.4520200729370116,
"step": 7060
},
{
"epoch": 14.820942408376963,
"learning_rate": 0.0004291505595179379,
"loss": 2.461465072631836,
"step": 7070
},
{
"epoch": 14.841884816753927,
"learning_rate": 0.0004288473799961553,
"loss": 2.474461555480957,
"step": 7080
},
{
"epoch": 14.86282722513089,
"learning_rate": 0.0004285448421242875,
"loss": 2.474432945251465,
"step": 7090
},
{
"epoch": 14.883769633507853,
"learning_rate": 0.00042824294364220724,
"loss": 2.506844329833984,
"step": 7100
},
{
"epoch": 14.904712041884817,
"learning_rate": 0.0004279416823009172,
"loss": 2.466670036315918,
"step": 7110
},
{
"epoch": 14.92565445026178,
"learning_rate": 0.0004276410558624791,
"loss": 2.4866743087768555,
"step": 7120
},
{
"epoch": 14.946596858638744,
"learning_rate": 0.0004273410620999446,
"loss": 2.4524404525756838,
"step": 7130
},
{
"epoch": 14.967539267015706,
"learning_rate": 0.0004270416987972853,
"loss": 2.4684980392456053,
"step": 7140
},
{
"epoch": 14.98848167539267,
"learning_rate": 0.00042674296374932424,
"loss": 2.469831848144531,
"step": 7150
},
{
"epoch": 14.998952879581152,
"eval_loss": 2.5442276000976562,
"eval_runtime": 73.9909,
"eval_samples_per_second": 148.072,
"step": 7155
},
{
"epoch": 15.010471204188482,
"learning_rate": 0.0004264448547616681,
"loss": 2.5812490463256834,
"step": 7160
},
{
"epoch": 15.031413612565444,
"learning_rate": 0.00042614736965063864,
"loss": 2.4873594284057616,
"step": 7170
},
{
"epoch": 15.052356020942408,
"learning_rate": 0.0004258505062432064,
"loss": 2.4659671783447266,
"step": 7180
},
{
"epoch": 15.073298429319372,
"learning_rate": 0.0004255542623769234,
"loss": 2.4558393478393556,
"step": 7190
},
{
"epoch": 15.094240837696335,
"learning_rate": 0.00042525863589985727,
"loss": 2.4745227813720705,
"step": 7200
},
{
"epoch": 15.115183246073299,
"learning_rate": 0.00042496362467052564,
"loss": 2.4763622283935547,
"step": 7210
},
{
"epoch": 15.136125654450261,
"learning_rate": 0.00042466922655783073,
"loss": 2.4713407516479493,
"step": 7220
},
{
"epoch": 15.157068062827225,
"learning_rate": 0.00042437543944099504,
"loss": 2.4634868621826174,
"step": 7230
},
{
"epoch": 15.178010471204189,
"learning_rate": 0.00042408226120949674,
"loss": 2.476248931884766,
"step": 7240
},
{
"epoch": 15.198952879581151,
"learning_rate": 0.00042378968976300647,
"loss": 2.4730270385742186,
"step": 7250
},
{
"epoch": 15.219895287958115,
"learning_rate": 0.00042349772301132377,
"loss": 2.476571273803711,
"step": 7260
},
{
"epoch": 15.24083769633508,
"learning_rate": 0.0004232063588743146,
"loss": 2.4510690689086916,
"step": 7270
},
{
"epoch": 15.261780104712042,
"learning_rate": 0.00042291559528184904,
"loss": 2.465399742126465,
"step": 7280
},
{
"epoch": 15.282722513089006,
"learning_rate": 0.0004226254301737393,
"loss": 2.4773502349853516,
"step": 7290
},
{
"epoch": 15.303664921465968,
"learning_rate": 0.0004223358614996787,
"loss": 2.43621711730957,
"step": 7300
},
{
"epoch": 15.324607329842932,
"learning_rate": 0.00042204688721918075,
"loss": 2.456114959716797,
"step": 7310
},
{
"epoch": 15.345549738219896,
"learning_rate": 0.0004217585053015187,
"loss": 2.468073844909668,
"step": 7320
},
{
"epoch": 15.366492146596858,
"learning_rate": 0.0004214707137256656,
"loss": 2.471833419799805,
"step": 7330
},
{
"epoch": 15.387434554973822,
"learning_rate": 0.0004211835104802349,
"loss": 2.499461364746094,
"step": 7340
},
{
"epoch": 15.408376963350785,
"learning_rate": 0.00042089689356342115,
"loss": 2.4492721557617188,
"step": 7350
},
{
"epoch": 15.429319371727749,
"learning_rate": 0.0004206108609829418,
"loss": 2.4671262741088866,
"step": 7360
},
{
"epoch": 15.450261780104713,
"learning_rate": 0.00042032541075597875,
"loss": 2.465005111694336,
"step": 7370
},
{
"epoch": 15.471204188481675,
"learning_rate": 0.0004200405409091207,
"loss": 2.4648488998413085,
"step": 7380
},
{
"epoch": 15.492146596858639,
"learning_rate": 0.00041975624947830593,
"loss": 2.463612174987793,
"step": 7390
},
{
"epoch": 15.513089005235603,
"learning_rate": 0.00041947253450876515,
"loss": 2.4697538375854493,
"step": 7400
},
{
"epoch": 15.534031413612565,
"learning_rate": 0.00041918939405496546,
"loss": 2.45694637298584,
"step": 7410
},
{
"epoch": 15.55497382198953,
"learning_rate": 0.00041890682618055396,
"loss": 2.4443153381347655,
"step": 7420
},
{
"epoch": 15.575916230366492,
"learning_rate": 0.0004186248289583023,
"loss": 2.445983123779297,
"step": 7430
},
{
"epoch": 15.596858638743456,
"learning_rate": 0.00041834340047005144,
"loss": 2.489885711669922,
"step": 7440
},
{
"epoch": 15.61780104712042,
"learning_rate": 0.0004180625388066569,
"loss": 2.4711660385131835,
"step": 7450
},
{
"epoch": 15.638743455497382,
"learning_rate": 0.00041778224206793433,
"loss": 2.4884315490722657,
"step": 7460
},
{
"epoch": 15.659685863874346,
"learning_rate": 0.00041750250836260536,
"loss": 2.477284240722656,
"step": 7470
},
{
"epoch": 15.680628272251308,
"learning_rate": 0.0004172233358082443,
"loss": 2.475067901611328,
"step": 7480
},
{
"epoch": 15.701570680628272,
"learning_rate": 0.00041694472253122467,
"loss": 2.482602119445801,
"step": 7490
},
{
"epoch": 15.722513089005236,
"learning_rate": 0.00041666666666666664,
"loss": 2.442608642578125,
"step": 7500
},
{
"epoch": 15.743455497382199,
"learning_rate": 0.0004163891663583843,
"loss": 2.468288040161133,
"step": 7510
},
{
"epoch": 15.764397905759163,
"learning_rate": 0.00041611221975883396,
"loss": 2.4465059280395507,
"step": 7520
},
{
"epoch": 15.785340314136125,
"learning_rate": 0.00041583582502906203,
"loss": 2.4614633560180663,
"step": 7530
},
{
"epoch": 15.806282722513089,
"learning_rate": 0.0004155599803386543,
"loss": 2.4629968643188476,
"step": 7540
},
{
"epoch": 15.827225130890053,
"learning_rate": 0.0004152846838656846,
"loss": 2.454400062561035,
"step": 7550
},
{
"epoch": 15.848167539267015,
"learning_rate": 0.00041500993379666443,
"loss": 2.467230224609375,
"step": 7560
},
{
"epoch": 15.86910994764398,
"learning_rate": 0.0004147357283264927,
"loss": 2.442008209228516,
"step": 7570
},
{
"epoch": 15.890052356020943,
"learning_rate": 0.000414462065658406,
"loss": 2.4506603240966798,
"step": 7580
},
{
"epoch": 15.910994764397905,
"learning_rate": 0.0004141889440039292,
"loss": 2.4443122863769533,
"step": 7590
},
{
"epoch": 15.93193717277487,
"learning_rate": 0.00041391636158282614,
"loss": 2.4457521438598633,
"step": 7600
},
{
"epoch": 15.952879581151832,
"learning_rate": 0.00041364431662305114,
"loss": 2.457781982421875,
"step": 7610
},
{
"epoch": 15.973821989528796,
"learning_rate": 0.0004133728073607005,
"loss": 2.440464210510254,
"step": 7620
},
{
"epoch": 15.99476439790576,
"learning_rate": 0.00041310183203996446,
"loss": 2.4534429550170898,
"step": 7630
},
{
"epoch": 15.998952879581152,
"eval_loss": 2.5449907779693604,
"eval_runtime": 73.6337,
"eval_samples_per_second": 148.791,
"step": 7632
},
{
"epoch": 16.016753926701572,
"learning_rate": 0.0004128313889130795,
"loss": 2.5835424423217774,
"step": 7640
},
{
"epoch": 16.037696335078532,
"learning_rate": 0.0004125614762402809,
"loss": 2.458993148803711,
"step": 7650
},
{
"epoch": 16.058638743455496,
"learning_rate": 0.00041229209228975627,
"loss": 2.471218299865723,
"step": 7660
},
{
"epoch": 16.07958115183246,
"learning_rate": 0.000412023235337598,
"loss": 2.464751052856445,
"step": 7670
},
{
"epoch": 16.100523560209425,
"learning_rate": 0.00041175490366775766,
"loss": 2.4599708557128905,
"step": 7680
},
{
"epoch": 16.12146596858639,
"learning_rate": 0.0004114870955719997,
"loss": 2.4355844497680663,
"step": 7690
},
{
"epoch": 16.14240837696335,
"learning_rate": 0.00041121980934985563,
"loss": 2.4272241592407227,
"step": 7700
},
{
"epoch": 16.163350785340313,
"learning_rate": 0.000410953043308579,
"loss": 2.4612340927124023,
"step": 7710
},
{
"epoch": 16.184293193717277,
"learning_rate": 0.0004106867957631001,
"loss": 2.465089797973633,
"step": 7720
},
{
"epoch": 16.20523560209424,
"learning_rate": 0.00041042106503598165,
"loss": 2.451694297790527,
"step": 7730
},
{
"epoch": 16.226178010471205,
"learning_rate": 0.0004101558494573738,
"loss": 2.464099884033203,
"step": 7740
},
{
"epoch": 16.24712041884817,
"learning_rate": 0.0004098911473649706,
"loss": 2.448426055908203,
"step": 7750
},
{
"epoch": 16.26806282722513,
"learning_rate": 0.0004096269571039658,
"loss": 2.455006217956543,
"step": 7760
},
{
"epoch": 16.289005235602094,
"learning_rate": 0.00040936327702701005,
"loss": 2.453194808959961,
"step": 7770
},
{
"epoch": 16.309947643979058,
"learning_rate": 0.00040910010549416687,
"loss": 2.4759195327758787,
"step": 7780
},
{
"epoch": 16.330890052356022,
"learning_rate": 0.0004088374408728706,
"loss": 2.4628747940063476,
"step": 7790
},
{
"epoch": 16.351832460732986,
"learning_rate": 0.0004085752815378834,
"loss": 2.446619415283203,
"step": 7800
},
{
"epoch": 16.372774869109946,
"learning_rate": 0.0004083136258712532,
"loss": 2.4754364013671877,
"step": 7810
},
{
"epoch": 16.39371727748691,
"learning_rate": 0.0004080524722622717,
"loss": 2.4566783905029297,
"step": 7820
},
{
"epoch": 16.414659685863874,
"learning_rate": 0.00040779181910743294,
"loss": 2.426336479187012,
"step": 7830
},
{
"epoch": 16.43560209424084,
"learning_rate": 0.0004075316648103914,
"loss": 2.460182762145996,
"step": 7840
},
{
"epoch": 16.456544502617803,
"learning_rate": 0.0004072720077819216,
"loss": 2.454692268371582,
"step": 7850
},
{
"epoch": 16.477486910994763,
"learning_rate": 0.0004070128464398768,
"loss": 2.4589263916015627,
"step": 7860
},
{
"epoch": 16.498429319371727,
"learning_rate": 0.0004067541792091489,
"loss": 2.472345161437988,
"step": 7870
},
{
"epoch": 16.51937172774869,
"learning_rate": 0.0004064960045216279,
"loss": 2.427416229248047,
"step": 7880
},
{
"epoch": 16.540314136125655,
"learning_rate": 0.0004062383208161624,
"loss": 2.454151725769043,
"step": 7890
},
{
"epoch": 16.56125654450262,
"learning_rate": 0.0004059811265385193,
"loss": 2.4490371704101563,
"step": 7900
},
{
"epoch": 16.58219895287958,
"learning_rate": 0.00040572442014134516,
"loss": 2.479467010498047,
"step": 7910
},
{
"epoch": 16.603141361256544,
"learning_rate": 0.00040546820008412654,
"loss": 2.4391218185424806,
"step": 7920
},
{
"epoch": 16.624083769633508,
"learning_rate": 0.0004052124648331515,
"loss": 2.455718421936035,
"step": 7930
},
{
"epoch": 16.645026178010472,
"learning_rate": 0.00040495721286147086,
"loss": 2.4620994567871093,
"step": 7940
},
{
"epoch": 16.665968586387436,
"learning_rate": 0.00040470244264886006,
"loss": 2.448670196533203,
"step": 7950
},
{
"epoch": 16.686910994764396,
"learning_rate": 0.00040444815268178097,
"loss": 2.426989936828613,
"step": 7960
},
{
"epoch": 16.70785340314136,
"learning_rate": 0.00040419434145334414,
"loss": 2.447972869873047,
"step": 7970
},
{
"epoch": 16.728795811518324,
"learning_rate": 0.00040394100746327154,
"loss": 2.457029342651367,
"step": 7980
},
{
"epoch": 16.74973821989529,
"learning_rate": 0.0004036881492178589,
"loss": 2.4539730072021486,
"step": 7990
},
{
"epoch": 16.770680628272252,
"learning_rate": 0.00040343576522993926,
"loss": 2.4607629776000977,
"step": 8000
},
{
"epoch": 16.791623036649213,
"learning_rate": 0.00040318385401884554,
"loss": 2.43496036529541,
"step": 8010
},
{
"epoch": 16.812565445026177,
"learning_rate": 0.00040293241411037484,
"loss": 2.423869323730469,
"step": 8020
},
{
"epoch": 16.83350785340314,
"learning_rate": 0.00040268144403675154,
"loss": 2.4423187255859373,
"step": 8030
},
{
"epoch": 16.854450261780105,
"learning_rate": 0.0004024309423365915,
"loss": 2.4698711395263673,
"step": 8040
},
{
"epoch": 16.87539267015707,
"learning_rate": 0.0004021809075548668,
"loss": 2.423082160949707,
"step": 8050
},
{
"epoch": 16.89633507853403,
"learning_rate": 0.0004019313382428694,
"loss": 2.443895149230957,
"step": 8060
},
{
"epoch": 16.917277486910994,
"learning_rate": 0.00040168223295817656,
"loss": 2.455313301086426,
"step": 8070
},
{
"epoch": 16.938219895287958,
"learning_rate": 0.00040143359026461554,
"loss": 2.415020751953125,
"step": 8080
},
{
"epoch": 16.95916230366492,
"learning_rate": 0.000401185408732229,
"loss": 2.462967109680176,
"step": 8090
},
{
"epoch": 16.980104712041886,
"learning_rate": 0.0004009376869372401,
"loss": 2.416962242126465,
"step": 8100
},
{
"epoch": 16.99895287958115,
"eval_loss": 2.5404856204986572,
"eval_runtime": 73.757,
"eval_samples_per_second": 148.542,
"step": 8109
},
{
"epoch": 17.002094240837696,
"learning_rate": 0.00040069042346201864,
"loss": 2.549093818664551,
"step": 8110
},
{
"epoch": 17.02303664921466,
"learning_rate": 0.00040044361689504655,
"loss": 2.452895736694336,
"step": 8120
},
{
"epoch": 17.043979057591624,
"learning_rate": 0.0004001972658308847,
"loss": 2.460617446899414,
"step": 8130
},
{
"epoch": 17.064921465968588,
"learning_rate": 0.0003999513688701383,
"loss": 2.4506912231445312,
"step": 8140
},
{
"epoch": 17.08586387434555,
"learning_rate": 0.00039970592461942457,
"loss": 2.440316581726074,
"step": 8150
},
{
"epoch": 17.106806282722513,
"learning_rate": 0.00039946093169133874,
"loss": 2.4249364852905275,
"step": 8160
},
{
"epoch": 17.127748691099477,
"learning_rate": 0.0003992163887044217,
"loss": 2.4489822387695312,
"step": 8170
},
{
"epoch": 17.14869109947644,
"learning_rate": 0.0003989722942831268,
"loss": 2.455015754699707,
"step": 8180
},
{
"epoch": 17.169633507853405,
"learning_rate": 0.0003987286470577879,
"loss": 2.4564001083374025,
"step": 8190
},
{
"epoch": 17.190575916230365,
"learning_rate": 0.0003984854456645864,
"loss": 2.4481569290161134,
"step": 8200
},
{
"epoch": 17.21151832460733,
"learning_rate": 0.0003982426887455199,
"loss": 2.4090858459472657,
"step": 8210
},
{
"epoch": 17.232460732984293,
"learning_rate": 0.00039800037494836985,
"loss": 2.4279315948486326,
"step": 8220
},
{
"epoch": 17.253403141361257,
"learning_rate": 0.00039775850292667005,
"loss": 2.4328563690185545,
"step": 8230
},
{
"epoch": 17.27434554973822,
"learning_rate": 0.0003975170713396753,
"loss": 2.426299476623535,
"step": 8240
},
{
"epoch": 17.295287958115182,
"learning_rate": 0.0003972760788523301,
"loss": 2.424925994873047,
"step": 8250
},
{
"epoch": 17.316230366492146,
"learning_rate": 0.0003970355241352378,
"loss": 2.4173357009887697,
"step": 8260
},
{
"epoch": 17.33717277486911,
"learning_rate": 0.00039679540586462953,
"loss": 2.4525693893432616,
"step": 8270
},
{
"epoch": 17.358115183246074,
"learning_rate": 0.00039655572272233384,
"loss": 2.473075103759766,
"step": 8280
},
{
"epoch": 17.379057591623038,
"learning_rate": 0.0003963164733957462,
"loss": 2.422397232055664,
"step": 8290
},
{
"epoch": 17.4,
"learning_rate": 0.00039607765657779864,
"loss": 2.441000556945801,
"step": 8300
},
{
"epoch": 17.420942408376963,
"learning_rate": 0.0003958392709669304,
"loss": 2.4337257385253905,
"step": 8310
},
{
"epoch": 17.441884816753927,
"learning_rate": 0.00039560131526705723,
"loss": 2.4255434036254884,
"step": 8320
},
{
"epoch": 17.46282722513089,
"learning_rate": 0.0003953637881875425,
"loss": 2.4355316162109375,
"step": 8330
},
{
"epoch": 17.483769633507855,
"learning_rate": 0.0003951266884431675,
"loss": 2.428698921203613,
"step": 8340
},
{
"epoch": 17.504712041884815,
"learning_rate": 0.00039489001475410214,
"loss": 2.4530813217163088,
"step": 8350
},
{
"epoch": 17.52565445026178,
"learning_rate": 0.00039465376584587626,
"loss": 2.4496335983276367,
"step": 8360
},
{
"epoch": 17.546596858638743,
"learning_rate": 0.00039441794044935054,
"loss": 2.425421142578125,
"step": 8370
},
{
"epoch": 17.567539267015707,
"learning_rate": 0.00039418253730068797,
"loss": 2.4285154342651367,
"step": 8380
},
{
"epoch": 17.58848167539267,
"learning_rate": 0.0003939475551413253,
"loss": 2.4037647247314453,
"step": 8390
},
{
"epoch": 17.609424083769632,
"learning_rate": 0.000393712992717945,
"loss": 2.423297119140625,
"step": 8400
},
{
"epoch": 17.630366492146596,
"learning_rate": 0.0003934788487824469,
"loss": 2.47174186706543,
"step": 8410
},
{
"epoch": 17.65130890052356,
"learning_rate": 0.0003932451220919205,
"loss": 2.441014289855957,
"step": 8420
},
{
"epoch": 17.672251308900524,
"learning_rate": 0.0003930118114086172,
"loss": 2.4317821502685546,
"step": 8430
},
{
"epoch": 17.693193717277488,
"learning_rate": 0.00039277891549992266,
"loss": 2.4377744674682615,
"step": 8440
},
{
"epoch": 17.71413612565445,
"learning_rate": 0.0003925464331383298,
"loss": 2.4461442947387697,
"step": 8450
},
{
"epoch": 17.735078534031413,
"learning_rate": 0.00039231436310141113,
"loss": 2.466485595703125,
"step": 8460
},
{
"epoch": 17.756020942408377,
"learning_rate": 0.00039208270417179214,
"loss": 2.4318614959716798,
"step": 8470
},
{
"epoch": 17.77696335078534,
"learning_rate": 0.0003918514551371243,
"loss": 2.431291389465332,
"step": 8480
},
{
"epoch": 17.797905759162305,
"learning_rate": 0.0003916206147900585,
"loss": 2.43109130859375,
"step": 8490
},
{
"epoch": 17.81884816753927,
"learning_rate": 0.00039139018192821845,
"loss": 2.4323259353637696,
"step": 8500
},
{
"epoch": 17.83979057591623,
"learning_rate": 0.00039116015535417445,
"loss": 2.439468193054199,
"step": 8510
},
{
"epoch": 17.860732984293193,
"learning_rate": 0.00039093053387541745,
"loss": 2.4567943572998048,
"step": 8520
},
{
"epoch": 17.881675392670157,
"learning_rate": 0.00039070131630433274,
"loss": 2.4346736907958983,
"step": 8530
},
{
"epoch": 17.90261780104712,
"learning_rate": 0.00039047250145817424,
"loss": 2.441089630126953,
"step": 8540
},
{
"epoch": 17.923560209424085,
"learning_rate": 0.00039024408815903914,
"loss": 2.4277088165283205,
"step": 8550
},
{
"epoch": 17.944502617801046,
"learning_rate": 0.0003900160752338421,
"loss": 2.4228445053100587,
"step": 8560
},
{
"epoch": 17.96544502617801,
"learning_rate": 0.00038978846151429,
"loss": 2.399433708190918,
"step": 8570
},
{
"epoch": 17.986387434554974,
"learning_rate": 0.0003895612458368572,
"loss": 2.4009246826171875,
"step": 8580
},
{
"epoch": 17.99895287958115,
"eval_loss": 2.5318360328674316,
"eval_runtime": 74.0596,
"eval_samples_per_second": 147.935,
"step": 8586
},
{
"epoch": 18.008376963350784,
"learning_rate": 0.00038933442704275974,
"loss": 2.537807655334473,
"step": 8590
},
{
"epoch": 18.02931937172775,
"learning_rate": 0.0003891080039779314,
"loss": 2.446313667297363,
"step": 8600
},
{
"epoch": 18.050261780104712,
"learning_rate": 0.0003888819754929986,
"loss": 2.4345209121704103,
"step": 8610
},
{
"epoch": 18.071204188481676,
"learning_rate": 0.0003886563404432558,
"loss": 2.437006187438965,
"step": 8620
},
{
"epoch": 18.09214659685864,
"learning_rate": 0.0003884310976886414,
"loss": 2.420798110961914,
"step": 8630
},
{
"epoch": 18.1130890052356,
"learning_rate": 0.0003882062460937135,
"loss": 2.4323537826538084,
"step": 8640
},
{
"epoch": 18.134031413612565,
"learning_rate": 0.0003879817845276255,
"loss": 2.442038345336914,
"step": 8650
},
{
"epoch": 18.15497382198953,
"learning_rate": 0.0003877577118641029,
"loss": 2.433667755126953,
"step": 8660
},
{
"epoch": 18.175916230366493,
"learning_rate": 0.00038753402698141903,
"loss": 2.424707221984863,
"step": 8670
},
{
"epoch": 18.196858638743457,
"learning_rate": 0.0003873107287623715,
"loss": 2.4348966598510744,
"step": 8680
},
{
"epoch": 18.217801047120417,
"learning_rate": 0.00038708781609425905,
"loss": 2.404917907714844,
"step": 8690
},
{
"epoch": 18.23874345549738,
"learning_rate": 0.000386865287868858,
"loss": 2.4346105575561525,
"step": 8700
},
{
"epoch": 18.259685863874346,
"learning_rate": 0.0003866431429823993,
"loss": 2.442304992675781,
"step": 8710
},
{
"epoch": 18.28062827225131,
"learning_rate": 0.00038642138033554525,
"loss": 2.42406005859375,
"step": 8720
},
{
"epoch": 18.301570680628274,
"learning_rate": 0.00038619999883336703,
"loss": 2.440979766845703,
"step": 8730
},
{
"epoch": 18.322513089005234,
"learning_rate": 0.0003859789973853217,
"loss": 2.440751075744629,
"step": 8740
},
{
"epoch": 18.343455497382198,
"learning_rate": 0.0003857583749052298,
"loss": 2.4363412857055664,
"step": 8750
},
{
"epoch": 18.364397905759162,
"learning_rate": 0.0003855381303112527,
"loss": 2.4235382080078125,
"step": 8760
},
{
"epoch": 18.385340314136126,
"learning_rate": 0.0003853182625258708,
"loss": 2.4238630294799806,
"step": 8770
},
{
"epoch": 18.40628272251309,
"learning_rate": 0.0003850987704758608,
"loss": 2.426643943786621,
"step": 8780
},
{
"epoch": 18.42722513089005,
"learning_rate": 0.00038487965309227413,
"loss": 2.438970947265625,
"step": 8790
},
{
"epoch": 18.448167539267015,
"learning_rate": 0.0003846609093104148,
"loss": 2.423859786987305,
"step": 8800
},
{
"epoch": 18.46910994764398,
"learning_rate": 0.00038444253806981784,
"loss": 2.4040243148803713,
"step": 8810
},
{
"epoch": 18.490052356020943,
"learning_rate": 0.00038422453831422784,
"loss": 2.420393371582031,
"step": 8820
},
{
"epoch": 18.510994764397907,
"learning_rate": 0.0003840069089915771,
"loss": 2.427932929992676,
"step": 8830
},
{
"epoch": 18.531937172774867,
"learning_rate": 0.00038378964905396454,
"loss": 2.419098663330078,
"step": 8840
},
{
"epoch": 18.55287958115183,
"learning_rate": 0.00038357275745763475,
"loss": 2.4184850692749023,
"step": 8850
},
{
"epoch": 18.573821989528795,
"learning_rate": 0.0003833562331629563,
"loss": 2.402060127258301,
"step": 8860
},
{
"epoch": 18.59476439790576,
"learning_rate": 0.0003831400751344014,
"loss": 2.403904914855957,
"step": 8870
},
{
"epoch": 18.615706806282724,
"learning_rate": 0.00038292428234052486,
"loss": 2.4094032287597655,
"step": 8880
},
{
"epoch": 18.636649214659684,
"learning_rate": 0.0003827088537539434,
"loss": 2.3887189865112304,
"step": 8890
},
{
"epoch": 18.657591623036648,
"learning_rate": 0.00038249378835131535,
"loss": 2.4003849029541016,
"step": 8900
},
{
"epoch": 18.678534031413612,
"learning_rate": 0.0003822790851133196,
"loss": 2.4100620269775392,
"step": 8910
},
{
"epoch": 18.699476439790576,
"learning_rate": 0.00038206474302463617,
"loss": 2.4087665557861326,
"step": 8920
},
{
"epoch": 18.72041884816754,
"learning_rate": 0.00038185076107392544,
"loss": 2.4067865371704102,
"step": 8930
},
{
"epoch": 18.741361256544504,
"learning_rate": 0.0003816371382538082,
"loss": 2.3902347564697264,
"step": 8940
},
{
"epoch": 18.762303664921465,
"learning_rate": 0.0003814238735608459,
"loss": 2.4000757217407225,
"step": 8950
},
{
"epoch": 18.78324607329843,
"learning_rate": 0.0003812109659955207,
"loss": 2.4148767471313475,
"step": 8960
},
{
"epoch": 18.804188481675393,
"learning_rate": 0.00038099841456221617,
"loss": 2.414336395263672,
"step": 8970
},
{
"epoch": 18.825130890052357,
"learning_rate": 0.0003807862182691969,
"loss": 2.440867042541504,
"step": 8980
},
{
"epoch": 18.84607329842932,
"learning_rate": 0.00038057437612859003,
"loss": 2.4532596588134767,
"step": 8990
},
{
"epoch": 18.86701570680628,
"learning_rate": 0.0003803628871563653,
"loss": 2.4020782470703126,
"step": 9000
},
{
"epoch": 18.887958115183245,
"learning_rate": 0.0003801517503723161,
"loss": 2.430096435546875,
"step": 9010
},
{
"epoch": 18.90890052356021,
"learning_rate": 0.00037994096480004037,
"loss": 2.419812774658203,
"step": 9020
},
{
"epoch": 18.929842931937173,
"learning_rate": 0.0003797305294669214,
"loss": 2.4075344085693358,
"step": 9030
},
{
"epoch": 18.950785340314138,
"learning_rate": 0.00037952044340410954,
"loss": 2.4337480545043944,
"step": 9040
},
{
"epoch": 18.971727748691098,
"learning_rate": 0.00037931070564650276,
"loss": 2.4088159561157227,
"step": 9050
},
{
"epoch": 18.992670157068062,
"learning_rate": 0.0003791013152327286,
"loss": 2.4013919830322266,
"step": 9060
},
{
"epoch": 18.99895287958115,
"eval_loss": 2.529340982437134,
"eval_runtime": 73.527,
"eval_samples_per_second": 149.007,
"step": 9063
},
{
"epoch": 19.014659685863876,
"learning_rate": 0.00037889227120512545,
"loss": 2.5226316452026367,
"step": 9070
},
{
"epoch": 19.035602094240836,
"learning_rate": 0.0003786835726097239,
"loss": 2.4024560928344725,
"step": 9080
},
{
"epoch": 19.0565445026178,
"learning_rate": 0.00037847521849622895,
"loss": 2.4342859268188475,
"step": 9090
},
{
"epoch": 19.077486910994764,
"learning_rate": 0.0003782672079180015,
"loss": 2.4292444229125976,
"step": 9100
},
{
"epoch": 19.09842931937173,
"learning_rate": 0.0003780595399320404,
"loss": 2.4289926528930663,
"step": 9110
},
{
"epoch": 19.119371727748693,
"learning_rate": 0.00037785221359896444,
"loss": 2.440321159362793,
"step": 9120
},
{
"epoch": 19.140314136125653,
"learning_rate": 0.00037764522798299443,
"loss": 2.4353168487548826,
"step": 9130
},
{
"epoch": 19.161256544502617,
"learning_rate": 0.0003774385821519358,
"loss": 2.389999008178711,
"step": 9140
},
{
"epoch": 19.18219895287958,
"learning_rate": 0.0003772322751771605,
"loss": 2.4299448013305662,
"step": 9150
},
{
"epoch": 19.203141361256545,
"learning_rate": 0.00037702630613358986,
"loss": 2.4094564437866213,
"step": 9160
},
{
"epoch": 19.22408376963351,
"learning_rate": 0.0003768206740996769,
"loss": 2.416705322265625,
"step": 9170
},
{
"epoch": 19.24502617801047,
"learning_rate": 0.00037661537815738915,
"loss": 2.4174514770507813,
"step": 9180
},
{
"epoch": 19.265968586387434,
"learning_rate": 0.00037641041739219143,
"loss": 2.4112581253051757,
"step": 9190
},
{
"epoch": 19.286910994764398,
"learning_rate": 0.00037620579089302876,
"loss": 2.4125255584716796,
"step": 9200
},
{
"epoch": 19.307853403141362,
"learning_rate": 0.0003760014977523091,
"loss": 2.41434268951416,
"step": 9210
},
{
"epoch": 19.328795811518326,
"learning_rate": 0.00037579753706588697,
"loss": 2.419674301147461,
"step": 9220
},
{
"epoch": 19.349738219895286,
"learning_rate": 0.00037559390793304604,
"loss": 2.3896152496337892,
"step": 9230
},
{
"epoch": 19.37068062827225,
"learning_rate": 0.00037539060945648286,
"loss": 2.4006847381591796,
"step": 9240
},
{
"epoch": 19.391623036649214,
"learning_rate": 0.00037518764074229014,
"loss": 2.4157575607299804,
"step": 9250
},
{
"epoch": 19.41256544502618,
"learning_rate": 0.00037498500089994,
"loss": 2.398466873168945,
"step": 9260
},
{
"epoch": 19.433507853403142,
"learning_rate": 0.00037478268904226795,
"loss": 2.4136272430419923,
"step": 9270
},
{
"epoch": 19.454450261780103,
"learning_rate": 0.00037458070428545635,
"loss": 2.412180709838867,
"step": 9280
},
{
"epoch": 19.475392670157067,
"learning_rate": 0.00037437904574901817,
"loss": 2.417103385925293,
"step": 9290
},
{
"epoch": 19.49633507853403,
"learning_rate": 0.00037417771255578104,
"loss": 2.3830541610717773,
"step": 9300
},
{
"epoch": 19.517277486910995,
"learning_rate": 0.00037397670383187097,
"loss": 2.4057411193847655,
"step": 9310
},
{
"epoch": 19.53821989528796,
"learning_rate": 0.0003737760187066967,
"loss": 2.429146957397461,
"step": 9320
},
{
"epoch": 19.559162303664923,
"learning_rate": 0.00037357565631293365,
"loss": 2.4229619979858397,
"step": 9330
},
{
"epoch": 19.580104712041884,
"learning_rate": 0.00037337561578650833,
"loss": 2.4030439376831056,
"step": 9340
},
{
"epoch": 19.601047120418848,
"learning_rate": 0.00037317589626658255,
"loss": 2.4122753143310547,
"step": 9350
},
{
"epoch": 19.62198952879581,
"learning_rate": 0.0003729764968955379,
"loss": 2.420066070556641,
"step": 9360
},
{
"epoch": 19.642931937172776,
"learning_rate": 0.00037277741681896045,
"loss": 2.4116867065429686,
"step": 9370
},
{
"epoch": 19.66387434554974,
"learning_rate": 0.0003725786551856251,
"loss": 2.4222272872924804,
"step": 9380
},
{
"epoch": 19.6848167539267,
"learning_rate": 0.0003723802111474804,
"loss": 2.398889350891113,
"step": 9390
},
{
"epoch": 19.705759162303664,
"learning_rate": 0.0003721820838596335,
"loss": 2.3947797775268556,
"step": 9400
},
{
"epoch": 19.72670157068063,
"learning_rate": 0.00037198427248033485,
"loss": 2.39971981048584,
"step": 9410
},
{
"epoch": 19.747643979057592,
"learning_rate": 0.00037178677617096337,
"loss": 2.3918169021606444,
"step": 9420
},
{
"epoch": 19.768586387434556,
"learning_rate": 0.0003715895940960111,
"loss": 2.393696975708008,
"step": 9430
},
{
"epoch": 19.789528795811517,
"learning_rate": 0.000371392725423069,
"loss": 2.403204345703125,
"step": 9440
},
{
"epoch": 19.81047120418848,
"learning_rate": 0.00037119616932281165,
"loss": 2.3984851837158203,
"step": 9450
},
{
"epoch": 19.831413612565445,
"learning_rate": 0.00037099992496898276,
"loss": 2.442034149169922,
"step": 9460
},
{
"epoch": 19.85235602094241,
"learning_rate": 0.00037080399153838065,
"loss": 2.3905046463012694,
"step": 9470
},
{
"epoch": 19.873298429319373,
"learning_rate": 0.00037060836821084373,
"loss": 2.4114078521728515,
"step": 9480
},
{
"epoch": 19.894240837696334,
"learning_rate": 0.00037041305416923604,
"loss": 2.393054962158203,
"step": 9490
},
{
"epoch": 19.915183246073298,
"learning_rate": 0.0003702180485994327,
"loss": 2.388008689880371,
"step": 9500
},
{
"epoch": 19.93612565445026,
"learning_rate": 0.00037002335069030614,
"loss": 2.4009252548217774,
"step": 9510
},
{
"epoch": 19.957068062827226,
"learning_rate": 0.0003698289596337116,
"loss": 2.3996566772460937,
"step": 9520
},
{
"epoch": 19.97801047120419,
"learning_rate": 0.00036963487462447303,
"loss": 2.3795480728149414,
"step": 9530
},
{
"epoch": 19.99895287958115,
"learning_rate": 0.0003694410948603691,
"loss": 2.4031463623046876,
"step": 9540
},
{
"epoch": 19.99895287958115,
"eval_loss": 2.5357587337493896,
"eval_runtime": 73.9758,
"eval_samples_per_second": 148.102,
"step": 9540
},
{
"epoch": 20.020942408376964,
"learning_rate": 0.00036924761954211944,
"loss": 2.5228919982910156,
"step": 9550
},
{
"epoch": 20.041884816753928,
"learning_rate": 0.0003690544478733707,
"loss": 2.3785959243774415,
"step": 9560
},
{
"epoch": 20.06282722513089,
"learning_rate": 0.0003688615790606828,
"loss": 2.4037866592407227,
"step": 9570
},
{
"epoch": 20.083769633507853,
"learning_rate": 0.000368669012313515,
"loss": 2.3924365997314454,
"step": 9580
},
{
"epoch": 20.104712041884817,
"learning_rate": 0.0003684767468442126,
"loss": 2.4029878616333007,
"step": 9590
},
{
"epoch": 20.12565445026178,
"learning_rate": 0.0003682847818679935,
"loss": 2.387605094909668,
"step": 9600
},
{
"epoch": 20.146596858638745,
"learning_rate": 0.0003680931166029342,
"loss": 2.417312431335449,
"step": 9610
},
{
"epoch": 20.167539267015705,
"learning_rate": 0.000367901750269957,
"loss": 2.381046485900879,
"step": 9620
},
{
"epoch": 20.18848167539267,
"learning_rate": 0.00036771068209281657,
"loss": 2.376552963256836,
"step": 9630
},
{
"epoch": 20.209424083769633,
"learning_rate": 0.0003675199112980863,
"loss": 2.4089908599853516,
"step": 9640
},
{
"epoch": 20.230366492146597,
"learning_rate": 0.0003673294371151458,
"loss": 2.401862907409668,
"step": 9650
},
{
"epoch": 20.25130890052356,
"learning_rate": 0.0003671392587761674,
"loss": 2.406145477294922,
"step": 9660
},
{
"epoch": 20.272251308900522,
"learning_rate": 0.0003669493755161031,
"loss": 2.414588737487793,
"step": 9670
},
{
"epoch": 20.293193717277486,
"learning_rate": 0.00036675978657267204,
"loss": 2.4057403564453126,
"step": 9680
},
{
"epoch": 20.31413612565445,
"learning_rate": 0.00036657049118634733,
"loss": 2.404916000366211,
"step": 9690
},
{
"epoch": 20.335078534031414,
"learning_rate": 0.0003663814886003432,
"loss": 2.4110477447509764,
"step": 9700
},
{
"epoch": 20.356020942408378,
"learning_rate": 0.00036619277806060276,
"loss": 2.402661895751953,
"step": 9710
},
{
"epoch": 20.376963350785342,
"learning_rate": 0.0003660043588157846,
"loss": 2.404218864440918,
"step": 9720
},
{
"epoch": 20.397905759162303,
"learning_rate": 0.00036581623011725114,
"loss": 2.4069591522216798,
"step": 9730
},
{
"epoch": 20.418848167539267,
"learning_rate": 0.0003656283912190554,
"loss": 2.4185781478881836,
"step": 9740
},
{
"epoch": 20.43979057591623,
"learning_rate": 0.00036544084137792883,
"loss": 2.3999982833862306,
"step": 9750
},
{
"epoch": 20.460732984293195,
"learning_rate": 0.00036525357985326903,
"loss": 2.3702335357666016,
"step": 9760
},
{
"epoch": 20.48167539267016,
"learning_rate": 0.0003650666059071275,
"loss": 2.3878076553344725,
"step": 9770
},
{
"epoch": 20.50261780104712,
"learning_rate": 0.00036487991880419725,
"loss": 2.3818979263305664,
"step": 9780
},
{
"epoch": 20.523560209424083,
"learning_rate": 0.00036469351781180073,
"loss": 2.363344192504883,
"step": 9790
},
{
"epoch": 20.544502617801047,
"learning_rate": 0.00036450740219987765,
"loss": 2.3810457229614257,
"step": 9800
},
{
"epoch": 20.56544502617801,
"learning_rate": 0.0003643215712409734,
"loss": 2.411943054199219,
"step": 9810
},
{
"epoch": 20.586387434554975,
"learning_rate": 0.00036413602421022653,
"loss": 2.4092056274414064,
"step": 9820
},
{
"epoch": 20.607329842931936,
"learning_rate": 0.0003639507603853572,
"loss": 2.3944089889526365,
"step": 9830
},
{
"epoch": 20.6282722513089,
"learning_rate": 0.00036376577904665525,
"loss": 2.3668121337890624,
"step": 9840
},
{
"epoch": 20.649214659685864,
"learning_rate": 0.00036358107947696876,
"loss": 2.391695022583008,
"step": 9850
},
{
"epoch": 20.670157068062828,
"learning_rate": 0.0003633966609616919,
"loss": 2.380820083618164,
"step": 9860
},
{
"epoch": 20.691099476439792,
"learning_rate": 0.00036321252278875344,
"loss": 2.372467041015625,
"step": 9870
},
{
"epoch": 20.712041884816752,
"learning_rate": 0.00036302866424860566,
"loss": 2.3973648071289064,
"step": 9880
},
{
"epoch": 20.732984293193716,
"learning_rate": 0.00036284508463421217,
"loss": 2.3995847702026367,
"step": 9890
},
{
"epoch": 20.75392670157068,
"learning_rate": 0.0003626617832410371,
"loss": 2.3931917190551757,
"step": 9900
},
{
"epoch": 20.774869109947645,
"learning_rate": 0.00036247875936703335,
"loss": 2.4107311248779295,
"step": 9910
},
{
"epoch": 20.79581151832461,
"learning_rate": 0.00036229601231263145,
"loss": 2.367414855957031,
"step": 9920
},
{
"epoch": 20.81675392670157,
"learning_rate": 0.0003621135413807282,
"loss": 2.405007171630859,
"step": 9930
},
{
"epoch": 20.837696335078533,
"learning_rate": 0.0003619313458766758,
"loss": 2.364247512817383,
"step": 9940
},
{
"epoch": 20.858638743455497,
"learning_rate": 0.0003617494251082704,
"loss": 2.3823482513427736,
"step": 9950
},
{
"epoch": 20.87958115183246,
"learning_rate": 0.0003615677783857413,
"loss": 2.393014144897461,
"step": 9960
},
{
"epoch": 20.900523560209425,
"learning_rate": 0.0003613864050217397,
"loss": 2.3839509963989256,
"step": 9970
},
{
"epoch": 20.921465968586386,
"learning_rate": 0.0003612053043313283,
"loss": 2.378824806213379,
"step": 9980
},
{
"epoch": 20.94240837696335,
"learning_rate": 0.0003610244756319697,
"loss": 2.3893613815307617,
"step": 9990
},
{
"epoch": 20.963350785340314,
"learning_rate": 0.00036084391824351607,
"loss": 2.37738151550293,
"step": 10000
},
{
"epoch": 20.984293193717278,
"learning_rate": 0.00036066363148819854,
"loss": 2.389986038208008,
"step": 10010
},
{
"epoch": 20.99895287958115,
"eval_loss": 2.5336103439331055,
"eval_runtime": 73.4935,
"eval_samples_per_second": 149.074,
"step": 10017
},
{
"epoch": 21.006282722513088,
"learning_rate": 0.000360483614690616,
"loss": 2.528822135925293,
"step": 10020
},
{
"epoch": 21.027225130890052,
"learning_rate": 0.00036030386717772494,
"loss": 2.3780399322509767,
"step": 10030
},
{
"epoch": 21.048167539267016,
"learning_rate": 0.0003601243882788286,
"loss": 2.3978437423706054,
"step": 10040
},
{
"epoch": 21.06910994764398,
"learning_rate": 0.0003599451773255667,
"loss": 2.382208061218262,
"step": 10050
},
{
"epoch": 21.09005235602094,
"learning_rate": 0.00035976623365190465,
"loss": 2.375508499145508,
"step": 10060
},
{
"epoch": 21.110994764397905,
"learning_rate": 0.0003595875565941235,
"loss": 2.422568511962891,
"step": 10070
},
{
"epoch": 21.13193717277487,
"learning_rate": 0.00035940914549080944,
"loss": 2.4112144470214845,
"step": 10080
},
{
"epoch": 21.152879581151833,
"learning_rate": 0.0003592309996828435,
"loss": 2.400478172302246,
"step": 10090
},
{
"epoch": 21.173821989528797,
"learning_rate": 0.0003590531185133913,
"loss": 2.403495216369629,
"step": 10100
},
{
"epoch": 21.194764397905757,
"learning_rate": 0.0003588755013278929,
"loss": 2.375596046447754,
"step": 10110
},
{
"epoch": 21.21570680628272,
"learning_rate": 0.00035869814747405306,
"loss": 2.3807771682739256,
"step": 10120
},
{
"epoch": 21.236649214659685,
"learning_rate": 0.00035852105630183027,
"loss": 2.40921630859375,
"step": 10130
},
{
"epoch": 21.25759162303665,
"learning_rate": 0.0003583442271634278,
"loss": 2.398925018310547,
"step": 10140
},
{
"epoch": 21.278534031413614,
"learning_rate": 0.000358167659413283,
"loss": 2.373432731628418,
"step": 10150
},
{
"epoch": 21.299476439790578,
"learning_rate": 0.00035799135240805765,
"loss": 2.4216379165649413,
"step": 10160
},
{
"epoch": 21.320418848167538,
"learning_rate": 0.0003578153055066282,
"loss": 2.3817609786987304,
"step": 10170
},
{
"epoch": 21.341361256544502,
"learning_rate": 0.00035763951807007597,
"loss": 2.4331357955932615,
"step": 10180
},
{
"epoch": 21.362303664921466,
"learning_rate": 0.0003574639894616771,
"loss": 2.4034128189086914,
"step": 10190
},
{
"epoch": 21.38324607329843,
"learning_rate": 0.0003572887190468934,
"loss": 2.3663650512695313,
"step": 10200
},
{
"epoch": 21.404188481675394,
"learning_rate": 0.00035711370619336214,
"loss": 2.3921630859375,
"step": 10210
},
{
"epoch": 21.425130890052355,
"learning_rate": 0.00035693895027088694,
"loss": 2.3804367065429686,
"step": 10220
},
{
"epoch": 21.44607329842932,
"learning_rate": 0.00035676445065142793,
"loss": 2.4022769927978516,
"step": 10230
},
{
"epoch": 21.467015706806283,
"learning_rate": 0.0003565902067090925,
"loss": 2.3611806869506835,
"step": 10240
},
{
"epoch": 21.487958115183247,
"learning_rate": 0.0003564162178201257,
"loss": 2.383506202697754,
"step": 10250
},
{
"epoch": 21.50890052356021,
"learning_rate": 0.0003562424833629007,
"loss": 2.385580062866211,
"step": 10260
},
{
"epoch": 21.52984293193717,
"learning_rate": 0.0003560690027179101,
"loss": 2.3934825897216796,
"step": 10270
},
{
"epoch": 21.550785340314135,
"learning_rate": 0.00035589577526775603,
"loss": 2.385503387451172,
"step": 10280
},
{
"epoch": 21.5717277486911,
"learning_rate": 0.000355722800397141,
"loss": 2.344258499145508,
"step": 10290
},
{
"epoch": 21.592670157068063,
"learning_rate": 0.00035555007749285897,
"loss": 2.4048336029052733,
"step": 10300
},
{
"epoch": 21.613612565445028,
"learning_rate": 0.00035537760594378607,
"loss": 2.3891706466674805,
"step": 10310
},
{
"epoch": 21.634554973821988,
"learning_rate": 0.00035520538514087155,
"loss": 2.397173309326172,
"step": 10320
},
{
"epoch": 21.655497382198952,
"learning_rate": 0.0003550334144771289,
"loss": 2.3752115249633787,
"step": 10330
},
{
"epoch": 21.676439790575916,
"learning_rate": 0.00035486169334762637,
"loss": 2.3773225784301757,
"step": 10340
},
{
"epoch": 21.69738219895288,
"learning_rate": 0.00035469022114947857,
"loss": 2.410744476318359,
"step": 10350
},
{
"epoch": 21.718324607329844,
"learning_rate": 0.00035451899728183736,
"loss": 2.4002202987670898,
"step": 10360
},
{
"epoch": 21.739267015706805,
"learning_rate": 0.00035434802114588305,
"loss": 2.371893119812012,
"step": 10370
},
{
"epoch": 21.76020942408377,
"learning_rate": 0.00035417729214481556,
"loss": 2.412856674194336,
"step": 10380
},
{
"epoch": 21.781151832460733,
"learning_rate": 0.0003540068096838456,
"loss": 2.414295959472656,
"step": 10390
},
{
"epoch": 21.802094240837697,
"learning_rate": 0.0003538365731701862,
"loss": 2.3874536514282227,
"step": 10400
},
{
"epoch": 21.82303664921466,
"learning_rate": 0.0003536665820130437,
"loss": 2.408889389038086,
"step": 10410
},
{
"epoch": 21.843979057591625,
"learning_rate": 0.00035349683562360966,
"loss": 2.4029043197631834,
"step": 10420
},
{
"epoch": 21.864921465968585,
"learning_rate": 0.0003533273334150517,
"loss": 2.3872053146362306,
"step": 10430
},
{
"epoch": 21.88586387434555,
"learning_rate": 0.0003531580748025054,
"loss": 2.373563766479492,
"step": 10440
},
{
"epoch": 21.906806282722513,
"learning_rate": 0.00035298905920306563,
"loss": 2.3822809219360352,
"step": 10450
},
{
"epoch": 21.927748691099477,
"learning_rate": 0.00035282028603577823,
"loss": 2.4076284408569335,
"step": 10460
},
{
"epoch": 21.94869109947644,
"learning_rate": 0.0003526517547216315,
"loss": 2.3945655822753906,
"step": 10470
},
{
"epoch": 21.969633507853402,
"learning_rate": 0.000352483464683548,
"loss": 2.360683059692383,
"step": 10480
},
{
"epoch": 21.990575916230366,
"learning_rate": 0.0003523154153463761,
"loss": 2.371842956542969,
"step": 10490
},
{
"epoch": 21.99895287958115,
"eval_loss": 2.536722183227539,
"eval_runtime": 79.9983,
"eval_samples_per_second": 136.953,
"step": 10494
},
{
"epoch": 22.012565445026176,
"learning_rate": 0.00035214760613688187,
"loss": 2.498021697998047,
"step": 10500
},
{
"epoch": 22.03350785340314,
"learning_rate": 0.0003519800364837407,
"loss": 2.377554702758789,
"step": 10510
},
{
"epoch": 22.054450261780104,
"learning_rate": 0.0003518127058175293,
"loss": 2.380527687072754,
"step": 10520
},
{
"epoch": 22.07539267015707,
"learning_rate": 0.00035164561357071755,
"loss": 2.3838827133178713,
"step": 10530
},
{
"epoch": 22.096335078534032,
"learning_rate": 0.0003514787591776602,
"loss": 2.3740776062011717,
"step": 10540
},
{
"epoch": 22.117277486910996,
"learning_rate": 0.0003513121420745892,
"loss": 2.374008560180664,
"step": 10550
},
{
"epoch": 22.138219895287957,
"learning_rate": 0.0003511457616996052,
"loss": 2.373431587219238,
"step": 10560
},
{
"epoch": 22.15916230366492,
"learning_rate": 0.0003509796174926703,
"loss": 2.403927803039551,
"step": 10570
},
{
"epoch": 22.180104712041885,
"learning_rate": 0.00035081370889559934,
"loss": 2.4006370544433593,
"step": 10580
},
{
"epoch": 22.20104712041885,
"learning_rate": 0.0003506480353520526,
"loss": 2.3758676528930662,
"step": 10590
},
{
"epoch": 22.221989528795813,
"learning_rate": 0.0003504825963075276,
"loss": 2.417715644836426,
"step": 10600
},
{
"epoch": 22.242931937172774,
"learning_rate": 0.00035031739120935175,
"loss": 2.3941156387329103,
"step": 10610
},
{
"epoch": 22.263874345549738,
"learning_rate": 0.0003501524195066741,
"loss": 2.3949649810791014,
"step": 10620
},
{
"epoch": 22.2848167539267,
"learning_rate": 0.0003499876806504578,
"loss": 2.4047883987426757,
"step": 10630
},
{
"epoch": 22.305759162303666,
"learning_rate": 0.00034982317409347263,
"loss": 2.3971155166625975,
"step": 10640
},
{
"epoch": 22.32670157068063,
"learning_rate": 0.00034965889929028707,
"loss": 2.404866027832031,
"step": 10650
},
{
"epoch": 22.34764397905759,
"learning_rate": 0.000349494855697261,
"loss": 2.3915122985839843,
"step": 10660
},
{
"epoch": 22.368586387434554,
"learning_rate": 0.0003493310427725377,
"loss": 2.39025936126709,
"step": 10670
},
{
"epoch": 22.38952879581152,
"learning_rate": 0.0003491674599760369,
"loss": 2.399850273132324,
"step": 10680
},
{
"epoch": 22.410471204188482,
"learning_rate": 0.0003490041067694469,
"loss": 2.3945247650146486,
"step": 10690
},
{
"epoch": 22.431413612565446,
"learning_rate": 0.00034884098261621724,
"loss": 2.397679901123047,
"step": 10700
},
{
"epoch": 22.452356020942407,
"learning_rate": 0.00034867808698155125,
"loss": 2.355159568786621,
"step": 10710
},
{
"epoch": 22.47329842931937,
"learning_rate": 0.0003485154193323988,
"loss": 2.3898927688598635,
"step": 10720
},
{
"epoch": 22.494240837696335,
"learning_rate": 0.00034835297913744903,
"loss": 2.367123031616211,
"step": 10730
},
{
"epoch": 22.5151832460733,
"learning_rate": 0.0003481907658671227,
"loss": 2.3852542877197265,
"step": 10740
},
{
"epoch": 22.536125654450263,
"learning_rate": 0.0003480287789935653,
"loss": 2.4065229415893556,
"step": 10750
},
{
"epoch": 22.557068062827224,
"learning_rate": 0.00034786701799063976,
"loss": 2.3588846206665037,
"step": 10760
},
{
"epoch": 22.578010471204188,
"learning_rate": 0.00034770548233391924,
"loss": 2.390997123718262,
"step": 10770
},
{
"epoch": 22.59895287958115,
"learning_rate": 0.0003475441715006799,
"loss": 2.3878786087036135,
"step": 10780
},
{
"epoch": 22.619895287958116,
"learning_rate": 0.0003473830849698938,
"loss": 2.398370552062988,
"step": 10790
},
{
"epoch": 22.64083769633508,
"learning_rate": 0.0003472222222222222,
"loss": 2.3805349349975584,
"step": 10800
},
{
"epoch": 22.66178010471204,
"learning_rate": 0.00034706158274000796,
"loss": 2.3977741241455077,
"step": 10810
},
{
"epoch": 22.682722513089004,
"learning_rate": 0.00034690116600726885,
"loss": 2.387373924255371,
"step": 10820
},
{
"epoch": 22.70366492146597,
"learning_rate": 0.0003467409715096907,
"loss": 2.3700994491577148,
"step": 10830
},
{
"epoch": 22.724607329842932,
"learning_rate": 0.00034658099873462027,
"loss": 2.3671117782592774,
"step": 10840
},
{
"epoch": 22.745549738219896,
"learning_rate": 0.0003464212471710583,
"loss": 2.377743148803711,
"step": 10850
},
{
"epoch": 22.76649214659686,
"learning_rate": 0.0003462617163096529,
"loss": 2.386002540588379,
"step": 10860
},
{
"epoch": 22.78743455497382,
"learning_rate": 0.00034610240564269265,
"loss": 2.3687204360961913,
"step": 10870
},
{
"epoch": 22.808376963350785,
"learning_rate": 0.0003459433146640997,
"loss": 2.3671218872070314,
"step": 10880
},
{
"epoch": 22.82931937172775,
"learning_rate": 0.00034578444286942307,
"loss": 2.3793460845947267,
"step": 10890
},
{
"epoch": 22.850261780104713,
"learning_rate": 0.00034562578975583187,
"loss": 2.374790382385254,
"step": 10900
},
{
"epoch": 22.871204188481677,
"learning_rate": 0.00034546735482210894,
"loss": 2.356049728393555,
"step": 10910
},
{
"epoch": 22.892146596858638,
"learning_rate": 0.0003453091375686437,
"loss": 2.361851119995117,
"step": 10920
},
{
"epoch": 22.9130890052356,
"learning_rate": 0.00034515113749742586,
"loss": 2.3911083221435545,
"step": 10930
},
{
"epoch": 22.934031413612566,
"learning_rate": 0.00034499335411203894,
"loss": 2.353407096862793,
"step": 10940
},
{
"epoch": 22.95497382198953,
"learning_rate": 0.00034483578691765326,
"loss": 2.3763240814208983,
"step": 10950
},
{
"epoch": 22.975916230366494,
"learning_rate": 0.00034467843542102,
"loss": 2.3745288848876953,
"step": 10960
},
{
"epoch": 22.996858638743454,
"learning_rate": 0.0003445212991304641,
"loss": 2.367890167236328,
"step": 10970
},
{
"epoch": 22.99895287958115,
"eval_loss": 2.542264461517334,
"eval_runtime": 74.647,
"eval_samples_per_second": 146.771,
"step": 10971
},
{
"epoch": 23.018848167539268,
"learning_rate": 0.00034436437755587827,
"loss": 2.4732553482055666,
"step": 10980
},
{
"epoch": 23.039790575916232,
"learning_rate": 0.00034420767020871656,
"loss": 2.3789663314819336,
"step": 10990
},
{
"epoch": 23.060732984293193,
"learning_rate": 0.00034405117660198765,
"loss": 2.387537384033203,
"step": 11000
},
{
"epoch": 23.081675392670157,
"learning_rate": 0.00034389489625024885,
"loss": 2.3760297775268553,
"step": 11010
},
{
"epoch": 23.10261780104712,
"learning_rate": 0.00034373882866959936,
"loss": 2.3746875762939452,
"step": 11020
},
{
"epoch": 23.123560209424085,
"learning_rate": 0.0003435829733776745,
"loss": 2.4071685791015627,
"step": 11030
},
{
"epoch": 23.14450261780105,
"learning_rate": 0.00034342732989363903,
"loss": 2.3595859527587892,
"step": 11040
},
{
"epoch": 23.16544502617801,
"learning_rate": 0.0003432718977381811,
"loss": 2.356878662109375,
"step": 11050
},
{
"epoch": 23.186387434554973,
"learning_rate": 0.0003431166764335058,
"loss": 2.3806716918945314,
"step": 11060
},
{
"epoch": 23.207329842931937,
"learning_rate": 0.0003429616655033297,
"loss": 2.365432929992676,
"step": 11070
},
{
"epoch": 23.2282722513089,
"learning_rate": 0.00034280686447287373,
"loss": 2.3477930068969726,
"step": 11080
},
{
"epoch": 23.249214659685865,
"learning_rate": 0.00034265227286885776,
"loss": 2.359480094909668,
"step": 11090
},
{
"epoch": 23.270157068062826,
"learning_rate": 0.00034249789021949435,
"loss": 2.3736724853515625,
"step": 11100
},
{
"epoch": 23.29109947643979,
"learning_rate": 0.0003423437160544826,
"loss": 2.3853965759277345,
"step": 11110
},
{
"epoch": 23.312041884816754,
"learning_rate": 0.0003421897499050022,
"loss": 2.3594213485717774,
"step": 11120
},
{
"epoch": 23.332984293193718,
"learning_rate": 0.0003420359913037075,
"loss": 2.3540416717529298,
"step": 11130
},
{
"epoch": 23.353926701570682,
"learning_rate": 0.0003418824397847216,
"loss": 2.371465301513672,
"step": 11140
},
{
"epoch": 23.374869109947642,
"learning_rate": 0.00034172909488363007,
"loss": 2.360518455505371,
"step": 11150
},
{
"epoch": 23.395811518324606,
"learning_rate": 0.00034157595613747545,
"loss": 2.347417640686035,
"step": 11160
},
{
"epoch": 23.41675392670157,
"learning_rate": 0.00034142302308475133,
"loss": 2.388157081604004,
"step": 11170
},
{
"epoch": 23.437696335078535,
"learning_rate": 0.0003412702952653962,
"loss": 2.348739433288574,
"step": 11180
},
{
"epoch": 23.4586387434555,
"learning_rate": 0.00034111777222078796,
"loss": 2.3871492385864257,
"step": 11190
},
{
"epoch": 23.47958115183246,
"learning_rate": 0.00034096545349373804,
"loss": 2.3624570846557615,
"step": 11200
},
{
"epoch": 23.500523560209423,
"learning_rate": 0.0003408133386284857,
"loss": 2.337727165222168,
"step": 11210
},
{
"epoch": 23.521465968586387,
"learning_rate": 0.0003406614271706919,
"loss": 2.3544214248657225,
"step": 11220
},
{
"epoch": 23.54240837696335,
"learning_rate": 0.0003405097186674344,
"loss": 2.3601694107055664,
"step": 11230
},
{
"epoch": 23.563350785340315,
"learning_rate": 0.00034035821266720136,
"loss": 2.3869655609130858,
"step": 11240
},
{
"epoch": 23.58429319371728,
"learning_rate": 0.0003402069087198858,
"loss": 2.3417810440063476,
"step": 11250
},
{
"epoch": 23.60523560209424,
"learning_rate": 0.00034005580637678053,
"loss": 2.3512496948242188,
"step": 11260
},
{
"epoch": 23.626178010471204,
"learning_rate": 0.00033990490519057183,
"loss": 2.3688682556152343,
"step": 11270
},
{
"epoch": 23.647120418848168,
"learning_rate": 0.0003397542047153345,
"loss": 2.3577795028686523,
"step": 11280
},
{
"epoch": 23.668062827225132,
"learning_rate": 0.0003396037045065257,
"loss": 2.380731201171875,
"step": 11290
},
{
"epoch": 23.689005235602096,
"learning_rate": 0.0003394534041209802,
"loss": 2.349542427062988,
"step": 11300
},
{
"epoch": 23.709947643979056,
"learning_rate": 0.0003393033031169043,
"loss": 2.3590700149536135,
"step": 11310
},
{
"epoch": 23.73089005235602,
"learning_rate": 0.0003391534010538705,
"loss": 2.392327880859375,
"step": 11320
},
{
"epoch": 23.751832460732984,
"learning_rate": 0.00033900369749281225,
"loss": 2.3760391235351563,
"step": 11330
},
{
"epoch": 23.77277486910995,
"learning_rate": 0.00033885419199601845,
"loss": 2.355258560180664,
"step": 11340
},
{
"epoch": 23.793717277486913,
"learning_rate": 0.000338704884127128,
"loss": 2.369922065734863,
"step": 11350
},
{
"epoch": 23.814659685863873,
"learning_rate": 0.00033855577345112453,
"loss": 2.410330390930176,
"step": 11360
},
{
"epoch": 23.835602094240837,
"learning_rate": 0.0003384068595343312,
"loss": 2.374154472351074,
"step": 11370
},
{
"epoch": 23.8565445026178,
"learning_rate": 0.00033825814194440504,
"loss": 2.360888671875,
"step": 11380
},
{
"epoch": 23.877486910994765,
"learning_rate": 0.0003381096202503321,
"loss": 2.372193145751953,
"step": 11390
},
{
"epoch": 23.89842931937173,
"learning_rate": 0.00033796129402242193,
"loss": 2.3628297805786134,
"step": 11400
},
{
"epoch": 23.91937172774869,
"learning_rate": 0.0003378131628323024,
"loss": 2.365167999267578,
"step": 11410
},
{
"epoch": 23.940314136125654,
"learning_rate": 0.0003376652262529146,
"loss": 2.385006332397461,
"step": 11420
},
{
"epoch": 23.961256544502618,
"learning_rate": 0.00033751748385850753,
"loss": 2.3524898529052733,
"step": 11430
},
{
"epoch": 23.982198952879582,
"learning_rate": 0.00033736993522463316,
"loss": 2.374051094055176,
"step": 11440
},
{
"epoch": 23.99895287958115,
"eval_loss": 2.532155990600586,
"eval_runtime": 73.6953,
"eval_samples_per_second": 148.666,
"step": 11448
},
{
"epoch": 24.004188481675392,
"learning_rate": 0.00033722257992814113,
"loss": 2.4828319549560547,
"step": 11450
},
{
"epoch": 24.025130890052356,
"learning_rate": 0.0003370754175471737,
"loss": 2.360254669189453,
"step": 11460
},
{
"epoch": 24.04607329842932,
"learning_rate": 0.0003369284476611607,
"loss": 2.349439811706543,
"step": 11470
},
{
"epoch": 24.067015706806284,
"learning_rate": 0.00033678166985081433,
"loss": 2.36633415222168,
"step": 11480
},
{
"epoch": 24.087958115183245,
"learning_rate": 0.0003366350836981245,
"loss": 2.365359306335449,
"step": 11490
},
{
"epoch": 24.10890052356021,
"learning_rate": 0.0003364886887863534,
"loss": 2.362344169616699,
"step": 11500
},
{
"epoch": 24.129842931937173,
"learning_rate": 0.0003363424847000309,
"loss": 2.357081985473633,
"step": 11510
},
{
"epoch": 24.150785340314137,
"learning_rate": 0.0003361964710249494,
"loss": 2.3507287979125975,
"step": 11520
},
{
"epoch": 24.1717277486911,
"learning_rate": 0.00033605064734815865,
"loss": 2.3537702560424805,
"step": 11530
},
{
"epoch": 24.19267015706806,
"learning_rate": 0.0003359050132579615,
"loss": 2.37689151763916,
"step": 11540
},
{
"epoch": 24.213612565445025,
"learning_rate": 0.00033575956834390843,
"loss": 2.348763847351074,
"step": 11550
},
{
"epoch": 24.23455497382199,
"learning_rate": 0.00033561431219679297,
"loss": 2.3715591430664062,
"step": 11560
},
{
"epoch": 24.255497382198953,
"learning_rate": 0.00033546924440864666,
"loss": 2.3734716415405273,
"step": 11570
},
{
"epoch": 24.276439790575917,
"learning_rate": 0.0003353243645727346,
"loss": 2.3519350051879884,
"step": 11580
},
{
"epoch": 24.297382198952878,
"learning_rate": 0.0003351796722835502,
"loss": 2.355198287963867,
"step": 11590
},
{
"epoch": 24.318324607329842,
"learning_rate": 0.00033503516713681087,
"loss": 2.3704608917236327,
"step": 11600
},
{
"epoch": 24.339267015706806,
"learning_rate": 0.00033489084872945283,
"loss": 2.3960491180419923,
"step": 11610
},
{
"epoch": 24.36020942408377,
"learning_rate": 0.0003347467166596268,
"loss": 2.3748762130737306,
"step": 11620
},
{
"epoch": 24.381151832460734,
"learning_rate": 0.0003346027705266929,
"loss": 2.358123016357422,
"step": 11630
},
{
"epoch": 24.402094240837695,
"learning_rate": 0.0003344590099312164,
"loss": 2.345402717590332,
"step": 11640
},
{
"epoch": 24.42303664921466,
"learning_rate": 0.00033431543447496275,
"loss": 2.331704330444336,
"step": 11650
},
{
"epoch": 24.443979057591623,
"learning_rate": 0.000334172043760893,
"loss": 2.377284812927246,
"step": 11660
},
{
"epoch": 24.464921465968587,
"learning_rate": 0.0003340288373931593,
"loss": 2.354692268371582,
"step": 11670
},
{
"epoch": 24.48586387434555,
"learning_rate": 0.0003338858149771002,
"loss": 2.3740156173706053,
"step": 11680
},
{
"epoch": 24.506806282722515,
"learning_rate": 0.0003337429761192361,
"loss": 2.367665672302246,
"step": 11690
},
{
"epoch": 24.527748691099475,
"learning_rate": 0.00033360032042726483,
"loss": 2.362037467956543,
"step": 11700
},
{
"epoch": 24.54869109947644,
"learning_rate": 0.000333457847510057,
"loss": 2.371419334411621,
"step": 11710
},
{
"epoch": 24.569633507853403,
"learning_rate": 0.0003333155569776514,
"loss": 2.33715934753418,
"step": 11720
},
{
"epoch": 24.590575916230367,
"learning_rate": 0.00033317344844125064,
"loss": 2.3361494064331056,
"step": 11730
},
{
"epoch": 24.61151832460733,
"learning_rate": 0.00033303152151321696,
"loss": 2.346495819091797,
"step": 11740
},
{
"epoch": 24.632460732984292,
"learning_rate": 0.00033288977580706714,
"loss": 2.3827404022216796,
"step": 11750
},
{
"epoch": 24.653403141361256,
"learning_rate": 0.0003327482109374687,
"loss": 2.3554365158081056,
"step": 11760
},
{
"epoch": 24.67434554973822,
"learning_rate": 0.00033260682652023517,
"loss": 2.3607540130615234,
"step": 11770
},
{
"epoch": 24.695287958115184,
"learning_rate": 0.0003324656221723217,
"loss": 2.3806394577026366,
"step": 11780
},
{
"epoch": 24.716230366492148,
"learning_rate": 0.000332324597511821,
"loss": 2.3277612686157227,
"step": 11790
},
{
"epoch": 24.73717277486911,
"learning_rate": 0.00033218375215795864,
"loss": 2.3573076248168947,
"step": 11800
},
{
"epoch": 24.758115183246073,
"learning_rate": 0.00033204308573108897,
"loss": 2.3293807983398436,
"step": 11810
},
{
"epoch": 24.779057591623037,
"learning_rate": 0.00033190259785269066,
"loss": 2.3627220153808595,
"step": 11820
},
{
"epoch": 24.8,
"learning_rate": 0.0003317622881453626,
"loss": 2.3504779815673826,
"step": 11830
},
{
"epoch": 24.820942408376965,
"learning_rate": 0.0003316221562328194,
"loss": 2.3526493072509767,
"step": 11840
},
{
"epoch": 24.841884816753925,
"learning_rate": 0.0003314822017398875,
"loss": 2.367503547668457,
"step": 11850
},
{
"epoch": 24.86282722513089,
"learning_rate": 0.00033134242429250053,
"loss": 2.364429473876953,
"step": 11860
},
{
"epoch": 24.883769633507853,
"learning_rate": 0.00033120282351769556,
"loss": 2.3418235778808594,
"step": 11870
},
{
"epoch": 24.904712041884817,
"learning_rate": 0.0003310633990436084,
"loss": 2.361065483093262,
"step": 11880
},
{
"epoch": 24.92565445026178,
"learning_rate": 0.00033092415049947006,
"loss": 2.3631685256958006,
"step": 11890
},
{
"epoch": 24.946596858638742,
"learning_rate": 0.00033078507751560195,
"loss": 2.346321868896484,
"step": 11900
},
{
"epoch": 24.967539267015706,
"learning_rate": 0.00033064617972341235,
"loss": 2.3589923858642576,
"step": 11910
},
{
"epoch": 24.98848167539267,
"learning_rate": 0.0003305074567553919,
"loss": 2.3485301971435546,
"step": 11920
},
{
"epoch": 24.99895287958115,
"eval_loss": 2.5436818599700928,
"eval_runtime": 73.6581,
"eval_samples_per_second": 148.741,
"step": 11925
},
{
"epoch": 25.01047120418848,
"learning_rate": 0.0003303689082451096,
"loss": 2.483962059020996,
"step": 11930
},
{
"epoch": 25.031413612565444,
"learning_rate": 0.00033023053382720904,
"loss": 2.352615547180176,
"step": 11940
},
{
"epoch": 25.05235602094241,
"learning_rate": 0.0003300923331374039,
"loss": 2.379102325439453,
"step": 11950
},
{
"epoch": 25.073298429319372,
"learning_rate": 0.00032995430581247417,
"loss": 2.3579853057861326,
"step": 11960
},
{
"epoch": 25.094240837696336,
"learning_rate": 0.0003298164514902622,
"loss": 2.3461565017700194,
"step": 11970
},
{
"epoch": 25.115183246073297,
"learning_rate": 0.0003296787698096686,
"loss": 2.328052520751953,
"step": 11980
},
{
"epoch": 25.13612565445026,
"learning_rate": 0.0003295412604106482,
"loss": 2.337063026428223,
"step": 11990
},
{
"epoch": 25.157068062827225,
"learning_rate": 0.00032940392293420614,
"loss": 2.3367223739624023,
"step": 12000
},
{
"epoch": 25.17801047120419,
"learning_rate": 0.00032926675702239425,
"loss": 2.365107536315918,
"step": 12010
},
{
"epoch": 25.198952879581153,
"learning_rate": 0.00032912976231830646,
"loss": 2.3596302032470704,
"step": 12020
},
{
"epoch": 25.219895287958114,
"learning_rate": 0.0003289929384660757,
"loss": 2.336884307861328,
"step": 12030
},
{
"epoch": 25.240837696335078,
"learning_rate": 0.0003288562851108693,
"loss": 2.3663518905639647,
"step": 12040
},
{
"epoch": 25.26178010471204,
"learning_rate": 0.0003287198018988856,
"loss": 2.3383811950683593,
"step": 12050
},
{
"epoch": 25.282722513089006,
"learning_rate": 0.00032858348847734985,
"loss": 2.3640661239624023,
"step": 12060
},
{
"epoch": 25.30366492146597,
"learning_rate": 0.00032844734449451055,
"loss": 2.3613861083984373,
"step": 12070
},
{
"epoch": 25.324607329842934,
"learning_rate": 0.00032831136959963553,
"loss": 2.3227806091308594,
"step": 12080
},
{
"epoch": 25.345549738219894,
"learning_rate": 0.00032817556344300823,
"loss": 2.328192710876465,
"step": 12090
},
{
"epoch": 25.36649214659686,
"learning_rate": 0.0003280399256759237,
"loss": 2.3461523056030273,
"step": 12100
},
{
"epoch": 25.387434554973822,
"learning_rate": 0.0003279044559506852,
"loss": 2.3762447357177736,
"step": 12110
},
{
"epoch": 25.408376963350786,
"learning_rate": 0.0003277691539206003,
"loss": 2.325837326049805,
"step": 12120
},
{
"epoch": 25.42931937172775,
"learning_rate": 0.0003276340192399769,
"loss": 2.3660905838012694,
"step": 12130
},
{
"epoch": 25.45026178010471,
"learning_rate": 0.00032749905156412,
"loss": 2.360948181152344,
"step": 12140
},
{
"epoch": 25.471204188481675,
"learning_rate": 0.0003273642505493275,
"loss": 2.3165866851806642,
"step": 12150
},
{
"epoch": 25.49214659685864,
"learning_rate": 0.0003272296158528871,
"loss": 2.3487401962280274,
"step": 12160
},
{
"epoch": 25.513089005235603,
"learning_rate": 0.000327095147133072,
"loss": 2.351056671142578,
"step": 12170
},
{
"epoch": 25.534031413612567,
"learning_rate": 0.00032696084404913777,
"loss": 2.3396501541137695,
"step": 12180
},
{
"epoch": 25.554973821989527,
"learning_rate": 0.00032682670626131837,
"loss": 2.3343048095703125,
"step": 12190
},
{
"epoch": 25.57591623036649,
"learning_rate": 0.0003266927334308229,
"loss": 2.3392221450805666,
"step": 12200
},
{
"epoch": 25.596858638743456,
"learning_rate": 0.0003265589252198317,
"loss": 2.339245414733887,
"step": 12210
},
{
"epoch": 25.61780104712042,
"learning_rate": 0.0003264252812914928,
"loss": 2.343129539489746,
"step": 12220
},
{
"epoch": 25.638743455497384,
"learning_rate": 0.0003262918013099186,
"loss": 2.344712829589844,
"step": 12230
},
{
"epoch": 25.659685863874344,
"learning_rate": 0.00032615848494018204,
"loss": 2.364294242858887,
"step": 12240
},
{
"epoch": 25.680628272251308,
"learning_rate": 0.0003260253318483131,
"loss": 2.3588529586791993,
"step": 12250
},
{
"epoch": 25.701570680628272,
"learning_rate": 0.0003258923417012957,
"loss": 2.3558927536010743,
"step": 12260
},
{
"epoch": 25.722513089005236,
"learning_rate": 0.00032575951416706354,
"loss": 2.370713996887207,
"step": 12270
},
{
"epoch": 25.7434554973822,
"learning_rate": 0.0003256268489144972,
"loss": 2.3426084518432617,
"step": 12280
},
{
"epoch": 25.76439790575916,
"learning_rate": 0.0003254943456134202,
"loss": 2.3299545288085937,
"step": 12290
},
{
"epoch": 25.785340314136125,
"learning_rate": 0.0003253620039345959,
"loss": 2.343545913696289,
"step": 12300
},
{
"epoch": 25.80628272251309,
"learning_rate": 0.0003252298235497241,
"loss": 2.3348289489746095,
"step": 12310
},
{
"epoch": 25.827225130890053,
"learning_rate": 0.0003250978041314371,
"loss": 2.3712085723876952,
"step": 12320
},
{
"epoch": 25.848167539267017,
"learning_rate": 0.000324965945353297,
"loss": 2.347680854797363,
"step": 12330
},
{
"epoch": 25.869109947643977,
"learning_rate": 0.0003248342468897917,
"loss": 2.342079925537109,
"step": 12340
},
{
"epoch": 25.89005235602094,
"learning_rate": 0.00032470270841633195,
"loss": 2.376851272583008,
"step": 12350
},
{
"epoch": 25.910994764397905,
"learning_rate": 0.00032457132960924783,
"loss": 2.3613746643066404,
"step": 12360
},
{
"epoch": 25.93193717277487,
"learning_rate": 0.00032444011014578535,
"loss": 2.3406829833984375,
"step": 12370
},
{
"epoch": 25.952879581151834,
"learning_rate": 0.00032430904970410314,
"loss": 2.328056526184082,
"step": 12380
},
{
"epoch": 25.973821989528794,
"learning_rate": 0.0003241781479632693,
"loss": 2.3438344955444337,
"step": 12390
},
{
"epoch": 25.994764397905758,
"learning_rate": 0.0003240474046032579,
"loss": 2.349610137939453,
"step": 12400
},
{
"epoch": 25.99895287958115,
"eval_loss": 2.533395767211914,
"eval_runtime": 73.6578,
"eval_samples_per_second": 148.742,
"step": 12402
},
{
"epoch": 26.016753926701572,
"learning_rate": 0.00032391681930494566,
"loss": 2.4640811920166015,
"step": 12410
},
{
"epoch": 26.037696335078532,
"learning_rate": 0.000323786391750109,
"loss": 2.3338626861572265,
"step": 12420
},
{
"epoch": 26.058638743455496,
"learning_rate": 0.0003236561216214202,
"loss": 2.342071533203125,
"step": 12430
},
{
"epoch": 26.07958115183246,
"learning_rate": 0.000323526008602445,
"loss": 2.374074172973633,
"step": 12440
},
{
"epoch": 26.100523560209425,
"learning_rate": 0.0003233960523776387,
"loss": 2.3421449661254883,
"step": 12450
},
{
"epoch": 26.12146596858639,
"learning_rate": 0.0003232662526323429,
"loss": 2.3614429473876952,
"step": 12460
},
{
"epoch": 26.14240837696335,
"learning_rate": 0.0003231366090527828,
"loss": 2.319747543334961,
"step": 12470
},
{
"epoch": 26.163350785340313,
"learning_rate": 0.00032300712132606366,
"loss": 2.3622182846069335,
"step": 12480
},
{
"epoch": 26.184293193717277,
"learning_rate": 0.0003228777891401678,
"loss": 2.359231185913086,
"step": 12490
},
{
"epoch": 26.20523560209424,
"learning_rate": 0.0003227486121839514,
"loss": 2.355366516113281,
"step": 12500
},
{
"epoch": 26.226178010471205,
"learning_rate": 0.00032261959014714107,
"loss": 2.3299293518066406,
"step": 12510
},
{
"epoch": 26.24712041884817,
"learning_rate": 0.0003224907227203312,
"loss": 2.3555164337158203,
"step": 12520
},
{
"epoch": 26.26806282722513,
"learning_rate": 0.0003223620095949806,
"loss": 2.314861869812012,
"step": 12530
},
{
"epoch": 26.289005235602094,
"learning_rate": 0.00032223345046340936,
"loss": 2.3362022399902345,
"step": 12540
},
{
"epoch": 26.309947643979058,
"learning_rate": 0.00032210504501879576,
"loss": 2.3406482696533204,
"step": 12550
},
{
"epoch": 26.330890052356022,
"learning_rate": 0.0003219767929551733,
"loss": 2.30753231048584,
"step": 12560
},
{
"epoch": 26.351832460732986,
"learning_rate": 0.00032184869396742754,
"loss": 2.351367950439453,
"step": 12570
},
{
"epoch": 26.372774869109946,
"learning_rate": 0.00032172074775129323,
"loss": 2.3465883255004885,
"step": 12580
},
{
"epoch": 26.39371727748691,
"learning_rate": 0.00032159295400335114,
"loss": 2.3782730102539062,
"step": 12590
},
{
"epoch": 26.414659685863874,
"learning_rate": 0.00032146531242102476,
"loss": 2.3480430603027345,
"step": 12600
},
{
"epoch": 26.43560209424084,
"learning_rate": 0.0003213378227025779,
"loss": 2.370161437988281,
"step": 12610
},
{
"epoch": 26.456544502617803,
"learning_rate": 0.00032121048454711114,
"loss": 2.3542537689208984,
"step": 12620
},
{
"epoch": 26.477486910994763,
"learning_rate": 0.00032108329765455926,
"loss": 2.3564731597900392,
"step": 12630
},
{
"epoch": 26.498429319371727,
"learning_rate": 0.00032095626172568784,
"loss": 2.333011817932129,
"step": 12640
},
{
"epoch": 26.51937172774869,
"learning_rate": 0.00032082937646209084,
"loss": 2.3433643341064454,
"step": 12650
},
{
"epoch": 26.540314136125655,
"learning_rate": 0.0003207026415661871,
"loss": 2.3346595764160156,
"step": 12660
},
{
"epoch": 26.56125654450262,
"learning_rate": 0.0003205760567412178,
"loss": 2.339708709716797,
"step": 12670
},
{
"epoch": 26.58219895287958,
"learning_rate": 0.00032044962169124335,
"loss": 2.3501649856567384,
"step": 12680
},
{
"epoch": 26.603141361256544,
"learning_rate": 0.0003203233361211406,
"loss": 2.3222862243652345,
"step": 12690
},
{
"epoch": 26.624083769633508,
"learning_rate": 0.00032019719973659996,
"loss": 2.3362213134765626,
"step": 12700
},
{
"epoch": 26.645026178010472,
"learning_rate": 0.00032007121224412224,
"loss": 2.311092567443848,
"step": 12710
},
{
"epoch": 26.665968586387436,
"learning_rate": 0.0003199453733510162,
"loss": 2.332124137878418,
"step": 12720
},
{
"epoch": 26.686910994764396,
"learning_rate": 0.00031981968276539543,
"loss": 2.3406246185302733,
"step": 12730
},
{
"epoch": 26.70785340314136,
"learning_rate": 0.0003196941401961754,
"loss": 2.3419260025024413,
"step": 12740
},
{
"epoch": 26.728795811518324,
"learning_rate": 0.000319568745353071,
"loss": 2.344953727722168,
"step": 12750
},
{
"epoch": 26.74973821989529,
"learning_rate": 0.0003194434979465935,
"loss": 2.3517208099365234,
"step": 12760
},
{
"epoch": 26.770680628272252,
"learning_rate": 0.0003193183976880476,
"loss": 2.3811822891235352,
"step": 12770
},
{
"epoch": 26.791623036649213,
"learning_rate": 0.00031919344428952895,
"loss": 2.3604736328125,
"step": 12780
},
{
"epoch": 26.812565445026177,
"learning_rate": 0.0003190686374639211,
"loss": 2.3444387435913088,
"step": 12790
},
{
"epoch": 26.83350785340314,
"learning_rate": 0.00031894397692489295,
"loss": 2.340729331970215,
"step": 12800
},
{
"epoch": 26.854450261780105,
"learning_rate": 0.0003188194623868958,
"loss": 2.340890121459961,
"step": 12810
},
{
"epoch": 26.87539267015707,
"learning_rate": 0.00031869509356516063,
"loss": 2.3352834701538088,
"step": 12820
},
{
"epoch": 26.89633507853403,
"learning_rate": 0.00031857087017569556,
"loss": 2.3224008560180662,
"step": 12830
},
{
"epoch": 26.917277486910994,
"learning_rate": 0.0003184467919352828,
"loss": 2.3237512588500975,
"step": 12840
},
{
"epoch": 26.938219895287958,
"learning_rate": 0.0003183228585614763,
"loss": 2.3366432189941406,
"step": 12850
},
{
"epoch": 26.95916230366492,
"learning_rate": 0.0003181990697725988,
"loss": 2.3389394760131834,
"step": 12860
},
{
"epoch": 26.980104712041886,
"learning_rate": 0.0003180754252877392,
"loss": 2.2963605880737306,
"step": 12870
},
{
"epoch": 26.99895287958115,
"eval_loss": 2.5350682735443115,
"eval_runtime": 73.272,
"eval_samples_per_second": 149.525,
"step": 12879
},
{
"epoch": 27.002094240837696,
"learning_rate": 0.0003179519248267498,
"loss": 2.4346525192260744,
"step": 12880
},
{
"epoch": 27.02303664921466,
"learning_rate": 0.000317828568110244,
"loss": 2.3435186386108398,
"step": 12890
},
{
"epoch": 27.043979057591624,
"learning_rate": 0.000317705354859593,
"loss": 2.351651191711426,
"step": 12900
},
{
"epoch": 27.064921465968588,
"learning_rate": 0.0003175822847969239,
"loss": 2.3490814208984374,
"step": 12910
},
{
"epoch": 27.08586387434555,
"learning_rate": 0.00031745935764511645,
"loss": 2.3329612731933596,
"step": 12920
},
{
"epoch": 27.106806282722513,
"learning_rate": 0.0003173365731278007,
"loss": 2.321672248840332,
"step": 12930
},
{
"epoch": 27.127748691099477,
"learning_rate": 0.00031721393096935445,
"loss": 2.3357425689697267,
"step": 12940
},
{
"epoch": 27.14869109947644,
"learning_rate": 0.00031709143089490063,
"loss": 2.3383440017700194,
"step": 12950
},
{
"epoch": 27.169633507853405,
"learning_rate": 0.00031696907263030445,
"loss": 2.3266096115112305,
"step": 12960
},
{
"epoch": 27.190575916230365,
"learning_rate": 0.00031684685590217115,
"loss": 2.3512828826904295,
"step": 12970
},
{
"epoch": 27.21151832460733,
"learning_rate": 0.00031672478043784336,
"loss": 2.329998016357422,
"step": 12980
},
{
"epoch": 27.232460732984293,
"learning_rate": 0.0003166028459653984,
"loss": 2.353693962097168,
"step": 12990
},
{
"epoch": 27.253403141361257,
"learning_rate": 0.0003164810522136458,
"loss": 2.3388673782348635,
"step": 13000
},
{
"epoch": 27.27434554973822,
"learning_rate": 0.0003163593989121249,
"loss": 2.34061222076416,
"step": 13010
},
{
"epoch": 27.295287958115182,
"learning_rate": 0.0003162378857911022,
"loss": 2.3279703140258787,
"step": 13020
},
{
"epoch": 27.316230366492146,
"learning_rate": 0.00031611651258156884,
"loss": 2.3643896102905275,
"step": 13030
},
{
"epoch": 27.33717277486911,
"learning_rate": 0.0003159952790152381,
"loss": 2.326703643798828,
"step": 13040
},
{
"epoch": 27.358115183246074,
"learning_rate": 0.0003158741848245431,
"loss": 2.3338809967041017,
"step": 13050
},
{
"epoch": 27.379057591623038,
"learning_rate": 0.0003157532297426339,
"loss": 2.318799591064453,
"step": 13060
},
{
"epoch": 27.4,
"learning_rate": 0.00031563241350337546,
"loss": 2.3162815093994142,
"step": 13070
},
{
"epoch": 27.420942408376963,
"learning_rate": 0.00031551173584134514,
"loss": 2.354751968383789,
"step": 13080
},
{
"epoch": 27.441884816753927,
"learning_rate": 0.0003153911964918298,
"loss": 2.3353591918945313,
"step": 13090
},
{
"epoch": 27.46282722513089,
"learning_rate": 0.0003152707951908239,
"loss": 2.3257909774780274,
"step": 13100
},
{
"epoch": 27.483769633507855,
"learning_rate": 0.0003151505316750269,
"loss": 2.336490821838379,
"step": 13110
},
{
"epoch": 27.504712041884815,
"learning_rate": 0.0003150304056818405,
"loss": 2.32800350189209,
"step": 13120
},
{
"epoch": 27.52565445026178,
"learning_rate": 0.00031491041694936697,
"loss": 2.3223346710205077,
"step": 13130
},
{
"epoch": 27.546596858638743,
"learning_rate": 0.000314790565216406,
"loss": 2.348642921447754,
"step": 13140
},
{
"epoch": 27.567539267015707,
"learning_rate": 0.0003146708502224526,
"loss": 2.3384424209594727,
"step": 13150
},
{
"epoch": 27.58848167539267,
"learning_rate": 0.0003145512717076948,
"loss": 2.301900863647461,
"step": 13160
},
{
"epoch": 27.609424083769632,
"learning_rate": 0.00031443182941301147,
"loss": 2.3309160232543946,
"step": 13170
},
{
"epoch": 27.630366492146596,
"learning_rate": 0.0003143125230799694,
"loss": 2.358192253112793,
"step": 13180
},
{
"epoch": 27.65130890052356,
"learning_rate": 0.00031419335245082134,
"loss": 2.347599220275879,
"step": 13190
},
{
"epoch": 27.672251308900524,
"learning_rate": 0.00031407431726850375,
"loss": 2.330830764770508,
"step": 13200
},
{
"epoch": 27.693193717277488,
"learning_rate": 0.00031395541727663413,
"loss": 2.33847599029541,
"step": 13210
},
{
"epoch": 27.71413612565445,
"learning_rate": 0.0003138366522195088,
"loss": 2.3454364776611327,
"step": 13220
},
{
"epoch": 27.735078534031413,
"learning_rate": 0.0003137180218421011,
"loss": 2.3458301544189455,
"step": 13230
},
{
"epoch": 27.756020942408377,
"learning_rate": 0.0003135995258900582,
"loss": 2.2951147079467775,
"step": 13240
},
{
"epoch": 27.77696335078534,
"learning_rate": 0.0003134811641096994,
"loss": 2.324018096923828,
"step": 13250
},
{
"epoch": 27.797905759162305,
"learning_rate": 0.00031336293624801393,
"loss": 2.320078468322754,
"step": 13260
},
{
"epoch": 27.81884816753927,
"learning_rate": 0.00031324484205265824,
"loss": 2.3213479995727537,
"step": 13270
},
{
"epoch": 27.83979057591623,
"learning_rate": 0.000313126881271954,
"loss": 2.352939224243164,
"step": 13280
},
{
"epoch": 27.860732984293193,
"learning_rate": 0.0003130090536548859,
"loss": 2.3275819778442384,
"step": 13290
},
{
"epoch": 27.881675392670157,
"learning_rate": 0.00031289135895109924,
"loss": 2.341213607788086,
"step": 13300
},
{
"epoch": 27.90261780104712,
"learning_rate": 0.00031277379691089786,
"loss": 2.352794647216797,
"step": 13310
},
{
"epoch": 27.923560209424085,
"learning_rate": 0.00031265636728524174,
"loss": 2.329135513305664,
"step": 13320
},
{
"epoch": 27.944502617801046,
"learning_rate": 0.000312539069825745,
"loss": 2.3414382934570312,
"step": 13330
},
{
"epoch": 27.96544502617801,
"learning_rate": 0.00031242190428467325,
"loss": 2.3638214111328124,
"step": 13340
},
{
"epoch": 27.986387434554974,
"learning_rate": 0.0003123048704149423,
"loss": 2.326797294616699,
"step": 13350
},
{
"epoch": 27.99895287958115,
"eval_loss": 2.532017469406128,
"eval_runtime": 73.734,
"eval_samples_per_second": 148.588,
"step": 13356
},
{
"epoch": 28.008376963350784,
"learning_rate": 0.0003121879679701147,
"loss": 2.463714599609375,
"step": 13360
},
{
"epoch": 28.02931937172775,
"learning_rate": 0.00031207119670439884,
"loss": 2.3355535507202148,
"step": 13370
},
{
"epoch": 28.050261780104712,
"learning_rate": 0.00031195455637264574,
"loss": 2.3194732666015625,
"step": 13380
},
{
"epoch": 28.071204188481676,
"learning_rate": 0.00031183804673034756,
"loss": 2.3297607421875,
"step": 13390
},
{
"epoch": 28.09214659685864,
"learning_rate": 0.0003117216675336353,
"loss": 2.33233642578125,
"step": 13400
},
{
"epoch": 28.1130890052356,
"learning_rate": 0.00031160541853927627,
"loss": 2.3335954666137697,
"step": 13410
},
{
"epoch": 28.134031413612565,
"learning_rate": 0.0003114892995046725,
"loss": 2.3236547470092774,
"step": 13420
},
{
"epoch": 28.15497382198953,
"learning_rate": 0.00031137331018785835,
"loss": 2.3411203384399415,
"step": 13430
},
{
"epoch": 28.175916230366493,
"learning_rate": 0.00031125745034749834,
"loss": 2.343415451049805,
"step": 13440
},
{
"epoch": 28.196858638743457,
"learning_rate": 0.00031114171974288516,
"loss": 2.305185890197754,
"step": 13450
},
{
"epoch": 28.217801047120417,
"learning_rate": 0.00031102611813393753,
"loss": 2.3106929779052736,
"step": 13460
},
{
"epoch": 28.23874345549738,
"learning_rate": 0.0003109106452811981,
"loss": 2.342930221557617,
"step": 13470
},
{
"epoch": 28.259685863874346,
"learning_rate": 0.00031079530094583135,
"loss": 2.3201034545898436,
"step": 13480
},
{
"epoch": 28.28062827225131,
"learning_rate": 0.0003106800848896216,
"loss": 2.353871154785156,
"step": 13490
},
{
"epoch": 28.301570680628274,
"learning_rate": 0.0003105649968749708,
"loss": 2.3587244033813475,
"step": 13500
},
{
"epoch": 28.322513089005234,
"learning_rate": 0.0003104500366648965,
"loss": 2.334798812866211,
"step": 13510
},
{
"epoch": 28.343455497382198,
"learning_rate": 0.0003103352040230302,
"loss": 2.3387428283691407,
"step": 13520
},
{
"epoch": 28.364397905759162,
"learning_rate": 0.00031022049871361445,
"loss": 2.35083065032959,
"step": 13530
},
{
"epoch": 28.385340314136126,
"learning_rate": 0.0003101059205015017,
"loss": 2.329609680175781,
"step": 13540
},
{
"epoch": 28.40628272251309,
"learning_rate": 0.0003099914691521518,
"loss": 2.3420963287353516,
"step": 13550
},
{
"epoch": 28.42722513089005,
"learning_rate": 0.00030987714443163,
"loss": 2.3433679580688476,
"step": 13560
},
{
"epoch": 28.448167539267015,
"learning_rate": 0.00030976294610660516,
"loss": 2.3441110610961915,
"step": 13570
},
{
"epoch": 28.46910994764398,
"learning_rate": 0.00030964887394434754,
"loss": 2.338638687133789,
"step": 13580
},
{
"epoch": 28.490052356020943,
"learning_rate": 0.000309534927712727,
"loss": 2.3203834533691405,
"step": 13590
},
{
"epoch": 28.510994764397907,
"learning_rate": 0.0003094211071802107,
"loss": 2.3022727966308594,
"step": 13600
},
{
"epoch": 28.531937172774867,
"learning_rate": 0.00030930741211586155,
"loss": 2.3490713119506834,
"step": 13610
},
{
"epoch": 28.55287958115183,
"learning_rate": 0.0003091938422893358,
"loss": 2.3286787033081056,
"step": 13620
},
{
"epoch": 28.573821989528795,
"learning_rate": 0.00030908039747088155,
"loss": 2.305118942260742,
"step": 13630
},
{
"epoch": 28.59476439790576,
"learning_rate": 0.00030896707743133635,
"loss": 2.3220989227294924,
"step": 13640
},
{
"epoch": 28.615706806282724,
"learning_rate": 0.0003088538819421255,
"loss": 2.3236154556274413,
"step": 13650
},
{
"epoch": 28.636649214659684,
"learning_rate": 0.00030874081077526003,
"loss": 2.323534393310547,
"step": 13660
},
{
"epoch": 28.657591623036648,
"learning_rate": 0.00030862786370333505,
"loss": 2.3269046783447265,
"step": 13670
},
{
"epoch": 28.678534031413612,
"learning_rate": 0.00030851504049952727,
"loss": 2.3261356353759766,
"step": 13680
},
{
"epoch": 28.699476439790576,
"learning_rate": 0.00030840234093759347,
"loss": 2.3454893112182615,
"step": 13690
},
{
"epoch": 28.72041884816754,
"learning_rate": 0.0003082897647918688,
"loss": 2.3275333404541017,
"step": 13700
},
{
"epoch": 28.741361256544504,
"learning_rate": 0.0003081773118372642,
"loss": 2.3333641052246095,
"step": 13710
},
{
"epoch": 28.762303664921465,
"learning_rate": 0.00030806498184926523,
"loss": 2.3693473815917967,
"step": 13720
},
{
"epoch": 28.78324607329843,
"learning_rate": 0.0003079527746039298,
"loss": 2.3141483306884765,
"step": 13730
},
{
"epoch": 28.804188481675393,
"learning_rate": 0.00030784068987788624,
"loss": 2.353886032104492,
"step": 13740
},
{
"epoch": 28.825130890052357,
"learning_rate": 0.00030772872744833183,
"loss": 2.3143518447875975,
"step": 13750
},
{
"epoch": 28.84607329842932,
"learning_rate": 0.00030761688709303036,
"loss": 2.317976379394531,
"step": 13760
},
{
"epoch": 28.86701570680628,
"learning_rate": 0.0003075051685903109,
"loss": 2.331821060180664,
"step": 13770
},
{
"epoch": 28.887958115183245,
"learning_rate": 0.00030739357171906536,
"loss": 2.3297216415405275,
"step": 13780
},
{
"epoch": 28.90890052356021,
"learning_rate": 0.0003072820962587471,
"loss": 2.3354673385620117,
"step": 13790
},
{
"epoch": 28.929842931937173,
"learning_rate": 0.00030717074198936904,
"loss": 2.308320999145508,
"step": 13800
},
{
"epoch": 28.950785340314138,
"learning_rate": 0.0003070595086915015,
"loss": 2.312677192687988,
"step": 13810
},
{
"epoch": 28.971727748691098,
"learning_rate": 0.00030694839614627076,
"loss": 2.3000450134277344,
"step": 13820
},
{
"epoch": 28.992670157068062,
"learning_rate": 0.0003068374041353571,
"loss": 2.333408737182617,
"step": 13830
},
{
"epoch": 28.99895287958115,
"eval_loss": 2.541404962539673,
"eval_runtime": 73.9823,
"eval_samples_per_second": 148.09,
"step": 13833
},
{
"epoch": 29.014659685863876,
"learning_rate": 0.000306726532440993,
"loss": 2.4384193420410156,
"step": 13840
},
{
"epoch": 29.035602094240836,
"learning_rate": 0.0003066157808459613,
"loss": 2.3120851516723633,
"step": 13850
},
{
"epoch": 29.0565445026178,
"learning_rate": 0.0003065051491335936,
"loss": 2.333901596069336,
"step": 13860
},
{
"epoch": 29.077486910994764,
"learning_rate": 0.0003063946370877681,
"loss": 2.311614227294922,
"step": 13870
},
{
"epoch": 29.09842931937173,
"learning_rate": 0.0003062842444929085,
"loss": 2.328507423400879,
"step": 13880
},
{
"epoch": 29.119371727748693,
"learning_rate": 0.00030617397113398125,
"loss": 2.3186750411987305,
"step": 13890
},
{
"epoch": 29.140314136125653,
"learning_rate": 0.00030606381679649483,
"loss": 2.3101566314697264,
"step": 13900
},
{
"epoch": 29.161256544502617,
"learning_rate": 0.00030595378126649727,
"loss": 2.3323139190673827,
"step": 13910
},
{
"epoch": 29.18219895287958,
"learning_rate": 0.0003058438643305747,
"loss": 2.3120336532592773,
"step": 13920
},
{
"epoch": 29.203141361256545,
"learning_rate": 0.00030573406577584955,
"loss": 2.3213123321533202,
"step": 13930
},
{
"epoch": 29.22408376963351,
"learning_rate": 0.000305624385389979,
"loss": 2.3158872604370115,
"step": 13940
},
{
"epoch": 29.24502617801047,
"learning_rate": 0.0003055148229611527,
"loss": 2.3301626205444337,
"step": 13950
},
{
"epoch": 29.265968586387434,
"learning_rate": 0.00030540537827809176,
"loss": 2.2979711532592773,
"step": 13960
},
{
"epoch": 29.286910994764398,
"learning_rate": 0.0003052960511300467,
"loss": 2.337363433837891,
"step": 13970
},
{
"epoch": 29.307853403141362,
"learning_rate": 0.0003051868413067956,
"loss": 2.3084648132324217,
"step": 13980
},
{
"epoch": 29.328795811518326,
"learning_rate": 0.00030507774859864277,
"loss": 2.3315618515014647,
"step": 13990
},
{
"epoch": 29.349738219895286,
"learning_rate": 0.0003049687727964166,
"loss": 2.342039680480957,
"step": 14000
},
{
"epoch": 29.37068062827225,
"learning_rate": 0.00030485991369146834,
"loss": 2.327268600463867,
"step": 14010
},
{
"epoch": 29.391623036649214,
"learning_rate": 0.00030475117107567015,
"loss": 2.311885643005371,
"step": 14020
},
{
"epoch": 29.41256544502618,
"learning_rate": 0.0003046425447414135,
"loss": 2.297453498840332,
"step": 14030
},
{
"epoch": 29.433507853403142,
"learning_rate": 0.0003045340344816073,
"loss": 2.295667839050293,
"step": 14040
},
{
"epoch": 29.454450261780103,
"learning_rate": 0.0003044256400896769,
"loss": 2.3093278884887694,
"step": 14050
},
{
"epoch": 29.475392670157067,
"learning_rate": 0.0003043173613595614,
"loss": 2.3026140213012694,
"step": 14060
},
{
"epoch": 29.49633507853403,
"learning_rate": 0.0003042091980857131,
"loss": 2.339429473876953,
"step": 14070
},
{
"epoch": 29.517277486910995,
"learning_rate": 0.0003041011500630949,
"loss": 2.3204904556274415,
"step": 14080
},
{
"epoch": 29.53821989528796,
"learning_rate": 0.00030399321708717947,
"loss": 2.327162170410156,
"step": 14090
},
{
"epoch": 29.559162303664923,
"learning_rate": 0.00030388539895394697,
"loss": 2.3462697982788088,
"step": 14100
},
{
"epoch": 29.580104712041884,
"learning_rate": 0.00030377769545988394,
"loss": 2.2912479400634767,
"step": 14110
},
{
"epoch": 29.601047120418848,
"learning_rate": 0.00030367010640198143,
"loss": 2.35098876953125,
"step": 14120
},
{
"epoch": 29.62198952879581,
"learning_rate": 0.0003035626315777333,
"loss": 2.304596710205078,
"step": 14130
},
{
"epoch": 29.642931937172776,
"learning_rate": 0.00030345527078513493,
"loss": 2.3083545684814455,
"step": 14140
},
{
"epoch": 29.66387434554974,
"learning_rate": 0.0003033480238226813,
"loss": 2.304719924926758,
"step": 14150
},
{
"epoch": 29.6848167539267,
"learning_rate": 0.0003032408904893656,
"loss": 2.309472846984863,
"step": 14160
},
{
"epoch": 29.705759162303664,
"learning_rate": 0.00030313387058467756,
"loss": 2.3114566802978516,
"step": 14170
},
{
"epoch": 29.72670157068063,
"learning_rate": 0.0003030269639086021,
"loss": 2.3168495178222654,
"step": 14180
},
{
"epoch": 29.747643979057592,
"learning_rate": 0.0003029201702616173,
"loss": 2.3027936935424806,
"step": 14190
},
{
"epoch": 29.768586387434556,
"learning_rate": 0.0003028134894446933,
"loss": 2.330441474914551,
"step": 14200
},
{
"epoch": 29.789528795811517,
"learning_rate": 0.00030270692125929034,
"loss": 2.2950525283813477,
"step": 14210
},
{
"epoch": 29.81047120418848,
"learning_rate": 0.00030260046550735763,
"loss": 2.3066877365112304,
"step": 14220
},
{
"epoch": 29.831413612565445,
"learning_rate": 0.0003024941219913316,
"loss": 2.3138294219970703,
"step": 14230
},
{
"epoch": 29.85235602094241,
"learning_rate": 0.00030238789051413416,
"loss": 2.3398483276367186,
"step": 14240
},
{
"epoch": 29.873298429319373,
"learning_rate": 0.00030228177087917153,
"loss": 2.3180753707885744,
"step": 14250
},
{
"epoch": 29.894240837696334,
"learning_rate": 0.00030217576289033235,
"loss": 2.293859100341797,
"step": 14260
},
{
"epoch": 29.915183246073298,
"learning_rate": 0.00030206986635198654,
"loss": 2.315079116821289,
"step": 14270
},
{
"epoch": 29.93612565445026,
"learning_rate": 0.00030196408106898356,
"loss": 2.3188785552978515,
"step": 14280
},
{
"epoch": 29.957068062827226,
"learning_rate": 0.0003018584068466507,
"loss": 2.355891799926758,
"step": 14290
},
{
"epoch": 29.97801047120419,
"learning_rate": 0.0003017528434907922,
"loss": 2.310663032531738,
"step": 14300
},
{
"epoch": 29.99895287958115,
"learning_rate": 0.00030164739080768704,
"loss": 2.357052803039551,
"step": 14310
},
{
"epoch": 29.99895287958115,
"eval_loss": 2.5363190174102783,
"eval_runtime": 73.3558,
"eval_samples_per_second": 149.354,
"step": 14310
},
{
"epoch": 30.020942408376964,
"learning_rate": 0.0003015420486040879,
"loss": 2.432624626159668,
"step": 14320
},
{
"epoch": 30.041884816753928,
"learning_rate": 0.00030143681668721935,
"loss": 2.3034442901611327,
"step": 14330
},
{
"epoch": 30.06282722513089,
"learning_rate": 0.00030133169486477694,
"loss": 2.3489042282104493,
"step": 14340
},
{
"epoch": 30.083769633507853,
"learning_rate": 0.0003012266829449249,
"loss": 2.3431249618530274,
"step": 14350
},
{
"epoch": 30.104712041884817,
"learning_rate": 0.00030112178073629544,
"loss": 2.332902526855469,
"step": 14360
},
{
"epoch": 30.12565445026178,
"learning_rate": 0.0003010169880479867,
"loss": 2.323573112487793,
"step": 14370
},
{
"epoch": 30.146596858638745,
"learning_rate": 0.0003009123046895618,
"loss": 2.299881362915039,
"step": 14380
},
{
"epoch": 30.167539267015705,
"learning_rate": 0.00030080773047104687,
"loss": 2.319793701171875,
"step": 14390
},
{
"epoch": 30.18848167539267,
"learning_rate": 0.0003007032652029301,
"loss": 2.3272857666015625,
"step": 14400
},
{
"epoch": 30.209424083769633,
"learning_rate": 0.00030059890869615983,
"loss": 2.3354257583618163,
"step": 14410
},
{
"epoch": 30.230366492146597,
"learning_rate": 0.0003004946607621435,
"loss": 2.3249101638793945,
"step": 14420
},
{
"epoch": 30.25130890052356,
"learning_rate": 0.0003003905212127461,
"loss": 2.331306266784668,
"step": 14430
},
{
"epoch": 30.272251308900522,
"learning_rate": 0.00030028648986028843,
"loss": 2.3302356719970705,
"step": 14440
},
{
"epoch": 30.293193717277486,
"learning_rate": 0.00030018256651754633,
"loss": 2.3084732055664063,
"step": 14450
},
{
"epoch": 30.31413612565445,
"learning_rate": 0.00030007875099774864,
"loss": 2.324197006225586,
"step": 14460
},
{
"epoch": 30.335078534031414,
"learning_rate": 0.0002999750431145761,
"loss": 2.309644317626953,
"step": 14470
},
{
"epoch": 30.356020942408378,
"learning_rate": 0.0002998714426821599,
"loss": 2.332279014587402,
"step": 14480
},
{
"epoch": 30.376963350785342,
"learning_rate": 0.00029976794951508027,
"loss": 2.3015905380249024,
"step": 14490
},
{
"epoch": 30.397905759162303,
"learning_rate": 0.00029966456342836505,
"loss": 2.3307212829589843,
"step": 14500
},
{
"epoch": 30.418848167539267,
"learning_rate": 0.0002995612842374884,
"loss": 2.3488508224487306,
"step": 14510
},
{
"epoch": 30.43979057591623,
"learning_rate": 0.0002994581117583693,
"loss": 2.2981189727783202,
"step": 14520
},
{
"epoch": 30.460732984293195,
"learning_rate": 0.00029935504580737006,
"loss": 2.292937088012695,
"step": 14530
},
{
"epoch": 30.48167539267016,
"learning_rate": 0.00029925208620129546,
"loss": 2.329487609863281,
"step": 14540
},
{
"epoch": 30.50261780104712,
"learning_rate": 0.0002991492327573909,
"loss": 2.275893974304199,
"step": 14550
},
{
"epoch": 30.523560209424083,
"learning_rate": 0.0002990464852933409,
"loss": 2.289459228515625,
"step": 14560
},
{
"epoch": 30.544502617801047,
"learning_rate": 0.0002989438436272684,
"loss": 2.2976861953735352,
"step": 14570
},
{
"epoch": 30.56544502617801,
"learning_rate": 0.00029884130757773275,
"loss": 2.319015884399414,
"step": 14580
},
{
"epoch": 30.586387434554975,
"learning_rate": 0.0002987388769637288,
"loss": 2.3237770080566404,
"step": 14590
},
{
"epoch": 30.607329842931936,
"learning_rate": 0.00029863655160468534,
"loss": 2.330046844482422,
"step": 14600
},
{
"epoch": 30.6282722513089,
"learning_rate": 0.0002985343313204637,
"loss": 2.328061103820801,
"step": 14610
},
{
"epoch": 30.649214659685864,
"learning_rate": 0.0002984322159313568,
"loss": 2.3274772644042967,
"step": 14620
},
{
"epoch": 30.670157068062828,
"learning_rate": 0.00029833020525808714,
"loss": 2.325545883178711,
"step": 14630
},
{
"epoch": 30.691099476439792,
"learning_rate": 0.00029822829912180636,
"loss": 2.3240276336669923,
"step": 14640
},
{
"epoch": 30.712041884816752,
"learning_rate": 0.0002981264973440931,
"loss": 2.324121856689453,
"step": 14650
},
{
"epoch": 30.732984293193716,
"learning_rate": 0.00029802479974695223,
"loss": 2.3233869552612303,
"step": 14660
},
{
"epoch": 30.75392670157068,
"learning_rate": 0.00029792320615281337,
"loss": 2.3022382736206053,
"step": 14670
},
{
"epoch": 30.774869109947645,
"learning_rate": 0.00029782171638452937,
"loss": 2.3219308853149414,
"step": 14680
},
{
"epoch": 30.79581151832461,
"learning_rate": 0.0002977203302653755,
"loss": 2.3212976455688477,
"step": 14690
},
{
"epoch": 30.81675392670157,
"learning_rate": 0.0002976190476190476,
"loss": 2.345839500427246,
"step": 14700
},
{
"epoch": 30.837696335078533,
"learning_rate": 0.0002975178682696613,
"loss": 2.2968841552734376,
"step": 14710
},
{
"epoch": 30.858638743455497,
"learning_rate": 0.0002974167920417504,
"loss": 2.313581848144531,
"step": 14720
},
{
"epoch": 30.87958115183246,
"learning_rate": 0.00029731581876026557,
"loss": 2.326977348327637,
"step": 14730
},
{
"epoch": 30.900523560209425,
"learning_rate": 0.00029721494825057357,
"loss": 2.3257322311401367,
"step": 14740
},
{
"epoch": 30.921465968586386,
"learning_rate": 0.00029711418033845523,
"loss": 2.285732460021973,
"step": 14750
},
{
"epoch": 30.94240837696335,
"learning_rate": 0.0002970135148501047,
"loss": 2.3275766372680664,
"step": 14760
},
{
"epoch": 30.963350785340314,
"learning_rate": 0.00029691295161212816,
"loss": 2.3182727813720705,
"step": 14770
},
{
"epoch": 30.984293193717278,
"learning_rate": 0.0002968124904515423,
"loss": 2.3104841232299806,
"step": 14780
},
{
"epoch": 30.99895287958115,
"eval_loss": 2.544727087020874,
"eval_runtime": 73.5548,
"eval_samples_per_second": 148.95,
"step": 14787
},
{
"epoch": 31.006282722513088,
"learning_rate": 0.00029671213119577346,
"loss": 2.4215261459350588,
"step": 14790
},
{
"epoch": 31.027225130890052,
"learning_rate": 0.00029661187367265593,
"loss": 2.3005090713500977,
"step": 14800
},
{
"epoch": 31.048167539267016,
"learning_rate": 0.0002965117177104311,
"loss": 2.3047313690185547,
"step": 14810
},
{
"epoch": 31.06910994764398,
"learning_rate": 0.0002964116631377459,
"loss": 2.3039810180664064,
"step": 14820
},
{
"epoch": 31.09005235602094,
"learning_rate": 0.000296311709783652,
"loss": 2.3020254135131837,
"step": 14830
},
{
"epoch": 31.110994764397905,
"learning_rate": 0.00029621185747760406,
"loss": 2.3200841903686524,
"step": 14840
},
{
"epoch": 31.13193717277487,
"learning_rate": 0.0002961121060494589,
"loss": 2.2955398559570312,
"step": 14850
},
{
"epoch": 31.152879581151833,
"learning_rate": 0.00029601245532947417,
"loss": 2.322628974914551,
"step": 14860
},
{
"epoch": 31.173821989528797,
"learning_rate": 0.0002959129051483069,
"loss": 2.3180873870849608,
"step": 14870
},
{
"epoch": 31.194764397905757,
"learning_rate": 0.00029581345533701285,
"loss": 2.299137306213379,
"step": 14880
},
{
"epoch": 31.21570680628272,
"learning_rate": 0.0002957141057270448,
"loss": 2.2992317199707033,
"step": 14890
},
{
"epoch": 31.236649214659685,
"learning_rate": 0.0002956148561502513,
"loss": 2.3339006423950197,
"step": 14900
},
{
"epoch": 31.25759162303665,
"learning_rate": 0.00029551570643887603,
"loss": 2.2937063217163085,
"step": 14910
},
{
"epoch": 31.278534031413614,
"learning_rate": 0.00029541665642555606,
"loss": 2.3086185455322266,
"step": 14920
},
{
"epoch": 31.299476439790578,
"learning_rate": 0.00029531770594332096,
"loss": 2.3051830291748048,
"step": 14930
},
{
"epoch": 31.320418848167538,
"learning_rate": 0.0002952188548255915,
"loss": 2.3165931701660156,
"step": 14940
},
{
"epoch": 31.341361256544502,
"learning_rate": 0.00029512010290617854,
"loss": 2.310456657409668,
"step": 14950
},
{
"epoch": 31.362303664921466,
"learning_rate": 0.0002950214500192816,
"loss": 2.3057369232177733,
"step": 14960
},
{
"epoch": 31.38324607329843,
"learning_rate": 0.00029492289599948834,
"loss": 2.316122627258301,
"step": 14970
},
{
"epoch": 31.404188481675394,
"learning_rate": 0.0002948244406817725,
"loss": 2.3287500381469726,
"step": 14980
},
{
"epoch": 31.425130890052355,
"learning_rate": 0.00029472608390149343,
"loss": 2.309092330932617,
"step": 14990
},
{
"epoch": 31.44607329842932,
"learning_rate": 0.00029462782549439473,
"loss": 2.331714057922363,
"step": 15000
}
],
"max_steps": 15000,
"num_train_epochs": 32,
"total_flos": 4125839411805155328,
"trial_name": null,
"trial_params": null
}