| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 31.44607329842932, | |
| "global_step": 15000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0020942408376963353, | |
| "learning_rate": 1.0206207261596573e-07, | |
| "loss": 42.81952667236328, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.020942408376963352, | |
| "learning_rate": 1.0206207261596575e-06, | |
| "loss": 43.49555460611979, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.041884816753926704, | |
| "learning_rate": 2.041241452319315e-06, | |
| "loss": 43.81195983886719, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.06282722513089005, | |
| "learning_rate": 3.0618621784789722e-06, | |
| "loss": 43.1638671875, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.08376963350785341, | |
| "learning_rate": 4.08248290463863e-06, | |
| "loss": 43.10018615722656, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.10471204188481675, | |
| "learning_rate": 5.103103630798286e-06, | |
| "loss": 42.361767578125, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.1256544502617801, | |
| "learning_rate": 6.1237243569579445e-06, | |
| "loss": 41.58636474609375, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.14659685863874344, | |
| "learning_rate": 7.144345083117603e-06, | |
| "loss": 40.42512817382813, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.16753926701570682, | |
| "learning_rate": 8.16496580927726e-06, | |
| "loss": 40.1504150390625, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.18848167539267016, | |
| "learning_rate": 9.185586535436916e-06, | |
| "loss": 39.03293151855469, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.2094240837696335, | |
| "learning_rate": 1.0206207261596573e-05, | |
| "loss": 38.355224609375, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.23036649214659685, | |
| "learning_rate": 1.1226827987756233e-05, | |
| "loss": 37.3426025390625, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.2513089005235602, | |
| "learning_rate": 1.2247448713915889e-05, | |
| "loss": 36.23554992675781, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.27225130890052357, | |
| "learning_rate": 1.3268069440075545e-05, | |
| "loss": 35.76961975097656, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.2931937172774869, | |
| "learning_rate": 1.4288690166235205e-05, | |
| "loss": 34.53816528320313, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.31413612565445026, | |
| "learning_rate": 1.530931089239486e-05, | |
| "loss": 33.968804931640626, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.33507853403141363, | |
| "learning_rate": 1.632993161855452e-05, | |
| "loss": 32.835089111328124, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.35602094240837695, | |
| "learning_rate": 1.7350552344714174e-05, | |
| "loss": 32.344024658203125, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.3769633507853403, | |
| "learning_rate": 1.8371173070873833e-05, | |
| "loss": 31.358078002929688, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.39790575916230364, | |
| "learning_rate": 1.939179379703349e-05, | |
| "loss": 30.222982788085936, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.418848167539267, | |
| "learning_rate": 2.0412414523193145e-05, | |
| "loss": 29.279229736328126, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.4397905759162304, | |
| "learning_rate": 2.1433035249352804e-05, | |
| "loss": 28.632891845703124, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.4607329842931937, | |
| "learning_rate": 2.2453655975512465e-05, | |
| "loss": 27.667315673828124, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.4816753926701571, | |
| "learning_rate": 2.347427670167212e-05, | |
| "loss": 26.753375244140624, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.5026178010471204, | |
| "learning_rate": 2.4494897427831778e-05, | |
| "loss": 26.086724853515626, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.5235602094240838, | |
| "learning_rate": 2.5515518153991436e-05, | |
| "loss": 24.617787170410157, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.5445026178010471, | |
| "learning_rate": 2.653613888015109e-05, | |
| "loss": 23.82097473144531, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.5654450261780105, | |
| "learning_rate": 2.755675960631075e-05, | |
| "loss": 22.989166259765625, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.5863874345549738, | |
| "learning_rate": 2.857738033247041e-05, | |
| "loss": 22.07659454345703, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.6073298429319371, | |
| "learning_rate": 2.9598001058630065e-05, | |
| "loss": 21.30054168701172, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.6282722513089005, | |
| "learning_rate": 3.061862178478972e-05, | |
| "loss": 20.448097229003906, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6492146596858639, | |
| "learning_rate": 3.163924251094938e-05, | |
| "loss": 19.719020080566406, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.6701570680628273, | |
| "learning_rate": 3.265986323710904e-05, | |
| "loss": 19.040341186523438, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.6910994764397905, | |
| "learning_rate": 3.3680483963268694e-05, | |
| "loss": 18.16093292236328, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.7120418848167539, | |
| "learning_rate": 3.470110468942835e-05, | |
| "loss": 17.382850646972656, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.7329842931937173, | |
| "learning_rate": 3.5721725415588004e-05, | |
| "loss": 16.651702880859375, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.7539267015706806, | |
| "learning_rate": 3.6742346141747665e-05, | |
| "loss": 16.079180908203124, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.774869109947644, | |
| "learning_rate": 3.7762966867907327e-05, | |
| "loss": 15.322096252441407, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.7958115183246073, | |
| "learning_rate": 3.878358759406698e-05, | |
| "loss": 14.650254821777343, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.8167539267015707, | |
| "learning_rate": 3.980420832022664e-05, | |
| "loss": 14.0874267578125, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.837696335078534, | |
| "learning_rate": 4.082482904638629e-05, | |
| "loss": 13.5120849609375, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.8586387434554974, | |
| "learning_rate": 4.184544977254595e-05, | |
| "loss": 12.803053283691407, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.8795811518324608, | |
| "learning_rate": 4.286607049870561e-05, | |
| "loss": 12.207872009277343, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.900523560209424, | |
| "learning_rate": 4.388669122486527e-05, | |
| "loss": 11.640280151367188, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.9214659685863874, | |
| "learning_rate": 4.490731195102493e-05, | |
| "loss": 11.116693878173828, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.9424083769633508, | |
| "learning_rate": 4.5927932677184585e-05, | |
| "loss": 10.586290740966797, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.9633507853403142, | |
| "learning_rate": 4.694855340334424e-05, | |
| "loss": 10.197176361083985, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.9842931937172775, | |
| "learning_rate": 4.7969174129503894e-05, | |
| "loss": 9.696656036376954, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.9989528795811519, | |
| "eval_loss": 7.631092548370361, | |
| "eval_runtime": 73.7635, | |
| "eval_samples_per_second": 148.529, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 1.006282722513089, | |
| "learning_rate": 4.8989794855663556e-05, | |
| "loss": 9.649329376220702, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.0272251308900524, | |
| "learning_rate": 5.001041558182322e-05, | |
| "loss": 8.800138092041015, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.0481675392670158, | |
| "learning_rate": 5.103103630798287e-05, | |
| "loss": 8.46273422241211, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.0691099476439792, | |
| "learning_rate": 5.205165703414253e-05, | |
| "loss": 8.093246459960938, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.0900523560209425, | |
| "learning_rate": 5.307227776030218e-05, | |
| "loss": 7.800753021240235, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.1109947643979057, | |
| "learning_rate": 5.409289848646184e-05, | |
| "loss": 7.580593872070312, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.131937172774869, | |
| "learning_rate": 5.51135192126215e-05, | |
| "loss": 7.297437286376953, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.1528795811518324, | |
| "learning_rate": 5.613413993878116e-05, | |
| "loss": 7.115350341796875, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.1738219895287958, | |
| "learning_rate": 5.715476066494082e-05, | |
| "loss": 6.890144348144531, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.1947643979057592, | |
| "learning_rate": 5.817538139110047e-05, | |
| "loss": 6.647792053222656, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.2157068062827225, | |
| "learning_rate": 5.919600211726013e-05, | |
| "loss": 6.418707275390625, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.236649214659686, | |
| "learning_rate": 6.0216622843419785e-05, | |
| "loss": 6.1961822509765625, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.2575916230366493, | |
| "learning_rate": 6.123724356957945e-05, | |
| "loss": 6.020335388183594, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.2785340314136127, | |
| "learning_rate": 6.22578642957391e-05, | |
| "loss": 5.866229248046875, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.2994764397905758, | |
| "learning_rate": 6.327848502189876e-05, | |
| "loss": 5.687960433959961, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.3204188481675394, | |
| "learning_rate": 6.429910574805841e-05, | |
| "loss": 5.516669082641601, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.3413612565445026, | |
| "learning_rate": 6.531972647421808e-05, | |
| "loss": 5.365422058105469, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.362303664921466, | |
| "learning_rate": 6.634034720037773e-05, | |
| "loss": 5.2304027557373045, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.3832460732984293, | |
| "learning_rate": 6.736096792653739e-05, | |
| "loss": 5.1493980407714846, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.4041884816753927, | |
| "learning_rate": 6.838158865269704e-05, | |
| "loss": 5.069922256469726, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.425130890052356, | |
| "learning_rate": 6.94022093788567e-05, | |
| "loss": 4.946885299682617, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.4460732984293194, | |
| "learning_rate": 7.042283010501637e-05, | |
| "loss": 4.852196502685547, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.4670157068062828, | |
| "learning_rate": 7.144345083117601e-05, | |
| "loss": 4.79791488647461, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.487958115183246, | |
| "learning_rate": 7.246407155733568e-05, | |
| "loss": 4.701091766357422, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.5089005235602095, | |
| "learning_rate": 7.348469228349533e-05, | |
| "loss": 4.629792404174805, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.5298429319371727, | |
| "learning_rate": 7.450531300965498e-05, | |
| "loss": 4.491447067260742, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.5507853403141363, | |
| "learning_rate": 7.552593373581465e-05, | |
| "loss": 4.365177917480469, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.5717277486910994, | |
| "learning_rate": 7.654655446197431e-05, | |
| "loss": 4.266152572631836, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.5926701570680628, | |
| "learning_rate": 7.756717518813396e-05, | |
| "loss": 4.203376770019531, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.6136125654450262, | |
| "learning_rate": 7.858779591429362e-05, | |
| "loss": 4.128662872314453, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.6345549738219896, | |
| "learning_rate": 7.960841664045329e-05, | |
| "loss": 4.131737899780274, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.655497382198953, | |
| "learning_rate": 8.062903736661294e-05, | |
| "loss": 4.008557891845703, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.676439790575916, | |
| "learning_rate": 8.164965809277258e-05, | |
| "loss": 3.9548309326171873, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.6973821989528797, | |
| "learning_rate": 8.267027881893225e-05, | |
| "loss": 3.903990936279297, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.7183246073298428, | |
| "learning_rate": 8.36908995450919e-05, | |
| "loss": 3.8517215728759764, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.7392670157068064, | |
| "learning_rate": 8.471152027125156e-05, | |
| "loss": 3.815013122558594, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.7602094240837696, | |
| "learning_rate": 8.573214099741121e-05, | |
| "loss": 3.762827682495117, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.781151832460733, | |
| "learning_rate": 8.675276172357088e-05, | |
| "loss": 3.739139938354492, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.8020942408376963, | |
| "learning_rate": 8.777338244973054e-05, | |
| "loss": 3.6771942138671876, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.8230366492146597, | |
| "learning_rate": 8.879400317589019e-05, | |
| "loss": 3.671974945068359, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.843979057591623, | |
| "learning_rate": 8.981462390204986e-05, | |
| "loss": 3.6025531768798826, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.8649214659685864, | |
| "learning_rate": 9.083524462820951e-05, | |
| "loss": 3.580000305175781, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.8858638743455498, | |
| "learning_rate": 9.185586535436917e-05, | |
| "loss": 3.570189666748047, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.906806282722513, | |
| "learning_rate": 9.287648608052881e-05, | |
| "loss": 3.5345611572265625, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.9277486910994766, | |
| "learning_rate": 9.389710680668848e-05, | |
| "loss": 3.4658973693847654, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.9486910994764397, | |
| "learning_rate": 9.491772753284813e-05, | |
| "loss": 3.4885902404785156, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.9696335078534033, | |
| "learning_rate": 9.593834825900779e-05, | |
| "loss": 3.438787078857422, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.9905759162303664, | |
| "learning_rate": 9.695896898516746e-05, | |
| "loss": 3.434320831298828, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.998952879581152, | |
| "eval_loss": 3.1282973289489746, | |
| "eval_runtime": 73.33, | |
| "eval_samples_per_second": 149.407, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 2.012565445026178, | |
| "learning_rate": 9.797958971132711e-05, | |
| "loss": 3.5866859436035154, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.033507853403141, | |
| "learning_rate": 9.900021043748677e-05, | |
| "loss": 3.4268508911132813, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.054450261780105, | |
| "learning_rate": 0.00010002083116364643, | |
| "loss": 3.3977298736572266, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.075392670157068, | |
| "learning_rate": 0.00010104145188980609, | |
| "loss": 3.349309539794922, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.0963350785340316, | |
| "learning_rate": 0.00010206207261596574, | |
| "loss": 3.4084583282470704, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.1172774869109947, | |
| "learning_rate": 0.0001030826933421254, | |
| "loss": 3.3497646331787108, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.1382198952879583, | |
| "learning_rate": 0.00010410331406828505, | |
| "loss": 3.2944507598876953, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.1591623036649215, | |
| "learning_rate": 0.00010512393479444471, | |
| "loss": 3.312997055053711, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.180104712041885, | |
| "learning_rate": 0.00010614455552060436, | |
| "loss": 3.3428993225097656, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.201047120418848, | |
| "learning_rate": 0.00010716517624676403, | |
| "loss": 3.292295455932617, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.2219895287958114, | |
| "learning_rate": 0.00010818579697292369, | |
| "loss": 3.291975402832031, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.242931937172775, | |
| "learning_rate": 0.00010920641769908334, | |
| "loss": 3.2534629821777346, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.263874345549738, | |
| "learning_rate": 0.000110227038425243, | |
| "loss": 3.247083282470703, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.2848167539267017, | |
| "learning_rate": 0.00011124765915140266, | |
| "loss": 3.253615951538086, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.305759162303665, | |
| "learning_rate": 0.00011226827987756232, | |
| "loss": 3.2301055908203127, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.3267015706806284, | |
| "learning_rate": 0.00011328890060372197, | |
| "loss": 3.241224670410156, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.3476439790575916, | |
| "learning_rate": 0.00011430952132988164, | |
| "loss": 3.210051345825195, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.368586387434555, | |
| "learning_rate": 0.00011533014205604128, | |
| "loss": 3.225263214111328, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.3895287958115183, | |
| "learning_rate": 0.00011635076278220094, | |
| "loss": 3.1904216766357423, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.4104712041884815, | |
| "learning_rate": 0.00011737138350836059, | |
| "loss": 3.230540466308594, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.431413612565445, | |
| "learning_rate": 0.00011839200423452026, | |
| "loss": 3.191972351074219, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.4523560209424082, | |
| "learning_rate": 0.00011941262496067991, | |
| "loss": 3.191108512878418, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.473298429319372, | |
| "learning_rate": 0.00012043324568683957, | |
| "loss": 3.192665863037109, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.494240837696335, | |
| "learning_rate": 0.00012145386641299924, | |
| "loss": 3.1610179901123048, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.5151832460732986, | |
| "learning_rate": 0.0001224744871391589, | |
| "loss": 3.1794748306274414, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.5361256544502617, | |
| "learning_rate": 0.00012349510786531856, | |
| "loss": 3.2144695281982423, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.5570680628272253, | |
| "learning_rate": 0.0001245157285914782, | |
| "loss": 3.147447204589844, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.5780104712041885, | |
| "learning_rate": 0.00012553634931763784, | |
| "loss": 3.1747854232788084, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.5989528795811516, | |
| "learning_rate": 0.0001265569700437975, | |
| "loss": 3.137411880493164, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.619895287958115, | |
| "learning_rate": 0.00012757759076995718, | |
| "loss": 3.157614326477051, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.640837696335079, | |
| "learning_rate": 0.00012859821149611682, | |
| "loss": 3.1284500122070313, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.661780104712042, | |
| "learning_rate": 0.0001296188322222765, | |
| "loss": 3.168661117553711, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.682722513089005, | |
| "learning_rate": 0.00013063945294843616, | |
| "loss": 3.1208589553833006, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.7036649214659687, | |
| "learning_rate": 0.0001316600736745958, | |
| "loss": 3.177284049987793, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.724607329842932, | |
| "learning_rate": 0.00013268069440075547, | |
| "loss": 3.129215431213379, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.7455497382198955, | |
| "learning_rate": 0.00013370131512691514, | |
| "loss": 3.1385051727294924, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.7664921465968586, | |
| "learning_rate": 0.00013472193585307478, | |
| "loss": 3.093943977355957, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.7874345549738218, | |
| "learning_rate": 0.00013574255657923444, | |
| "loss": 3.125334358215332, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.8083769633507853, | |
| "learning_rate": 0.00013676317730539409, | |
| "loss": 3.1063247680664063, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.829319371727749, | |
| "learning_rate": 0.00013778379803155375, | |
| "loss": 3.153512382507324, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.850261780104712, | |
| "learning_rate": 0.0001388044187577134, | |
| "loss": 3.0612106323242188, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 2.8712041884816752, | |
| "learning_rate": 0.00013982503948387306, | |
| "loss": 3.0893680572509767, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 2.892146596858639, | |
| "learning_rate": 0.00014084566021003273, | |
| "loss": 3.0948190689086914, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 2.913089005235602, | |
| "learning_rate": 0.00014186628093619237, | |
| "loss": 3.052564811706543, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 2.9340314136125656, | |
| "learning_rate": 0.00014288690166235201, | |
| "loss": 3.061579132080078, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.9549738219895287, | |
| "learning_rate": 0.00014390752238851168, | |
| "loss": 3.0893646240234376, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 2.975916230366492, | |
| "learning_rate": 0.00014492814311467135, | |
| "loss": 3.0637126922607423, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 2.9968586387434555, | |
| "learning_rate": 0.000145948763840831, | |
| "loss": 3.063129425048828, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 2.998952879581152, | |
| "eval_loss": 2.8699653148651123, | |
| "eval_runtime": 73.0344, | |
| "eval_samples_per_second": 150.012, | |
| "step": 1431 | |
| }, | |
| { | |
| "epoch": 3.018848167539267, | |
| "learning_rate": 0.00014696938456699066, | |
| "loss": 3.175088310241699, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 3.0397905759162303, | |
| "learning_rate": 0.00014799000529315033, | |
| "loss": 3.0708381652832033, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 3.060732984293194, | |
| "learning_rate": 0.00014901062601930997, | |
| "loss": 3.043408012390137, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 3.081675392670157, | |
| "learning_rate": 0.00015003124674546964, | |
| "loss": 3.039161491394043, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 3.1026178010471206, | |
| "learning_rate": 0.0001510518674716293, | |
| "loss": 3.0207361221313476, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 3.1235602094240837, | |
| "learning_rate": 0.00015207248819778895, | |
| "loss": 3.0527109146118163, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 3.144502617801047, | |
| "learning_rate": 0.00015309310892394862, | |
| "loss": 3.045629692077637, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 3.1654450261780105, | |
| "learning_rate": 0.00015411372965010828, | |
| "loss": 3.0113618850708006, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 3.1863874345549736, | |
| "learning_rate": 0.00015513435037626793, | |
| "loss": 3.0372419357299805, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 3.2073298429319372, | |
| "learning_rate": 0.0001561549711024276, | |
| "loss": 3.0219789505004884, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 3.2282722513089004, | |
| "learning_rate": 0.00015717559182858723, | |
| "loss": 3.0140264511108397, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 3.249214659685864, | |
| "learning_rate": 0.0001581962125547469, | |
| "loss": 3.039451789855957, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 3.270157068062827, | |
| "learning_rate": 0.00015921683328090657, | |
| "loss": 3.0277042388916016, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 3.2910994764397907, | |
| "learning_rate": 0.0001602374540070662, | |
| "loss": 3.0352230072021484, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 3.312041884816754, | |
| "learning_rate": 0.00016125807473322588, | |
| "loss": 3.0115480422973633, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 3.332984293193717, | |
| "learning_rate": 0.00016227869545938555, | |
| "loss": 3.008187103271484, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 3.3539267015706806, | |
| "learning_rate": 0.00016329931618554516, | |
| "loss": 3.02030086517334, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 3.374869109947644, | |
| "learning_rate": 0.00016431993691170483, | |
| "loss": 3.0034799575805664, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 3.3958115183246074, | |
| "learning_rate": 0.0001653405576378645, | |
| "loss": 3.0058149337768554, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 3.4167539267015705, | |
| "learning_rate": 0.00016636117836402414, | |
| "loss": 2.9678651809692385, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 3.437696335078534, | |
| "learning_rate": 0.0001673817990901838, | |
| "loss": 2.999662399291992, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 3.4586387434554973, | |
| "learning_rate": 0.00016840241981634345, | |
| "loss": 2.9900545120239257, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 3.479581151832461, | |
| "learning_rate": 0.00016942304054250312, | |
| "loss": 2.967659759521484, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 3.500523560209424, | |
| "learning_rate": 0.0001704436612686628, | |
| "loss": 2.9933372497558595, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 3.521465968586387, | |
| "learning_rate": 0.00017146428199482243, | |
| "loss": 2.9620410919189455, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 3.5424083769633508, | |
| "learning_rate": 0.0001724849027209821, | |
| "loss": 2.973393440246582, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 3.5633507853403144, | |
| "learning_rate": 0.00017350552344714176, | |
| "loss": 2.9914371490478517, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 3.5842931937172775, | |
| "learning_rate": 0.0001745261441733014, | |
| "loss": 2.959955596923828, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 3.6052356020942407, | |
| "learning_rate": 0.00017554676489946107, | |
| "loss": 2.947012519836426, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 3.6261780104712042, | |
| "learning_rate": 0.00017656738562562074, | |
| "loss": 2.9780450820922852, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 3.6471204188481674, | |
| "learning_rate": 0.00017758800635178038, | |
| "loss": 2.9911376953125, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 3.668062827225131, | |
| "learning_rate": 0.00017860862707794005, | |
| "loss": 2.936799430847168, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 3.689005235602094, | |
| "learning_rate": 0.00017962924780409972, | |
| "loss": 2.9443117141723634, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 3.7099476439790577, | |
| "learning_rate": 0.00018064986853025936, | |
| "loss": 2.9714879989624023, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 3.730890052356021, | |
| "learning_rate": 0.00018167048925641903, | |
| "loss": 2.9554422378540037, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 3.7518324607329845, | |
| "learning_rate": 0.0001826911099825787, | |
| "loss": 2.971892738342285, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 3.7727748691099476, | |
| "learning_rate": 0.00018371173070873834, | |
| "loss": 2.9589488983154295, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 3.793717277486911, | |
| "learning_rate": 0.000184732351434898, | |
| "loss": 2.944401741027832, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 3.8146596858638744, | |
| "learning_rate": 0.00018575297216105762, | |
| "loss": 2.926609420776367, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 3.8356020942408375, | |
| "learning_rate": 0.0001867735928872173, | |
| "loss": 2.906996726989746, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 3.856544502617801, | |
| "learning_rate": 0.00018779421361337696, | |
| "loss": 2.9651628494262696, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 3.8774869109947643, | |
| "learning_rate": 0.0001888148343395366, | |
| "loss": 2.94600887298584, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 3.898429319371728, | |
| "learning_rate": 0.00018983545506569627, | |
| "loss": 2.915750503540039, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 3.919371727748691, | |
| "learning_rate": 0.00019085607579185594, | |
| "loss": 2.942264747619629, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 3.9403141361256546, | |
| "learning_rate": 0.00019187669651801558, | |
| "loss": 2.9150556564331054, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 3.9612565445026178, | |
| "learning_rate": 0.00019289731724417525, | |
| "loss": 2.9057634353637694, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 3.982198952879581, | |
| "learning_rate": 0.0001939179379703349, | |
| "loss": 2.934947967529297, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 3.998952879581152, | |
| "eval_loss": 2.7904012203216553, | |
| "eval_runtime": 73.0809, | |
| "eval_samples_per_second": 149.916, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 4.004188481675393, | |
| "learning_rate": 0.00019493855869649455, | |
| "loss": 3.0482250213623048, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 4.025130890052356, | |
| "learning_rate": 0.00019595917942265422, | |
| "loss": 2.9126008987426757, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 4.046073298429319, | |
| "learning_rate": 0.0001969798001488139, | |
| "loss": 2.903646469116211, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 4.067015706806282, | |
| "learning_rate": 0.00019800042087497353, | |
| "loss": 2.9102720260620116, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 4.0879581151832465, | |
| "learning_rate": 0.0001990210416011332, | |
| "loss": 2.9236400604248045, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 4.10890052356021, | |
| "learning_rate": 0.00020004166232729287, | |
| "loss": 2.8818483352661133, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 4.129842931937173, | |
| "learning_rate": 0.0002010622830534525, | |
| "loss": 2.9328163146972654, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 4.150785340314136, | |
| "learning_rate": 0.00020208290377961218, | |
| "loss": 2.896937370300293, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 4.171727748691099, | |
| "learning_rate": 0.00020310352450577185, | |
| "loss": 2.9203187942504885, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 4.192670157068063, | |
| "learning_rate": 0.0002041241452319315, | |
| "loss": 2.887624740600586, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 4.213612565445026, | |
| "learning_rate": 0.00020514476595809116, | |
| "loss": 2.89230899810791, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 4.234554973821989, | |
| "learning_rate": 0.0002061653866842508, | |
| "loss": 2.8896648406982424, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 4.255497382198953, | |
| "learning_rate": 0.00020718600741041044, | |
| "loss": 2.922181510925293, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 4.276439790575917, | |
| "learning_rate": 0.0002082066281365701, | |
| "loss": 2.888766860961914, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 4.29738219895288, | |
| "learning_rate": 0.00020922724886272975, | |
| "loss": 2.8884586334228515, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 4.318324607329843, | |
| "learning_rate": 0.00021024786958888942, | |
| "loss": 2.8894960403442385, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 4.339267015706806, | |
| "learning_rate": 0.00021126849031504908, | |
| "loss": 2.88952522277832, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 4.36020942408377, | |
| "learning_rate": 0.00021228911104120873, | |
| "loss": 2.8968246459960936, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 4.381151832460733, | |
| "learning_rate": 0.0002133097317673684, | |
| "loss": 2.8720794677734376, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 4.402094240837696, | |
| "learning_rate": 0.00021433035249352806, | |
| "loss": 2.9035682678222656, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 4.4230366492146596, | |
| "learning_rate": 0.0002153509732196877, | |
| "loss": 2.8975749969482423, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 4.443979057591623, | |
| "learning_rate": 0.00021637159394584737, | |
| "loss": 2.8942371368408204, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 4.464921465968587, | |
| "learning_rate": 0.00021739221467200704, | |
| "loss": 2.8582950592041017, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 4.48586387434555, | |
| "learning_rate": 0.00021841283539816668, | |
| "loss": 2.8642202377319337, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 4.506806282722513, | |
| "learning_rate": 0.00021943345612432635, | |
| "loss": 2.8771089553833007, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 4.527748691099476, | |
| "learning_rate": 0.000220454076850486, | |
| "loss": 2.8463533401489256, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 4.548691099476439, | |
| "learning_rate": 0.00022147469757664566, | |
| "loss": 2.865732765197754, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 4.569633507853403, | |
| "learning_rate": 0.00022249531830280533, | |
| "loss": 2.8657468795776366, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 4.5905759162303665, | |
| "learning_rate": 0.00022351593902896497, | |
| "loss": 2.8737346649169924, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 4.61151832460733, | |
| "learning_rate": 0.00022453655975512464, | |
| "loss": 2.906464385986328, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 4.632460732984293, | |
| "learning_rate": 0.0002255571804812843, | |
| "loss": 2.8711727142333983, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 4.653403141361257, | |
| "learning_rate": 0.00022657780120744395, | |
| "loss": 2.866417121887207, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 4.67434554973822, | |
| "learning_rate": 0.00022759842193360361, | |
| "loss": 2.873806190490723, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 4.695287958115183, | |
| "learning_rate": 0.00022861904265976328, | |
| "loss": 2.867740249633789, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 4.716230366492146, | |
| "learning_rate": 0.0002296396633859229, | |
| "loss": 2.848478317260742, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 4.73717277486911, | |
| "learning_rate": 0.00023066028411208256, | |
| "loss": 2.865742492675781, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 4.7581151832460735, | |
| "learning_rate": 0.00023168090483824223, | |
| "loss": 2.8338348388671877, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 4.779057591623037, | |
| "learning_rate": 0.00023270152556440187, | |
| "loss": 2.848302459716797, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "learning_rate": 0.00023372214629056154, | |
| "loss": 2.847154235839844, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 4.820942408376963, | |
| "learning_rate": 0.00023474276701672118, | |
| "loss": 2.889314651489258, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 4.841884816753927, | |
| "learning_rate": 0.00023576338774288085, | |
| "loss": 2.8715303421020506, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 4.86282722513089, | |
| "learning_rate": 0.00023678400846904052, | |
| "loss": 2.8583951950073243, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 4.883769633507853, | |
| "learning_rate": 0.00023780462919520016, | |
| "loss": 2.865638542175293, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 4.9047120418848165, | |
| "learning_rate": 0.00023882524992135983, | |
| "loss": 2.8511993408203127, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 4.92565445026178, | |
| "learning_rate": 0.0002398458706475195, | |
| "loss": 2.8662994384765623, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 4.946596858638744, | |
| "learning_rate": 0.00024086649137367914, | |
| "loss": 2.829090690612793, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 4.967539267015707, | |
| "learning_rate": 0.0002418871120998388, | |
| "loss": 2.8659574508666994, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 4.98848167539267, | |
| "learning_rate": 0.00024290773282599848, | |
| "loss": 2.8302743911743162, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 4.998952879581152, | |
| "eval_loss": 2.730773687362671, | |
| "eval_runtime": 73.1511, | |
| "eval_samples_per_second": 149.772, | |
| "step": 2385 | |
| }, | |
| { | |
| "epoch": 5.010471204188482, | |
| "learning_rate": 0.00024392835355215812, | |
| "loss": 2.994700050354004, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 5.031413612565445, | |
| "learning_rate": 0.0002449489742783178, | |
| "loss": 2.8189886093139647, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 5.052356020942408, | |
| "learning_rate": 0.00024596959500447745, | |
| "loss": 2.818514823913574, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 5.0732984293193715, | |
| "learning_rate": 0.0002469902157306371, | |
| "loss": 2.8165609359741213, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 5.0942408376963355, | |
| "learning_rate": 0.00024801083645679674, | |
| "loss": 2.8114208221435546, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 5.115183246073299, | |
| "learning_rate": 0.0002490314571829564, | |
| "loss": 2.841958236694336, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 5.136125654450262, | |
| "learning_rate": 0.00025005207790911607, | |
| "loss": 2.8192333221435546, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 5.157068062827225, | |
| "learning_rate": 0.0002510726986352757, | |
| "loss": 2.842849540710449, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 5.178010471204188, | |
| "learning_rate": 0.00025209331936143535, | |
| "loss": 2.797623062133789, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 5.198952879581152, | |
| "learning_rate": 0.000253113940087595, | |
| "loss": 2.8256582260131835, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 5.219895287958115, | |
| "learning_rate": 0.0002541345608137547, | |
| "loss": 2.853096771240234, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 5.2408376963350785, | |
| "learning_rate": 0.00025515518153991436, | |
| "loss": 2.8135236740112304, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 5.261780104712042, | |
| "learning_rate": 0.00025617580226607403, | |
| "loss": 2.826374053955078, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 5.282722513089006, | |
| "learning_rate": 0.00025719642299223364, | |
| "loss": 2.8227916717529298, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 5.303664921465969, | |
| "learning_rate": 0.0002582170437183933, | |
| "loss": 2.800633430480957, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 5.324607329842932, | |
| "learning_rate": 0.000259237664444553, | |
| "loss": 2.8282939910888674, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 5.345549738219895, | |
| "learning_rate": 0.00026025828517071265, | |
| "loss": 2.789654350280762, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 5.366492146596858, | |
| "learning_rate": 0.0002612789058968723, | |
| "loss": 2.832943916320801, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 5.387434554973822, | |
| "learning_rate": 0.00026229952662303193, | |
| "loss": 2.7946260452270506, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 5.408376963350785, | |
| "learning_rate": 0.0002633201473491916, | |
| "loss": 2.8106937408447266, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 5.429319371727749, | |
| "learning_rate": 0.00026434076807535127, | |
| "loss": 2.807754135131836, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 5.450261780104712, | |
| "learning_rate": 0.00026536138880151093, | |
| "loss": 2.809930992126465, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 5.471204188481676, | |
| "learning_rate": 0.0002663820095276706, | |
| "loss": 2.824372100830078, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 5.492146596858639, | |
| "learning_rate": 0.00026740263025383027, | |
| "loss": 2.797639846801758, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 5.513089005235602, | |
| "learning_rate": 0.0002684232509799899, | |
| "loss": 2.7917612075805662, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 5.534031413612565, | |
| "learning_rate": 0.00026944387170614955, | |
| "loss": 2.818395233154297, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 5.554973821989529, | |
| "learning_rate": 0.0002704644924323092, | |
| "loss": 2.7933046340942385, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 5.575916230366492, | |
| "learning_rate": 0.0002714851131584689, | |
| "loss": 2.794571876525879, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 5.596858638743456, | |
| "learning_rate": 0.0002725057338846285, | |
| "loss": 2.8182382583618164, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 5.617801047120419, | |
| "learning_rate": 0.00027352635461078817, | |
| "loss": 2.80462703704834, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 5.638743455497382, | |
| "learning_rate": 0.00027454697533694784, | |
| "loss": 2.793869400024414, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 5.659685863874346, | |
| "learning_rate": 0.0002755675960631075, | |
| "loss": 2.7867889404296875, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 5.680628272251309, | |
| "learning_rate": 0.0002765882167892671, | |
| "loss": 2.759464073181152, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 5.701570680628272, | |
| "learning_rate": 0.0002776088375154268, | |
| "loss": 2.7763525009155274, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 5.722513089005235, | |
| "learning_rate": 0.00027862945824158646, | |
| "loss": 2.7857263565063475, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 5.7434554973821985, | |
| "learning_rate": 0.00027965007896774613, | |
| "loss": 2.793513298034668, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 5.7643979057591626, | |
| "learning_rate": 0.0002806706996939058, | |
| "loss": 2.780983543395996, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 5.785340314136126, | |
| "learning_rate": 0.00028169132042006546, | |
| "loss": 2.762991714477539, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 5.806282722513089, | |
| "learning_rate": 0.00028271194114622513, | |
| "loss": 2.7711687088012695, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 5.827225130890052, | |
| "learning_rate": 0.00028373256187238475, | |
| "loss": 2.7514936447143556, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 5.848167539267016, | |
| "learning_rate": 0.0002847531825985444, | |
| "loss": 2.7642467498779295, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 5.869109947643979, | |
| "learning_rate": 0.00028577380332470403, | |
| "loss": 2.766267776489258, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 5.890052356020942, | |
| "learning_rate": 0.0002867944240508637, | |
| "loss": 2.7898386001586912, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 5.9109947643979055, | |
| "learning_rate": 0.00028781504477702337, | |
| "loss": 2.7557825088500976, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 5.9319371727748695, | |
| "learning_rate": 0.00028883566550318303, | |
| "loss": 2.7799331665039064, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 5.952879581151833, | |
| "learning_rate": 0.0002898562862293427, | |
| "loss": 2.7844886779785156, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 5.973821989528796, | |
| "learning_rate": 0.0002908769069555023, | |
| "loss": 2.791143608093262, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 5.994764397905759, | |
| "learning_rate": 0.000291897527681662, | |
| "loss": 2.7879209518432617, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 5.998952879581152, | |
| "eval_loss": 2.704967737197876, | |
| "eval_runtime": 73.9106, | |
| "eval_samples_per_second": 148.233, | |
| "step": 2862 | |
| }, | |
| { | |
| "epoch": 6.016753926701571, | |
| "learning_rate": 0.00029291814840782165, | |
| "loss": 2.8958906173706054, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 6.037696335078534, | |
| "learning_rate": 0.0002939387691339813, | |
| "loss": 2.76577205657959, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 6.058638743455497, | |
| "learning_rate": 0.000294959389860141, | |
| "loss": 2.792632484436035, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 6.0795811518324605, | |
| "learning_rate": 0.00029598001058630066, | |
| "loss": 2.7835086822509765, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 6.100523560209424, | |
| "learning_rate": 0.00029700063131246027, | |
| "loss": 2.761422538757324, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 6.121465968586388, | |
| "learning_rate": 0.00029802125203861994, | |
| "loss": 2.763009452819824, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 6.142408376963351, | |
| "learning_rate": 0.0002990418727647796, | |
| "loss": 2.763312339782715, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 6.163350785340314, | |
| "learning_rate": 0.0003000624934909393, | |
| "loss": 2.764869499206543, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 6.184293193717277, | |
| "learning_rate": 0.00030108311421709894, | |
| "loss": 2.741547393798828, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 6.205235602094241, | |
| "learning_rate": 0.0003021037349432586, | |
| "loss": 2.7309391021728517, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 6.226178010471204, | |
| "learning_rate": 0.0003031243556694182, | |
| "loss": 2.7382484436035157, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 6.2471204188481675, | |
| "learning_rate": 0.0003041449763955779, | |
| "loss": 2.73870849609375, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 6.268062827225131, | |
| "learning_rate": 0.00030516559712173756, | |
| "loss": 2.7489850997924803, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 6.289005235602094, | |
| "learning_rate": 0.00030618621784789723, | |
| "loss": 2.720503807067871, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 6.309947643979058, | |
| "learning_rate": 0.0003072068385740569, | |
| "loss": 2.770734977722168, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 6.330890052356021, | |
| "learning_rate": 0.00030822745930021657, | |
| "loss": 2.7358495712280275, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 6.351832460732984, | |
| "learning_rate": 0.0003092480800263762, | |
| "loss": 2.7683557510375976, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 6.372774869109947, | |
| "learning_rate": 0.00031026870075253585, | |
| "loss": 2.7386125564575194, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 6.393717277486911, | |
| "learning_rate": 0.0003112893214786955, | |
| "loss": 2.730208396911621, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 6.4146596858638745, | |
| "learning_rate": 0.0003123099422048552, | |
| "loss": 2.753367042541504, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 6.435602094240838, | |
| "learning_rate": 0.00031333056293101486, | |
| "loss": 2.738628959655762, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 6.456544502617801, | |
| "learning_rate": 0.00031435118365717447, | |
| "loss": 2.7470897674560546, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 6.477486910994765, | |
| "learning_rate": 0.00031537180438333414, | |
| "loss": 2.7530258178710936, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 6.498429319371728, | |
| "learning_rate": 0.0003163924251094938, | |
| "loss": 2.7581613540649412, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 6.519371727748691, | |
| "learning_rate": 0.0003174130458356535, | |
| "loss": 2.719674301147461, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 6.540314136125654, | |
| "learning_rate": 0.00031843366656181314, | |
| "loss": 2.7147310256958006, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 6.561256544502617, | |
| "learning_rate": 0.0003194542872879728, | |
| "loss": 2.731971549987793, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 6.5821989528795815, | |
| "learning_rate": 0.0003204749080141324, | |
| "loss": 2.73608512878418, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 6.603141361256545, | |
| "learning_rate": 0.0003214955287402921, | |
| "loss": 2.7456966400146485, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 6.624083769633508, | |
| "learning_rate": 0.00032251614946645176, | |
| "loss": 2.7154998779296875, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 6.645026178010471, | |
| "learning_rate": 0.00032353677019261143, | |
| "loss": 2.737691116333008, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 6.665968586387434, | |
| "learning_rate": 0.0003245573909187711, | |
| "loss": 2.734482192993164, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 6.686910994764398, | |
| "learning_rate": 0.00032557801164493077, | |
| "loss": 2.74371395111084, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 6.707853403141361, | |
| "learning_rate": 0.0003265986323710903, | |
| "loss": 2.7436079025268554, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 6.728795811518324, | |
| "learning_rate": 0.00032761925309725, | |
| "loss": 2.7301549911499023, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 6.749738219895288, | |
| "learning_rate": 0.00032863987382340966, | |
| "loss": 2.7386409759521486, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 6.770680628272252, | |
| "learning_rate": 0.00032966049454956933, | |
| "loss": 2.718589973449707, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 6.791623036649215, | |
| "learning_rate": 0.000330681115275729, | |
| "loss": 2.7293943405151366, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 6.812565445026178, | |
| "learning_rate": 0.0003317017360018886, | |
| "loss": 2.730001449584961, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 6.833507853403141, | |
| "learning_rate": 0.0003327223567280483, | |
| "loss": 2.7539945602416993, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 6.854450261780105, | |
| "learning_rate": 0.00033374297745420795, | |
| "loss": 2.7157943725585936, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 6.875392670157068, | |
| "learning_rate": 0.0003347635981803676, | |
| "loss": 2.7142717361450197, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 6.896335078534031, | |
| "learning_rate": 0.0003357842189065273, | |
| "loss": 2.723209190368652, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 6.9172774869109945, | |
| "learning_rate": 0.0003368048396326869, | |
| "loss": 2.725255012512207, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 6.938219895287958, | |
| "learning_rate": 0.00033782546035884657, | |
| "loss": 2.7175876617431642, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 6.959162303664922, | |
| "learning_rate": 0.00033884608108500624, | |
| "loss": 2.7198652267456054, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 6.980104712041885, | |
| "learning_rate": 0.0003398667018111659, | |
| "loss": 2.719003105163574, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 6.998952879581152, | |
| "eval_loss": 2.657553195953369, | |
| "eval_runtime": 73.1108, | |
| "eval_samples_per_second": 149.855, | |
| "step": 3339 | |
| }, | |
| { | |
| "epoch": 7.002094240837696, | |
| "learning_rate": 0.0003408873225373256, | |
| "loss": 2.8445356369018553, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 7.02303664921466, | |
| "learning_rate": 0.00034190794326348524, | |
| "loss": 2.6978481292724608, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 7.043979057591623, | |
| "learning_rate": 0.00034292856398964486, | |
| "loss": 2.6844539642333984, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 7.064921465968586, | |
| "learning_rate": 0.0003439491847158045, | |
| "loss": 2.7076372146606444, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 7.0858638743455495, | |
| "learning_rate": 0.0003449698054419642, | |
| "loss": 2.7051311492919923, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 7.106806282722513, | |
| "learning_rate": 0.00034599042616812386, | |
| "loss": 2.700316619873047, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 7.127748691099477, | |
| "learning_rate": 0.00034701104689428353, | |
| "loss": 2.732998085021973, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 7.14869109947644, | |
| "learning_rate": 0.0003480316676204432, | |
| "loss": 2.7185394287109377, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 7.169633507853403, | |
| "learning_rate": 0.0003490522883466028, | |
| "loss": 2.705458068847656, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 7.190575916230366, | |
| "learning_rate": 0.0003500729090727625, | |
| "loss": 2.708370590209961, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 7.21151832460733, | |
| "learning_rate": 0.00035109352979892215, | |
| "loss": 2.7156093597412108, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 7.232460732984293, | |
| "learning_rate": 0.0003521141505250818, | |
| "loss": 2.6954971313476563, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 7.2534031413612565, | |
| "learning_rate": 0.0003531347712512415, | |
| "loss": 2.7061573028564454, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 7.27434554973822, | |
| "learning_rate": 0.00035415539197740115, | |
| "loss": 2.7047218322753905, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 7.295287958115184, | |
| "learning_rate": 0.00035517601270356077, | |
| "loss": 2.6945539474487306, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 7.316230366492147, | |
| "learning_rate": 0.00035619663342972044, | |
| "loss": 2.67620735168457, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 7.33717277486911, | |
| "learning_rate": 0.0003572172541558801, | |
| "loss": 2.6955190658569337, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 7.358115183246073, | |
| "learning_rate": 0.00035823787488203977, | |
| "loss": 2.684027671813965, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 7.379057591623036, | |
| "learning_rate": 0.00035925849560819944, | |
| "loss": 2.698904800415039, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 7.4, | |
| "learning_rate": 0.0003602791163343591, | |
| "loss": 2.695516014099121, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 7.4209424083769635, | |
| "learning_rate": 0.0003612997370605187, | |
| "loss": 2.7022026062011717, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 7.441884816753927, | |
| "learning_rate": 0.0003623203577866784, | |
| "loss": 2.703862762451172, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 7.46282722513089, | |
| "learning_rate": 0.00036334097851283806, | |
| "loss": 2.6929235458374023, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 7.483769633507853, | |
| "learning_rate": 0.00036436159923899773, | |
| "loss": 2.6659242630004885, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 7.504712041884817, | |
| "learning_rate": 0.0003653822199651574, | |
| "loss": 2.660002899169922, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 7.52565445026178, | |
| "learning_rate": 0.000366402840691317, | |
| "loss": 2.699945831298828, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 7.546596858638743, | |
| "learning_rate": 0.0003674234614174767, | |
| "loss": 2.677934455871582, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 7.5675392670157065, | |
| "learning_rate": 0.00036844408214363635, | |
| "loss": 2.6725204467773436, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 7.5884816753926705, | |
| "learning_rate": 0.000369464702869796, | |
| "loss": 2.701091003417969, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 7.609424083769634, | |
| "learning_rate": 0.00037048532359595563, | |
| "loss": 2.6974748611450194, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 7.630366492146597, | |
| "learning_rate": 0.00037150594432211524, | |
| "loss": 2.6910377502441407, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 7.65130890052356, | |
| "learning_rate": 0.0003725265650482749, | |
| "loss": 2.6975467681884764, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 7.672251308900524, | |
| "learning_rate": 0.0003735471857744346, | |
| "loss": 2.6613983154296874, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 7.693193717277487, | |
| "learning_rate": 0.00037456780650059425, | |
| "loss": 2.6739892959594727, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 7.71413612565445, | |
| "learning_rate": 0.0003755884272267539, | |
| "loss": 2.684556770324707, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 7.735078534031413, | |
| "learning_rate": 0.0003766090479529136, | |
| "loss": 2.662580680847168, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 7.756020942408377, | |
| "learning_rate": 0.0003776296686790732, | |
| "loss": 2.671764373779297, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 7.776963350785341, | |
| "learning_rate": 0.00037865028940523287, | |
| "loss": 2.687736701965332, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 7.797905759162304, | |
| "learning_rate": 0.00037967091013139253, | |
| "loss": 2.676908493041992, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 7.818848167539267, | |
| "learning_rate": 0.0003806915308575522, | |
| "loss": 2.6953929901123046, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 7.83979057591623, | |
| "learning_rate": 0.00038171215158371187, | |
| "loss": 2.7138477325439454, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 7.860732984293193, | |
| "learning_rate": 0.00038273277230987154, | |
| "loss": 2.658716583251953, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 7.881675392670157, | |
| "learning_rate": 0.00038375339303603115, | |
| "loss": 2.6610225677490233, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 7.90261780104712, | |
| "learning_rate": 0.0003847740137621908, | |
| "loss": 2.6816684722900392, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 7.923560209424084, | |
| "learning_rate": 0.0003857946344883505, | |
| "loss": 2.69453125, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 7.944502617801048, | |
| "learning_rate": 0.00038681525521451016, | |
| "loss": 2.6690832138061524, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 7.965445026178011, | |
| "learning_rate": 0.0003878358759406698, | |
| "loss": 2.6719383239746093, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 7.986387434554974, | |
| "learning_rate": 0.00038885649666682944, | |
| "loss": 2.6821807861328124, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 7.998952879581152, | |
| "eval_loss": 2.6321663856506348, | |
| "eval_runtime": 73.4258, | |
| "eval_samples_per_second": 149.212, | |
| "step": 3816 | |
| }, | |
| { | |
| "epoch": 8.008376963350786, | |
| "learning_rate": 0.0003898771173929891, | |
| "loss": 2.792423057556152, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 8.029319371727748, | |
| "learning_rate": 0.0003908977381191488, | |
| "loss": 2.639652061462402, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 8.050261780104712, | |
| "learning_rate": 0.00039191835884530845, | |
| "loss": 2.658628463745117, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 8.071204188481675, | |
| "learning_rate": 0.0003929389795714681, | |
| "loss": 2.6759145736694334, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 8.092146596858639, | |
| "learning_rate": 0.0003939596002976278, | |
| "loss": 2.7123003005981445, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 8.113089005235603, | |
| "learning_rate": 0.0003949802210237874, | |
| "loss": 2.654216766357422, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 8.134031413612565, | |
| "learning_rate": 0.00039600084174994706, | |
| "loss": 2.653605651855469, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 8.154973821989529, | |
| "learning_rate": 0.00039702146247610673, | |
| "loss": 2.6602855682373048, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 8.175916230366493, | |
| "learning_rate": 0.0003980420832022664, | |
| "loss": 2.664006805419922, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 8.196858638743455, | |
| "learning_rate": 0.00039906270392842607, | |
| "loss": 2.6484548568725588, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 8.21780104712042, | |
| "learning_rate": 0.00040008332465458574, | |
| "loss": 2.6500553131103515, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 8.238743455497382, | |
| "learning_rate": 0.00040110394538074535, | |
| "loss": 2.6387815475463867, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 8.259685863874346, | |
| "learning_rate": 0.000402124566106905, | |
| "loss": 2.666366958618164, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 8.28062827225131, | |
| "learning_rate": 0.0004031451868330647, | |
| "loss": 2.69473876953125, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 8.301570680628272, | |
| "learning_rate": 0.00040416580755922436, | |
| "loss": 2.649556350708008, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 8.322513089005236, | |
| "learning_rate": 0.000405186428285384, | |
| "loss": 2.661054801940918, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 8.343455497382198, | |
| "learning_rate": 0.0004062070490115437, | |
| "loss": 2.651872253417969, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 8.364397905759162, | |
| "learning_rate": 0.0004072276697377033, | |
| "loss": 2.657582473754883, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 8.385340314136126, | |
| "learning_rate": 0.000408248290463863, | |
| "loss": 2.647386932373047, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 8.406282722513089, | |
| "learning_rate": 0.00040926891119002264, | |
| "loss": 2.622752380371094, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 8.427225130890053, | |
| "learning_rate": 0.0004102895319161823, | |
| "loss": 2.6471187591552736, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 8.448167539267015, | |
| "learning_rate": 0.000411310152642342, | |
| "loss": 2.6503711700439454, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 8.469109947643979, | |
| "learning_rate": 0.0004123307733685016, | |
| "loss": 2.662638854980469, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 8.490052356020943, | |
| "learning_rate": 0.00041335139409466126, | |
| "loss": 2.6614749908447264, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 8.510994764397905, | |
| "learning_rate": 0.0004143720148208209, | |
| "loss": 2.651297760009766, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 8.53193717277487, | |
| "learning_rate": 0.00041539263554698055, | |
| "loss": 2.654410552978516, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 8.552879581151833, | |
| "learning_rate": 0.0004164132562731402, | |
| "loss": 2.6440593719482424, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 8.573821989528795, | |
| "learning_rate": 0.00041743387699929983, | |
| "loss": 2.6485244750976564, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 8.59476439790576, | |
| "learning_rate": 0.0004184544977254595, | |
| "loss": 2.645181655883789, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 8.615706806282722, | |
| "learning_rate": 0.00041947511845161916, | |
| "loss": 2.6527120590209963, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 8.636649214659686, | |
| "learning_rate": 0.00042049573917777883, | |
| "loss": 2.644283676147461, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 8.65759162303665, | |
| "learning_rate": 0.0004215163599039385, | |
| "loss": 2.6589195251464846, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 8.678534031413612, | |
| "learning_rate": 0.00042253698063009817, | |
| "loss": 2.638910675048828, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 8.699476439790576, | |
| "learning_rate": 0.0004235576013562578, | |
| "loss": 2.6332365036010743, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 8.72041884816754, | |
| "learning_rate": 0.00042457822208241745, | |
| "loss": 2.6593414306640626, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 8.741361256544502, | |
| "learning_rate": 0.0004255988428085771, | |
| "loss": 2.6320539474487306, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 8.762303664921467, | |
| "learning_rate": 0.0004266194635347368, | |
| "loss": 2.6423057556152343, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 8.783246073298429, | |
| "learning_rate": 0.00042764008426089646, | |
| "loss": 2.6288238525390626, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 8.804188481675393, | |
| "learning_rate": 0.0004286607049870561, | |
| "loss": 2.624134635925293, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 8.825130890052357, | |
| "learning_rate": 0.00042968132571321574, | |
| "loss": 2.6502635955810545, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 8.846073298429319, | |
| "learning_rate": 0.0004307019464393754, | |
| "loss": 2.630023384094238, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 8.867015706806283, | |
| "learning_rate": 0.0004317225671655351, | |
| "loss": 2.6346521377563477, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 8.887958115183245, | |
| "learning_rate": 0.00043274318789169474, | |
| "loss": 2.612893295288086, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 8.90890052356021, | |
| "learning_rate": 0.0004337638086178544, | |
| "loss": 2.605606460571289, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 8.929842931937173, | |
| "learning_rate": 0.0004347844293440141, | |
| "loss": 2.6086076736450194, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 8.950785340314136, | |
| "learning_rate": 0.0004358050500701737, | |
| "loss": 2.6492921829223635, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 8.9717277486911, | |
| "learning_rate": 0.00043682567079633336, | |
| "loss": 2.60416202545166, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 8.992670157068062, | |
| "learning_rate": 0.00043784629152249303, | |
| "loss": 2.639349937438965, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 8.998952879581152, | |
| "eval_loss": 2.6156058311462402, | |
| "eval_runtime": 73.0048, | |
| "eval_samples_per_second": 150.072, | |
| "step": 4293 | |
| }, | |
| { | |
| "epoch": 9.014659685863874, | |
| "learning_rate": 0.0004388669122486527, | |
| "loss": 2.7455169677734377, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 9.035602094240838, | |
| "learning_rate": 0.00043988753297481237, | |
| "loss": 2.632405471801758, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 9.0565445026178, | |
| "learning_rate": 0.000440908153700972, | |
| "loss": 2.644683074951172, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 9.077486910994764, | |
| "learning_rate": 0.00044192877442713165, | |
| "loss": 2.620805358886719, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 9.098429319371728, | |
| "learning_rate": 0.0004429493951532913, | |
| "loss": 2.6189605712890627, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 9.11937172774869, | |
| "learning_rate": 0.000443970015879451, | |
| "loss": 2.614165687561035, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 9.140314136125655, | |
| "learning_rate": 0.00044499063660561065, | |
| "loss": 2.615524673461914, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 9.161256544502617, | |
| "learning_rate": 0.0004460112573317703, | |
| "loss": 2.623036003112793, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 9.182198952879581, | |
| "learning_rate": 0.00044703187805792994, | |
| "loss": 2.636097526550293, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 9.203141361256545, | |
| "learning_rate": 0.0004480524987840896, | |
| "loss": 2.5902896881103517, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 9.224083769633507, | |
| "learning_rate": 0.0004490731195102493, | |
| "loss": 2.596613121032715, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 9.245026178010471, | |
| "learning_rate": 0.00045009374023640894, | |
| "loss": 2.6126483917236327, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 9.265968586387434, | |
| "learning_rate": 0.0004511143609625686, | |
| "loss": 2.6270191192626955, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 9.286910994764398, | |
| "learning_rate": 0.0004521349816887283, | |
| "loss": 2.603664779663086, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 9.307853403141362, | |
| "learning_rate": 0.0004531556024148879, | |
| "loss": 2.618840980529785, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 9.328795811518324, | |
| "learning_rate": 0.00045417622314104756, | |
| "loss": 2.6355594635009765, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 9.349738219895288, | |
| "learning_rate": 0.00045519684386720723, | |
| "loss": 2.6463857650756837, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 9.370680628272252, | |
| "learning_rate": 0.0004562174645933669, | |
| "loss": 2.6071035385131838, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 9.391623036649214, | |
| "learning_rate": 0.00045723808531952657, | |
| "loss": 2.6336111068725585, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 9.412565445026178, | |
| "learning_rate": 0.0004582587060456861, | |
| "loss": 2.6231353759765623, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 9.43350785340314, | |
| "learning_rate": 0.0004592793267718458, | |
| "loss": 2.6107151031494142, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 9.454450261780105, | |
| "learning_rate": 0.00046029994749800546, | |
| "loss": 2.6213560104370117, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 9.475392670157069, | |
| "learning_rate": 0.00046132056822416513, | |
| "loss": 2.584638214111328, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 9.496335078534031, | |
| "learning_rate": 0.0004623411889503248, | |
| "loss": 2.6262628555297853, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 9.517277486910995, | |
| "learning_rate": 0.00046336180967648447, | |
| "loss": 2.5897647857666017, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 9.538219895287957, | |
| "learning_rate": 0.0004643824304026441, | |
| "loss": 2.6060354232788088, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 9.559162303664921, | |
| "learning_rate": 0.00046540305112880375, | |
| "loss": 2.6086732864379885, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 9.580104712041885, | |
| "learning_rate": 0.0004664236718549634, | |
| "loss": 2.5964023590087892, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 9.601047120418848, | |
| "learning_rate": 0.0004674442925811231, | |
| "loss": 2.622255325317383, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 9.621989528795812, | |
| "learning_rate": 0.00046846491330728275, | |
| "loss": 2.5982023239135743, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 9.642931937172776, | |
| "learning_rate": 0.00046948553403344237, | |
| "loss": 2.6015974044799806, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 9.663874345549738, | |
| "learning_rate": 0.00047050615475960204, | |
| "loss": 2.617095184326172, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 9.684816753926702, | |
| "learning_rate": 0.0004715267754857617, | |
| "loss": 2.5803293228149413, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 9.705759162303664, | |
| "learning_rate": 0.00047254739621192137, | |
| "loss": 2.615343475341797, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 9.726701570680628, | |
| "learning_rate": 0.00047356801693808104, | |
| "loss": 2.5800424575805665, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 9.747643979057592, | |
| "learning_rate": 0.0004745886376642407, | |
| "loss": 2.598635673522949, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 9.768586387434555, | |
| "learning_rate": 0.0004756092583904003, | |
| "loss": 2.615553283691406, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 9.789528795811519, | |
| "learning_rate": 0.00047662987911656, | |
| "loss": 2.582602882385254, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 9.810471204188481, | |
| "learning_rate": 0.00047765049984271966, | |
| "loss": 2.5848695755004885, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 9.831413612565445, | |
| "learning_rate": 0.00047867112056887933, | |
| "loss": 2.6057199478149413, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 9.852356020942409, | |
| "learning_rate": 0.000479691741295039, | |
| "loss": 2.6193204879760743, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 9.873298429319371, | |
| "learning_rate": 0.00048071236202119866, | |
| "loss": 2.6137775421142577, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 9.894240837696335, | |
| "learning_rate": 0.0004817329827473583, | |
| "loss": 2.613667678833008, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 9.915183246073298, | |
| "learning_rate": 0.00048275360347351795, | |
| "loss": 2.599857711791992, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 9.936125654450262, | |
| "learning_rate": 0.0004837742241996776, | |
| "loss": 2.579629325866699, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 9.957068062827226, | |
| "learning_rate": 0.0004847948449258373, | |
| "loss": 2.6212156295776365, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 9.978010471204188, | |
| "learning_rate": 0.00048581546565199695, | |
| "loss": 2.5806146621704102, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 9.998952879581152, | |
| "learning_rate": 0.0004868360863781566, | |
| "loss": 2.5807775497436523, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 9.998952879581152, | |
| "eval_loss": 2.604253053665161, | |
| "eval_runtime": 74.0972, | |
| "eval_samples_per_second": 147.86, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 10.020942408376964, | |
| "learning_rate": 0.00048785670710431623, | |
| "loss": 2.721482849121094, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 10.041884816753926, | |
| "learning_rate": 0.000488877327830476, | |
| "loss": 2.5954622268676757, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 10.06282722513089, | |
| "learning_rate": 0.0004898979485566356, | |
| "loss": 2.6108915328979494, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 10.083769633507853, | |
| "learning_rate": 0.0004909185692827952, | |
| "loss": 2.607802391052246, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 10.104712041884817, | |
| "learning_rate": 0.0004919391900089549, | |
| "loss": 2.594425010681152, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 10.12565445026178, | |
| "learning_rate": 0.0004929598107351145, | |
| "loss": 2.6012176513671874, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 10.146596858638743, | |
| "learning_rate": 0.0004939804314612742, | |
| "loss": 2.5987337112426756, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 10.167539267015707, | |
| "learning_rate": 0.0004950010521874339, | |
| "loss": 2.5843479156494142, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 10.188481675392671, | |
| "learning_rate": 0.0004960216729135935, | |
| "loss": 2.612710380554199, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 10.209424083769633, | |
| "learning_rate": 0.0004970422936397532, | |
| "loss": 2.608279991149902, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 10.230366492146597, | |
| "learning_rate": 0.0004980629143659128, | |
| "loss": 2.599015998840332, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 10.25130890052356, | |
| "learning_rate": 0.0004990835350920725, | |
| "loss": 2.6169193267822264, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 10.272251308900524, | |
| "learning_rate": 0.0005001041558182321, | |
| "loss": 2.5874061584472656, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 10.293193717277488, | |
| "learning_rate": 0.0005011247765443918, | |
| "loss": 2.5972221374511717, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 10.31413612565445, | |
| "learning_rate": 0.0005021453972705514, | |
| "loss": 2.592855453491211, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 10.335078534031414, | |
| "learning_rate": 0.0005031660179967111, | |
| "loss": 2.5813924789428713, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 10.356020942408376, | |
| "learning_rate": 0.0005041866387228707, | |
| "loss": 2.5748531341552736, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 10.37696335078534, | |
| "learning_rate": 0.0005052072594490304, | |
| "loss": 2.5954639434814455, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 10.397905759162304, | |
| "learning_rate": 0.00050622788017519, | |
| "loss": 2.5695329666137696, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 10.418848167539267, | |
| "learning_rate": 0.0005072485009013497, | |
| "loss": 2.5873296737670897, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 10.43979057591623, | |
| "learning_rate": 0.0005082691216275094, | |
| "loss": 2.6035577774047853, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 10.460732984293193, | |
| "learning_rate": 0.000509289742353669, | |
| "loss": 2.5872554779052734, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 10.481675392670157, | |
| "learning_rate": 0.0005103103630798287, | |
| "loss": 2.548787498474121, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 10.502617801047121, | |
| "learning_rate": 0.0005098008169087462, | |
| "loss": 2.5832122802734374, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 10.523560209424083, | |
| "learning_rate": 0.0005092927940452339, | |
| "loss": 2.5651966094970704, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 10.544502617801047, | |
| "learning_rate": 0.0005087862869144114, | |
| "loss": 2.5844688415527344, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 10.565445026178011, | |
| "learning_rate": 0.0005082812879940277, | |
| "loss": 2.5721431732177735, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 10.586387434554974, | |
| "learning_rate": 0.0005077777898139921, | |
| "loss": 2.59671630859375, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 10.607329842931938, | |
| "learning_rate": 0.0005072757849559103, | |
| "loss": 2.5834468841552733, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 10.6282722513089, | |
| "learning_rate": 0.0005067752660526248, | |
| "loss": 2.566558074951172, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 10.649214659685864, | |
| "learning_rate": 0.0005062762257877613, | |
| "loss": 2.579891395568848, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 10.670157068062828, | |
| "learning_rate": 0.0005057786568952791, | |
| "loss": 2.5753833770751955, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 10.69109947643979, | |
| "learning_rate": 0.000505282552159027, | |
| "loss": 2.5887382507324217, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 10.712041884816754, | |
| "learning_rate": 0.000504787904412304, | |
| "loss": 2.578693962097168, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 10.732984293193716, | |
| "learning_rate": 0.000504294706537424, | |
| "loss": 2.5702871322631835, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 10.75392670157068, | |
| "learning_rate": 0.0005038029514652858, | |
| "loss": 2.579792594909668, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 10.774869109947645, | |
| "learning_rate": 0.0005033126321749477, | |
| "loss": 2.5686906814575194, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 10.795811518324607, | |
| "learning_rate": 0.000502823741693206, | |
| "loss": 2.5612104415893553, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 10.81675392670157, | |
| "learning_rate": 0.0005023362730941793, | |
| "loss": 2.5499547958374023, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 10.837696335078533, | |
| "learning_rate": 0.0005018502194988955, | |
| "loss": 2.5822380065917967, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 10.858638743455497, | |
| "learning_rate": 0.0005013655740748848, | |
| "loss": 2.5612071990966796, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 10.879581151832461, | |
| "learning_rate": 0.0005008823300357761, | |
| "loss": 2.5559200286865233, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 10.900523560209423, | |
| "learning_rate": 0.0005004004806408972, | |
| "loss": 2.566044807434082, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 10.921465968586388, | |
| "learning_rate": 0.0004999200191948814, | |
| "loss": 2.5504446029663086, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 10.942408376963352, | |
| "learning_rate": 0.0004994409390472751, | |
| "loss": 2.5642301559448244, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 10.963350785340314, | |
| "learning_rate": 0.0004989632335921523, | |
| "loss": 2.560255241394043, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 10.984293193717278, | |
| "learning_rate": 0.0004984868962677315, | |
| "loss": 2.5622379302978517, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 10.998952879581152, | |
| "eval_loss": 2.5793886184692383, | |
| "eval_runtime": 73.6792, | |
| "eval_samples_per_second": 148.699, | |
| "step": 5247 | |
| }, | |
| { | |
| "epoch": 11.006282722513088, | |
| "learning_rate": 0.0004980119205559973, | |
| "loss": 2.682263946533203, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 11.027225130890052, | |
| "learning_rate": 0.0004975382999823259, | |
| "loss": 2.581485557556152, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 11.048167539267016, | |
| "learning_rate": 0.0004970660281151141, | |
| "loss": 2.5775730133056642, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 11.069109947643978, | |
| "learning_rate": 0.0004965950985654126, | |
| "loss": 2.587117385864258, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 11.090052356020943, | |
| "learning_rate": 0.0004961255049865635, | |
| "loss": 2.5482534408569335, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 11.110994764397907, | |
| "learning_rate": 0.0004956572410738401, | |
| "loss": 2.5464542388916014, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 11.131937172774869, | |
| "learning_rate": 0.000495190300564092, | |
| "loss": 2.557751274108887, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 11.152879581151833, | |
| "learning_rate": 0.0004947246772353933, | |
| "loss": 2.54296932220459, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 11.173821989528795, | |
| "learning_rate": 0.0004942603649066942, | |
| "loss": 2.576173782348633, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 11.19476439790576, | |
| "learning_rate": 0.0004937973574374762, | |
| "loss": 2.535029411315918, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 11.215706806282723, | |
| "learning_rate": 0.0004933356487274114, | |
| "loss": 2.551316833496094, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 11.236649214659685, | |
| "learning_rate": 0.0004928752327160248, | |
| "loss": 2.551384925842285, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 11.25759162303665, | |
| "learning_rate": 0.0004924161033823598, | |
| "loss": 2.5437076568603514, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 11.278534031413612, | |
| "learning_rate": 0.0004919582547446482, | |
| "loss": 2.5423688888549805, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 11.299476439790576, | |
| "learning_rate": 0.0004915016808599824, | |
| "loss": 2.5350723266601562, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 11.32041884816754, | |
| "learning_rate": 0.0004910463758239914, | |
| "loss": 2.5370588302612305, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 11.341361256544502, | |
| "learning_rate": 0.0004905923337705201, | |
| "loss": 2.5458969116210937, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 11.362303664921466, | |
| "learning_rate": 0.0004901395488713123, | |
| "loss": 2.5638805389404298, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 11.38324607329843, | |
| "learning_rate": 0.0004896880153356963, | |
| "loss": 2.5516336441040037, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 11.404188481675392, | |
| "learning_rate": 0.000489237727410273, | |
| "loss": 2.559841346740723, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 11.425130890052356, | |
| "learning_rate": 0.0004887886793786093, | |
| "loss": 2.559991645812988, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 11.446073298429319, | |
| "learning_rate": 0.0004883408655609327, | |
| "loss": 2.5499288558959963, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 11.467015706806283, | |
| "learning_rate": 0.0004878942803138293, | |
| "loss": 2.544484519958496, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 11.487958115183247, | |
| "learning_rate": 0.0004874489180299454, | |
| "loss": 2.537228584289551, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 11.508900523560209, | |
| "learning_rate": 0.00048700477313769213, | |
| "loss": 2.5507528305053713, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 11.529842931937173, | |
| "learning_rate": 0.00048656184010095185, | |
| "loss": 2.5349197387695312, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 11.550785340314135, | |
| "learning_rate": 0.00048612011341878916, | |
| "loss": 2.5785711288452147, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 11.5717277486911, | |
| "learning_rate": 0.0004856795876251634, | |
| "loss": 2.5618894577026365, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 11.592670157068063, | |
| "learning_rate": 0.00048524025728864493, | |
| "loss": 2.553698921203613, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 11.613612565445026, | |
| "learning_rate": 0.0004848021170121335, | |
| "loss": 2.563484954833984, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 11.63455497382199, | |
| "learning_rate": 0.0004843651614325803, | |
| "loss": 2.5655393600463867, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 11.655497382198952, | |
| "learning_rate": 0.00048392938522071163, | |
| "loss": 2.550769233703613, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 11.676439790575916, | |
| "learning_rate": 0.0004834947830807563, | |
| "loss": 2.549762725830078, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 11.69738219895288, | |
| "learning_rate": 0.00048306134975017523, | |
| "loss": 2.5488073348999025, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 11.718324607329842, | |
| "learning_rate": 0.0004826290799993939, | |
| "loss": 2.554892158508301, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 11.739267015706806, | |
| "learning_rate": 0.0004821979686315372, | |
| "loss": 2.519801902770996, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 11.76020942408377, | |
| "learning_rate": 0.00048176801048216693, | |
| "loss": 2.541176414489746, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 11.781151832460733, | |
| "learning_rate": 0.0004813392004190223, | |
| "loss": 2.52908878326416, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 11.802094240837697, | |
| "learning_rate": 0.00048091153334176224, | |
| "loss": 2.5230037689208986, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 11.823036649214659, | |
| "learning_rate": 0.00048048500418171097, | |
| "loss": 2.5191682815551757, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 11.843979057591623, | |
| "learning_rate": 0.0004800596079016053, | |
| "loss": 2.553547668457031, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 11.864921465968587, | |
| "learning_rate": 0.0004796353394953452, | |
| "loss": 2.5445688247680662, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 11.88586387434555, | |
| "learning_rate": 0.0004792121939877459, | |
| "loss": 2.554265022277832, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 11.906806282722513, | |
| "learning_rate": 0.00047879016643429336, | |
| "loss": 2.5312326431274412, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 11.927748691099476, | |
| "learning_rate": 0.00047836925192090116, | |
| "loss": 2.54235897064209, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 11.94869109947644, | |
| "learning_rate": 0.0004779494455636703, | |
| "loss": 2.5591432571411135, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 11.969633507853404, | |
| "learning_rate": 0.00047753074250865145, | |
| "loss": 2.5049566268920898, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 11.990575916230366, | |
| "learning_rate": 0.00047711313793160877, | |
| "loss": 2.5459238052368165, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 11.998952879581152, | |
| "eval_loss": 2.5662965774536133, | |
| "eval_runtime": 74.2325, | |
| "eval_samples_per_second": 147.59, | |
| "step": 5724 | |
| }, | |
| { | |
| "epoch": 12.012565445026178, | |
| "learning_rate": 0.000476696627037787, | |
| "loss": 2.6624425888061523, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 12.033507853403142, | |
| "learning_rate": 0.0004762812050616797, | |
| "loss": 2.5242809295654296, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 12.054450261780104, | |
| "learning_rate": 0.0004758668672668006, | |
| "loss": 2.541863441467285, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 12.075392670157068, | |
| "learning_rate": 0.00047545360894545664, | |
| "loss": 2.5424705505371095, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 12.09633507853403, | |
| "learning_rate": 0.0004750414254185235, | |
| "loss": 2.5307668685913085, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 12.117277486910995, | |
| "learning_rate": 0.0004746303120352226, | |
| "loss": 2.5350624084472657, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 12.138219895287959, | |
| "learning_rate": 0.00047422026417290146, | |
| "loss": 2.5554269790649413, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 12.159162303664921, | |
| "learning_rate": 0.0004738112772368146, | |
| "loss": 2.5168834686279298, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 12.180104712041885, | |
| "learning_rate": 0.00047340334665990787, | |
| "loss": 2.531605529785156, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 12.201047120418847, | |
| "learning_rate": 0.0004729964679026039, | |
| "loss": 2.515584373474121, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 12.221989528795811, | |
| "learning_rate": 0.0004725906364525903, | |
| "loss": 2.522596549987793, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 12.242931937172775, | |
| "learning_rate": 0.0004721858478246089, | |
| "loss": 2.5171236038208007, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 12.263874345549738, | |
| "learning_rate": 0.0004717820975602482, | |
| "loss": 2.529332160949707, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 12.284816753926702, | |
| "learning_rate": 0.0004713793812277367, | |
| "loss": 2.5047964096069335, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 12.305759162303666, | |
| "learning_rate": 0.00047097769442173856, | |
| "loss": 2.518666458129883, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 12.326701570680628, | |
| "learning_rate": 0.00047057703276315164, | |
| "loss": 2.5095588684082033, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 12.347643979057592, | |
| "learning_rate": 0.0004701773918989065, | |
| "loss": 2.5557069778442383, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 12.368586387434554, | |
| "learning_rate": 0.00046977876750176805, | |
| "loss": 2.5204561233520506, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 12.389528795811518, | |
| "learning_rate": 0.0004693811552701385, | |
| "loss": 2.5440658569335937, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 12.410471204188482, | |
| "learning_rate": 0.0004689845509278626, | |
| "loss": 2.5195499420166017, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 12.431413612565445, | |
| "learning_rate": 0.00046858895022403474, | |
| "loss": 2.506319999694824, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 12.452356020942409, | |
| "learning_rate": 0.000468194348932807, | |
| "loss": 2.507068061828613, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 12.473298429319371, | |
| "learning_rate": 0.00046780074285319984, | |
| "loss": 2.559153938293457, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 12.494240837696335, | |
| "learning_rate": 0.0004674081278089144, | |
| "loss": 2.5483341217041016, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 12.515183246073299, | |
| "learning_rate": 0.00046701649964814616, | |
| "loss": 2.523490333557129, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 12.536125654450261, | |
| "learning_rate": 0.0004666258542434007, | |
| "loss": 2.511086654663086, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 12.557068062827225, | |
| "learning_rate": 0.000466236187491311, | |
| "loss": 2.525220489501953, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 12.578010471204188, | |
| "learning_rate": 0.00046584749531245617, | |
| "loss": 2.5341968536376953, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 12.598952879581152, | |
| "learning_rate": 0.0004654597736511823, | |
| "loss": 2.517439842224121, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 12.619895287958116, | |
| "learning_rate": 0.0004650730184754247, | |
| "loss": 2.535861778259277, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 12.640837696335078, | |
| "learning_rate": 0.0004646872257765318, | |
| "loss": 2.5128170013427735, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 12.661780104712042, | |
| "learning_rate": 0.00046430239156909045, | |
| "loss": 2.5013412475585937, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 12.682722513089006, | |
| "learning_rate": 0.00046391851189075343, | |
| "loss": 2.492375373840332, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 12.703664921465968, | |
| "learning_rate": 0.00046353558280206746, | |
| "loss": 2.533987045288086, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 12.724607329842932, | |
| "learning_rate": 0.00046315360038630404, | |
| "loss": 2.5339818954467774, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 12.745549738219895, | |
| "learning_rate": 0.0004627725607492909, | |
| "loss": 2.5307100296020506, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 12.766492146596859, | |
| "learning_rate": 0.00046239246001924503, | |
| "loss": 2.5334211349487306, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 12.787434554973823, | |
| "learning_rate": 0.000462013294346608, | |
| "loss": 2.524607849121094, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 12.808376963350785, | |
| "learning_rate": 0.00046163505990388167, | |
| "loss": 2.5250701904296875, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 12.829319371727749, | |
| "learning_rate": 0.00046125775288546623, | |
| "loss": 2.514480400085449, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 12.850261780104713, | |
| "learning_rate": 0.00046088136950749937, | |
| "loss": 2.506093215942383, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 12.871204188481675, | |
| "learning_rate": 0.0004605059060076967, | |
| "loss": 2.5329927444458007, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 12.89214659685864, | |
| "learning_rate": 0.0004601313586451939, | |
| "loss": 2.5250947952270506, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 12.913089005235602, | |
| "learning_rate": 0.00045975772370039034, | |
| "loss": 2.535073471069336, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 12.934031413612566, | |
| "learning_rate": 0.0004593849974747937, | |
| "loss": 2.524639892578125, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 12.95497382198953, | |
| "learning_rate": 0.0004590131762908664, | |
| "loss": 2.509628486633301, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 12.975916230366492, | |
| "learning_rate": 0.00045864225649187287, | |
| "loss": 2.5401321411132813, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 12.996858638743456, | |
| "learning_rate": 0.000458272234441729, | |
| "loss": 2.512648582458496, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 12.998952879581152, | |
| "eval_loss": 2.558600902557373, | |
| "eval_runtime": 73.5975, | |
| "eval_samples_per_second": 148.864, | |
| "step": 6201 | |
| }, | |
| { | |
| "epoch": 13.018848167539266, | |
| "learning_rate": 0.00045790310652485205, | |
| "loss": 2.6353900909423826, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 13.03979057591623, | |
| "learning_rate": 0.0004575348691460124, | |
| "loss": 2.519637870788574, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 13.060732984293194, | |
| "learning_rate": 0.00045716751873018654, | |
| "loss": 2.515974426269531, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 13.081675392670157, | |
| "learning_rate": 0.00045680105172241103, | |
| "loss": 2.5275392532348633, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 13.10261780104712, | |
| "learning_rate": 0.0004564354645876384, | |
| "loss": 2.518478012084961, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 13.123560209424085, | |
| "learning_rate": 0.00045607075381059363, | |
| "loss": 2.506203460693359, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 13.144502617801047, | |
| "learning_rate": 0.00045570691589563234, | |
| "loss": 2.501953125, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 13.165445026178011, | |
| "learning_rate": 0.0004553439473666, | |
| "loss": 2.536935234069824, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 13.186387434554973, | |
| "learning_rate": 0.0004549818447666924, | |
| "loss": 2.500376892089844, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 13.207329842931937, | |
| "learning_rate": 0.00045462060465831743, | |
| "loss": 2.507547950744629, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 13.228272251308901, | |
| "learning_rate": 0.0004542602236229581, | |
| "loss": 2.50396842956543, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 13.249214659685864, | |
| "learning_rate": 0.00045390069826103653, | |
| "loss": 2.4975730895996096, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 13.270157068062828, | |
| "learning_rate": 0.00045354202519177925, | |
| "loss": 2.5031005859375, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 13.29109947643979, | |
| "learning_rate": 0.0004531842010530839, | |
| "loss": 2.5102792739868165, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 13.312041884816754, | |
| "learning_rate": 0.0004528272225013865, | |
| "loss": 2.510196876525879, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 13.332984293193718, | |
| "learning_rate": 0.00045247108621153056, | |
| "loss": 2.529274559020996, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 13.35392670157068, | |
| "learning_rate": 0.0004521157888766368, | |
| "loss": 2.5169746398925783, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 13.374869109947644, | |
| "learning_rate": 0.00045176132720797443, | |
| "loss": 2.4974170684814454, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 13.395811518324606, | |
| "learning_rate": 0.0004514076979348328, | |
| "loss": 2.4937871932983398, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 13.41675392670157, | |
| "learning_rate": 0.0004510548978043951, | |
| "loss": 2.4988937377929688, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 13.437696335078535, | |
| "learning_rate": 0.00045070292358161265, | |
| "loss": 2.5192642211914062, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 13.458638743455497, | |
| "learning_rate": 0.0004503517720490801, | |
| "loss": 2.515308380126953, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 13.47958115183246, | |
| "learning_rate": 0.000450001440006912, | |
| "loss": 2.4873653411865235, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 13.500523560209425, | |
| "learning_rate": 0.00044965192427262043, | |
| "loss": 2.4860763549804688, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 13.521465968586387, | |
| "learning_rate": 0.0004493032216809934, | |
| "loss": 2.4914045333862305, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 13.542408376963351, | |
| "learning_rate": 0.00044895532908397455, | |
| "loss": 2.505444145202637, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 13.563350785340313, | |
| "learning_rate": 0.00044860824335054384, | |
| "loss": 2.484037971496582, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 13.584293193717278, | |
| "learning_rate": 0.00044826196136659916, | |
| "loss": 2.4801618576049806, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 13.605235602094242, | |
| "learning_rate": 0.00044791648003483884, | |
| "loss": 2.497146415710449, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 13.626178010471204, | |
| "learning_rate": 0.0004475717962746455, | |
| "loss": 2.510635757446289, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 13.647120418848168, | |
| "learning_rate": 0.0004472279070219706, | |
| "loss": 2.5058326721191406, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 13.66806282722513, | |
| "learning_rate": 0.00044688480922922, | |
| "loss": 2.490641975402832, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 13.689005235602094, | |
| "learning_rate": 0.00044654249986514057, | |
| "loss": 2.4954011917114256, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 13.709947643979058, | |
| "learning_rate": 0.0004462009759147076, | |
| "loss": 2.496523857116699, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 13.73089005235602, | |
| "learning_rate": 0.0004458602343790135, | |
| "loss": 2.4896028518676756, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 13.751832460732984, | |
| "learning_rate": 0.00044552027227515704, | |
| "loss": 2.490574836730957, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 13.772774869109949, | |
| "learning_rate": 0.00044518108663613355, | |
| "loss": 2.4956533432006838, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 13.79371727748691, | |
| "learning_rate": 0.00044484267451072644, | |
| "loss": 2.5057823181152346, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 13.814659685863875, | |
| "learning_rate": 0.0004445050329633992, | |
| "loss": 2.493949127197266, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 13.835602094240837, | |
| "learning_rate": 0.0004441681590741884, | |
| "loss": 2.514782524108887, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 13.856544502617801, | |
| "learning_rate": 0.0004438320499385977, | |
| "loss": 2.463920783996582, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 13.877486910994765, | |
| "learning_rate": 0.00044349670266749286, | |
| "loss": 2.494730567932129, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 13.898429319371727, | |
| "learning_rate": 0.0004431621143869969, | |
| "loss": 2.4927881240844725, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 13.919371727748691, | |
| "learning_rate": 0.00044282828223838727, | |
| "loss": 2.5001829147338865, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 13.940314136125654, | |
| "learning_rate": 0.0004424952033779929, | |
| "loss": 2.485161018371582, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 13.961256544502618, | |
| "learning_rate": 0.00044216287497709253, | |
| "loss": 2.4990121841430666, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 13.982198952879582, | |
| "learning_rate": 0.0004418312942218139, | |
| "loss": 2.478795051574707, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 13.998952879581152, | |
| "eval_loss": 2.551051616668701, | |
| "eval_runtime": 73.8418, | |
| "eval_samples_per_second": 148.371, | |
| "step": 6678 | |
| }, | |
| { | |
| "epoch": 14.004188481675392, | |
| "learning_rate": 0.0004415004583130336, | |
| "loss": 2.631510925292969, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 14.025130890052356, | |
| "learning_rate": 0.0004411703644662778, | |
| "loss": 2.46261043548584, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 14.04607329842932, | |
| "learning_rate": 0.00044084100991162385, | |
| "loss": 2.488113212585449, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 14.067015706806282, | |
| "learning_rate": 0.00044051239189360286, | |
| "loss": 2.4580398559570313, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 14.087958115183246, | |
| "learning_rate": 0.00044018450767110235, | |
| "loss": 2.482432174682617, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 14.108900523560209, | |
| "learning_rate": 0.0004398573545172709, | |
| "loss": 2.4827293395996093, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 14.129842931937173, | |
| "learning_rate": 0.0004395309297194223, | |
| "loss": 2.472520637512207, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 14.150785340314137, | |
| "learning_rate": 0.0004392052305789416, | |
| "loss": 2.47951602935791, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 14.171727748691099, | |
| "learning_rate": 0.0004388802544111908, | |
| "loss": 2.4616981506347657, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 14.192670157068063, | |
| "learning_rate": 0.0004385559985454165, | |
| "loss": 2.4829242706298826, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 14.213612565445025, | |
| "learning_rate": 0.0004382324603246575, | |
| "loss": 2.478873634338379, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 14.23455497382199, | |
| "learning_rate": 0.0004379096371056532, | |
| "loss": 2.4993722915649412, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 14.255497382198953, | |
| "learning_rate": 0.000437587526258753, | |
| "loss": 2.474994659423828, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 14.276439790575916, | |
| "learning_rate": 0.0004372661251678265, | |
| "loss": 2.495197296142578, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 14.29738219895288, | |
| "learning_rate": 0.00043694543123017407, | |
| "loss": 2.476504325866699, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 14.318324607329842, | |
| "learning_rate": 0.0004366254418564382, | |
| "loss": 2.5161060333251952, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 14.339267015706806, | |
| "learning_rate": 0.0004363061544705161, | |
| "loss": 2.4868789672851563, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 14.36020942408377, | |
| "learning_rate": 0.0004359875665094723, | |
| "loss": 2.4974212646484375, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 14.381151832460732, | |
| "learning_rate": 0.00043566967542345227, | |
| "loss": 2.4662216186523436, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 14.402094240837696, | |
| "learning_rate": 0.00043535247867559673, | |
| "loss": 2.469373321533203, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 14.42303664921466, | |
| "learning_rate": 0.00043503597374195665, | |
| "loss": 2.483184242248535, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 14.443979057591623, | |
| "learning_rate": 0.0004347201581114088, | |
| "loss": 2.4689809799194338, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 14.464921465968587, | |
| "learning_rate": 0.0004344050292855724, | |
| "loss": 2.439427375793457, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 14.485863874345549, | |
| "learning_rate": 0.00043409058477872554, | |
| "loss": 2.47011775970459, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 14.506806282722513, | |
| "learning_rate": 0.00043377682211772343, | |
| "loss": 2.4866916656494142, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 14.527748691099477, | |
| "learning_rate": 0.0004334637388419161, | |
| "loss": 2.4834897994995115, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 14.54869109947644, | |
| "learning_rate": 0.0004331513325030681, | |
| "loss": 2.5011289596557615, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 14.569633507853403, | |
| "learning_rate": 0.0004328396006652773, | |
| "loss": 2.478676986694336, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 14.590575916230367, | |
| "learning_rate": 0.00043252854090489564, | |
| "loss": 2.4692920684814452, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 14.61151832460733, | |
| "learning_rate": 0.00043221815081044985, | |
| "loss": 2.492611122131348, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 14.632460732984294, | |
| "learning_rate": 0.00043190842798256285, | |
| "loss": 2.4726083755493162, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 14.653403141361256, | |
| "learning_rate": 0.00043159937003387584, | |
| "loss": 2.491672706604004, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 14.67434554973822, | |
| "learning_rate": 0.00043129097458897135, | |
| "loss": 2.474324417114258, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 14.695287958115184, | |
| "learning_rate": 0.000430983239284296, | |
| "loss": 2.4726449966430666, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 14.716230366492146, | |
| "learning_rate": 0.0004306761617680849, | |
| "loss": 2.456452178955078, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 14.73717277486911, | |
| "learning_rate": 0.00043036973970028583, | |
| "loss": 2.4777704238891602, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 14.758115183246073, | |
| "learning_rate": 0.00043006397075248464, | |
| "loss": 2.492514801025391, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 14.779057591623037, | |
| "learning_rate": 0.00042975885260783056, | |
| "loss": 2.464923095703125, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 14.8, | |
| "learning_rate": 0.00042945438296096303, | |
| "loss": 2.4520200729370116, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 14.820942408376963, | |
| "learning_rate": 0.0004291505595179379, | |
| "loss": 2.461465072631836, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 14.841884816753927, | |
| "learning_rate": 0.0004288473799961553, | |
| "loss": 2.474461555480957, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 14.86282722513089, | |
| "learning_rate": 0.0004285448421242875, | |
| "loss": 2.474432945251465, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 14.883769633507853, | |
| "learning_rate": 0.00042824294364220724, | |
| "loss": 2.506844329833984, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 14.904712041884817, | |
| "learning_rate": 0.0004279416823009172, | |
| "loss": 2.466670036315918, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 14.92565445026178, | |
| "learning_rate": 0.0004276410558624791, | |
| "loss": 2.4866743087768555, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 14.946596858638744, | |
| "learning_rate": 0.0004273410620999446, | |
| "loss": 2.4524404525756838, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 14.967539267015706, | |
| "learning_rate": 0.0004270416987972853, | |
| "loss": 2.4684980392456053, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 14.98848167539267, | |
| "learning_rate": 0.00042674296374932424, | |
| "loss": 2.469831848144531, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 14.998952879581152, | |
| "eval_loss": 2.5442276000976562, | |
| "eval_runtime": 73.9909, | |
| "eval_samples_per_second": 148.072, | |
| "step": 7155 | |
| }, | |
| { | |
| "epoch": 15.010471204188482, | |
| "learning_rate": 0.0004264448547616681, | |
| "loss": 2.5812490463256834, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 15.031413612565444, | |
| "learning_rate": 0.00042614736965063864, | |
| "loss": 2.4873594284057616, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 15.052356020942408, | |
| "learning_rate": 0.0004258505062432064, | |
| "loss": 2.4659671783447266, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 15.073298429319372, | |
| "learning_rate": 0.0004255542623769234, | |
| "loss": 2.4558393478393556, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 15.094240837696335, | |
| "learning_rate": 0.00042525863589985727, | |
| "loss": 2.4745227813720705, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 15.115183246073299, | |
| "learning_rate": 0.00042496362467052564, | |
| "loss": 2.4763622283935547, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 15.136125654450261, | |
| "learning_rate": 0.00042466922655783073, | |
| "loss": 2.4713407516479493, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 15.157068062827225, | |
| "learning_rate": 0.00042437543944099504, | |
| "loss": 2.4634868621826174, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 15.178010471204189, | |
| "learning_rate": 0.00042408226120949674, | |
| "loss": 2.476248931884766, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 15.198952879581151, | |
| "learning_rate": 0.00042378968976300647, | |
| "loss": 2.4730270385742186, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 15.219895287958115, | |
| "learning_rate": 0.00042349772301132377, | |
| "loss": 2.476571273803711, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 15.24083769633508, | |
| "learning_rate": 0.0004232063588743146, | |
| "loss": 2.4510690689086916, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 15.261780104712042, | |
| "learning_rate": 0.00042291559528184904, | |
| "loss": 2.465399742126465, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 15.282722513089006, | |
| "learning_rate": 0.0004226254301737393, | |
| "loss": 2.4773502349853516, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 15.303664921465968, | |
| "learning_rate": 0.0004223358614996787, | |
| "loss": 2.43621711730957, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 15.324607329842932, | |
| "learning_rate": 0.00042204688721918075, | |
| "loss": 2.456114959716797, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 15.345549738219896, | |
| "learning_rate": 0.0004217585053015187, | |
| "loss": 2.468073844909668, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 15.366492146596858, | |
| "learning_rate": 0.0004214707137256656, | |
| "loss": 2.471833419799805, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 15.387434554973822, | |
| "learning_rate": 0.0004211835104802349, | |
| "loss": 2.499461364746094, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 15.408376963350785, | |
| "learning_rate": 0.00042089689356342115, | |
| "loss": 2.4492721557617188, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 15.429319371727749, | |
| "learning_rate": 0.0004206108609829418, | |
| "loss": 2.4671262741088866, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 15.450261780104713, | |
| "learning_rate": 0.00042032541075597875, | |
| "loss": 2.465005111694336, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 15.471204188481675, | |
| "learning_rate": 0.0004200405409091207, | |
| "loss": 2.4648488998413085, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 15.492146596858639, | |
| "learning_rate": 0.00041975624947830593, | |
| "loss": 2.463612174987793, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 15.513089005235603, | |
| "learning_rate": 0.00041947253450876515, | |
| "loss": 2.4697538375854493, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 15.534031413612565, | |
| "learning_rate": 0.00041918939405496546, | |
| "loss": 2.45694637298584, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 15.55497382198953, | |
| "learning_rate": 0.00041890682618055396, | |
| "loss": 2.4443153381347655, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 15.575916230366492, | |
| "learning_rate": 0.0004186248289583023, | |
| "loss": 2.445983123779297, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 15.596858638743456, | |
| "learning_rate": 0.00041834340047005144, | |
| "loss": 2.489885711669922, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 15.61780104712042, | |
| "learning_rate": 0.0004180625388066569, | |
| "loss": 2.4711660385131835, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 15.638743455497382, | |
| "learning_rate": 0.00041778224206793433, | |
| "loss": 2.4884315490722657, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 15.659685863874346, | |
| "learning_rate": 0.00041750250836260536, | |
| "loss": 2.477284240722656, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 15.680628272251308, | |
| "learning_rate": 0.0004172233358082443, | |
| "loss": 2.475067901611328, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 15.701570680628272, | |
| "learning_rate": 0.00041694472253122467, | |
| "loss": 2.482602119445801, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 15.722513089005236, | |
| "learning_rate": 0.00041666666666666664, | |
| "loss": 2.442608642578125, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 15.743455497382199, | |
| "learning_rate": 0.0004163891663583843, | |
| "loss": 2.468288040161133, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 15.764397905759163, | |
| "learning_rate": 0.00041611221975883396, | |
| "loss": 2.4465059280395507, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 15.785340314136125, | |
| "learning_rate": 0.00041583582502906203, | |
| "loss": 2.4614633560180663, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 15.806282722513089, | |
| "learning_rate": 0.0004155599803386543, | |
| "loss": 2.4629968643188476, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 15.827225130890053, | |
| "learning_rate": 0.0004152846838656846, | |
| "loss": 2.454400062561035, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 15.848167539267015, | |
| "learning_rate": 0.00041500993379666443, | |
| "loss": 2.467230224609375, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 15.86910994764398, | |
| "learning_rate": 0.0004147357283264927, | |
| "loss": 2.442008209228516, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 15.890052356020943, | |
| "learning_rate": 0.000414462065658406, | |
| "loss": 2.4506603240966798, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 15.910994764397905, | |
| "learning_rate": 0.0004141889440039292, | |
| "loss": 2.4443122863769533, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 15.93193717277487, | |
| "learning_rate": 0.00041391636158282614, | |
| "loss": 2.4457521438598633, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 15.952879581151832, | |
| "learning_rate": 0.00041364431662305114, | |
| "loss": 2.457781982421875, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 15.973821989528796, | |
| "learning_rate": 0.0004133728073607005, | |
| "loss": 2.440464210510254, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 15.99476439790576, | |
| "learning_rate": 0.00041310183203996446, | |
| "loss": 2.4534429550170898, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 15.998952879581152, | |
| "eval_loss": 2.5449907779693604, | |
| "eval_runtime": 73.6337, | |
| "eval_samples_per_second": 148.791, | |
| "step": 7632 | |
| }, | |
| { | |
| "epoch": 16.016753926701572, | |
| "learning_rate": 0.0004128313889130795, | |
| "loss": 2.5835424423217774, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 16.037696335078532, | |
| "learning_rate": 0.0004125614762402809, | |
| "loss": 2.458993148803711, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 16.058638743455496, | |
| "learning_rate": 0.00041229209228975627, | |
| "loss": 2.471218299865723, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 16.07958115183246, | |
| "learning_rate": 0.000412023235337598, | |
| "loss": 2.464751052856445, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 16.100523560209425, | |
| "learning_rate": 0.00041175490366775766, | |
| "loss": 2.4599708557128905, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 16.12146596858639, | |
| "learning_rate": 0.0004114870955719997, | |
| "loss": 2.4355844497680663, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 16.14240837696335, | |
| "learning_rate": 0.00041121980934985563, | |
| "loss": 2.4272241592407227, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 16.163350785340313, | |
| "learning_rate": 0.000410953043308579, | |
| "loss": 2.4612340927124023, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 16.184293193717277, | |
| "learning_rate": 0.0004106867957631001, | |
| "loss": 2.465089797973633, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 16.20523560209424, | |
| "learning_rate": 0.00041042106503598165, | |
| "loss": 2.451694297790527, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 16.226178010471205, | |
| "learning_rate": 0.0004101558494573738, | |
| "loss": 2.464099884033203, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 16.24712041884817, | |
| "learning_rate": 0.0004098911473649706, | |
| "loss": 2.448426055908203, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 16.26806282722513, | |
| "learning_rate": 0.0004096269571039658, | |
| "loss": 2.455006217956543, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 16.289005235602094, | |
| "learning_rate": 0.00040936327702701005, | |
| "loss": 2.453194808959961, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 16.309947643979058, | |
| "learning_rate": 0.00040910010549416687, | |
| "loss": 2.4759195327758787, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 16.330890052356022, | |
| "learning_rate": 0.0004088374408728706, | |
| "loss": 2.4628747940063476, | |
| "step": 7790 | |
| }, | |
| { | |
| "epoch": 16.351832460732986, | |
| "learning_rate": 0.0004085752815378834, | |
| "loss": 2.446619415283203, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 16.372774869109946, | |
| "learning_rate": 0.0004083136258712532, | |
| "loss": 2.4754364013671877, | |
| "step": 7810 | |
| }, | |
| { | |
| "epoch": 16.39371727748691, | |
| "learning_rate": 0.0004080524722622717, | |
| "loss": 2.4566783905029297, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 16.414659685863874, | |
| "learning_rate": 0.00040779181910743294, | |
| "loss": 2.426336479187012, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 16.43560209424084, | |
| "learning_rate": 0.0004075316648103914, | |
| "loss": 2.460182762145996, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 16.456544502617803, | |
| "learning_rate": 0.0004072720077819216, | |
| "loss": 2.454692268371582, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 16.477486910994763, | |
| "learning_rate": 0.0004070128464398768, | |
| "loss": 2.4589263916015627, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 16.498429319371727, | |
| "learning_rate": 0.0004067541792091489, | |
| "loss": 2.472345161437988, | |
| "step": 7870 | |
| }, | |
| { | |
| "epoch": 16.51937172774869, | |
| "learning_rate": 0.0004064960045216279, | |
| "loss": 2.427416229248047, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 16.540314136125655, | |
| "learning_rate": 0.0004062383208161624, | |
| "loss": 2.454151725769043, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 16.56125654450262, | |
| "learning_rate": 0.0004059811265385193, | |
| "loss": 2.4490371704101563, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 16.58219895287958, | |
| "learning_rate": 0.00040572442014134516, | |
| "loss": 2.479467010498047, | |
| "step": 7910 | |
| }, | |
| { | |
| "epoch": 16.603141361256544, | |
| "learning_rate": 0.00040546820008412654, | |
| "loss": 2.4391218185424806, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 16.624083769633508, | |
| "learning_rate": 0.0004052124648331515, | |
| "loss": 2.455718421936035, | |
| "step": 7930 | |
| }, | |
| { | |
| "epoch": 16.645026178010472, | |
| "learning_rate": 0.00040495721286147086, | |
| "loss": 2.4620994567871093, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 16.665968586387436, | |
| "learning_rate": 0.00040470244264886006, | |
| "loss": 2.448670196533203, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 16.686910994764396, | |
| "learning_rate": 0.00040444815268178097, | |
| "loss": 2.426989936828613, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 16.70785340314136, | |
| "learning_rate": 0.00040419434145334414, | |
| "loss": 2.447972869873047, | |
| "step": 7970 | |
| }, | |
| { | |
| "epoch": 16.728795811518324, | |
| "learning_rate": 0.00040394100746327154, | |
| "loss": 2.457029342651367, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 16.74973821989529, | |
| "learning_rate": 0.0004036881492178589, | |
| "loss": 2.4539730072021486, | |
| "step": 7990 | |
| }, | |
| { | |
| "epoch": 16.770680628272252, | |
| "learning_rate": 0.00040343576522993926, | |
| "loss": 2.4607629776000977, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 16.791623036649213, | |
| "learning_rate": 0.00040318385401884554, | |
| "loss": 2.43496036529541, | |
| "step": 8010 | |
| }, | |
| { | |
| "epoch": 16.812565445026177, | |
| "learning_rate": 0.00040293241411037484, | |
| "loss": 2.423869323730469, | |
| "step": 8020 | |
| }, | |
| { | |
| "epoch": 16.83350785340314, | |
| "learning_rate": 0.00040268144403675154, | |
| "loss": 2.4423187255859373, | |
| "step": 8030 | |
| }, | |
| { | |
| "epoch": 16.854450261780105, | |
| "learning_rate": 0.0004024309423365915, | |
| "loss": 2.4698711395263673, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 16.87539267015707, | |
| "learning_rate": 0.0004021809075548668, | |
| "loss": 2.423082160949707, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 16.89633507853403, | |
| "learning_rate": 0.0004019313382428694, | |
| "loss": 2.443895149230957, | |
| "step": 8060 | |
| }, | |
| { | |
| "epoch": 16.917277486910994, | |
| "learning_rate": 0.00040168223295817656, | |
| "loss": 2.455313301086426, | |
| "step": 8070 | |
| }, | |
| { | |
| "epoch": 16.938219895287958, | |
| "learning_rate": 0.00040143359026461554, | |
| "loss": 2.415020751953125, | |
| "step": 8080 | |
| }, | |
| { | |
| "epoch": 16.95916230366492, | |
| "learning_rate": 0.000401185408732229, | |
| "loss": 2.462967109680176, | |
| "step": 8090 | |
| }, | |
| { | |
| "epoch": 16.980104712041886, | |
| "learning_rate": 0.0004009376869372401, | |
| "loss": 2.416962242126465, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 16.99895287958115, | |
| "eval_loss": 2.5404856204986572, | |
| "eval_runtime": 73.757, | |
| "eval_samples_per_second": 148.542, | |
| "step": 8109 | |
| }, | |
| { | |
| "epoch": 17.002094240837696, | |
| "learning_rate": 0.00040069042346201864, | |
| "loss": 2.549093818664551, | |
| "step": 8110 | |
| }, | |
| { | |
| "epoch": 17.02303664921466, | |
| "learning_rate": 0.00040044361689504655, | |
| "loss": 2.452895736694336, | |
| "step": 8120 | |
| }, | |
| { | |
| "epoch": 17.043979057591624, | |
| "learning_rate": 0.0004001972658308847, | |
| "loss": 2.460617446899414, | |
| "step": 8130 | |
| }, | |
| { | |
| "epoch": 17.064921465968588, | |
| "learning_rate": 0.0003999513688701383, | |
| "loss": 2.4506912231445312, | |
| "step": 8140 | |
| }, | |
| { | |
| "epoch": 17.08586387434555, | |
| "learning_rate": 0.00039970592461942457, | |
| "loss": 2.440316581726074, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 17.106806282722513, | |
| "learning_rate": 0.00039946093169133874, | |
| "loss": 2.4249364852905275, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 17.127748691099477, | |
| "learning_rate": 0.0003992163887044217, | |
| "loss": 2.4489822387695312, | |
| "step": 8170 | |
| }, | |
| { | |
| "epoch": 17.14869109947644, | |
| "learning_rate": 0.0003989722942831268, | |
| "loss": 2.455015754699707, | |
| "step": 8180 | |
| }, | |
| { | |
| "epoch": 17.169633507853405, | |
| "learning_rate": 0.0003987286470577879, | |
| "loss": 2.4564001083374025, | |
| "step": 8190 | |
| }, | |
| { | |
| "epoch": 17.190575916230365, | |
| "learning_rate": 0.0003984854456645864, | |
| "loss": 2.4481569290161134, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 17.21151832460733, | |
| "learning_rate": 0.0003982426887455199, | |
| "loss": 2.4090858459472657, | |
| "step": 8210 | |
| }, | |
| { | |
| "epoch": 17.232460732984293, | |
| "learning_rate": 0.00039800037494836985, | |
| "loss": 2.4279315948486326, | |
| "step": 8220 | |
| }, | |
| { | |
| "epoch": 17.253403141361257, | |
| "learning_rate": 0.00039775850292667005, | |
| "loss": 2.4328563690185545, | |
| "step": 8230 | |
| }, | |
| { | |
| "epoch": 17.27434554973822, | |
| "learning_rate": 0.0003975170713396753, | |
| "loss": 2.426299476623535, | |
| "step": 8240 | |
| }, | |
| { | |
| "epoch": 17.295287958115182, | |
| "learning_rate": 0.0003972760788523301, | |
| "loss": 2.424925994873047, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 17.316230366492146, | |
| "learning_rate": 0.0003970355241352378, | |
| "loss": 2.4173357009887697, | |
| "step": 8260 | |
| }, | |
| { | |
| "epoch": 17.33717277486911, | |
| "learning_rate": 0.00039679540586462953, | |
| "loss": 2.4525693893432616, | |
| "step": 8270 | |
| }, | |
| { | |
| "epoch": 17.358115183246074, | |
| "learning_rate": 0.00039655572272233384, | |
| "loss": 2.473075103759766, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 17.379057591623038, | |
| "learning_rate": 0.0003963164733957462, | |
| "loss": 2.422397232055664, | |
| "step": 8290 | |
| }, | |
| { | |
| "epoch": 17.4, | |
| "learning_rate": 0.00039607765657779864, | |
| "loss": 2.441000556945801, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 17.420942408376963, | |
| "learning_rate": 0.0003958392709669304, | |
| "loss": 2.4337257385253905, | |
| "step": 8310 | |
| }, | |
| { | |
| "epoch": 17.441884816753927, | |
| "learning_rate": 0.00039560131526705723, | |
| "loss": 2.4255434036254884, | |
| "step": 8320 | |
| }, | |
| { | |
| "epoch": 17.46282722513089, | |
| "learning_rate": 0.0003953637881875425, | |
| "loss": 2.4355316162109375, | |
| "step": 8330 | |
| }, | |
| { | |
| "epoch": 17.483769633507855, | |
| "learning_rate": 0.0003951266884431675, | |
| "loss": 2.428698921203613, | |
| "step": 8340 | |
| }, | |
| { | |
| "epoch": 17.504712041884815, | |
| "learning_rate": 0.00039489001475410214, | |
| "loss": 2.4530813217163088, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 17.52565445026178, | |
| "learning_rate": 0.00039465376584587626, | |
| "loss": 2.4496335983276367, | |
| "step": 8360 | |
| }, | |
| { | |
| "epoch": 17.546596858638743, | |
| "learning_rate": 0.00039441794044935054, | |
| "loss": 2.425421142578125, | |
| "step": 8370 | |
| }, | |
| { | |
| "epoch": 17.567539267015707, | |
| "learning_rate": 0.00039418253730068797, | |
| "loss": 2.4285154342651367, | |
| "step": 8380 | |
| }, | |
| { | |
| "epoch": 17.58848167539267, | |
| "learning_rate": 0.0003939475551413253, | |
| "loss": 2.4037647247314453, | |
| "step": 8390 | |
| }, | |
| { | |
| "epoch": 17.609424083769632, | |
| "learning_rate": 0.000393712992717945, | |
| "loss": 2.423297119140625, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 17.630366492146596, | |
| "learning_rate": 0.0003934788487824469, | |
| "loss": 2.47174186706543, | |
| "step": 8410 | |
| }, | |
| { | |
| "epoch": 17.65130890052356, | |
| "learning_rate": 0.0003932451220919205, | |
| "loss": 2.441014289855957, | |
| "step": 8420 | |
| }, | |
| { | |
| "epoch": 17.672251308900524, | |
| "learning_rate": 0.0003930118114086172, | |
| "loss": 2.4317821502685546, | |
| "step": 8430 | |
| }, | |
| { | |
| "epoch": 17.693193717277488, | |
| "learning_rate": 0.00039277891549992266, | |
| "loss": 2.4377744674682615, | |
| "step": 8440 | |
| }, | |
| { | |
| "epoch": 17.71413612565445, | |
| "learning_rate": 0.0003925464331383298, | |
| "loss": 2.4461442947387697, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 17.735078534031413, | |
| "learning_rate": 0.00039231436310141113, | |
| "loss": 2.466485595703125, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 17.756020942408377, | |
| "learning_rate": 0.00039208270417179214, | |
| "loss": 2.4318614959716798, | |
| "step": 8470 | |
| }, | |
| { | |
| "epoch": 17.77696335078534, | |
| "learning_rate": 0.0003918514551371243, | |
| "loss": 2.431291389465332, | |
| "step": 8480 | |
| }, | |
| { | |
| "epoch": 17.797905759162305, | |
| "learning_rate": 0.0003916206147900585, | |
| "loss": 2.43109130859375, | |
| "step": 8490 | |
| }, | |
| { | |
| "epoch": 17.81884816753927, | |
| "learning_rate": 0.00039139018192821845, | |
| "loss": 2.4323259353637696, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 17.83979057591623, | |
| "learning_rate": 0.00039116015535417445, | |
| "loss": 2.439468193054199, | |
| "step": 8510 | |
| }, | |
| { | |
| "epoch": 17.860732984293193, | |
| "learning_rate": 0.00039093053387541745, | |
| "loss": 2.4567943572998048, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 17.881675392670157, | |
| "learning_rate": 0.00039070131630433274, | |
| "loss": 2.4346736907958983, | |
| "step": 8530 | |
| }, | |
| { | |
| "epoch": 17.90261780104712, | |
| "learning_rate": 0.00039047250145817424, | |
| "loss": 2.441089630126953, | |
| "step": 8540 | |
| }, | |
| { | |
| "epoch": 17.923560209424085, | |
| "learning_rate": 0.00039024408815903914, | |
| "loss": 2.4277088165283205, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 17.944502617801046, | |
| "learning_rate": 0.0003900160752338421, | |
| "loss": 2.4228445053100587, | |
| "step": 8560 | |
| }, | |
| { | |
| "epoch": 17.96544502617801, | |
| "learning_rate": 0.00038978846151429, | |
| "loss": 2.399433708190918, | |
| "step": 8570 | |
| }, | |
| { | |
| "epoch": 17.986387434554974, | |
| "learning_rate": 0.0003895612458368572, | |
| "loss": 2.4009246826171875, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 17.99895287958115, | |
| "eval_loss": 2.5318360328674316, | |
| "eval_runtime": 74.0596, | |
| "eval_samples_per_second": 147.935, | |
| "step": 8586 | |
| }, | |
| { | |
| "epoch": 18.008376963350784, | |
| "learning_rate": 0.00038933442704275974, | |
| "loss": 2.537807655334473, | |
| "step": 8590 | |
| }, | |
| { | |
| "epoch": 18.02931937172775, | |
| "learning_rate": 0.0003891080039779314, | |
| "loss": 2.446313667297363, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 18.050261780104712, | |
| "learning_rate": 0.0003888819754929986, | |
| "loss": 2.4345209121704103, | |
| "step": 8610 | |
| }, | |
| { | |
| "epoch": 18.071204188481676, | |
| "learning_rate": 0.0003886563404432558, | |
| "loss": 2.437006187438965, | |
| "step": 8620 | |
| }, | |
| { | |
| "epoch": 18.09214659685864, | |
| "learning_rate": 0.0003884310976886414, | |
| "loss": 2.420798110961914, | |
| "step": 8630 | |
| }, | |
| { | |
| "epoch": 18.1130890052356, | |
| "learning_rate": 0.0003882062460937135, | |
| "loss": 2.4323537826538084, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 18.134031413612565, | |
| "learning_rate": 0.0003879817845276255, | |
| "loss": 2.442038345336914, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 18.15497382198953, | |
| "learning_rate": 0.0003877577118641029, | |
| "loss": 2.433667755126953, | |
| "step": 8660 | |
| }, | |
| { | |
| "epoch": 18.175916230366493, | |
| "learning_rate": 0.00038753402698141903, | |
| "loss": 2.424707221984863, | |
| "step": 8670 | |
| }, | |
| { | |
| "epoch": 18.196858638743457, | |
| "learning_rate": 0.0003873107287623715, | |
| "loss": 2.4348966598510744, | |
| "step": 8680 | |
| }, | |
| { | |
| "epoch": 18.217801047120417, | |
| "learning_rate": 0.00038708781609425905, | |
| "loss": 2.404917907714844, | |
| "step": 8690 | |
| }, | |
| { | |
| "epoch": 18.23874345549738, | |
| "learning_rate": 0.000386865287868858, | |
| "loss": 2.4346105575561525, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 18.259685863874346, | |
| "learning_rate": 0.0003866431429823993, | |
| "loss": 2.442304992675781, | |
| "step": 8710 | |
| }, | |
| { | |
| "epoch": 18.28062827225131, | |
| "learning_rate": 0.00038642138033554525, | |
| "loss": 2.42406005859375, | |
| "step": 8720 | |
| }, | |
| { | |
| "epoch": 18.301570680628274, | |
| "learning_rate": 0.00038619999883336703, | |
| "loss": 2.440979766845703, | |
| "step": 8730 | |
| }, | |
| { | |
| "epoch": 18.322513089005234, | |
| "learning_rate": 0.0003859789973853217, | |
| "loss": 2.440751075744629, | |
| "step": 8740 | |
| }, | |
| { | |
| "epoch": 18.343455497382198, | |
| "learning_rate": 0.0003857583749052298, | |
| "loss": 2.4363412857055664, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 18.364397905759162, | |
| "learning_rate": 0.0003855381303112527, | |
| "loss": 2.4235382080078125, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 18.385340314136126, | |
| "learning_rate": 0.0003853182625258708, | |
| "loss": 2.4238630294799806, | |
| "step": 8770 | |
| }, | |
| { | |
| "epoch": 18.40628272251309, | |
| "learning_rate": 0.0003850987704758608, | |
| "loss": 2.426643943786621, | |
| "step": 8780 | |
| }, | |
| { | |
| "epoch": 18.42722513089005, | |
| "learning_rate": 0.00038487965309227413, | |
| "loss": 2.438970947265625, | |
| "step": 8790 | |
| }, | |
| { | |
| "epoch": 18.448167539267015, | |
| "learning_rate": 0.0003846609093104148, | |
| "loss": 2.423859786987305, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 18.46910994764398, | |
| "learning_rate": 0.00038444253806981784, | |
| "loss": 2.4040243148803713, | |
| "step": 8810 | |
| }, | |
| { | |
| "epoch": 18.490052356020943, | |
| "learning_rate": 0.00038422453831422784, | |
| "loss": 2.420393371582031, | |
| "step": 8820 | |
| }, | |
| { | |
| "epoch": 18.510994764397907, | |
| "learning_rate": 0.0003840069089915771, | |
| "loss": 2.427932929992676, | |
| "step": 8830 | |
| }, | |
| { | |
| "epoch": 18.531937172774867, | |
| "learning_rate": 0.00038378964905396454, | |
| "loss": 2.419098663330078, | |
| "step": 8840 | |
| }, | |
| { | |
| "epoch": 18.55287958115183, | |
| "learning_rate": 0.00038357275745763475, | |
| "loss": 2.4184850692749023, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 18.573821989528795, | |
| "learning_rate": 0.0003833562331629563, | |
| "loss": 2.402060127258301, | |
| "step": 8860 | |
| }, | |
| { | |
| "epoch": 18.59476439790576, | |
| "learning_rate": 0.0003831400751344014, | |
| "loss": 2.403904914855957, | |
| "step": 8870 | |
| }, | |
| { | |
| "epoch": 18.615706806282724, | |
| "learning_rate": 0.00038292428234052486, | |
| "loss": 2.4094032287597655, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 18.636649214659684, | |
| "learning_rate": 0.0003827088537539434, | |
| "loss": 2.3887189865112304, | |
| "step": 8890 | |
| }, | |
| { | |
| "epoch": 18.657591623036648, | |
| "learning_rate": 0.00038249378835131535, | |
| "loss": 2.4003849029541016, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 18.678534031413612, | |
| "learning_rate": 0.0003822790851133196, | |
| "loss": 2.4100620269775392, | |
| "step": 8910 | |
| }, | |
| { | |
| "epoch": 18.699476439790576, | |
| "learning_rate": 0.00038206474302463617, | |
| "loss": 2.4087665557861326, | |
| "step": 8920 | |
| }, | |
| { | |
| "epoch": 18.72041884816754, | |
| "learning_rate": 0.00038185076107392544, | |
| "loss": 2.4067865371704102, | |
| "step": 8930 | |
| }, | |
| { | |
| "epoch": 18.741361256544504, | |
| "learning_rate": 0.0003816371382538082, | |
| "loss": 2.3902347564697264, | |
| "step": 8940 | |
| }, | |
| { | |
| "epoch": 18.762303664921465, | |
| "learning_rate": 0.0003814238735608459, | |
| "loss": 2.4000757217407225, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 18.78324607329843, | |
| "learning_rate": 0.0003812109659955207, | |
| "loss": 2.4148767471313475, | |
| "step": 8960 | |
| }, | |
| { | |
| "epoch": 18.804188481675393, | |
| "learning_rate": 0.00038099841456221617, | |
| "loss": 2.414336395263672, | |
| "step": 8970 | |
| }, | |
| { | |
| "epoch": 18.825130890052357, | |
| "learning_rate": 0.0003807862182691969, | |
| "loss": 2.440867042541504, | |
| "step": 8980 | |
| }, | |
| { | |
| "epoch": 18.84607329842932, | |
| "learning_rate": 0.00038057437612859003, | |
| "loss": 2.4532596588134767, | |
| "step": 8990 | |
| }, | |
| { | |
| "epoch": 18.86701570680628, | |
| "learning_rate": 0.0003803628871563653, | |
| "loss": 2.4020782470703126, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 18.887958115183245, | |
| "learning_rate": 0.0003801517503723161, | |
| "loss": 2.430096435546875, | |
| "step": 9010 | |
| }, | |
| { | |
| "epoch": 18.90890052356021, | |
| "learning_rate": 0.00037994096480004037, | |
| "loss": 2.419812774658203, | |
| "step": 9020 | |
| }, | |
| { | |
| "epoch": 18.929842931937173, | |
| "learning_rate": 0.0003797305294669214, | |
| "loss": 2.4075344085693358, | |
| "step": 9030 | |
| }, | |
| { | |
| "epoch": 18.950785340314138, | |
| "learning_rate": 0.00037952044340410954, | |
| "loss": 2.4337480545043944, | |
| "step": 9040 | |
| }, | |
| { | |
| "epoch": 18.971727748691098, | |
| "learning_rate": 0.00037931070564650276, | |
| "loss": 2.4088159561157227, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 18.992670157068062, | |
| "learning_rate": 0.0003791013152327286, | |
| "loss": 2.4013919830322266, | |
| "step": 9060 | |
| }, | |
| { | |
| "epoch": 18.99895287958115, | |
| "eval_loss": 2.529340982437134, | |
| "eval_runtime": 73.527, | |
| "eval_samples_per_second": 149.007, | |
| "step": 9063 | |
| }, | |
| { | |
| "epoch": 19.014659685863876, | |
| "learning_rate": 0.00037889227120512545, | |
| "loss": 2.5226316452026367, | |
| "step": 9070 | |
| }, | |
| { | |
| "epoch": 19.035602094240836, | |
| "learning_rate": 0.0003786835726097239, | |
| "loss": 2.4024560928344725, | |
| "step": 9080 | |
| }, | |
| { | |
| "epoch": 19.0565445026178, | |
| "learning_rate": 0.00037847521849622895, | |
| "loss": 2.4342859268188475, | |
| "step": 9090 | |
| }, | |
| { | |
| "epoch": 19.077486910994764, | |
| "learning_rate": 0.0003782672079180015, | |
| "loss": 2.4292444229125976, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 19.09842931937173, | |
| "learning_rate": 0.0003780595399320404, | |
| "loss": 2.4289926528930663, | |
| "step": 9110 | |
| }, | |
| { | |
| "epoch": 19.119371727748693, | |
| "learning_rate": 0.00037785221359896444, | |
| "loss": 2.440321159362793, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 19.140314136125653, | |
| "learning_rate": 0.00037764522798299443, | |
| "loss": 2.4353168487548826, | |
| "step": 9130 | |
| }, | |
| { | |
| "epoch": 19.161256544502617, | |
| "learning_rate": 0.0003774385821519358, | |
| "loss": 2.389999008178711, | |
| "step": 9140 | |
| }, | |
| { | |
| "epoch": 19.18219895287958, | |
| "learning_rate": 0.0003772322751771605, | |
| "loss": 2.4299448013305662, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 19.203141361256545, | |
| "learning_rate": 0.00037702630613358986, | |
| "loss": 2.4094564437866213, | |
| "step": 9160 | |
| }, | |
| { | |
| "epoch": 19.22408376963351, | |
| "learning_rate": 0.0003768206740996769, | |
| "loss": 2.416705322265625, | |
| "step": 9170 | |
| }, | |
| { | |
| "epoch": 19.24502617801047, | |
| "learning_rate": 0.00037661537815738915, | |
| "loss": 2.4174514770507813, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 19.265968586387434, | |
| "learning_rate": 0.00037641041739219143, | |
| "loss": 2.4112581253051757, | |
| "step": 9190 | |
| }, | |
| { | |
| "epoch": 19.286910994764398, | |
| "learning_rate": 0.00037620579089302876, | |
| "loss": 2.4125255584716796, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 19.307853403141362, | |
| "learning_rate": 0.0003760014977523091, | |
| "loss": 2.41434268951416, | |
| "step": 9210 | |
| }, | |
| { | |
| "epoch": 19.328795811518326, | |
| "learning_rate": 0.00037579753706588697, | |
| "loss": 2.419674301147461, | |
| "step": 9220 | |
| }, | |
| { | |
| "epoch": 19.349738219895286, | |
| "learning_rate": 0.00037559390793304604, | |
| "loss": 2.3896152496337892, | |
| "step": 9230 | |
| }, | |
| { | |
| "epoch": 19.37068062827225, | |
| "learning_rate": 0.00037539060945648286, | |
| "loss": 2.4006847381591796, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 19.391623036649214, | |
| "learning_rate": 0.00037518764074229014, | |
| "loss": 2.4157575607299804, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 19.41256544502618, | |
| "learning_rate": 0.00037498500089994, | |
| "loss": 2.398466873168945, | |
| "step": 9260 | |
| }, | |
| { | |
| "epoch": 19.433507853403142, | |
| "learning_rate": 0.00037478268904226795, | |
| "loss": 2.4136272430419923, | |
| "step": 9270 | |
| }, | |
| { | |
| "epoch": 19.454450261780103, | |
| "learning_rate": 0.00037458070428545635, | |
| "loss": 2.412180709838867, | |
| "step": 9280 | |
| }, | |
| { | |
| "epoch": 19.475392670157067, | |
| "learning_rate": 0.00037437904574901817, | |
| "loss": 2.417103385925293, | |
| "step": 9290 | |
| }, | |
| { | |
| "epoch": 19.49633507853403, | |
| "learning_rate": 0.00037417771255578104, | |
| "loss": 2.3830541610717773, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 19.517277486910995, | |
| "learning_rate": 0.00037397670383187097, | |
| "loss": 2.4057411193847655, | |
| "step": 9310 | |
| }, | |
| { | |
| "epoch": 19.53821989528796, | |
| "learning_rate": 0.0003737760187066967, | |
| "loss": 2.429146957397461, | |
| "step": 9320 | |
| }, | |
| { | |
| "epoch": 19.559162303664923, | |
| "learning_rate": 0.00037357565631293365, | |
| "loss": 2.4229619979858397, | |
| "step": 9330 | |
| }, | |
| { | |
| "epoch": 19.580104712041884, | |
| "learning_rate": 0.00037337561578650833, | |
| "loss": 2.4030439376831056, | |
| "step": 9340 | |
| }, | |
| { | |
| "epoch": 19.601047120418848, | |
| "learning_rate": 0.00037317589626658255, | |
| "loss": 2.4122753143310547, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 19.62198952879581, | |
| "learning_rate": 0.0003729764968955379, | |
| "loss": 2.420066070556641, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 19.642931937172776, | |
| "learning_rate": 0.00037277741681896045, | |
| "loss": 2.4116867065429686, | |
| "step": 9370 | |
| }, | |
| { | |
| "epoch": 19.66387434554974, | |
| "learning_rate": 0.0003725786551856251, | |
| "loss": 2.4222272872924804, | |
| "step": 9380 | |
| }, | |
| { | |
| "epoch": 19.6848167539267, | |
| "learning_rate": 0.0003723802111474804, | |
| "loss": 2.398889350891113, | |
| "step": 9390 | |
| }, | |
| { | |
| "epoch": 19.705759162303664, | |
| "learning_rate": 0.0003721820838596335, | |
| "loss": 2.3947797775268556, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 19.72670157068063, | |
| "learning_rate": 0.00037198427248033485, | |
| "loss": 2.39971981048584, | |
| "step": 9410 | |
| }, | |
| { | |
| "epoch": 19.747643979057592, | |
| "learning_rate": 0.00037178677617096337, | |
| "loss": 2.3918169021606444, | |
| "step": 9420 | |
| }, | |
| { | |
| "epoch": 19.768586387434556, | |
| "learning_rate": 0.0003715895940960111, | |
| "loss": 2.393696975708008, | |
| "step": 9430 | |
| }, | |
| { | |
| "epoch": 19.789528795811517, | |
| "learning_rate": 0.000371392725423069, | |
| "loss": 2.403204345703125, | |
| "step": 9440 | |
| }, | |
| { | |
| "epoch": 19.81047120418848, | |
| "learning_rate": 0.00037119616932281165, | |
| "loss": 2.3984851837158203, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 19.831413612565445, | |
| "learning_rate": 0.00037099992496898276, | |
| "loss": 2.442034149169922, | |
| "step": 9460 | |
| }, | |
| { | |
| "epoch": 19.85235602094241, | |
| "learning_rate": 0.00037080399153838065, | |
| "loss": 2.3905046463012694, | |
| "step": 9470 | |
| }, | |
| { | |
| "epoch": 19.873298429319373, | |
| "learning_rate": 0.00037060836821084373, | |
| "loss": 2.4114078521728515, | |
| "step": 9480 | |
| }, | |
| { | |
| "epoch": 19.894240837696334, | |
| "learning_rate": 0.00037041305416923604, | |
| "loss": 2.393054962158203, | |
| "step": 9490 | |
| }, | |
| { | |
| "epoch": 19.915183246073298, | |
| "learning_rate": 0.0003702180485994327, | |
| "loss": 2.388008689880371, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 19.93612565445026, | |
| "learning_rate": 0.00037002335069030614, | |
| "loss": 2.4009252548217774, | |
| "step": 9510 | |
| }, | |
| { | |
| "epoch": 19.957068062827226, | |
| "learning_rate": 0.0003698289596337116, | |
| "loss": 2.3996566772460937, | |
| "step": 9520 | |
| }, | |
| { | |
| "epoch": 19.97801047120419, | |
| "learning_rate": 0.00036963487462447303, | |
| "loss": 2.3795480728149414, | |
| "step": 9530 | |
| }, | |
| { | |
| "epoch": 19.99895287958115, | |
| "learning_rate": 0.0003694410948603691, | |
| "loss": 2.4031463623046876, | |
| "step": 9540 | |
| }, | |
| { | |
| "epoch": 19.99895287958115, | |
| "eval_loss": 2.5357587337493896, | |
| "eval_runtime": 73.9758, | |
| "eval_samples_per_second": 148.102, | |
| "step": 9540 | |
| }, | |
| { | |
| "epoch": 20.020942408376964, | |
| "learning_rate": 0.00036924761954211944, | |
| "loss": 2.5228919982910156, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 20.041884816753928, | |
| "learning_rate": 0.0003690544478733707, | |
| "loss": 2.3785959243774415, | |
| "step": 9560 | |
| }, | |
| { | |
| "epoch": 20.06282722513089, | |
| "learning_rate": 0.0003688615790606828, | |
| "loss": 2.4037866592407227, | |
| "step": 9570 | |
| }, | |
| { | |
| "epoch": 20.083769633507853, | |
| "learning_rate": 0.000368669012313515, | |
| "loss": 2.3924365997314454, | |
| "step": 9580 | |
| }, | |
| { | |
| "epoch": 20.104712041884817, | |
| "learning_rate": 0.0003684767468442126, | |
| "loss": 2.4029878616333007, | |
| "step": 9590 | |
| }, | |
| { | |
| "epoch": 20.12565445026178, | |
| "learning_rate": 0.0003682847818679935, | |
| "loss": 2.387605094909668, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 20.146596858638745, | |
| "learning_rate": 0.0003680931166029342, | |
| "loss": 2.417312431335449, | |
| "step": 9610 | |
| }, | |
| { | |
| "epoch": 20.167539267015705, | |
| "learning_rate": 0.000367901750269957, | |
| "loss": 2.381046485900879, | |
| "step": 9620 | |
| }, | |
| { | |
| "epoch": 20.18848167539267, | |
| "learning_rate": 0.00036771068209281657, | |
| "loss": 2.376552963256836, | |
| "step": 9630 | |
| }, | |
| { | |
| "epoch": 20.209424083769633, | |
| "learning_rate": 0.0003675199112980863, | |
| "loss": 2.4089908599853516, | |
| "step": 9640 | |
| }, | |
| { | |
| "epoch": 20.230366492146597, | |
| "learning_rate": 0.0003673294371151458, | |
| "loss": 2.401862907409668, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 20.25130890052356, | |
| "learning_rate": 0.0003671392587761674, | |
| "loss": 2.406145477294922, | |
| "step": 9660 | |
| }, | |
| { | |
| "epoch": 20.272251308900522, | |
| "learning_rate": 0.0003669493755161031, | |
| "loss": 2.414588737487793, | |
| "step": 9670 | |
| }, | |
| { | |
| "epoch": 20.293193717277486, | |
| "learning_rate": 0.00036675978657267204, | |
| "loss": 2.4057403564453126, | |
| "step": 9680 | |
| }, | |
| { | |
| "epoch": 20.31413612565445, | |
| "learning_rate": 0.00036657049118634733, | |
| "loss": 2.404916000366211, | |
| "step": 9690 | |
| }, | |
| { | |
| "epoch": 20.335078534031414, | |
| "learning_rate": 0.0003663814886003432, | |
| "loss": 2.4110477447509764, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 20.356020942408378, | |
| "learning_rate": 0.00036619277806060276, | |
| "loss": 2.402661895751953, | |
| "step": 9710 | |
| }, | |
| { | |
| "epoch": 20.376963350785342, | |
| "learning_rate": 0.0003660043588157846, | |
| "loss": 2.404218864440918, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 20.397905759162303, | |
| "learning_rate": 0.00036581623011725114, | |
| "loss": 2.4069591522216798, | |
| "step": 9730 | |
| }, | |
| { | |
| "epoch": 20.418848167539267, | |
| "learning_rate": 0.0003656283912190554, | |
| "loss": 2.4185781478881836, | |
| "step": 9740 | |
| }, | |
| { | |
| "epoch": 20.43979057591623, | |
| "learning_rate": 0.00036544084137792883, | |
| "loss": 2.3999982833862306, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 20.460732984293195, | |
| "learning_rate": 0.00036525357985326903, | |
| "loss": 2.3702335357666016, | |
| "step": 9760 | |
| }, | |
| { | |
| "epoch": 20.48167539267016, | |
| "learning_rate": 0.0003650666059071275, | |
| "loss": 2.3878076553344725, | |
| "step": 9770 | |
| }, | |
| { | |
| "epoch": 20.50261780104712, | |
| "learning_rate": 0.00036487991880419725, | |
| "loss": 2.3818979263305664, | |
| "step": 9780 | |
| }, | |
| { | |
| "epoch": 20.523560209424083, | |
| "learning_rate": 0.00036469351781180073, | |
| "loss": 2.363344192504883, | |
| "step": 9790 | |
| }, | |
| { | |
| "epoch": 20.544502617801047, | |
| "learning_rate": 0.00036450740219987765, | |
| "loss": 2.3810457229614257, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 20.56544502617801, | |
| "learning_rate": 0.0003643215712409734, | |
| "loss": 2.411943054199219, | |
| "step": 9810 | |
| }, | |
| { | |
| "epoch": 20.586387434554975, | |
| "learning_rate": 0.00036413602421022653, | |
| "loss": 2.4092056274414064, | |
| "step": 9820 | |
| }, | |
| { | |
| "epoch": 20.607329842931936, | |
| "learning_rate": 0.0003639507603853572, | |
| "loss": 2.3944089889526365, | |
| "step": 9830 | |
| }, | |
| { | |
| "epoch": 20.6282722513089, | |
| "learning_rate": 0.00036376577904665525, | |
| "loss": 2.3668121337890624, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 20.649214659685864, | |
| "learning_rate": 0.00036358107947696876, | |
| "loss": 2.391695022583008, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 20.670157068062828, | |
| "learning_rate": 0.0003633966609616919, | |
| "loss": 2.380820083618164, | |
| "step": 9860 | |
| }, | |
| { | |
| "epoch": 20.691099476439792, | |
| "learning_rate": 0.00036321252278875344, | |
| "loss": 2.372467041015625, | |
| "step": 9870 | |
| }, | |
| { | |
| "epoch": 20.712041884816752, | |
| "learning_rate": 0.00036302866424860566, | |
| "loss": 2.3973648071289064, | |
| "step": 9880 | |
| }, | |
| { | |
| "epoch": 20.732984293193716, | |
| "learning_rate": 0.00036284508463421217, | |
| "loss": 2.3995847702026367, | |
| "step": 9890 | |
| }, | |
| { | |
| "epoch": 20.75392670157068, | |
| "learning_rate": 0.0003626617832410371, | |
| "loss": 2.3931917190551757, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 20.774869109947645, | |
| "learning_rate": 0.00036247875936703335, | |
| "loss": 2.4107311248779295, | |
| "step": 9910 | |
| }, | |
| { | |
| "epoch": 20.79581151832461, | |
| "learning_rate": 0.00036229601231263145, | |
| "loss": 2.367414855957031, | |
| "step": 9920 | |
| }, | |
| { | |
| "epoch": 20.81675392670157, | |
| "learning_rate": 0.0003621135413807282, | |
| "loss": 2.405007171630859, | |
| "step": 9930 | |
| }, | |
| { | |
| "epoch": 20.837696335078533, | |
| "learning_rate": 0.0003619313458766758, | |
| "loss": 2.364247512817383, | |
| "step": 9940 | |
| }, | |
| { | |
| "epoch": 20.858638743455497, | |
| "learning_rate": 0.0003617494251082704, | |
| "loss": 2.3823482513427736, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 20.87958115183246, | |
| "learning_rate": 0.0003615677783857413, | |
| "loss": 2.393014144897461, | |
| "step": 9960 | |
| }, | |
| { | |
| "epoch": 20.900523560209425, | |
| "learning_rate": 0.0003613864050217397, | |
| "loss": 2.3839509963989256, | |
| "step": 9970 | |
| }, | |
| { | |
| "epoch": 20.921465968586386, | |
| "learning_rate": 0.0003612053043313283, | |
| "loss": 2.378824806213379, | |
| "step": 9980 | |
| }, | |
| { | |
| "epoch": 20.94240837696335, | |
| "learning_rate": 0.0003610244756319697, | |
| "loss": 2.3893613815307617, | |
| "step": 9990 | |
| }, | |
| { | |
| "epoch": 20.963350785340314, | |
| "learning_rate": 0.00036084391824351607, | |
| "loss": 2.37738151550293, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 20.984293193717278, | |
| "learning_rate": 0.00036066363148819854, | |
| "loss": 2.389986038208008, | |
| "step": 10010 | |
| }, | |
| { | |
| "epoch": 20.99895287958115, | |
| "eval_loss": 2.5336103439331055, | |
| "eval_runtime": 73.4935, | |
| "eval_samples_per_second": 149.074, | |
| "step": 10017 | |
| }, | |
| { | |
| "epoch": 21.006282722513088, | |
| "learning_rate": 0.000360483614690616, | |
| "loss": 2.528822135925293, | |
| "step": 10020 | |
| }, | |
| { | |
| "epoch": 21.027225130890052, | |
| "learning_rate": 0.00036030386717772494, | |
| "loss": 2.3780399322509767, | |
| "step": 10030 | |
| }, | |
| { | |
| "epoch": 21.048167539267016, | |
| "learning_rate": 0.0003601243882788286, | |
| "loss": 2.3978437423706054, | |
| "step": 10040 | |
| }, | |
| { | |
| "epoch": 21.06910994764398, | |
| "learning_rate": 0.0003599451773255667, | |
| "loss": 2.382208061218262, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 21.09005235602094, | |
| "learning_rate": 0.00035976623365190465, | |
| "loss": 2.375508499145508, | |
| "step": 10060 | |
| }, | |
| { | |
| "epoch": 21.110994764397905, | |
| "learning_rate": 0.0003595875565941235, | |
| "loss": 2.422568511962891, | |
| "step": 10070 | |
| }, | |
| { | |
| "epoch": 21.13193717277487, | |
| "learning_rate": 0.00035940914549080944, | |
| "loss": 2.4112144470214845, | |
| "step": 10080 | |
| }, | |
| { | |
| "epoch": 21.152879581151833, | |
| "learning_rate": 0.0003592309996828435, | |
| "loss": 2.400478172302246, | |
| "step": 10090 | |
| }, | |
| { | |
| "epoch": 21.173821989528797, | |
| "learning_rate": 0.0003590531185133913, | |
| "loss": 2.403495216369629, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 21.194764397905757, | |
| "learning_rate": 0.0003588755013278929, | |
| "loss": 2.375596046447754, | |
| "step": 10110 | |
| }, | |
| { | |
| "epoch": 21.21570680628272, | |
| "learning_rate": 0.00035869814747405306, | |
| "loss": 2.3807771682739256, | |
| "step": 10120 | |
| }, | |
| { | |
| "epoch": 21.236649214659685, | |
| "learning_rate": 0.00035852105630183027, | |
| "loss": 2.40921630859375, | |
| "step": 10130 | |
| }, | |
| { | |
| "epoch": 21.25759162303665, | |
| "learning_rate": 0.0003583442271634278, | |
| "loss": 2.398925018310547, | |
| "step": 10140 | |
| }, | |
| { | |
| "epoch": 21.278534031413614, | |
| "learning_rate": 0.000358167659413283, | |
| "loss": 2.373432731628418, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 21.299476439790578, | |
| "learning_rate": 0.00035799135240805765, | |
| "loss": 2.4216379165649413, | |
| "step": 10160 | |
| }, | |
| { | |
| "epoch": 21.320418848167538, | |
| "learning_rate": 0.0003578153055066282, | |
| "loss": 2.3817609786987304, | |
| "step": 10170 | |
| }, | |
| { | |
| "epoch": 21.341361256544502, | |
| "learning_rate": 0.00035763951807007597, | |
| "loss": 2.4331357955932615, | |
| "step": 10180 | |
| }, | |
| { | |
| "epoch": 21.362303664921466, | |
| "learning_rate": 0.0003574639894616771, | |
| "loss": 2.4034128189086914, | |
| "step": 10190 | |
| }, | |
| { | |
| "epoch": 21.38324607329843, | |
| "learning_rate": 0.0003572887190468934, | |
| "loss": 2.3663650512695313, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 21.404188481675394, | |
| "learning_rate": 0.00035711370619336214, | |
| "loss": 2.3921630859375, | |
| "step": 10210 | |
| }, | |
| { | |
| "epoch": 21.425130890052355, | |
| "learning_rate": 0.00035693895027088694, | |
| "loss": 2.3804367065429686, | |
| "step": 10220 | |
| }, | |
| { | |
| "epoch": 21.44607329842932, | |
| "learning_rate": 0.00035676445065142793, | |
| "loss": 2.4022769927978516, | |
| "step": 10230 | |
| }, | |
| { | |
| "epoch": 21.467015706806283, | |
| "learning_rate": 0.0003565902067090925, | |
| "loss": 2.3611806869506835, | |
| "step": 10240 | |
| }, | |
| { | |
| "epoch": 21.487958115183247, | |
| "learning_rate": 0.0003564162178201257, | |
| "loss": 2.383506202697754, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 21.50890052356021, | |
| "learning_rate": 0.0003562424833629007, | |
| "loss": 2.385580062866211, | |
| "step": 10260 | |
| }, | |
| { | |
| "epoch": 21.52984293193717, | |
| "learning_rate": 0.0003560690027179101, | |
| "loss": 2.3934825897216796, | |
| "step": 10270 | |
| }, | |
| { | |
| "epoch": 21.550785340314135, | |
| "learning_rate": 0.00035589577526775603, | |
| "loss": 2.385503387451172, | |
| "step": 10280 | |
| }, | |
| { | |
| "epoch": 21.5717277486911, | |
| "learning_rate": 0.000355722800397141, | |
| "loss": 2.344258499145508, | |
| "step": 10290 | |
| }, | |
| { | |
| "epoch": 21.592670157068063, | |
| "learning_rate": 0.00035555007749285897, | |
| "loss": 2.4048336029052733, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 21.613612565445028, | |
| "learning_rate": 0.00035537760594378607, | |
| "loss": 2.3891706466674805, | |
| "step": 10310 | |
| }, | |
| { | |
| "epoch": 21.634554973821988, | |
| "learning_rate": 0.00035520538514087155, | |
| "loss": 2.397173309326172, | |
| "step": 10320 | |
| }, | |
| { | |
| "epoch": 21.655497382198952, | |
| "learning_rate": 0.0003550334144771289, | |
| "loss": 2.3752115249633787, | |
| "step": 10330 | |
| }, | |
| { | |
| "epoch": 21.676439790575916, | |
| "learning_rate": 0.00035486169334762637, | |
| "loss": 2.3773225784301757, | |
| "step": 10340 | |
| }, | |
| { | |
| "epoch": 21.69738219895288, | |
| "learning_rate": 0.00035469022114947857, | |
| "loss": 2.410744476318359, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 21.718324607329844, | |
| "learning_rate": 0.00035451899728183736, | |
| "loss": 2.4002202987670898, | |
| "step": 10360 | |
| }, | |
| { | |
| "epoch": 21.739267015706805, | |
| "learning_rate": 0.00035434802114588305, | |
| "loss": 2.371893119812012, | |
| "step": 10370 | |
| }, | |
| { | |
| "epoch": 21.76020942408377, | |
| "learning_rate": 0.00035417729214481556, | |
| "loss": 2.412856674194336, | |
| "step": 10380 | |
| }, | |
| { | |
| "epoch": 21.781151832460733, | |
| "learning_rate": 0.0003540068096838456, | |
| "loss": 2.414295959472656, | |
| "step": 10390 | |
| }, | |
| { | |
| "epoch": 21.802094240837697, | |
| "learning_rate": 0.0003538365731701862, | |
| "loss": 2.3874536514282227, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 21.82303664921466, | |
| "learning_rate": 0.0003536665820130437, | |
| "loss": 2.408889389038086, | |
| "step": 10410 | |
| }, | |
| { | |
| "epoch": 21.843979057591625, | |
| "learning_rate": 0.00035349683562360966, | |
| "loss": 2.4029043197631834, | |
| "step": 10420 | |
| }, | |
| { | |
| "epoch": 21.864921465968585, | |
| "learning_rate": 0.0003533273334150517, | |
| "loss": 2.3872053146362306, | |
| "step": 10430 | |
| }, | |
| { | |
| "epoch": 21.88586387434555, | |
| "learning_rate": 0.0003531580748025054, | |
| "loss": 2.373563766479492, | |
| "step": 10440 | |
| }, | |
| { | |
| "epoch": 21.906806282722513, | |
| "learning_rate": 0.00035298905920306563, | |
| "loss": 2.3822809219360352, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 21.927748691099477, | |
| "learning_rate": 0.00035282028603577823, | |
| "loss": 2.4076284408569335, | |
| "step": 10460 | |
| }, | |
| { | |
| "epoch": 21.94869109947644, | |
| "learning_rate": 0.0003526517547216315, | |
| "loss": 2.3945655822753906, | |
| "step": 10470 | |
| }, | |
| { | |
| "epoch": 21.969633507853402, | |
| "learning_rate": 0.000352483464683548, | |
| "loss": 2.360683059692383, | |
| "step": 10480 | |
| }, | |
| { | |
| "epoch": 21.990575916230366, | |
| "learning_rate": 0.0003523154153463761, | |
| "loss": 2.371842956542969, | |
| "step": 10490 | |
| }, | |
| { | |
| "epoch": 21.99895287958115, | |
| "eval_loss": 2.536722183227539, | |
| "eval_runtime": 79.9983, | |
| "eval_samples_per_second": 136.953, | |
| "step": 10494 | |
| }, | |
| { | |
| "epoch": 22.012565445026176, | |
| "learning_rate": 0.00035214760613688187, | |
| "loss": 2.498021697998047, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 22.03350785340314, | |
| "learning_rate": 0.0003519800364837407, | |
| "loss": 2.377554702758789, | |
| "step": 10510 | |
| }, | |
| { | |
| "epoch": 22.054450261780104, | |
| "learning_rate": 0.0003518127058175293, | |
| "loss": 2.380527687072754, | |
| "step": 10520 | |
| }, | |
| { | |
| "epoch": 22.07539267015707, | |
| "learning_rate": 0.00035164561357071755, | |
| "loss": 2.3838827133178713, | |
| "step": 10530 | |
| }, | |
| { | |
| "epoch": 22.096335078534032, | |
| "learning_rate": 0.0003514787591776602, | |
| "loss": 2.3740776062011717, | |
| "step": 10540 | |
| }, | |
| { | |
| "epoch": 22.117277486910996, | |
| "learning_rate": 0.0003513121420745892, | |
| "loss": 2.374008560180664, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 22.138219895287957, | |
| "learning_rate": 0.0003511457616996052, | |
| "loss": 2.373431587219238, | |
| "step": 10560 | |
| }, | |
| { | |
| "epoch": 22.15916230366492, | |
| "learning_rate": 0.0003509796174926703, | |
| "loss": 2.403927803039551, | |
| "step": 10570 | |
| }, | |
| { | |
| "epoch": 22.180104712041885, | |
| "learning_rate": 0.00035081370889559934, | |
| "loss": 2.4006370544433593, | |
| "step": 10580 | |
| }, | |
| { | |
| "epoch": 22.20104712041885, | |
| "learning_rate": 0.0003506480353520526, | |
| "loss": 2.3758676528930662, | |
| "step": 10590 | |
| }, | |
| { | |
| "epoch": 22.221989528795813, | |
| "learning_rate": 0.0003504825963075276, | |
| "loss": 2.417715644836426, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 22.242931937172774, | |
| "learning_rate": 0.00035031739120935175, | |
| "loss": 2.3941156387329103, | |
| "step": 10610 | |
| }, | |
| { | |
| "epoch": 22.263874345549738, | |
| "learning_rate": 0.0003501524195066741, | |
| "loss": 2.3949649810791014, | |
| "step": 10620 | |
| }, | |
| { | |
| "epoch": 22.2848167539267, | |
| "learning_rate": 0.0003499876806504578, | |
| "loss": 2.4047883987426757, | |
| "step": 10630 | |
| }, | |
| { | |
| "epoch": 22.305759162303666, | |
| "learning_rate": 0.00034982317409347263, | |
| "loss": 2.3971155166625975, | |
| "step": 10640 | |
| }, | |
| { | |
| "epoch": 22.32670157068063, | |
| "learning_rate": 0.00034965889929028707, | |
| "loss": 2.404866027832031, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 22.34764397905759, | |
| "learning_rate": 0.000349494855697261, | |
| "loss": 2.3915122985839843, | |
| "step": 10660 | |
| }, | |
| { | |
| "epoch": 22.368586387434554, | |
| "learning_rate": 0.0003493310427725377, | |
| "loss": 2.39025936126709, | |
| "step": 10670 | |
| }, | |
| { | |
| "epoch": 22.38952879581152, | |
| "learning_rate": 0.0003491674599760369, | |
| "loss": 2.399850273132324, | |
| "step": 10680 | |
| }, | |
| { | |
| "epoch": 22.410471204188482, | |
| "learning_rate": 0.0003490041067694469, | |
| "loss": 2.3945247650146486, | |
| "step": 10690 | |
| }, | |
| { | |
| "epoch": 22.431413612565446, | |
| "learning_rate": 0.00034884098261621724, | |
| "loss": 2.397679901123047, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 22.452356020942407, | |
| "learning_rate": 0.00034867808698155125, | |
| "loss": 2.355159568786621, | |
| "step": 10710 | |
| }, | |
| { | |
| "epoch": 22.47329842931937, | |
| "learning_rate": 0.0003485154193323988, | |
| "loss": 2.3898927688598635, | |
| "step": 10720 | |
| }, | |
| { | |
| "epoch": 22.494240837696335, | |
| "learning_rate": 0.00034835297913744903, | |
| "loss": 2.367123031616211, | |
| "step": 10730 | |
| }, | |
| { | |
| "epoch": 22.5151832460733, | |
| "learning_rate": 0.0003481907658671227, | |
| "loss": 2.3852542877197265, | |
| "step": 10740 | |
| }, | |
| { | |
| "epoch": 22.536125654450263, | |
| "learning_rate": 0.0003480287789935653, | |
| "loss": 2.4065229415893556, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 22.557068062827224, | |
| "learning_rate": 0.00034786701799063976, | |
| "loss": 2.3588846206665037, | |
| "step": 10760 | |
| }, | |
| { | |
| "epoch": 22.578010471204188, | |
| "learning_rate": 0.00034770548233391924, | |
| "loss": 2.390997123718262, | |
| "step": 10770 | |
| }, | |
| { | |
| "epoch": 22.59895287958115, | |
| "learning_rate": 0.0003475441715006799, | |
| "loss": 2.3878786087036135, | |
| "step": 10780 | |
| }, | |
| { | |
| "epoch": 22.619895287958116, | |
| "learning_rate": 0.0003473830849698938, | |
| "loss": 2.398370552062988, | |
| "step": 10790 | |
| }, | |
| { | |
| "epoch": 22.64083769633508, | |
| "learning_rate": 0.0003472222222222222, | |
| "loss": 2.3805349349975584, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 22.66178010471204, | |
| "learning_rate": 0.00034706158274000796, | |
| "loss": 2.3977741241455077, | |
| "step": 10810 | |
| }, | |
| { | |
| "epoch": 22.682722513089004, | |
| "learning_rate": 0.00034690116600726885, | |
| "loss": 2.387373924255371, | |
| "step": 10820 | |
| }, | |
| { | |
| "epoch": 22.70366492146597, | |
| "learning_rate": 0.0003467409715096907, | |
| "loss": 2.3700994491577148, | |
| "step": 10830 | |
| }, | |
| { | |
| "epoch": 22.724607329842932, | |
| "learning_rate": 0.00034658099873462027, | |
| "loss": 2.3671117782592774, | |
| "step": 10840 | |
| }, | |
| { | |
| "epoch": 22.745549738219896, | |
| "learning_rate": 0.0003464212471710583, | |
| "loss": 2.377743148803711, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 22.76649214659686, | |
| "learning_rate": 0.0003462617163096529, | |
| "loss": 2.386002540588379, | |
| "step": 10860 | |
| }, | |
| { | |
| "epoch": 22.78743455497382, | |
| "learning_rate": 0.00034610240564269265, | |
| "loss": 2.3687204360961913, | |
| "step": 10870 | |
| }, | |
| { | |
| "epoch": 22.808376963350785, | |
| "learning_rate": 0.0003459433146640997, | |
| "loss": 2.3671218872070314, | |
| "step": 10880 | |
| }, | |
| { | |
| "epoch": 22.82931937172775, | |
| "learning_rate": 0.00034578444286942307, | |
| "loss": 2.3793460845947267, | |
| "step": 10890 | |
| }, | |
| { | |
| "epoch": 22.850261780104713, | |
| "learning_rate": 0.00034562578975583187, | |
| "loss": 2.374790382385254, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 22.871204188481677, | |
| "learning_rate": 0.00034546735482210894, | |
| "loss": 2.356049728393555, | |
| "step": 10910 | |
| }, | |
| { | |
| "epoch": 22.892146596858638, | |
| "learning_rate": 0.0003453091375686437, | |
| "loss": 2.361851119995117, | |
| "step": 10920 | |
| }, | |
| { | |
| "epoch": 22.9130890052356, | |
| "learning_rate": 0.00034515113749742586, | |
| "loss": 2.3911083221435545, | |
| "step": 10930 | |
| }, | |
| { | |
| "epoch": 22.934031413612566, | |
| "learning_rate": 0.00034499335411203894, | |
| "loss": 2.353407096862793, | |
| "step": 10940 | |
| }, | |
| { | |
| "epoch": 22.95497382198953, | |
| "learning_rate": 0.00034483578691765326, | |
| "loss": 2.3763240814208983, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 22.975916230366494, | |
| "learning_rate": 0.00034467843542102, | |
| "loss": 2.3745288848876953, | |
| "step": 10960 | |
| }, | |
| { | |
| "epoch": 22.996858638743454, | |
| "learning_rate": 0.0003445212991304641, | |
| "loss": 2.367890167236328, | |
| "step": 10970 | |
| }, | |
| { | |
| "epoch": 22.99895287958115, | |
| "eval_loss": 2.542264461517334, | |
| "eval_runtime": 74.647, | |
| "eval_samples_per_second": 146.771, | |
| "step": 10971 | |
| }, | |
| { | |
| "epoch": 23.018848167539268, | |
| "learning_rate": 0.00034436437755587827, | |
| "loss": 2.4732553482055666, | |
| "step": 10980 | |
| }, | |
| { | |
| "epoch": 23.039790575916232, | |
| "learning_rate": 0.00034420767020871656, | |
| "loss": 2.3789663314819336, | |
| "step": 10990 | |
| }, | |
| { | |
| "epoch": 23.060732984293193, | |
| "learning_rate": 0.00034405117660198765, | |
| "loss": 2.387537384033203, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 23.081675392670157, | |
| "learning_rate": 0.00034389489625024885, | |
| "loss": 2.3760297775268553, | |
| "step": 11010 | |
| }, | |
| { | |
| "epoch": 23.10261780104712, | |
| "learning_rate": 0.00034373882866959936, | |
| "loss": 2.3746875762939452, | |
| "step": 11020 | |
| }, | |
| { | |
| "epoch": 23.123560209424085, | |
| "learning_rate": 0.0003435829733776745, | |
| "loss": 2.4071685791015627, | |
| "step": 11030 | |
| }, | |
| { | |
| "epoch": 23.14450261780105, | |
| "learning_rate": 0.00034342732989363903, | |
| "loss": 2.3595859527587892, | |
| "step": 11040 | |
| }, | |
| { | |
| "epoch": 23.16544502617801, | |
| "learning_rate": 0.0003432718977381811, | |
| "loss": 2.356878662109375, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 23.186387434554973, | |
| "learning_rate": 0.0003431166764335058, | |
| "loss": 2.3806716918945314, | |
| "step": 11060 | |
| }, | |
| { | |
| "epoch": 23.207329842931937, | |
| "learning_rate": 0.0003429616655033297, | |
| "loss": 2.365432929992676, | |
| "step": 11070 | |
| }, | |
| { | |
| "epoch": 23.2282722513089, | |
| "learning_rate": 0.00034280686447287373, | |
| "loss": 2.3477930068969726, | |
| "step": 11080 | |
| }, | |
| { | |
| "epoch": 23.249214659685865, | |
| "learning_rate": 0.00034265227286885776, | |
| "loss": 2.359480094909668, | |
| "step": 11090 | |
| }, | |
| { | |
| "epoch": 23.270157068062826, | |
| "learning_rate": 0.00034249789021949435, | |
| "loss": 2.3736724853515625, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 23.29109947643979, | |
| "learning_rate": 0.0003423437160544826, | |
| "loss": 2.3853965759277345, | |
| "step": 11110 | |
| }, | |
| { | |
| "epoch": 23.312041884816754, | |
| "learning_rate": 0.0003421897499050022, | |
| "loss": 2.3594213485717774, | |
| "step": 11120 | |
| }, | |
| { | |
| "epoch": 23.332984293193718, | |
| "learning_rate": 0.0003420359913037075, | |
| "loss": 2.3540416717529298, | |
| "step": 11130 | |
| }, | |
| { | |
| "epoch": 23.353926701570682, | |
| "learning_rate": 0.0003418824397847216, | |
| "loss": 2.371465301513672, | |
| "step": 11140 | |
| }, | |
| { | |
| "epoch": 23.374869109947642, | |
| "learning_rate": 0.00034172909488363007, | |
| "loss": 2.360518455505371, | |
| "step": 11150 | |
| }, | |
| { | |
| "epoch": 23.395811518324606, | |
| "learning_rate": 0.00034157595613747545, | |
| "loss": 2.347417640686035, | |
| "step": 11160 | |
| }, | |
| { | |
| "epoch": 23.41675392670157, | |
| "learning_rate": 0.00034142302308475133, | |
| "loss": 2.388157081604004, | |
| "step": 11170 | |
| }, | |
| { | |
| "epoch": 23.437696335078535, | |
| "learning_rate": 0.0003412702952653962, | |
| "loss": 2.348739433288574, | |
| "step": 11180 | |
| }, | |
| { | |
| "epoch": 23.4586387434555, | |
| "learning_rate": 0.00034111777222078796, | |
| "loss": 2.3871492385864257, | |
| "step": 11190 | |
| }, | |
| { | |
| "epoch": 23.47958115183246, | |
| "learning_rate": 0.00034096545349373804, | |
| "loss": 2.3624570846557615, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 23.500523560209423, | |
| "learning_rate": 0.0003408133386284857, | |
| "loss": 2.337727165222168, | |
| "step": 11210 | |
| }, | |
| { | |
| "epoch": 23.521465968586387, | |
| "learning_rate": 0.0003406614271706919, | |
| "loss": 2.3544214248657225, | |
| "step": 11220 | |
| }, | |
| { | |
| "epoch": 23.54240837696335, | |
| "learning_rate": 0.0003405097186674344, | |
| "loss": 2.3601694107055664, | |
| "step": 11230 | |
| }, | |
| { | |
| "epoch": 23.563350785340315, | |
| "learning_rate": 0.00034035821266720136, | |
| "loss": 2.3869655609130858, | |
| "step": 11240 | |
| }, | |
| { | |
| "epoch": 23.58429319371728, | |
| "learning_rate": 0.0003402069087198858, | |
| "loss": 2.3417810440063476, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 23.60523560209424, | |
| "learning_rate": 0.00034005580637678053, | |
| "loss": 2.3512496948242188, | |
| "step": 11260 | |
| }, | |
| { | |
| "epoch": 23.626178010471204, | |
| "learning_rate": 0.00033990490519057183, | |
| "loss": 2.3688682556152343, | |
| "step": 11270 | |
| }, | |
| { | |
| "epoch": 23.647120418848168, | |
| "learning_rate": 0.0003397542047153345, | |
| "loss": 2.3577795028686523, | |
| "step": 11280 | |
| }, | |
| { | |
| "epoch": 23.668062827225132, | |
| "learning_rate": 0.0003396037045065257, | |
| "loss": 2.380731201171875, | |
| "step": 11290 | |
| }, | |
| { | |
| "epoch": 23.689005235602096, | |
| "learning_rate": 0.0003394534041209802, | |
| "loss": 2.349542427062988, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 23.709947643979056, | |
| "learning_rate": 0.0003393033031169043, | |
| "loss": 2.3590700149536135, | |
| "step": 11310 | |
| }, | |
| { | |
| "epoch": 23.73089005235602, | |
| "learning_rate": 0.0003391534010538705, | |
| "loss": 2.392327880859375, | |
| "step": 11320 | |
| }, | |
| { | |
| "epoch": 23.751832460732984, | |
| "learning_rate": 0.00033900369749281225, | |
| "loss": 2.3760391235351563, | |
| "step": 11330 | |
| }, | |
| { | |
| "epoch": 23.77277486910995, | |
| "learning_rate": 0.00033885419199601845, | |
| "loss": 2.355258560180664, | |
| "step": 11340 | |
| }, | |
| { | |
| "epoch": 23.793717277486913, | |
| "learning_rate": 0.000338704884127128, | |
| "loss": 2.369922065734863, | |
| "step": 11350 | |
| }, | |
| { | |
| "epoch": 23.814659685863873, | |
| "learning_rate": 0.00033855577345112453, | |
| "loss": 2.410330390930176, | |
| "step": 11360 | |
| }, | |
| { | |
| "epoch": 23.835602094240837, | |
| "learning_rate": 0.0003384068595343312, | |
| "loss": 2.374154472351074, | |
| "step": 11370 | |
| }, | |
| { | |
| "epoch": 23.8565445026178, | |
| "learning_rate": 0.00033825814194440504, | |
| "loss": 2.360888671875, | |
| "step": 11380 | |
| }, | |
| { | |
| "epoch": 23.877486910994765, | |
| "learning_rate": 0.0003381096202503321, | |
| "loss": 2.372193145751953, | |
| "step": 11390 | |
| }, | |
| { | |
| "epoch": 23.89842931937173, | |
| "learning_rate": 0.00033796129402242193, | |
| "loss": 2.3628297805786134, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 23.91937172774869, | |
| "learning_rate": 0.0003378131628323024, | |
| "loss": 2.365167999267578, | |
| "step": 11410 | |
| }, | |
| { | |
| "epoch": 23.940314136125654, | |
| "learning_rate": 0.0003376652262529146, | |
| "loss": 2.385006332397461, | |
| "step": 11420 | |
| }, | |
| { | |
| "epoch": 23.961256544502618, | |
| "learning_rate": 0.00033751748385850753, | |
| "loss": 2.3524898529052733, | |
| "step": 11430 | |
| }, | |
| { | |
| "epoch": 23.982198952879582, | |
| "learning_rate": 0.00033736993522463316, | |
| "loss": 2.374051094055176, | |
| "step": 11440 | |
| }, | |
| { | |
| "epoch": 23.99895287958115, | |
| "eval_loss": 2.532155990600586, | |
| "eval_runtime": 73.6953, | |
| "eval_samples_per_second": 148.666, | |
| "step": 11448 | |
| }, | |
| { | |
| "epoch": 24.004188481675392, | |
| "learning_rate": 0.00033722257992814113, | |
| "loss": 2.4828319549560547, | |
| "step": 11450 | |
| }, | |
| { | |
| "epoch": 24.025130890052356, | |
| "learning_rate": 0.0003370754175471737, | |
| "loss": 2.360254669189453, | |
| "step": 11460 | |
| }, | |
| { | |
| "epoch": 24.04607329842932, | |
| "learning_rate": 0.0003369284476611607, | |
| "loss": 2.349439811706543, | |
| "step": 11470 | |
| }, | |
| { | |
| "epoch": 24.067015706806284, | |
| "learning_rate": 0.00033678166985081433, | |
| "loss": 2.36633415222168, | |
| "step": 11480 | |
| }, | |
| { | |
| "epoch": 24.087958115183245, | |
| "learning_rate": 0.0003366350836981245, | |
| "loss": 2.365359306335449, | |
| "step": 11490 | |
| }, | |
| { | |
| "epoch": 24.10890052356021, | |
| "learning_rate": 0.0003364886887863534, | |
| "loss": 2.362344169616699, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 24.129842931937173, | |
| "learning_rate": 0.0003363424847000309, | |
| "loss": 2.357081985473633, | |
| "step": 11510 | |
| }, | |
| { | |
| "epoch": 24.150785340314137, | |
| "learning_rate": 0.0003361964710249494, | |
| "loss": 2.3507287979125975, | |
| "step": 11520 | |
| }, | |
| { | |
| "epoch": 24.1717277486911, | |
| "learning_rate": 0.00033605064734815865, | |
| "loss": 2.3537702560424805, | |
| "step": 11530 | |
| }, | |
| { | |
| "epoch": 24.19267015706806, | |
| "learning_rate": 0.0003359050132579615, | |
| "loss": 2.37689151763916, | |
| "step": 11540 | |
| }, | |
| { | |
| "epoch": 24.213612565445025, | |
| "learning_rate": 0.00033575956834390843, | |
| "loss": 2.348763847351074, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 24.23455497382199, | |
| "learning_rate": 0.00033561431219679297, | |
| "loss": 2.3715591430664062, | |
| "step": 11560 | |
| }, | |
| { | |
| "epoch": 24.255497382198953, | |
| "learning_rate": 0.00033546924440864666, | |
| "loss": 2.3734716415405273, | |
| "step": 11570 | |
| }, | |
| { | |
| "epoch": 24.276439790575917, | |
| "learning_rate": 0.0003353243645727346, | |
| "loss": 2.3519350051879884, | |
| "step": 11580 | |
| }, | |
| { | |
| "epoch": 24.297382198952878, | |
| "learning_rate": 0.0003351796722835502, | |
| "loss": 2.355198287963867, | |
| "step": 11590 | |
| }, | |
| { | |
| "epoch": 24.318324607329842, | |
| "learning_rate": 0.00033503516713681087, | |
| "loss": 2.3704608917236327, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 24.339267015706806, | |
| "learning_rate": 0.00033489084872945283, | |
| "loss": 2.3960491180419923, | |
| "step": 11610 | |
| }, | |
| { | |
| "epoch": 24.36020942408377, | |
| "learning_rate": 0.0003347467166596268, | |
| "loss": 2.3748762130737306, | |
| "step": 11620 | |
| }, | |
| { | |
| "epoch": 24.381151832460734, | |
| "learning_rate": 0.0003346027705266929, | |
| "loss": 2.358123016357422, | |
| "step": 11630 | |
| }, | |
| { | |
| "epoch": 24.402094240837695, | |
| "learning_rate": 0.0003344590099312164, | |
| "loss": 2.345402717590332, | |
| "step": 11640 | |
| }, | |
| { | |
| "epoch": 24.42303664921466, | |
| "learning_rate": 0.00033431543447496275, | |
| "loss": 2.331704330444336, | |
| "step": 11650 | |
| }, | |
| { | |
| "epoch": 24.443979057591623, | |
| "learning_rate": 0.000334172043760893, | |
| "loss": 2.377284812927246, | |
| "step": 11660 | |
| }, | |
| { | |
| "epoch": 24.464921465968587, | |
| "learning_rate": 0.0003340288373931593, | |
| "loss": 2.354692268371582, | |
| "step": 11670 | |
| }, | |
| { | |
| "epoch": 24.48586387434555, | |
| "learning_rate": 0.0003338858149771002, | |
| "loss": 2.3740156173706053, | |
| "step": 11680 | |
| }, | |
| { | |
| "epoch": 24.506806282722515, | |
| "learning_rate": 0.0003337429761192361, | |
| "loss": 2.367665672302246, | |
| "step": 11690 | |
| }, | |
| { | |
| "epoch": 24.527748691099475, | |
| "learning_rate": 0.00033360032042726483, | |
| "loss": 2.362037467956543, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 24.54869109947644, | |
| "learning_rate": 0.000333457847510057, | |
| "loss": 2.371419334411621, | |
| "step": 11710 | |
| }, | |
| { | |
| "epoch": 24.569633507853403, | |
| "learning_rate": 0.0003333155569776514, | |
| "loss": 2.33715934753418, | |
| "step": 11720 | |
| }, | |
| { | |
| "epoch": 24.590575916230367, | |
| "learning_rate": 0.00033317344844125064, | |
| "loss": 2.3361494064331056, | |
| "step": 11730 | |
| }, | |
| { | |
| "epoch": 24.61151832460733, | |
| "learning_rate": 0.00033303152151321696, | |
| "loss": 2.346495819091797, | |
| "step": 11740 | |
| }, | |
| { | |
| "epoch": 24.632460732984292, | |
| "learning_rate": 0.00033288977580706714, | |
| "loss": 2.3827404022216796, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 24.653403141361256, | |
| "learning_rate": 0.0003327482109374687, | |
| "loss": 2.3554365158081056, | |
| "step": 11760 | |
| }, | |
| { | |
| "epoch": 24.67434554973822, | |
| "learning_rate": 0.00033260682652023517, | |
| "loss": 2.3607540130615234, | |
| "step": 11770 | |
| }, | |
| { | |
| "epoch": 24.695287958115184, | |
| "learning_rate": 0.0003324656221723217, | |
| "loss": 2.3806394577026366, | |
| "step": 11780 | |
| }, | |
| { | |
| "epoch": 24.716230366492148, | |
| "learning_rate": 0.000332324597511821, | |
| "loss": 2.3277612686157227, | |
| "step": 11790 | |
| }, | |
| { | |
| "epoch": 24.73717277486911, | |
| "learning_rate": 0.00033218375215795864, | |
| "loss": 2.3573076248168947, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 24.758115183246073, | |
| "learning_rate": 0.00033204308573108897, | |
| "loss": 2.3293807983398436, | |
| "step": 11810 | |
| }, | |
| { | |
| "epoch": 24.779057591623037, | |
| "learning_rate": 0.00033190259785269066, | |
| "loss": 2.3627220153808595, | |
| "step": 11820 | |
| }, | |
| { | |
| "epoch": 24.8, | |
| "learning_rate": 0.0003317622881453626, | |
| "loss": 2.3504779815673826, | |
| "step": 11830 | |
| }, | |
| { | |
| "epoch": 24.820942408376965, | |
| "learning_rate": 0.0003316221562328194, | |
| "loss": 2.3526493072509767, | |
| "step": 11840 | |
| }, | |
| { | |
| "epoch": 24.841884816753925, | |
| "learning_rate": 0.0003314822017398875, | |
| "loss": 2.367503547668457, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 24.86282722513089, | |
| "learning_rate": 0.00033134242429250053, | |
| "loss": 2.364429473876953, | |
| "step": 11860 | |
| }, | |
| { | |
| "epoch": 24.883769633507853, | |
| "learning_rate": 0.00033120282351769556, | |
| "loss": 2.3418235778808594, | |
| "step": 11870 | |
| }, | |
| { | |
| "epoch": 24.904712041884817, | |
| "learning_rate": 0.0003310633990436084, | |
| "loss": 2.361065483093262, | |
| "step": 11880 | |
| }, | |
| { | |
| "epoch": 24.92565445026178, | |
| "learning_rate": 0.00033092415049947006, | |
| "loss": 2.3631685256958006, | |
| "step": 11890 | |
| }, | |
| { | |
| "epoch": 24.946596858638742, | |
| "learning_rate": 0.00033078507751560195, | |
| "loss": 2.346321868896484, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 24.967539267015706, | |
| "learning_rate": 0.00033064617972341235, | |
| "loss": 2.3589923858642576, | |
| "step": 11910 | |
| }, | |
| { | |
| "epoch": 24.98848167539267, | |
| "learning_rate": 0.0003305074567553919, | |
| "loss": 2.3485301971435546, | |
| "step": 11920 | |
| }, | |
| { | |
| "epoch": 24.99895287958115, | |
| "eval_loss": 2.5436818599700928, | |
| "eval_runtime": 73.6581, | |
| "eval_samples_per_second": 148.741, | |
| "step": 11925 | |
| }, | |
| { | |
| "epoch": 25.01047120418848, | |
| "learning_rate": 0.0003303689082451096, | |
| "loss": 2.483962059020996, | |
| "step": 11930 | |
| }, | |
| { | |
| "epoch": 25.031413612565444, | |
| "learning_rate": 0.00033023053382720904, | |
| "loss": 2.352615547180176, | |
| "step": 11940 | |
| }, | |
| { | |
| "epoch": 25.05235602094241, | |
| "learning_rate": 0.0003300923331374039, | |
| "loss": 2.379102325439453, | |
| "step": 11950 | |
| }, | |
| { | |
| "epoch": 25.073298429319372, | |
| "learning_rate": 0.00032995430581247417, | |
| "loss": 2.3579853057861326, | |
| "step": 11960 | |
| }, | |
| { | |
| "epoch": 25.094240837696336, | |
| "learning_rate": 0.0003298164514902622, | |
| "loss": 2.3461565017700194, | |
| "step": 11970 | |
| }, | |
| { | |
| "epoch": 25.115183246073297, | |
| "learning_rate": 0.0003296787698096686, | |
| "loss": 2.328052520751953, | |
| "step": 11980 | |
| }, | |
| { | |
| "epoch": 25.13612565445026, | |
| "learning_rate": 0.0003295412604106482, | |
| "loss": 2.337063026428223, | |
| "step": 11990 | |
| }, | |
| { | |
| "epoch": 25.157068062827225, | |
| "learning_rate": 0.00032940392293420614, | |
| "loss": 2.3367223739624023, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 25.17801047120419, | |
| "learning_rate": 0.00032926675702239425, | |
| "loss": 2.365107536315918, | |
| "step": 12010 | |
| }, | |
| { | |
| "epoch": 25.198952879581153, | |
| "learning_rate": 0.00032912976231830646, | |
| "loss": 2.3596302032470704, | |
| "step": 12020 | |
| }, | |
| { | |
| "epoch": 25.219895287958114, | |
| "learning_rate": 0.0003289929384660757, | |
| "loss": 2.336884307861328, | |
| "step": 12030 | |
| }, | |
| { | |
| "epoch": 25.240837696335078, | |
| "learning_rate": 0.0003288562851108693, | |
| "loss": 2.3663518905639647, | |
| "step": 12040 | |
| }, | |
| { | |
| "epoch": 25.26178010471204, | |
| "learning_rate": 0.0003287198018988856, | |
| "loss": 2.3383811950683593, | |
| "step": 12050 | |
| }, | |
| { | |
| "epoch": 25.282722513089006, | |
| "learning_rate": 0.00032858348847734985, | |
| "loss": 2.3640661239624023, | |
| "step": 12060 | |
| }, | |
| { | |
| "epoch": 25.30366492146597, | |
| "learning_rate": 0.00032844734449451055, | |
| "loss": 2.3613861083984373, | |
| "step": 12070 | |
| }, | |
| { | |
| "epoch": 25.324607329842934, | |
| "learning_rate": 0.00032831136959963553, | |
| "loss": 2.3227806091308594, | |
| "step": 12080 | |
| }, | |
| { | |
| "epoch": 25.345549738219894, | |
| "learning_rate": 0.00032817556344300823, | |
| "loss": 2.328192710876465, | |
| "step": 12090 | |
| }, | |
| { | |
| "epoch": 25.36649214659686, | |
| "learning_rate": 0.0003280399256759237, | |
| "loss": 2.3461523056030273, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 25.387434554973822, | |
| "learning_rate": 0.0003279044559506852, | |
| "loss": 2.3762447357177736, | |
| "step": 12110 | |
| }, | |
| { | |
| "epoch": 25.408376963350786, | |
| "learning_rate": 0.0003277691539206003, | |
| "loss": 2.325837326049805, | |
| "step": 12120 | |
| }, | |
| { | |
| "epoch": 25.42931937172775, | |
| "learning_rate": 0.0003276340192399769, | |
| "loss": 2.3660905838012694, | |
| "step": 12130 | |
| }, | |
| { | |
| "epoch": 25.45026178010471, | |
| "learning_rate": 0.00032749905156412, | |
| "loss": 2.360948181152344, | |
| "step": 12140 | |
| }, | |
| { | |
| "epoch": 25.471204188481675, | |
| "learning_rate": 0.0003273642505493275, | |
| "loss": 2.3165866851806642, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 25.49214659685864, | |
| "learning_rate": 0.0003272296158528871, | |
| "loss": 2.3487401962280274, | |
| "step": 12160 | |
| }, | |
| { | |
| "epoch": 25.513089005235603, | |
| "learning_rate": 0.000327095147133072, | |
| "loss": 2.351056671142578, | |
| "step": 12170 | |
| }, | |
| { | |
| "epoch": 25.534031413612567, | |
| "learning_rate": 0.00032696084404913777, | |
| "loss": 2.3396501541137695, | |
| "step": 12180 | |
| }, | |
| { | |
| "epoch": 25.554973821989527, | |
| "learning_rate": 0.00032682670626131837, | |
| "loss": 2.3343048095703125, | |
| "step": 12190 | |
| }, | |
| { | |
| "epoch": 25.57591623036649, | |
| "learning_rate": 0.0003266927334308229, | |
| "loss": 2.3392221450805666, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 25.596858638743456, | |
| "learning_rate": 0.0003265589252198317, | |
| "loss": 2.339245414733887, | |
| "step": 12210 | |
| }, | |
| { | |
| "epoch": 25.61780104712042, | |
| "learning_rate": 0.0003264252812914928, | |
| "loss": 2.343129539489746, | |
| "step": 12220 | |
| }, | |
| { | |
| "epoch": 25.638743455497384, | |
| "learning_rate": 0.0003262918013099186, | |
| "loss": 2.344712829589844, | |
| "step": 12230 | |
| }, | |
| { | |
| "epoch": 25.659685863874344, | |
| "learning_rate": 0.00032615848494018204, | |
| "loss": 2.364294242858887, | |
| "step": 12240 | |
| }, | |
| { | |
| "epoch": 25.680628272251308, | |
| "learning_rate": 0.0003260253318483131, | |
| "loss": 2.3588529586791993, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 25.701570680628272, | |
| "learning_rate": 0.0003258923417012957, | |
| "loss": 2.3558927536010743, | |
| "step": 12260 | |
| }, | |
| { | |
| "epoch": 25.722513089005236, | |
| "learning_rate": 0.00032575951416706354, | |
| "loss": 2.370713996887207, | |
| "step": 12270 | |
| }, | |
| { | |
| "epoch": 25.7434554973822, | |
| "learning_rate": 0.0003256268489144972, | |
| "loss": 2.3426084518432617, | |
| "step": 12280 | |
| }, | |
| { | |
| "epoch": 25.76439790575916, | |
| "learning_rate": 0.0003254943456134202, | |
| "loss": 2.3299545288085937, | |
| "step": 12290 | |
| }, | |
| { | |
| "epoch": 25.785340314136125, | |
| "learning_rate": 0.0003253620039345959, | |
| "loss": 2.343545913696289, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 25.80628272251309, | |
| "learning_rate": 0.0003252298235497241, | |
| "loss": 2.3348289489746095, | |
| "step": 12310 | |
| }, | |
| { | |
| "epoch": 25.827225130890053, | |
| "learning_rate": 0.0003250978041314371, | |
| "loss": 2.3712085723876952, | |
| "step": 12320 | |
| }, | |
| { | |
| "epoch": 25.848167539267017, | |
| "learning_rate": 0.000324965945353297, | |
| "loss": 2.347680854797363, | |
| "step": 12330 | |
| }, | |
| { | |
| "epoch": 25.869109947643977, | |
| "learning_rate": 0.0003248342468897917, | |
| "loss": 2.342079925537109, | |
| "step": 12340 | |
| }, | |
| { | |
| "epoch": 25.89005235602094, | |
| "learning_rate": 0.00032470270841633195, | |
| "loss": 2.376851272583008, | |
| "step": 12350 | |
| }, | |
| { | |
| "epoch": 25.910994764397905, | |
| "learning_rate": 0.00032457132960924783, | |
| "loss": 2.3613746643066404, | |
| "step": 12360 | |
| }, | |
| { | |
| "epoch": 25.93193717277487, | |
| "learning_rate": 0.00032444011014578535, | |
| "loss": 2.3406829833984375, | |
| "step": 12370 | |
| }, | |
| { | |
| "epoch": 25.952879581151834, | |
| "learning_rate": 0.00032430904970410314, | |
| "loss": 2.328056526184082, | |
| "step": 12380 | |
| }, | |
| { | |
| "epoch": 25.973821989528794, | |
| "learning_rate": 0.0003241781479632693, | |
| "loss": 2.3438344955444337, | |
| "step": 12390 | |
| }, | |
| { | |
| "epoch": 25.994764397905758, | |
| "learning_rate": 0.0003240474046032579, | |
| "loss": 2.349610137939453, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 25.99895287958115, | |
| "eval_loss": 2.533395767211914, | |
| "eval_runtime": 73.6578, | |
| "eval_samples_per_second": 148.742, | |
| "step": 12402 | |
| }, | |
| { | |
| "epoch": 26.016753926701572, | |
| "learning_rate": 0.00032391681930494566, | |
| "loss": 2.4640811920166015, | |
| "step": 12410 | |
| }, | |
| { | |
| "epoch": 26.037696335078532, | |
| "learning_rate": 0.000323786391750109, | |
| "loss": 2.3338626861572265, | |
| "step": 12420 | |
| }, | |
| { | |
| "epoch": 26.058638743455496, | |
| "learning_rate": 0.0003236561216214202, | |
| "loss": 2.342071533203125, | |
| "step": 12430 | |
| }, | |
| { | |
| "epoch": 26.07958115183246, | |
| "learning_rate": 0.000323526008602445, | |
| "loss": 2.374074172973633, | |
| "step": 12440 | |
| }, | |
| { | |
| "epoch": 26.100523560209425, | |
| "learning_rate": 0.0003233960523776387, | |
| "loss": 2.3421449661254883, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 26.12146596858639, | |
| "learning_rate": 0.0003232662526323429, | |
| "loss": 2.3614429473876952, | |
| "step": 12460 | |
| }, | |
| { | |
| "epoch": 26.14240837696335, | |
| "learning_rate": 0.0003231366090527828, | |
| "loss": 2.319747543334961, | |
| "step": 12470 | |
| }, | |
| { | |
| "epoch": 26.163350785340313, | |
| "learning_rate": 0.00032300712132606366, | |
| "loss": 2.3622182846069335, | |
| "step": 12480 | |
| }, | |
| { | |
| "epoch": 26.184293193717277, | |
| "learning_rate": 0.0003228777891401678, | |
| "loss": 2.359231185913086, | |
| "step": 12490 | |
| }, | |
| { | |
| "epoch": 26.20523560209424, | |
| "learning_rate": 0.0003227486121839514, | |
| "loss": 2.355366516113281, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 26.226178010471205, | |
| "learning_rate": 0.00032261959014714107, | |
| "loss": 2.3299293518066406, | |
| "step": 12510 | |
| }, | |
| { | |
| "epoch": 26.24712041884817, | |
| "learning_rate": 0.0003224907227203312, | |
| "loss": 2.3555164337158203, | |
| "step": 12520 | |
| }, | |
| { | |
| "epoch": 26.26806282722513, | |
| "learning_rate": 0.0003223620095949806, | |
| "loss": 2.314861869812012, | |
| "step": 12530 | |
| }, | |
| { | |
| "epoch": 26.289005235602094, | |
| "learning_rate": 0.00032223345046340936, | |
| "loss": 2.3362022399902345, | |
| "step": 12540 | |
| }, | |
| { | |
| "epoch": 26.309947643979058, | |
| "learning_rate": 0.00032210504501879576, | |
| "loss": 2.3406482696533204, | |
| "step": 12550 | |
| }, | |
| { | |
| "epoch": 26.330890052356022, | |
| "learning_rate": 0.0003219767929551733, | |
| "loss": 2.30753231048584, | |
| "step": 12560 | |
| }, | |
| { | |
| "epoch": 26.351832460732986, | |
| "learning_rate": 0.00032184869396742754, | |
| "loss": 2.351367950439453, | |
| "step": 12570 | |
| }, | |
| { | |
| "epoch": 26.372774869109946, | |
| "learning_rate": 0.00032172074775129323, | |
| "loss": 2.3465883255004885, | |
| "step": 12580 | |
| }, | |
| { | |
| "epoch": 26.39371727748691, | |
| "learning_rate": 0.00032159295400335114, | |
| "loss": 2.3782730102539062, | |
| "step": 12590 | |
| }, | |
| { | |
| "epoch": 26.414659685863874, | |
| "learning_rate": 0.00032146531242102476, | |
| "loss": 2.3480430603027345, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 26.43560209424084, | |
| "learning_rate": 0.0003213378227025779, | |
| "loss": 2.370161437988281, | |
| "step": 12610 | |
| }, | |
| { | |
| "epoch": 26.456544502617803, | |
| "learning_rate": 0.00032121048454711114, | |
| "loss": 2.3542537689208984, | |
| "step": 12620 | |
| }, | |
| { | |
| "epoch": 26.477486910994763, | |
| "learning_rate": 0.00032108329765455926, | |
| "loss": 2.3564731597900392, | |
| "step": 12630 | |
| }, | |
| { | |
| "epoch": 26.498429319371727, | |
| "learning_rate": 0.00032095626172568784, | |
| "loss": 2.333011817932129, | |
| "step": 12640 | |
| }, | |
| { | |
| "epoch": 26.51937172774869, | |
| "learning_rate": 0.00032082937646209084, | |
| "loss": 2.3433643341064454, | |
| "step": 12650 | |
| }, | |
| { | |
| "epoch": 26.540314136125655, | |
| "learning_rate": 0.0003207026415661871, | |
| "loss": 2.3346595764160156, | |
| "step": 12660 | |
| }, | |
| { | |
| "epoch": 26.56125654450262, | |
| "learning_rate": 0.0003205760567412178, | |
| "loss": 2.339708709716797, | |
| "step": 12670 | |
| }, | |
| { | |
| "epoch": 26.58219895287958, | |
| "learning_rate": 0.00032044962169124335, | |
| "loss": 2.3501649856567384, | |
| "step": 12680 | |
| }, | |
| { | |
| "epoch": 26.603141361256544, | |
| "learning_rate": 0.0003203233361211406, | |
| "loss": 2.3222862243652345, | |
| "step": 12690 | |
| }, | |
| { | |
| "epoch": 26.624083769633508, | |
| "learning_rate": 0.00032019719973659996, | |
| "loss": 2.3362213134765626, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 26.645026178010472, | |
| "learning_rate": 0.00032007121224412224, | |
| "loss": 2.311092567443848, | |
| "step": 12710 | |
| }, | |
| { | |
| "epoch": 26.665968586387436, | |
| "learning_rate": 0.0003199453733510162, | |
| "loss": 2.332124137878418, | |
| "step": 12720 | |
| }, | |
| { | |
| "epoch": 26.686910994764396, | |
| "learning_rate": 0.00031981968276539543, | |
| "loss": 2.3406246185302733, | |
| "step": 12730 | |
| }, | |
| { | |
| "epoch": 26.70785340314136, | |
| "learning_rate": 0.0003196941401961754, | |
| "loss": 2.3419260025024413, | |
| "step": 12740 | |
| }, | |
| { | |
| "epoch": 26.728795811518324, | |
| "learning_rate": 0.000319568745353071, | |
| "loss": 2.344953727722168, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 26.74973821989529, | |
| "learning_rate": 0.0003194434979465935, | |
| "loss": 2.3517208099365234, | |
| "step": 12760 | |
| }, | |
| { | |
| "epoch": 26.770680628272252, | |
| "learning_rate": 0.0003193183976880476, | |
| "loss": 2.3811822891235352, | |
| "step": 12770 | |
| }, | |
| { | |
| "epoch": 26.791623036649213, | |
| "learning_rate": 0.00031919344428952895, | |
| "loss": 2.3604736328125, | |
| "step": 12780 | |
| }, | |
| { | |
| "epoch": 26.812565445026177, | |
| "learning_rate": 0.0003190686374639211, | |
| "loss": 2.3444387435913088, | |
| "step": 12790 | |
| }, | |
| { | |
| "epoch": 26.83350785340314, | |
| "learning_rate": 0.00031894397692489295, | |
| "loss": 2.340729331970215, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 26.854450261780105, | |
| "learning_rate": 0.0003188194623868958, | |
| "loss": 2.340890121459961, | |
| "step": 12810 | |
| }, | |
| { | |
| "epoch": 26.87539267015707, | |
| "learning_rate": 0.00031869509356516063, | |
| "loss": 2.3352834701538088, | |
| "step": 12820 | |
| }, | |
| { | |
| "epoch": 26.89633507853403, | |
| "learning_rate": 0.00031857087017569556, | |
| "loss": 2.3224008560180662, | |
| "step": 12830 | |
| }, | |
| { | |
| "epoch": 26.917277486910994, | |
| "learning_rate": 0.0003184467919352828, | |
| "loss": 2.3237512588500975, | |
| "step": 12840 | |
| }, | |
| { | |
| "epoch": 26.938219895287958, | |
| "learning_rate": 0.0003183228585614763, | |
| "loss": 2.3366432189941406, | |
| "step": 12850 | |
| }, | |
| { | |
| "epoch": 26.95916230366492, | |
| "learning_rate": 0.0003181990697725988, | |
| "loss": 2.3389394760131834, | |
| "step": 12860 | |
| }, | |
| { | |
| "epoch": 26.980104712041886, | |
| "learning_rate": 0.0003180754252877392, | |
| "loss": 2.2963605880737306, | |
| "step": 12870 | |
| }, | |
| { | |
| "epoch": 26.99895287958115, | |
| "eval_loss": 2.5350682735443115, | |
| "eval_runtime": 73.272, | |
| "eval_samples_per_second": 149.525, | |
| "step": 12879 | |
| }, | |
| { | |
| "epoch": 27.002094240837696, | |
| "learning_rate": 0.0003179519248267498, | |
| "loss": 2.4346525192260744, | |
| "step": 12880 | |
| }, | |
| { | |
| "epoch": 27.02303664921466, | |
| "learning_rate": 0.000317828568110244, | |
| "loss": 2.3435186386108398, | |
| "step": 12890 | |
| }, | |
| { | |
| "epoch": 27.043979057591624, | |
| "learning_rate": 0.000317705354859593, | |
| "loss": 2.351651191711426, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 27.064921465968588, | |
| "learning_rate": 0.0003175822847969239, | |
| "loss": 2.3490814208984374, | |
| "step": 12910 | |
| }, | |
| { | |
| "epoch": 27.08586387434555, | |
| "learning_rate": 0.00031745935764511645, | |
| "loss": 2.3329612731933596, | |
| "step": 12920 | |
| }, | |
| { | |
| "epoch": 27.106806282722513, | |
| "learning_rate": 0.0003173365731278007, | |
| "loss": 2.321672248840332, | |
| "step": 12930 | |
| }, | |
| { | |
| "epoch": 27.127748691099477, | |
| "learning_rate": 0.00031721393096935445, | |
| "loss": 2.3357425689697267, | |
| "step": 12940 | |
| }, | |
| { | |
| "epoch": 27.14869109947644, | |
| "learning_rate": 0.00031709143089490063, | |
| "loss": 2.3383440017700194, | |
| "step": 12950 | |
| }, | |
| { | |
| "epoch": 27.169633507853405, | |
| "learning_rate": 0.00031696907263030445, | |
| "loss": 2.3266096115112305, | |
| "step": 12960 | |
| }, | |
| { | |
| "epoch": 27.190575916230365, | |
| "learning_rate": 0.00031684685590217115, | |
| "loss": 2.3512828826904295, | |
| "step": 12970 | |
| }, | |
| { | |
| "epoch": 27.21151832460733, | |
| "learning_rate": 0.00031672478043784336, | |
| "loss": 2.329998016357422, | |
| "step": 12980 | |
| }, | |
| { | |
| "epoch": 27.232460732984293, | |
| "learning_rate": 0.0003166028459653984, | |
| "loss": 2.353693962097168, | |
| "step": 12990 | |
| }, | |
| { | |
| "epoch": 27.253403141361257, | |
| "learning_rate": 0.0003164810522136458, | |
| "loss": 2.3388673782348635, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 27.27434554973822, | |
| "learning_rate": 0.0003163593989121249, | |
| "loss": 2.34061222076416, | |
| "step": 13010 | |
| }, | |
| { | |
| "epoch": 27.295287958115182, | |
| "learning_rate": 0.0003162378857911022, | |
| "loss": 2.3279703140258787, | |
| "step": 13020 | |
| }, | |
| { | |
| "epoch": 27.316230366492146, | |
| "learning_rate": 0.00031611651258156884, | |
| "loss": 2.3643896102905275, | |
| "step": 13030 | |
| }, | |
| { | |
| "epoch": 27.33717277486911, | |
| "learning_rate": 0.0003159952790152381, | |
| "loss": 2.326703643798828, | |
| "step": 13040 | |
| }, | |
| { | |
| "epoch": 27.358115183246074, | |
| "learning_rate": 0.0003158741848245431, | |
| "loss": 2.3338809967041017, | |
| "step": 13050 | |
| }, | |
| { | |
| "epoch": 27.379057591623038, | |
| "learning_rate": 0.0003157532297426339, | |
| "loss": 2.318799591064453, | |
| "step": 13060 | |
| }, | |
| { | |
| "epoch": 27.4, | |
| "learning_rate": 0.00031563241350337546, | |
| "loss": 2.3162815093994142, | |
| "step": 13070 | |
| }, | |
| { | |
| "epoch": 27.420942408376963, | |
| "learning_rate": 0.00031551173584134514, | |
| "loss": 2.354751968383789, | |
| "step": 13080 | |
| }, | |
| { | |
| "epoch": 27.441884816753927, | |
| "learning_rate": 0.0003153911964918298, | |
| "loss": 2.3353591918945313, | |
| "step": 13090 | |
| }, | |
| { | |
| "epoch": 27.46282722513089, | |
| "learning_rate": 0.0003152707951908239, | |
| "loss": 2.3257909774780274, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 27.483769633507855, | |
| "learning_rate": 0.0003151505316750269, | |
| "loss": 2.336490821838379, | |
| "step": 13110 | |
| }, | |
| { | |
| "epoch": 27.504712041884815, | |
| "learning_rate": 0.0003150304056818405, | |
| "loss": 2.32800350189209, | |
| "step": 13120 | |
| }, | |
| { | |
| "epoch": 27.52565445026178, | |
| "learning_rate": 0.00031491041694936697, | |
| "loss": 2.3223346710205077, | |
| "step": 13130 | |
| }, | |
| { | |
| "epoch": 27.546596858638743, | |
| "learning_rate": 0.000314790565216406, | |
| "loss": 2.348642921447754, | |
| "step": 13140 | |
| }, | |
| { | |
| "epoch": 27.567539267015707, | |
| "learning_rate": 0.0003146708502224526, | |
| "loss": 2.3384424209594727, | |
| "step": 13150 | |
| }, | |
| { | |
| "epoch": 27.58848167539267, | |
| "learning_rate": 0.0003145512717076948, | |
| "loss": 2.301900863647461, | |
| "step": 13160 | |
| }, | |
| { | |
| "epoch": 27.609424083769632, | |
| "learning_rate": 0.00031443182941301147, | |
| "loss": 2.3309160232543946, | |
| "step": 13170 | |
| }, | |
| { | |
| "epoch": 27.630366492146596, | |
| "learning_rate": 0.0003143125230799694, | |
| "loss": 2.358192253112793, | |
| "step": 13180 | |
| }, | |
| { | |
| "epoch": 27.65130890052356, | |
| "learning_rate": 0.00031419335245082134, | |
| "loss": 2.347599220275879, | |
| "step": 13190 | |
| }, | |
| { | |
| "epoch": 27.672251308900524, | |
| "learning_rate": 0.00031407431726850375, | |
| "loss": 2.330830764770508, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 27.693193717277488, | |
| "learning_rate": 0.00031395541727663413, | |
| "loss": 2.33847599029541, | |
| "step": 13210 | |
| }, | |
| { | |
| "epoch": 27.71413612565445, | |
| "learning_rate": 0.0003138366522195088, | |
| "loss": 2.3454364776611327, | |
| "step": 13220 | |
| }, | |
| { | |
| "epoch": 27.735078534031413, | |
| "learning_rate": 0.0003137180218421011, | |
| "loss": 2.3458301544189455, | |
| "step": 13230 | |
| }, | |
| { | |
| "epoch": 27.756020942408377, | |
| "learning_rate": 0.0003135995258900582, | |
| "loss": 2.2951147079467775, | |
| "step": 13240 | |
| }, | |
| { | |
| "epoch": 27.77696335078534, | |
| "learning_rate": 0.0003134811641096994, | |
| "loss": 2.324018096923828, | |
| "step": 13250 | |
| }, | |
| { | |
| "epoch": 27.797905759162305, | |
| "learning_rate": 0.00031336293624801393, | |
| "loss": 2.320078468322754, | |
| "step": 13260 | |
| }, | |
| { | |
| "epoch": 27.81884816753927, | |
| "learning_rate": 0.00031324484205265824, | |
| "loss": 2.3213479995727537, | |
| "step": 13270 | |
| }, | |
| { | |
| "epoch": 27.83979057591623, | |
| "learning_rate": 0.000313126881271954, | |
| "loss": 2.352939224243164, | |
| "step": 13280 | |
| }, | |
| { | |
| "epoch": 27.860732984293193, | |
| "learning_rate": 0.0003130090536548859, | |
| "loss": 2.3275819778442384, | |
| "step": 13290 | |
| }, | |
| { | |
| "epoch": 27.881675392670157, | |
| "learning_rate": 0.00031289135895109924, | |
| "loss": 2.341213607788086, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 27.90261780104712, | |
| "learning_rate": 0.00031277379691089786, | |
| "loss": 2.352794647216797, | |
| "step": 13310 | |
| }, | |
| { | |
| "epoch": 27.923560209424085, | |
| "learning_rate": 0.00031265636728524174, | |
| "loss": 2.329135513305664, | |
| "step": 13320 | |
| }, | |
| { | |
| "epoch": 27.944502617801046, | |
| "learning_rate": 0.000312539069825745, | |
| "loss": 2.3414382934570312, | |
| "step": 13330 | |
| }, | |
| { | |
| "epoch": 27.96544502617801, | |
| "learning_rate": 0.00031242190428467325, | |
| "loss": 2.3638214111328124, | |
| "step": 13340 | |
| }, | |
| { | |
| "epoch": 27.986387434554974, | |
| "learning_rate": 0.0003123048704149423, | |
| "loss": 2.326797294616699, | |
| "step": 13350 | |
| }, | |
| { | |
| "epoch": 27.99895287958115, | |
| "eval_loss": 2.532017469406128, | |
| "eval_runtime": 73.734, | |
| "eval_samples_per_second": 148.588, | |
| "step": 13356 | |
| }, | |
| { | |
| "epoch": 28.008376963350784, | |
| "learning_rate": 0.0003121879679701147, | |
| "loss": 2.463714599609375, | |
| "step": 13360 | |
| }, | |
| { | |
| "epoch": 28.02931937172775, | |
| "learning_rate": 0.00031207119670439884, | |
| "loss": 2.3355535507202148, | |
| "step": 13370 | |
| }, | |
| { | |
| "epoch": 28.050261780104712, | |
| "learning_rate": 0.00031195455637264574, | |
| "loss": 2.3194732666015625, | |
| "step": 13380 | |
| }, | |
| { | |
| "epoch": 28.071204188481676, | |
| "learning_rate": 0.00031183804673034756, | |
| "loss": 2.3297607421875, | |
| "step": 13390 | |
| }, | |
| { | |
| "epoch": 28.09214659685864, | |
| "learning_rate": 0.0003117216675336353, | |
| "loss": 2.33233642578125, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 28.1130890052356, | |
| "learning_rate": 0.00031160541853927627, | |
| "loss": 2.3335954666137697, | |
| "step": 13410 | |
| }, | |
| { | |
| "epoch": 28.134031413612565, | |
| "learning_rate": 0.0003114892995046725, | |
| "loss": 2.3236547470092774, | |
| "step": 13420 | |
| }, | |
| { | |
| "epoch": 28.15497382198953, | |
| "learning_rate": 0.00031137331018785835, | |
| "loss": 2.3411203384399415, | |
| "step": 13430 | |
| }, | |
| { | |
| "epoch": 28.175916230366493, | |
| "learning_rate": 0.00031125745034749834, | |
| "loss": 2.343415451049805, | |
| "step": 13440 | |
| }, | |
| { | |
| "epoch": 28.196858638743457, | |
| "learning_rate": 0.00031114171974288516, | |
| "loss": 2.305185890197754, | |
| "step": 13450 | |
| }, | |
| { | |
| "epoch": 28.217801047120417, | |
| "learning_rate": 0.00031102611813393753, | |
| "loss": 2.3106929779052736, | |
| "step": 13460 | |
| }, | |
| { | |
| "epoch": 28.23874345549738, | |
| "learning_rate": 0.0003109106452811981, | |
| "loss": 2.342930221557617, | |
| "step": 13470 | |
| }, | |
| { | |
| "epoch": 28.259685863874346, | |
| "learning_rate": 0.00031079530094583135, | |
| "loss": 2.3201034545898436, | |
| "step": 13480 | |
| }, | |
| { | |
| "epoch": 28.28062827225131, | |
| "learning_rate": 0.0003106800848896216, | |
| "loss": 2.353871154785156, | |
| "step": 13490 | |
| }, | |
| { | |
| "epoch": 28.301570680628274, | |
| "learning_rate": 0.0003105649968749708, | |
| "loss": 2.3587244033813475, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 28.322513089005234, | |
| "learning_rate": 0.0003104500366648965, | |
| "loss": 2.334798812866211, | |
| "step": 13510 | |
| }, | |
| { | |
| "epoch": 28.343455497382198, | |
| "learning_rate": 0.0003103352040230302, | |
| "loss": 2.3387428283691407, | |
| "step": 13520 | |
| }, | |
| { | |
| "epoch": 28.364397905759162, | |
| "learning_rate": 0.00031022049871361445, | |
| "loss": 2.35083065032959, | |
| "step": 13530 | |
| }, | |
| { | |
| "epoch": 28.385340314136126, | |
| "learning_rate": 0.0003101059205015017, | |
| "loss": 2.329609680175781, | |
| "step": 13540 | |
| }, | |
| { | |
| "epoch": 28.40628272251309, | |
| "learning_rate": 0.0003099914691521518, | |
| "loss": 2.3420963287353516, | |
| "step": 13550 | |
| }, | |
| { | |
| "epoch": 28.42722513089005, | |
| "learning_rate": 0.00030987714443163, | |
| "loss": 2.3433679580688476, | |
| "step": 13560 | |
| }, | |
| { | |
| "epoch": 28.448167539267015, | |
| "learning_rate": 0.00030976294610660516, | |
| "loss": 2.3441110610961915, | |
| "step": 13570 | |
| }, | |
| { | |
| "epoch": 28.46910994764398, | |
| "learning_rate": 0.00030964887394434754, | |
| "loss": 2.338638687133789, | |
| "step": 13580 | |
| }, | |
| { | |
| "epoch": 28.490052356020943, | |
| "learning_rate": 0.000309534927712727, | |
| "loss": 2.3203834533691405, | |
| "step": 13590 | |
| }, | |
| { | |
| "epoch": 28.510994764397907, | |
| "learning_rate": 0.0003094211071802107, | |
| "loss": 2.3022727966308594, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 28.531937172774867, | |
| "learning_rate": 0.00030930741211586155, | |
| "loss": 2.3490713119506834, | |
| "step": 13610 | |
| }, | |
| { | |
| "epoch": 28.55287958115183, | |
| "learning_rate": 0.0003091938422893358, | |
| "loss": 2.3286787033081056, | |
| "step": 13620 | |
| }, | |
| { | |
| "epoch": 28.573821989528795, | |
| "learning_rate": 0.00030908039747088155, | |
| "loss": 2.305118942260742, | |
| "step": 13630 | |
| }, | |
| { | |
| "epoch": 28.59476439790576, | |
| "learning_rate": 0.00030896707743133635, | |
| "loss": 2.3220989227294924, | |
| "step": 13640 | |
| }, | |
| { | |
| "epoch": 28.615706806282724, | |
| "learning_rate": 0.0003088538819421255, | |
| "loss": 2.3236154556274413, | |
| "step": 13650 | |
| }, | |
| { | |
| "epoch": 28.636649214659684, | |
| "learning_rate": 0.00030874081077526003, | |
| "loss": 2.323534393310547, | |
| "step": 13660 | |
| }, | |
| { | |
| "epoch": 28.657591623036648, | |
| "learning_rate": 0.00030862786370333505, | |
| "loss": 2.3269046783447265, | |
| "step": 13670 | |
| }, | |
| { | |
| "epoch": 28.678534031413612, | |
| "learning_rate": 0.00030851504049952727, | |
| "loss": 2.3261356353759766, | |
| "step": 13680 | |
| }, | |
| { | |
| "epoch": 28.699476439790576, | |
| "learning_rate": 0.00030840234093759347, | |
| "loss": 2.3454893112182615, | |
| "step": 13690 | |
| }, | |
| { | |
| "epoch": 28.72041884816754, | |
| "learning_rate": 0.0003082897647918688, | |
| "loss": 2.3275333404541017, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 28.741361256544504, | |
| "learning_rate": 0.0003081773118372642, | |
| "loss": 2.3333641052246095, | |
| "step": 13710 | |
| }, | |
| { | |
| "epoch": 28.762303664921465, | |
| "learning_rate": 0.00030806498184926523, | |
| "loss": 2.3693473815917967, | |
| "step": 13720 | |
| }, | |
| { | |
| "epoch": 28.78324607329843, | |
| "learning_rate": 0.0003079527746039298, | |
| "loss": 2.3141483306884765, | |
| "step": 13730 | |
| }, | |
| { | |
| "epoch": 28.804188481675393, | |
| "learning_rate": 0.00030784068987788624, | |
| "loss": 2.353886032104492, | |
| "step": 13740 | |
| }, | |
| { | |
| "epoch": 28.825130890052357, | |
| "learning_rate": 0.00030772872744833183, | |
| "loss": 2.3143518447875975, | |
| "step": 13750 | |
| }, | |
| { | |
| "epoch": 28.84607329842932, | |
| "learning_rate": 0.00030761688709303036, | |
| "loss": 2.317976379394531, | |
| "step": 13760 | |
| }, | |
| { | |
| "epoch": 28.86701570680628, | |
| "learning_rate": 0.0003075051685903109, | |
| "loss": 2.331821060180664, | |
| "step": 13770 | |
| }, | |
| { | |
| "epoch": 28.887958115183245, | |
| "learning_rate": 0.00030739357171906536, | |
| "loss": 2.3297216415405275, | |
| "step": 13780 | |
| }, | |
| { | |
| "epoch": 28.90890052356021, | |
| "learning_rate": 0.0003072820962587471, | |
| "loss": 2.3354673385620117, | |
| "step": 13790 | |
| }, | |
| { | |
| "epoch": 28.929842931937173, | |
| "learning_rate": 0.00030717074198936904, | |
| "loss": 2.308320999145508, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 28.950785340314138, | |
| "learning_rate": 0.0003070595086915015, | |
| "loss": 2.312677192687988, | |
| "step": 13810 | |
| }, | |
| { | |
| "epoch": 28.971727748691098, | |
| "learning_rate": 0.00030694839614627076, | |
| "loss": 2.3000450134277344, | |
| "step": 13820 | |
| }, | |
| { | |
| "epoch": 28.992670157068062, | |
| "learning_rate": 0.0003068374041353571, | |
| "loss": 2.333408737182617, | |
| "step": 13830 | |
| }, | |
| { | |
| "epoch": 28.99895287958115, | |
| "eval_loss": 2.541404962539673, | |
| "eval_runtime": 73.9823, | |
| "eval_samples_per_second": 148.09, | |
| "step": 13833 | |
| }, | |
| { | |
| "epoch": 29.014659685863876, | |
| "learning_rate": 0.000306726532440993, | |
| "loss": 2.4384193420410156, | |
| "step": 13840 | |
| }, | |
| { | |
| "epoch": 29.035602094240836, | |
| "learning_rate": 0.0003066157808459613, | |
| "loss": 2.3120851516723633, | |
| "step": 13850 | |
| }, | |
| { | |
| "epoch": 29.0565445026178, | |
| "learning_rate": 0.0003065051491335936, | |
| "loss": 2.333901596069336, | |
| "step": 13860 | |
| }, | |
| { | |
| "epoch": 29.077486910994764, | |
| "learning_rate": 0.0003063946370877681, | |
| "loss": 2.311614227294922, | |
| "step": 13870 | |
| }, | |
| { | |
| "epoch": 29.09842931937173, | |
| "learning_rate": 0.0003062842444929085, | |
| "loss": 2.328507423400879, | |
| "step": 13880 | |
| }, | |
| { | |
| "epoch": 29.119371727748693, | |
| "learning_rate": 0.00030617397113398125, | |
| "loss": 2.3186750411987305, | |
| "step": 13890 | |
| }, | |
| { | |
| "epoch": 29.140314136125653, | |
| "learning_rate": 0.00030606381679649483, | |
| "loss": 2.3101566314697264, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 29.161256544502617, | |
| "learning_rate": 0.00030595378126649727, | |
| "loss": 2.3323139190673827, | |
| "step": 13910 | |
| }, | |
| { | |
| "epoch": 29.18219895287958, | |
| "learning_rate": 0.0003058438643305747, | |
| "loss": 2.3120336532592773, | |
| "step": 13920 | |
| }, | |
| { | |
| "epoch": 29.203141361256545, | |
| "learning_rate": 0.00030573406577584955, | |
| "loss": 2.3213123321533202, | |
| "step": 13930 | |
| }, | |
| { | |
| "epoch": 29.22408376963351, | |
| "learning_rate": 0.000305624385389979, | |
| "loss": 2.3158872604370115, | |
| "step": 13940 | |
| }, | |
| { | |
| "epoch": 29.24502617801047, | |
| "learning_rate": 0.0003055148229611527, | |
| "loss": 2.3301626205444337, | |
| "step": 13950 | |
| }, | |
| { | |
| "epoch": 29.265968586387434, | |
| "learning_rate": 0.00030540537827809176, | |
| "loss": 2.2979711532592773, | |
| "step": 13960 | |
| }, | |
| { | |
| "epoch": 29.286910994764398, | |
| "learning_rate": 0.0003052960511300467, | |
| "loss": 2.337363433837891, | |
| "step": 13970 | |
| }, | |
| { | |
| "epoch": 29.307853403141362, | |
| "learning_rate": 0.0003051868413067956, | |
| "loss": 2.3084648132324217, | |
| "step": 13980 | |
| }, | |
| { | |
| "epoch": 29.328795811518326, | |
| "learning_rate": 0.00030507774859864277, | |
| "loss": 2.3315618515014647, | |
| "step": 13990 | |
| }, | |
| { | |
| "epoch": 29.349738219895286, | |
| "learning_rate": 0.0003049687727964166, | |
| "loss": 2.342039680480957, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 29.37068062827225, | |
| "learning_rate": 0.00030485991369146834, | |
| "loss": 2.327268600463867, | |
| "step": 14010 | |
| }, | |
| { | |
| "epoch": 29.391623036649214, | |
| "learning_rate": 0.00030475117107567015, | |
| "loss": 2.311885643005371, | |
| "step": 14020 | |
| }, | |
| { | |
| "epoch": 29.41256544502618, | |
| "learning_rate": 0.0003046425447414135, | |
| "loss": 2.297453498840332, | |
| "step": 14030 | |
| }, | |
| { | |
| "epoch": 29.433507853403142, | |
| "learning_rate": 0.0003045340344816073, | |
| "loss": 2.295667839050293, | |
| "step": 14040 | |
| }, | |
| { | |
| "epoch": 29.454450261780103, | |
| "learning_rate": 0.0003044256400896769, | |
| "loss": 2.3093278884887694, | |
| "step": 14050 | |
| }, | |
| { | |
| "epoch": 29.475392670157067, | |
| "learning_rate": 0.0003043173613595614, | |
| "loss": 2.3026140213012694, | |
| "step": 14060 | |
| }, | |
| { | |
| "epoch": 29.49633507853403, | |
| "learning_rate": 0.0003042091980857131, | |
| "loss": 2.339429473876953, | |
| "step": 14070 | |
| }, | |
| { | |
| "epoch": 29.517277486910995, | |
| "learning_rate": 0.0003041011500630949, | |
| "loss": 2.3204904556274415, | |
| "step": 14080 | |
| }, | |
| { | |
| "epoch": 29.53821989528796, | |
| "learning_rate": 0.00030399321708717947, | |
| "loss": 2.327162170410156, | |
| "step": 14090 | |
| }, | |
| { | |
| "epoch": 29.559162303664923, | |
| "learning_rate": 0.00030388539895394697, | |
| "loss": 2.3462697982788088, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 29.580104712041884, | |
| "learning_rate": 0.00030377769545988394, | |
| "loss": 2.2912479400634767, | |
| "step": 14110 | |
| }, | |
| { | |
| "epoch": 29.601047120418848, | |
| "learning_rate": 0.00030367010640198143, | |
| "loss": 2.35098876953125, | |
| "step": 14120 | |
| }, | |
| { | |
| "epoch": 29.62198952879581, | |
| "learning_rate": 0.0003035626315777333, | |
| "loss": 2.304596710205078, | |
| "step": 14130 | |
| }, | |
| { | |
| "epoch": 29.642931937172776, | |
| "learning_rate": 0.00030345527078513493, | |
| "loss": 2.3083545684814455, | |
| "step": 14140 | |
| }, | |
| { | |
| "epoch": 29.66387434554974, | |
| "learning_rate": 0.0003033480238226813, | |
| "loss": 2.304719924926758, | |
| "step": 14150 | |
| }, | |
| { | |
| "epoch": 29.6848167539267, | |
| "learning_rate": 0.0003032408904893656, | |
| "loss": 2.309472846984863, | |
| "step": 14160 | |
| }, | |
| { | |
| "epoch": 29.705759162303664, | |
| "learning_rate": 0.00030313387058467756, | |
| "loss": 2.3114566802978516, | |
| "step": 14170 | |
| }, | |
| { | |
| "epoch": 29.72670157068063, | |
| "learning_rate": 0.0003030269639086021, | |
| "loss": 2.3168495178222654, | |
| "step": 14180 | |
| }, | |
| { | |
| "epoch": 29.747643979057592, | |
| "learning_rate": 0.0003029201702616173, | |
| "loss": 2.3027936935424806, | |
| "step": 14190 | |
| }, | |
| { | |
| "epoch": 29.768586387434556, | |
| "learning_rate": 0.0003028134894446933, | |
| "loss": 2.330441474914551, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 29.789528795811517, | |
| "learning_rate": 0.00030270692125929034, | |
| "loss": 2.2950525283813477, | |
| "step": 14210 | |
| }, | |
| { | |
| "epoch": 29.81047120418848, | |
| "learning_rate": 0.00030260046550735763, | |
| "loss": 2.3066877365112304, | |
| "step": 14220 | |
| }, | |
| { | |
| "epoch": 29.831413612565445, | |
| "learning_rate": 0.0003024941219913316, | |
| "loss": 2.3138294219970703, | |
| "step": 14230 | |
| }, | |
| { | |
| "epoch": 29.85235602094241, | |
| "learning_rate": 0.00030238789051413416, | |
| "loss": 2.3398483276367186, | |
| "step": 14240 | |
| }, | |
| { | |
| "epoch": 29.873298429319373, | |
| "learning_rate": 0.00030228177087917153, | |
| "loss": 2.3180753707885744, | |
| "step": 14250 | |
| }, | |
| { | |
| "epoch": 29.894240837696334, | |
| "learning_rate": 0.00030217576289033235, | |
| "loss": 2.293859100341797, | |
| "step": 14260 | |
| }, | |
| { | |
| "epoch": 29.915183246073298, | |
| "learning_rate": 0.00030206986635198654, | |
| "loss": 2.315079116821289, | |
| "step": 14270 | |
| }, | |
| { | |
| "epoch": 29.93612565445026, | |
| "learning_rate": 0.00030196408106898356, | |
| "loss": 2.3188785552978515, | |
| "step": 14280 | |
| }, | |
| { | |
| "epoch": 29.957068062827226, | |
| "learning_rate": 0.0003018584068466507, | |
| "loss": 2.355891799926758, | |
| "step": 14290 | |
| }, | |
| { | |
| "epoch": 29.97801047120419, | |
| "learning_rate": 0.0003017528434907922, | |
| "loss": 2.310663032531738, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 29.99895287958115, | |
| "learning_rate": 0.00030164739080768704, | |
| "loss": 2.357052803039551, | |
| "step": 14310 | |
| }, | |
| { | |
| "epoch": 29.99895287958115, | |
| "eval_loss": 2.5363190174102783, | |
| "eval_runtime": 73.3558, | |
| "eval_samples_per_second": 149.354, | |
| "step": 14310 | |
| }, | |
| { | |
| "epoch": 30.020942408376964, | |
| "learning_rate": 0.0003015420486040879, | |
| "loss": 2.432624626159668, | |
| "step": 14320 | |
| }, | |
| { | |
| "epoch": 30.041884816753928, | |
| "learning_rate": 0.00030143681668721935, | |
| "loss": 2.3034442901611327, | |
| "step": 14330 | |
| }, | |
| { | |
| "epoch": 30.06282722513089, | |
| "learning_rate": 0.00030133169486477694, | |
| "loss": 2.3489042282104493, | |
| "step": 14340 | |
| }, | |
| { | |
| "epoch": 30.083769633507853, | |
| "learning_rate": 0.0003012266829449249, | |
| "loss": 2.3431249618530274, | |
| "step": 14350 | |
| }, | |
| { | |
| "epoch": 30.104712041884817, | |
| "learning_rate": 0.00030112178073629544, | |
| "loss": 2.332902526855469, | |
| "step": 14360 | |
| }, | |
| { | |
| "epoch": 30.12565445026178, | |
| "learning_rate": 0.0003010169880479867, | |
| "loss": 2.323573112487793, | |
| "step": 14370 | |
| }, | |
| { | |
| "epoch": 30.146596858638745, | |
| "learning_rate": 0.0003009123046895618, | |
| "loss": 2.299881362915039, | |
| "step": 14380 | |
| }, | |
| { | |
| "epoch": 30.167539267015705, | |
| "learning_rate": 0.00030080773047104687, | |
| "loss": 2.319793701171875, | |
| "step": 14390 | |
| }, | |
| { | |
| "epoch": 30.18848167539267, | |
| "learning_rate": 0.0003007032652029301, | |
| "loss": 2.3272857666015625, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 30.209424083769633, | |
| "learning_rate": 0.00030059890869615983, | |
| "loss": 2.3354257583618163, | |
| "step": 14410 | |
| }, | |
| { | |
| "epoch": 30.230366492146597, | |
| "learning_rate": 0.0003004946607621435, | |
| "loss": 2.3249101638793945, | |
| "step": 14420 | |
| }, | |
| { | |
| "epoch": 30.25130890052356, | |
| "learning_rate": 0.0003003905212127461, | |
| "loss": 2.331306266784668, | |
| "step": 14430 | |
| }, | |
| { | |
| "epoch": 30.272251308900522, | |
| "learning_rate": 0.00030028648986028843, | |
| "loss": 2.3302356719970705, | |
| "step": 14440 | |
| }, | |
| { | |
| "epoch": 30.293193717277486, | |
| "learning_rate": 0.00030018256651754633, | |
| "loss": 2.3084732055664063, | |
| "step": 14450 | |
| }, | |
| { | |
| "epoch": 30.31413612565445, | |
| "learning_rate": 0.00030007875099774864, | |
| "loss": 2.324197006225586, | |
| "step": 14460 | |
| }, | |
| { | |
| "epoch": 30.335078534031414, | |
| "learning_rate": 0.0002999750431145761, | |
| "loss": 2.309644317626953, | |
| "step": 14470 | |
| }, | |
| { | |
| "epoch": 30.356020942408378, | |
| "learning_rate": 0.0002998714426821599, | |
| "loss": 2.332279014587402, | |
| "step": 14480 | |
| }, | |
| { | |
| "epoch": 30.376963350785342, | |
| "learning_rate": 0.00029976794951508027, | |
| "loss": 2.3015905380249024, | |
| "step": 14490 | |
| }, | |
| { | |
| "epoch": 30.397905759162303, | |
| "learning_rate": 0.00029966456342836505, | |
| "loss": 2.3307212829589843, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 30.418848167539267, | |
| "learning_rate": 0.0002995612842374884, | |
| "loss": 2.3488508224487306, | |
| "step": 14510 | |
| }, | |
| { | |
| "epoch": 30.43979057591623, | |
| "learning_rate": 0.0002994581117583693, | |
| "loss": 2.2981189727783202, | |
| "step": 14520 | |
| }, | |
| { | |
| "epoch": 30.460732984293195, | |
| "learning_rate": 0.00029935504580737006, | |
| "loss": 2.292937088012695, | |
| "step": 14530 | |
| }, | |
| { | |
| "epoch": 30.48167539267016, | |
| "learning_rate": 0.00029925208620129546, | |
| "loss": 2.329487609863281, | |
| "step": 14540 | |
| }, | |
| { | |
| "epoch": 30.50261780104712, | |
| "learning_rate": 0.0002991492327573909, | |
| "loss": 2.275893974304199, | |
| "step": 14550 | |
| }, | |
| { | |
| "epoch": 30.523560209424083, | |
| "learning_rate": 0.0002990464852933409, | |
| "loss": 2.289459228515625, | |
| "step": 14560 | |
| }, | |
| { | |
| "epoch": 30.544502617801047, | |
| "learning_rate": 0.0002989438436272684, | |
| "loss": 2.2976861953735352, | |
| "step": 14570 | |
| }, | |
| { | |
| "epoch": 30.56544502617801, | |
| "learning_rate": 0.00029884130757773275, | |
| "loss": 2.319015884399414, | |
| "step": 14580 | |
| }, | |
| { | |
| "epoch": 30.586387434554975, | |
| "learning_rate": 0.0002987388769637288, | |
| "loss": 2.3237770080566404, | |
| "step": 14590 | |
| }, | |
| { | |
| "epoch": 30.607329842931936, | |
| "learning_rate": 0.00029863655160468534, | |
| "loss": 2.330046844482422, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 30.6282722513089, | |
| "learning_rate": 0.0002985343313204637, | |
| "loss": 2.328061103820801, | |
| "step": 14610 | |
| }, | |
| { | |
| "epoch": 30.649214659685864, | |
| "learning_rate": 0.0002984322159313568, | |
| "loss": 2.3274772644042967, | |
| "step": 14620 | |
| }, | |
| { | |
| "epoch": 30.670157068062828, | |
| "learning_rate": 0.00029833020525808714, | |
| "loss": 2.325545883178711, | |
| "step": 14630 | |
| }, | |
| { | |
| "epoch": 30.691099476439792, | |
| "learning_rate": 0.00029822829912180636, | |
| "loss": 2.3240276336669923, | |
| "step": 14640 | |
| }, | |
| { | |
| "epoch": 30.712041884816752, | |
| "learning_rate": 0.0002981264973440931, | |
| "loss": 2.324121856689453, | |
| "step": 14650 | |
| }, | |
| { | |
| "epoch": 30.732984293193716, | |
| "learning_rate": 0.00029802479974695223, | |
| "loss": 2.3233869552612303, | |
| "step": 14660 | |
| }, | |
| { | |
| "epoch": 30.75392670157068, | |
| "learning_rate": 0.00029792320615281337, | |
| "loss": 2.3022382736206053, | |
| "step": 14670 | |
| }, | |
| { | |
| "epoch": 30.774869109947645, | |
| "learning_rate": 0.00029782171638452937, | |
| "loss": 2.3219308853149414, | |
| "step": 14680 | |
| }, | |
| { | |
| "epoch": 30.79581151832461, | |
| "learning_rate": 0.0002977203302653755, | |
| "loss": 2.3212976455688477, | |
| "step": 14690 | |
| }, | |
| { | |
| "epoch": 30.81675392670157, | |
| "learning_rate": 0.0002976190476190476, | |
| "loss": 2.345839500427246, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 30.837696335078533, | |
| "learning_rate": 0.0002975178682696613, | |
| "loss": 2.2968841552734376, | |
| "step": 14710 | |
| }, | |
| { | |
| "epoch": 30.858638743455497, | |
| "learning_rate": 0.0002974167920417504, | |
| "loss": 2.313581848144531, | |
| "step": 14720 | |
| }, | |
| { | |
| "epoch": 30.87958115183246, | |
| "learning_rate": 0.00029731581876026557, | |
| "loss": 2.326977348327637, | |
| "step": 14730 | |
| }, | |
| { | |
| "epoch": 30.900523560209425, | |
| "learning_rate": 0.00029721494825057357, | |
| "loss": 2.3257322311401367, | |
| "step": 14740 | |
| }, | |
| { | |
| "epoch": 30.921465968586386, | |
| "learning_rate": 0.00029711418033845523, | |
| "loss": 2.285732460021973, | |
| "step": 14750 | |
| }, | |
| { | |
| "epoch": 30.94240837696335, | |
| "learning_rate": 0.0002970135148501047, | |
| "loss": 2.3275766372680664, | |
| "step": 14760 | |
| }, | |
| { | |
| "epoch": 30.963350785340314, | |
| "learning_rate": 0.00029691295161212816, | |
| "loss": 2.3182727813720705, | |
| "step": 14770 | |
| }, | |
| { | |
| "epoch": 30.984293193717278, | |
| "learning_rate": 0.0002968124904515423, | |
| "loss": 2.3104841232299806, | |
| "step": 14780 | |
| }, | |
| { | |
| "epoch": 30.99895287958115, | |
| "eval_loss": 2.544727087020874, | |
| "eval_runtime": 73.5548, | |
| "eval_samples_per_second": 148.95, | |
| "step": 14787 | |
| }, | |
| { | |
| "epoch": 31.006282722513088, | |
| "learning_rate": 0.00029671213119577346, | |
| "loss": 2.4215261459350588, | |
| "step": 14790 | |
| }, | |
| { | |
| "epoch": 31.027225130890052, | |
| "learning_rate": 0.00029661187367265593, | |
| "loss": 2.3005090713500977, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 31.048167539267016, | |
| "learning_rate": 0.0002965117177104311, | |
| "loss": 2.3047313690185547, | |
| "step": 14810 | |
| }, | |
| { | |
| "epoch": 31.06910994764398, | |
| "learning_rate": 0.0002964116631377459, | |
| "loss": 2.3039810180664064, | |
| "step": 14820 | |
| }, | |
| { | |
| "epoch": 31.09005235602094, | |
| "learning_rate": 0.000296311709783652, | |
| "loss": 2.3020254135131837, | |
| "step": 14830 | |
| }, | |
| { | |
| "epoch": 31.110994764397905, | |
| "learning_rate": 0.00029621185747760406, | |
| "loss": 2.3200841903686524, | |
| "step": 14840 | |
| }, | |
| { | |
| "epoch": 31.13193717277487, | |
| "learning_rate": 0.0002961121060494589, | |
| "loss": 2.2955398559570312, | |
| "step": 14850 | |
| }, | |
| { | |
| "epoch": 31.152879581151833, | |
| "learning_rate": 0.00029601245532947417, | |
| "loss": 2.322628974914551, | |
| "step": 14860 | |
| }, | |
| { | |
| "epoch": 31.173821989528797, | |
| "learning_rate": 0.0002959129051483069, | |
| "loss": 2.3180873870849608, | |
| "step": 14870 | |
| }, | |
| { | |
| "epoch": 31.194764397905757, | |
| "learning_rate": 0.00029581345533701285, | |
| "loss": 2.299137306213379, | |
| "step": 14880 | |
| }, | |
| { | |
| "epoch": 31.21570680628272, | |
| "learning_rate": 0.0002957141057270448, | |
| "loss": 2.2992317199707033, | |
| "step": 14890 | |
| }, | |
| { | |
| "epoch": 31.236649214659685, | |
| "learning_rate": 0.0002956148561502513, | |
| "loss": 2.3339006423950197, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 31.25759162303665, | |
| "learning_rate": 0.00029551570643887603, | |
| "loss": 2.2937063217163085, | |
| "step": 14910 | |
| }, | |
| { | |
| "epoch": 31.278534031413614, | |
| "learning_rate": 0.00029541665642555606, | |
| "loss": 2.3086185455322266, | |
| "step": 14920 | |
| }, | |
| { | |
| "epoch": 31.299476439790578, | |
| "learning_rate": 0.00029531770594332096, | |
| "loss": 2.3051830291748048, | |
| "step": 14930 | |
| }, | |
| { | |
| "epoch": 31.320418848167538, | |
| "learning_rate": 0.0002952188548255915, | |
| "loss": 2.3165931701660156, | |
| "step": 14940 | |
| }, | |
| { | |
| "epoch": 31.341361256544502, | |
| "learning_rate": 0.00029512010290617854, | |
| "loss": 2.310456657409668, | |
| "step": 14950 | |
| }, | |
| { | |
| "epoch": 31.362303664921466, | |
| "learning_rate": 0.0002950214500192816, | |
| "loss": 2.3057369232177733, | |
| "step": 14960 | |
| }, | |
| { | |
| "epoch": 31.38324607329843, | |
| "learning_rate": 0.00029492289599948834, | |
| "loss": 2.316122627258301, | |
| "step": 14970 | |
| }, | |
| { | |
| "epoch": 31.404188481675394, | |
| "learning_rate": 0.0002948244406817725, | |
| "loss": 2.3287500381469726, | |
| "step": 14980 | |
| }, | |
| { | |
| "epoch": 31.425130890052355, | |
| "learning_rate": 0.00029472608390149343, | |
| "loss": 2.309092330932617, | |
| "step": 14990 | |
| }, | |
| { | |
| "epoch": 31.44607329842932, | |
| "learning_rate": 0.00029462782549439473, | |
| "loss": 2.331714057922363, | |
| "step": 15000 | |
| } | |
| ], | |
| "max_steps": 15000, | |
| "num_train_epochs": 32, | |
| "total_flos": 4125839411805155328, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |