{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 2916,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.006858710562414266,
      "grad_norm": 1.162638545036316,
      "learning_rate": 6.849315068493151e-07,
      "loss": 0.0267,
      "step": 10
    },
    {
      "epoch": 0.013717421124828532,
      "grad_norm": 0.5459424257278442,
      "learning_rate": 1.3698630136986302e-06,
      "loss": 0.0209,
      "step": 20
    },
    {
      "epoch": 0.0205761316872428,
      "grad_norm": 0.484558641910553,
      "learning_rate": 2.0547945205479454e-06,
      "loss": 0.0117,
      "step": 30
    },
    {
      "epoch": 0.027434842249657063,
      "grad_norm": 0.23133069276809692,
      "learning_rate": 2.7397260273972604e-06,
      "loss": 0.0065,
      "step": 40
    },
    {
      "epoch": 0.03429355281207133,
      "grad_norm": 0.20168930292129517,
      "learning_rate": 3.4246575342465754e-06,
      "loss": 0.0097,
      "step": 50
    },
    {
      "epoch": 0.0411522633744856,
      "grad_norm": 0.20999373495578766,
      "learning_rate": 4.109589041095891e-06,
      "loss": 0.0152,
      "step": 60
    },
    {
      "epoch": 0.04801097393689986,
      "grad_norm": 0.42117851972579956,
      "learning_rate": 4.7945205479452054e-06,
      "loss": 0.0194,
      "step": 70
    },
    {
      "epoch": 0.05486968449931413,
      "grad_norm": 0.1798817217350006,
      "learning_rate": 5.479452054794521e-06,
      "loss": 0.0028,
      "step": 80
    },
    {
      "epoch": 0.06172839506172839,
      "grad_norm": 0.058727577328681946,
      "learning_rate": 6.164383561643836e-06,
      "loss": 0.0015,
      "step": 90
    },
    {
      "epoch": 0.06858710562414266,
      "grad_norm": 0.22002284228801727,
      "learning_rate": 6.849315068493151e-06,
      "loss": 0.0055,
      "step": 100
    },
    {
      "epoch": 0.07544581618655692,
      "grad_norm": 0.2096785008907318,
      "learning_rate": 7.534246575342466e-06,
      "loss": 0.011,
      "step": 110
    },
    {
      "epoch": 0.0823045267489712,
      "grad_norm": 0.06468941271305084,
      "learning_rate": 8.219178082191782e-06,
      "loss": 0.0092,
      "step": 120
    },
    {
      "epoch": 0.08916323731138547,
      "grad_norm": 0.03226824477314949,
      "learning_rate": 8.904109589041097e-06,
      "loss": 0.0019,
      "step": 130
    },
    {
      "epoch": 0.09602194787379972,
      "grad_norm": 0.10582853108644485,
      "learning_rate": 9.589041095890411e-06,
      "loss": 0.0027,
      "step": 140
    },
    {
      "epoch": 0.102880658436214,
      "grad_norm": 0.14337775111198425,
      "learning_rate": 1.0273972602739728e-05,
      "loss": 0.0034,
      "step": 150
    },
    {
      "epoch": 0.10973936899862825,
      "grad_norm": 0.09626258909702301,
      "learning_rate": 1.0958904109589042e-05,
      "loss": 0.0024,
      "step": 160
    },
    {
      "epoch": 0.11659807956104253,
      "grad_norm": 0.29003870487213135,
      "learning_rate": 1.1643835616438357e-05,
      "loss": 0.0238,
      "step": 170
    },
    {
      "epoch": 0.12345679012345678,
      "grad_norm": 0.12768058478832245,
      "learning_rate": 1.2328767123287673e-05,
      "loss": 0.0039,
      "step": 180
    },
    {
      "epoch": 0.13031550068587106,
      "grad_norm": 0.04557815566658974,
      "learning_rate": 1.3013698630136988e-05,
      "loss": 0.0023,
      "step": 190
    },
    {
      "epoch": 0.13717421124828533,
      "grad_norm": 0.15683676302433014,
      "learning_rate": 1.3698630136986302e-05,
      "loss": 0.0048,
      "step": 200
    },
    {
      "epoch": 0.1440329218106996,
      "grad_norm": 0.042118772864341736,
      "learning_rate": 1.4383561643835617e-05,
      "loss": 0.003,
      "step": 210
    },
    {
      "epoch": 0.15089163237311384,
      "grad_norm": 0.08400426059961319,
      "learning_rate": 1.5068493150684933e-05,
      "loss": 0.0043,
      "step": 220
    },
    {
      "epoch": 0.15775034293552812,
      "grad_norm": 0.17980587482452393,
      "learning_rate": 1.5753424657534248e-05,
      "loss": 0.0078,
      "step": 230
    },
    {
      "epoch": 0.1646090534979424,
      "grad_norm": 0.14988136291503906,
      "learning_rate": 1.6438356164383563e-05,
      "loss": 0.0061,
      "step": 240
    },
    {
      "epoch": 0.17146776406035666,
      "grad_norm": 0.15037085115909576,
      "learning_rate": 1.712328767123288e-05,
      "loss": 0.0028,
      "step": 250
    },
    {
      "epoch": 0.17832647462277093,
      "grad_norm": 0.235006645321846,
      "learning_rate": 1.7808219178082194e-05,
      "loss": 0.0045,
      "step": 260
    },
    {
      "epoch": 0.18518518518518517,
      "grad_norm": 0.22422538697719574,
      "learning_rate": 1.849315068493151e-05,
      "loss": 0.0023,
      "step": 270
    },
    {
      "epoch": 0.19204389574759945,
      "grad_norm": 0.2827920615673065,
      "learning_rate": 1.9178082191780822e-05,
      "loss": 0.0124,
      "step": 280
    },
    {
      "epoch": 0.19890260631001372,
      "grad_norm": 0.27119961380958557,
      "learning_rate": 1.9863013698630137e-05,
      "loss": 0.002,
      "step": 290
    },
    {
      "epoch": 0.205761316872428,
      "grad_norm": 0.1681121289730072,
      "learning_rate": 1.9999541310559686e-05,
      "loss": 0.0074,
      "step": 300
    },
    {
      "epoch": 0.21262002743484226,
      "grad_norm": 0.13818895816802979,
      "learning_rate": 1.9997677956826334e-05,
      "loss": 0.003,
      "step": 310
    },
    {
      "epoch": 0.2194787379972565,
      "grad_norm": 0.06377672404050827,
      "learning_rate": 1.9994381537597277e-05,
      "loss": 0.018,
      "step": 320
    },
    {
      "epoch": 0.22633744855967078,
      "grad_norm": 0.12524163722991943,
      "learning_rate": 1.9989652525380695e-05,
      "loss": 0.0209,
      "step": 330
    },
    {
      "epoch": 0.23319615912208505,
      "grad_norm": 0.04168206453323364,
      "learning_rate": 1.998349159803241e-05,
      "loss": 0.0027,
      "step": 340
    },
    {
      "epoch": 0.24005486968449932,
      "grad_norm": 0.27079302072525024,
      "learning_rate": 1.9975899638658733e-05,
      "loss": 0.0116,
      "step": 350
    },
    {
      "epoch": 0.24691358024691357,
      "grad_norm": 0.13237667083740234,
      "learning_rate": 1.9966877735489846e-05,
      "loss": 0.0019,
      "step": 360
    },
    {
      "epoch": 0.25377229080932784,
      "grad_norm": 0.06160791590809822,
      "learning_rate": 1.995642718172386e-05,
      "loss": 0.0014,
      "step": 370
    },
    {
      "epoch": 0.2606310013717421,
      "grad_norm": 0.07572882622480392,
      "learning_rate": 1.9944549475341404e-05,
      "loss": 0.006,
      "step": 380
    },
    {
      "epoch": 0.2674897119341564,
      "grad_norm": 0.10234888643026352,
      "learning_rate": 1.9931246318890943e-05,
      "loss": 0.0039,
      "step": 390
    },
    {
      "epoch": 0.27434842249657065,
      "grad_norm": 0.12086405605077744,
      "learning_rate": 1.9916519619244707e-05,
      "loss": 0.0046,
      "step": 400
    },
    {
      "epoch": 0.2812071330589849,
      "grad_norm": 0.046436768025159836,
      "learning_rate": 1.990037148732537e-05,
      "loss": 0.0097,
      "step": 410
    },
    {
      "epoch": 0.2880658436213992,
      "grad_norm": 0.043175164610147476,
      "learning_rate": 1.9882804237803487e-05,
      "loss": 0.0015,
      "step": 420
    },
    {
      "epoch": 0.29492455418381347,
      "grad_norm": 0.3755040168762207,
      "learning_rate": 1.9863820388765672e-05,
      "loss": 0.0077,
      "step": 430
    },
    {
      "epoch": 0.3017832647462277,
      "grad_norm": 0.10730766505002975,
      "learning_rate": 1.9843422661353697e-05,
      "loss": 0.001,
      "step": 440
    },
    {
      "epoch": 0.30864197530864196,
      "grad_norm": 0.09198994934558868,
      "learning_rate": 1.9821613979374414e-05,
      "loss": 0.0052,
      "step": 450
    },
    {
      "epoch": 0.31550068587105623,
      "grad_norm": 1.359625220298767,
      "learning_rate": 1.979839746888067e-05,
      "loss": 0.0146,
      "step": 460
    },
    {
      "epoch": 0.3223593964334705,
      "grad_norm": 0.2990811765193939,
      "learning_rate": 1.9773776457723216e-05,
      "loss": 0.0083,
      "step": 470
    },
    {
      "epoch": 0.3292181069958848,
      "grad_norm": 0.09418093413114548,
      "learning_rate": 1.9747754475073707e-05,
      "loss": 0.0057,
      "step": 480
    },
    {
      "epoch": 0.33607681755829905,
      "grad_norm": 0.1610361486673355,
      "learning_rate": 1.9720335250918797e-05,
      "loss": 0.0066,
      "step": 490
    },
    {
      "epoch": 0.3429355281207133,
      "grad_norm": 0.2202681005001068,
      "learning_rate": 1.969152271552552e-05,
      "loss": 0.0096,
      "step": 500
    },
    {
      "epoch": 0.3497942386831276,
      "grad_norm": 0.2531183362007141,
      "learning_rate": 1.966132099887791e-05,
      "loss": 0.0078,
      "step": 510
    },
    {
      "epoch": 0.35665294924554186,
      "grad_norm": 0.07069271057844162,
      "learning_rate": 1.9629734430085007e-05,
      "loss": 0.0045,
      "step": 520
    },
    {
      "epoch": 0.3635116598079561,
      "grad_norm": 0.08322709053754807,
      "learning_rate": 1.9596767536760328e-05,
      "loss": 0.0044,
      "step": 530
    },
    {
      "epoch": 0.37037037037037035,
      "grad_norm": 0.2892574667930603,
      "learning_rate": 1.9562425044372884e-05,
      "loss": 0.0034,
      "step": 540
    },
    {
      "epoch": 0.3772290809327846,
      "grad_norm": 0.21209566295146942,
      "learning_rate": 1.9526711875569817e-05,
      "loss": 0.0105,
      "step": 550
    },
    {
      "epoch": 0.3840877914951989,
      "grad_norm": 0.0845257118344307,
      "learning_rate": 1.948963314947081e-05,
      "loss": 0.006,
      "step": 560
    },
    {
      "epoch": 0.39094650205761317,
      "grad_norm": 0.3820701241493225,
      "learning_rate": 1.945119418093429e-05,
      "loss": 0.0104,
      "step": 570
    },
    {
      "epoch": 0.39780521262002744,
      "grad_norm": 0.2731214463710785,
      "learning_rate": 1.9411400479795618e-05,
      "loss": 0.0121,
      "step": 580
    },
    {
      "epoch": 0.4046639231824417,
      "grad_norm": 0.01668688841164112,
      "learning_rate": 1.9370257750077296e-05,
      "loss": 0.0023,
      "step": 590
    },
    {
      "epoch": 0.411522633744856,
      "grad_norm": 0.28591388463974,
      "learning_rate": 1.932777188917136e-05,
      "loss": 0.0363,
      "step": 600
    },
    {
      "epoch": 0.41838134430727025,
      "grad_norm": 0.22390630841255188,
      "learning_rate": 1.9283948986994047e-05,
      "loss": 0.0055,
      "step": 610
    },
    {
      "epoch": 0.4252400548696845,
      "grad_norm": 0.22514230012893677,
      "learning_rate": 1.9238795325112867e-05,
      "loss": 0.0109,
      "step": 620
    },
    {
      "epoch": 0.43209876543209874,
      "grad_norm": 0.11907146126031876,
      "learning_rate": 1.919231737584621e-05,
      "loss": 0.0009,
      "step": 630
    },
    {
      "epoch": 0.438957475994513,
      "grad_norm": 0.030443059280514717,
      "learning_rate": 1.9144521801335588e-05,
      "loss": 0.0031,
      "step": 640
    },
    {
      "epoch": 0.4458161865569273,
      "grad_norm": 0.22063292562961578,
      "learning_rate": 1.90954154525907e-05,
      "loss": 0.0164,
      "step": 650
    },
    {
      "epoch": 0.45267489711934156,
      "grad_norm": 0.3322547972202301,
      "learning_rate": 1.9045005368507418e-05,
      "loss": 0.0024,
      "step": 660
    },
    {
      "epoch": 0.45953360768175583,
      "grad_norm": 0.16343438625335693,
      "learning_rate": 1.899329877485881e-05,
      "loss": 0.0033,
      "step": 670
    },
    {
      "epoch": 0.4663923182441701,
      "grad_norm": 0.4316878020763397,
      "learning_rate": 1.89403030832594e-05,
      "loss": 0.0025,
      "step": 680
    },
    {
      "epoch": 0.4732510288065844,
      "grad_norm": 0.16055700182914734,
      "learning_rate": 1.888602589010282e-05,
      "loss": 0.0024,
      "step": 690
    },
    {
      "epoch": 0.48010973936899864,
      "grad_norm": 0.18311993777751923,
      "learning_rate": 1.8830474975472904e-05,
      "loss": 0.0056,
      "step": 700
    },
    {
      "epoch": 0.4869684499314129,
      "grad_norm": 0.06576403230428696,
      "learning_rate": 1.8773658302028525e-05,
      "loss": 0.0094,
      "step": 710
    },
    {
      "epoch": 0.49382716049382713,
      "grad_norm": 0.13461001217365265,
      "learning_rate": 1.87155840138622e-05,
      "loss": 0.0123,
      "step": 720
    },
    {
      "epoch": 0.5006858710562414,
      "grad_norm": 0.10820876806974411,
      "learning_rate": 1.8656260435332732e-05,
      "loss": 0.0031,
      "step": 730
    },
    {
      "epoch": 0.5075445816186557,
      "grad_norm": 0.05024191737174988,
      "learning_rate": 1.8595696069872013e-05,
      "loss": 0.0047,
      "step": 740
    },
    {
      "epoch": 0.51440329218107,
      "grad_norm": 0.06663210690021515,
      "learning_rate": 1.8533899598766106e-05,
      "loss": 0.0023,
      "step": 750
    },
    {
      "epoch": 0.5212620027434842,
      "grad_norm": 0.1120394617319107,
      "learning_rate": 1.8470879879910916e-05,
      "loss": 0.0016,
      "step": 760
    },
    {
      "epoch": 0.5281207133058985,
      "grad_norm": 0.5442692637443542,
      "learning_rate": 1.8406645946542446e-05,
      "loss": 0.0388,
      "step": 770
    },
    {
      "epoch": 0.5349794238683128,
      "grad_norm": 0.3559621274471283,
      "learning_rate": 1.8341207005942033e-05,
      "loss": 0.0042,
      "step": 780
    },
    {
      "epoch": 0.541838134430727,
      "grad_norm": 0.06798390299081802,
      "learning_rate": 1.827457243811654e-05,
      "loss": 0.0007,
      "step": 790
    },
    {
      "epoch": 0.5486968449931413,
      "grad_norm": 0.09693529456853867,
      "learning_rate": 1.8206751794453837e-05,
      "loss": 0.0028,
      "step": 800
    },
    {
      "epoch": 0.5555555555555556,
      "grad_norm": 0.014329448342323303,
      "learning_rate": 1.8137754796353708e-05,
      "loss": 0.0024,
      "step": 810
    },
    {
      "epoch": 0.5624142661179699,
      "grad_norm": 0.05700727179646492,
      "learning_rate": 1.8067591333834382e-05,
      "loss": 0.0098,
      "step": 820
    },
    {
      "epoch": 0.5692729766803841,
      "grad_norm": 0.16214902698993683,
      "learning_rate": 1.7996271464114915e-05,
      "loss": 0.0024,
      "step": 830
    },
    {
      "epoch": 0.5761316872427984,
      "grad_norm": 0.017287936061620712,
      "learning_rate": 1.792380541017357e-05,
      "loss": 0.0023,
      "step": 840
    },
    {
      "epoch": 0.5829903978052127,
      "grad_norm": 0.03860418125987053,
      "learning_rate": 1.7850203559282464e-05,
      "loss": 0.0054,
      "step": 850
    },
    {
      "epoch": 0.5898491083676269,
      "grad_norm": 0.057672981172800064,
      "learning_rate": 1.7775476461518668e-05,
      "loss": 0.0039,
      "step": 860
    },
    {
      "epoch": 0.5967078189300411,
      "grad_norm": 0.1254579871892929,
      "learning_rate": 1.7699634828251945e-05,
      "loss": 0.0016,
      "step": 870
    },
    {
      "epoch": 0.6035665294924554,
      "grad_norm": 0.12202060967683792,
      "learning_rate": 1.7622689530609397e-05,
      "loss": 0.0055,
      "step": 880
    },
    {
      "epoch": 0.6104252400548696,
      "grad_norm": 0.5825458765029907,
      "learning_rate": 1.7544651597917194e-05,
      "loss": 0.0015,
      "step": 890
    },
    {
      "epoch": 0.6172839506172839,
      "grad_norm": 0.06615274399518967,
      "learning_rate": 1.7465532216119628e-05,
      "loss": 0.0029,
      "step": 900
    },
    {
      "epoch": 0.6241426611796982,
      "grad_norm": 0.016936153173446655,
      "learning_rate": 1.7385342726175728e-05,
      "loss": 0.0033,
      "step": 910
    },
    {
      "epoch": 0.6310013717421125,
      "grad_norm": 0.09208139032125473,
      "learning_rate": 1.7304094622433646e-05,
      "loss": 0.0072,
      "step": 920
    },
    {
      "epoch": 0.6378600823045267,
      "grad_norm": 0.0077827684581279755,
      "learning_rate": 1.7221799550983062e-05,
      "loss": 0.001,
      "step": 930
    },
    {
      "epoch": 0.644718792866941,
      "grad_norm": 0.15854914486408234,
      "learning_rate": 1.7138469307985832e-05,
      "loss": 0.0033,
      "step": 940
    },
    {
      "epoch": 0.6515775034293553,
      "grad_norm": 0.062444571405649185,
      "learning_rate": 1.705411583798513e-05,
      "loss": 0.0327,
      "step": 950
    },
    {
      "epoch": 0.6584362139917695,
      "grad_norm": 0.018232915550470352,
      "learning_rate": 1.6968751232193315e-05,
      "loss": 0.008,
      "step": 960
    },
    {
      "epoch": 0.6652949245541838,
      "grad_norm": 0.16335558891296387,
      "learning_rate": 1.6882387726758793e-05,
      "loss": 0.0359,
      "step": 970
    },
    {
      "epoch": 0.6721536351165981,
      "grad_norm": 0.4718017876148224,
      "learning_rate": 1.679503770101206e-05,
      "loss": 0.0096,
      "step": 980
    },
    {
      "epoch": 0.6790123456790124,
      "grad_norm": 0.16451981663703918,
      "learning_rate": 1.6706713675691283e-05,
      "loss": 0.0055,
      "step": 990
    },
    {
      "epoch": 0.6858710562414266,
      "grad_norm": 0.08348975330591202,
      "learning_rate": 1.661742831114757e-05,
      "loss": 0.0051,
      "step": 1000
    },
    {
      "epoch": 0.6927297668038409,
      "grad_norm": 0.22571489214897156,
      "learning_rate": 1.6527194405530217e-05,
      "loss": 0.0029,
      "step": 1010
    },
    {
      "epoch": 0.6995884773662552,
      "grad_norm": 0.11072508990764618,
      "learning_rate": 1.6436024892952256e-05,
      "loss": 0.0143,
      "step": 1020
    },
    {
      "epoch": 0.7064471879286695,
      "grad_norm": 0.01999078132212162,
      "learning_rate": 1.6343932841636455e-05,
      "loss": 0.0012,
      "step": 1030
    },
    {
      "epoch": 0.7133058984910837,
      "grad_norm": 0.04143204912543297,
      "learning_rate": 1.6250931452042136e-05,
      "loss": 0.011,
      "step": 1040
    },
    {
      "epoch": 0.720164609053498,
      "grad_norm": 0.017960038036108017,
      "learning_rate": 1.615703405497302e-05,
      "loss": 0.0106,
      "step": 1050
    },
    {
      "epoch": 0.7270233196159122,
      "grad_norm": 0.13416878879070282,
      "learning_rate": 1.6062254109666383e-05,
      "loss": 0.0281,
      "step": 1060
    },
    {
      "epoch": 0.7338820301783264,
      "grad_norm": 0.0555805005133152,
      "learning_rate": 1.5966605201863822e-05,
      "loss": 0.0051,
      "step": 1070
    },
    {
      "epoch": 0.7407407407407407,
      "grad_norm": 0.11911418288946152,
      "learning_rate": 1.587010104186388e-05,
      "loss": 0.0097,
      "step": 1080
    },
    {
      "epoch": 0.747599451303155,
      "grad_norm": 0.10376396775245667,
      "learning_rate": 1.57727554625568e-05,
      "loss": 0.0023,
      "step": 1090
    },
    {
      "epoch": 0.7544581618655692,
      "grad_norm": 0.025814570486545563,
      "learning_rate": 1.5674582417441734e-05,
      "loss": 0.0009,
      "step": 1100
    },
    {
      "epoch": 0.7613168724279835,
      "grad_norm": 0.05079122632741928,
      "learning_rate": 1.5575595978626634e-05,
      "loss": 0.0012,
      "step": 1110
    },
    {
      "epoch": 0.7681755829903978,
      "grad_norm": 0.01824193075299263,
      "learning_rate": 1.547581033481119e-05,
      "loss": 0.0033,
      "step": 1120
    },
    {
      "epoch": 0.7750342935528121,
      "grad_norm": 1.8023141622543335,
      "learning_rate": 1.5375239789252986e-05,
      "loss": 0.0092,
      "step": 1130
    },
    {
      "epoch": 0.7818930041152263,
      "grad_norm": 0.10559989511966705,
      "learning_rate": 1.5273898757717295e-05,
      "loss": 0.0049,
      "step": 1140
    },
    {
      "epoch": 0.7887517146776406,
      "grad_norm": 0.07854919880628586,
      "learning_rate": 1.5171801766410727e-05,
      "loss": 0.0041,
      "step": 1150
    },
    {
      "epoch": 0.7956104252400549,
      "grad_norm": 0.039341386407613754,
      "learning_rate": 1.5068963449899039e-05,
      "loss": 0.0025,
      "step": 1160
    },
    {
      "epoch": 0.8024691358024691,
      "grad_norm": 0.10025200247764587,
      "learning_rate": 1.4965398549009416e-05,
      "loss": 0.0013,
      "step": 1170
    },
    {
      "epoch": 0.8093278463648834,
      "grad_norm": 0.021738484501838684,
      "learning_rate": 1.4861121908717529e-05,
      "loss": 0.0085,
      "step": 1180
    },
    {
      "epoch": 0.8161865569272977,
      "grad_norm": 0.06829023361206055,
      "learning_rate": 1.4756148476019654e-05,
      "loss": 0.0116,
      "step": 1190
    },
    {
      "epoch": 0.823045267489712,
      "grad_norm": 0.2639261782169342,
      "learning_rate": 1.4650493297790178e-05,
      "loss": 0.0123,
      "step": 1200
    },
    {
      "epoch": 0.8299039780521262,
      "grad_norm": 0.35069772601127625,
      "learning_rate": 1.4544171518624778e-05,
      "loss": 0.0006,
      "step": 1210
    },
    {
      "epoch": 0.8367626886145405,
      "grad_norm": 0.07271619141101837,
      "learning_rate": 1.4437198378669598e-05,
      "loss": 0.0093,
      "step": 1220
    },
    {
      "epoch": 0.8436213991769548,
      "grad_norm": 0.00380721571855247,
      "learning_rate": 1.4329589211436733e-05,
      "loss": 0.0012,
      "step": 1230
    },
    {
      "epoch": 0.850480109739369,
      "grad_norm": 0.02267816662788391,
      "learning_rate": 1.4221359441606311e-05,
      "loss": 0.0027,
      "step": 1240
    },
    {
      "epoch": 0.8573388203017832,
      "grad_norm": 0.11175378412008286,
      "learning_rate": 1.4112524582815546e-05,
      "loss": 0.0128,
      "step": 1250
    },
    {
      "epoch": 0.8641975308641975,
      "grad_norm": 0.053114600479602814,
      "learning_rate": 1.4003100235434998e-05,
      "loss": 0.0022,
      "step": 1260
    },
    {
      "epoch": 0.8710562414266118,
      "grad_norm": 0.011361058801412582,
      "learning_rate": 1.389310208433242e-05,
      "loss": 0.0045,
      "step": 1270
    },
    {
      "epoch": 0.877914951989026,
      "grad_norm": 0.0025912427809089422,
      "learning_rate": 1.3782545896624502e-05,
      "loss": 0.0094,
      "step": 1280
    },
    {
      "epoch": 0.8847736625514403,
      "grad_norm": 0.12230653315782547,
      "learning_rate": 1.3671447519416803e-05,
      "loss": 0.0206,
      "step": 1290
    },
    {
      "epoch": 0.8916323731138546,
      "grad_norm": 0.01137256808578968,
      "learning_rate": 1.3559822877532234e-05,
      "loss": 0.0022,
      "step": 1300
    },
    {
      "epoch": 0.8984910836762688,
      "grad_norm": 0.07587670534849167,
      "learning_rate": 1.3447687971228402e-05,
      "loss": 0.0013,
      "step": 1310
    },
    {
      "epoch": 0.9053497942386831,
      "grad_norm": 0.07251809537410736,
      "learning_rate": 1.3335058873904128e-05,
      "loss": 0.0054,
      "step": 1320
    },
    {
      "epoch": 0.9122085048010974,
      "grad_norm": 0.23888733983039856,
      "learning_rate": 1.3221951729795492e-05,
      "loss": 0.0073,
      "step": 1330
    },
    {
      "epoch": 0.9190672153635117,
      "grad_norm": 0.3885558843612671,
      "learning_rate": 1.3108382751661722e-05,
      "loss": 0.0022,
      "step": 1340
    },
    {
      "epoch": 0.9259259259259259,
      "grad_norm": 0.12456855922937393,
      "learning_rate": 1.2994368218461255e-05,
      "loss": 0.0037,
      "step": 1350
    },
    {
      "epoch": 0.9327846364883402,
      "grad_norm": 0.011711199767887592,
      "learning_rate": 1.287992447301832e-05,
      "loss": 0.0061,
      "step": 1360
    },
    {
      "epoch": 0.9396433470507545,
      "grad_norm": 0.3066045045852661,
      "learning_rate": 1.2765067919680357e-05,
      "loss": 0.0041,
      "step": 1370
    },
    {
      "epoch": 0.9465020576131687,
      "grad_norm": 0.016845189034938812,
      "learning_rate": 1.264981502196662e-05,
      "loss": 0.0046,
      "step": 1380
    },
    {
      "epoch": 0.953360768175583,
      "grad_norm": 0.09259962290525436,
      "learning_rate": 1.2534182300208299e-05,
      "loss": 0.0081,
      "step": 1390
    },
    {
      "epoch": 0.9602194787379973,
      "grad_norm": 0.4014507830142975,
      "learning_rate": 1.2418186329180506e-05,
      "loss": 0.0069,
      "step": 1400
    },
    {
      "epoch": 0.9670781893004116,
      "grad_norm": 0.24332621693611145,
      "learning_rate": 1.230184373572643e-05,
      "loss": 0.0017,
      "step": 1410
    },
    {
      "epoch": 0.9739368998628258,
      "grad_norm": 0.07767323404550552,
      "learning_rate": 1.218517119637408e-05,
      "loss": 0.0034,
      "step": 1420
    },
    {
      "epoch": 0.9807956104252401,
      "grad_norm": 0.023259377107024193,
      "learning_rate": 1.2068185434945834e-05,
      "loss": 0.0025,
      "step": 1430
    },
    {
      "epoch": 0.9876543209876543,
      "grad_norm": 0.01660298928618431,
      "learning_rate": 1.1950903220161286e-05,
      "loss": 0.0031,
      "step": 1440
    },
    {
      "epoch": 0.9945130315500685,
      "grad_norm": 0.06700021773576736,
      "learning_rate": 1.1833341363233594e-05,
      "loss": 0.0125,
      "step": 1450
    },
    {
      "epoch": 1.0013717421124828,
      "grad_norm": 0.09212377667427063,
      "learning_rate": 1.1715516715459784e-05,
      "loss": 0.0021,
      "step": 1460
    },
    {
      "epoch": 1.008230452674897,
      "grad_norm": 0.2013186812400818,
      "learning_rate": 1.1597446165805272e-05,
      "loss": 0.0055,
      "step": 1470
    },
    {
      "epoch": 1.0150891632373114,
      "grad_norm": 0.033271919935941696,
      "learning_rate": 1.147914663848301e-05,
      "loss": 0.0006,
      "step": 1480
    },
    {
      "epoch": 1.0219478737997256,
      "grad_norm": 0.012748132459819317,
      "learning_rate": 1.1360635090527571e-05,
      "loss": 0.0061,
      "step": 1490
    },
    {
      "epoch": 1.02880658436214,
      "grad_norm": 0.18844588100910187,
      "learning_rate": 1.1241928509364533e-05,
      "loss": 0.0164,
      "step": 1500
    },
    {
      "epoch": 1.0356652949245542,
      "grad_norm": 0.02880697138607502,
      "learning_rate": 1.1123043910375495e-05,
      "loss": 0.0013,
      "step": 1510
    },
    {
      "epoch": 1.0425240054869684,
      "grad_norm": 0.040209993720054626,
      "learning_rate": 1.1003998334459107e-05,
      "loss": 0.0076,
      "step": 1520
    },
    {
      "epoch": 1.0493827160493827,
      "grad_norm": 0.03542792797088623,
      "learning_rate": 1.0884808845588424e-05,
      "loss": 0.0121,
      "step": 1530
    },
    {
      "epoch": 1.056241426611797,
      "grad_norm": 0.029188377782702446,
      "learning_rate": 1.076549252836496e-05,
      "loss": 0.0006,
      "step": 1540
    },
    {
      "epoch": 1.0631001371742113,
      "grad_norm": 0.1534910947084427,
      "learning_rate": 1.0646066485569779e-05,
      "loss": 0.0011,
      "step": 1550
    },
    {
      "epoch": 1.0699588477366255,
      "grad_norm": 0.0010759709402918816,
      "learning_rate": 1.0526547835712e-05,
      "loss": 0.0002,
      "step": 1560
    },
    {
      "epoch": 1.0768175582990398,
      "grad_norm": 0.0017988062463700771,
      "learning_rate": 1.0406953710575015e-05,
      "loss": 0.0003,
      "step": 1570
    },
    {
      "epoch": 1.083676268861454,
      "grad_norm": 0.2528439462184906,
      "learning_rate": 1.0287301252760833e-05,
      "loss": 0.0039,
      "step": 1580
    },
    {
      "epoch": 1.0905349794238683,
      "grad_norm": 0.000582815904635936,
      "learning_rate": 1.0167607613232856e-05,
      "loss": 0.0005,
      "step": 1590
    },
    {
      "epoch": 1.0973936899862826,
      "grad_norm": 0.08733749389648438,
      "learning_rate": 1.0047889948857477e-05,
      "loss": 0.0003,
      "step": 1600
    },
    {
      "epoch": 1.1042524005486969,
      "grad_norm": 0.05657390132546425,
      "learning_rate": 9.928165419944788e-06,
      "loss": 0.002,
      "step": 1610
    },
    {
      "epoch": 1.1111111111111112,
      "grad_norm": 0.036703433841466904,
      "learning_rate": 9.80845118778886e-06,
      "loss": 0.0004,
      "step": 1620
    },
    {
      "epoch": 1.1179698216735254,
      "grad_norm": 0.004433758556842804,
      "learning_rate": 9.68876441220782e-06,
      "loss": 0.0051,
      "step": 1630
    },
    {
      "epoch": 1.1248285322359397,
      "grad_norm": 0.06380768865346909,
      "learning_rate": 9.569122249084177e-06,
      "loss": 0.0035,
      "step": 1640
    },
    {
      "epoch": 1.131687242798354,
      "grad_norm": 0.012877637520432472,
      "learning_rate": 9.449541847905688e-06,
      "loss": 0.0009,
      "step": 1650
    },
    {
      "epoch": 1.1385459533607682,
      "grad_norm": 0.009387146681547165,
      "learning_rate": 9.330040349307185e-06,
      "loss": 0.0111,
      "step": 1660
    },
    {
      "epoch": 1.1454046639231825,
      "grad_norm": 0.0012669875286519527,
      "learning_rate": 9.210634882613595e-06,
      "loss": 0.0006,
      "step": 1670
    },
    {
      "epoch": 1.1522633744855968,
      "grad_norm": 0.029193460941314697,
      "learning_rate": 9.091342563384661e-06,
      "loss": 0.0009,
      "step": 1680
    },
    {
      "epoch": 1.159122085048011,
      "grad_norm": 0.0319526381790638,
      "learning_rate": 8.972180490961581e-06,
      "loss": 0.0038,
      "step": 1690
    },
    {
      "epoch": 1.1659807956104253,
      "grad_norm": 0.01600039191544056,
      "learning_rate": 8.853165746015997e-06,
      "loss": 0.0034,
      "step": 1700
    },
    {
      "epoch": 1.1728395061728394,
      "grad_norm": 0.00153280608355999,
      "learning_rate": 8.73431538810166e-06,
      "loss": 0.0007,
      "step": 1710
    },
    {
      "epoch": 1.1796982167352539,
      "grad_norm": 0.012536576949059963,
      "learning_rate": 8.61564645320911e-06,
      "loss": 0.0011,
      "step": 1720
    },
    {
      "epoch": 1.186556927297668,
      "grad_norm": 0.013463485054671764,
      "learning_rate": 8.497175951323737e-06,
      "loss": 0.003,
      "step": 1730
    },
    {
      "epoch": 1.1934156378600824,
      "grad_norm": 0.0013876461889594793,
      "learning_rate": 8.378920863987576e-06,
      "loss": 0.0005,
      "step": 1740
    },
    {
      "epoch": 1.2002743484224965,
      "grad_norm": 0.008089344017207623,
      "learning_rate": 8.260898141865188e-06,
      "loss": 0.0117,
      "step": 1750
    },
    {
      "epoch": 1.2071330589849107,
      "grad_norm": 0.0536433607339859,
      "learning_rate": 8.143124702313932e-06,
      "loss": 0.0057,
      "step": 1760
    },
    {
      "epoch": 1.213991769547325,
      "grad_norm": 0.16750673949718475,
      "learning_rate": 8.025617426959046e-06,
      "loss": 0.0011,
      "step": 1770
    },
    {
      "epoch": 1.2208504801097393,
      "grad_norm": 0.15625609457492828,
      "learning_rate": 7.908393159273835e-06,
      "loss": 0.0031,
      "step": 1780
    },
    {
      "epoch": 1.2277091906721536,
      "grad_norm": 0.01993492804467678,
      "learning_rate": 7.791468702165337e-06,
      "loss": 0.0034,
      "step": 1790
    },
    {
      "epoch": 1.2345679012345678,
      "grad_norm": 0.03969631716609001,
      "learning_rate": 7.674860815565792e-06,
      "loss": 0.0008,
      "step": 1800
    },
    {
      "epoch": 1.241426611796982,
      "grad_norm": 0.05006815120577812,
      "learning_rate": 7.558586214030272e-06,
      "loss": 0.0034,
      "step": 1810
    },
    {
      "epoch": 1.2482853223593964,
      "grad_norm": 0.004975775256752968,
      "learning_rate": 7.442661564340823e-06,
      "loss": 0.0001,
      "step": 1820
    },
    {
      "epoch": 1.2551440329218106,
      "grad_norm": 0.010278506204485893,
      "learning_rate": 7.327103483117453e-06,
      "loss": 0.003,
      "step": 1830
    },
    {
      "epoch": 1.262002743484225,
      "grad_norm": 0.03384735807776451,
      "learning_rate": 7.211928534436307e-06,
      "loss": 0.0025,
      "step": 1840
    },
    {
      "epoch": 1.2688614540466392,
      "grad_norm": 0.11490129679441452,
      "learning_rate": 7.097153227455379e-06,
      "loss": 0.0033,
      "step": 1850
    },
    {
      "epoch": 1.2757201646090535,
      "grad_norm": 0.008139098063111305,
      "learning_rate": 6.9827940140480776e-06,
      "loss": 0.0021,
      "step": 1860
    },
    {
      "epoch": 1.2825788751714677,
      "grad_norm": 0.07241293787956238,
      "learning_rate": 6.868867286445041e-06,
      "loss": 0.0003,
      "step": 1870
    },
    {
      "epoch": 1.289437585733882,
      "grad_norm": 0.2514030933380127,
      "learning_rate": 6.7553893748844535e-06,
      "loss": 0.0036,
      "step": 1880
    },
    {
      "epoch": 1.2962962962962963,
      "grad_norm": 0.3750598430633545,
      "learning_rate": 6.6423765452712895e-06,
      "loss": 0.0185,
      "step": 1890
    },
    {
      "epoch": 1.3031550068587106,
      "grad_norm": 0.011171502061188221,
      "learning_rate": 6.529844996845751e-06,
      "loss": 0.0012,
      "step": 1900
    },
    {
      "epoch": 1.3100137174211248,
      "grad_norm": 0.010206708684563637,
      "learning_rate": 6.417810859861275e-06,
      "loss": 0.0081,
      "step": 1910
    },
    {
      "epoch": 1.316872427983539,
      "grad_norm": 0.20837537944316864,
      "learning_rate": 6.306290193272422e-06,
      "loss": 0.0004,
      "step": 1920
    },
    {
      "epoch": 1.3237311385459534,
      "grad_norm": 0.002073473297059536,
      "learning_rate": 6.195298982433e-06,
      "loss": 0.0004,
      "step": 1930
    },
    {
      "epoch": 1.3305898491083676,
      "grad_norm": 0.13253618776798248,
      "learning_rate": 6.084853136804711e-06,
      "loss": 0.0006,
      "step": 1940
    },
    {
      "epoch": 1.337448559670782,
      "grad_norm": 0.0015091156819835305,
      "learning_rate": 5.9749684876767015e-06,
      "loss": 0.0001,
      "step": 1950
    },
    {
      "epoch": 1.3443072702331962,
      "grad_norm": 0.04806216433644295,
      "learning_rate": 5.8656607858963014e-06,
      "loss": 0.0258,
      "step": 1960
    },
    {
      "epoch": 1.3511659807956105,
      "grad_norm": 0.0028714430518448353,
      "learning_rate": 5.756945699611302e-06,
      "loss": 0.0027,
      "step": 1970
    },
    {
      "epoch": 1.3580246913580247,
      "grad_norm": 0.0008589240605942905,
      "learning_rate": 5.6488388120241e-06,
      "loss": 0.0036,
      "step": 1980
    },
    {
      "epoch": 1.364883401920439,
      "grad_norm": 0.018008651211857796,
      "learning_rate": 5.541355619157981e-06,
      "loss": 0.004,
      "step": 1990
    },
    {
      "epoch": 1.3717421124828533,
      "grad_norm": 0.1377667784690857,
      "learning_rate": 5.434511527635935e-06,
      "loss": 0.032,
      "step": 2000
    },
    {
      "epoch": 1.3786008230452675,
      "grad_norm": 0.0013193864142522216,
      "learning_rate": 5.328321852472269e-06,
      "loss": 0.0044,
      "step": 2010
    },
    {
      "epoch": 1.3854595336076818,
      "grad_norm": 0.0013291804352775216,
      "learning_rate": 5.22280181487737e-06,
      "loss": 0.0006,
      "step": 2020
    },
    {
      "epoch": 1.392318244170096,
      "grad_norm": 0.0038565269205719233,
      "learning_rate": 5.117966540075874e-06,
      "loss": 0.0008,
      "step": 2030
    },
    {
      "epoch": 1.3991769547325104,
      "grad_norm": 0.08756982535123825,
      "learning_rate": 5.013831055138636e-06,
      "loss": 0.0056,
      "step": 2040
    },
    {
      "epoch": 1.4060356652949246,
      "grad_norm": 0.16994261741638184,
      "learning_rate": 4.91041028682875e-06,
      "loss": 0.0055,
      "step": 2050
    },
    {
      "epoch": 1.412894375857339,
      "grad_norm": 0.0005570650682784617,
      "learning_rate": 4.8077190594619425e-06,
      "loss": 0.0012,
      "step": 2060
    },
    {
      "epoch": 1.4197530864197532,
      "grad_norm": 0.04777060076594353,
      "learning_rate": 4.705772092781675e-06,
      "loss": 0.001,
      "step": 2070
    },
    {
      "epoch": 1.4266117969821672,
      "grad_norm": 0.017951903864741325,
      "learning_rate": 4.604583999849193e-06,
      "loss": 0.0014,
      "step": 2080
    },
    {
      "epoch": 1.4334705075445817,
      "grad_norm": 0.0037813596427440643,
      "learning_rate": 4.504169284948909e-06,
      "loss": 0.0004,
      "step": 2090
    },
    {
      "epoch": 1.4403292181069958,
      "grad_norm": 0.09480316936969757,
      "learning_rate": 4.40454234150936e-06,
      "loss": 0.001,
      "step": 2100
    },
    {
      "epoch": 1.4471879286694103,
      "grad_norm": 0.012794774025678635,
      "learning_rate": 4.30571745004005e-06,
      "loss": 0.0005,
      "step": 2110
    },
    {
      "epoch": 1.4540466392318243,
      "grad_norm": 0.06668855994939804,
      "learning_rate": 4.207708776084486e-06,
      "loss": 0.0035,
      "step": 2120
    },
    {
      "epoch": 1.4609053497942388,
      "grad_norm": 0.0008814105531200767,
      "learning_rate": 4.110530368189695e-06,
      "loss": 0.0036,
      "step": 2130
    },
    {
      "epoch": 1.4677640603566529,
      "grad_norm": 0.003472542390227318,
      "learning_rate": 4.014196155892503e-06,
      "loss": 0.0001,
      "step": 2140
    },
    {
      "epoch": 1.4746227709190673,
      "grad_norm": 0.010452075861394405,
      "learning_rate": 3.9187199477228764e-06,
      "loss": 0.0002,
      "step": 2150
    },
    {
      "epoch": 1.4814814814814814,
      "grad_norm": 0.0006203448283486068,
      "learning_rate": 3.824115429224625e-06,
      "loss": 0.0019,
      "step": 2160
    },
    {
      "epoch": 1.4883401920438957,
      "grad_norm": 0.0003119745524600148,
      "learning_rate": 3.7303961609936933e-06,
      "loss": 0.0026,
      "step": 2170
    },
    {
      "epoch": 1.49519890260631,
      "grad_norm": 0.0362611822783947,
      "learning_rate": 3.6375755767344047e-06,
      "loss": 0.0003,
      "step": 2180
    },
    {
      "epoch": 1.5020576131687244,
      "grad_norm": 0.2301040142774582,
      "learning_rate": 3.5456669813338684e-06,
      "loss": 0.0021,
      "step": 2190
    },
    {
      "epoch": 1.5089163237311385,
      "grad_norm": 0.004565828945487738,
      "learning_rate": 3.4546835489548647e-06,
      "loss": 0.0111,
      "step": 2200
    },
    {
      "epoch": 1.5157750342935528,
      "grad_norm": 0.03340575471520424,
      "learning_rate": 3.3646383211474633e-06,
      "loss": 0.0007,
      "step": 2210
    },
    {
      "epoch": 1.522633744855967,
      "grad_norm": 0.0012551895342767239,
      "learning_rate": 3.275544204979643e-06,
      "loss": 0.0019,
      "step": 2220
    },
    {
      "epoch": 1.5294924554183813,
      "grad_norm": 0.0014971940545365214,
      "learning_rate": 3.187413971187198e-06,
      "loss": 0.0029,
      "step": 2230
    },
    {
      "epoch": 1.5363511659807956,
      "grad_norm": 0.009951584972441196,
      "learning_rate": 3.1002602523431792e-06,
      "loss": 0.0003,
      "step": 2240
    },
    {
      "epoch": 1.5432098765432098,
      "grad_norm": 0.019237512722611427,
      "learning_rate": 3.0140955410471606e-06,
      "loss": 0.0054,
      "step": 2250
    },
    {
      "epoch": 1.5500685871056241,
      "grad_norm": 0.5323840975761414,
      "learning_rate": 2.9289321881345257e-06,
      "loss": 0.0081,
      "step": 2260
    },
    {
      "epoch": 1.5569272976680384,
      "grad_norm": 0.045489732176065445,
      "learning_rate": 2.8447824009061185e-06,
      "loss": 0.0007,
      "step": 2270
    },
    {
      "epoch": 1.5637860082304527,
      "grad_norm": 0.0036737327463924885,
      "learning_rate": 2.7616582413784465e-06,
      "loss": 0.0021,
      "step": 2280
    },
    {
      "epoch": 1.570644718792867,
      "grad_norm": 0.157160222530365,
      "learning_rate": 2.679571624554709e-06,
      "loss": 0.0033,
      "step": 2290
    },
    {
      "epoch": 1.5775034293552812,
      "grad_norm": 0.037767913192510605,
      "learning_rate": 2.5985343167169174e-06,
      "loss": 0.0053,
      "step": 2300
    },
    {
      "epoch": 1.5843621399176955,
      "grad_norm": 0.0038649821653962135,
      "learning_rate": 2.5185579337392964e-06,
      "loss": 0.0007,
      "step": 2310
    },
    {
      "epoch": 1.5912208504801097,
      "grad_norm": 0.004294196609407663,
      "learning_rate": 2.439653939423283e-06,
      "loss": 0.0012,
      "step": 2320
    },
    {
      "epoch": 1.598079561042524,
      "grad_norm": 0.0028122446965426207,
      "learning_rate": 2.3618336438542977e-06,
      "loss": 0.0,
      "step": 2330
    },
    {
      "epoch": 1.6049382716049383,
      "grad_norm": 0.0009102143230848014,
      "learning_rate": 2.2851082017805704e-06,
      "loss": 0.0001,
      "step": 2340
    },
    {
      "epoch": 1.6117969821673526,
      "grad_norm": 0.031087348237633705,
      "learning_rate": 2.2094886110142065e-06,
      "loss": 0.0007,
      "step": 2350
    },
    {
      "epoch": 1.6186556927297668,
      "grad_norm": 0.009288856759667397,
      "learning_rate": 2.13498571085477e-06,
      "loss": 0.0012,
      "step": 2360
    },
    {
      "epoch": 1.625514403292181,
      "grad_norm": 0.02940617874264717,
      "learning_rate": 2.0616101805355814e-06,
      "loss": 0.0172,
      "step": 2370
    },
    {
      "epoch": 1.6323731138545954,
      "grad_norm": 0.29882147908210754,
      "learning_rate": 1.9893725376929506e-06,
      "loss": 0.0042,
      "step": 2380
    },
    {
      "epoch": 1.6392318244170097,
      "grad_norm": 0.08803040534257889,
      "learning_rate": 1.918283136858595e-06,
      "loss": 0.005,
      "step": 2390
    },
    {
      "epoch": 1.646090534979424,
      "grad_norm": 0.0005768566625192761,
      "learning_rate": 1.8483521679754046e-06,
      "loss": 0.001,
      "step": 2400
    },
    {
      "epoch": 1.652949245541838,
      "grad_norm": 0.13397887349128723,
      "learning_rate": 1.7795896549368308e-06,
      "loss": 0.0008,
      "step": 2410
    },
    {
      "epoch": 1.6598079561042525,
      "grad_norm": 0.08956614136695862,
      "learning_rate": 1.7120054541500552e-06,
      "loss": 0.0012,
      "step": 2420
    },
    {
      "epoch": 1.6666666666666665,
      "grad_norm": 0.0006750011234544218,
      "learning_rate": 1.6456092531231816e-06,
      "loss": 0.0008,
      "step": 2430
    },
    {
      "epoch": 1.673525377229081,
      "grad_norm": 0.0019244247814640403,
      "learning_rate": 1.5804105690766224e-06,
      "loss": 0.0021,
      "step": 2440
    },
    {
      "epoch": 1.680384087791495,
      "grad_norm": 0.013517620973289013,
      "learning_rate": 1.516418747578906e-06,
      "loss": 0.0018,
      "step": 2450
    },
    {
      "epoch": 1.6872427983539096,
      "grad_norm": 0.01616906374692917,
      "learning_rate": 1.4536429612070846e-06,
      "loss": 0.0012,
      "step": 2460
    },
    {
      "epoch": 1.6941015089163236,
      "grad_norm": 0.001243108999915421,
      "learning_rate": 1.3920922082319355e-06,
      "loss": 0.002,
      "step": 2470
    },
    {
      "epoch": 1.700960219478738,
      "grad_norm": 0.17654798924922943,
      "learning_rate": 1.3317753113281562e-06,
      "loss": 0.0019,
      "step": 2480
    },
    {
      "epoch": 1.7078189300411522,
      "grad_norm": 0.000989454216323793,
      "learning_rate": 1.272700916309718e-06,
      "loss": 0.0021,
      "step": 2490
    },
    {
      "epoch": 1.7146776406035666,
      "grad_norm": 0.002051191870123148,
      "learning_rate": 1.2148774908905782e-06,
      "loss": 0.0016,
      "step": 2500
    },
    {
      "epoch": 1.7215363511659807,
      "grad_norm": 0.0013189888559281826,
      "learning_rate": 1.1583133234709198e-06,
      "loss": 0.0012,
      "step": 2510
    },
    {
      "epoch": 1.7283950617283952,
      "grad_norm": 0.08241615444421768,
      "learning_rate": 1.103016521949093e-06,
      "loss": 0.0006,
      "step": 2520
    },
    {
      "epoch": 1.7352537722908092,
      "grad_norm": 0.0015023777959868312,
      "learning_rate": 1.0489950125594351e-06,
      "loss": 0.0003,
      "step": 2530
    },
    {
      "epoch": 1.7421124828532237,
      "grad_norm": 0.025439105927944183,
      "learning_rate": 9.962565387361167e-07,
      "loss": 0.0013,
      "step": 2540
    },
    {
      "epoch": 1.7489711934156378,
      "grad_norm": 0.0018972799880430102,
      "learning_rate": 9.448086600032047e-07,
      "loss": 0.0087,
      "step": 2550
    },
    {
      "epoch": 1.7558299039780523,
      "grad_norm": 0.003276234259828925,
      "learning_rate": 8.946587508910798e-07,
      "loss": 0.0133,
      "step": 2560
    },
    {
      "epoch": 1.7626886145404663,
      "grad_norm": 0.04291309043765068,
      "learning_rate": 8.458139998793779e-07,
      "loss": 0.0001,
      "step": 2570
    },
    {
      "epoch": 1.7695473251028808,
      "grad_norm": 0.01314778346568346,
      "learning_rate": 7.982814083665825e-07,
      "loss": 0.0014,
      "step": 2580
    },
    {
      "epoch": 1.7764060356652949,
      "grad_norm": 0.020097751170396805,
      "learning_rate": 7.520677896664586e-07,
      "loss": 0.002,
      "step": 2590
    },
    {
      "epoch": 1.7832647462277091,
      "grad_norm": 0.002437903080135584,
      "learning_rate": 7.07179768031424e-07,
      "loss": 0.0008,
      "step": 2600
    },
    {
      "epoch": 1.7901234567901234,
      "grad_norm": 0.003759504295885563,
      "learning_rate": 6.636237777030341e-07,
      "loss": 0.0032,
      "step": 2610
    },
    {
      "epoch": 1.7969821673525377,
      "grad_norm": 0.0019247238524258137,
      "learning_rate": 6.214060619897011e-07,
      "loss": 0.0006,
      "step": 2620
    },
    {
      "epoch": 1.803840877914952,
      "grad_norm": 0.48312854766845703,
      "learning_rate": 5.805326723717741e-07,
      "loss": 0.0058,
      "step": 2630
    },
    {
      "epoch": 1.8106995884773662,
      "grad_norm": 0.0015425217570737004,
      "learning_rate": 5.410094676341237e-07,
      "loss": 0.0005,
      "step": 2640
    },
    {
      "epoch": 1.8175582990397805,
      "grad_norm": 0.39914342761039734,
      "learning_rate": 5.028421130263416e-07,
      "loss": 0.0036,
      "step": 2650
    },
    {
      "epoch": 1.8244170096021948,
      "grad_norm": 0.042819537222385406,
      "learning_rate": 4.660360794506946e-07,
      "loss": 0.004,
      "step": 2660
    },
    {
      "epoch": 1.831275720164609,
      "grad_norm": 0.16416482627391815,
      "learning_rate": 4.305966426779118e-07,
      "loss": 0.0061,
      "step": 2670
    },
    {
      "epoch": 1.8381344307270233,
      "grad_norm": 0.04438329488039017,
      "learning_rate": 3.9652888259096635e-07,
      "loss": 0.0024,
      "step": 2680
    },
    {
      "epoch": 1.8449931412894376,
      "grad_norm": 0.16208341717720032,
      "learning_rate": 3.6383768245692453e-07,
      "loss": 0.001,
      "step": 2690
    },
    {
      "epoch": 1.8518518518518519,
      "grad_norm": 0.07959479838609695,
      "learning_rate": 3.3252772822697565e-07,
      "loss": 0.0011,
      "step": 2700
    },
    {
      "epoch": 1.8587105624142661,
      "grad_norm": 0.04148540273308754,
      "learning_rate": 3.026035078647549e-07,
      "loss": 0.0052,
      "step": 2710
    },
    {
      "epoch": 1.8655692729766804,
      "grad_norm": 0.0011746763484552503,
      "learning_rate": 2.740693107030301e-07,
      "loss": 0.0011,
      "step": 2720
    },
    {
      "epoch": 1.8724279835390947,
      "grad_norm": 0.003397882217541337,
      "learning_rate": 2.4692922682887923e-07,
      "loss": 0.0013,
      "step": 2730
    },
    {
      "epoch": 1.879286694101509,
      "grad_norm": 0.0028587563429027796,
      "learning_rate": 2.2118714649740912e-07,
      "loss": 0.0023,
      "step": 2740
    },
    {
      "epoch": 1.8861454046639232,
      "grad_norm": 0.45575016736984253,
      "learning_rate": 1.9684675957413414e-07,
      "loss": 0.012,
      "step": 2750
    },
    {
      "epoch": 1.8930041152263375,
      "grad_norm": 0.00040283441194333136,
      "learning_rate": 1.739115550060688e-07,
      "loss": 0.0009,
      "step": 2760
    },
    {
      "epoch": 1.8998628257887518,
      "grad_norm": 0.0022473863791674376,
      "learning_rate": 1.5238482032162162e-07,
      "loss": 0.0019,
      "step": 2770
    },
    {
      "epoch": 1.906721536351166,
      "grad_norm": 0.0015027286717668176,
      "learning_rate": 1.3226964115936046e-07,
      "loss": 0.0026,
      "step": 2780
    },
    {
      "epoch": 1.9135802469135803,
      "grad_norm": 0.0023126809392124414,
      "learning_rate": 1.1356890082572459e-07,
      "loss": 0.0018,
      "step": 2790
    },
    {
      "epoch": 1.9204389574759944,
      "grad_norm": 0.050206076353788376,
      "learning_rate": 9.628527988172154e-08,
      "loss": 0.0123,
      "step": 2800
    },
    {
      "epoch": 1.9272976680384089,
      "grad_norm": 0.000970209832303226,
      "learning_rate": 8.042125575870362e-08,
      "loss": 0.0013,
      "step": 2810
    },
    {
      "epoch": 1.934156378600823,
      "grad_norm": 0.002282192464917898,
      "learning_rate": 6.597910240324967e-08,
      "loss": 0.0021,
      "step": 2820
    },
    {
      "epoch": 1.9410150891632374,
      "grad_norm": 0.00975970458239317,
      "learning_rate": 5.296088995122017e-08,
      "loss": 0.0039,
      "step": 2830
    },
    {
      "epoch": 1.9478737997256514,
      "grad_norm": 0.010269064456224442,
      "learning_rate": 4.1368484431023593e-08,
      "loss": 0.0022,
      "step": 2840
    },
    {
      "epoch": 1.954732510288066,
      "grad_norm": 0.0009142697090283036,
      "learning_rate": 3.1203547496140295e-08,
      "loss": 0.0028,
      "step": 2850
    },
    {
      "epoch": 1.96159122085048,
      "grad_norm": 0.002814473118633032,
      "learning_rate": 2.2467536186937532e-08,
      "loss": 0.0041,
      "step": 2860
    },
    {
      "epoch": 1.9684499314128945,
      "grad_norm": 0.0822644829750061,
      "learning_rate": 1.516170272182538e-08,
      "loss": 0.0163,
      "step": 2870
    },
    {
      "epoch": 1.9753086419753085,
      "grad_norm": 0.08335670083761215,
      "learning_rate": 9.287094317756985e-09,
      "loss": 0.0005,
      "step": 2880
    },
    {
      "epoch": 1.982167352537723,
      "grad_norm": 0.00891471654176712,
      "learning_rate": 4.844553040125322e-09,
      "loss": 0.0037,
      "step": 2890
    },
    {
      "epoch": 1.989026063100137,
      "grad_norm": 0.014899800531566143,
      "learning_rate": 1.8347156820563983e-09,
      "loss": 0.0006,
      "step": 2900
    },
    {
      "epoch": 1.9958847736625516,
      "grad_norm": 0.002669480862095952,
      "learning_rate": 2.5801367313782464e-10,
      "loss": 0.0022,
      "step": 2910
    },
    {
      "epoch": 2.0,
      "step": 2916,
      "total_flos": 6.452437106693243e+17,
      "train_loss": 0.005305082097742036,
      "train_runtime": 10141.6254,
      "train_samples_per_second": 4.6,
      "train_steps_per_second": 0.288
    }
  ],
  "logging_steps": 10,
  "max_steps": 2916,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 6.452437106693243e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}