| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "eval_steps": 500, | |
| "global_step": 460, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.004347826086956522, | |
| "grad_norm": 210.10928344726562, | |
| "learning_rate": 0.0, | |
| "loss": 5.8188, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.008695652173913044, | |
| "grad_norm": 216.5006561279297, | |
| "learning_rate": 2.173913043478261e-06, | |
| "loss": 5.9259, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.013043478260869565, | |
| "grad_norm": 144.48963928222656, | |
| "learning_rate": 4.347826086956522e-06, | |
| "loss": 5.646, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.017391304347826087, | |
| "grad_norm": 45.486934661865234, | |
| "learning_rate": 6.521739130434783e-06, | |
| "loss": 5.3097, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.021739130434782608, | |
| "grad_norm": 83.79264831542969, | |
| "learning_rate": 8.695652173913044e-06, | |
| "loss": 5.3505, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.02608695652173913, | |
| "grad_norm": 33.744483947753906, | |
| "learning_rate": 1.0869565217391305e-05, | |
| "loss": 5.1314, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.030434782608695653, | |
| "grad_norm": 22.175418853759766, | |
| "learning_rate": 1.3043478260869566e-05, | |
| "loss": 4.8346, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.034782608695652174, | |
| "grad_norm": 18.40424156188965, | |
| "learning_rate": 1.5217391304347828e-05, | |
| "loss": 4.7562, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.0391304347826087, | |
| "grad_norm": 15.772565841674805, | |
| "learning_rate": 1.739130434782609e-05, | |
| "loss": 4.5057, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.043478260869565216, | |
| "grad_norm": 11.410517692565918, | |
| "learning_rate": 1.956521739130435e-05, | |
| "loss": 4.3231, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.04782608695652174, | |
| "grad_norm": 14.64340877532959, | |
| "learning_rate": 2.173913043478261e-05, | |
| "loss": 4.3797, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.05217391304347826, | |
| "grad_norm": 7.4696946144104, | |
| "learning_rate": 2.391304347826087e-05, | |
| "loss": 3.9548, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.05652173913043478, | |
| "grad_norm": 3.1422557830810547, | |
| "learning_rate": 2.608695652173913e-05, | |
| "loss": 3.8226, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.06086956521739131, | |
| "grad_norm": 2.6594135761260986, | |
| "learning_rate": 2.826086956521739e-05, | |
| "loss": 3.8783, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.06521739130434782, | |
| "grad_norm": 2.0335605144500732, | |
| "learning_rate": 3.0434782608695656e-05, | |
| "loss": 3.626, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.06956521739130435, | |
| "grad_norm": 2.045989513397217, | |
| "learning_rate": 3.260869565217392e-05, | |
| "loss": 3.4734, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.07391304347826087, | |
| "grad_norm": 1.797641396522522, | |
| "learning_rate": 3.478260869565218e-05, | |
| "loss": 3.3667, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.0782608695652174, | |
| "grad_norm": 1.7289575338363647, | |
| "learning_rate": 3.695652173913043e-05, | |
| "loss": 3.2171, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.08260869565217391, | |
| "grad_norm": 1.6280560493469238, | |
| "learning_rate": 3.91304347826087e-05, | |
| "loss": 3.0697, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.08695652173913043, | |
| "grad_norm": 1.5199931859970093, | |
| "learning_rate": 4.130434782608696e-05, | |
| "loss": 2.9537, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.09130434782608696, | |
| "grad_norm": 1.4183111190795898, | |
| "learning_rate": 4.347826086956522e-05, | |
| "loss": 2.8091, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.09565217391304348, | |
| "grad_norm": 1.453029990196228, | |
| "learning_rate": 4.565217391304348e-05, | |
| "loss": 2.6457, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 1.135553002357483, | |
| "learning_rate": 4.782608695652174e-05, | |
| "loss": 2.4701, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.10434782608695652, | |
| "grad_norm": 0.9866960644721985, | |
| "learning_rate": 5e-05, | |
| "loss": 2.3948, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.10869565217391304, | |
| "grad_norm": 0.8710840344429016, | |
| "learning_rate": 5.217391304347826e-05, | |
| "loss": 2.3239, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.11304347826086956, | |
| "grad_norm": 0.8170456886291504, | |
| "learning_rate": 5.4347826086956524e-05, | |
| "loss": 2.1285, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.11739130434782609, | |
| "grad_norm": 0.790302038192749, | |
| "learning_rate": 5.652173913043478e-05, | |
| "loss": 2.021, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.12173913043478261, | |
| "grad_norm": 0.7848089933395386, | |
| "learning_rate": 5.869565217391305e-05, | |
| "loss": 1.9254, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.12608695652173912, | |
| "grad_norm": 0.7707406878471375, | |
| "learning_rate": 6.086956521739131e-05, | |
| "loss": 1.8048, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.13043478260869565, | |
| "grad_norm": 0.7862960696220398, | |
| "learning_rate": 6.304347826086957e-05, | |
| "loss": 1.6704, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.13478260869565217, | |
| "grad_norm": 0.8184984922409058, | |
| "learning_rate": 6.521739130434783e-05, | |
| "loss": 1.5525, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.1391304347826087, | |
| "grad_norm": 0.751800537109375, | |
| "learning_rate": 6.73913043478261e-05, | |
| "loss": 1.4305, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.14347826086956522, | |
| "grad_norm": 0.6508727073669434, | |
| "learning_rate": 6.956521739130436e-05, | |
| "loss": 1.3082, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.14782608695652175, | |
| "grad_norm": 0.5927818417549133, | |
| "learning_rate": 7.17391304347826e-05, | |
| "loss": 1.2962, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.15217391304347827, | |
| "grad_norm": 0.48864519596099854, | |
| "learning_rate": 7.391304347826086e-05, | |
| "loss": 1.1943, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.1565217391304348, | |
| "grad_norm": 0.43812891840934753, | |
| "learning_rate": 7.608695652173914e-05, | |
| "loss": 1.1367, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.1608695652173913, | |
| "grad_norm": 0.3985790014266968, | |
| "learning_rate": 7.82608695652174e-05, | |
| "loss": 1.0961, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.16521739130434782, | |
| "grad_norm": 0.3411348760128021, | |
| "learning_rate": 8.043478260869566e-05, | |
| "loss": 1.0314, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.16956521739130434, | |
| "grad_norm": 0.32298171520233154, | |
| "learning_rate": 8.260869565217392e-05, | |
| "loss": 0.9771, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.17391304347826086, | |
| "grad_norm": 0.30958038568496704, | |
| "learning_rate": 8.478260869565218e-05, | |
| "loss": 0.9268, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.1782608695652174, | |
| "grad_norm": 0.2889741063117981, | |
| "learning_rate": 8.695652173913044e-05, | |
| "loss": 0.9256, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.1826086956521739, | |
| "grad_norm": 0.24591656029224396, | |
| "learning_rate": 8.91304347826087e-05, | |
| "loss": 0.883, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.18695652173913044, | |
| "grad_norm": 0.23997186124324799, | |
| "learning_rate": 9.130434782608696e-05, | |
| "loss": 0.8786, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.19130434782608696, | |
| "grad_norm": 0.2006598263978958, | |
| "learning_rate": 9.347826086956522e-05, | |
| "loss": 0.8396, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.1956521739130435, | |
| "grad_norm": 0.18479709327220917, | |
| "learning_rate": 9.565217391304348e-05, | |
| "loss": 0.8413, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 0.17641599476337433, | |
| "learning_rate": 9.782608695652174e-05, | |
| "loss": 0.8359, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.20434782608695654, | |
| "grad_norm": 0.15423867106437683, | |
| "learning_rate": 0.0001, | |
| "loss": 0.8058, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.20869565217391303, | |
| "grad_norm": 0.1461988240480423, | |
| "learning_rate": 9.999856041607731e-05, | |
| "loss": 0.8029, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.21304347826086956, | |
| "grad_norm": 0.12839862704277039, | |
| "learning_rate": 9.999424174720531e-05, | |
| "loss": 0.7822, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.21739130434782608, | |
| "grad_norm": 0.12158359587192535, | |
| "learning_rate": 9.998704424206746e-05, | |
| "loss": 0.7748, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.2217391304347826, | |
| "grad_norm": 0.1291743963956833, | |
| "learning_rate": 9.997696831512027e-05, | |
| "loss": 0.7661, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.22608695652173913, | |
| "grad_norm": 0.12144283205270767, | |
| "learning_rate": 9.99640145465694e-05, | |
| "loss": 0.7869, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.23043478260869565, | |
| "grad_norm": 0.1100422814488411, | |
| "learning_rate": 9.994818368233639e-05, | |
| "loss": 0.7777, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.23478260869565218, | |
| "grad_norm": 0.0993693619966507, | |
| "learning_rate": 9.992947663401548e-05, | |
| "loss": 0.7473, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.2391304347826087, | |
| "grad_norm": 0.0941305086016655, | |
| "learning_rate": 9.990789447882137e-05, | |
| "loss": 0.7516, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.24347826086956523, | |
| "grad_norm": 0.09400874376296997, | |
| "learning_rate": 9.988343845952697e-05, | |
| "loss": 0.7517, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.24782608695652175, | |
| "grad_norm": 0.083980493247509, | |
| "learning_rate": 9.985610998439197e-05, | |
| "loss": 0.749, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.25217391304347825, | |
| "grad_norm": 0.08494170755147934, | |
| "learning_rate": 9.98259106270817e-05, | |
| "loss": 0.7332, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.2565217391304348, | |
| "grad_norm": 0.08159317076206207, | |
| "learning_rate": 9.979284212657657e-05, | |
| "loss": 0.7343, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.2608695652173913, | |
| "grad_norm": 0.08313615620136261, | |
| "learning_rate": 9.97569063870718e-05, | |
| "loss": 0.7211, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.26521739130434785, | |
| "grad_norm": 0.07852096855640411, | |
| "learning_rate": 9.971810547786793e-05, | |
| "loss": 0.731, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.26956521739130435, | |
| "grad_norm": 0.0774468258023262, | |
| "learning_rate": 9.967644163325156e-05, | |
| "loss": 0.7198, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.27391304347826084, | |
| "grad_norm": 0.07157547771930695, | |
| "learning_rate": 9.963191725236672e-05, | |
| "loss": 0.6946, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.2782608695652174, | |
| "grad_norm": 0.07179877161979675, | |
| "learning_rate": 9.958453489907673e-05, | |
| "loss": 0.6983, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.2826086956521739, | |
| "grad_norm": 0.13720852136611938, | |
| "learning_rate": 9.953429730181653e-05, | |
| "loss": 0.7209, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.28695652173913044, | |
| "grad_norm": 0.08586138486862183, | |
| "learning_rate": 9.948120735343566e-05, | |
| "loss": 0.7022, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.29130434782608694, | |
| "grad_norm": 0.06595543771982193, | |
| "learning_rate": 9.942526811103152e-05, | |
| "loss": 0.6857, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.2956521739130435, | |
| "grad_norm": 0.06423239409923553, | |
| "learning_rate": 9.936648279577349e-05, | |
| "loss": 0.6924, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 0.07080892473459244, | |
| "learning_rate": 9.930485479271735e-05, | |
| "loss": 0.6963, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.30434782608695654, | |
| "grad_norm": 0.06481339782476425, | |
| "learning_rate": 9.924038765061042e-05, | |
| "loss": 0.7055, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.30869565217391304, | |
| "grad_norm": 0.07143648713827133, | |
| "learning_rate": 9.91730850816871e-05, | |
| "loss": 0.6761, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.3130434782608696, | |
| "grad_norm": 0.06885742396116257, | |
| "learning_rate": 9.91029509614553e-05, | |
| "loss": 0.7111, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.3173913043478261, | |
| "grad_norm": 0.06406974792480469, | |
| "learning_rate": 9.902998932847307e-05, | |
| "loss": 0.6971, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.3217391304347826, | |
| "grad_norm": 0.06285955011844635, | |
| "learning_rate": 9.895420438411616e-05, | |
| "loss": 0.681, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.32608695652173914, | |
| "grad_norm": 0.07179131358861923, | |
| "learning_rate": 9.887560049233605e-05, | |
| "loss": 0.7001, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.33043478260869563, | |
| "grad_norm": 0.06652161478996277, | |
| "learning_rate": 9.879418217940873e-05, | |
| "loss": 0.6668, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.3347826086956522, | |
| "grad_norm": 0.06445639580488205, | |
| "learning_rate": 9.870995413367397e-05, | |
| "loss": 0.6981, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.3391304347826087, | |
| "grad_norm": 0.06834300607442856, | |
| "learning_rate": 9.862292120526535e-05, | |
| "loss": 0.6484, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.34347826086956523, | |
| "grad_norm": 0.06481563299894333, | |
| "learning_rate": 9.853308840583109e-05, | |
| "loss": 0.6875, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.34782608695652173, | |
| "grad_norm": 0.062026482075452805, | |
| "learning_rate": 9.844046090824533e-05, | |
| "loss": 0.6889, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.3521739130434783, | |
| "grad_norm": 0.07275456190109253, | |
| "learning_rate": 9.834504404631031e-05, | |
| "loss": 0.6879, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.3565217391304348, | |
| "grad_norm": 0.06591422110795975, | |
| "learning_rate": 9.824684331444927e-05, | |
| "loss": 0.6554, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.36086956521739133, | |
| "grad_norm": 0.06396066397428513, | |
| "learning_rate": 9.814586436738998e-05, | |
| "loss": 0.6925, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.3652173913043478, | |
| "grad_norm": 0.08825157582759857, | |
| "learning_rate": 9.804211301983918e-05, | |
| "loss": 0.6629, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.3695652173913043, | |
| "grad_norm": 0.06731634587049484, | |
| "learning_rate": 9.793559524614779e-05, | |
| "loss": 0.6745, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.3739130434782609, | |
| "grad_norm": 0.06455274671316147, | |
| "learning_rate": 9.782631717996675e-05, | |
| "loss": 0.6851, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.3782608695652174, | |
| "grad_norm": 0.07710668444633484, | |
| "learning_rate": 9.771428511389395e-05, | |
| "loss": 0.6929, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.3826086956521739, | |
| "grad_norm": 0.0727052241563797, | |
| "learning_rate": 9.759950549911186e-05, | |
| "loss": 0.6798, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.3869565217391304, | |
| "grad_norm": 0.07156208157539368, | |
| "learning_rate": 9.748198494501597e-05, | |
| "loss": 0.6807, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.391304347826087, | |
| "grad_norm": 0.0921456515789032, | |
| "learning_rate": 9.736173021883432e-05, | |
| "loss": 0.6435, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.39565217391304347, | |
| "grad_norm": 0.09094609320163727, | |
| "learning_rate": 9.723874824523771e-05, | |
| "loss": 0.6874, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.09006571024656296, | |
| "learning_rate": 9.711304610594104e-05, | |
| "loss": 0.6778, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.4043478260869565, | |
| "grad_norm": 0.13732297718524933, | |
| "learning_rate": 9.698463103929542e-05, | |
| "loss": 0.6561, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.40869565217391307, | |
| "grad_norm": 0.09598764777183533, | |
| "learning_rate": 9.685351043987151e-05, | |
| "loss": 0.6624, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.41304347826086957, | |
| "grad_norm": 0.09070798009634018, | |
| "learning_rate": 9.671969185803356e-05, | |
| "loss": 0.6684, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.41739130434782606, | |
| "grad_norm": 0.0911954715847969, | |
| "learning_rate": 9.658318299950473e-05, | |
| "loss": 0.6568, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.4217391304347826, | |
| "grad_norm": 0.08703230321407318, | |
| "learning_rate": 9.644399172492336e-05, | |
| "loss": 0.6442, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.4260869565217391, | |
| "grad_norm": 0.0760849341750145, | |
| "learning_rate": 9.630212604939026e-05, | |
| "loss": 0.6551, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.43043478260869567, | |
| "grad_norm": 0.10621879249811172, | |
| "learning_rate": 9.615759414200729e-05, | |
| "loss": 0.6665, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.43478260869565216, | |
| "grad_norm": 0.08248650282621384, | |
| "learning_rate": 9.601040432540684e-05, | |
| "loss": 0.6752, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.4391304347826087, | |
| "grad_norm": 0.10147503018379211, | |
| "learning_rate": 9.586056507527266e-05, | |
| "loss": 0.6602, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.4434782608695652, | |
| "grad_norm": 0.1442282497882843, | |
| "learning_rate": 9.570808501985175e-05, | |
| "loss": 0.6704, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.44782608695652176, | |
| "grad_norm": 0.11339450627565384, | |
| "learning_rate": 9.555297293945759e-05, | |
| "loss": 0.6631, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.45217391304347826, | |
| "grad_norm": 0.15643437206745148, | |
| "learning_rate": 9.539523776596445e-05, | |
| "loss": 0.668, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.45652173913043476, | |
| "grad_norm": 0.1856074035167694, | |
| "learning_rate": 9.523488858229313e-05, | |
| "loss": 0.6413, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.4608695652173913, | |
| "grad_norm": 0.12280824780464172, | |
| "learning_rate": 9.507193462188791e-05, | |
| "loss": 0.6658, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.4652173913043478, | |
| "grad_norm": 0.18749414384365082, | |
| "learning_rate": 9.49063852681848e-05, | |
| "loss": 0.6785, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.46956521739130436, | |
| "grad_norm": 0.13954943418502808, | |
| "learning_rate": 9.47382500540714e-05, | |
| "loss": 0.652, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.47391304347826085, | |
| "grad_norm": 0.15025292336940765, | |
| "learning_rate": 9.45675386613377e-05, | |
| "loss": 0.6622, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.4782608695652174, | |
| "grad_norm": 0.11263363063335419, | |
| "learning_rate": 9.439426092011875e-05, | |
| "loss": 0.6573, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.4826086956521739, | |
| "grad_norm": 0.12779393792152405, | |
| "learning_rate": 9.421842680832861e-05, | |
| "loss": 0.6535, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.48695652173913045, | |
| "grad_norm": 0.11488567292690277, | |
| "learning_rate": 9.404004645108568e-05, | |
| "loss": 0.6438, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.49130434782608695, | |
| "grad_norm": 0.1706668585538864, | |
| "learning_rate": 9.385913012012973e-05, | |
| "loss": 0.6427, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.4956521739130435, | |
| "grad_norm": 0.13733729720115662, | |
| "learning_rate": 9.367568823323039e-05, | |
| "loss": 0.6555, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 0.11061578243970871, | |
| "learning_rate": 9.348973135358734e-05, | |
| "loss": 0.6672, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.5043478260869565, | |
| "grad_norm": 0.18926067650318146, | |
| "learning_rate": 9.330127018922194e-05, | |
| "loss": 0.6573, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.508695652173913, | |
| "grad_norm": 0.15428727865219116, | |
| "learning_rate": 9.311031559236067e-05, | |
| "loss": 0.6708, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.5130434782608696, | |
| "grad_norm": 0.16264328360557556, | |
| "learning_rate": 9.291687855881026e-05, | |
| "loss": 0.6446, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.5173913043478261, | |
| "grad_norm": 0.11342114955186844, | |
| "learning_rate": 9.272097022732443e-05, | |
| "loss": 0.6571, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.5217391304347826, | |
| "grad_norm": 0.15034589171409607, | |
| "learning_rate": 9.252260187896256e-05, | |
| "loss": 0.6408, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.5260869565217391, | |
| "grad_norm": 0.21747715771198273, | |
| "learning_rate": 9.232178493644006e-05, | |
| "loss": 0.6346, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.5304347826086957, | |
| "grad_norm": 0.27781569957733154, | |
| "learning_rate": 9.211853096347058e-05, | |
| "loss": 0.6541, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.5347826086956522, | |
| "grad_norm": 0.2587333023548126, | |
| "learning_rate": 9.191285166410022e-05, | |
| "loss": 0.6516, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.5391304347826087, | |
| "grad_norm": 0.16397182643413544, | |
| "learning_rate": 9.170475888203347e-05, | |
| "loss": 0.6716, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.5434782608695652, | |
| "grad_norm": 0.12862510979175568, | |
| "learning_rate": 9.149426459995126e-05, | |
| "loss": 0.6596, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.5478260869565217, | |
| "grad_norm": 0.15427789092063904, | |
| "learning_rate": 9.128138093882098e-05, | |
| "loss": 0.6588, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.5521739130434783, | |
| "grad_norm": 0.22064033150672913, | |
| "learning_rate": 9.106612015719845e-05, | |
| "loss": 0.6314, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.5565217391304348, | |
| "grad_norm": 0.1941988468170166, | |
| "learning_rate": 9.08484946505221e-05, | |
| "loss": 0.648, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.5608695652173913, | |
| "grad_norm": 0.18163767457008362, | |
| "learning_rate": 9.062851695039915e-05, | |
| "loss": 0.6738, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.5652173913043478, | |
| "grad_norm": 0.16294820606708527, | |
| "learning_rate": 9.040619972388403e-05, | |
| "loss": 0.6534, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.5695652173913044, | |
| "grad_norm": 0.23330819606781006, | |
| "learning_rate": 9.018155577274892e-05, | |
| "loss": 0.6478, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.5739130434782609, | |
| "grad_norm": 0.3880465030670166, | |
| "learning_rate": 8.995459803274664e-05, | |
| "loss": 0.6566, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.5782608695652174, | |
| "grad_norm": 0.6047540903091431, | |
| "learning_rate": 8.972533957286573e-05, | |
| "loss": 0.6321, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.5826086956521739, | |
| "grad_norm": 0.526760995388031, | |
| "learning_rate": 8.949379359457793e-05, | |
| "loss": 0.6501, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.5869565217391305, | |
| "grad_norm": 0.26121070981025696, | |
| "learning_rate": 8.925997343107795e-05, | |
| "loss": 0.6462, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.591304347826087, | |
| "grad_norm": 0.3640858232975006, | |
| "learning_rate": 8.902389254651569e-05, | |
| "loss": 0.6378, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.5956521739130435, | |
| "grad_norm": 0.3413775861263275, | |
| "learning_rate": 8.8785564535221e-05, | |
| "loss": 0.6549, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 0.25738435983657837, | |
| "learning_rate": 8.854500312092081e-05, | |
| "loss": 0.6266, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.6043478260869565, | |
| "grad_norm": 0.3974941670894623, | |
| "learning_rate": 8.83022221559489e-05, | |
| "loss": 0.6207, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.6086956521739131, | |
| "grad_norm": 0.3541712462902069, | |
| "learning_rate": 8.805723562044824e-05, | |
| "loss": 0.6623, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.6130434782608696, | |
| "grad_norm": 0.29466933012008667, | |
| "learning_rate": 8.781005762156593e-05, | |
| "loss": 0.6753, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.6173913043478261, | |
| "grad_norm": 0.429376482963562, | |
| "learning_rate": 8.75607023926409e-05, | |
| "loss": 0.6351, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.6217391304347826, | |
| "grad_norm": 0.28085529804229736, | |
| "learning_rate": 8.730918429238428e-05, | |
| "loss": 0.6584, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.6260869565217392, | |
| "grad_norm": 0.34451988339424133, | |
| "learning_rate": 8.705551780405263e-05, | |
| "loss": 0.6619, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.6304347826086957, | |
| "grad_norm": 0.3307543098926544, | |
| "learning_rate": 8.679971753461387e-05, | |
| "loss": 0.6448, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.6347826086956522, | |
| "grad_norm": 0.2655896842479706, | |
| "learning_rate": 8.654179821390621e-05, | |
| "loss": 0.6442, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.6391304347826087, | |
| "grad_norm": 0.4360576868057251, | |
| "learning_rate": 8.628177469378995e-05, | |
| "loss": 0.6487, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.6434782608695652, | |
| "grad_norm": 0.35094520449638367, | |
| "learning_rate": 8.601966194729227e-05, | |
| "loss": 0.6359, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.6478260869565218, | |
| "grad_norm": 0.4109646677970886, | |
| "learning_rate": 8.575547506774497e-05, | |
| "loss": 0.6519, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.6521739130434783, | |
| "grad_norm": 0.3401927053928375, | |
| "learning_rate": 8.548922926791545e-05, | |
| "loss": 0.6375, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.6565217391304348, | |
| "grad_norm": 0.22073158621788025, | |
| "learning_rate": 8.522093987913062e-05, | |
| "loss": 0.6462, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.6608695652173913, | |
| "grad_norm": 0.43310844898223877, | |
| "learning_rate": 8.495062235039411e-05, | |
| "loss": 0.6697, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.6652173913043479, | |
| "grad_norm": 0.42843684554100037, | |
| "learning_rate": 8.467829224749665e-05, | |
| "loss": 0.6169, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.6695652173913044, | |
| "grad_norm": 0.4057531952857971, | |
| "learning_rate": 8.440396525211975e-05, | |
| "loss": 0.6625, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.6739130434782609, | |
| "grad_norm": 0.23454974591732025, | |
| "learning_rate": 8.412765716093272e-05, | |
| "loss": 0.616, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.6782608695652174, | |
| "grad_norm": 0.3723919987678528, | |
| "learning_rate": 8.384938388468296e-05, | |
| "loss": 0.6576, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.6826086956521739, | |
| "grad_norm": 0.44731444120407104, | |
| "learning_rate": 8.356916144727985e-05, | |
| "loss": 0.6408, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.6869565217391305, | |
| "grad_norm": 0.4032682180404663, | |
| "learning_rate": 8.328700598487203e-05, | |
| "loss": 0.6541, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.691304347826087, | |
| "grad_norm": 0.34927839040756226, | |
| "learning_rate": 8.300293374491821e-05, | |
| "loss": 0.641, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.6956521739130435, | |
| "grad_norm": 0.37337374687194824, | |
| "learning_rate": 8.271696108525157e-05, | |
| "loss": 0.6409, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 0.33849138021469116, | |
| "learning_rate": 8.24291044731378e-05, | |
| "loss": 0.6571, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.7043478260869566, | |
| "grad_norm": 0.26402008533477783, | |
| "learning_rate": 8.213938048432697e-05, | |
| "loss": 0.6467, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.7086956521739131, | |
| "grad_norm": 0.31730157136917114, | |
| "learning_rate": 8.184780580209892e-05, | |
| "loss": 0.6519, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.7130434782608696, | |
| "grad_norm": 0.47295334935188293, | |
| "learning_rate": 8.155439721630264e-05, | |
| "loss": 0.629, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.717391304347826, | |
| "grad_norm": 0.3847337067127228, | |
| "learning_rate": 8.125917162238945e-05, | |
| "loss": 0.6404, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.7217391304347827, | |
| "grad_norm": 0.30035194754600525, | |
| "learning_rate": 8.09621460204401e-05, | |
| "loss": 0.6697, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.7260869565217392, | |
| "grad_norm": 0.4391736686229706, | |
| "learning_rate": 8.066333751418583e-05, | |
| "loss": 0.6399, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.7304347826086957, | |
| "grad_norm": 0.6057283878326416, | |
| "learning_rate": 8.036276331002348e-05, | |
| "loss": 0.6341, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.7347826086956522, | |
| "grad_norm": 0.644005537033081, | |
| "learning_rate": 8.006044071602477e-05, | |
| "loss": 0.662, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.7391304347826086, | |
| "grad_norm": 0.6034097671508789, | |
| "learning_rate": 7.975638714093949e-05, | |
| "loss": 0.638, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.7434782608695653, | |
| "grad_norm": 0.49918678402900696, | |
| "learning_rate": 7.945062009319319e-05, | |
| "loss": 0.6322, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.7478260869565218, | |
| "grad_norm": 0.4087945520877838, | |
| "learning_rate": 7.914315717987892e-05, | |
| "loss": 0.6419, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.7521739130434782, | |
| "grad_norm": 0.3623512089252472, | |
| "learning_rate": 7.883401610574336e-05, | |
| "loss": 0.6618, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.7565217391304347, | |
| "grad_norm": 0.7307239174842834, | |
| "learning_rate": 7.85232146721673e-05, | |
| "loss": 0.6572, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.7608695652173914, | |
| "grad_norm": 0.8763480186462402, | |
| "learning_rate": 7.821077077614061e-05, | |
| "loss": 0.6434, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.7652173913043478, | |
| "grad_norm": 0.5741376280784607, | |
| "learning_rate": 7.789670240923168e-05, | |
| "loss": 0.6539, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.7695652173913043, | |
| "grad_norm": 0.4742548167705536, | |
| "learning_rate": 7.758102765655137e-05, | |
| "loss": 0.6435, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.7739130434782608, | |
| "grad_norm": 0.6679338216781616, | |
| "learning_rate": 7.726376469571164e-05, | |
| "loss": 0.6654, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.7782608695652173, | |
| "grad_norm": 0.7236630320549011, | |
| "learning_rate": 7.694493179577879e-05, | |
| "loss": 0.655, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.782608695652174, | |
| "grad_norm": 0.3954794704914093, | |
| "learning_rate": 7.662454731622148e-05, | |
| "loss": 0.6733, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.7869565217391304, | |
| "grad_norm": 0.41423317790031433, | |
| "learning_rate": 7.630262970585356e-05, | |
| "loss": 0.6466, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.7913043478260869, | |
| "grad_norm": 0.5248022675514221, | |
| "learning_rate": 7.597919750177168e-05, | |
| "loss": 0.6343, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.7956521739130434, | |
| "grad_norm": 0.4523037075996399, | |
| "learning_rate": 7.56542693282879e-05, | |
| "loss": 0.6494, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.3984985947608948, | |
| "learning_rate": 7.532786389585716e-05, | |
| "loss": 0.6511, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.8043478260869565, | |
| "grad_norm": 0.3854583501815796, | |
| "learning_rate": 7.500000000000001e-05, | |
| "loss": 0.6628, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.808695652173913, | |
| "grad_norm": 0.43506574630737305, | |
| "learning_rate": 7.467069652022016e-05, | |
| "loss": 0.6603, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.8130434782608695, | |
| "grad_norm": 0.41759249567985535, | |
| "learning_rate": 7.433997241891742e-05, | |
| "loss": 0.6674, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.8173913043478261, | |
| "grad_norm": 0.2531141936779022, | |
| "learning_rate": 7.400784674029578e-05, | |
| "loss": 0.6395, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.8217391304347826, | |
| "grad_norm": 0.4464227259159088, | |
| "learning_rate": 7.36743386092667e-05, | |
| "loss": 0.6576, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.8260869565217391, | |
| "grad_norm": 0.47379711270332336, | |
| "learning_rate": 7.333946723034794e-05, | |
| "loss": 0.6423, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.8304347826086956, | |
| "grad_norm": 0.3901284635066986, | |
| "learning_rate": 7.300325188655761e-05, | |
| "loss": 0.6594, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.8347826086956521, | |
| "grad_norm": 0.3132023811340332, | |
| "learning_rate": 7.266571193830387e-05, | |
| "loss": 0.6611, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.8391304347826087, | |
| "grad_norm": 0.4356115460395813, | |
| "learning_rate": 7.232686682227001e-05, | |
| "loss": 0.6376, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.8434782608695652, | |
| "grad_norm": 0.5321224331855774, | |
| "learning_rate": 7.198673605029528e-05, | |
| "loss": 0.6643, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.8478260869565217, | |
| "grad_norm": 0.3640391230583191, | |
| "learning_rate": 7.164533920825137e-05, | |
| "loss": 0.6476, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.8521739130434782, | |
| "grad_norm": 0.2873951494693756, | |
| "learning_rate": 7.130269595491443e-05, | |
| "loss": 0.649, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.8565217391304348, | |
| "grad_norm": 0.45937976241111755, | |
| "learning_rate": 7.095882602083322e-05, | |
| "loss": 0.648, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.8608695652173913, | |
| "grad_norm": 0.5308820009231567, | |
| "learning_rate": 7.061374920719288e-05, | |
| "loss": 0.6458, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.8652173913043478, | |
| "grad_norm": 0.43750235438346863, | |
| "learning_rate": 7.026748538467474e-05, | |
| "loss": 0.6457, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.8695652173913043, | |
| "grad_norm": 0.27052804827690125, | |
| "learning_rate": 6.992005449231208e-05, | |
| "loss": 0.6452, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.8739130434782608, | |
| "grad_norm": 0.37570297718048096, | |
| "learning_rate": 6.957147653634198e-05, | |
| "loss": 0.6566, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.8782608695652174, | |
| "grad_norm": 0.32025307416915894, | |
| "learning_rate": 6.922177158905325e-05, | |
| "loss": 0.6655, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.8826086956521739, | |
| "grad_norm": 0.2932673990726471, | |
| "learning_rate": 6.887095978763072e-05, | |
| "loss": 0.6749, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.8869565217391304, | |
| "grad_norm": 0.23213867843151093, | |
| "learning_rate": 6.851906133299557e-05, | |
| "loss": 0.6631, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.8913043478260869, | |
| "grad_norm": 0.3275505602359772, | |
| "learning_rate": 6.816609648864208e-05, | |
| "loss": 0.6758, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.8956521739130435, | |
| "grad_norm": 0.32032299041748047, | |
| "learning_rate": 6.781208557947086e-05, | |
| "loss": 0.662, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 0.26808756589889526, | |
| "learning_rate": 6.745704899061843e-05, | |
| "loss": 0.6464, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.9043478260869565, | |
| "grad_norm": 0.25998106598854065, | |
| "learning_rate": 6.710100716628344e-05, | |
| "loss": 0.6556, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.908695652173913, | |
| "grad_norm": 0.36953797936439514, | |
| "learning_rate": 6.674398060854931e-05, | |
| "loss": 0.6761, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.9130434782608695, | |
| "grad_norm": 0.43774327635765076, | |
| "learning_rate": 6.638598987620375e-05, | |
| "loss": 0.6481, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.9173913043478261, | |
| "grad_norm": 0.24901102483272552, | |
| "learning_rate": 6.602705558355486e-05, | |
| "loss": 0.675, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.9217391304347826, | |
| "grad_norm": 0.3668375313282013, | |
| "learning_rate": 6.566719839924412e-05, | |
| "loss": 0.6619, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.9260869565217391, | |
| "grad_norm": 0.5943741202354431, | |
| "learning_rate": 6.530643904505621e-05, | |
| "loss": 0.6561, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.9304347826086956, | |
| "grad_norm": 0.6538096070289612, | |
| "learning_rate": 6.49447982947258e-05, | |
| "loss": 0.6297, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.9347826086956522, | |
| "grad_norm": 0.5622021555900574, | |
| "learning_rate": 6.458229697274125e-05, | |
| "loss": 0.6602, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.9391304347826087, | |
| "grad_norm": 0.45731329917907715, | |
| "learning_rate": 6.42189559531456e-05, | |
| "loss": 0.6686, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.9434782608695652, | |
| "grad_norm": 0.26856303215026855, | |
| "learning_rate": 6.385479615833445e-05, | |
| "loss": 0.6358, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.9478260869565217, | |
| "grad_norm": 0.24898113310337067, | |
| "learning_rate": 6.348983855785121e-05, | |
| "loss": 0.6579, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.9521739130434783, | |
| "grad_norm": 0.3039465844631195, | |
| "learning_rate": 6.312410416717968e-05, | |
| "loss": 0.6493, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.9565217391304348, | |
| "grad_norm": 0.36120837926864624, | |
| "learning_rate": 6.27576140465338e-05, | |
| "loss": 0.6524, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.9608695652173913, | |
| "grad_norm": 0.35759392380714417, | |
| "learning_rate": 6.2390389299645e-05, | |
| "loss": 0.6247, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.9652173913043478, | |
| "grad_norm": 0.29583072662353516, | |
| "learning_rate": 6.202245107254693e-05, | |
| "loss": 0.642, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.9695652173913043, | |
| "grad_norm": 0.24323242902755737, | |
| "learning_rate": 6.165382055235783e-05, | |
| "loss": 0.6683, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.9739130434782609, | |
| "grad_norm": 0.4522090256214142, | |
| "learning_rate": 6.128451896606053e-05, | |
| "loss": 0.639, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.9782608695652174, | |
| "grad_norm": 0.6692441701889038, | |
| "learning_rate": 6.091456757928008e-05, | |
| "loss": 0.6628, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.9826086956521739, | |
| "grad_norm": 0.7985122203826904, | |
| "learning_rate": 6.054398769505924e-05, | |
| "loss": 0.6585, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.9869565217391304, | |
| "grad_norm": 0.6464029550552368, | |
| "learning_rate": 6.01728006526317e-05, | |
| "loss": 0.6563, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.991304347826087, | |
| "grad_norm": 0.36494386196136475, | |
| "learning_rate": 5.980102782619342e-05, | |
| "loss": 0.648, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.9956521739130435, | |
| "grad_norm": 0.40735068917274475, | |
| "learning_rate": 5.942869062367179e-05, | |
| "loss": 0.6502, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.6993163228034973, | |
| "learning_rate": 5.905581048549279e-05, | |
| "loss": 0.6682, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.0043478260869565, | |
| "grad_norm": 0.8026555776596069, | |
| "learning_rate": 5.868240888334653e-05, | |
| "loss": 0.6223, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 1.008695652173913, | |
| "grad_norm": 0.48003122210502625, | |
| "learning_rate": 5.830850731895071e-05, | |
| "loss": 0.6451, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 1.0130434782608695, | |
| "grad_norm": 0.3873646557331085, | |
| "learning_rate": 5.793412732281257e-05, | |
| "loss": 0.644, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 1.017391304347826, | |
| "grad_norm": 0.6189988255500793, | |
| "learning_rate": 5.755929045298905e-05, | |
| "loss": 0.6453, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 1.0217391304347827, | |
| "grad_norm": 0.6522347331047058, | |
| "learning_rate": 5.718401829384541e-05, | |
| "loss": 0.6555, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 1.0260869565217392, | |
| "grad_norm": 0.3959437906742096, | |
| "learning_rate": 5.680833245481234e-05, | |
| "loss": 0.6374, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 1.0304347826086957, | |
| "grad_norm": 0.4250578284263611, | |
| "learning_rate": 5.643225456914156e-05, | |
| "loss": 0.6447, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 1.0347826086956522, | |
| "grad_norm": 0.5769574642181396, | |
| "learning_rate": 5.6055806292660206e-05, | |
| "loss": 0.6432, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 1.0391304347826087, | |
| "grad_norm": 0.5396884083747864, | |
| "learning_rate": 5.5679009302523744e-05, | |
| "loss": 0.6499, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 1.0434782608695652, | |
| "grad_norm": 0.25375860929489136, | |
| "learning_rate": 5.530188529596774e-05, | |
| "loss": 0.6613, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.0478260869565217, | |
| "grad_norm": 0.35306674242019653, | |
| "learning_rate": 5.4924455989058434e-05, | |
| "loss": 0.6577, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 1.0521739130434782, | |
| "grad_norm": 0.47978144884109497, | |
| "learning_rate": 5.454674311544235e-05, | |
| "loss": 0.6388, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 1.0565217391304347, | |
| "grad_norm": 0.35482141375541687, | |
| "learning_rate": 5.4168768425094673e-05, | |
| "loss": 0.6619, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 1.0608695652173914, | |
| "grad_norm": 0.2898884117603302, | |
| "learning_rate": 5.379055368306692e-05, | |
| "loss": 0.6232, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 1.065217391304348, | |
| "grad_norm": 0.29022127389907837, | |
| "learning_rate": 5.341212066823355e-05, | |
| "loss": 0.645, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 1.0695652173913044, | |
| "grad_norm": 0.2884006202220917, | |
| "learning_rate": 5.303349117203793e-05, | |
| "loss": 0.649, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 1.0739130434782609, | |
| "grad_norm": 0.264309287071228, | |
| "learning_rate": 5.265468699723748e-05, | |
| "loss": 0.6414, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 1.0782608695652174, | |
| "grad_norm": 0.32101091742515564, | |
| "learning_rate": 5.227572995664819e-05, | |
| "loss": 0.6417, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 1.0826086956521739, | |
| "grad_norm": 0.521435022354126, | |
| "learning_rate": 5.189664187188857e-05, | |
| "loss": 0.6681, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 1.0869565217391304, | |
| "grad_norm": 0.7148544192314148, | |
| "learning_rate": 5.151744457212312e-05, | |
| "loss": 0.6604, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.0913043478260869, | |
| "grad_norm": 0.767124354839325, | |
| "learning_rate": 5.113815989280528e-05, | |
| "loss": 0.6505, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 1.0956521739130434, | |
| "grad_norm": 0.6748565435409546, | |
| "learning_rate": 5.075880967442014e-05, | |
| "loss": 0.6424, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "grad_norm": 0.5375097393989563, | |
| "learning_rate": 5.037941576122667e-05, | |
| "loss": 0.6265, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 1.1043478260869566, | |
| "grad_norm": 0.35003581643104553, | |
| "learning_rate": 5e-05, | |
| "loss": 0.6436, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 1.108695652173913, | |
| "grad_norm": 0.443466454744339, | |
| "learning_rate": 4.962058423877335e-05, | |
| "loss": 0.6443, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 1.1130434782608696, | |
| "grad_norm": 0.5645395517349243, | |
| "learning_rate": 4.924119032557987e-05, | |
| "loss": 0.6638, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 1.117391304347826, | |
| "grad_norm": 0.4608463943004608, | |
| "learning_rate": 4.886184010719471e-05, | |
| "loss": 0.6244, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 1.1217391304347826, | |
| "grad_norm": 0.31499549746513367, | |
| "learning_rate": 4.848255542787688e-05, | |
| "loss": 0.658, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 1.126086956521739, | |
| "grad_norm": 0.5753819346427917, | |
| "learning_rate": 4.810335812811144e-05, | |
| "loss": 0.655, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 1.1304347826086956, | |
| "grad_norm": 0.6405854821205139, | |
| "learning_rate": 4.7724270043351835e-05, | |
| "loss": 0.6481, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.134782608695652, | |
| "grad_norm": 0.4674757719039917, | |
| "learning_rate": 4.7345313002762545e-05, | |
| "loss": 0.6572, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 1.1391304347826088, | |
| "grad_norm": 0.49681004881858826, | |
| "learning_rate": 4.6966508827962075e-05, | |
| "loss": 0.6426, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 1.1434782608695653, | |
| "grad_norm": 0.4794960021972656, | |
| "learning_rate": 4.658787933176646e-05, | |
| "loss": 0.6268, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 1.1478260869565218, | |
| "grad_norm": 0.5307829976081848, | |
| "learning_rate": 4.620944631693309e-05, | |
| "loss": 0.667, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 1.1521739130434783, | |
| "grad_norm": 0.5696162581443787, | |
| "learning_rate": 4.583123157490533e-05, | |
| "loss": 0.6517, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 1.1565217391304348, | |
| "grad_norm": 0.4920869767665863, | |
| "learning_rate": 4.545325688455765e-05, | |
| "loss": 0.6431, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 1.1608695652173913, | |
| "grad_norm": 0.30201277136802673, | |
| "learning_rate": 4.5075544010941564e-05, | |
| "loss": 0.645, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 1.1652173913043478, | |
| "grad_norm": 0.48374703526496887, | |
| "learning_rate": 4.4698114704032274e-05, | |
| "loss": 0.6631, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 1.1695652173913043, | |
| "grad_norm": 0.5670326948165894, | |
| "learning_rate": 4.432099069747625e-05, | |
| "loss": 0.6283, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 1.1739130434782608, | |
| "grad_norm": 0.4862656593322754, | |
| "learning_rate": 4.394419370733981e-05, | |
| "loss": 0.6126, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.1782608695652175, | |
| "grad_norm": 0.35349950194358826, | |
| "learning_rate": 4.3567745430858456e-05, | |
| "loss": 0.6377, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 1.182608695652174, | |
| "grad_norm": 0.6254827976226807, | |
| "learning_rate": 4.319166754518768e-05, | |
| "loss": 0.6385, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 1.1869565217391305, | |
| "grad_norm": 0.7132217288017273, | |
| "learning_rate": 4.2815981706154596e-05, | |
| "loss": 0.6471, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 1.191304347826087, | |
| "grad_norm": 0.5088608264923096, | |
| "learning_rate": 4.2440709547010956e-05, | |
| "loss": 0.6292, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 1.1956521739130435, | |
| "grad_norm": 0.3840946555137634, | |
| "learning_rate": 4.2065872677187435e-05, | |
| "loss": 0.6485, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 0.6056833267211914, | |
| "learning_rate": 4.16914926810493e-05, | |
| "loss": 0.6555, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 1.2043478260869565, | |
| "grad_norm": 0.3235962390899658, | |
| "learning_rate": 4.131759111665349e-05, | |
| "loss": 0.6413, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 1.208695652173913, | |
| "grad_norm": 0.6332939267158508, | |
| "learning_rate": 4.094418951450721e-05, | |
| "loss": 0.6502, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 1.2130434782608694, | |
| "grad_norm": 0.6941733956336975, | |
| "learning_rate": 4.0571309376328214e-05, | |
| "loss": 0.6351, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 1.2173913043478262, | |
| "grad_norm": 0.4228350818157196, | |
| "learning_rate": 4.019897217380659e-05, | |
| "loss": 0.6354, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.2217391304347827, | |
| "grad_norm": 0.3524788022041321, | |
| "learning_rate": 3.982719934736832e-05, | |
| "loss": 0.6322, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 1.2260869565217392, | |
| "grad_norm": 0.5214580297470093, | |
| "learning_rate": 3.945601230494079e-05, | |
| "loss": 0.6683, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 1.2304347826086957, | |
| "grad_norm": 0.5836907029151917, | |
| "learning_rate": 3.908543242071993e-05, | |
| "loss": 0.6639, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 1.2347826086956522, | |
| "grad_norm": 0.3420216143131256, | |
| "learning_rate": 3.871548103393947e-05, | |
| "loss": 0.6343, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 1.2391304347826086, | |
| "grad_norm": 0.4118534326553345, | |
| "learning_rate": 3.834617944764218e-05, | |
| "loss": 0.6435, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 1.2434782608695651, | |
| "grad_norm": 0.5472633838653564, | |
| "learning_rate": 3.7977548927453086e-05, | |
| "loss": 0.6538, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 1.2478260869565219, | |
| "grad_norm": 0.3181094229221344, | |
| "learning_rate": 3.760961070035501e-05, | |
| "loss": 0.6595, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 1.2521739130434781, | |
| "grad_norm": 0.3373314440250397, | |
| "learning_rate": 3.724238595346619e-05, | |
| "loss": 0.6417, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 1.2565217391304349, | |
| "grad_norm": 0.3461237847805023, | |
| "learning_rate": 3.6875895832820314e-05, | |
| "loss": 0.65, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 1.2608695652173914, | |
| "grad_norm": 0.27134010195732117, | |
| "learning_rate": 3.651016144214878e-05, | |
| "loss": 0.6413, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.2652173913043478, | |
| "grad_norm": 0.3723100423812866, | |
| "learning_rate": 3.614520384166557e-05, | |
| "loss": 0.6497, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 1.2695652173913043, | |
| "grad_norm": 0.30786219239234924, | |
| "learning_rate": 3.5781044046854416e-05, | |
| "loss": 0.643, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 1.2739130434782608, | |
| "grad_norm": 0.30118393898010254, | |
| "learning_rate": 3.541770302725875e-05, | |
| "loss": 0.6217, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 1.2782608695652173, | |
| "grad_norm": 0.34141477942466736, | |
| "learning_rate": 3.5055201705274223e-05, | |
| "loss": 0.6325, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 1.2826086956521738, | |
| "grad_norm": 0.3495056629180908, | |
| "learning_rate": 3.46935609549438e-05, | |
| "loss": 0.659, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 1.2869565217391306, | |
| "grad_norm": 0.32825884222984314, | |
| "learning_rate": 3.4332801600755896e-05, | |
| "loss": 0.644, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 1.2913043478260868, | |
| "grad_norm": 0.4410015344619751, | |
| "learning_rate": 3.397294441644515e-05, | |
| "loss": 0.6272, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 1.2956521739130435, | |
| "grad_norm": 0.2754165828227997, | |
| "learning_rate": 3.361401012379626e-05, | |
| "loss": 0.6405, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "grad_norm": 0.40065616369247437, | |
| "learning_rate": 3.325601939145069e-05, | |
| "loss": 0.6398, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 1.3043478260869565, | |
| "grad_norm": 0.4464031159877777, | |
| "learning_rate": 3.289899283371657e-05, | |
| "loss": 0.6527, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.308695652173913, | |
| "grad_norm": 0.2647855877876282, | |
| "learning_rate": 3.2542951009381584e-05, | |
| "loss": 0.6238, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 1.3130434782608695, | |
| "grad_norm": 0.3642147481441498, | |
| "learning_rate": 3.2187914420529174e-05, | |
| "loss": 0.6661, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 1.317391304347826, | |
| "grad_norm": 0.2401854395866394, | |
| "learning_rate": 3.1833903511357946e-05, | |
| "loss": 0.6537, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 1.3217391304347825, | |
| "grad_norm": 0.32390883564949036, | |
| "learning_rate": 3.148093866700445e-05, | |
| "loss": 0.6374, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 1.3260869565217392, | |
| "grad_norm": 0.3197692930698395, | |
| "learning_rate": 3.112904021236929e-05, | |
| "loss": 0.6587, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 1.3304347826086955, | |
| "grad_norm": 0.26799288392066956, | |
| "learning_rate": 3.0778228410946755e-05, | |
| "loss": 0.6225, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 1.3347826086956522, | |
| "grad_norm": 0.3222508430480957, | |
| "learning_rate": 3.0428523463658044e-05, | |
| "loss": 0.6595, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 1.3391304347826087, | |
| "grad_norm": 0.26545819640159607, | |
| "learning_rate": 3.007994550768793e-05, | |
| "loss": 0.6083, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 1.3434782608695652, | |
| "grad_norm": 0.28427237272262573, | |
| "learning_rate": 2.9732514615325267e-05, | |
| "loss": 0.6523, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 1.3478260869565217, | |
| "grad_norm": 0.23605205118656158, | |
| "learning_rate": 2.9386250792807125e-05, | |
| "loss": 0.6558, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.3521739130434782, | |
| "grad_norm": 0.2776494324207306, | |
| "learning_rate": 2.9041173979166813e-05, | |
| "loss": 0.6531, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 1.3565217391304347, | |
| "grad_norm": 0.2938724458217621, | |
| "learning_rate": 2.86973040450856e-05, | |
| "loss": 0.6212, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 1.3608695652173912, | |
| "grad_norm": 0.2571961283683777, | |
| "learning_rate": 2.8354660791748654e-05, | |
| "loss": 0.6639, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 1.365217391304348, | |
| "grad_norm": 0.24677854776382446, | |
| "learning_rate": 2.8013263949704705e-05, | |
| "loss": 0.6334, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 1.3695652173913042, | |
| "grad_norm": 0.24199220538139343, | |
| "learning_rate": 2.7673133177729997e-05, | |
| "loss": 0.6469, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 1.373913043478261, | |
| "grad_norm": 0.27776235342025757, | |
| "learning_rate": 2.7334288061696146e-05, | |
| "loss": 0.6602, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 1.3782608695652174, | |
| "grad_norm": 0.27854007482528687, | |
| "learning_rate": 2.6996748113442394e-05, | |
| "loss": 0.6649, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 1.382608695652174, | |
| "grad_norm": 0.33447134494781494, | |
| "learning_rate": 2.666053276965207e-05, | |
| "loss": 0.6533, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 1.3869565217391304, | |
| "grad_norm": 0.2757052481174469, | |
| "learning_rate": 2.63256613907333e-05, | |
| "loss": 0.6528, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 1.391304347826087, | |
| "grad_norm": 0.2926105856895447, | |
| "learning_rate": 2.5992153259704228e-05, | |
| "loss": 0.6171, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.3956521739130434, | |
| "grad_norm": 0.2858363091945648, | |
| "learning_rate": 2.5660027581082558e-05, | |
| "loss": 0.6679, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 0.2723914086818695, | |
| "learning_rate": 2.5329303479779854e-05, | |
| "loss": 0.6582, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 1.4043478260869566, | |
| "grad_norm": 0.22853125631809235, | |
| "learning_rate": 2.500000000000001e-05, | |
| "loss": 0.6293, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 1.4086956521739131, | |
| "grad_norm": 0.28617411851882935, | |
| "learning_rate": 2.4672136104142856e-05, | |
| "loss": 0.6408, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 1.4130434782608696, | |
| "grad_norm": 0.23259450495243073, | |
| "learning_rate": 2.434573067171213e-05, | |
| "loss": 0.648, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 1.4173913043478261, | |
| "grad_norm": 0.23848018050193787, | |
| "learning_rate": 2.4020802498228335e-05, | |
| "loss": 0.6363, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 1.4217391304347826, | |
| "grad_norm": 0.3065076768398285, | |
| "learning_rate": 2.3697370294146444e-05, | |
| "loss": 0.6251, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 1.4260869565217391, | |
| "grad_norm": 0.2523568868637085, | |
| "learning_rate": 2.3375452683778526e-05, | |
| "loss": 0.6384, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 1.4304347826086956, | |
| "grad_norm": 0.24281303584575653, | |
| "learning_rate": 2.3055068204221224e-05, | |
| "loss": 0.6512, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 1.434782608695652, | |
| "grad_norm": 0.2753816545009613, | |
| "learning_rate": 2.2736235304288373e-05, | |
| "loss": 0.6587, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.4391304347826086, | |
| "grad_norm": 0.24279272556304932, | |
| "learning_rate": 2.241897234344864e-05, | |
| "loss": 0.6391, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 1.4434782608695653, | |
| "grad_norm": 0.2752493917942047, | |
| "learning_rate": 2.2103297590768334e-05, | |
| "loss": 0.6514, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 1.4478260869565218, | |
| "grad_norm": 0.24567648768424988, | |
| "learning_rate": 2.1789229223859404e-05, | |
| "loss": 0.6447, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 1.4521739130434783, | |
| "grad_norm": 0.25969740748405457, | |
| "learning_rate": 2.1476785327832714e-05, | |
| "loss": 0.6522, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 1.4565217391304348, | |
| "grad_norm": 0.2602309286594391, | |
| "learning_rate": 2.1165983894256647e-05, | |
| "loss": 0.6249, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 1.4608695652173913, | |
| "grad_norm": 0.35641685128211975, | |
| "learning_rate": 2.0856842820121082e-05, | |
| "loss": 0.6501, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 1.4652173913043478, | |
| "grad_norm": 0.32985907793045044, | |
| "learning_rate": 2.0549379906806815e-05, | |
| "loss": 0.6606, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 1.4695652173913043, | |
| "grad_norm": 0.22265039384365082, | |
| "learning_rate": 2.0243612859060524e-05, | |
| "loss": 0.6379, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 1.4739130434782608, | |
| "grad_norm": 0.30286267399787903, | |
| "learning_rate": 1.9939559283975235e-05, | |
| "loss": 0.6478, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 1.4782608695652173, | |
| "grad_norm": 0.22443290054798126, | |
| "learning_rate": 1.963723668997652e-05, | |
| "loss": 0.6414, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.482608695652174, | |
| "grad_norm": 0.2321448177099228, | |
| "learning_rate": 1.9336662485814178e-05, | |
| "loss": 0.6394, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 1.4869565217391305, | |
| "grad_norm": 0.21384797990322113, | |
| "learning_rate": 1.9037853979559923e-05, | |
| "loss": 0.6291, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.491304347826087, | |
| "grad_norm": 0.2781314551830292, | |
| "learning_rate": 1.8740828377610563e-05, | |
| "loss": 0.6307, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 1.4956521739130435, | |
| "grad_norm": 0.22772769629955292, | |
| "learning_rate": 1.8445602783697374e-05, | |
| "loss": 0.6425, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 0.22104138135910034, | |
| "learning_rate": 1.8152194197901085e-05, | |
| "loss": 0.6551, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.5043478260869565, | |
| "grad_norm": 0.25521254539489746, | |
| "learning_rate": 1.7860619515673033e-05, | |
| "loss": 0.6436, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.508695652173913, | |
| "grad_norm": 0.2593313753604889, | |
| "learning_rate": 1.7570895526862202e-05, | |
| "loss": 0.6583, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.5130434782608697, | |
| "grad_norm": 0.2515895962715149, | |
| "learning_rate": 1.7283038914748445e-05, | |
| "loss": 0.6311, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.517391304347826, | |
| "grad_norm": 0.2536557614803314, | |
| "learning_rate": 1.6997066255081794e-05, | |
| "loss": 0.6444, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.5217391304347827, | |
| "grad_norm": 0.2563648521900177, | |
| "learning_rate": 1.6712994015127976e-05, | |
| "loss": 0.628, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.526086956521739, | |
| "grad_norm": 0.21132583916187286, | |
| "learning_rate": 1.6430838552720168e-05, | |
| "loss": 0.6227, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.5304347826086957, | |
| "grad_norm": 0.2708798050880432, | |
| "learning_rate": 1.6150616115317052e-05, | |
| "loss": 0.6408, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.5347826086956522, | |
| "grad_norm": 0.2023913413286209, | |
| "learning_rate": 1.5872342839067306e-05, | |
| "loss": 0.6379, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.5391304347826087, | |
| "grad_norm": 0.2893276512622833, | |
| "learning_rate": 1.559603474788026e-05, | |
| "loss": 0.6585, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.5434782608695652, | |
| "grad_norm": 0.21205087006092072, | |
| "learning_rate": 1.5321707752503367e-05, | |
| "loss": 0.6464, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.5478260869565217, | |
| "grad_norm": 0.24209854006767273, | |
| "learning_rate": 1.5049377649605906e-05, | |
| "loss": 0.6453, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.5521739130434784, | |
| "grad_norm": 0.23982687294483185, | |
| "learning_rate": 1.4779060120869392e-05, | |
| "loss": 0.6179, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.5565217391304347, | |
| "grad_norm": 0.23865072429180145, | |
| "learning_rate": 1.451077073208455e-05, | |
| "loss": 0.6365, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.5608695652173914, | |
| "grad_norm": 0.34930044412612915, | |
| "learning_rate": 1.4244524932255027e-05, | |
| "loss": 0.6659, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.5652173913043477, | |
| "grad_norm": 0.21124225854873657, | |
| "learning_rate": 1.3980338052707736e-05, | |
| "loss": 0.6438, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.5695652173913044, | |
| "grad_norm": 0.292665034532547, | |
| "learning_rate": 1.3718225306210048e-05, | |
| "loss": 0.6369, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.5739130434782609, | |
| "grad_norm": 0.22756700217723846, | |
| "learning_rate": 1.3458201786093794e-05, | |
| "loss": 0.6442, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.5782608695652174, | |
| "grad_norm": 0.2244371622800827, | |
| "learning_rate": 1.3200282465386155e-05, | |
| "loss": 0.619, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.5826086956521739, | |
| "grad_norm": 0.2715966999530792, | |
| "learning_rate": 1.2944482195947383e-05, | |
| "loss": 0.638, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.5869565217391304, | |
| "grad_norm": 0.1995944380760193, | |
| "learning_rate": 1.2690815707615727e-05, | |
| "loss": 0.6367, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.591304347826087, | |
| "grad_norm": 0.2646836042404175, | |
| "learning_rate": 1.2439297607359118e-05, | |
| "loss": 0.6279, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.5956521739130434, | |
| "grad_norm": 0.2539423704147339, | |
| "learning_rate": 1.2189942378434083e-05, | |
| "loss": 0.6445, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 0.22100889682769775, | |
| "learning_rate": 1.1942764379551769e-05, | |
| "loss": 0.6169, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.6043478260869564, | |
| "grad_norm": 0.28634268045425415, | |
| "learning_rate": 1.1697777844051105e-05, | |
| "loss": 0.6075, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.608695652173913, | |
| "grad_norm": 0.19687707722187042, | |
| "learning_rate": 1.1454996879079206e-05, | |
| "loss": 0.6492, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.6130434782608696, | |
| "grad_norm": 0.28088298439979553, | |
| "learning_rate": 1.1214435464779006e-05, | |
| "loss": 0.6651, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 1.617391304347826, | |
| "grad_norm": 0.22760345041751862, | |
| "learning_rate": 1.0976107453484314e-05, | |
| "loss": 0.6238, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.6217391304347826, | |
| "grad_norm": 0.209342360496521, | |
| "learning_rate": 1.0740026568922058e-05, | |
| "loss": 0.6479, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 1.626086956521739, | |
| "grad_norm": 0.245696023106575, | |
| "learning_rate": 1.050620640542208e-05, | |
| "loss": 0.6494, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.6304347826086958, | |
| "grad_norm": 0.19594644010066986, | |
| "learning_rate": 1.027466042713428e-05, | |
| "loss": 0.6342, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.634782608695652, | |
| "grad_norm": 0.21066397428512573, | |
| "learning_rate": 1.0045401967253382e-05, | |
| "loss": 0.6338, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.6391304347826088, | |
| "grad_norm": 0.22265934944152832, | |
| "learning_rate": 9.818444227251089e-06, | |
| "loss": 0.6355, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 1.643478260869565, | |
| "grad_norm": 0.21390433609485626, | |
| "learning_rate": 9.593800276115978e-06, | |
| "loss": 0.6208, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.6478260869565218, | |
| "grad_norm": 0.19574061036109924, | |
| "learning_rate": 9.371483049600849e-06, | |
| "loss": 0.6381, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 1.6521739130434783, | |
| "grad_norm": 0.21275962889194489, | |
| "learning_rate": 9.151505349477902e-06, | |
| "loss": 0.6271, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.6565217391304348, | |
| "grad_norm": 0.1803780496120453, | |
| "learning_rate": 8.933879842801557e-06, | |
| "loss": 0.636, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 1.6608695652173913, | |
| "grad_norm": 0.20060917735099792, | |
| "learning_rate": 8.718619061179028e-06, | |
| "loss": 0.6584, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.6652173913043478, | |
| "grad_norm": 0.18945138156414032, | |
| "learning_rate": 8.505735400048748e-06, | |
| "loss": 0.6048, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 1.6695652173913045, | |
| "grad_norm": 0.1903897225856781, | |
| "learning_rate": 8.29524111796654e-06, | |
| "loss": 0.6535, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.6739130434782608, | |
| "grad_norm": 0.18091754615306854, | |
| "learning_rate": 8.087148335899786e-06, | |
| "loss": 0.6038, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.6782608695652175, | |
| "grad_norm": 0.18907198309898376, | |
| "learning_rate": 7.881469036529427e-06, | |
| "loss": 0.6443, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.6826086956521737, | |
| "grad_norm": 0.17433352768421173, | |
| "learning_rate": 7.678215063559956e-06, | |
| "loss": 0.6286, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 1.6869565217391305, | |
| "grad_norm": 0.2968502342700958, | |
| "learning_rate": 7.477398121037449e-06, | |
| "loss": 0.6433, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.691304347826087, | |
| "grad_norm": 0.18142454326152802, | |
| "learning_rate": 7.2790297726755716e-06, | |
| "loss": 0.6299, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 1.6956521739130435, | |
| "grad_norm": 0.19517359137535095, | |
| "learning_rate": 7.083121441189738e-06, | |
| "loss": 0.6303, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "grad_norm": 0.1804899126291275, | |
| "learning_rate": 6.889684407639324e-06, | |
| "loss": 0.6447, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 1.7043478260869565, | |
| "grad_norm": 0.16353510320186615, | |
| "learning_rate": 6.698729810778065e-06, | |
| "loss": 0.6367, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.7086956521739132, | |
| "grad_norm": 0.1956324279308319, | |
| "learning_rate": 6.510268646412665e-06, | |
| "loss": 0.6422, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.7130434782608694, | |
| "grad_norm": 0.19028721749782562, | |
| "learning_rate": 6.3243117667696305e-06, | |
| "loss": 0.6156, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.7173913043478262, | |
| "grad_norm": 0.19931164383888245, | |
| "learning_rate": 6.140869879870287e-06, | |
| "loss": 0.6275, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.7217391304347827, | |
| "grad_norm": 0.23204225301742554, | |
| "learning_rate": 5.9599535489143265e-06, | |
| "loss": 0.6575, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.7260869565217392, | |
| "grad_norm": 0.18730613589286804, | |
| "learning_rate": 5.781573191671386e-06, | |
| "loss": 0.6283, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 1.7304347826086957, | |
| "grad_norm": 0.1644641011953354, | |
| "learning_rate": 5.605739079881239e-06, | |
| "loss": 0.6217, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.7347826086956522, | |
| "grad_norm": 0.1922946721315384, | |
| "learning_rate": 5.432461338662309e-06, | |
| "loss": 0.6497, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.7391304347826086, | |
| "grad_norm": 0.18219797313213348, | |
| "learning_rate": 5.2617499459286126e-06, | |
| "loss": 0.6274, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.7434782608695651, | |
| "grad_norm": 0.19190607964992523, | |
| "learning_rate": 5.0936147318152e-06, | |
| "loss": 0.6222, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 1.7478260869565219, | |
| "grad_norm": 0.21981754899024963, | |
| "learning_rate": 4.928065378112106e-06, | |
| "loss": 0.6307, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.7521739130434781, | |
| "grad_norm": 0.15831628441810608, | |
| "learning_rate": 4.765111417706869e-06, | |
| "loss": 0.6478, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 1.7565217391304349, | |
| "grad_norm": 0.17769937217235565, | |
| "learning_rate": 4.604762234035548e-06, | |
| "loss": 0.6428, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.7608695652173914, | |
| "grad_norm": 0.19617512822151184, | |
| "learning_rate": 4.447027060542419e-06, | |
| "loss": 0.6274, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.7652173913043478, | |
| "grad_norm": 0.1862083375453949, | |
| "learning_rate": 4.29191498014826e-06, | |
| "loss": 0.6391, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.7695652173913043, | |
| "grad_norm": 0.1610027253627777, | |
| "learning_rate": 4.139434924727359e-06, | |
| "loss": 0.63, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 1.7739130434782608, | |
| "grad_norm": 0.16372613608837128, | |
| "learning_rate": 3.98959567459316e-06, | |
| "loss": 0.6546, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.7782608695652173, | |
| "grad_norm": 0.15922823548316956, | |
| "learning_rate": 3.842405857992714e-06, | |
| "loss": 0.6455, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 1.7826086956521738, | |
| "grad_norm": 0.19706743955612183, | |
| "learning_rate": 3.697873950609737e-06, | |
| "loss": 0.6611, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.7869565217391306, | |
| "grad_norm": 0.16771166026592255, | |
| "learning_rate": 3.55600827507665e-06, | |
| "loss": 0.632, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 1.7913043478260868, | |
| "grad_norm": 0.1845441460609436, | |
| "learning_rate": 3.4168170004952706e-06, | |
| "loss": 0.6235, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.7956521739130435, | |
| "grad_norm": 0.1645931601524353, | |
| "learning_rate": 3.2803081419664484e-06, | |
| "loss": 0.6347, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 0.1753637194633484, | |
| "learning_rate": 3.146489560128496e-06, | |
| "loss": 0.6361, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.8043478260869565, | |
| "grad_norm": 0.15960703790187836, | |
| "learning_rate": 3.0153689607045845e-06, | |
| "loss": 0.6511, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 1.808695652173913, | |
| "grad_norm": 0.16774137318134308, | |
| "learning_rate": 2.88695389405898e-06, | |
| "loss": 0.6509, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.8130434782608695, | |
| "grad_norm": 0.17161710560321808, | |
| "learning_rate": 2.761251754762295e-06, | |
| "loss": 0.6588, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 1.8173913043478263, | |
| "grad_norm": 0.15675298869609833, | |
| "learning_rate": 2.638269781165692e-06, | |
| "loss": 0.6296, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.8217391304347825, | |
| "grad_norm": 0.19153152406215668, | |
| "learning_rate": 2.518015054984041e-06, | |
| "loss": 0.6447, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 1.8260869565217392, | |
| "grad_norm": 0.16199903190135956, | |
| "learning_rate": 2.400494500888162e-06, | |
| "loss": 0.6302, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.8304347826086955, | |
| "grad_norm": 0.16786567866802216, | |
| "learning_rate": 2.2857148861060553e-06, | |
| "loss": 0.6488, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 1.8347826086956522, | |
| "grad_norm": 0.15710964798927307, | |
| "learning_rate": 2.1736828200332625e-06, | |
| "loss": 0.6505, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.8391304347826087, | |
| "grad_norm": 0.1546759009361267, | |
| "learning_rate": 2.0644047538522226e-06, | |
| "loss": 0.6267, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 1.8434782608695652, | |
| "grad_norm": 0.15734130144119263, | |
| "learning_rate": 1.957886980160817e-06, | |
| "loss": 0.6527, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.8478260869565217, | |
| "grad_norm": 0.15224894881248474, | |
| "learning_rate": 1.8541356326100433e-06, | |
| "loss": 0.6355, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 1.8521739130434782, | |
| "grad_norm": 0.2036304622888565, | |
| "learning_rate": 1.7531566855507441e-06, | |
| "loss": 0.637, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.856521739130435, | |
| "grad_norm": 0.1561633050441742, | |
| "learning_rate": 1.6549559536896964e-06, | |
| "loss": 0.6303, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 1.8608695652173912, | |
| "grad_norm": 0.16117189824581146, | |
| "learning_rate": 1.559539091754686e-06, | |
| "loss": 0.6292, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.865217391304348, | |
| "grad_norm": 0.16403424739837646, | |
| "learning_rate": 1.4669115941689183e-06, | |
| "loss": 0.6309, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 1.8695652173913042, | |
| "grad_norm": 0.1809576004743576, | |
| "learning_rate": 1.3770787947346597e-06, | |
| "loss": 0.6323, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.873913043478261, | |
| "grad_norm": 0.16379229724407196, | |
| "learning_rate": 1.2900458663260506e-06, | |
| "loss": 0.6443, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 1.8782608695652174, | |
| "grad_norm": 0.1786649525165558, | |
| "learning_rate": 1.2058178205912762e-06, | |
| "loss": 0.6559, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.882608695652174, | |
| "grad_norm": 0.16650472581386566, | |
| "learning_rate": 1.1243995076639535e-06, | |
| "loss": 0.6655, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 1.8869565217391304, | |
| "grad_norm": 0.16296634078025818, | |
| "learning_rate": 1.0457956158838544e-06, | |
| "loss": 0.6544, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.891304347826087, | |
| "grad_norm": 0.1720471829175949, | |
| "learning_rate": 9.700106715269387e-07, | |
| "loss": 0.6657, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 1.8956521739130436, | |
| "grad_norm": 0.1512221246957779, | |
| "learning_rate": 8.970490385447061e-07, | |
| "loss": 0.653, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "grad_norm": 0.17203445732593536, | |
| "learning_rate": 8.269149183128988e-07, | |
| "loss": 0.636, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 1.9043478260869566, | |
| "grad_norm": 0.1648741364479065, | |
| "learning_rate": 7.596123493895991e-07, | |
| "loss": 0.64, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.908695652173913, | |
| "grad_norm": 0.15940794348716736, | |
| "learning_rate": 6.951452072826548e-07, | |
| "loss": 0.6637, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 1.9130434782608696, | |
| "grad_norm": 0.19714687764644623, | |
| "learning_rate": 6.335172042265192e-07, | |
| "loss": 0.6387, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.9173913043478261, | |
| "grad_norm": 0.16102586686611176, | |
| "learning_rate": 5.747318889684883e-07, | |
| "loss": 0.6633, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 1.9217391304347826, | |
| "grad_norm": 0.14013265073299408, | |
| "learning_rate": 5.187926465643478e-07, | |
| "loss": 0.6505, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.9260869565217391, | |
| "grad_norm": 0.14478904008865356, | |
| "learning_rate": 4.6570269818346224e-07, | |
| "loss": 0.6453, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 1.9304347826086956, | |
| "grad_norm": 0.14230051636695862, | |
| "learning_rate": 4.1546510092327906e-07, | |
| "loss": 0.6186, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.9347826086956523, | |
| "grad_norm": 0.14860378205776215, | |
| "learning_rate": 3.6808274763328043e-07, | |
| "loss": 0.6459, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 1.9391304347826086, | |
| "grad_norm": 0.1445004940032959, | |
| "learning_rate": 3.235583667484443e-07, | |
| "loss": 0.6543, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.9434782608695653, | |
| "grad_norm": 0.1507095992565155, | |
| "learning_rate": 2.818945221320701e-07, | |
| "loss": 0.6255, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 1.9478260869565216, | |
| "grad_norm": 0.14174917340278625, | |
| "learning_rate": 2.4309361292820244e-07, | |
| "loss": 0.6475, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.9521739130434783, | |
| "grad_norm": 0.14581258594989777, | |
| "learning_rate": 2.0715787342343584e-07, | |
| "loss": 0.6374, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 1.9565217391304348, | |
| "grad_norm": 0.1401786357164383, | |
| "learning_rate": 1.7408937291829575e-07, | |
| "loss": 0.6383, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.9608695652173913, | |
| "grad_norm": 0.16463448107242584, | |
| "learning_rate": 1.4389001560803916e-07, | |
| "loss": 0.6131, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 1.9652173913043478, | |
| "grad_norm": 0.14031337201595306, | |
| "learning_rate": 1.1656154047303691e-07, | |
| "loss": 0.6276, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.9695652173913043, | |
| "grad_norm": 0.15517276525497437, | |
| "learning_rate": 9.210552117863703e-08, | |
| "loss": 0.6547, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 1.973913043478261, | |
| "grad_norm": 0.15215976536273956, | |
| "learning_rate": 7.052336598451503e-08, | |
| "loss": 0.6283, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.9782608695652173, | |
| "grad_norm": 0.15844860672950745, | |
| "learning_rate": 5.181631766362216e-08, | |
| "loss": 0.6497, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 1.982608695652174, | |
| "grad_norm": 0.16235847771167755, | |
| "learning_rate": 3.5985453430598116e-08, | |
| "loss": 0.6466, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.9869565217391303, | |
| "grad_norm": 0.14429187774658203, | |
| "learning_rate": 2.3031684879742944e-08, | |
| "loss": 0.6462, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 1.991304347826087, | |
| "grad_norm": 0.14863254129886627, | |
| "learning_rate": 1.2955757932542333e-08, | |
| "loss": 0.6382, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.9956521739130435, | |
| "grad_norm": 0.14549672603607178, | |
| "learning_rate": 5.758252794690888e-09, | |
| "loss": 0.6361, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.15147580206394196, | |
| "learning_rate": 1.4395839226910568e-09, | |
| "loss": 0.6525, | |
| "step": 460 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 460, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.7941728521827123e+19, | |
| "train_batch_size": 24, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |