{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 764,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.002617801047120419, "grad_norm": 12.144990537050639, "learning_rate": 5e-08, "loss": 1.2113, "step": 1 },
    { "epoch": 0.005235602094240838, "grad_norm": 13.176179220886064, "learning_rate": 1e-07, "loss": 1.3287, "step": 2 },
    { "epoch": 0.007853403141361256, "grad_norm": 12.924711168956318, "learning_rate": 1.5e-07, "loss": 1.2798, "step": 3 },
    { "epoch": 0.010471204188481676, "grad_norm": 12.21691289343528, "learning_rate": 2e-07, "loss": 1.2284, "step": 4 },
    { "epoch": 0.013089005235602094, "grad_norm": 11.997438945474764, "learning_rate": 2.5e-07, "loss": 1.2477, "step": 5 },
    { "epoch": 0.015706806282722512, "grad_norm": 12.217585739384367, "learning_rate": 3e-07, "loss": 1.2437, "step": 6 },
    { "epoch": 0.01832460732984293, "grad_norm": 11.875859411166187, "learning_rate": 3.5e-07, "loss": 1.2108, "step": 7 },
    { "epoch": 0.020942408376963352, "grad_norm": 11.64973795949067, "learning_rate": 4e-07, "loss": 1.2131, "step": 8 },
    { "epoch": 0.02356020942408377, "grad_norm": 12.45663018271384, "learning_rate": 4.5e-07, "loss": 1.2516, "step": 9 },
    { "epoch": 0.02617801047120419, "grad_norm": 11.239158096874556, "learning_rate": 5e-07, "loss": 1.1734, "step": 10 },
    { "epoch": 0.028795811518324606, "grad_norm": 11.710604360468333, "learning_rate": 5.5e-07, "loss": 1.2206, "step": 11 },
    { "epoch": 0.031413612565445025, "grad_norm": 11.118095241592131, "learning_rate": 6e-07, "loss": 1.2074, "step": 12 },
    { "epoch": 0.034031413612565446, "grad_norm": 11.770346865985067, "learning_rate": 6.5e-07, "loss": 1.2615, "step": 13 },
    { "epoch": 0.03664921465968586, "grad_norm": 11.44615754399185, "learning_rate": 7e-07, "loss": 1.2183, "step": 14 },
    { "epoch": 0.03926701570680628, "grad_norm": 9.987481097984933, "learning_rate": 7.5e-07, "loss": 1.1378, "step": 15 },
    { "epoch": 0.041884816753926704, "grad_norm": 10.087076189949745, "learning_rate": 8e-07, "loss": 1.2098, "step": 16 },
    { "epoch": 0.04450261780104712, "grad_norm": 10.02461667279938, "learning_rate": 8.499999999999999e-07, "loss": 1.1835, "step": 17 },
    { "epoch": 0.04712041884816754, "grad_norm": 8.79462248217657, "learning_rate": 9e-07, "loss": 1.1356, "step": 18 },
    { "epoch": 0.049738219895287955, "grad_norm": 8.628781591254402, "learning_rate": 9.499999999999999e-07, "loss": 1.1735, "step": 19 },
    { "epoch": 0.05235602094240838, "grad_norm": 7.738647331698055, "learning_rate": 1e-06, "loss": 1.1868, "step": 20 },
    { "epoch": 0.0549738219895288, "grad_norm": 6.233136791291174, "learning_rate": 1.05e-06, "loss": 1.151, "step": 21 },
    { "epoch": 0.05759162303664921, "grad_norm": 5.834711461409201, "learning_rate": 1.1e-06, "loss": 1.129, "step": 22 },
    { "epoch": 0.060209424083769635, "grad_norm": 5.0077704654863595, "learning_rate": 1.1499999999999998e-06, "loss": 1.1388, "step": 23 },
    { "epoch": 0.06282722513089005, "grad_norm": 3.854978376544968, "learning_rate": 1.2e-06, "loss": 1.052, "step": 24 },
    { "epoch": 0.06544502617801047, "grad_norm": 3.534169938073168, "learning_rate": 1.2499999999999999e-06, "loss": 1.0727, "step": 25 },
    { "epoch": 0.06806282722513089, "grad_norm": 3.2514927992093274, "learning_rate": 1.3e-06, "loss": 1.1347, "step": 26 },
    { "epoch": 0.07068062827225131, "grad_norm": 3.102034937556906, "learning_rate": 1.35e-06, "loss": 1.1357, "step": 27 },
    { "epoch": 0.07329842931937172, "grad_norm": 2.6056648897843577, "learning_rate": 1.4e-06, "loss": 1.0368, "step": 28 },
    { "epoch": 0.07591623036649214, "grad_norm": 5.81258653165808, "learning_rate": 1.4499999999999999e-06, "loss": 1.1151, "step": 29 },
    { "epoch": 0.07853403141361257, "grad_norm": 3.4877693307584723, "learning_rate": 1.5e-06, "loss": 1.1164, "step": 30 },
    { "epoch": 0.08115183246073299, "grad_norm": 4.02075317392796, "learning_rate": 1.55e-06, "loss": 1.1174, "step": 31 },
    { "epoch": 0.08376963350785341, "grad_norm": 3.7625924042569543, "learning_rate": 1.6e-06, "loss": 1.0247, "step": 32 },
    { "epoch": 0.08638743455497382, "grad_norm": 3.698847952235366, "learning_rate": 1.6499999999999999e-06, "loss": 1.0578, "step": 33 },
    { "epoch": 0.08900523560209424, "grad_norm": 3.4078618253433444, "learning_rate": 1.6999999999999998e-06, "loss": 1.0513, "step": 34 },
    { "epoch": 0.09162303664921466, "grad_norm": 3.2909646643289325, "learning_rate": 1.75e-06, "loss": 1.1018, "step": 35 },
    { "epoch": 0.09424083769633508, "grad_norm": 2.872238177892186, "learning_rate": 1.8e-06, "loss": 1.0762, "step": 36 },
    { "epoch": 0.0968586387434555, "grad_norm": 2.77121115764399, "learning_rate": 1.85e-06, "loss": 1.0735, "step": 37 },
    { "epoch": 0.09947643979057591, "grad_norm": 2.549417261476137, "learning_rate": 1.8999999999999998e-06, "loss": 1.0632, "step": 38 },
    { "epoch": 0.10209424083769633, "grad_norm": 2.190358595015251, "learning_rate": 1.95e-06, "loss": 1.0227, "step": 39 },
    { "epoch": 0.10471204188481675, "grad_norm": 2.5027525699752227, "learning_rate": 2e-06, "loss": 1.0837, "step": 40 },
    { "epoch": 0.10732984293193717, "grad_norm": 2.535128732687191, "learning_rate": 1.9999905856154088e-06, "loss": 1.0799, "step": 41 },
    { "epoch": 0.1099476439790576, "grad_norm": 1.7534174402541685, "learning_rate": 1.999962342638896e-06, "loss": 1.0124, "step": 42 },
    { "epoch": 0.112565445026178, "grad_norm": 2.0631738564308377, "learning_rate": 1.9999152716022427e-06, "loss": 1.1008, "step": 43 },
    { "epoch": 0.11518324607329843, "grad_norm": 2.005091740871665, "learning_rate": 1.9998493733917385e-06, "loss": 1.0374, "step": 44 },
    { "epoch": 0.11780104712041885, "grad_norm": 1.7413337245159195, "learning_rate": 1.999764649248165e-06, "loss": 1.0398, "step": 45 },
    { "epoch": 0.12041884816753927, "grad_norm": 1.745666445155081, "learning_rate": 1.999661100766774e-06, "loss": 1.0645, "step": 46 },
    { "epoch": 0.12303664921465969, "grad_norm": 1.8987977735185813, "learning_rate": 1.999538729897256e-06, "loss": 1.0614, "step": 47 },
    { "epoch": 0.1256544502617801, "grad_norm": 1.711040452338805, "learning_rate": 1.9993975389437036e-06, "loss": 1.0332, "step": 48 },
    { "epoch": 0.12827225130890052, "grad_norm": 1.6087094017934318, "learning_rate": 1.999237530564569e-06, "loss": 1.0008, "step": 49 },
    { "epoch": 0.13089005235602094, "grad_norm": 2.0042351547368686, "learning_rate": 1.9990587077726125e-06, "loss": 1.0768, "step": 50 },
    { "epoch": 0.13350785340314136, "grad_norm": 1.5264346842814094, "learning_rate": 1.998861073934848e-06, "loss": 1.0213, "step": 51 },
    { "epoch": 0.13612565445026178, "grad_norm": 1.7583198075470237, "learning_rate": 1.998644632772477e-06, "loss": 1.0103, "step": 52 },
    { "epoch": 0.1387434554973822, "grad_norm": 1.6841921629086405, "learning_rate": 1.99840938836082e-06, "loss": 1.0116, "step": 53 },
    { "epoch": 0.14136125654450263, "grad_norm": 9.859700163848672, "learning_rate": 1.9981553451292393e-06, "loss": 1.0429, "step": 54 },
    { "epoch": 0.14397905759162305, "grad_norm": 1.6093588714569955, "learning_rate": 1.9978825078610574e-06, "loss": 0.9722, "step": 55 },
    { "epoch": 0.14659685863874344, "grad_norm": 1.6921175654924636, "learning_rate": 1.9975908816934638e-06, "loss": 1.045, "step": 56 },
    { "epoch": 0.14921465968586387, "grad_norm": 1.4401701807904859, "learning_rate": 1.9972804721174198e-06, "loss": 1.0094, "step": 57 },
    { "epoch": 0.1518324607329843, "grad_norm": 1.7314403887490193, "learning_rate": 1.996951284977556e-06, "loss": 0.9334, "step": 58 },
    { "epoch": 0.1544502617801047, "grad_norm": 1.4035559785329323, "learning_rate": 1.9966033264720613e-06, "loss": 0.9635, "step": 59 },
    { "epoch": 0.15706806282722513, "grad_norm": 1.4996157907491292, "learning_rate": 1.9962366031525663e-06, "loss": 1.0347, "step": 60 },
    { "epoch": 0.15968586387434555, "grad_norm": 3.8626755595391287, "learning_rate": 1.9958511219240188e-06, "loss": 0.9453, "step": 61 },
    { "epoch": 0.16230366492146597, "grad_norm": 1.5545221856692377, "learning_rate": 1.9954468900445565e-06, "loss": 1.0431, "step": 62 },
    { "epoch": 0.1649214659685864, "grad_norm": 1.50894331448368, "learning_rate": 1.995023915125368e-06, "loss": 1.056, "step": 63 },
    { "epoch": 0.16753926701570682, "grad_norm": 1.6123663294471207, "learning_rate": 1.9945822051305507e-06, "loss": 0.9744, "step": 64 },
    { "epoch": 0.17015706806282724, "grad_norm": 2.002886641603634, "learning_rate": 1.9941217683769596e-06, "loss": 1.0396, "step": 65 },
    { "epoch": 0.17277486910994763, "grad_norm": 1.4061639592638548, "learning_rate": 1.9936426135340527e-06, "loss": 1.0162, "step": 66 },
    { "epoch": 0.17539267015706805, "grad_norm": 1.4744552229885421, "learning_rate": 1.9931447496237255e-06, "loss": 1.0339, "step": 67 },
    { "epoch": 0.17801047120418848, "grad_norm": 1.3373753426511383, "learning_rate": 1.9926281860201426e-06, "loss": 1.0123, "step": 68 },
    { "epoch": 0.1806282722513089, "grad_norm": 1.8346771940527866, "learning_rate": 1.992092932449561e-06, "loss": 1.0218, "step": 69 },
    { "epoch": 0.18324607329842932, "grad_norm": 1.5683702537742212, "learning_rate": 1.9915389989901473e-06, "loss": 0.9868, "step": 70 },
    { "epoch": 0.18586387434554974, "grad_norm": 1.6088369103380997, "learning_rate": 1.9909663960717854e-06, "loss": 1.0082, "step": 71 },
    { "epoch": 0.18848167539267016, "grad_norm": 1.6226263180260028, "learning_rate": 1.9903751344758845e-06, "loss": 1.0272, "step": 72 },
    { "epoch": 0.19109947643979058, "grad_norm": 1.519358495108808, "learning_rate": 1.9897652253351726e-06, "loss": 1.0006, "step": 73 },
    { "epoch": 0.193717277486911, "grad_norm": 1.7246977984441099, "learning_rate": 1.9891366801334875e-06, "loss": 1.0071, "step": 74 },
    { "epoch": 0.19633507853403143, "grad_norm": 1.885654171226984, "learning_rate": 1.9884895107055627e-06, "loss": 0.9659, "step": 75 },
    { "epoch": 0.19895287958115182, "grad_norm": 1.4255056172161928, "learning_rate": 1.987823729236801e-06, "loss": 0.9791, "step": 76 },
    { "epoch": 0.20157068062827224, "grad_norm": 1.4127676458511806, "learning_rate": 1.9871393482630486e-06, "loss": 0.9982, "step": 77 },
    { "epoch": 0.20418848167539266, "grad_norm": 1.6038922300951168, "learning_rate": 1.9864363806703567e-06, "loss": 1.0035, "step": 78 },
    { "epoch": 0.20680628272251309, "grad_norm": 1.6873794773106967, "learning_rate": 1.9857148396947403e-06, "loss": 1.0059, "step": 79 },
    { "epoch": 0.2094240837696335, "grad_norm": 1.4769795174016325, "learning_rate": 1.984974738921927e-06, "loss": 0.999, "step": 80 },
    { "epoch": 0.21204188481675393, "grad_norm": 1.595922250055431, "learning_rate": 1.9842160922871043e-06, "loss": 0.9853, "step": 81 },
    { "epoch": 0.21465968586387435, "grad_norm": 2.267409709264766, "learning_rate": 1.9834389140746535e-06, "loss": 1.0104, "step": 82 },
    { "epoch": 0.21727748691099477, "grad_norm": 1.4934157955551683, "learning_rate": 1.982643218917885e-06, "loss": 0.9612, "step": 83 },
    { "epoch": 0.2198952879581152, "grad_norm": 1.7746131869025623, "learning_rate": 1.9818290217987584e-06, "loss": 0.9525, "step": 84 },
    { "epoch": 0.22251308900523561, "grad_norm": 1.7233411954456554, "learning_rate": 1.980996338047604e-06, "loss": 1.035, "step": 85 },
    { "epoch": 0.225130890052356, "grad_norm": 1.4012107807745622, "learning_rate": 1.980145183342831e-06, "loss": 0.9657, "step": 86 },
    { "epoch": 0.22774869109947643, "grad_norm": 1.5160148683515746, "learning_rate": 1.9792755737106357e-06, "loss": 0.9691, "step": 87 },
    { "epoch": 0.23036649214659685, "grad_norm": 1.3553031139205232, "learning_rate": 1.978387525524697e-06, "loss": 0.9732, "step": 88 },
    { "epoch": 0.23298429319371727, "grad_norm": 2.3559259408150144, "learning_rate": 1.9774810555058694e-06, "loss": 0.968, "step": 89 },
    { "epoch": 0.2356020942408377, "grad_norm": 2.0405648874947144, "learning_rate": 1.976556180721867e-06, "loss": 1.0217, "step": 90 },
    { "epoch": 0.23821989528795812, "grad_norm": 1.289980797855502, "learning_rate": 1.975612918586944e-06, "loss": 0.973, "step": 91 },
    { "epoch": 0.24083769633507854, "grad_norm": 1.4893463969411602, "learning_rate": 1.9746512868615655e-06, "loss": 1.0109, "step": 92 },
    { "epoch": 0.24345549738219896, "grad_norm": 2.074068588481718, "learning_rate": 1.973671303652073e-06, "loss": 0.9831, "step": 93 },
    { "epoch": 0.24607329842931938, "grad_norm": 1.4717466896018212, "learning_rate": 1.972672987410345e-06, "loss": 0.9815, "step": 94 },
    { "epoch": 0.2486910994764398, "grad_norm": 1.3362580099120316, "learning_rate": 1.971656356933446e-06, "loss": 0.9627, "step": 95 },
    { "epoch": 0.2513089005235602, "grad_norm": 1.6316029366291782, "learning_rate": 1.970621431363278e-06, "loss": 0.9657, "step": 96 },
    { "epoch": 0.25392670157068065, "grad_norm": 1.7671135565160097, "learning_rate": 1.9695682301862154e-06, "loss": 0.9219, "step": 97 },
    { "epoch": 0.25654450261780104, "grad_norm": 1.5966004233267017, "learning_rate": 1.9684967732327396e-06, "loss": 1.0045, "step": 98 },
    { "epoch": 0.2591623036649215, "grad_norm": 1.4806542129023226, "learning_rate": 1.9674070806770667e-06, "loss": 0.9732, "step": 99 },
    { "epoch": 0.2617801047120419, "grad_norm": 1.5115625649689057, "learning_rate": 1.9662991730367663e-06, "loss": 0.9692, "step": 100 },
    { "epoch": 0.2643979057591623, "grad_norm": 1.6408654241651082, "learning_rate": 1.965173071172375e-06, "loss": 1.0782, "step": 101 },
    { "epoch": 0.2670157068062827, "grad_norm": 1.58266015019036, "learning_rate": 1.9640287962870057e-06, "loss": 0.9532, "step": 102 },
    { "epoch": 0.2696335078534031, "grad_norm": 1.39253004882154, "learning_rate": 1.962866369925946e-06, "loss": 0.9742, "step": 103 },
    { "epoch": 0.27225130890052357, "grad_norm": 1.4246820152864472, "learning_rate": 1.9616858139762532e-06, "loss": 1.0196, "step": 104 },
    { "epoch": 0.27486910994764396, "grad_norm": 1.5013590811914759, "learning_rate": 1.960487150666343e-06, "loss": 1.0238, "step": 105 },
    { "epoch": 0.2774869109947644, "grad_norm": 1.440692442657796, "learning_rate": 1.95927040256557e-06, "loss": 1.0166, "step": 106 },
    { "epoch": 0.2801047120418848, "grad_norm": 1.5426181884013244, "learning_rate": 1.958035592583803e-06, "loss": 0.9635, "step": 107 },
    { "epoch": 0.28272251308900526, "grad_norm": 1.3715243788491147, "learning_rate": 1.956782743970995e-06, "loss": 0.972, "step": 108 },
    { "epoch": 0.28534031413612565, "grad_norm": 1.6076440082600667, "learning_rate": 1.955511880316743e-06, "loss": 0.9634, "step": 109 },
    { "epoch": 0.2879581151832461, "grad_norm": 1.322996372089846, "learning_rate": 1.9542230255498453e-06, "loss": 0.946, "step": 110 },
    { "epoch": 0.2905759162303665, "grad_norm": 1.6508002306297551, "learning_rate": 1.9529162039378505e-06, "loss": 1.0146, "step": 111 },
    { "epoch": 0.2931937172774869, "grad_norm": 1.9140720133949438, "learning_rate": 1.951591440086602e-06, "loss": 0.9794, "step": 112 },
    { "epoch": 0.29581151832460734, "grad_norm": 1.301517922409106, "learning_rate": 1.9502487589397717e-06, "loss": 0.9955, "step": 113 },
    { "epoch": 0.29842931937172773, "grad_norm": 1.6752384604989274, "learning_rate": 1.948888185778393e-06, "loss": 0.9614, "step": 114 },
    { "epoch": 0.3010471204188482, "grad_norm": 1.6669201309688906, "learning_rate": 1.947509746220385e-06, "loss": 0.9596, "step": 115 },
    { "epoch": 0.3036649214659686, "grad_norm": 1.6186884886800548, "learning_rate": 1.9461134662200666e-06, "loss": 0.966, "step": 116 },
    { "epoch": 0.306282722513089, "grad_norm": 1.4525901118285445, "learning_rate": 1.9446993720676725e-06, "loss": 0.9637, "step": 117 },
    { "epoch": 0.3089005235602094, "grad_norm": 1.3926685659345446, "learning_rate": 1.9432674903888547e-06, "loss": 0.9562, "step": 118 },
    { "epoch": 0.31151832460732987, "grad_norm": 1.5687485197442492, "learning_rate": 1.941817848144183e-06, "loss": 1.003, "step": 119 },
    { "epoch": 0.31413612565445026, "grad_norm": 1.3452828778510757, "learning_rate": 1.9403504726286365e-06, "loss": 0.976, "step": 120 },
    { "epoch": 0.31675392670157065, "grad_norm": 1.3816617452534992, "learning_rate": 1.93886539147109e-06, "loss": 0.9599, "step": 121 },
    { "epoch": 0.3193717277486911, "grad_norm": 1.481897037058517, "learning_rate": 1.9373626326337944e-06, "loss": 0.9731, "step": 122 },
    { "epoch": 0.3219895287958115, "grad_norm": 1.413329266686157, "learning_rate": 1.9358422244118486e-06, "loss": 0.9783, "step": 123 },
    { "epoch": 0.32460732984293195, "grad_norm": 2.9010127845249385, "learning_rate": 1.9343041954326677e-06, "loss": 0.9777, "step": 124 },
    { "epoch": 0.32722513089005234, "grad_norm": 1.2853594407436248, "learning_rate": 1.932748574655445e-06, "loss": 0.9784, "step": 125 },
    { "epoch": 0.3298429319371728, "grad_norm": 1.6563678696752941, "learning_rate": 1.931175391370605e-06, "loss": 0.9591, "step": 126 },
    { "epoch": 0.3324607329842932, "grad_norm": 1.4153593487098186, "learning_rate": 1.929584675199252e-06, "loss": 0.9433, "step": 127 },
    { "epoch": 0.33507853403141363, "grad_norm": 1.4853036785401517, "learning_rate": 1.927976456092614e-06, "loss": 0.9195, "step": 128 },
    { "epoch": 0.337696335078534, "grad_norm": 1.6653698839653936, "learning_rate": 1.9263507643314775e-06, "loss": 0.9711, "step": 129 },
    { "epoch": 0.3403141361256545, "grad_norm": 1.5513246503570877, "learning_rate": 1.9247076305256173e-06, "loss": 1.0266, "step": 130 },
    { "epoch": 0.34293193717277487, "grad_norm": 1.9279366482258156, "learning_rate": 1.923047085613221e-06, "loss": 1.0001, "step": 131 },
    { "epoch": 0.34554973821989526, "grad_norm": 1.4339951925997667, "learning_rate": 1.9213691608603046e-06, "loss": 1.003, "step": 132 },
    { "epoch": 0.3481675392670157, "grad_norm": 1.3521774084798501, "learning_rate": 1.9196738878601262e-06, "loss": 0.9748, "step": 133 },
    { "epoch": 0.3507853403141361, "grad_norm": 1.4539030309601608, "learning_rate": 1.9179612985325907e-06, "loss": 0.9544, "step": 134 },
    { "epoch": 0.35340314136125656, "grad_norm": 1.9287872837043116, "learning_rate": 1.9162314251236464e-06, "loss": 0.9649, "step": 135 },
    { "epoch": 0.35602094240837695, "grad_norm": 1.426685422908151, "learning_rate": 1.9144843002046803e-06, "loss": 1.0246, "step": 136 },
    { "epoch": 0.3586387434554974, "grad_norm": 1.6129363461024815, "learning_rate": 1.912719956671905e-06, "loss": 0.9603, "step": 137 },
    { "epoch": 0.3612565445026178, "grad_norm": 1.4893083520917, "learning_rate": 1.9109384277457366e-06, "loss": 0.9644, "step": 138 },
    { "epoch": 0.36387434554973824, "grad_norm": 1.4422789845290571, "learning_rate": 1.9091397469701734e-06, "loss": 1.0022, "step": 139 },
    { "epoch": 0.36649214659685864, "grad_norm": 1.379089386140381, "learning_rate": 1.9073239482121597e-06, "loss": 0.977, "step": 140 },
    { "epoch": 0.36910994764397903, "grad_norm": 1.2902251521449346, "learning_rate": 1.905491065660951e-06, "loss": 0.9479, "step": 141 },
    { "epoch": 0.3717277486910995, "grad_norm": 1.4849260045966897, "learning_rate": 1.9036411338274702e-06, "loss": 0.9535, "step": 142 },
    { "epoch": 0.3743455497382199, "grad_norm": 1.4742367033559105, "learning_rate": 1.9017741875436569e-06, "loss": 0.9692, "step": 143 },
    { "epoch": 0.3769633507853403, "grad_norm": 1.7554436015022166, "learning_rate": 1.8998902619618114e-06, "loss": 0.9279, "step": 144 },
    { "epoch": 0.3795811518324607, "grad_norm": 1.4010842342135992, "learning_rate": 1.8979893925539336e-06, "loss": 0.9423, "step": 145 },
    { "epoch": 0.38219895287958117, "grad_norm": 1.497443248481895, "learning_rate": 1.8960716151110553e-06, "loss": 0.9848, "step": 146 },
    { "epoch": 0.38481675392670156, "grad_norm": 1.5982300331675101, "learning_rate": 1.894136965742565e-06, "loss": 0.9691, "step": 147 },
    { "epoch": 0.387434554973822, "grad_norm": 1.371150523732322, "learning_rate": 1.8921854808755292e-06, "loss": 0.9548, "step": 148 },
    { "epoch": 0.3900523560209424, "grad_norm": 1.7867184813381989, "learning_rate": 1.8902171972540058e-06, "loss": 0.985, "step": 149 },
    { "epoch": 0.39267015706806285, "grad_norm": 1.4222923909172587, "learning_rate": 1.8882321519383533e-06, "loss": 0.9473, "step": 150 },
    { "epoch": 0.39528795811518325, "grad_norm": 1.4430633539722946, "learning_rate": 1.886230382304531e-06, "loss": 0.945, "step": 151 },
    { "epoch": 0.39790575916230364, "grad_norm": 1.4120566052672336, "learning_rate": 1.884211926043398e-06, "loss": 0.9377, "step": 152 },
    { "epoch": 0.4005235602094241, "grad_norm": 1.5491012831054978, "learning_rate": 1.882176821160001e-06, "loss": 0.9694, "step": 153 },
    { "epoch": 0.4031413612565445, "grad_norm": 1.702124781114296, "learning_rate": 1.8801251059728602e-06, "loss": 0.9713, "step": 154 },
    { "epoch": 0.40575916230366493, "grad_norm": 1.2951128973054504, "learning_rate": 1.878056819113247e-06, "loss": 0.9355, "step": 155 },
    { "epoch": 0.4083769633507853, "grad_norm": 1.3183333685080236, "learning_rate": 1.875971999524458e-06, "loss": 0.9591, "step": 156 },
    { "epoch": 0.4109947643979058, "grad_norm": 1.4206883250837021, "learning_rate": 1.8738706864610791e-06, "loss": 0.9724, "step": 157 },
    { "epoch": 0.41361256544502617, "grad_norm": 1.3491536224151637, "learning_rate": 1.8717529194882497e-06, "loss": 0.9596, "step": 158 },
    { "epoch": 0.4162303664921466, "grad_norm": 1.6067238922960525, "learning_rate": 1.8696187384809153e-06, "loss": 0.9874, "step": 159 },
    { "epoch": 0.418848167539267, "grad_norm": 2.170081739206507, "learning_rate": 1.8674681836230768e-06, "loss": 0.9393, "step": 160 },
    { "epoch": 0.4214659685863874, "grad_norm": 1.6523206816354508, "learning_rate": 1.8653012954070356e-06, "loss": 0.9912, "step": 161 },
    { "epoch": 0.42408376963350786, "grad_norm": 1.7458545642560128, "learning_rate": 1.8631181146326303e-06, "loss": 0.9163, "step": 162 },
    { "epoch": 0.42670157068062825, "grad_norm": 1.551930694859062, "learning_rate": 1.860918682406467e-06, "loss": 0.9959, "step": 163 },
    { "epoch": 0.4293193717277487, "grad_norm": 1.3800307543491146, "learning_rate": 1.8587030401411478e-06, "loss": 0.944, "step": 164 },
    { "epoch": 0.4319371727748691, "grad_norm": 1.5733297290745525, "learning_rate": 1.8564712295544892e-06, "loss": 0.9952, "step": 165 },
    { "epoch": 0.43455497382198954, "grad_norm": 1.69702400357591, "learning_rate": 1.8542232926687382e-06, "loss": 0.9765, "step": 166 },
    { "epoch": 0.43717277486910994, "grad_norm": 1.4210131276532727, "learning_rate": 1.851959271809779e-06, "loss": 0.9644, "step": 167 },
    { "epoch": 0.4397905759162304, "grad_norm": 1.3705469469324443, "learning_rate": 1.8496792096063379e-06, "loss": 0.9784, "step": 168 },
    { "epoch": 0.4424083769633508, "grad_norm": 1.367481685410119, "learning_rate": 1.8473831489891798e-06, "loss": 0.9487, "step": 169 },
    { "epoch": 0.44502617801047123, "grad_norm": 1.4039844158067947, "learning_rate": 1.8450711331903005e-06, "loss": 0.9287, "step": 170 },
    { "epoch": 0.4476439790575916, "grad_norm": 1.9060472632401462, "learning_rate": 1.8427432057421113e-06, "loss": 0.9887, "step": 171 },
    { "epoch": 0.450261780104712, "grad_norm": 1.4101639997739956, "learning_rate": 1.8403994104766212e-06, "loss": 0.9732, "step": 172 },
    { "epoch": 0.45287958115183247, "grad_norm": 1.2872899994123914, "learning_rate": 1.83803979152461e-06, "loss": 0.934, "step": 173 },
    { "epoch": 0.45549738219895286, "grad_norm": 1.5676491316951058, "learning_rate": 1.8356643933147985e-06, "loss": 0.9706, "step": 174 },
    { "epoch": 0.4581151832460733, "grad_norm": 1.4853491925545348, "learning_rate": 1.8332732605730109e-06, "loss": 0.9548, "step": 175 },
    { "epoch": 0.4607329842931937, "grad_norm": 1.4216585779544653, "learning_rate": 1.8308664383213342e-06, "loss": 0.9953, "step": 176 },
    { "epoch": 0.46335078534031415, "grad_norm": 1.283197939445289, "learning_rate": 1.8284439718772687e-06, "loss": 0.9058, "step": 177 },
    { "epoch": 0.46596858638743455, "grad_norm": 1.4393324681794193, "learning_rate": 1.8260059068528762e-06, "loss": 0.9455, "step": 178 },
    { "epoch": 0.468586387434555, "grad_norm": 1.4129111621276607, "learning_rate": 1.82355228915392e-06, "loss": 0.9674, "step": 179 },
    { "epoch": 0.4712041884816754, "grad_norm": 1.5281499729812038, "learning_rate": 1.8210831649790015e-06, "loss": 0.9451, "step": 180 },
    { "epoch": 0.4738219895287958, "grad_norm": 1.5870948861953107, "learning_rate": 1.8185985808186901e-06, "loss": 0.976, "step": 181 },
    { "epoch": 0.47643979057591623, "grad_norm": 1.4461944444081016, "learning_rate": 1.8160985834546474e-06, "loss": 0.9872, "step": 182 },
    { "epoch": 0.4790575916230366, "grad_norm": 1.572493945220242, "learning_rate": 1.813583219958746e-06, "loss": 0.9677, "step": 183 },
    { "epoch": 0.4816753926701571, "grad_norm": 1.488313044626855, "learning_rate": 1.811052537692186e-06, "loss": 0.9853, "step": 184 },
    { "epoch": 0.48429319371727747, "grad_norm": 1.3580335854042254, "learning_rate": 1.8085065843045986e-06, "loss": 0.9668, "step": 185 },
    { "epoch": 0.4869109947643979, "grad_norm": 1.3632089604738407, "learning_rate": 1.8059454077331526e-06, "loss": 0.9483, "step": 186 },
    { "epoch": 0.4895287958115183, "grad_norm": 1.3238305682165803, "learning_rate": 1.8033690562016507e-06, "loss": 0.958, "step": 187 },
    { "epoch": 0.49214659685863876, "grad_norm": 1.3836133731386677, "learning_rate": 1.8007775782196212e-06, "loss": 0.901, "step": 188 },
    { "epoch": 0.49476439790575916, "grad_norm": 1.5098701097333365, "learning_rate": 1.798171022581405e-06, "loss": 0.9208, "step": 189 },
    { "epoch": 0.4973821989528796, "grad_norm": 1.3315669287435203, "learning_rate": 1.7955494383652364e-06, "loss": 0.9957, "step": 190 },
    { "epoch": 0.5, "grad_norm": 1.4430105607942247, "learning_rate": 1.7929128749323193e-06, "loss": 0.9629, "step": 191 },
    { "epoch": 0.5026178010471204, "grad_norm": 1.3748095197619064, "learning_rate": 1.7902613819258983e-06, "loss": 0.9728, "step": 192 },
    { "epoch": 0.5052356020942408, "grad_norm": 1.4636464184043185, "learning_rate": 1.7875950092703232e-06, "loss": 0.8843, "step": 193 },
    { "epoch": 0.5078534031413613, "grad_norm": 1.3206431630831412, "learning_rate": 1.784913807170109e-06, "loss": 0.964, "step": 194 },
    { "epoch": 0.5104712041884817, "grad_norm": 1.3494139964974532, "learning_rate": 1.7822178261089917e-06, "loss": 0.955, "step": 195 },
    { "epoch": 0.5130890052356021, "grad_norm": 1.3451367536689751, "learning_rate": 1.7795071168489759e-06, "loss": 0.9491, "step": 196 },
    { "epoch": 0.5157068062827225, "grad_norm": 1.5037258899078974, "learning_rate": 1.776781730429381e-06, "loss": 0.9859, "step": 197 },
    { "epoch": 0.518324607329843, "grad_norm": 1.341930800019914, "learning_rate": 1.7740417181658787e-06, "loss": 0.9903, "step": 198 },
    { "epoch": 0.5209424083769634, "grad_norm": 1.7348664326730883, "learning_rate": 1.771287131649527e-06, "loss": 0.97, "step": 199 },
    { "epoch": 0.5235602094240838, "grad_norm": 1.6744990474369295, "learning_rate": 1.7685180227458e-06, "loss": 0.9286, "step": 200 },
    { "epoch": 0.5261780104712042, "grad_norm": 1.4602031412588528, "learning_rate": 1.7657344435936106e-06, "loss": 0.9064, "step": 201 },
    { "epoch": 0.5287958115183246, "grad_norm": 1.315375736937206, "learning_rate": 1.762936446604327e-06, "loss": 0.9298, "step": 202 },
    { "epoch": 0.5314136125654451, "grad_norm": 1.594576053216995, "learning_rate": 1.76012408446079e-06, "loss": 0.9615, "step": 203 },
    { "epoch": 0.5340314136125655, "grad_norm": 1.3869099290197058, "learning_rate": 1.7572974101163163e-06, "loss": 0.9677, "step": 204 },
    { "epoch": 0.5366492146596858, "grad_norm": 3.9344634005104564, "learning_rate": 1.7544564767937046e-06, "loss": 1.0005, "step": 205 },
    { "epoch": 0.5392670157068062, "grad_norm": 1.5834820308294422, "learning_rate": 1.7516013379842336e-06, "loss": 1.0007, "step": 206 },
    { "epoch": 0.5418848167539267, "grad_norm": 1.3504523420206829, "learning_rate": 1.7487320474466523e-06, "loss": 0.904, "step": 207 },
    { "epoch": 0.5445026178010471, "grad_norm": 1.4968877215079561, "learning_rate": 1.74584865920617e-06, "loss": 0.9503, "step": 208 },
    { "epoch": 0.5471204188481675, "grad_norm": 1.4700348286966616, "learning_rate": 1.742951227553438e-06, "loss": 0.9261, "step": 209 },
    { "epoch": 0.5497382198952879, "grad_norm": 1.2537017291057395, "learning_rate": 1.7400398070435292e-06, "loss": 0.8954, "step": 210 },
    { "epoch": 0.5523560209424084, "grad_norm": 1.4930050854123005, "learning_rate": 1.7371144524949073e-06, "loss": 0.9453, "step": 211 },
    { "epoch": 0.5549738219895288, "grad_norm": 1.2707287989257905, "learning_rate": 1.734175218988398e-06, "loss": 0.8907, "step": 212 },
    { "epoch": 0.5575916230366492, "grad_norm": 1.4475456324495655, "learning_rate": 1.7312221618661514e-06, "loss": 0.9423, "step": 213 },
    { "epoch": 0.5602094240837696, "grad_norm": 1.4619960180732723, "learning_rate": 1.7282553367305975e-06, "loss": 0.9778, "step": 214 },
    { "epoch": 0.56282722513089, "grad_norm": 1.4264214578023766, "learning_rate": 1.7252747994434022e-06, "loss": 0.9902, "step": 215 },
    { "epoch": 0.5654450261780105, "grad_norm": 1.2942895079135885, "learning_rate": 1.7222806061244147e-06, "loss": 0.9354, "step": 216 },
    { "epoch": 0.5680628272251309, "grad_norm": 1.266395336298489, "learning_rate": 1.7192728131506092e-06, "loss": 0.9379, "step": 217 },
    { "epoch": 0.5706806282722513, "grad_norm": 1.308287162023205, "learning_rate": 1.7162514771550253e-06, "loss": 0.9487, "step": 218 },
    { "epoch": 0.5732984293193717, "grad_norm": 1.271075019304684, "learning_rate": 1.7132166550257017e-06, "loss": 0.9369, "step": 219 },
    { "epoch": 0.5759162303664922, "grad_norm": 1.3338935943608123, "learning_rate": 1.7101684039046037e-06, "loss": 0.9609, "step": 220 },
    { "epoch": 0.5785340314136126, "grad_norm": 1.4007771729449567, "learning_rate": 1.7071067811865474e-06, "loss": 0.9484, "step": 221 },
    { "epoch": 0.581151832460733, "grad_norm": 1.4937516535328665, "learning_rate": 1.7040318445181207e-06, "loss": 0.9823, "step": 222 },
    { "epoch": 0.5837696335078534, "grad_norm": 1.398159578570716, "learning_rate": 1.700943651796597e-06, "loss": 0.946, "step": 223 },
    { "epoch": 0.5863874345549738, "grad_norm": 1.4551713677795557, "learning_rate": 1.697842261168843e-06, "loss": 0.9853, "step": 224 },
    { "epoch": 0.5890052356020943, "grad_norm": 1.3054117416156679, "learning_rate": 1.6947277310302282e-06, "loss": 0.942, "step": 225 },
    { "epoch": 0.5916230366492147, "grad_norm": 1.3573897273220095, "learning_rate": 1.6916001200235207e-06, "loss": 0.9133, "step": 226 },
    { "epoch": 0.5942408376963351, "grad_norm": 1.6785599921639411, "learning_rate": 1.6884594870377869e-06, "loss": 1.0038, "step": 227 },
    { "epoch": 0.5968586387434555, "grad_norm": 1.4459897239045543, "learning_rate": 1.68530589120728e-06, "loss": 0.9801, "step": 228 },
    { "epoch": 0.599476439790576, "grad_norm": 1.375845613183054, "learning_rate": 1.682139391910328e-06, "loss": 0.9485, "step": 229 },
    { "epoch": 0.6020942408376964, "grad_norm": 1.397560038167906, "learning_rate": 1.6789600487682153e-06, "loss": 0.9049, "step": 230 },
    { "epoch": 0.6047120418848168, "grad_norm": 1.5103238712813247, "learning_rate": 1.6757679216440605e-06, "loss": 0.9194, "step": 231 },
    { "epoch": 0.6073298429319371, "grad_norm": 1.4527282751393369, "learning_rate": 1.672563070641688e-06, "loss": 0.9514, "step": 232 },
    { "epoch": 0.6099476439790575, "grad_norm": 1.5949625620890675, "learning_rate": 1.6693455561044975e-06, "loss": 0.9429, "step": 233 },
    { "epoch": 0.612565445026178, "grad_norm": 1.6225622102603658, "learning_rate": 1.666115438614328e-06, "loss": 0.9081, "step": 234 },
    { "epoch": 0.6151832460732984, "grad_norm": 2.222270565423305, "learning_rate": 1.662872778990316e-06, "loss": 1.0294, "step": 235 },
    { "epoch": 0.6178010471204188, "grad_norm": 1.4866785885036309, "learning_rate": 1.6596176382877504e-06, "loss": 0.9904, "step": 236 },
    { "epoch": 0.6204188481675392, "grad_norm": 1.339889347899888, "learning_rate": 1.6563500777969252e-06, "loss": 0.935, "step": 237 },
    { "epoch": 0.6230366492146597, "grad_norm": 1.456584804791214, "learning_rate": 1.6530701590419823e-06, "loss": 0.933, "step": 238 },
    { "epoch": 0.6256544502617801, "grad_norm": 1.6334561208762561, "learning_rate": 1.6497779437797546e-06, "loss": 0.9932, "step": 239 },
    { "epoch": 0.6282722513089005, "grad_norm": 1.4843297692175943, "learning_rate": 1.6464734939986035e-06, "loss": 0.9969, "step": 240 },
    { "epoch": 0.6308900523560209, "grad_norm": 1.4307606437352334, "learning_rate": 1.6431568719172513e-06, "loss": 0.9282, "step": 241 },
    { "epoch": 0.6335078534031413, "grad_norm": 1.4227744118845083, "learning_rate": 1.6398281399836097e-06, "loss": 0.9435, "step": 242 },
    { "epoch": 0.6361256544502618, "grad_norm": 1.42172179895148, "learning_rate": 1.6364873608736035e-06, "loss": 0.9205, "step": 243 },
    { "epoch": 0.6387434554973822, "grad_norm": 1.3401809099812794, "learning_rate": 1.6331345974899922e-06, "loss": 0.9474, "step": 244 },
    { "epoch": 0.6413612565445026, "grad_norm": 1.4648547641535852, "learning_rate": 1.629769912961183e-06, "loss": 0.9629, "step": 245 },
    { "epoch": 0.643979057591623, "grad_norm": 1.4527950553735653, "learning_rate": 1.626393370640045e-06, "loss": 0.873, "step": 246 },
    { "epoch": 0.6465968586387435, "grad_norm": 1.455801277807891, "learning_rate": 1.6230050341027133e-06, "loss": 0.9389, "step": 247 },
    { "epoch": 0.6492146596858639, "grad_norm": 1.3597683147157529, "learning_rate": 1.6196049671473952e-06, "loss": 0.9622, "step": 248 },
    { "epoch": 0.6518324607329843, "grad_norm": 1.3452857712103874, "learning_rate": 1.616193233793166e-06, "loss": 0.9423, "step": 249 },
    { "epoch": 0.6544502617801047, "grad_norm": 5.437851500838725, "learning_rate": 1.612769898278766e-06, "loss": 0.9624, "step": 250 },
    { "epoch": 0.6570680628272252, "grad_norm": 1.5102856532376654, "learning_rate": 1.6093350250613892e-06, "loss": 0.979, "step": 251 },
    { "epoch": 0.6596858638743456, "grad_norm": 1.4743192601344492, "learning_rate": 1.605888678815471e-06, "loss": 0.9569, "step": 252 },
    { "epoch": 0.662303664921466, "grad_norm": 1.5393143829011873, "learning_rate": 1.602430924431469e-06, "loss": 0.9629, "step": 253 },
    { "epoch": 0.6649214659685864, "grad_norm": 1.5737174699578425, "learning_rate": 1.5989618270146422e-06, "loss": 0.9639, "step": 254 },
    { "epoch": 0.6675392670157068, "grad_norm": 4.258134694492717, "learning_rate": 1.5954814518838253e-06, "loss": 0.9198, "step": 255 },
    { "epoch": 0.6701570680628273, "grad_norm": 1.4218596129552161, "learning_rate": 1.5919898645701987e-06, "loss": 0.886, "step": 256 },
    { "epoch": 0.6727748691099477, "grad_norm": 1.5211636215659439, "learning_rate": 1.5884871308160536e-06, "loss": 0.9175, "step": 257 },
    { "epoch": 0.675392670157068, "grad_norm": 1.4773591575654617, "learning_rate": 1.5849733165735555e-06, "loss": 0.9014, "step": 258 },
    { "epoch": 0.6780104712041884, "grad_norm": 1.5157264963354438, "learning_rate": 1.5814484880035016e-06, "loss": 0.9516, "step": 259 },
    { "epoch": 0.680628272251309, "grad_norm": 1.3470517687326489, "learning_rate": 1.5779127114740755e-06, "loss": 0.912, "step": 260 },
    { "epoch": 0.6832460732984293, "grad_norm": 1.349831010666242, "learning_rate": 1.5743660535595975e-06, "loss": 0.8723, "step": 261 },
    { "epoch": 0.6858638743455497, "grad_norm": 1.4458453237757587, "learning_rate": 1.5708085810392705e-06, "loss": 0.9299, "step": 262 },
    { "epoch": 0.6884816753926701, "grad_norm": 1.4350060007388417, "learning_rate": 1.567240360895924e-06, "loss": 0.9602, "step": 263 },
    { "epoch": 0.6910994764397905, "grad_norm": 1.4063518232729058, "learning_rate": 1.563661460314751e-06, "loss": 0.9271, "step": 264 },
    { "epoch": 0.693717277486911, "grad_norm": 1.2949234623299979, "learning_rate": 1.5600719466820447e-06, "loss": 0.9348, "step": 265 },
    { "epoch": 0.6963350785340314, "grad_norm": 1.399942819545271, "learning_rate": 1.5564718875839287e-06, "loss": 0.9577, "step": 266 },
    { "epoch": 0.6989528795811518, "grad_norm": 1.3541499365962402, "learning_rate": 1.5528613508050847e-06, "loss": 0.9818, "step": 267 },
    { "epoch": 0.7015706806282722, "grad_norm": 1.472944664577557, "learning_rate": 1.5492404043274767e-06, "loss": 1.0009, "step": 268 },
    { "epoch": 0.7041884816753927, "grad_norm": 4.017991300664643, "learning_rate": 1.5456091163290697e-06, "loss": 0.9481, "step": 269 },
    { "epoch": 0.7068062827225131, "grad_norm": 1.4031375152179757, "learning_rate": 1.5419675551825472e-06, "loss": 0.9454, "step": 270 },
    { "epoch": 0.7094240837696335, "grad_norm": 1.3949814525905722, "learning_rate": 1.5383157894540242e-06, "loss": 0.9701, "step": 271 },
    { "epoch": 0.7120418848167539, "grad_norm": 1.4769482292493297, "learning_rate": 1.5346538879017538e-06, "loss": 0.9386, "step": 272 },
    { "epoch": 0.7146596858638743, "grad_norm": 1.2860864329400274, "learning_rate": 1.5309819194748359e-06, "loss": 0.9, "step": 273 },
    { "epoch": 0.7172774869109948, "grad_norm": 1.3727353556535293, "learning_rate": 1.5272999533119162e-06, "loss": 0.9805, "step": 274 },
    { "epoch": 0.7198952879581152, "grad_norm": 2.722418651884381, "learning_rate": 1.5236080587398853e-06, "loss": 0.8907, "step": 275 },
    { "epoch": 0.7225130890052356, "grad_norm": 1.4156318742824492, "learning_rate": 1.5199063052725745e-06, "loss": 0.9734, "step": 276 },
    { "epoch": 0.725130890052356, "grad_norm": 1.519150038749317, "learning_rate": 1.516194762609445e-06, "loss": 0.9548, "step": 277 },
    { "epoch": 0.7277486910994765, "grad_norm": 2.3876346042029013, "learning_rate": 1.512473500634277e-06, "loss": 0.9355, "step": 278 },
    { "epoch": 0.7303664921465969, "grad_norm": 1.4156665926570595, "learning_rate": 1.5087425894138534e-06, "loss": 0.9418, "step": 279 },
    { "epoch": 0.7329842931937173, "grad_norm": 1.545693736367149, "learning_rate": 1.5050020991966403e-06, "loss": 0.943, "step": 280 },
    { "epoch": 0.7356020942408377, "grad_norm": 1.3719386457832154, "learning_rate": 1.501252100411465e-06, "loss": 0.9504, "step": 281 },
    { "epoch": 0.7382198952879581, "grad_norm": 1.4434108163997796, "learning_rate": 1.497492663666189e-06, "loss": 0.8861, "step": 282 },
    { "epoch": 0.7408376963350786, "grad_norm": 1.4077022286642678, "learning_rate": 1.4937238597463784e-06, "loss": 0.9503, "step": 283 },
    { "epoch": 0.743455497382199, "grad_norm": 1.6432508014410978, "learning_rate": 1.4899457596139727e-06, "loss": 0.9809, "step": 284 },
    { "epoch": 0.7460732984293194, "grad_norm": 1.4078348319712304, "learning_rate": 1.4861584344059474e-06, "loss": 0.9221, "step": 285 },
    { "epoch": 0.7486910994764397, "grad_norm": 1.496498216030133, "learning_rate": 1.4823619554329744e-06, "loss": 0.9593, "step": 286 },
    { "epoch": 0.7513089005235603, "grad_norm": 1.1775236514477745, "learning_rate": 1.4785563941780805e-06, "loss": 0.9004, "step": 287 },
    { "epoch": 0.7539267015706806, "grad_norm": 1.445348047393682, "learning_rate": 1.4747418222952993e-06, "loss": 0.9188, "step": 288 },
    { "epoch": 0.756544502617801, "grad_norm": 1.4942704837793932, "learning_rate": 1.4709183116083253e-06, "loss": 0.9618, "step": 289 },
    { "epoch": 0.7591623036649214, "grad_norm": 1.3529276296646142, "learning_rate": 1.4670859341091577e-06, "loss": 0.9704, "step": 290 },
    { "epoch": 0.7617801047120419, "grad_norm": 1.5516858536495775, "learning_rate": 1.4632447619567488e-06, "loss": 0.9155, "step": 291 },
    { "epoch": 0.7643979057591623, "grad_norm": 1.443364008768138, "learning_rate": 1.4593948674756415e-06, "loss": 0.9358, "step": 292 },
    { "epoch": 0.7670157068062827, "grad_norm": 1.3608416942283856, "learning_rate": 1.4555363231546109e-06, "loss": 0.9952, "step": 293 },
    { "epoch": 0.7696335078534031, "grad_norm": 1.3239348941023465, "learning_rate": 1.4516692016452979e-06, "loss": 0.9165, "step": 294 },
    { "epoch": 0.7722513089005235, "grad_norm": 1.6158463432267232, "learning_rate": 1.4477935757608397e-06, "loss": 0.9066, "step": 295 },
    { "epoch": 0.774869109947644, "grad_norm": 1.5884308678780332, "learning_rate": 1.4439095184745022e-06, "loss": 0.9458, "step": 296 },
    { "epoch": 0.7774869109947644, "grad_norm": 2.012960318795794, "learning_rate": 1.4400171029183035e-06, "loss": 0.9006, "step": 297 },
    { "epoch": 0.7801047120418848, "grad_norm": 1.360499900869024, "learning_rate": 1.4361164023816374e-06, "loss": 0.9351, "step": 298 },
    { "epoch": 0.7827225130890052, "grad_norm": 1.3724813802477163, "learning_rate": 1.4322074903098945e-06, "loss": 0.917, "step": 299 },
    { "epoch": 0.7853403141361257, "grad_norm": 1.503052362303298, "learning_rate": 1.428290440303077e-06, "loss": 0.9927, "step": 300 },
    { "epoch": 0.7879581151832461, "grad_norm": 1.7377456947229262, "learning_rate": 1.4243653261144167e-06, "loss": 0.9541, "step": 301 },
    { "epoch": 0.7905759162303665, "grad_norm": 1.2764425600693903, "learning_rate": 1.4204322216489813e-06, "loss": 0.9262, "step": 302 },
    { "epoch": 0.7931937172774869, "grad_norm": 1.567992829586323, "learning_rate": 1.4164912009622878e-06, "loss": 0.9829, "step": 303 },
    { "epoch": 0.7958115183246073, "grad_norm": 1.5156917718141123, "learning_rate": 1.4125423382589048e-06, "loss": 0.952, "step": 304 },
    { "epoch": 0.7984293193717278, "grad_norm": 1.4764271181959159, "learning_rate": 1.4085857078910567e-06, "loss": 0.9458, "step": 305 },
    { "epoch": 0.8010471204188482, "grad_norm": 1.2522703326500677, "learning_rate": 1.4046213843572234e-06, "loss": 0.9462, "step": 306 },
    { "epoch": 0.8036649214659686, "grad_norm": 1.4674944162208565, "learning_rate": 1.400649442300738e-06, "loss": 0.9537, "step": 307 },
    { "epoch": 0.806282722513089, "grad_norm": 1.3763179090039912, "learning_rate": 1.3966699565083803e-06, "loss": 0.9365, "step": 308 },
    { "epoch": 0.8089005235602095, "grad_norm": 1.9176648914200796, "learning_rate": 1.3926830019089694e-06, "loss": 1.0161, "step": 309 },
    { "epoch": 0.8115183246073299, "grad_norm": 1.4863434582201211, "learning_rate": 1.3886886535719539e-06, "loss": 0.9457, "step": 310 },
    { "epoch": 0.8141361256544503, "grad_norm": 1.3323857609548473, "learning_rate": 1.3846869867059965e-06, "loss": 0.9434, "step": 311 },
    { "epoch": 0.8167539267015707, "grad_norm": 1.490105065147535, "learning_rate": 1.3806780766575587e-06, "loss": 0.9392, "step": 312 },
    { "epoch": 0.819371727748691, "grad_norm": 1.333245804459532, "learning_rate": 1.3766619989094827e-06, "loss": 0.908, "step": 313 },
    { "epoch": 0.8219895287958116, "grad_norm": 1.5154308753484564, "learning_rate": 1.3726388290795696e-06, "loss": 0.8954, "step": 314 },
    { "epoch": 0.824607329842932, "grad_norm": 1.3909829985266102, "learning_rate": 1.3686086429191552e-06, "loss": 0.9485, "step": 315 },
    { "epoch": 0.8272251308900523, "grad_norm": 1.5779188390331473, "learning_rate": 1.3645715163116845e-06, "loss": 0.9557, "step": 316 },
    { "epoch": 0.8298429319371727, "grad_norm": 1.449669031785137, "learning_rate": 1.3605275252712826e-06, "loss": 0.8792, "step": 317 },
    { "epoch": 0.8324607329842932, "grad_norm": 1.4974346680981285, "learning_rate": 1.3564767459413235e-06, "loss": 0.9502, "step": 318 },
    { "epoch": 0.8350785340314136, "grad_norm": 1.379610694396643, "learning_rate": 1.3524192545929963e-06, "loss": 0.9344, "step": 319 },
    { "epoch": 0.837696335078534, "grad_norm": 1.3871474353129742, "learning_rate": 1.3483551276238688e-06, "loss": 0.9295, "step": 320 },
    { "epoch": 0.8403141361256544, "grad_norm": 1.506463325541792, "learning_rate": 1.3442844415564496e-06, "loss": 0.9316, "step": 321 },
    { "epoch": 0.8429319371727748, "grad_norm": 1.7186336719867092, "learning_rate": 1.3402072730367474e-06, "loss": 0.9275, "step": 322 },
    { "epoch": 0.8455497382198953, "grad_norm": 1.3614543479827845, "learning_rate": 1.336123698832827e-06, "loss": 0.9394, "step": 323 },
    { "epoch": 0.8481675392670157, "grad_norm": 1.5928598074183693, "learning_rate": 1.3320337958333637e-06, "loss": 0.9284, "step": 324 },
    { "epoch": 0.8507853403141361, "grad_norm": 1.450573452624891, "learning_rate": 1.3279376410461987e-06, "loss": 0.9453, "step": 325 },
    { "epoch": 0.8534031413612565, "grad_norm": 1.3696430137457172, "learning_rate": 1.3238353115968838e-06, "loss": 0.9345, "step": 326 },
    { "epoch": 0.856020942408377, "grad_norm": 1.4279904502242198, "learning_rate": 1.3197268847272338e-06, "loss": 0.9405, "step": 327 },
    { "epoch": 0.8586387434554974, "grad_norm": 1.3113735477129913, "learning_rate": 1.3156124377938698e-06, "loss": 0.8496, "step": 328 },
    { "epoch": 0.8612565445026178, "grad_norm": 1.5978377076734773, "learning_rate": 1.3114920482667633e-06, "loss": 0.9504, "step": 329 },
    { "epoch": 0.8638743455497382, "grad_norm": 1.8348826755579801, "learning_rate": 1.307365793727778e-06, "loss": 1.0206, "step": 330 },
    { "epoch": 0.8664921465968587, "grad_norm": 1.6036886081318196, "learning_rate": 1.3032337518692079e-06, "loss": 0.9325, "step": 331 },
    { "epoch": 0.8691099476439791, "grad_norm": 1.4319398650151158, "learning_rate": 1.2990960004923153e-06, "loss": 0.9511, "step": 332 },
    { "epoch": 0.8717277486910995, "grad_norm": 1.587326115767848, "learning_rate": 1.2949526175058663e-06, "loss": 0.9352, "step": 333 },
    { "epoch": 0.8743455497382199, "grad_norm": 1.4070281530555484, "learning_rate": 1.2908036809246622e-06, "loss": 0.9169, "step": 334 },
    { "epoch": 0.8769633507853403, "grad_norm": 1.3435113876325042, "learning_rate": 1.286649268868073e-06, "loss": 0.9191, "step": 335 },
    { "epoch": 0.8795811518324608, "grad_norm": 1.3798965471877482, "learning_rate": 1.2824894595585636e-06, "loss": 0.8751, "step": 336 },
    { "epoch": 0.8821989528795812, "grad_norm": 1.4127914600029392, "learning_rate": 1.278324331320224e-06, "loss": 0.9221, "step": 337 },
    { "epoch": 0.8848167539267016, "grad_norm": 1.419094074148045, "learning_rate": 1.2741539625772916e-06, "loss": 0.994, "step": 338 },
    { "epoch": 0.887434554973822, "grad_norm": 1.6168248145801407, "learning_rate": 1.269978431852678e-06, "loss": 0.9068, "step": 339 },
    { "epoch": 0.8900523560209425, "grad_norm": 1.4320272522924853, "learning_rate": 1.265797817766486e-06, "loss": 0.9107, "step": 340 },
    { "epoch": 0.8926701570680629, "grad_norm": 1.5043311007283438, "learning_rate": 1.2616121990345344e-06, "loss": 0.9379, "step": 341 },
    { "epoch": 0.8952879581151832, "grad_norm": 1.5310090194376413, "learning_rate": 1.2574216544668719e-06, "loss": 0.976, "step": 342 },
    { "epoch": 0.8979057591623036, "grad_norm": 1.362042648677866, "learning_rate": 1.2532262629662947e-06,
| "loss": 0.9131, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.900523560209424, | |
| "grad_norm": 1.5988287333686646, | |
| "learning_rate": 1.2490261035268612e-06, | |
| "loss": 0.8755, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.9031413612565445, | |
| "grad_norm": 1.4637242725250341, | |
| "learning_rate": 1.244821255232404e-06, | |
| "loss": 0.9109, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.9057591623036649, | |
| "grad_norm": 1.4212281055853575, | |
| "learning_rate": 1.2406117972550411e-06, | |
| "loss": 0.9539, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.9083769633507853, | |
| "grad_norm": 1.3319624620662243, | |
| "learning_rate": 1.2363978088536851e-06, | |
| "loss": 0.8959, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.9109947643979057, | |
| "grad_norm": 1.4662422372152333, | |
| "learning_rate": 1.2321793693725506e-06, | |
| "loss": 0.9405, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.9136125654450262, | |
| "grad_norm": 1.4304240335118916, | |
| "learning_rate": 1.2279565582396615e-06, | |
| "loss": 0.9541, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.9162303664921466, | |
| "grad_norm": 1.3671914328595074, | |
| "learning_rate": 1.2237294549653539e-06, | |
| "loss": 0.9717, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.918848167539267, | |
| "grad_norm": 1.3382227529528294, | |
| "learning_rate": 1.219498139140779e-06, | |
| "loss": 0.9378, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.9214659685863874, | |
| "grad_norm": 1.331756121322301, | |
| "learning_rate": 1.2152626904364064e-06, | |
| "loss": 0.9559, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.9240837696335078, | |
| "grad_norm": 1.4348519441766092, | |
| "learning_rate": 1.2110231886005222e-06, | |
| "loss": 0.9148, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.9267015706806283, | |
| "grad_norm": 1.2598591796573784, | |
| "learning_rate": 1.2067797134577273e-06, | |
| "loss": 0.9749, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.9293193717277487, | |
| "grad_norm": 1.6362760645353196, | |
| "learning_rate": 1.202532344907436e-06, | |
| "loss": 0.9261, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.9319371727748691, | |
| "grad_norm": 1.3685299905093398, | |
| "learning_rate": 1.198281162922371e-06, | |
| "loss": 0.9157, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.9345549738219895, | |
| "grad_norm": 1.5014341284660457, | |
| "learning_rate": 1.1940262475470555e-06, | |
| "loss": 0.9468, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.93717277486911, | |
| "grad_norm": 1.4866894767271521, | |
| "learning_rate": 1.18976767889631e-06, | |
| "loss": 0.9737, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.9397905759162304, | |
| "grad_norm": 1.3686575013762912, | |
| "learning_rate": 1.1855055371537399e-06, | |
| "loss": 0.9671, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.9424083769633508, | |
| "grad_norm": 1.3408401081503738, | |
| "learning_rate": 1.1812399025702289e-06, | |
| "loss": 0.9446, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.9450261780104712, | |
| "grad_norm": 1.4426258662911882, | |
| "learning_rate": 1.1769708554624255e-06, | |
| "loss": 0.9424, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.9476439790575916, | |
| "grad_norm": 1.3570630863827366, | |
| "learning_rate": 1.1726984762112326e-06, | |
| "loss": 0.9363, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.9502617801047121, | |
| "grad_norm": 1.4972719883412338, | |
| "learning_rate": 1.168422845260293e-06, | |
| "loss": 0.9629, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.9528795811518325, | |
| "grad_norm": 2.0926927624177853, | |
| "learning_rate": 1.1641440431144748e-06, | |
| "loss": 0.9362, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.9554973821989529, | |
| "grad_norm": 1.9559448320553872, | |
| "learning_rate": 1.1598621503383564e-06, | |
| "loss": 0.9355, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.9581151832460733, | |
| "grad_norm": 1.4196013691936538, | |
| "learning_rate": 1.1555772475547083e-06, | |
| "loss": 0.9807, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.9607329842931938, | |
| "grad_norm": 1.7129377232532392, | |
| "learning_rate": 1.1512894154429757e-06, | |
| "loss": 0.9321, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.9633507853403142, | |
| "grad_norm": 1.3874760503367283, | |
| "learning_rate": 1.14699873473776e-06, | |
| "loss": 0.9171, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.9659685863874345, | |
| "grad_norm": 1.3667521655356518, | |
| "learning_rate": 1.1427052862272981e-06, | |
| "loss": 0.9634, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.9685863874345549, | |
| "grad_norm": 1.4603827013405721, | |
| "learning_rate": 1.1384091507519403e-06, | |
| "loss": 0.8996, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.9712041884816754, | |
| "grad_norm": 1.3023965306720733, | |
| "learning_rate": 1.1341104092026302e-06, | |
| "loss": 0.9057, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.9738219895287958, | |
| "grad_norm": 1.587437099971742, | |
| "learning_rate": 1.1298091425193806e-06, | |
| "loss": 0.9122, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.9764397905759162, | |
| "grad_norm": 1.5072141830161945, | |
| "learning_rate": 1.1255054316897482e-06, | |
| "loss": 0.917, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.9790575916230366, | |
| "grad_norm": 1.5666361589706173, | |
| "learning_rate": 1.121199357747312e-06, | |
| "loss": 0.9004, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.981675392670157, | |
| "grad_norm": 1.547610708086, | |
| "learning_rate": 1.1168910017701434e-06, | |
| "loss": 0.8929, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.9842931937172775, | |
| "grad_norm": 1.4698749043156947, | |
| "learning_rate": 1.112580444879283e-06, | |
| "loss": 1.0095, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.9869109947643979, | |
| "grad_norm": 1.532940293838814, | |
| "learning_rate": 1.1082677682372112e-06, | |
| "loss": 0.944, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.9895287958115183, | |
| "grad_norm": 1.4452991257917254, | |
| "learning_rate": 1.1039530530463217e-06, | |
| "loss": 0.9699, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.9921465968586387, | |
| "grad_norm": 1.3913866901966334, | |
| "learning_rate": 1.0996363805473902e-06, | |
| "loss": 0.9476, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.9947643979057592, | |
| "grad_norm": 1.4385256079298478, | |
| "learning_rate": 1.0953178320180473e-06, | |
| "loss": 0.8981, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.9973821989528796, | |
| "grad_norm": 1.4122272138909508, | |
| "learning_rate": 1.0909974887712468e-06, | |
| "loss": 0.937, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.5381889662517363, | |
| "learning_rate": 1.0866754321537337e-06, | |
| "loss": 0.8369, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.0026178010471205, | |
| "grad_norm": 1.352548452256198, | |
| "learning_rate": 1.0823517435445149e-06, | |
| "loss": 0.8598, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 1.0052356020942408, | |
| "grad_norm": 1.6987449243575325, | |
| "learning_rate": 1.078026504353325e-06, | |
| "loss": 0.9466, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.0078534031413613, | |
| "grad_norm": 1.2636646845580983, | |
| "learning_rate": 1.0736997960190945e-06, | |
| "loss": 0.8466, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.0104712041884816, | |
| "grad_norm": 1.3487399166845027, | |
| "learning_rate": 1.0693717000084158e-06, | |
| "loss": 0.9227, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.013089005235602, | |
| "grad_norm": 1.4238712320318556, | |
| "learning_rate": 1.06504229781401e-06, | |
| "loss": 0.9006, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 1.0157068062827226, | |
| "grad_norm": 1.4571642770903115, | |
| "learning_rate": 1.0607116709531918e-06, | |
| "loss": 0.9162, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.0183246073298429, | |
| "grad_norm": 1.346066094766837, | |
| "learning_rate": 1.0563799009663343e-06, | |
| "loss": 0.9108, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 1.0209424083769634, | |
| "grad_norm": 1.410928572921669, | |
| "learning_rate": 1.0520470694153352e-06, | |
| "loss": 0.9914, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.0235602094240839, | |
| "grad_norm": 1.5207294046186268, | |
| "learning_rate": 1.047713257882079e-06, | |
| "loss": 0.9295, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 1.0261780104712042, | |
| "grad_norm": 1.3840105489229526, | |
| "learning_rate": 1.0433785479669038e-06, | |
| "loss": 0.8874, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.0287958115183247, | |
| "grad_norm": 1.3438440478368636, | |
| "learning_rate": 1.039043021287061e-06, | |
| "loss": 0.9186, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.031413612565445, | |
| "grad_norm": 1.5703077556397094, | |
| "learning_rate": 1.034706759475182e-06, | |
| "loss": 0.9052, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.0340314136125655, | |
| "grad_norm": 1.3504157220975264, | |
| "learning_rate": 1.03036984417774e-06, | |
| "loss": 0.9045, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.036649214659686, | |
| "grad_norm": 1.4105634277460741, | |
| "learning_rate": 1.026032357053512e-06, | |
| "loss": 0.9045, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.0392670157068062, | |
| "grad_norm": 1.3475091071385106, | |
| "learning_rate": 1.0216943797720417e-06, | |
| "loss": 0.8633, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 1.0418848167539267, | |
| "grad_norm": 1.4138471590235702, | |
| "learning_rate": 1.017355994012102e-06, | |
| "loss": 0.8908, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.044502617801047, | |
| "grad_norm": 1.4770009484245705, | |
| "learning_rate": 1.0130172814601574e-06, | |
| "loss": 0.931, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.0471204188481675, | |
| "grad_norm": 1.4838585726093223, | |
| "learning_rate": 1.0086783238088244e-06, | |
| "loss": 0.8935, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.049738219895288, | |
| "grad_norm": 1.7353117348056972, | |
| "learning_rate": 1.0043392027553359e-06, | |
| "loss": 0.9103, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 1.0523560209424083, | |
| "grad_norm": 1.606805445159876, | |
| "learning_rate": 1e-06, | |
| "loss": 0.9098, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.0549738219895288, | |
| "grad_norm": 1.4003150648318952, | |
| "learning_rate": 9.956607972446642e-07, | |
| "loss": 0.911, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 1.057591623036649, | |
| "grad_norm": 1.3167792983140534, | |
| "learning_rate": 9.913216761911753e-07, | |
| "loss": 0.9009, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.0602094240837696, | |
| "grad_norm": 1.2725669879710217, | |
| "learning_rate": 9.869827185398427e-07, | |
| "loss": 0.8839, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.0628272251308901, | |
| "grad_norm": 1.2890395865651842, | |
| "learning_rate": 9.826440059878981e-07, | |
| "loss": 0.9019, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.0654450261780104, | |
| "grad_norm": 1.3894062424259876, | |
| "learning_rate": 9.783056202279587e-07, | |
| "loss": 0.9324, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 1.068062827225131, | |
| "grad_norm": 1.3917884191601717, | |
| "learning_rate": 9.73967642946488e-07, | |
| "loss": 0.8865, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.0706806282722514, | |
| "grad_norm": 1.3702754228543925, | |
| "learning_rate": 9.6963015582226e-07, | |
| "loss": 0.8896, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 1.0732984293193717, | |
| "grad_norm": 1.4183394433577425, | |
| "learning_rate": 9.65293240524818e-07, | |
| "loss": 0.9622, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.0759162303664922, | |
| "grad_norm": 1.8223040196130649, | |
| "learning_rate": 9.609569787129392e-07, | |
| "loss": 0.9445, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 1.0785340314136125, | |
| "grad_norm": 1.561543253672229, | |
| "learning_rate": 9.566214520330965e-07, | |
| "loss": 0.9201, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.081151832460733, | |
| "grad_norm": 1.5251337755140832, | |
| "learning_rate": 9.52286742117921e-07, | |
| "loss": 0.8734, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 1.0837696335078535, | |
| "grad_norm": 1.2585711830780457, | |
| "learning_rate": 9.479529305846652e-07, | |
| "loss": 0.8811, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.0863874345549738, | |
| "grad_norm": 1.347193385434298, | |
| "learning_rate": 9.436200990336656e-07, | |
| "loss": 0.9101, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 1.0890052356020943, | |
| "grad_norm": 1.380510360812572, | |
| "learning_rate": 9.392883290468082e-07, | |
| "loss": 0.9352, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.0916230366492146, | |
| "grad_norm": 1.4226456539762178, | |
| "learning_rate": 9.349577021859899e-07, | |
| "loss": 0.9216, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 1.094240837696335, | |
| "grad_norm": 1.4185426724478578, | |
| "learning_rate": 9.306282999915839e-07, | |
| "loss": 0.8718, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.0968586387434556, | |
| "grad_norm": 1.6442742168613387, | |
| "learning_rate": 9.263002039809055e-07, | |
| "loss": 0.9369, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 1.0994764397905759, | |
| "grad_norm": 1.4966541668940625, | |
| "learning_rate": 9.219734956466752e-07, | |
| "loss": 0.9093, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.1020942408376964, | |
| "grad_norm": 1.5331073728044513, | |
| "learning_rate": 9.176482564554853e-07, | |
| "loss": 0.8945, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 1.1047120418848166, | |
| "grad_norm": 2.010031110583405, | |
| "learning_rate": 9.133245678462662e-07, | |
| "loss": 0.8757, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.1073298429319371, | |
| "grad_norm": 1.4805034302628122, | |
| "learning_rate": 9.090025112287532e-07, | |
| "loss": 0.9101, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 1.1099476439790577, | |
| "grad_norm": 1.3324528881382394, | |
| "learning_rate": 9.046821679819526e-07, | |
| "loss": 0.8468, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.112565445026178, | |
| "grad_norm": 1.5950663314140405, | |
| "learning_rate": 9.003636194526098e-07, | |
| "loss": 0.859, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 1.1151832460732984, | |
| "grad_norm": 1.4696265552281182, | |
| "learning_rate": 8.960469469536784e-07, | |
| "loss": 0.9125, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.117801047120419, | |
| "grad_norm": 1.7012055856407813, | |
| "learning_rate": 8.917322317627886e-07, | |
| "loss": 0.9044, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 1.1204188481675392, | |
| "grad_norm": 1.3440632345526482, | |
| "learning_rate": 8.874195551207173e-07, | |
| "loss": 0.9052, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.1230366492146597, | |
| "grad_norm": 1.494387132622485, | |
| "learning_rate": 8.831089982298568e-07, | |
| "loss": 0.8855, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 1.12565445026178, | |
| "grad_norm": 1.2116093561626082, | |
| "learning_rate": 8.78800642252688e-07, | |
| "loss": 0.9089, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.1282722513089005, | |
| "grad_norm": 1.3952551501152495, | |
| "learning_rate": 8.744945683102516e-07, | |
| "loss": 0.903, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 1.130890052356021, | |
| "grad_norm": 1.4380203340874709, | |
| "learning_rate": 8.701908574806198e-07, | |
| "loss": 0.8961, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.1335078534031413, | |
| "grad_norm": 1.321179107685139, | |
| "learning_rate": 8.658895907973696e-07, | |
| "loss": 0.8675, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 1.1361256544502618, | |
| "grad_norm": 1.5378152096859476, | |
| "learning_rate": 8.615908492480598e-07, | |
| "loss": 0.9023, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.1387434554973823, | |
| "grad_norm": 1.412669028369897, | |
| "learning_rate": 8.572947137727022e-07, | |
| "loss": 0.8696, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 1.1413612565445026, | |
| "grad_norm": 1.531047948413987, | |
| "learning_rate": 8.530012652622397e-07, | |
| "loss": 0.9266, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.143979057591623, | |
| "grad_norm": 1.3302826186622878, | |
| "learning_rate": 8.487105845570242e-07, | |
| "loss": 0.8793, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 1.1465968586387434, | |
| "grad_norm": 1.32167025755748, | |
| "learning_rate": 8.444227524452919e-07, | |
| "loss": 0.8921, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.149214659685864, | |
| "grad_norm": 1.437600669859301, | |
| "learning_rate": 8.401378496616436e-07, | |
| "loss": 0.9262, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 1.1518324607329844, | |
| "grad_norm": 1.7595701476639378, | |
| "learning_rate": 8.358559568855248e-07, | |
| "loss": 0.95, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.1544502617801047, | |
| "grad_norm": 1.8234006679918948, | |
| "learning_rate": 8.315771547397069e-07, | |
| "loss": 0.9589, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 1.1570680628272252, | |
| "grad_norm": 1.452249454487249, | |
| "learning_rate": 8.273015237887673e-07, | |
| "loss": 0.9084, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.1596858638743455, | |
| "grad_norm": 1.7580904230300225, | |
| "learning_rate": 8.230291445375743e-07, | |
| "loss": 0.8941, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 1.162303664921466, | |
| "grad_norm": 1.3278204456920104, | |
| "learning_rate": 8.187600974297713e-07, | |
| "loss": 0.8985, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.1649214659685865, | |
| "grad_norm": 1.4027118574490405, | |
| "learning_rate": 8.144944628462602e-07, | |
| "loss": 0.8731, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 1.1675392670157068, | |
| "grad_norm": 1.415174215071559, | |
| "learning_rate": 8.102323211036903e-07, | |
| "loss": 0.8845, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.1701570680628273, | |
| "grad_norm": 1.3913552918511438, | |
| "learning_rate": 8.059737524529443e-07, | |
| "loss": 0.8932, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 1.1727748691099475, | |
| "grad_norm": 1.3393476374259683, | |
| "learning_rate": 8.017188370776291e-07, | |
| "loss": 0.9429, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.175392670157068, | |
| "grad_norm": 1.339931563196864, | |
| "learning_rate": 7.974676550925638e-07, | |
| "loss": 0.8584, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 1.1780104712041886, | |
| "grad_norm": 1.4030008780056942, | |
| "learning_rate": 7.932202865422726e-07, | |
| "loss": 0.8831, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.1806282722513088, | |
| "grad_norm": 1.8118925202824216, | |
| "learning_rate": 7.889768113994779e-07, | |
| "loss": 0.8887, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 1.1832460732984293, | |
| "grad_norm": 1.5387839907662275, | |
| "learning_rate": 7.847373095635936e-07, | |
| "loss": 0.8957, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.1858638743455496, | |
| "grad_norm": 1.3918514287546606, | |
| "learning_rate": 7.805018608592211e-07, | |
| "loss": 0.9043, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 1.1884816753926701, | |
| "grad_norm": 1.2858265895726548, | |
| "learning_rate": 7.76270545034646e-07, | |
| "loss": 0.8629, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.1910994764397906, | |
| "grad_norm": 1.3873983010304787, | |
| "learning_rate": 7.720434417603383e-07, | |
| "loss": 0.8948, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 1.193717277486911, | |
| "grad_norm": 1.317347612940767, | |
| "learning_rate": 7.678206306274494e-07, | |
| "loss": 0.8789, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.1963350785340314, | |
| "grad_norm": 1.5036388466833512, | |
| "learning_rate": 7.636021911463151e-07, | |
| "loss": 0.9402, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 1.1989528795811517, | |
| "grad_norm": 1.427135257044766, | |
| "learning_rate": 7.59388202744959e-07, | |
| "loss": 0.9449, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.2015706806282722, | |
| "grad_norm": 1.93494024531244, | |
| "learning_rate": 7.551787447675961e-07, | |
| "loss": 0.8978, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 1.2041884816753927, | |
| "grad_norm": 1.4160041714291973, | |
| "learning_rate": 7.509738964731388e-07, | |
| "loss": 0.8502, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.206806282722513, | |
| "grad_norm": 1.4158880080077554, | |
| "learning_rate": 7.467737370337053e-07, | |
| "loss": 0.8544, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 1.2094240837696335, | |
| "grad_norm": 1.4319367593292147, | |
| "learning_rate": 7.42578345533128e-07, | |
| "loss": 0.8924, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.212041884816754, | |
| "grad_norm": 1.5603806054375955, | |
| "learning_rate": 7.383878009654656e-07, | |
| "loss": 0.9332, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 1.2146596858638743, | |
| "grad_norm": 1.6030080299637368, | |
| "learning_rate": 7.342021822335142e-07, | |
| "loss": 0.9562, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.2172774869109948, | |
| "grad_norm": 1.4321929382537035, | |
| "learning_rate": 7.300215681473223e-07, | |
| "loss": 0.8923, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.2198952879581153, | |
| "grad_norm": 1.5156349677916563, | |
| "learning_rate": 7.258460374227084e-07, | |
| "loss": 0.9585, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.2225130890052356, | |
| "grad_norm": 1.382771006951781, | |
| "learning_rate": 7.216756686797763e-07, | |
| "loss": 0.8921, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 1.225130890052356, | |
| "grad_norm": 1.3862045180941078, | |
| "learning_rate": 7.175105404414361e-07, | |
| "loss": 0.9613, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.2277486910994764, | |
| "grad_norm": 1.403237935502315, | |
| "learning_rate": 7.133507311319269e-07, | |
| "loss": 0.8979, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 1.2303664921465969, | |
| "grad_norm": 1.470944258568419, | |
| "learning_rate": 7.091963190753377e-07, | |
| "loss": 0.938, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.2329842931937174, | |
| "grad_norm": 1.499848647249035, | |
| "learning_rate": 7.050473824941339e-07, | |
| "loss": 0.9093, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 1.2356020942408377, | |
| "grad_norm": 1.405120300665954, | |
| "learning_rate": 7.009039995076844e-07, | |
| "loss": 0.928, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.2382198952879582, | |
| "grad_norm": 1.4681450182994786, | |
| "learning_rate": 6.967662481307922e-07, | |
| "loss": 0.8985, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 1.2408376963350785, | |
| "grad_norm": 1.3777720330440961, | |
| "learning_rate": 6.926342062722222e-07, | |
| "loss": 0.8719, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.243455497382199, | |
| "grad_norm": 1.4958767523410936, | |
| "learning_rate": 6.885079517332366e-07, | |
| "loss": 0.8984, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.2460732984293195, | |
| "grad_norm": 1.5727144596330556, | |
| "learning_rate": 6.843875622061304e-07, | |
| "loss": 0.8878, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.2486910994764397, | |
| "grad_norm": 1.6315335944052536, | |
| "learning_rate": 6.802731152727663e-07, | |
| "loss": 0.91, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 1.2513089005235603, | |
| "grad_norm": 1.5654763348760663, | |
| "learning_rate": 6.761646884031163e-07, | |
| "loss": 0.8597, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.2539267015706805, | |
| "grad_norm": 1.3376352257465756, | |
| "learning_rate": 6.720623589538013e-07, | |
| "loss": 0.9081, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 1.256544502617801, | |
| "grad_norm": 1.5086059528146298, | |
| "learning_rate": 6.679662041666361e-07, | |
| "loss": 0.8981, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.2591623036649215, | |
| "grad_norm": 1.2782585477588344, | |
| "learning_rate": 6.638763011671735e-07, | |
| "loss": 0.8778, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 1.2617801047120418, | |
| "grad_norm": 1.7286688483189723, | |
| "learning_rate": 6.597927269632526e-07, | |
| "loss": 0.8708, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.2643979057591623, | |
| "grad_norm": 1.2561419055543754, | |
| "learning_rate": 6.557155584435503e-07, | |
| "loss": 0.8966, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 1.2670157068062826, | |
| "grad_norm": 1.497272759598682, | |
| "learning_rate": 6.516448723761314e-07, | |
| "loss": 0.8719, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.2696335078534031, | |
| "grad_norm": 1.6250829967641724, | |
| "learning_rate": 6.475807454070039e-07, | |
| "loss": 0.8856, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 1.2722513089005236, | |
| "grad_norm": 1.6479813154722118, | |
| "learning_rate": 6.435232540586762e-07, | |
| "loss": 0.9266, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.274869109947644, | |
| "grad_norm": 1.3286340505653726, | |
| "learning_rate": 6.394724747287172e-07, | |
| "loss": 0.8334, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 1.2774869109947644, | |
| "grad_norm": 1.4542515014039075, | |
| "learning_rate": 6.354284836883156e-07, | |
| "loss": 0.8887, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.2801047120418847, | |
| "grad_norm": 1.3724418981619309, | |
| "learning_rate": 6.313913570808446e-07, | |
| "loss": 0.8706, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 1.2827225130890052, | |
| "grad_norm": 1.3658073904261523, | |
| "learning_rate": 6.273611709204303e-07, | |
| "loss": 0.9141, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.2853403141361257, | |
| "grad_norm": 1.2739502124007493, | |
| "learning_rate": 6.233380010905174e-07, | |
| "loss": 0.9124, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 1.2879581151832462, | |
| "grad_norm": 1.8768508251733684, | |
| "learning_rate": 6.193219233424414e-07, | |
| "loss": 0.9036, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.2905759162303665, | |
| "grad_norm": 1.3168948507652463, | |
| "learning_rate": 6.153130132940036e-07, | |
| "loss": 0.9322, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 1.2931937172774868, | |
| "grad_norm": 1.4566708836290705, | |
| "learning_rate": 6.11311346428046e-07, | |
| "loss": 0.9675, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.2958115183246073, | |
| "grad_norm": 1.3456105635036395, | |
| "learning_rate": 6.073169980910307e-07, | |
| "loss": 0.8839, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 1.2984293193717278, | |
| "grad_norm": 1.3260427877201129, | |
| "learning_rate": 6.033300434916202e-07, | |
| "loss": 0.8501, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.3010471204188483, | |
| "grad_norm": 1.6991685082617407, | |
| "learning_rate": 5.993505576992622e-07, | |
| "loss": 0.8694, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 1.3036649214659686, | |
| "grad_norm": 1.2197619039548226, | |
| "learning_rate": 5.953786156427764e-07, | |
| "loss": 0.9285, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.306282722513089, | |
| "grad_norm": 1.5649739326206697, | |
| "learning_rate": 5.914142921089433e-07, | |
| "loss": 0.9077, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 1.3089005235602094, | |
| "grad_norm": 1.5043102113788342, | |
| "learning_rate": 5.874576617410949e-07, | |
| "loss": 0.9359, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.3115183246073299, | |
| "grad_norm": 2.2191105066016523, | |
| "learning_rate": 5.835087990377123e-07, | |
| "loss": 0.8882, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 1.3141361256544504, | |
| "grad_norm": 1.3870827210325436, | |
| "learning_rate": 5.795677783510186e-07, | |
| "loss": 0.8605, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.3167539267015707, | |
| "grad_norm": 1.3303488313205487, | |
| "learning_rate": 5.756346738855835e-07, | |
| "loss": 0.862, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 1.3193717277486912, | |
| "grad_norm": 1.4039158189310836, | |
| "learning_rate": 5.717095596969226e-07, | |
| "loss": 0.8973, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.3219895287958114, | |
| "grad_norm": 1.2314389814739966, | |
| "learning_rate": 5.677925096901055e-07, | |
| "loss": 0.8651, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 1.324607329842932, | |
| "grad_norm": 1.3345927348395523, | |
| "learning_rate": 5.638835976183627e-07, | |
| "loss": 0.8745, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.3272251308900525, | |
| "grad_norm": 1.4154278961549511, | |
| "learning_rate": 5.599828970816963e-07, | |
| "loss": 0.8673, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 1.3298429319371727, | |
| "grad_norm": 1.3638849226919136, | |
| "learning_rate": 5.560904815254979e-07, | |
| "loss": 0.9074, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.3324607329842932, | |
| "grad_norm": 1.3669358510510996, | |
| "learning_rate": 5.522064242391603e-07, | |
| "loss": 0.8715, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 1.3350785340314135, | |
| "grad_norm": 1.5856610536711122, | |
| "learning_rate": 5.483307983547025e-07, | |
| "loss": 0.9246, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.337696335078534, | |
| "grad_norm": 1.365878150253015, | |
| "learning_rate": 5.444636768453887e-07, | |
| "loss": 0.876, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 1.3403141361256545, | |
| "grad_norm": 1.6334459477041363, | |
| "learning_rate": 5.406051325243585e-07, | |
| "loss": 0.9312, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.3429319371727748, | |
| "grad_norm": 1.5863516351938438, | |
| "learning_rate": 5.367552380432515e-07, | |
| "loss": 0.9283, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 1.3455497382198953, | |
| "grad_norm": 1.3595941807771459, | |
| "learning_rate": 5.329140658908422e-07, | |
| "loss": 0.9232, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.3481675392670156, | |
| "grad_norm": 1.5206184191201402, | |
| "learning_rate": 5.290816883916748e-07, | |
| "loss": 0.8676, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 1.350785340314136, | |
| "grad_norm": 1.3031469098418837, | |
| "learning_rate": 5.252581777047008e-07, | |
| "loss": 0.8812, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.3534031413612566, | |
| "grad_norm": 1.3798809076308727, | |
| "learning_rate": 5.214436058219198e-07, | |
| "loss": 0.9039, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 1.356020942408377, | |
| "grad_norm": 1.3510273757712852, | |
| "learning_rate": 5.176380445670254e-07, | |
| "loss": 0.8814, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.3586387434554974, | |
| "grad_norm": 1.542901220604215, | |
| "learning_rate": 5.138415655940525e-07, | |
| "loss": 0.9526, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 1.3612565445026177, | |
| "grad_norm": 1.2836209031828834, | |
| "learning_rate": 5.100542403860271e-07, | |
| "loss": 0.856, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.3638743455497382, | |
| "grad_norm": 1.4938375796062573, | |
| "learning_rate": 5.062761402536215e-07, | |
| "loss": 0.9408, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 1.3664921465968587, | |
| "grad_norm": 2.3056799393831082, | |
| "learning_rate": 5.02507336333811e-07, | |
| "loss": 0.902, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.369109947643979, | |
| "grad_norm": 1.368596540328692, | |
| "learning_rate": 4.98747899588535e-07, | |
| "loss": 0.874, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 1.3717277486910995, | |
| "grad_norm": 1.509703116789799, | |
| "learning_rate": 4.949979008033595e-07, | |
| "loss": 0.8776, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.3743455497382198, | |
| "grad_norm": 1.493268000765195, | |
| "learning_rate": 4.912574105861465e-07, | |
| "loss": 0.9217, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.3769633507853403, | |
| "grad_norm": 1.714251809547912, | |
| "learning_rate": 4.87526499365723e-07, | |
| "loss": 0.8575, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.3795811518324608, | |
| "grad_norm": 1.4496034561474174, | |
| "learning_rate": 4.838052373905553e-07, | |
| "loss": 0.8833, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 1.3821989528795813, | |
| "grad_norm": 1.56426776623298, | |
| "learning_rate": 4.800936947274254e-07, | |
| "loss": 0.8553, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.3848167539267016, | |
| "grad_norm": 1.591662406148868, | |
| "learning_rate": 4.7639194126011486e-07, | |
| "loss": 0.8626, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 1.387434554973822, | |
| "grad_norm": 1.2998408316507073, | |
| "learning_rate": 4.7270004668808393e-07, | |
| "loss": 0.8924, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.3900523560209423, | |
| "grad_norm": 1.7152024963422792, | |
| "learning_rate": 4.690180805251643e-07, | |
| "loss": 0.8902, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 1.3926701570680629, | |
| "grad_norm": 1.3075264023398263, | |
| "learning_rate": 4.653461120982459e-07, | |
| "loss": 0.8603, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.3952879581151834, | |
| "grad_norm": 1.276878966251307, | |
| "learning_rate": 4.6168421054597606e-07, | |
| "loss": 0.8739, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 1.3979057591623036, | |
| "grad_norm": 1.4884315886808126, | |
| "learning_rate": 4.5803244481745276e-07, | |
| "loss": 0.8923, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.4005235602094241, | |
| "grad_norm": 1.6380352911517773, | |
| "learning_rate": 4.5439088367093036e-07, | |
| "loss": 0.9608, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 1.4031413612565444, | |
| "grad_norm": 1.4430469631924363, | |
| "learning_rate": 4.507595956725233e-07, | |
| "loss": 0.8983, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.405759162303665, | |
| "grad_norm": 1.4694298853784378, | |
| "learning_rate": 4.471386491949151e-07, | |
| "loss": 0.8383, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 1.4083769633507854, | |
| "grad_norm": 1.9449190678271149, | |
| "learning_rate": 4.4352811241607146e-07, | |
| "loss": 0.8741, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.4109947643979057, | |
| "grad_norm": 1.5509421449752532, | |
| "learning_rate": 4.39928053317955e-07, | |
| "loss": 0.8887, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 1.4136125654450262, | |
| "grad_norm": 1.3749583182027436, | |
| "learning_rate": 4.36338539685249e-07, | |
| "loss": 0.9093, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.4162303664921465, | |
| "grad_norm": 1.3975843157116803, | |
| "learning_rate": 4.32759639104076e-07, | |
| "loss": 0.9235, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 1.418848167539267, | |
| "grad_norm": 1.4039921493904044, | |
| "learning_rate": 4.2919141896072965e-07, | |
| "loss": 0.9163, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.4214659685863875, | |
| "grad_norm": 1.3949577352275373, | |
| "learning_rate": 4.256339464404024e-07, | |
| "loss": 0.8548, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 1.4240837696335078, | |
| "grad_norm": 1.352669832446612, | |
| "learning_rate": 4.2208728852592466e-07, | |
| "loss": 0.9593, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.4267015706806283, | |
| "grad_norm": 1.535192382477205, | |
| "learning_rate": 4.185515119964985e-07, | |
| "loss": 0.9072, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 1.4293193717277486, | |
| "grad_norm": 1.4024835914952385, | |
| "learning_rate": 4.150266834264445e-07, | |
| "loss": 0.8771, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.431937172774869, | |
| "grad_norm": 1.3426811269822514, | |
| "learning_rate": 4.115128691839463e-07, | |
| "loss": 0.8857, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 1.4345549738219896, | |
| "grad_norm": 2.2188316350749986, | |
| "learning_rate": 4.0801013542980154e-07, | |
| "loss": 0.8902, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.4371727748691099, | |
| "grad_norm": 1.6290927785062779, | |
| "learning_rate": 4.045185481161747e-07, | |
| "loss": 0.968, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 1.4397905759162304, | |
| "grad_norm": 1.4583741240333974, | |
| "learning_rate": 4.010381729853579e-07, | |
| "loss": 0.8961, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.4424083769633507, | |
| "grad_norm": 1.3107716580378566, | |
| "learning_rate": 3.975690755685311e-07, | |
| "loss": 0.8983, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 1.4450261780104712, | |
| "grad_norm": 1.3562349394678586, | |
| "learning_rate": 3.9411132118452893e-07, | |
| "loss": 0.9214, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.4476439790575917, | |
| "grad_norm": 1.3224730554942807, | |
| "learning_rate": 3.906649749386105e-07, | |
| "loss": 0.9057, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 1.450261780104712, | |
| "grad_norm": 1.353535129786952, | |
| "learning_rate": 3.8723010172123373e-07, | |
| "loss": 0.946, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.4528795811518325, | |
| "grad_norm": 1.4259143441660183, | |
| "learning_rate": 3.838067662068341e-07, | |
| "loss": 0.8604, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 1.4554973821989527, | |
| "grad_norm": 1.42186194700426, | |
| "learning_rate": 3.80395032852605e-07, | |
| "loss": 0.8439, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.4581151832460733, | |
| "grad_norm": 1.3809858189745732, | |
| "learning_rate": 3.769949658972866e-07, | |
| "loss": 0.928, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 1.4607329842931938, | |
| "grad_norm": 1.6516624180839072, | |
| "learning_rate": 3.7360662935995504e-07, | |
| "loss": 0.9032, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.4633507853403143, | |
| "grad_norm": 1.34386031295635, | |
| "learning_rate": 3.70230087038817e-07, | |
| "loss": 0.9219, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 1.4659685863874345, | |
| "grad_norm": 1.6338778956502633, | |
| "learning_rate": 3.6686540251000754e-07, | |
| "loss": 0.916, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.468586387434555, | |
| "grad_norm": 1.346375748845269, | |
| "learning_rate": 3.635126391263964e-07, | |
| "loss": 0.8901, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 1.4712041884816753, | |
| "grad_norm": 1.8950133658290673, | |
| "learning_rate": 3.6017186001639035e-07, | |
| "loss": 0.8983, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.4738219895287958, | |
| "grad_norm": 1.502360041436484, | |
| "learning_rate": 3.5684312808274895e-07, | |
| "loss": 0.8465, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 1.4764397905759163, | |
| "grad_norm": 1.3328999154470254, | |
| "learning_rate": 3.5352650600139643e-07, | |
| "loss": 0.8678, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.4790575916230366, | |
| "grad_norm": 1.5402031214432916, | |
| "learning_rate": 3.502220562202457e-07, | |
| "loss": 0.9039, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 1.4816753926701571, | |
| "grad_norm": 1.3596182174458997, | |
| "learning_rate": 3.469298409580179e-07, | |
| "loss": 0.8975, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.4842931937172774, | |
| "grad_norm": 2.2946988942603097, | |
| "learning_rate": 3.4364992220307474e-07, | |
| "loss": 0.8954, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 1.486910994764398, | |
| "grad_norm": 1.4327603069667216, | |
| "learning_rate": 3.4038236171224943e-07, | |
| "loss": 0.9415, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.4895287958115184, | |
| "grad_norm": 1.3641815612490016, | |
| "learning_rate": 3.3712722100968416e-07, | |
| "loss": 0.9026, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 1.4921465968586387, | |
| "grad_norm": 1.2637466433514526, | |
| "learning_rate": 3.338845613856722e-07, | |
| "loss": 0.8561, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.4947643979057592, | |
| "grad_norm": 1.250859176376699, | |
| "learning_rate": 3.306544438955021e-07, | |
| "loss": 0.8633, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 1.4973821989528795, | |
| "grad_norm": 1.4958091635550417, | |
| "learning_rate": 3.2743692935831204e-07, | |
| "loss": 0.9117, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 1.4189458972675342, | |
| "learning_rate": 3.2423207835593945e-07, | |
| "loss": 0.9277, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 1.5026178010471205, | |
| "grad_norm": 1.6386051647596955, | |
| "learning_rate": 3.2103995123178485e-07, | |
| "loss": 0.9326, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.5052356020942408, | |
| "grad_norm": 1.3086995202347653, | |
| "learning_rate": 3.17860608089672e-07, | |
| "loss": 0.9019, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 1.5078534031413613, | |
| "grad_norm": 1.4054865422218317, | |
| "learning_rate": 3.146941087927203e-07, | |
| "loss": 0.9337, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.5104712041884816, | |
| "grad_norm": 1.3123033746962398, | |
| "learning_rate": 3.115405129622133e-07, | |
| "loss": 0.923, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 1.513089005235602, | |
| "grad_norm": 1.4643032025460945, | |
| "learning_rate": 3.083998799764793e-07, | |
| "loss": 0.8798, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.5157068062827226, | |
| "grad_norm": 1.422050292940817, | |
| "learning_rate": 3.052722689697719e-07, | |
| "loss": 0.8686, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 1.518324607329843, | |
| "grad_norm": 1.6086227282469414, | |
| "learning_rate": 3.02157738831157e-07, | |
| "loss": 0.9343, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.5209424083769634, | |
| "grad_norm": 1.490803668534442, | |
| "learning_rate": 2.990563482034032e-07, | |
| "loss": 0.9108, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 1.5235602094240837, | |
| "grad_norm": 1.5896774793419899, | |
| "learning_rate": 2.9596815548187906e-07, | |
| "loss": 0.9147, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.5261780104712042, | |
| "grad_norm": 1.3359116805228912, | |
| "learning_rate": 2.9289321881345254e-07, | |
| "loss": 0.8956, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 1.5287958115183247, | |
| "grad_norm": 1.4082406709301296, | |
| "learning_rate": 2.898315960953963e-07, | |
| "loss": 0.9, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.5314136125654452, | |
| "grad_norm": 1.5960566913445038, | |
| "learning_rate": 2.86783344974298e-07, | |
| "loss": 0.8866, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 1.5340314136125655, | |
| "grad_norm": 1.3367632546914359, | |
| "learning_rate": 2.837485228449744e-07, | |
| "loss": 0.9182, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.5366492146596857, | |
| "grad_norm": 1.4328906430200836, | |
| "learning_rate": 2.80727186849391e-07, | |
| "loss": 0.9065, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 1.5392670157068062, | |
| "grad_norm": 1.4305707171373445, | |
| "learning_rate": 2.777193938755855e-07, | |
| "loss": 0.8474, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.5418848167539267, | |
| "grad_norm": 1.390558186229553, | |
| "learning_rate": 2.7472520055659766e-07, | |
| "loss": 0.8292, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 1.5445026178010473, | |
| "grad_norm": 1.5168812321972025, | |
| "learning_rate": 2.717446632694025e-07, | |
| "loss": 0.9483, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.5471204188481675, | |
| "grad_norm": 1.5339281627360795, | |
| "learning_rate": 2.6877783813384893e-07, | |
| "loss": 0.8949, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 1.5497382198952878, | |
| "grad_norm": 1.4176186502561052, | |
| "learning_rate": 2.6582478101160166e-07, | |
| "loss": 0.9198, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.5523560209424083, | |
| "grad_norm": 1.3908601156289901, | |
| "learning_rate": 2.6288554750509283e-07, | |
| "loss": 0.8816, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 1.5549738219895288, | |
| "grad_norm": 1.303626234335547, | |
| "learning_rate": 2.599601929564709e-07, | |
| "loss": 0.8803, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.5575916230366493, | |
| "grad_norm": 1.3719162542207297, | |
| "learning_rate": 2.57048772446562e-07, | |
| "loss": 0.8948, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 1.5602094240837696, | |
| "grad_norm": 1.3156007083318564, | |
| "learning_rate": 2.5415134079383004e-07, | |
| "loss": 0.8825, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.56282722513089, | |
| "grad_norm": 1.393225250452261, | |
| "learning_rate": 2.5126795255334787e-07, | |
| "loss": 0.9464, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 1.5654450261780104, | |
| "grad_norm": 1.4173100082790748, | |
| "learning_rate": 2.4839866201576645e-07, | |
| "loss": 0.8965, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.568062827225131, | |
| "grad_norm": 1.3223995366617138, | |
| "learning_rate": 2.4554352320629523e-07, | |
| "loss": 0.9205, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 1.5706806282722514, | |
| "grad_norm": 1.4027524768427433, | |
| "learning_rate": 2.4270258988368374e-07, | |
| "loss": 0.9074, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.5732984293193717, | |
| "grad_norm": 1.4584542546530008, | |
| "learning_rate": 2.3987591553920996e-07, | |
| "loss": 0.8893, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 1.5759162303664922, | |
| "grad_norm": 1.3388421557211998, | |
| "learning_rate": 2.3706355339567286e-07, | |
| "loss": 0.8849, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.5785340314136125, | |
| "grad_norm": 1.341899738253868, | |
| "learning_rate": 2.3426555640638922e-07, | |
| "loss": 0.9048, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 1.581151832460733, | |
| "grad_norm": 1.4453501391703267, | |
| "learning_rate": 2.3148197725419983e-07, | |
| "loss": 0.9189, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.5837696335078535, | |
| "grad_norm": 1.511859976381982, | |
| "learning_rate": 2.2871286835047287e-07, | |
| "loss": 0.9055, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 1.5863874345549738, | |
| "grad_norm": 1.3403547937150142, | |
| "learning_rate": 2.2595828183412168e-07, | |
| "loss": 0.8339, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.5890052356020943, | |
| "grad_norm": 1.3796848926133887, | |
| "learning_rate": 2.2321826957061884e-07, | |
| "loss": 0.917, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 1.5916230366492146, | |
| "grad_norm": 1.3988877715990504, | |
| "learning_rate": 2.204928831510241e-07, | |
| "loss": 0.9039, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.594240837696335, | |
| "grad_norm": 1.278731483728787, | |
| "learning_rate": 2.1778217389100828e-07, | |
| "loss": 0.9258, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 1.5968586387434556, | |
| "grad_norm": 1.3160813737990813, | |
| "learning_rate": 2.1508619282989083e-07, | |
| "loss": 0.8876, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.599476439790576, | |
| "grad_norm": 1.2756509102772609, | |
| "learning_rate": 2.1240499072967676e-07, | |
| "loss": 0.9271, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 1.6020942408376964, | |
| "grad_norm": 1.4288572172533927, | |
| "learning_rate": 2.0973861807410187e-07, | |
| "loss": 0.8502, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.6047120418848166, | |
| "grad_norm": 1.4064091065807276, | |
| "learning_rate": 2.0708712506768077e-07, | |
| "loss": 0.9031, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 1.6073298429319371, | |
| "grad_norm": 1.4628361057234258, | |
| "learning_rate": 2.0445056163476372e-07, | |
| "loss": 0.893, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.6099476439790577, | |
| "grad_norm": 1.2800166095474408, | |
| "learning_rate": 2.0182897741859494e-07, | |
| "loss": 0.9062, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 1.6125654450261782, | |
| "grad_norm": 1.4185981585601595, | |
| "learning_rate": 1.9922242178037863e-07, | |
| "loss": 0.8921, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.6151832460732984, | |
| "grad_norm": 1.4565744107743526, | |
| "learning_rate": 1.966309437983491e-07, | |
| "loss": 0.8639, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 1.6178010471204187, | |
| "grad_norm": 1.495668458429946, | |
| "learning_rate": 1.9405459226684717e-07, | |
| "loss": 0.8979, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.6204188481675392, | |
| "grad_norm": 1.3012489378658836, | |
| "learning_rate": 1.9149341569540156e-07, | |
| "loss": 0.8967, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 1.6230366492146597, | |
| "grad_norm": 1.281621052307457, | |
| "learning_rate": 1.88947462307814e-07, | |
| "loss": 0.8495, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.6256544502617802, | |
| "grad_norm": 1.4215901575416943, | |
| "learning_rate": 1.8641678004125362e-07, | |
| "loss": 0.8946, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 1.6282722513089005, | |
| "grad_norm": 1.3593548915385338, | |
| "learning_rate": 1.8390141654535263e-07, | |
| "loss": 0.8708, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.6308900523560208, | |
| "grad_norm": 1.521225370539659, | |
| "learning_rate": 1.8140141918131003e-07, | |
| "loss": 0.9211, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 1.6335078534031413, | |
| "grad_norm": 1.308383204107825, | |
| "learning_rate": 1.7891683502099831e-07, | |
| "loss": 0.872, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.6361256544502618, | |
| "grad_norm": 1.3011165358618517, | |
| "learning_rate": 1.7644771084608011e-07, | |
| "loss": 0.9185, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 1.6387434554973823, | |
| "grad_norm": 1.5506599670903491, | |
| "learning_rate": 1.739940931471239e-07, | |
| "loss": 0.8768, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.6413612565445026, | |
| "grad_norm": 1.3984936451622314, | |
| "learning_rate": 1.715560281227315e-07, | |
| "loss": 0.8728, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 1.6439790575916229, | |
| "grad_norm": 1.453272317924072, | |
| "learning_rate": 1.6913356167866578e-07, | |
| "loss": 0.8847, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.6465968586387434, | |
| "grad_norm": 1.5302879869825294, | |
| "learning_rate": 1.6672673942698922e-07, | |
| "loss": 0.8946, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 1.649214659685864, | |
| "grad_norm": 1.5604301829465825, | |
| "learning_rate": 1.6433560668520174e-07, | |
| "loss": 0.9157, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.6518324607329844, | |
| "grad_norm": 1.5376108645580229, | |
| "learning_rate": 1.6196020847539006e-07, | |
| "loss": 0.9386, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 1.6544502617801047, | |
| "grad_norm": 1.2960957446844783, | |
| "learning_rate": 1.5960058952337884e-07, | |
| "loss": 0.8951, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.6570680628272252, | |
| "grad_norm": 1.4271085017911613, | |
| "learning_rate": 1.572567942578885e-07, | |
| "loss": 0.8765, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 1.6596858638743455, | |
| "grad_norm": 1.3941354342600962, | |
| "learning_rate": 1.5492886680969964e-07, | |
| "loss": 0.9211, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.662303664921466, | |
| "grad_norm": 1.4547756229332254, | |
| "learning_rate": 1.526168510108199e-07, | |
| "loss": 0.9032, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 1.6649214659685865, | |
| "grad_norm": 2.046655662815991, | |
| "learning_rate": 1.5032079039366208e-07, | |
| "loss": 0.8988, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.6675392670157068, | |
| "grad_norm": 1.3480768897271267, | |
| "learning_rate": 1.4804072819022106e-07, | |
| "loss": 0.9378, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 1.6701570680628273, | |
| "grad_norm": 1.3812591568322627, | |
| "learning_rate": 1.45776707331262e-07, | |
| "loss": 0.9235, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.6727748691099475, | |
| "grad_norm": 1.433731226822694, | |
| "learning_rate": 1.4352877044551048e-07, | |
| "loss": 0.9036, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 1.675392670157068, | |
| "grad_norm": 1.475252375209803, | |
| "learning_rate": 1.4129695985885227e-07, | |
| "loss": 0.907, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.6780104712041886, | |
| "grad_norm": 1.6504645816597694, | |
| "learning_rate": 1.3908131759353304e-07, | |
| "loss": 0.8855, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 1.680628272251309, | |
| "grad_norm": 1.4370298290777148, | |
| "learning_rate": 1.3688188536736968e-07, | |
| "loss": 0.9286, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.6832460732984293, | |
| "grad_norm": 2.601924820023129, | |
| "learning_rate": 1.3469870459296406e-07, | |
| "loss": 0.8947, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 1.6858638743455496, | |
| "grad_norm": 1.2670411167466755, | |
| "learning_rate": 1.3253181637692324e-07, | |
| "loss": 0.8945, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.6884816753926701, | |
| "grad_norm": 1.3738351586559805, | |
| "learning_rate": 1.303812615190849e-07, | |
| "loss": 0.9443, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 1.6910994764397906, | |
| "grad_norm": 1.3428167518112526, | |
| "learning_rate": 1.2824708051175014e-07, | |
| "loss": 0.859, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.6937172774869111, | |
| "grad_norm": 1.5087146741136386, | |
| "learning_rate": 1.2612931353892074e-07, | |
| "loss": 0.8993, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 1.6963350785340314, | |
| "grad_norm": 1.3091751432038465, | |
| "learning_rate": 1.2402800047554206e-07, | |
| "loss": 0.8872, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.6989528795811517, | |
| "grad_norm": 1.4051594837211576, | |
| "learning_rate": 1.2194318088675282e-07, | |
| "loss": 0.9054, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 1.7015706806282722, | |
| "grad_norm": 1.2366099250393099, | |
| "learning_rate": 1.198748940271398e-07, | |
| "loss": 0.9225, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.7041884816753927, | |
| "grad_norm": 1.2514264913592443, | |
| "learning_rate": 1.1782317883999915e-07, | |
| "loss": 0.9377, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 1.7068062827225132, | |
| "grad_norm": 1.570847699413206, | |
| "learning_rate": 1.1578807395660206e-07, | |
| "loss": 0.8891, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.7094240837696335, | |
| "grad_norm": 1.3355965811118744, | |
| "learning_rate": 1.1376961769546889e-07, | |
| "loss": 0.9141, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 1.7120418848167538, | |
| "grad_norm": 1.508690437109559, | |
| "learning_rate": 1.1176784806164674e-07, | |
| "loss": 0.8628, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.7146596858638743, | |
| "grad_norm": 1.6773878404961995, | |
| "learning_rate": 1.0978280274599417e-07, | |
| "loss": 0.8179, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 1.7172774869109948, | |
| "grad_norm": 1.4628566180299178, | |
| "learning_rate": 1.078145191244706e-07, | |
| "loss": 0.8923, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.7198952879581153, | |
| "grad_norm": 1.5566732390176854, | |
| "learning_rate": 1.0586303425743493e-07, | |
| "loss": 0.8911, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 1.7225130890052356, | |
| "grad_norm": 1.4155543135683646, | |
| "learning_rate": 1.0392838488894462e-07, | |
| "loss": 0.8629, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.7251308900523559, | |
| "grad_norm": 1.2831149126092454, | |
| "learning_rate": 1.0201060744606637e-07, | |
| "loss": 0.8875, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 1.7277486910994764, | |
| "grad_norm": 1.4332173905157346, | |
| "learning_rate": 1.0010973803818856e-07, | |
| "loss": 0.943, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.7303664921465969, | |
| "grad_norm": 1.2134779529389657, | |
| "learning_rate": 9.822581245634321e-08, | |
| "loss": 0.8183, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 1.7329842931937174, | |
| "grad_norm": 1.4149474814170255, | |
| "learning_rate": 9.635886617252975e-08, | |
| "loss": 0.901, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.7356020942408377, | |
| "grad_norm": 1.5242078190981234, | |
| "learning_rate": 9.450893433904895e-08, | |
| "loss": 0.8263, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 1.738219895287958, | |
| "grad_norm": 1.7776241358004243, | |
| "learning_rate": 9.267605178784033e-08, | |
| "loss": 0.8908, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.7408376963350785, | |
| "grad_norm": 1.4641461126272701, | |
| "learning_rate": 9.086025302982648e-08, | |
| "loss": 0.8887, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 1.743455497382199, | |
| "grad_norm": 2.5856902698595556, | |
| "learning_rate": 8.906157225426313e-08, | |
| "loss": 0.9558, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.7460732984293195, | |
| "grad_norm": 1.448131814585754, | |
| "learning_rate": 8.728004332809514e-08, | |
| "loss": 0.848, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 1.7486910994764397, | |
| "grad_norm": 1.3537545655810488, | |
| "learning_rate": 8.55156997953197e-08, | |
| "loss": 0.871, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.7513089005235603, | |
| "grad_norm": 1.3990392212643947, | |
| "learning_rate": 8.37685748763538e-08, | |
| "loss": 0.9056, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 1.7539267015706805, | |
| "grad_norm": 1.5454399399661, | |
| "learning_rate": 8.203870146740932e-08, | |
| "loss": 0.954, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.756544502617801, | |
| "grad_norm": 1.3257827710500718, | |
| "learning_rate": 8.03261121398735e-08, | |
| "loss": 0.9104, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 1.7591623036649215, | |
| "grad_norm": 1.3550433390583934, | |
| "learning_rate": 7.86308391396956e-08, | |
| "loss": 0.8676, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.761780104712042, | |
| "grad_norm": 1.4690820882565427, | |
| "learning_rate": 7.695291438677931e-08, | |
| "loss": 0.8799, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 1.7643979057591623, | |
| "grad_norm": 1.6381490580033888, | |
| "learning_rate": 7.529236947438256e-08, | |
| "loss": 0.9297, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.7670157068062826, | |
| "grad_norm": 1.4254089921050725, | |
| "learning_rate": 7.364923566852244e-08, | |
| "loss": 0.9021, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.7696335078534031, | |
| "grad_norm": 1.6928537975880145, | |
| "learning_rate": 7.202354390738608e-08, | |
| "loss": 0.8564, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.7722513089005236, | |
| "grad_norm": 1.3023570567264096, | |
| "learning_rate": 7.041532480074819e-08, | |
| "loss": 0.9184, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 1.7748691099476441, | |
| "grad_norm": 1.3429734060010876, | |
| "learning_rate": 6.88246086293952e-08, | |
| "loss": 0.9471, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.7774869109947644, | |
| "grad_norm": 1.382194472551508, | |
| "learning_rate": 6.725142534455486e-08, | |
| "loss": 0.8766, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 1.7801047120418847, | |
| "grad_norm": 1.3774349266930637, | |
| "learning_rate": 6.569580456733204e-08, | |
| "loss": 0.8905, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.7827225130890052, | |
| "grad_norm": 1.5906615374253104, | |
| "learning_rate": 6.415777558815138e-08, | |
| "loss": 0.8966, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 1.7853403141361257, | |
| "grad_norm": 1.4753708964257082, | |
| "learning_rate": 6.263736736620551e-08, | |
| "loss": 0.9317, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.7879581151832462, | |
| "grad_norm": 1.4312204179081733, | |
| "learning_rate": 6.113460852890973e-08, | |
| "loss": 0.8454, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 1.7905759162303665, | |
| "grad_norm": 1.5371219041917656, | |
| "learning_rate": 5.964952737136353e-08, | |
| "loss": 0.9033, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.7931937172774868, | |
| "grad_norm": 1.314841769284732, | |
| "learning_rate": 5.8182151855816986e-08, | |
| "loss": 0.8834, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 1.7958115183246073, | |
| "grad_norm": 1.353813544946452, | |
| "learning_rate": 5.6732509611145284e-08, | |
| "loss": 0.9084, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.7984293193717278, | |
| "grad_norm": 1.4640193110979116, | |
| "learning_rate": 5.5300627932327706e-08, | |
| "loss": 0.929, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 1.8010471204188483, | |
| "grad_norm": 1.3860981489888715, | |
| "learning_rate": 5.388653377993324e-08, | |
| "loss": 0.9187, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.8036649214659686, | |
| "grad_norm": 1.4257965000825006, | |
| "learning_rate": 5.2490253779615133e-08, | |
| "loss": 0.8793, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 1.8062827225130889, | |
| "grad_norm": 1.3839459669807797, | |
| "learning_rate": 5.111181422160671e-08, | |
| "loss": 0.9342, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.8089005235602094, | |
| "grad_norm": 1.927472714256995, | |
| "learning_rate": 4.975124106022843e-08, | |
| "loss": 0.912, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 1.8115183246073299, | |
| "grad_norm": 1.362684938892342, | |
| "learning_rate": 4.840855991339798e-08, | |
| "loss": 0.8619, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.8141361256544504, | |
| "grad_norm": 1.4760030845035397, | |
| "learning_rate": 4.7083796062149297e-08, | |
| "loss": 0.8613, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 1.8167539267015707, | |
| "grad_norm": 1.3554051401391647, | |
| "learning_rate": 4.577697445015471e-08, | |
| "loss": 0.8376, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.819371727748691, | |
| "grad_norm": 1.6346763895111678, | |
| "learning_rate": 4.448811968325683e-08, | |
| "loss": 0.8559, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 1.8219895287958114, | |
| "grad_norm": 1.4481074116917443, | |
| "learning_rate": 4.321725602900472e-08, | |
| "loss": 0.9446, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.824607329842932, | |
| "grad_norm": 1.2980006766112568, | |
| "learning_rate": 4.196440741619678e-08, | |
| "loss": 0.8896, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 1.8272251308900525, | |
| "grad_norm": 1.4191572591347388, | |
| "learning_rate": 4.0729597434430164e-08, | |
| "loss": 0.8363, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.8298429319371727, | |
| "grad_norm": 1.411676924931839, | |
| "learning_rate": 3.9512849333657064e-08, | |
| "loss": 0.8892, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 1.8324607329842932, | |
| "grad_norm": 1.3885038260855735, | |
| "learning_rate": 3.8314186023746696e-08, | |
| "loss": 0.8561, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.8350785340314135, | |
| "grad_norm": 1.4592148849927922, | |
| "learning_rate": 3.713363007405379e-08, | |
| "loss": 0.8753, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 1.837696335078534, | |
| "grad_norm": 1.5404990433301489, | |
| "learning_rate": 3.5971203712993894e-08, | |
| "loss": 0.9085, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.8403141361256545, | |
| "grad_norm": 1.3311115738208295, | |
| "learning_rate": 3.482692882762461e-08, | |
| "loss": 0.8894, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 1.8429319371727748, | |
| "grad_norm": 1.37981516106682, | |
| "learning_rate": 3.3700826963233734e-08, | |
| "loss": 0.8637, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.8455497382198953, | |
| "grad_norm": 1.5214558709057895, | |
| "learning_rate": 3.2592919322933174e-08, | |
| "loss": 0.9005, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 1.8481675392670156, | |
| "grad_norm": 1.3717062091821015, | |
| "learning_rate": 3.150322676726025e-08, | |
| "loss": 0.8954, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.850785340314136, | |
| "grad_norm": 1.306979872125254, | |
| "learning_rate": 3.0431769813784595e-08, | |
| "loss": 0.9342, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 1.8534031413612566, | |
| "grad_norm": 1.3332532481135164, | |
| "learning_rate": 2.9378568636721836e-08, | |
| "loss": 0.9161, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.8560209424083771, | |
| "grad_norm": 1.5679258379351098, | |
| "learning_rate": 2.834364306655379e-08, | |
| "loss": 0.9414, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 1.8586387434554974, | |
| "grad_norm": 1.3241677265890193, | |
| "learning_rate": 2.7327012589655307e-08, | |
| "loss": 0.9092, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.8612565445026177, | |
| "grad_norm": 1.848580838309608, | |
| "learning_rate": 2.6328696347926783e-08, | |
| "loss": 0.9327, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 1.8638743455497382, | |
| "grad_norm": 1.6231182635223822, | |
| "learning_rate": 2.5348713138434564e-08, | |
| "loss": 0.9256, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.8664921465968587, | |
| "grad_norm": 1.370044017499312, | |
| "learning_rate": 2.43870814130559e-08, | |
| "loss": 0.9057, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 1.8691099476439792, | |
| "grad_norm": 1.3354739269648654, | |
| "learning_rate": 2.3443819278132992e-08, | |
| "loss": 0.9143, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.8717277486910995, | |
| "grad_norm": 1.378182616321603, | |
| "learning_rate": 2.251894449413061e-08, | |
| "loss": 0.9092, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 1.8743455497382198, | |
| "grad_norm": 1.3052742423435106, | |
| "learning_rate": 2.161247447530268e-08, | |
| "loss": 0.9136, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.8769633507853403, | |
| "grad_norm": 1.6339003824243274, | |
| "learning_rate": 2.0724426289363995e-08, | |
| "loss": 0.8698, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 1.8795811518324608, | |
| "grad_norm": 1.377085237822559, | |
| "learning_rate": 1.9854816657168817e-08, | |
| "loss": 0.9006, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.8821989528795813, | |
| "grad_norm": 1.5109348592590883, | |
| "learning_rate": 1.9003661952396223e-08, | |
| "loss": 0.8986, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 1.8848167539267016, | |
| "grad_norm": 1.3810204321286608, | |
| "learning_rate": 1.817097820124147e-08, | |
| "loss": 0.863, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.8874345549738218, | |
| "grad_norm": 1.314882972235073, | |
| "learning_rate": 1.7356781082115024e-08, | |
| "loss": 0.8868, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 1.8900523560209423, | |
| "grad_norm": 1.4964476218671747, | |
| "learning_rate": 1.656108592534633e-08, | |
| "loss": 0.8755, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.8926701570680629, | |
| "grad_norm": 1.3775827034693857, | |
| "learning_rate": 1.578390771289606e-08, | |
| "loss": 0.8786, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 1.8952879581151834, | |
| "grad_norm": 1.497907001459483, | |
| "learning_rate": 1.5025261078073003e-08, | |
| "loss": 0.956, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.8979057591623036, | |
| "grad_norm": 1.422027343640092, | |
| "learning_rate": 1.4285160305259836e-08, | |
| "loss": 0.9062, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 1.900523560209424, | |
| "grad_norm": 1.3537382840275476, | |
| "learning_rate": 1.3563619329643117e-08, | |
| "loss": 0.894, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.9031413612565444, | |
| "grad_norm": 1.4016382516425014, | |
| "learning_rate": 1.2860651736951278e-08, | |
| "loss": 0.8895, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 1.905759162303665, | |
| "grad_norm": 1.290142647836188, | |
| "learning_rate": 1.2176270763198825e-08, | |
| "loss": 0.8809, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.9083769633507854, | |
| "grad_norm": 1.4168614683015706, | |
| "learning_rate": 1.1510489294437431e-08, | |
| "loss": 0.9017, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 1.9109947643979057, | |
| "grad_norm": 1.3924491890195099, | |
| "learning_rate": 1.0863319866512344e-08, | |
| "loss": 0.8747, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.9136125654450262, | |
| "grad_norm": 1.5107117645316126, | |
| "learning_rate": 1.0234774664827473e-08, | |
| "loss": 0.9059, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 1.9162303664921465, | |
| "grad_norm": 1.344447744542007, | |
| "learning_rate": 9.624865524115344e-09, | |
| "loss": 0.8854, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.918848167539267, | |
| "grad_norm": 1.442508498350657, | |
| "learning_rate": 9.033603928214396e-09, | |
| "loss": 0.8964, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 1.9214659685863875, | |
| "grad_norm": 1.3923227096852724, | |
| "learning_rate": 8.461001009852809e-09, | |
| "loss": 0.8501, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.9240837696335078, | |
| "grad_norm": 1.322210909486878, | |
| "learning_rate": 7.907067550438684e-09, | |
| "loss": 0.8854, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 1.9267015706806283, | |
| "grad_norm": 1.3293207958123026, | |
| "learning_rate": 7.371813979857311e-09, | |
| "loss": 0.9489, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.9293193717277486, | |
| "grad_norm": 1.6807388495323254, | |
| "learning_rate": 6.855250376274546e-09, | |
| "loss": 0.9322, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 1.931937172774869, | |
| "grad_norm": 1.4321457195007106, | |
| "learning_rate": 6.357386465947301e-09, | |
| "loss": 0.941, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.9345549738219896, | |
| "grad_norm": 1.366512872397213, | |
| "learning_rate": 5.878231623040242e-09, | |
| "loss": 0.9164, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 1.93717277486911, | |
| "grad_norm": 1.5040506501535371, | |
| "learning_rate": 5.417794869449377e-09, | |
| "loss": 0.9216, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.9397905759162304, | |
| "grad_norm": 1.5043370580153907, | |
| "learning_rate": 4.9760848746319695e-09, | |
| "loss": 0.903, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 1.9424083769633507, | |
| "grad_norm": 1.591493042963084, | |
| "learning_rate": 4.553109955443557e-09, | |
| "loss": 0.9202, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.9450261780104712, | |
| "grad_norm": 1.286578387396067, | |
| "learning_rate": 4.148878075981299e-09, | |
| "loss": 0.8912, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 1.9476439790575917, | |
| "grad_norm": 1.6240641009201287, | |
| "learning_rate": 3.763396847433875e-09, | |
| "loss": 0.8771, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.9502617801047122, | |
| "grad_norm": 1.4006914501273882, | |
| "learning_rate": 3.3966735279384875e-09, | |
| "loss": 0.8407, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 1.9528795811518325, | |
| "grad_norm": 1.2527953374854444, | |
| "learning_rate": 3.0487150224437487e-09, | |
| "loss": 0.8606, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.9554973821989527, | |
| "grad_norm": 1.3573387159729935, | |
| "learning_rate": 2.7195278825801195e-09, | |
| "loss": 0.8481, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 1.9581151832460733, | |
| "grad_norm": 1.8268988010137661, | |
| "learning_rate": 2.4091183065362285e-09, | |
| "loss": 0.9248, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.9607329842931938, | |
| "grad_norm": 1.286277864510778, | |
| "learning_rate": 2.1174921389424114e-09, | |
| "loss": 0.8809, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 1.9633507853403143, | |
| "grad_norm": 1.3340327743313127, | |
| "learning_rate": 1.8446548707604648e-09, | |
| "loss": 0.9177, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.9659685863874345, | |
| "grad_norm": 1.3617686042968828, | |
| "learning_rate": 1.5906116391801726e-09, | |
| "loss": 0.9111, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 1.9685863874345548, | |
| "grad_norm": 1.5794964764984032, | |
| "learning_rate": 1.355367227523052e-09, | |
| "loss": 0.9111, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.9712041884816753, | |
| "grad_norm": 1.4937021056451114, | |
| "learning_rate": 1.1389260651518684e-09, | |
| "loss": 0.8331, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 1.9738219895287958, | |
| "grad_norm": 1.4048693955875151, | |
| "learning_rate": 9.412922273871471e-10, | |
| "loss": 0.909, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.9764397905759163, | |
| "grad_norm": 1.5558437033842454, | |
| "learning_rate": 7.624694354309014e-10, | |
| "loss": 0.8696, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 1.9790575916230366, | |
| "grad_norm": 1.3565360114481129, | |
| "learning_rate": 6.02461056296244e-10, | |
| "loss": 0.9147, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.981675392670157, | |
| "grad_norm": 1.4504340570028544, | |
| "learning_rate": 4.6127010274399356e-10, | |
| "loss": 0.9321, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 1.9842931937172774, | |
| "grad_norm": 1.6341119346475543, | |
| "learning_rate": 3.3889923322594217e-10, | |
| "loss": 0.9144, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.986910994764398, | |
| "grad_norm": 1.3600992136272299, | |
| "learning_rate": 2.353507518350062e-10, | |
| "loss": 0.8706, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 1.9895287958115184, | |
| "grad_norm": 1.8493583140551575, | |
| "learning_rate": 1.506266082615948e-10, | |
| "loss": 0.8717, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.9921465968586387, | |
| "grad_norm": 1.3957149457130282, | |
| "learning_rate": 8.472839775719442e-11, | |
| "loss": 0.9138, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 1.9947643979057592, | |
| "grad_norm": 1.6670788746427903, | |
| "learning_rate": 3.765736110383777e-11, | |
| "loss": 0.9377, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.9973821989528795, | |
| "grad_norm": 1.3773872289172804, | |
| "learning_rate": 9.414384591233116e-12, | |
| "loss": 0.9113, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 1.4031461299714583, | |
| "learning_rate": 0.0, | |
| "loss": 0.9218, | |
| "step": 764 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 764, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 191, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 996711585546240.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
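
The file above is a standard Hugging Face `trainer_state.json`, so its `log_history` array can be inspected directly with the Python standard library. A minimal sketch follows; the filename and the particular summary statistics are illustrative assumptions on my part, not part of the checkpoint itself:

```python
# Hypothetical usage sketch: reads the trainer_state.json shown above and
# summarizes the end of training. Field names ("log_history", "step",
# "epoch", "loss", "learning_rate") match the log structure in this file;
# the path and the 50-step window are arbitrary choices for illustration.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

history = state["log_history"]  # one dict per logged optimizer step

# Confirm the run finished: 764 steps over 2 epochs, LR annealed to 0.0.
final = history[-1]
print(f"final step {final['step']} (epoch {final['epoch']}): "
      f"loss={final['loss']}, lr={final['learning_rate']}")

# Smooth the noisy per-step loss with a simple trailing mean.
window = [entry["loss"] for entry in history[-50:] if "loss" in entry]
print(f"mean loss over last {len(window)} logged steps: "
      f"{sum(window) / len(window):.4f}")
```

Since `logging_steps` is 1, every optimizer step appears in `log_history`, so a trailing window like this is a reasonable way to read a final loss level out of the per-step noise.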