{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9983193277310924,
  "eval_steps": 500,
  "global_step": 594,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.005042016806722689, "grad_norm": 9.45597365399993, "learning_rate": 0.0, "loss": 1.7242, "step": 1},
    {"epoch": 0.010084033613445379, "grad_norm": 9.218921810032594, "learning_rate": 1.6666666666666668e-07, "loss": 1.9603, "step": 2},
    {"epoch": 0.015126050420168067, "grad_norm": 9.19364568473009, "learning_rate": 3.3333333333333335e-07, "loss": 1.7815, "step": 3},
    {"epoch": 0.020168067226890758, "grad_norm": 9.753359655679406, "learning_rate": 5.000000000000001e-07, "loss": 1.8671, "step": 4},
    {"epoch": 0.025210084033613446, "grad_norm": 10.188684139684757, "learning_rate": 6.666666666666667e-07, "loss": 1.8868, "step": 5},
    {"epoch": 0.030252100840336135, "grad_norm": 9.253535763532076, "learning_rate": 8.333333333333333e-07, "loss": 1.8821, "step": 6},
    {"epoch": 0.03529411764705882, "grad_norm": 9.452472463389428, "learning_rate": 1.0000000000000002e-06, "loss": 1.8398, "step": 7},
    {"epoch": 0.040336134453781515, "grad_norm": 8.338459992866273, "learning_rate": 1.1666666666666668e-06, "loss": 1.7522, "step": 8},
    {"epoch": 0.0453781512605042, "grad_norm": 8.599040436901118, "learning_rate": 1.3333333333333334e-06, "loss": 1.7879, "step": 9},
    {"epoch": 0.05042016806722689, "grad_norm": 9.204139051227466, "learning_rate": 1.5e-06, "loss": 1.8949, "step": 10},
    {"epoch": 0.05546218487394958, "grad_norm": 8.383986517840034, "learning_rate": 1.6666666666666667e-06, "loss": 1.7568, "step": 11},
    {"epoch": 0.06050420168067227, "grad_norm": 6.14215523192106, "learning_rate": 1.8333333333333333e-06, "loss": 1.6243, "step": 12},
    {"epoch": 0.06554621848739496, "grad_norm": 5.998914335428499, "learning_rate": 2.0000000000000003e-06, "loss": 1.5973, "step": 13},
    {"epoch": 0.07058823529411765, "grad_norm": 5.047474738743573, "learning_rate": 2.166666666666667e-06, "loss": 1.3774, "step": 14},
    {"epoch": 0.07563025210084033, "grad_norm": 5.330740621399064, "learning_rate": 2.3333333333333336e-06, "loss": 1.5953, "step": 15},
    {"epoch": 0.08067226890756303, "grad_norm": 3.3659526026887012, "learning_rate": 2.5e-06, "loss": 1.3746, "step": 16},
    {"epoch": 0.08571428571428572, "grad_norm": 3.639732034816691, "learning_rate": 2.666666666666667e-06, "loss": 1.4698, "step": 17},
    {"epoch": 0.0907563025210084, "grad_norm": 3.461514147091586, "learning_rate": 2.8333333333333335e-06, "loss": 1.4229, "step": 18},
    {"epoch": 0.0957983193277311, "grad_norm": 3.765309579932919, "learning_rate": 3e-06, "loss": 1.3948, "step": 19},
    {"epoch": 0.10084033613445378, "grad_norm": 2.825230202760748, "learning_rate": 3.1666666666666667e-06, "loss": 1.3286, "step": 20},
    {"epoch": 0.10588235294117647, "grad_norm": 2.387015147619193, "learning_rate": 3.3333333333333333e-06, "loss": 1.2574, "step": 21},
    {"epoch": 0.11092436974789915, "grad_norm": 2.6592293064240176, "learning_rate": 3.5e-06, "loss": 1.2994, "step": 22},
    {"epoch": 0.11596638655462185, "grad_norm": 2.9338685422018163, "learning_rate": 3.6666666666666666e-06, "loss": 1.271, "step": 23},
    {"epoch": 0.12100840336134454, "grad_norm": 2.8053283243940923, "learning_rate": 3.833333333333334e-06, "loss": 1.239, "step": 24},
    {"epoch": 0.12605042016806722, "grad_norm": 2.4764651014882673, "learning_rate": 4.000000000000001e-06, "loss": 1.2632, "step": 25},
    {"epoch": 0.13109243697478992, "grad_norm": 4.193230652323676, "learning_rate": 4.166666666666667e-06, "loss": 1.2494, "step": 26},
    {"epoch": 0.1361344537815126, "grad_norm": 2.166632601601999, "learning_rate": 4.333333333333334e-06, "loss": 1.1772, "step": 27},
    {"epoch": 0.1411764705882353, "grad_norm": 2.0456983888545133, "learning_rate": 4.5e-06, "loss": 1.3323, "step": 28},
    {"epoch": 0.146218487394958, "grad_norm": 1.9041534025850353, "learning_rate": 4.666666666666667e-06, "loss": 1.123, "step": 29},
    {"epoch": 0.15126050420168066, "grad_norm": 1.7473372136225975, "learning_rate": 4.833333333333333e-06, "loss": 1.1116, "step": 30},
    {"epoch": 0.15630252100840336, "grad_norm": 1.9237786068741898, "learning_rate": 5e-06, "loss": 1.2038, "step": 31},
    {"epoch": 0.16134453781512606, "grad_norm": 1.9862371515679214, "learning_rate": 5.1666666666666675e-06, "loss": 1.2171, "step": 32},
    {"epoch": 0.16638655462184873, "grad_norm": 1.5922593116941988, "learning_rate": 5.333333333333334e-06, "loss": 1.0193, "step": 33},
    {"epoch": 0.17142857142857143, "grad_norm": 1.6830455258736572, "learning_rate": 5.500000000000001e-06, "loss": 1.0761, "step": 34},
    {"epoch": 0.17647058823529413, "grad_norm": 1.594143028453368, "learning_rate": 5.666666666666667e-06, "loss": 1.1126, "step": 35},
    {"epoch": 0.1815126050420168, "grad_norm": 1.9420003685481775, "learning_rate": 5.833333333333334e-06, "loss": 1.1203, "step": 36},
    {"epoch": 0.1865546218487395, "grad_norm": 1.5815112240806883, "learning_rate": 6e-06, "loss": 1.0293, "step": 37},
    {"epoch": 0.1915966386554622, "grad_norm": 1.4697006996217221, "learning_rate": 6.166666666666667e-06, "loss": 0.995, "step": 38},
    {"epoch": 0.19663865546218487, "grad_norm": 1.5886739084366435, "learning_rate": 6.333333333333333e-06, "loss": 1.1051, "step": 39},
    {"epoch": 0.20168067226890757, "grad_norm": 1.3717225438634324, "learning_rate": 6.5000000000000004e-06, "loss": 1.0817, "step": 40},
    {"epoch": 0.20672268907563024, "grad_norm": 1.4586233032739204, "learning_rate": 6.666666666666667e-06, "loss": 0.9949, "step": 41},
    {"epoch": 0.21176470588235294, "grad_norm": 1.4404526895251804, "learning_rate": 6.833333333333334e-06, "loss": 1.0369, "step": 42},
    {"epoch": 0.21680672268907564, "grad_norm": 1.5011071614715905, "learning_rate": 7e-06, "loss": 1.0126, "step": 43},
    {"epoch": 0.2218487394957983, "grad_norm": 1.446801500279163, "learning_rate": 7.166666666666667e-06, "loss": 0.9829, "step": 44},
    {"epoch": 0.226890756302521, "grad_norm": 1.3157845464395648, "learning_rate": 7.333333333333333e-06, "loss": 0.9432, "step": 45},
    {"epoch": 0.2319327731092437, "grad_norm": 1.3291092123967403, "learning_rate": 7.500000000000001e-06, "loss": 0.9518, "step": 46},
    {"epoch": 0.23697478991596638, "grad_norm": 1.5105509029003468, "learning_rate": 7.666666666666667e-06, "loss": 1.0235, "step": 47},
    {"epoch": 0.24201680672268908, "grad_norm": 1.420355667391472, "learning_rate": 7.833333333333333e-06, "loss": 0.9567, "step": 48},
    {"epoch": 0.24705882352941178, "grad_norm": 1.463732709856337, "learning_rate": 8.000000000000001e-06, "loss": 1.0417, "step": 49},
    {"epoch": 0.25210084033613445, "grad_norm": 1.4275241446789713, "learning_rate": 8.166666666666668e-06, "loss": 1.0347, "step": 50},
    {"epoch": 0.2571428571428571, "grad_norm": 1.309592587931707, "learning_rate": 8.333333333333334e-06, "loss": 0.9524, "step": 51},
    {"epoch": 0.26218487394957984, "grad_norm": 1.3344872488030621, "learning_rate": 8.5e-06, "loss": 1.0684, "step": 52},
    {"epoch": 0.2672268907563025, "grad_norm": 1.3533956797177575, "learning_rate": 8.666666666666668e-06, "loss": 0.9501, "step": 53},
    {"epoch": 0.2722689075630252, "grad_norm": 1.4422509166091777, "learning_rate": 8.833333333333334e-06, "loss": 0.9452, "step": 54},
    {"epoch": 0.2773109243697479, "grad_norm": 1.3534627088209181, "learning_rate": 9e-06, "loss": 0.9243, "step": 55},
    {"epoch": 0.2823529411764706, "grad_norm": 1.370929089587996, "learning_rate": 9.166666666666666e-06, "loss": 0.9577, "step": 56},
    {"epoch": 0.28739495798319326, "grad_norm": 1.34141912977082, "learning_rate": 9.333333333333334e-06, "loss": 0.9216, "step": 57},
    {"epoch": 0.292436974789916, "grad_norm": 1.437190020022949, "learning_rate": 9.5e-06, "loss": 0.986, "step": 58},
    {"epoch": 0.29747899159663865, "grad_norm": 1.3190591357074484, "learning_rate": 9.666666666666667e-06, "loss": 1.0163, "step": 59},
    {"epoch": 0.3025210084033613, "grad_norm": 1.3230400720636633, "learning_rate": 9.833333333333333e-06, "loss": 0.9071, "step": 60},
    {"epoch": 0.30756302521008405, "grad_norm": 1.570821042981294, "learning_rate": 1e-05, "loss": 1.0532, "step": 61},
    {"epoch": 0.3126050420168067, "grad_norm": 1.3817712282096664, "learning_rate": 9.999913472135126e-06, "loss": 0.9497, "step": 62},
    {"epoch": 0.3176470588235294, "grad_norm": 1.3461235016869455, "learning_rate": 9.99965389153533e-06, "loss": 0.9656, "step": 63},
    {"epoch": 0.3226890756302521, "grad_norm": 1.2703045215015534, "learning_rate": 9.999221267184993e-06, "loss": 0.8563, "step": 64},
    {"epoch": 0.3277310924369748, "grad_norm": 1.4463044763025328, "learning_rate": 9.998615614057743e-06, "loss": 0.9743, "step": 65},
    {"epoch": 0.33277310924369746, "grad_norm": 1.2126520135581191, "learning_rate": 9.997836953115927e-06, "loss": 0.8256, "step": 66},
    {"epoch": 0.3378151260504202, "grad_norm": 1.465456256707118, "learning_rate": 9.996885311309892e-06, "loss": 0.9112, "step": 67},
    {"epoch": 0.34285714285714286, "grad_norm": 1.3774012861831768, "learning_rate": 9.995760721577053e-06, "loss": 1.0031, "step": 68},
    {"epoch": 0.34789915966386553, "grad_norm": 1.214727510886685, "learning_rate": 9.994463222840748e-06, "loss": 0.8777, "step": 69},
    {"epoch": 0.35294117647058826, "grad_norm": 1.3372556283226344, "learning_rate": 9.992992860008893e-06, "loss": 0.9503, "step": 70},
    {"epoch": 0.35798319327731093, "grad_norm": 1.2629663699758409, "learning_rate": 9.991349683972435e-06, "loss": 0.9707, "step": 71},
    {"epoch": 0.3630252100840336, "grad_norm": 1.2961666438854509, "learning_rate": 9.989533751603578e-06, "loss": 0.8987, "step": 72},
    {"epoch": 0.3680672268907563, "grad_norm": 1.3451690514655665, "learning_rate": 9.987545125753818e-06, "loss": 0.9614, "step": 73},
    {"epoch": 0.373109243697479, "grad_norm": 1.3824819884360038, "learning_rate": 9.985383875251783e-06, "loss": 0.9101, "step": 74},
    {"epoch": 0.37815126050420167, "grad_norm": 1.290324816657544, "learning_rate": 9.983050074900824e-06, "loss": 0.8901, "step": 75},
    {"epoch": 0.3831932773109244, "grad_norm": 1.3785449206810632, "learning_rate": 9.980543805476447e-06, "loss": 0.9305, "step": 76},
    {"epoch": 0.38823529411764707, "grad_norm": 1.2723741333137952, "learning_rate": 9.977865153723508e-06, "loss": 0.9145, "step": 77},
    {"epoch": 0.39327731092436974, "grad_norm": 1.3277787150964286, "learning_rate": 9.975014212353212e-06, "loss": 0.9386, "step": 78},
    {"epoch": 0.3983193277310924, "grad_norm": 1.300378629259356, "learning_rate": 9.971991080039912e-06, "loss": 0.9072, "step": 79},
    {"epoch": 0.40336134453781514, "grad_norm": 1.3180887220440103, "learning_rate": 9.968795861417676e-06, "loss": 0.8538, "step": 80},
    {"epoch": 0.4084033613445378, "grad_norm": 1.2852565908527667, "learning_rate": 9.965428667076687e-06, "loss": 0.8625, "step": 81},
    {"epoch": 0.4134453781512605, "grad_norm": 1.22082061679436, "learning_rate": 9.961889613559396e-06, "loss": 0.8002, "step": 82},
    {"epoch": 0.4184873949579832, "grad_norm": 1.3948047447367582, "learning_rate": 9.958178823356503e-06, "loss": 0.9563, "step": 83},
    {"epoch": 0.4235294117647059, "grad_norm": 1.32125427246041, "learning_rate": 9.954296424902709e-06, "loss": 0.9009, "step": 84},
    {"epoch": 0.42857142857142855, "grad_norm": 1.2664915782700163, "learning_rate": 9.950242552572272e-06, "loss": 0.8489, "step": 85},
    {"epoch": 0.4336134453781513, "grad_norm": 1.273298827077617, "learning_rate": 9.946017346674362e-06, "loss": 0.847, "step": 86},
    {"epoch": 0.43865546218487395, "grad_norm": 1.328680054216705, "learning_rate": 9.941620953448195e-06, "loss": 0.9382, "step": 87},
    {"epoch": 0.4436974789915966, "grad_norm": 1.263646905073375, "learning_rate": 9.937053525057977e-06, "loss": 0.8991, "step": 88},
    {"epoch": 0.44873949579831934, "grad_norm": 1.209796673070386, "learning_rate": 9.932315219587641e-06, "loss": 0.8611, "step": 89},
    {"epoch": 0.453781512605042, "grad_norm": 1.1317133515894529, "learning_rate": 9.927406201035368e-06, "loss": 0.8254, "step": 90},
    {"epoch": 0.4588235294117647, "grad_norm": 1.2581352252268798, "learning_rate": 9.922326639307918e-06, "loss": 0.8186, "step": 91},
    {"epoch": 0.4638655462184874, "grad_norm": 1.1615726675287243, "learning_rate": 9.917076710214739e-06, "loss": 0.8217, "step": 92},
    {"epoch": 0.4689075630252101, "grad_norm": 1.3906544125113194, "learning_rate": 9.911656595461899e-06, "loss": 0.9606, "step": 93},
    {"epoch": 0.47394957983193275, "grad_norm": 1.3491688269700184, "learning_rate": 9.906066482645774e-06, "loss": 0.8865, "step": 94},
    {"epoch": 0.4789915966386555, "grad_norm": 1.2884319333617182, "learning_rate": 9.900306565246579e-06, "loss": 0.8608, "step": 95},
    {"epoch": 0.48403361344537815, "grad_norm": 1.332999472417029, "learning_rate": 9.894377042621654e-06, "loss": 0.8476, "step": 96},
    {"epoch": 0.4890756302521008, "grad_norm": 1.3206768360556793, "learning_rate": 9.888278119998573e-06, "loss": 0.898, "step": 97},
    {"epoch": 0.49411764705882355, "grad_norm": 1.3732673184556148, "learning_rate": 9.882010008468038e-06, "loss": 0.9482, "step": 98},
    {"epoch": 0.4991596638655462, "grad_norm": 1.4284063475101123, "learning_rate": 9.875572924976568e-06, "loss": 0.8932, "step": 99},
    {"epoch": 0.5042016806722689, "grad_norm": 1.249757410129038, "learning_rate": 9.868967092319003e-06, "loss": 0.9113, "step": 100},
    {"epoch": 0.5092436974789916, "grad_norm": 1.2033755235104269, "learning_rate": 9.86219273913078e-06, "loss": 0.8373, "step": 101},
    {"epoch": 0.5142857142857142, "grad_norm": 1.3285676372655046, "learning_rate": 9.855250099880026e-06, "loss": 0.82, "step": 102},
    {"epoch": 0.519327731092437, "grad_norm": 1.280372963776325, "learning_rate": 9.848139414859441e-06, "loss": 0.9269, "step": 103},
    {"epoch": 0.5243697478991597, "grad_norm": 1.3597201294098022, "learning_rate": 9.840860930177984e-06, "loss": 0.8917, "step": 104},
    {"epoch": 0.5294117647058824, "grad_norm": 1.3044841757394627, "learning_rate": 9.833414897752346e-06, "loss": 0.8242, "step": 105},
    {"epoch": 0.534453781512605, "grad_norm": 1.2237707733265701, "learning_rate": 9.825801575298248e-06, "loss": 0.8369, "step": 106},
    {"epoch": 0.5394957983193277, "grad_norm": 1.2984723776565605, "learning_rate": 9.818021226321502e-06, "loss": 0.8687, "step": 107},
    {"epoch": 0.5445378151260504, "grad_norm": 1.3966505679016854, "learning_rate": 9.8100741201089e-06, "loss": 0.8698, "step": 108},
    {"epoch": 0.5495798319327732, "grad_norm": 1.3695596995593027, "learning_rate": 9.801960531718898e-06, "loss": 0.9224, "step": 109},
    {"epoch": 0.5546218487394958, "grad_norm": 1.2219956732497297, "learning_rate": 9.793680741972084e-06, "loss": 0.7909, "step": 110},
    {"epoch": 0.5596638655462185, "grad_norm": 1.1958717679101365, "learning_rate": 9.785235037441473e-06, "loss": 0.8222, "step": 111},
    {"epoch": 0.5647058823529412, "grad_norm": 1.3284406137942217, "learning_rate": 9.77662371044258e-06, "loss": 0.9698, "step": 112},
    {"epoch": 0.5697478991596638, "grad_norm": 1.4005342916908725, "learning_rate": 9.767847059023292e-06, "loss": 0.8141, "step": 113},
    {"epoch": 0.5747899159663865, "grad_norm": 1.3280058867861344, "learning_rate": 9.75890538695358e-06, "loss": 0.8281, "step": 114},
    {"epoch": 0.5798319327731093, "grad_norm": 1.348332178712391, "learning_rate": 9.749799003714954e-06, "loss": 0.8174, "step": 115},
    {"epoch": 0.584873949579832, "grad_norm": 1.345901958116435, "learning_rate": 9.74052822448978e-06, "loss": 0.8662, "step": 116},
    {"epoch": 0.5899159663865546, "grad_norm": 1.4938772005815362, "learning_rate": 9.731093370150349e-06, "loss": 0.9227, "step": 117},
    {"epoch": 0.5949579831932773, "grad_norm": 1.5782055001938107, "learning_rate": 9.721494767247779e-06, "loss": 0.9292, "step": 118},
    {"epoch": 0.6, "grad_norm": 1.2813061736782214, "learning_rate": 9.71173274800072e-06, "loss": 0.808, "step": 119},
    {"epoch": 0.6050420168067226, "grad_norm": 1.3387521092808896, "learning_rate": 9.70180765028384e-06, "loss": 0.8052, "step": 120},
    {"epoch": 0.6100840336134454, "grad_norm": 1.1971567112258479, "learning_rate": 9.691719817616148e-06, "loss": 0.8321, "step": 121},
    {"epoch": 0.6151260504201681, "grad_norm": 1.4022847044925355, "learning_rate": 9.681469599149093e-06, "loss": 0.8362, "step": 122},
    {"epoch": 0.6201680672268908, "grad_norm": 1.4458562904255674, "learning_rate": 9.671057349654481e-06, "loss": 0.8753, "step": 123},
    {"epoch": 0.6252100840336134, "grad_norm": 1.3489812277335955, "learning_rate": 9.660483429512198e-06, "loss": 0.8406, "step": 124},
    {"epoch": 0.6302521008403361, "grad_norm": 1.2541520148654464, "learning_rate": 9.649748204697741e-06, "loss": 0.8096, "step": 125},
    {"epoch": 0.6352941176470588, "grad_norm": 1.4166136476450861, "learning_rate": 9.63885204676954e-06, "loss": 0.9279, "step": 126},
    {"epoch": 0.6403361344537815, "grad_norm": 1.2096305649684784, "learning_rate": 9.627795332856107e-06, "loss": 0.8668, "step": 127},
    {"epoch": 0.6453781512605042, "grad_norm": 1.0817129947497557, "learning_rate": 9.616578445642982e-06, "loss": 0.8021, "step": 128},
    {"epoch": 0.6504201680672269, "grad_norm": 1.2857282530529068, "learning_rate": 9.605201773359485e-06, "loss": 0.9031, "step": 129},
    {"epoch": 0.6554621848739496, "grad_norm": 1.2909981390159206, "learning_rate": 9.59366570976528e-06, "loss": 0.9028, "step": 130},
    {"epoch": 0.6605042016806723, "grad_norm": 1.277642300275485, "learning_rate": 9.581970654136752e-06, "loss": 0.8206, "step": 131},
    {"epoch": 0.6655462184873949, "grad_norm": 1.2618202348884826, "learning_rate": 9.570117011253173e-06, "loss": 0.8038, "step": 132},
    {"epoch": 0.6705882352941176, "grad_norm": 1.3158796346136465, "learning_rate": 9.55810519138271e-06, "loss": 0.8594, "step": 133},
    {"epoch": 0.6756302521008404, "grad_norm": 1.464049668724664, "learning_rate": 9.545935610268213e-06, "loss": 0.8946, "step": 134},
    {"epoch": 0.680672268907563, "grad_norm": 1.3568598282729065, "learning_rate": 9.533608689112827e-06, "loss": 0.8747, "step": 135},
    {"epoch": 0.6857142857142857, "grad_norm": 1.459842199207566, "learning_rate": 9.521124854565425e-06, "loss": 0.8665, "step": 136},
    {"epoch": 0.6907563025210084, "grad_norm": 1.2651754016717647, "learning_rate": 9.508484538705823e-06, "loss": 0.8172, "step": 137},
    {"epoch": 0.6957983193277311, "grad_norm": 1.3148283789857567, "learning_rate": 9.495688179029838e-06, "loss": 0.8159, "step": 138},
    {"epoch": 0.7008403361344537, "grad_norm": 1.3062514406684878, "learning_rate": 9.482736218434144e-06, "loss": 0.772, "step": 139},
    {"epoch": 0.7058823529411765, "grad_norm": 1.233357901449911, "learning_rate": 9.469629105200937e-06, "loss": 0.812, "step": 140},
    {"epoch": 0.7109243697478992, "grad_norm": 1.4036092051385856, "learning_rate": 9.45636729298243e-06, "loss": 0.9176, "step": 141},
    {"epoch": 0.7159663865546219, "grad_norm": 1.2475986918890871, "learning_rate": 9.442951240785135e-06, "loss": 0.9227, "step": 142},
    {"epoch": 0.7210084033613445, "grad_norm": 1.33327258291273, "learning_rate": 9.429381412954e-06, "loss": 0.8406, "step": 143},
    {"epoch": 0.7260504201680672, "grad_norm": 1.2457766641422836, "learning_rate": 9.415658279156312e-06, "loss": 0.7944, "step": 144},
    {"epoch": 0.7310924369747899, "grad_norm": 1.214604972950531, "learning_rate": 9.401782314365458e-06, "loss": 0.7889, "step": 145},
    {"epoch": 0.7361344537815127, "grad_norm": 1.4091496584822034, "learning_rate": 9.387753998844482e-06, "loss": 0.8542, "step": 146},
    {"epoch": 0.7411764705882353, "grad_norm": 1.336371637577696, "learning_rate": 9.37357381812946e-06, "loss": 0.8713, "step": 147},
    {"epoch": 0.746218487394958, "grad_norm": 1.2559095107113698, "learning_rate": 9.359242263012693e-06, "loss": 0.8405, "step": 148},
    {"epoch": 0.7512605042016807, "grad_norm": 1.371982879040437, "learning_rate": 9.344759829525734e-06, "loss": 0.8666, "step": 149},
    {"epoch": 0.7563025210084033, "grad_norm": 1.23974913873784, "learning_rate": 9.330127018922195e-06, "loss": 0.7429, "step": 150},
    {"epoch": 0.761344537815126, "grad_norm": 1.3741045518217379, "learning_rate": 9.315344337660422e-06, "loss": 0.8649, "step": 151},
    {"epoch": 0.7663865546218488, "grad_norm": 1.348659089360585, "learning_rate": 9.300412297385954e-06, "loss": 0.8614, "step": 152},
    {"epoch": 0.7714285714285715, "grad_norm": 1.199362811459465, "learning_rate": 9.285331414913816e-06, "loss": 0.837, "step": 153},
    {"epoch": 0.7764705882352941, "grad_norm": 1.2184218309322916, "learning_rate": 9.270102212210632e-06, "loss": 0.8404, "step": 154},
    {"epoch": 0.7815126050420168, "grad_norm": 1.386612554465055, "learning_rate": 9.254725216376562e-06, "loss": 0.9221, "step": 155},
    {"epoch": 0.7865546218487395, "grad_norm": 1.3380478699356555, "learning_rate": 9.239200959627048e-06, "loss": 0.8627, "step": 156},
    {"epoch": 0.7915966386554621, "grad_norm": 1.4014570562834296, "learning_rate": 9.223529979274411e-06, "loss": 0.8525, "step": 157},
    {"epoch": 0.7966386554621848, "grad_norm": 1.3172489244042282, "learning_rate": 9.207712817709237e-06, "loss": 0.7901, "step": 158},
    {"epoch": 0.8016806722689076, "grad_norm": 1.354483035270781, "learning_rate": 9.191750022381613e-06, "loss": 0.865, "step": 159},
    {"epoch": 0.8067226890756303, "grad_norm": 1.2415343975219086, "learning_rate": 9.175642145782179e-06, "loss": 0.7898, "step": 160},
    {"epoch": 0.8117647058823529, "grad_norm": 1.2532359973917484, "learning_rate": 9.159389745423003e-06, "loss": 0.8372, "step": 161},
    {"epoch": 0.8168067226890756, "grad_norm": 1.2390725118364732, "learning_rate": 9.142993383818284e-06, "loss": 0.8383, "step": 162},
    {"epoch": 0.8218487394957983, "grad_norm": 1.3766117307822159, "learning_rate": 9.126453628464889e-06, "loss": 0.8151, "step": 163},
    {"epoch": 0.826890756302521, "grad_norm": 1.3256804846243377, "learning_rate": 9.109771051822702e-06, "loss": 0.8444, "step": 164},
    {"epoch": 0.8319327731092437, "grad_norm": 1.3520618668694473, "learning_rate": 9.09294623129482e-06, "loss": 0.8672, "step": 165},
    {"epoch": 0.8369747899159664, "grad_norm": 1.329653882039925, "learning_rate": 9.07597974920756e-06, "loss": 0.8168, "step": 166},
    {"epoch": 0.8420168067226891, "grad_norm": 1.3543281390803807, "learning_rate": 9.058872192790314e-06, "loss": 0.9118, "step": 167},
    {"epoch": 0.8470588235294118, "grad_norm": 1.3456977881970305, "learning_rate": 9.041624154155208e-06, "loss": 0.8515, "step": 168},
    {"epoch": 0.8521008403361344, "grad_norm": 1.297767613562501, "learning_rate": 9.02423623027663e-06, "loss": 0.7417, "step": 169},
    {"epoch": 0.8571428571428571, "grad_norm": 1.2894576740180352, "learning_rate": 9.006709022970547e-06, "loss": 0.8408, "step": 170},
    {"epoch": 0.8621848739495799, "grad_norm": 1.2240598626483896, "learning_rate": 8.98904313887369e-06, "loss": 0.7358, "step": 171},
    {"epoch": 0.8672268907563025, "grad_norm": 1.1890744366393113, "learning_rate": 8.971239189422555e-06, "loss": 0.8322, "step": 172},
    {"epoch": 0.8722689075630252, "grad_norm": 1.3386067991043302, "learning_rate": 8.953297790832231e-06, "loss": 0.8411, "step": 173},
    {"epoch": 0.8773109243697479, "grad_norm": 1.408000314117784, "learning_rate": 8.935219564075087e-06, "loss": 0.8036, "step": 174},
    {"epoch": 0.8823529411764706, "grad_norm": 1.3426412490545896, "learning_rate": 8.917005134859263e-06, "loss": 0.8035, "step": 175},
    {"epoch": 0.8873949579831932, "grad_norm": 1.4645291848377162, "learning_rate": 8.89865513360703e-06, "loss": 0.8392, "step": 176},
    {"epoch": 0.892436974789916, "grad_norm": 1.2117719390717796, "learning_rate": 8.88017019543296e-06, "loss": 0.8328, "step": 177},
    {"epoch": 0.8974789915966387, "grad_norm": 1.3344830085574295, "learning_rate": 8.861550960121946e-06, "loss": 0.8543, "step": 178},
    {"epoch": 0.9025210084033614, "grad_norm": 1.4853304361578643, "learning_rate": 8.842798072107055e-06, "loss": 0.8512, "step": 179},
    {"epoch": 0.907563025210084, "grad_norm": 1.2284352653979531, "learning_rate": 8.823912180447237e-06, "loss": 0.8598, "step": 180},
    {"epoch": 0.9126050420168067, "grad_norm": 1.37221802812512, "learning_rate": 8.804893938804839e-06, "loss": 0.8613, "step": 181},
    {"epoch": 0.9176470588235294, "grad_norm": 1.4397712752139291, "learning_rate": 8.785744005423003e-06, "loss": 0.8192, "step": 182},
    {"epoch": 0.9226890756302522, "grad_norm": 1.4307484306743805, "learning_rate": 8.766463043102864e-06, "loss": 0.8114, "step": 183},
    {"epoch": 0.9277310924369748, "grad_norm": 1.4036453214728524, "learning_rate": 8.747051719180626e-06, "loss": 0.8922, "step": 184},
    {"epoch": 0.9327731092436975, "grad_norm": 1.4752551479904314, "learning_rate": 8.727510705504453e-06, "loss": 0.8932, "step": 185},
    {"epoch": 0.9378151260504202, "grad_norm": 1.322337640774981, "learning_rate": 8.707840678411223e-06, "loss": 0.7998, "step": 186},
    {"epoch": 0.9428571428571428, "grad_norm": 1.2136277321616975, "learning_rate": 8.688042318703111e-06, "loss": 0.7416, "step": 187},
    {"epoch": 0.9478991596638655, "grad_norm": 1.342849040104635, "learning_rate": 8.66811631162404e-06, "loss": 0.8685, "step": 188},
    {"epoch": 0.9529411764705882, "grad_norm": 1.5250386207067939, "learning_rate": 8.648063346835943e-06, "loss": 0.8485, "step": 189},
    {"epoch": 0.957983193277311, "grad_norm": 1.3173191874193797, "learning_rate": 8.627884118394913e-06, "loss": 0.8286, "step": 190},
    {"epoch": 0.9630252100840336, "grad_norm": 1.32796081599915, "learning_rate": 8.607579324727175e-06, "loss": 0.8544, "step": 191},
    {"epoch": 0.9680672268907563, "grad_norm": 1.350363153783161, "learning_rate": 8.5871496686049e-06, "loss": 0.8102, "step": 192},
    {"epoch": 0.973109243697479, "grad_norm": 1.3655669107662696, "learning_rate": 8.566595857121902e-06, "loss": 0.8122, "step": 193},
    {"epoch": 0.9781512605042016, "grad_norm": 1.3452211499259599, "learning_rate": 8.545918601669147e-06, "loss": 0.8834, "step": 194},
    {"epoch": 0.9831932773109243, "grad_norm": 1.3376410418915317, "learning_rate": 8.525118617910144e-06, "loss": 0.8148, "step": 195},
    {"epoch": 0.9882352941176471, "grad_norm": 1.2489273918302621, "learning_rate": 8.504196625756166e-06, "loss": 0.8271, "step": 196},
    {"epoch": 0.9932773109243698, "grad_norm": 1.4139088289405872, "learning_rate": 8.483153349341336e-06, "loss": 0.845, "step": 197},
    {"epoch": 0.9983193277310924, "grad_norm": 1.384588034693747, "learning_rate": 8.461989516997565e-06, "loss": 0.8312, "step": 198},
    {"epoch": 1.0050420168067227, "grad_norm": 2.499955060770187, "learning_rate": 8.440705861229344e-06, "loss": 1.4381, "step": 199},
    {"epoch": 1.0100840336134453, "grad_norm": 1.413536932523174, "learning_rate": 8.41930311868839e-06, "loss": 0.713, "step": 200},
    {"epoch": 1.015126050420168, "grad_norm": 1.3570359586304308, "learning_rate": 8.397782030148147e-06, "loss": 0.716, "step": 201},
    {"epoch": 1.0201680672268907, "grad_norm": 1.187974845871534, "learning_rate": 8.376143340478153e-06, "loss": 0.6197, "step": 202},
    {"epoch": 1.0252100840336134, "grad_norm": 1.1805636492053666, "learning_rate": 8.354387798618254e-06, "loss": 0.6082, "step": 203},
    {"epoch": 1.030252100840336, "grad_norm": 1.3319326327566277, "learning_rate": 8.332516157552684e-06, "loss": 0.6667, "step": 204},
    {"epoch": 1.035294117647059, "grad_norm": 1.3080442340316867, "learning_rate": 8.310529174284004e-06, "loss": 0.6438, "step": 205},
    {"epoch": 1.0403361344537816, "grad_norm": 1.360919752940988, "learning_rate": 8.288427609806899e-06, "loss": 0.6931, "step": 206},
    {"epoch": 1.0453781512605043, "grad_norm": 1.2928882019326107, "learning_rate": 8.266212229081846e-06, "loss": 0.6571, "step": 207},
    {"epoch": 1.050420168067227, "grad_norm": 1.279346131512037, "learning_rate": 8.243883801008632e-06, "loss": 0.6105, "step": 208},
    {"epoch": 1.0554621848739496, "grad_norm": 1.3976246828088796, "learning_rate": 8.221443098399733e-06, "loss": 0.633, "step": 209},
    {"epoch": 1.0605042016806723, "grad_norm": 1.4051676037106482, "learning_rate": 8.198890897953586e-06, "loss": 0.631, "step": 210},
    {"epoch": 1.065546218487395, "grad_norm": 1.4026478680925658, "learning_rate": 8.176227980227693e-06, "loss": 0.646, "step": 211},
    {"epoch": 1.0705882352941176, "grad_norm": 1.4783461586544826, "learning_rate": 8.153455129611605e-06, "loss": 0.6341, "step": 212},
    {"epoch": 1.0756302521008403, "grad_norm": 1.2992917788523406, "learning_rate": 8.130573134299782e-06, "loss": 0.7027, "step": 213},
    {"epoch": 1.080672268907563, "grad_norm": 1.4403523864907255, "learning_rate": 8.107582786264299e-06, "loss": 0.6745, "step": 214},
    {"epoch": 1.0857142857142856, "grad_norm": 1.2904789259135272, "learning_rate": 8.084484881227449e-06, "loss": 0.6278, "step": 215},
    {"epoch": 1.0907563025210083, "grad_norm": 1.3928383691850674, "learning_rate": 8.061280218634192e-06, "loss": 0.665, "step": 216},
    {"epoch": 1.0957983193277312, "grad_norm": 1.3355440702392616, "learning_rate": 8.037969601624495e-06, "loss": 0.6095, "step": 217},
    {"epoch": 1.1008403361344539, "grad_norm": 1.3135802297885384, "learning_rate": 8.014553837005527e-06, "loss": 0.7134, "step": 218},
    {"epoch": 1.1058823529411765, "grad_norm": 1.3334358438044307, "learning_rate": 7.99103373522373e-06, "loss": 0.6149, "step": 219},
    {"epoch": 1.1109243697478992, "grad_norm": 1.3855125872698653, "learning_rate": 7.967410110336782e-06, "loss": 0.6709, "step": 220},
    {"epoch": 1.1159663865546219, "grad_norm": 1.4082439279428, "learning_rate": 7.943683779985412e-06, "loss": 0.6665, "step": 221},
    {"epoch": 1.1210084033613446, "grad_norm": 1.3849413150174785, "learning_rate": 7.919855565365102e-06, "loss": 0.6698, "step": 222},
    {"epoch": 1.1260504201680672, "grad_norm": 1.3025006342892487, "learning_rate": 7.895926291197667e-06, "loss": 0.6726, "step": 223},
    {"epoch": 1.13109243697479, "grad_norm": 1.3438499346918609, "learning_rate": 7.871896785702707e-06, "loss": 0.6361, "step": 224},
    {"epoch": 1.1361344537815126, "grad_norm": 1.252763414951386, "learning_rate": 7.847767880568944e-06, "loss": 0.6534, "step": 225},
    {"epoch": 1.1411764705882352, "grad_norm": 1.4594024040073388, "learning_rate": 7.823540410925434e-06, "loss": 0.7176, "step": 226},
    {"epoch": 1.146218487394958, "grad_norm": 1.3020082357416656, "learning_rate": 7.799215215312667e-06, "loss": 0.6117, "step": 227},
    {"epoch": 1.1512605042016806, "grad_norm": 1.3344891922181583, "learning_rate": 7.774793135653537e-06, "loss": 0.6502, "step": 228},
    {"epoch": 1.1563025210084033, "grad_norm": 1.1931020476239522, "learning_rate": 7.750275017224208e-06, "loss": 0.5864, "step": 229},
    {"epoch": 1.1613445378151261, "grad_norm": 1.3817137725123274, "learning_rate": 7.725661708624855e-06, "loss": 0.6845, "step": 230},
    {"epoch": 1.1663865546218488, "grad_norm": 1.3718851116188664, "learning_rate": 7.700954061750295e-06, "loss": 0.6666, "step": 231},
    {"epoch": 1.1714285714285715, "grad_norm": 1.3538961263237106, "learning_rate": 7.676152931760496e-06, "loss": 0.6815, "step": 232},
    {"epoch": 1.1764705882352942, "grad_norm": 1.3576998269549865, "learning_rate": 7.651259177050996e-06, "loss": 0.6169, "step": 233},
    {"epoch": 1.1815126050420168, "grad_norm": 1.3317040137841496, "learning_rate": 7.626273659223166e-06, "loss": 0.8546, "step": 234},
    {"epoch": 1.1865546218487395, "grad_norm": 1.368524911957153, "learning_rate": 7.601197243054411e-06, "loss": 0.6168, "step": 235},
    {"epoch": 1.1915966386554622, "grad_norm": 1.3058914037226665, "learning_rate": 7.576030796468233e-06, "loss": 0.7452, "step": 236},
    {"epoch": 1.1966386554621848, "grad_norm": 1.5392470830352827, "learning_rate": 7.5507751905041885e-06, "loss": 0.6195, "step": 237},
    {"epoch": 1.2016806722689075, "grad_norm": 1.4102673119306182, "learning_rate": 7.525431299287737e-06, "loss": 0.6523, "step": 238},
    {"epoch": 1.2067226890756302, "grad_norm": 1.4511322902886419, "learning_rate": 7.500000000000001e-06, "loss": 0.6862, "step": 239},
    {"epoch": 1.2117647058823529, "grad_norm": 1.2661930310847365, "learning_rate": 7.474482172847391e-06, "loss": 0.6528, "step": 240},
    {"epoch": 1.2168067226890757, "grad_norm": 1.3307860380456358, "learning_rate": 7.4488787010311425e-06, "loss": 0.6602, "step": 241},
    {"epoch": 1.2218487394957984, "grad_norm": 1.3750585055686875, "learning_rate": 7.423190470716761e-06, "loss": 0.6432, "step": 242},
    {"epoch": 1.226890756302521, "grad_norm": 1.2979245099980825, "learning_rate": 7.3974183710033334e-06, "loss": 0.6288, "step": 243},
    {"epoch": 1.2319327731092438, "grad_norm": 1.2999814021886877, "learning_rate": 7.371563293892761e-06, "loss": 0.6119, "step": 244},
    {"epoch": 1.2369747899159664, "grad_norm": 1.2917976929827104, "learning_rate": 7.345626134258897e-06, "loss": 0.6657, "step": 245},
    {"epoch": 1.242016806722689, "grad_norm": 1.4010288472470998, "learning_rate": 7.319607789816555e-06, "loss": 0.6586, "step": 246},
    {"epoch": 1.2470588235294118, "grad_norm": 1.4146400942510136, "learning_rate": 7.293509161090453e-06, "loss": 0.6595, "step": 247},
    {"epoch": 1.2521008403361344, "grad_norm": 1.2728109027093242, "learning_rate": 7.2673311513840395e-06, "loss": 0.6353, "step": 248},
    {"epoch": 1.2571428571428571, "grad_norm": 1.3471043709018875, "learning_rate": 7.241074666748228e-06, "loss": 0.6713, "step": 249},
    {"epoch": 1.2621848739495798, "grad_norm": 1.353231427350053, "learning_rate": 7.214740615950041e-06, "loss": 0.6102, "step": 250},
    {"epoch": 1.2672268907563025, "grad_norm": 1.337514944324046, "learning_rate": 7.188329910441154e-06, "loss": 0.6282, "step": 251},
    {"epoch": 1.2722689075630251, "grad_norm": 1.362404295247445, "learning_rate": 7.161843464326349e-06, "loss": 0.6072, "step": 252},
    {"epoch": 1.2773109243697478, "grad_norm": 1.1818447088372563, "learning_rate": 7.135282194331881e-06, "loss": 0.6057, "step": 253},
    {"epoch": 1.2823529411764705, "grad_norm": 1.4982822435126113, "learning_rate": 7.1086470197737405e-06, "loss": 0.6803, "step": 254},
    {"epoch": 1.2873949579831931, "grad_norm": 1.4344811997979932, "learning_rate": 7.0819388625258385e-06, "loss": 0.8567, "step": 255},
    {"epoch": 1.292436974789916, "grad_norm": 1.3859091438882214, "learning_rate": 7.05515864698811e-06, "loss": 0.7355, "step": 256},
    {"epoch": 1.2974789915966387, "grad_norm": 1.1626254136263392, "learning_rate": 7.028307300054499e-06, "loss": 0.5839, "step": 257},
    {"epoch": 1.3025210084033614, "grad_norm": 1.3552944579781003, "learning_rate": 7.0013857510808934e-06, "loss": 0.6836, "step": 258},
    {"epoch": 1.307563025210084, "grad_norm": 1.3028817545835125, "learning_rate": 6.974394931852957e-06, "loss": 0.6284, "step": 259},
    {"epoch": 1.3126050420168067, "grad_norm": 1.5434124541373508, "learning_rate": 6.94733577655387e-06, "loss": 0.7012, "step": 260},
    {"epoch": 1.3176470588235294, "grad_norm": 1.303474015679206, "learning_rate": 6.920209221732007e-06, "loss": 0.5703, "step": 261},
    {"epoch": 1.322689075630252, "grad_norm": 1.3348450903633984, "learning_rate": 6.893016206268518e-06, "loss": 0.5917, "step": 262},
    {"epoch": 1.3277310924369747, "grad_norm": 1.3433706513738732, "learning_rate": 6.865757671344827e-06, "loss": 0.6672, "step": 263},
    {"epoch": 1.3327731092436974, "grad_norm": 1.2935787672149481, "learning_rate": 6.838434560410064e-06, "loss": 0.6701, "step": 264},
    {"epoch": 1.3378151260504203, "grad_norm": 1.3458569492608534, "learning_rate": 6.811047819148413e-06, "loss": 0.6647, "step": 265},
    {"epoch": 1.342857142857143, "grad_norm": 1.3814097147596185, "learning_rate": 6.783598395446371e-06, "loss": 0.6866, "step": 266},
    {"epoch": 1.3478991596638656, "grad_norm": 1.384769236934002, "learning_rate": 6.756087239359948e-06, "loss": 0.6058, "step": 267},
    {"epoch": 1.3529411764705883, "grad_norm": 1.4299755108319103, "learning_rate": 6.728515303081782e-06, "loss": 0.6608, "step": 268},
    {"epoch": 1.357983193277311, "grad_norm": 1.6844501725850975, "learning_rate": 6.700883540908185e-06, "loss": 0.6902, "step": 269},
    {"epoch": 1.3630252100840337, "grad_norm": 1.490837215727114, "learning_rate": 6.673192909206109e-06, "loss": 0.6622, "step": 270},
    {"epoch": 1.3680672268907563, "grad_norm": 1.5025542365103597, "learning_rate": 6.64544436638005e-06, "loss": 0.7318, "step": 271},
    {"epoch": 1.373109243697479, "grad_norm": 1.368007843570876, "learning_rate": 6.617638872838874e-06, "loss": 0.6616, "step": 272},
    {"epoch": 1.3781512605042017, "grad_norm": 1.3302784390410516, "learning_rate": 6.589777390962575e-06, "loss": 0.5837, "step": 273},
    {"epoch": 1.3831932773109243, "grad_norm": 1.3818583989196362, "learning_rate": 6.561860885068972e-06, "loss": 0.7319, "step": 274},
    {"epoch": 1.388235294117647, "grad_norm": 1.3678970576063487, "learning_rate": 6.53389032138032e-06, "loss": 0.6479, "step": 275},
    {"epoch": 1.3932773109243697, "grad_norm": 1.3918528373329961, "learning_rate": 6.505866667989884e-06, "loss": 0.6657, "step": 276},
    {"epoch": 1.3983193277310924, "grad_norm": 1.3578596611461975, "learning_rate": 6.477790894828422e-06, "loss": 0.6227, "step": 277},
    {"epoch": 1.403361344537815, "grad_norm": 1.37442116613121, "learning_rate": 6.449663973630613e-06, "loss": 0.668, "step": 278},
    {"epoch": 1.4084033613445377, "grad_norm": 1.251535744853749, "learning_rate": 6.421486877901436e-06, "loss": 0.6394, "step": 279},
    {"epoch": 1.4134453781512604, "grad_norm": 1.3817098557899696, "learning_rate": 6.393260582882462e-06, "loss": 0.7289, "step": 280},
    {"epoch": 1.4184873949579833, "grad_norm": 1.3924770743130575, "learning_rate": 6.364986065518106e-06, "loss": 0.6632, "step": 281},
    {"epoch": 1.423529411764706, "grad_norm": 1.3388647960669742, "learning_rate": 6.336664304421818e-06, "loss": 0.6445, "step": 282},
    {"epoch": 1.4285714285714286, "grad_norm": 1.3627824010774807, "learning_rate": 6.308296279842204e-06, "loss": 0.6785, "step": 283},
    {"epoch": 1.4336134453781513, "grad_norm": 1.2353887841733255, "learning_rate": 6.279882973629101e-06, "loss": 0.5987, "step": 284},
    {"epoch": 1.438655462184874, "grad_norm": 1.2803646798399686, "learning_rate": 6.2514253691996e-06, "loss": 0.6593, "step": 285},
    {"epoch": 1.4436974789915966, "grad_norm": 1.3106097252223476, "learning_rate": 6.222924451504001e-06, "loss": 0.6612, "step": 286},
    {"epoch": 1.4487394957983193, "grad_norm": 1.491149138722541, "learning_rate": 6.194381206991723e-06, "loss": 0.6603, "step": 287},
    {"epoch": 1.453781512605042, "grad_norm": 1.4729722170121724, "learning_rate": 6.165796623577171e-06, "loss": 0.6458, "step": 288},
    {"epoch": 1.4588235294117646, "grad_norm": 1.2583772868484708, "learning_rate": 6.1371716906055336e-06, "loss": 0.6571, "step": 289},
    {"epoch": 1.4638655462184875, "grad_norm": 1.6484902113991295, "learning_rate": 6.10850739881854e-06, "loss": 0.8048, "step": 290},
    {"epoch": 1.4689075630252102, "grad_norm": 1.1293948636395863, "learning_rate": 6.079804740320181e-06, "loss": 0.631, "step": 291},
    {"epoch": 1.4739495798319329, "grad_norm": 1.357543211738453, "learning_rate": 6.051064708542357e-06, "loss": 0.6834, "step": 292},
    {"epoch": 1.4789915966386555, "grad_norm": 1.422094283192291, "learning_rate": 6.022288298210502e-06, "loss": 0.7688, "step": 293},
    {"epoch": 1.4840336134453782, "grad_norm": 1.3320687626409005, "learning_rate": 5.993476505309154e-06, "loss": 0.6438, "step": 294},
    {"epoch": 1.4890756302521009, "grad_norm": 1.479155880731166, "learning_rate": 5.964630327047485e-06, "loss": 0.6983, "step": 295},
    {"epoch": 1.4941176470588236, "grad_norm": 1.4751670026359378, "learning_rate": 5.935750761824777e-06, "loss": 0.6784, "step": 296},
    {"epoch": 1.4991596638655462, "grad_norm": 1.3971166152312533, "learning_rate": 5.906838809195879e-06, "loss": 0.7934, "step": 297},
    {"epoch": 1.504201680672269, "grad_norm": 1.486282793941636, "learning_rate": 5.877895469836604e-06, "loss": 0.7149, "step": 298},
    {"epoch": 1.5092436974789916, "grad_norm": 1.3831360984251488, "learning_rate": 5.848921745509094e-06, "loss": 0.6853, "step": 299},
    {"epoch": 1.5142857142857142, "grad_norm": 1.373255418518971, "learning_rate": 5.819918639027149e-06, "loss": 0.6262, "step": 300},
    {"epoch": 1.519327731092437, "grad_norm": 1.398139776725886, "learning_rate": 5.790887154221521e-06, "loss": 0.6682, "step": 301},
    {"epoch": 1.5243697478991596, "grad_norm": 1.459786025141565, "learning_rate": 5.7618282959051685e-06, "loss": 0.6596, "step": 302},
    {"epoch": 1.5294117647058822, "grad_norm": 1.386843554966046, "learning_rate": 5.7327430698384775e-06, "loss": 0.662, "step": 303},
    {"epoch": 1.534453781512605, "grad_norm": 1.334093052658649, "learning_rate": 5.703632482694453e-06, "loss": 0.5642, "step": 304},
    {"epoch": 1.5394957983193276, "grad_norm": 1.394936799748242, "learning_rate": 5.674497542023875e-06, "loss": 0.6785, "step": 305},
    {"epoch": 1.5445378151260503, "grad_norm": 1.2487045092120568, "learning_rate": 5.645339256220427e-06, "loss": 0.6405, "step": 306},
    {"epoch": 1.5495798319327732, "grad_norm": 1.449626002944486, "learning_rate": 5.616158634485793e-06, "loss": 0.7186, "step": 307},
    {"epoch": 1.5546218487394958, "grad_norm": 1.3148115913009149, "learning_rate": 5.5869566867947344e-06, "loss": 0.6689, "step": 308},
    {"epoch": 1.5596638655462185, "grad_norm": 1.3031066852612374, "learning_rate": 5.557734423860122e-06, "loss": 0.6865, "step": 309},
    {"epoch": 1.5647058823529412, "grad_norm": 1.4070190634154978, "learning_rate": 5.528492857097966e-06, "loss": 0.692, "step": 310},
    {"epoch": 1.5697478991596638, "grad_norm": 1.424416347019562, "learning_rate": 5.499232998592399e-06, "loss": 0.6712, "step": 311},
    {"epoch": 1.5747899159663865, "grad_norm": 1.4045930546601455, "learning_rate": 5.469955861060653e-06, "loss": 0.692, "step": 312},
    {"epoch": 1.5798319327731094, "grad_norm": 1.4633924161825607, "learning_rate": 5.44066245781801e-06, "loss": 0.6972, "step": 313},
    {"epoch": 1.584873949579832, "grad_norm": 1.3419059215183884, "learning_rate": 5.4113538027427245e-06, "loss": 0.5832, "step": 314},
    {"epoch": 1.5899159663865547, "grad_norm": 1.4651690425379238, "learning_rate": 5.382030910240936e-06, "loss": 0.7263, "step": 315},
    {"epoch": 1.5949579831932774, "grad_norm": 1.3544416080791692, "learning_rate": 5.352694795211555e-06, "loss": 0.6693, "step": 316},
    {"epoch": 1.6, "grad_norm": 1.3796831843734638, "learning_rate": 5.3233464730111426e-06, "loss": 0.6843, "step": 317},
    {"epoch": 1.6050420168067228, "grad_norm": 1.3756368583869594, "learning_rate": 5.29398695941876e-06, "loss": 0.6956, "step": 318},
    {"epoch": 1.6100840336134454, "grad_norm": 1.354906917799083, "learning_rate": 5.2646172706008154e-06, "loss": 0.5865, "step": 319},
    {"epoch": 1.615126050420168, "grad_norm": 1.283604806155226, "learning_rate": 5.235238423075899e-06, "loss": 0.6476, "step": 320},
    {"epoch": 1.6201680672268908, "grad_norm": 1.3323430668544856, "learning_rate": 5.20585143367959e-06, "loss": 0.5978, "step": 321},
    {"epoch": 1.6252100840336134, "grad_norm": 1.4432636768429228, "learning_rate": 5.176457319529264e-06, "loss": 0.7229, "step": 322},
    {"epoch": 1.6302521008403361, "grad_norm": 1.3389659599587687, "learning_rate": 5.147057097988898e-06, "loss": 0.7036, "step": 323},
    {"epoch": 1.6352941176470588, "grad_norm": 1.40224689957347, "learning_rate": 5.1176517866338495e-06, "loss": 0.6524, "step": 324},
    {"epoch": 1.6403361344537815, "grad_norm": 1.448948508673923, "learning_rate": 5.088242403215644e-06, "loss": 0.6574, "step": 325},
    {"epoch": 1.6453781512605041, "grad_norm": 1.4336192786572701, "learning_rate": 5.058829965626742e-06, "loss": 0.6649, "step": 326},
    {"epoch": 1.6504201680672268, "grad_norm": 1.1551398885920936, "learning_rate": 5.029415491865311e-06, "loss": 0.6607, "step": 327},
    {"epoch": 1.6554621848739495, "grad_norm": 1.4081755117550179, "learning_rate": 5e-06, "loss": 0.6308, "step": 328},
    {"epoch": 1.6605042016806721, "grad_norm": 1.2962293823552042, "learning_rate": 4.97058450813469e-06, "loss": 0.6315, "step": 329},
    {"epoch": 1.6655462184873948, "grad_norm": 1.2609233329938516, "learning_rate": 4.94117003437326e-06, "loss": 0.6453, "step": 330},
    {"epoch": 1.6705882352941175, "grad_norm": 1.4395586718171531, "learning_rate": 4.911757596784358e-06, "loss": 0.7056, "step": 331},
    {"epoch": 1.6756302521008404, "grad_norm": 1.490647265803814, "learning_rate": 4.882348213366152e-06, "loss": 0.7463, "step": 332},
    {"epoch": 1.680672268907563, "grad_norm": 1.4744084173114673, "learning_rate": 4.8529429020111035e-06, "loss": 0.6518, "step": 333},
    {"epoch": 1.6857142857142857, "grad_norm": 1.3256051086606053, "learning_rate": 4.823542680470738e-06, "loss": 0.6322, "step": 334},
    {"epoch": 1.6907563025210084, "grad_norm": 1.4043201154667322, "learning_rate": 4.794148566320412e-06, "loss": 0.6623, "step": 335},
    {"epoch": 1.695798319327731, "grad_norm": 1.3058283187944708, "learning_rate": 4.7647615769241e-06, "loss": 0.7233, "step": 336},
    {"epoch": 1.7008403361344537, "grad_norm": 1.3709304051984876, "learning_rate": 4.7353827293991845e-06, "loss": 0.7237, "step": 337},
    {"epoch": 1.7058823529411766, "grad_norm": 1.3476441152074792, "learning_rate": 4.706013040581242e-06, "loss": 0.6408, "step": 338},
    {"epoch": 1.7109243697478993, "grad_norm": 1.4435937624188804, "learning_rate": 4.676653526988858e-06, "loss": 0.6647, "step": 339},
    {"epoch": 1.715966386554622, "grad_norm": 1.3226553142476545, "learning_rate": 4.647305204788445e-06, "loss": 0.6489, "step": 340},
    {"epoch": 1.7210084033613446, "grad_norm": 1.3388051536697478, "learning_rate": 4.617969089759066e-06, "loss": 0.6414, "step": 341},
    {"epoch": 1.7260504201680673, "grad_norm": 1.369018029455846, "learning_rate": 4.588646197257278e-06, "loss": 0.6535, "step": 342},
    {"epoch": 1.73109243697479, "grad_norm": 1.4137443784434733, "learning_rate": 4.559337542181993e-06,
| "loss": 0.6446, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 1.7361344537815127, | |
| "grad_norm": 1.3718987426836817, | |
| "learning_rate": 4.53004413893935e-06, | |
| "loss": 0.6477, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.7411764705882353, | |
| "grad_norm": 1.262236928246166, | |
| "learning_rate": 4.500767001407604e-06, | |
| "loss": 0.6059, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.746218487394958, | |
| "grad_norm": 1.3613528737566392, | |
| "learning_rate": 4.471507142902036e-06, | |
| "loss": 0.6545, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.7512605042016807, | |
| "grad_norm": 1.303211681985445, | |
| "learning_rate": 4.4422655761398785e-06, | |
| "loss": 0.633, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.7563025210084033, | |
| "grad_norm": 1.3262900181605304, | |
| "learning_rate": 4.413043313205266e-06, | |
| "loss": 0.6873, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.761344537815126, | |
| "grad_norm": 1.5014706286550592, | |
| "learning_rate": 4.383841365514208e-06, | |
| "loss": 0.6715, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.7663865546218487, | |
| "grad_norm": 1.3748458240376293, | |
| "learning_rate": 4.354660743779575e-06, | |
| "loss": 0.6322, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.7714285714285714, | |
| "grad_norm": 1.3200606309946945, | |
| "learning_rate": 4.325502457976126e-06, | |
| "loss": 0.6468, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.776470588235294, | |
| "grad_norm": 1.4363798100469027, | |
| "learning_rate": 4.296367517305548e-06, | |
| "loss": 0.6424, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.7815126050420167, | |
| "grad_norm": 1.3665833844005753, | |
| "learning_rate": 4.267256930161523e-06, | |
| "loss": 0.6895, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.7865546218487394, | |
| "grad_norm": 1.3126702843544444, | |
| "learning_rate": 4.238171704094833e-06, | |
| "loss": 0.6766, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.791596638655462, | |
| "grad_norm": 1.3931998076257006, | |
| "learning_rate": 4.209112845778481e-06, | |
| "loss": 0.7165, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.7966386554621847, | |
| "grad_norm": 1.4120182498478362, | |
| "learning_rate": 4.180081360972852e-06, | |
| "loss": 0.6909, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.8016806722689076, | |
| "grad_norm": 1.3825157448385343, | |
| "learning_rate": 4.151078254490908e-06, | |
| "loss": 0.6634, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.8067226890756303, | |
| "grad_norm": 1.2976324503271779, | |
| "learning_rate": 4.122104530163397e-06, | |
| "loss": 0.6482, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.811764705882353, | |
| "grad_norm": 1.3371821093594873, | |
| "learning_rate": 4.09316119080412e-06, | |
| "loss": 0.5939, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.8168067226890756, | |
| "grad_norm": 1.2815723486743216, | |
| "learning_rate": 4.064249238175223e-06, | |
| "loss": 0.5873, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.8218487394957983, | |
| "grad_norm": 1.2598876616725718, | |
| "learning_rate": 4.035369672952516e-06, | |
| "loss": 0.6211, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.826890756302521, | |
| "grad_norm": 1.3775558524100238, | |
| "learning_rate": 4.0065234946908456e-06, | |
| "loss": 0.6362, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.8319327731092439, | |
| "grad_norm": 1.3605455122282684, | |
| "learning_rate": 3.977711701789499e-06, | |
| "loss": 0.6173, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.8369747899159665, | |
| "grad_norm": 1.2800072707024852, | |
| "learning_rate": 3.948935291457645e-06, | |
| "loss": 0.6325, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.8420168067226892, | |
| "grad_norm": 1.3258336050686086, | |
| "learning_rate": 3.920195259679822e-06, | |
| "loss": 0.653, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.8470588235294119, | |
| "grad_norm": 1.3413446326047822, | |
| "learning_rate": 3.891492601181462e-06, | |
| "loss": 0.651, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.8521008403361345, | |
| "grad_norm": 1.41115994835795, | |
| "learning_rate": 3.862828309394469e-06, | |
| "loss": 0.6292, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.8571428571428572, | |
| "grad_norm": 1.3205359045412157, | |
| "learning_rate": 3.834203376422831e-06, | |
| "loss": 0.6064, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.8621848739495799, | |
| "grad_norm": 1.271016774529, | |
| "learning_rate": 3.805618793008279e-06, | |
| "loss": 0.6503, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.8672268907563025, | |
| "grad_norm": 1.38208148943542, | |
| "learning_rate": 3.777075548496001e-06, | |
| "loss": 0.673, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.8722689075630252, | |
| "grad_norm": 1.4627608316199674, | |
| "learning_rate": 3.7485746308004013e-06, | |
| "loss": 0.6853, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 1.877310924369748, | |
| "grad_norm": 1.2952312321525565, | |
| "learning_rate": 3.7201170263709004e-06, | |
| "loss": 0.6164, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.8823529411764706, | |
| "grad_norm": 1.4840833764786416, | |
| "learning_rate": 3.6917037201577977e-06, | |
| "loss": 0.6935, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 1.8873949579831932, | |
| "grad_norm": 1.371096887673559, | |
| "learning_rate": 3.6633356955781827e-06, | |
| "loss": 0.6571, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.892436974789916, | |
| "grad_norm": 1.1787569156110669, | |
| "learning_rate": 3.635013934481895e-06, | |
| "loss": 0.5976, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.8974789915966386, | |
| "grad_norm": 1.292415912438797, | |
| "learning_rate": 3.6067394171175397e-06, | |
| "loss": 0.662, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.9025210084033612, | |
| "grad_norm": 1.4004270726912136, | |
| "learning_rate": 3.578513122098566e-06, | |
| "loss": 0.6902, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 1.907563025210084, | |
| "grad_norm": 1.3676893820953542, | |
| "learning_rate": 3.5503360263693887e-06, | |
| "loss": 0.6736, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.9126050420168066, | |
| "grad_norm": 1.5497019666472422, | |
| "learning_rate": 3.5222091051715803e-06, | |
| "loss": 0.6474, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 1.9176470588235293, | |
| "grad_norm": 1.4107058784966016, | |
| "learning_rate": 3.4941333320101173e-06, | |
| "loss": 0.6214, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.9226890756302522, | |
| "grad_norm": 1.3074693513299003, | |
| "learning_rate": 3.466109678619681e-06, | |
| "loss": 0.5863, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 1.9277310924369748, | |
| "grad_norm": 1.2533065740051568, | |
| "learning_rate": 3.4381391149310294e-06, | |
| "loss": 0.6145, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.9327731092436975, | |
| "grad_norm": 1.279932965905714, | |
| "learning_rate": 3.4102226090374246e-06, | |
| "loss": 0.6138, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 1.9378151260504202, | |
| "grad_norm": 1.279194036152673, | |
| "learning_rate": 3.3823611271611266e-06, | |
| "loss": 0.6051, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.9428571428571428, | |
| "grad_norm": 1.4523883672700335, | |
| "learning_rate": 3.35455563361995e-06, | |
| "loss": 0.6475, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.9478991596638655, | |
| "grad_norm": 1.319917640705539, | |
| "learning_rate": 3.3268070907938915e-06, | |
| "loss": 0.575, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.9529411764705882, | |
| "grad_norm": 1.356219744351625, | |
| "learning_rate": 3.2991164590918162e-06, | |
| "loss": 0.6707, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 1.957983193277311, | |
| "grad_norm": 1.3980927144998019, | |
| "learning_rate": 3.271484696918218e-06, | |
| "loss": 0.62, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.9630252100840337, | |
| "grad_norm": 1.3412194145756722, | |
| "learning_rate": 3.2439127606400546e-06, | |
| "loss": 0.6249, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 1.9680672268907564, | |
| "grad_norm": 1.231905550971943, | |
| "learning_rate": 3.2164016045536306e-06, | |
| "loss": 0.6542, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.973109243697479, | |
| "grad_norm": 1.3549695794420435, | |
| "learning_rate": 3.1889521808515888e-06, | |
| "loss": 0.6176, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 1.9781512605042018, | |
| "grad_norm": 1.415166811994311, | |
| "learning_rate": 3.1615654395899377e-06, | |
| "loss": 0.6593, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.9831932773109244, | |
| "grad_norm": 1.3126591809141124, | |
| "learning_rate": 3.1342423286551756e-06, | |
| "loss": 0.6891, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.988235294117647, | |
| "grad_norm": 1.3842054436860431, | |
| "learning_rate": 3.1069837937314846e-06, | |
| "loss": 0.6342, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.9932773109243698, | |
| "grad_norm": 1.4424046044230687, | |
| "learning_rate": 3.0797907782679944e-06, | |
| "loss": 0.6461, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.9983193277310924, | |
| "grad_norm": 1.3718751038472339, | |
| "learning_rate": 3.0526642234461313e-06, | |
| "loss": 0.6338, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 2.0050420168067227, | |
| "grad_norm": 3.363833604785768, | |
| "learning_rate": 3.0256050681470446e-06, | |
| "loss": 1.2006, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 2.0100840336134453, | |
| "grad_norm": 1.410375521884215, | |
| "learning_rate": 2.9986142489191074e-06, | |
| "loss": 0.5121, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 2.015126050420168, | |
| "grad_norm": 1.463355598251907, | |
| "learning_rate": 2.971692699945502e-06, | |
| "loss": 0.4394, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 2.0201680672268907, | |
| "grad_norm": 1.2914998337098158, | |
| "learning_rate": 2.9448413530118912e-06, | |
| "loss": 0.4978, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 2.0252100840336134, | |
| "grad_norm": 1.3604150815997402, | |
| "learning_rate": 2.9180611374741623e-06, | |
| "loss": 0.4689, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 2.030252100840336, | |
| "grad_norm": 1.1964953052023972, | |
| "learning_rate": 2.891352980226262e-06, | |
| "loss": 0.5015, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 2.0352941176470587, | |
| "grad_norm": 1.1694739760631343, | |
| "learning_rate": 2.8647178056681197e-06, | |
| "loss": 0.447, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 2.0403361344537814, | |
| "grad_norm": 1.3174590682003549, | |
| "learning_rate": 2.838156535673652e-06, | |
| "loss": 0.414, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 2.045378151260504, | |
| "grad_norm": 1.2140198128144435, | |
| "learning_rate": 2.8116700895588473e-06, | |
| "loss": 0.4505, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 2.0504201680672267, | |
| "grad_norm": 1.3398119898455612, | |
| "learning_rate": 2.785259384049959e-06, | |
| "loss": 0.4532, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 2.0554621848739494, | |
| "grad_norm": 1.4229930176202614, | |
| "learning_rate": 2.7589253332517736e-06, | |
| "loss": 0.5546, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 2.060504201680672, | |
| "grad_norm": 1.4684509907326317, | |
| "learning_rate": 2.7326688486159613e-06, | |
| "loss": 0.5254, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 2.065546218487395, | |
| "grad_norm": 1.4962520925453975, | |
| "learning_rate": 2.706490838909547e-06, | |
| "loss": 0.4673, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 2.070588235294118, | |
| "grad_norm": 1.3630229586386085, | |
| "learning_rate": 2.680392210183446e-06, | |
| "loss": 0.4473, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 2.0756302521008405, | |
| "grad_norm": 1.38978907137299, | |
| "learning_rate": 2.6543738657411033e-06, | |
| "loss": 0.5159, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 2.080672268907563, | |
| "grad_norm": 1.429662885547244, | |
| "learning_rate": 2.628436706107238e-06, | |
| "loss": 0.5161, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 2.085714285714286, | |
| "grad_norm": 1.394356185017467, | |
| "learning_rate": 2.6025816289966703e-06, | |
| "loss": 0.5032, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 2.0907563025210085, | |
| "grad_norm": 1.480088664868798, | |
| "learning_rate": 2.5768095292832412e-06, | |
| "loss": 0.4802, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 2.095798319327731, | |
| "grad_norm": 1.3859048551297604, | |
| "learning_rate": 2.5511212989688587e-06, | |
| "loss": 0.4993, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 2.100840336134454, | |
| "grad_norm": 1.440430022618694, | |
| "learning_rate": 2.525517827152614e-06, | |
| "loss": 0.4551, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 2.1058823529411765, | |
| "grad_norm": 1.4332550806993916, | |
| "learning_rate": 2.5000000000000015e-06, | |
| "loss": 0.5611, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 2.110924369747899, | |
| "grad_norm": 1.3161188350792523, | |
| "learning_rate": 2.4745687007122636e-06, | |
| "loss": 0.4602, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 2.115966386554622, | |
| "grad_norm": 1.4145836319136063, | |
| "learning_rate": 2.449224809495815e-06, | |
| "loss": 0.4464, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 2.1210084033613446, | |
| "grad_norm": 1.3638972016864883, | |
| "learning_rate": 2.423969203531768e-06, | |
| "loss": 0.4625, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 2.1260504201680672, | |
| "grad_norm": 1.4282920146552893, | |
| "learning_rate": 2.3988027569455895e-06, | |
| "loss": 0.4809, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 2.13109243697479, | |
| "grad_norm": 1.452704091304085, | |
| "learning_rate": 2.373726340776837e-06, | |
| "loss": 0.4959, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 2.1361344537815126, | |
| "grad_norm": 1.4474065940760683, | |
| "learning_rate": 2.348740822949006e-06, | |
| "loss": 0.4557, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 2.1411764705882352, | |
| "grad_norm": 1.406883162238408, | |
| "learning_rate": 2.323847068239504e-06, | |
| "loss": 0.5069, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 2.146218487394958, | |
| "grad_norm": 1.4713827636564831, | |
| "learning_rate": 2.2990459382497086e-06, | |
| "loss": 0.4813, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 2.1512605042016806, | |
| "grad_norm": 1.4582227343532888, | |
| "learning_rate": 2.274338291375147e-06, | |
| "loss": 0.462, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 2.1563025210084033, | |
| "grad_norm": 1.353197229608169, | |
| "learning_rate": 2.2497249827757933e-06, | |
| "loss": 0.4658, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 2.161344537815126, | |
| "grad_norm": 1.3550947330778897, | |
| "learning_rate": 2.225206864346465e-06, | |
| "loss": 0.5794, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 2.1663865546218486, | |
| "grad_norm": 1.4137143069445475, | |
| "learning_rate": 2.2007847846873342e-06, | |
| "loss": 0.4722, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 2.1714285714285713, | |
| "grad_norm": 1.2932234077066185, | |
| "learning_rate": 2.176459589074566e-06, | |
| "loss": 0.4369, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 2.176470588235294, | |
| "grad_norm": 1.3725308971047603, | |
| "learning_rate": 2.1522321194310577e-06, | |
| "loss": 0.4958, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 2.1815126050420166, | |
| "grad_norm": 1.4324324040918073, | |
| "learning_rate": 2.1281032142972933e-06, | |
| "loss": 0.4954, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 2.1865546218487397, | |
| "grad_norm": 1.4153168395436235, | |
| "learning_rate": 2.1040737088023323e-06, | |
| "loss": 0.4457, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 2.1915966386554624, | |
| "grad_norm": 1.3341155055487035, | |
| "learning_rate": 2.080144434634898e-06, | |
| "loss": 0.5017, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 2.196638655462185, | |
| "grad_norm": 1.352939614197411, | |
| "learning_rate": 2.056316220014588e-06, | |
| "loss": 0.4553, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 2.2016806722689077, | |
| "grad_norm": 1.393182470026338, | |
| "learning_rate": 2.0325898896632178e-06, | |
| "loss": 0.4448, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 2.2067226890756304, | |
| "grad_norm": 1.4033955608191793, | |
| "learning_rate": 2.0089662647762716e-06, | |
| "loss": 0.441, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 2.211764705882353, | |
| "grad_norm": 1.41226298350313, | |
| "learning_rate": 1.9854461629944764e-06, | |
| "loss": 0.4656, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 2.2168067226890757, | |
| "grad_norm": 1.3512621478929514, | |
| "learning_rate": 1.962030398375506e-06, | |
| "loss": 0.5245, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 2.2218487394957984, | |
| "grad_norm": 1.3932479184910864, | |
| "learning_rate": 1.9387197813658092e-06, | |
| "loss": 0.456, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 2.226890756302521, | |
| "grad_norm": 1.3400595100259751, | |
| "learning_rate": 1.915515118772555e-06, | |
| "loss": 0.4622, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 2.2319327731092438, | |
| "grad_norm": 1.3239101426319217, | |
| "learning_rate": 1.8924172137357038e-06, | |
| "loss": 0.4821, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 2.2369747899159664, | |
| "grad_norm": 1.4028557110251756, | |
| "learning_rate": 1.8694268657002197e-06, | |
| "loss": 0.4592, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 2.242016806722689, | |
| "grad_norm": 1.4043326661254716, | |
| "learning_rate": 1.8465448703883959e-06, | |
| "loss": 0.4642, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 2.2470588235294118, | |
| "grad_norm": 1.4748018123002309, | |
| "learning_rate": 1.8237720197723075e-06, | |
| "loss": 0.5244, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 2.2521008403361344, | |
| "grad_norm": 1.3653204295657917, | |
| "learning_rate": 1.8011091020464138e-06, | |
| "loss": 0.5117, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 2.257142857142857, | |
| "grad_norm": 1.4578979263769525, | |
| "learning_rate": 1.7785569016002686e-06, | |
| "loss": 0.4622, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 2.26218487394958, | |
| "grad_norm": 1.4739147697577966, | |
| "learning_rate": 1.75611619899137e-06, | |
| "loss": 0.4524, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 2.2672268907563025, | |
| "grad_norm": 1.3465934593186815, | |
| "learning_rate": 1.7337877709181527e-06, | |
| "loss": 0.4616, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 2.272268907563025, | |
| "grad_norm": 1.4287084373091115, | |
| "learning_rate": 1.711572390193102e-06, | |
| "loss": 0.6594, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 2.277310924369748, | |
| "grad_norm": 1.3274840093520053, | |
| "learning_rate": 1.689470825715998e-06, | |
| "loss": 0.4529, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 2.2823529411764705, | |
| "grad_norm": 1.4216422105253623, | |
| "learning_rate": 1.6674838424473172e-06, | |
| "loss": 0.4655, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 2.287394957983193, | |
| "grad_norm": 1.452303728671861, | |
| "learning_rate": 1.6456122013817477e-06, | |
| "loss": 0.4625, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 2.292436974789916, | |
| "grad_norm": 1.4369743256615972, | |
| "learning_rate": 1.6238566595218475e-06, | |
| "loss": 0.4761, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 2.2974789915966385, | |
| "grad_norm": 1.407023006658543, | |
| "learning_rate": 1.6022179698518525e-06, | |
| "loss": 0.4505, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 2.302521008403361, | |
| "grad_norm": 1.391039540718536, | |
| "learning_rate": 1.580696881311611e-06, | |
| "loss": 0.4894, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 2.307563025210084, | |
| "grad_norm": 1.3557281771597436, | |
| "learning_rate": 1.5592941387706562e-06, | |
| "loss": 0.4108, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 2.3126050420168065, | |
| "grad_norm": 1.3010131467886796, | |
| "learning_rate": 1.538010483002435e-06, | |
| "loss": 0.425, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 2.317647058823529, | |
| "grad_norm": 1.3625069219769537, | |
| "learning_rate": 1.5168466506586654e-06, | |
| "loss": 0.4431, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 2.3226890756302523, | |
| "grad_norm": 1.2997097389936179, | |
| "learning_rate": 1.4958033742438348e-06, | |
| "loss": 0.4058, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 2.327731092436975, | |
| "grad_norm": 1.3546221586310845, | |
| "learning_rate": 1.4748813820898554e-06, | |
| "loss": 0.5043, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 2.3327731092436976, | |
| "grad_norm": 1.3503940282999218, | |
| "learning_rate": 1.454081398330855e-06, | |
| "loss": 0.5015, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 2.3378151260504203, | |
| "grad_norm": 1.2879127697899735, | |
| "learning_rate": 1.4334041428781003e-06, | |
| "loss": 0.4219, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 2.342857142857143, | |
| "grad_norm": 1.5900890446730591, | |
| "learning_rate": 1.4128503313951008e-06, | |
| "loss": 0.5508, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 2.3478991596638656, | |
| "grad_norm": 1.4693275041182954, | |
| "learning_rate": 1.3924206752728282e-06, | |
| "loss": 0.5196, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 2.3529411764705883, | |
| "grad_norm": 1.3739526563603481, | |
| "learning_rate": 1.3721158816050872e-06, | |
| "loss": 0.5223, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 2.357983193277311, | |
| "grad_norm": 1.2888756368302696, | |
| "learning_rate": 1.3519366531640589e-06, | |
| "loss": 0.4745, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 2.3630252100840337, | |
| "grad_norm": 1.3646861171520672, | |
| "learning_rate": 1.3318836883759634e-06, | |
| "loss": 0.4765, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 2.3680672268907563, | |
| "grad_norm": 1.3876282049959663, | |
| "learning_rate": 1.3119576812968893e-06, | |
| "loss": 0.4552, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 2.373109243697479, | |
| "grad_norm": 1.3212811305037033, | |
| "learning_rate": 1.292159321588778e-06, | |
| "loss": 0.4444, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 2.3781512605042017, | |
| "grad_norm": 1.4025656868262555, | |
| "learning_rate": 1.272489294495548e-06, | |
| "loss": 0.5373, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 2.3831932773109243, | |
| "grad_norm": 1.3992039142572703, | |
| "learning_rate": 1.252948280819375e-06, | |
| "loss": 0.4297, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 2.388235294117647, | |
| "grad_norm": 1.438194701698973, | |
| "learning_rate": 1.2335369568971362e-06, | |
| "loss": 0.4577, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 2.3932773109243697, | |
| "grad_norm": 1.3560235059252677, | |
| "learning_rate": 1.2142559945769995e-06, | |
| "loss": 0.4576, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 2.3983193277310924, | |
| "grad_norm": 1.357949004614199, | |
| "learning_rate": 1.1951060611951615e-06, | |
| "loss": 0.5944, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 2.403361344537815, | |
| "grad_norm": 1.2895013043643404, | |
| "learning_rate": 1.1760878195527642e-06, | |
| "loss": 0.4192, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 2.4084033613445377, | |
| "grad_norm": 1.2608640104913673, | |
| "learning_rate": 1.1572019278929457e-06, | |
| "loss": 0.4431, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 2.4134453781512604, | |
| "grad_norm": 1.4235058216914491, | |
| "learning_rate": 1.1384490398780563e-06, | |
| "loss": 0.4835, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 2.418487394957983, | |
| "grad_norm": 1.3849158950764375, | |
| "learning_rate": 1.1198298045670402e-06, | |
| "loss": 0.4497, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 2.4235294117647057, | |
| "grad_norm": 1.4243621054419897, | |
| "learning_rate": 1.1013448663929704e-06, | |
| "loss": 0.5031, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 2.4285714285714284, | |
| "grad_norm": 1.2997464135987702, | |
| "learning_rate": 1.0829948651407374e-06, | |
| "loss": 0.483, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 2.4336134453781515, | |
| "grad_norm": 1.2887117802326669, | |
| "learning_rate": 1.0647804359249143e-06, | |
| "loss": 0.4424, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 2.438655462184874, | |
| "grad_norm": 1.2955280324064098, | |
| "learning_rate": 1.0467022091677692e-06, | |
| "loss": 0.4963, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 2.443697478991597, | |
| "grad_norm": 1.5695989821047664, | |
| "learning_rate": 1.0287608105774456e-06, | |
| "loss": 0.512, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 2.4487394957983195, | |
| "grad_norm": 1.3900121464168351, | |
| "learning_rate": 1.0109568611263094e-06, | |
| "loss": 0.4418, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 2.453781512605042, | |
| "grad_norm": 1.443290081700745, | |
| "learning_rate": 9.932909770294542e-07, | |
| "loss": 0.4439, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 2.458823529411765, | |
| "grad_norm": 1.3476484251272791, | |
| "learning_rate": 9.757637697233723e-07, | |
| "loss": 0.4885, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 2.4638655462184875, | |
| "grad_norm": 1.3389474168899225, | |
| "learning_rate": 9.58375845844793e-07, | |
| "loss": 0.4486, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 2.46890756302521, | |
| "grad_norm": 1.2353966317116258, | |
| "learning_rate": 9.41127807209688e-07, | |
| "loss": 0.4321, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 2.473949579831933, | |
| "grad_norm": 1.2849383161233021, | |
| "learning_rate": 9.240202507924412e-07, | |
| "loss": 0.433, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 2.4789915966386555, | |
| "grad_norm": 1.3336087651970685, | |
| "learning_rate": 9.070537687051817e-07, | |
| "loss": 0.4516, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 2.484033613445378, | |
| "grad_norm": 1.3550057200939567, | |
| "learning_rate": 8.902289481772996e-07, | |
| "loss": 0.4616, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 2.489075630252101, | |
| "grad_norm": 1.3590095983206505, | |
| "learning_rate": 8.735463715351139e-07, | |
| "loss": 0.4203, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 2.4941176470588236, | |
| "grad_norm": 1.2915320514796769, | |
| "learning_rate": 8.570066161817176e-07, | |
| "loss": 0.4503, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 2.499159663865546, | |
| "grad_norm": 1.2679676777389248, | |
| "learning_rate": 8.406102545769989e-07, | |
| "loss": 0.4566, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 2.504201680672269, | |
| "grad_norm": 1.426642729326135, | |
| "learning_rate": 8.243578542178227e-07, | |
| "loss": 0.4707, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 2.5092436974789916, | |
| "grad_norm": 1.4592108582229681, | |
| "learning_rate": 8.082499776183883e-07, | |
| "loss": 0.4845, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 2.5142857142857142, | |
| "grad_norm": 1.5266839034291377, | |
| "learning_rate": 7.922871822907641e-07, | |
| "loss": 0.5228, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 2.519327731092437, | |
| "grad_norm": 1.471645595600825, | |
| "learning_rate": 7.764700207255904e-07, | |
| "loss": 0.4173, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 2.5243697478991596, | |
| "grad_norm": 1.3871858021840573, | |
| "learning_rate": 7.607990403729526e-07, | |
| "loss": 0.4601, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.5294117647058822, | |
| "grad_norm": 1.3138350820905274, | |
| "learning_rate": 7.452747836234392e-07, | |
| "loss": 0.4504, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 2.534453781512605, | |
| "grad_norm": 1.2975304324598231, | |
| "learning_rate": 7.298977877893688e-07, | |
| "loss": 0.4265, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 2.5394957983193276, | |
| "grad_norm": 1.3447001192643702, | |
| "learning_rate": 7.146685850861851e-07, | |
| "loss": 0.466, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 2.5445378151260503, | |
| "grad_norm": 1.3862420743153665, | |
| "learning_rate": 6.995877026140468e-07, | |
| "loss": 0.4884, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 2.549579831932773, | |
| "grad_norm": 1.4032983423284162, | |
| "learning_rate": 6.846556623395795e-07, | |
| "loss": 0.4948, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 2.5546218487394956, | |
| "grad_norm": 1.362120295068725, | |
| "learning_rate": 6.698729810778065e-07, | |
| "loss": 0.4702, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 2.5596638655462183, | |
| "grad_norm": 1.389808913275814, | |
| "learning_rate": 6.552401704742678e-07, | |
| "loss": 0.4825, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 2.564705882352941, | |
| "grad_norm": 1.2860994495581453, | |
| "learning_rate": 6.40757736987307e-07, | |
| "loss": 0.4321, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 2.5697478991596636, | |
| "grad_norm": 1.212606448511892, | |
| "learning_rate": 6.26426181870542e-07, | |
| "loss": 0.3868, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 2.5747899159663863, | |
| "grad_norm": 1.2670489383748516, | |
| "learning_rate": 6.122460011555187e-07, | |
| "loss": 0.4532, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 2.5798319327731094, | |
| "grad_norm": 1.3801554590726837, | |
| "learning_rate": 5.982176856345445e-07, | |
| "loss": 0.4263, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 2.584873949579832, | |
| "grad_norm": 1.3394504151016333, | |
| "learning_rate": 5.843417208436908e-07, | |
| "loss": 0.496, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 2.5899159663865547, | |
| "grad_norm": 1.2955707760211432, | |
| "learning_rate": 5.706185870460018e-07, | |
| "loss": 0.4253, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 2.5949579831932774, | |
| "grad_norm": 1.289481906227215, | |
| "learning_rate": 5.570487592148666e-07, | |
| "loss": 0.4035, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "grad_norm": 1.3376266312340062, | |
| "learning_rate": 5.436327070175729e-07, | |
| "loss": 0.4545, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 2.6050420168067228, | |
| "grad_norm": 1.4001675009701846, | |
| "learning_rate": 5.303708947990638e-07, | |
| "loss": 0.4684, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 2.6100840336134454, | |
| "grad_norm": 1.4896915805848956, | |
| "learning_rate": 5.172637815658583e-07, | |
| "loss": 0.4704, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 2.615126050420168, | |
| "grad_norm": 1.430686916061002, | |
| "learning_rate": 5.04311820970163e-07, | |
| "loss": 0.4782, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 2.6201680672268908, | |
| "grad_norm": 1.3676105828350056, | |
| "learning_rate": 4.915154612941781e-07, | |
| "loss": 0.5979, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 2.6252100840336134, | |
| "grad_norm": 1.3552413071380474, | |
| "learning_rate": 4.788751454345763e-07, | |
| "loss": 0.4405, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.630252100840336, | |
| "grad_norm": 1.320913107468769, | |
| "learning_rate": 4.663913108871726e-07, | |
| "loss": 0.4105, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 2.635294117647059, | |
| "grad_norm": 1.2848967010536776, | |
| "learning_rate": 4.540643897317887e-07, | |
| "loss": 0.3934, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 2.6403361344537815, | |
| "grad_norm": 1.3500509189164658, | |
| "learning_rate": 4.4189480861729137e-07, | |
| "loss": 0.4339, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 2.645378151260504, | |
| "grad_norm": 1.3387080610453355, | |
| "learning_rate": 4.2988298874682754e-07, | |
| "loss": 0.4552, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 2.650420168067227, | |
| "grad_norm": 1.3397812410356982, | |
| "learning_rate": 4.1802934586324897e-07, | |
| "loss": 0.5401, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 2.6554621848739495, | |
| "grad_norm": 1.446011629760243, | |
| "learning_rate": 4.0633429023472004e-07, | |
| "loss": 0.5409, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 2.660504201680672, | |
| "grad_norm": 1.3710949034220614, | |
| "learning_rate": 3.947982266405159e-07, | |
| "loss": 0.501, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 2.665546218487395, | |
| "grad_norm": 1.5073033115483478, | |
| "learning_rate": 3.834215543570191e-07, | |
| "loss": 0.5156, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 2.6705882352941175, | |
| "grad_norm": 1.3549599833015573, | |
| "learning_rate": 3.72204667143895e-07, | |
| "loss": 0.4667, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 2.6756302521008406, | |
| "grad_norm": 1.368632751852017, | |
| "learning_rate": 3.611479532304618e-07, | |
| "loss": 0.4596, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.6806722689075633, | |
| "grad_norm": 1.3310734620781681, | |
| "learning_rate": 3.5025179530225995e-07, | |
| "loss": 0.4248, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 2.685714285714286, | |
| "grad_norm": 1.429961991715737, | |
| "learning_rate": 3.395165704878023e-07, | |
| "loss": 0.4921, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 2.6907563025210086, | |
| "grad_norm": 1.3220689464603654, | |
| "learning_rate": 3.289426503455201e-07, | |
| "loss": 0.4686, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 2.6957983193277313, | |
| "grad_norm": 1.3596446823078556, | |
| "learning_rate": 3.185304008509077e-07, | |
| "loss": 0.4692, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 2.700840336134454, | |
| "grad_norm": 1.2664017870580138, | |
| "learning_rate": 3.082801823838527e-07, | |
| "loss": 0.4792, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 2.7058823529411766, | |
| "grad_norm": 1.277008676617942, | |
| "learning_rate": 2.9819234971616154e-07, | |
| "loss": 0.4496, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 2.7109243697478993, | |
| "grad_norm": 1.3031675483473417, | |
| "learning_rate": 2.882672519992824e-07, | |
| "loss": 0.4599, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 2.715966386554622, | |
| "grad_norm": 1.475285425023621, | |
| "learning_rate": 2.785052327522214e-07, | |
| "loss": 0.5562, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 2.7210084033613446, | |
| "grad_norm": 1.2387397112349467, | |
| "learning_rate": 2.6890662984965234e-07, | |
| "loss": 0.4508, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 2.7260504201680673, | |
| "grad_norm": 1.2769755883493084, | |
| "learning_rate": 2.594717755102205e-07, | |
| "loss": 0.4497, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.73109243697479, | |
| "grad_norm": 1.4117553058680856, | |
| "learning_rate": 2.5020099628504603e-07, | |
| "loss": 0.4176, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 2.7361344537815127, | |
| "grad_norm": 1.3430474164461437, | |
| "learning_rate": 2.4109461304642254e-07, | |
| "loss": 0.61, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 2.7411764705882353, | |
| "grad_norm": 1.319429861827343, | |
| "learning_rate": 2.3215294097670927e-07, | |
| "loss": 0.4451, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 2.746218487394958, | |
| "grad_norm": 1.436920605125832, | |
| "learning_rate": 2.2337628955742263e-07, | |
| "loss": 0.4874, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 2.7512605042016807, | |
| "grad_norm": 1.3812471581213166, | |
| "learning_rate": 2.1476496255852685e-07, | |
| "loss": 0.382, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 2.7563025210084033, | |
| "grad_norm": 1.205494792014491, | |
| "learning_rate": 2.0631925802791608e-07, | |
| "loss": 0.5224, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 2.761344537815126, | |
| "grad_norm": 1.3083334014447827, | |
| "learning_rate": 1.9803946828110376e-07, | |
| "loss": 0.5117, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 2.7663865546218487, | |
| "grad_norm": 1.3758887119834913, | |
| "learning_rate": 1.8992587989110133e-07, | |
| "loss": 0.4898, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 2.7714285714285714, | |
| "grad_norm": 1.3436017213466456, | |
| "learning_rate": 1.8197877367849948e-07, | |
| "loss": 0.5596, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 2.776470588235294, | |
| "grad_norm": 1.4507659924194913, | |
| "learning_rate": 1.7419842470175196e-07, | |
| "loss": 0.4889, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.7815126050420167, | |
| "grad_norm": 1.5070411133243147, | |
| "learning_rate": 1.6658510224765333e-07, | |
| "loss": 0.47, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 2.7865546218487394, | |
| "grad_norm": 1.3934953281445221, | |
| "learning_rate": 1.5913906982201744e-07, | |
| "loss": 0.4626, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 2.791596638655462, | |
| "grad_norm": 1.4300047982632422, | |
| "learning_rate": 1.5186058514055912e-07, | |
| "loss": 0.4808, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 2.7966386554621847, | |
| "grad_norm": 1.3007207174809041, | |
| "learning_rate": 1.447499001199748e-07, | |
| "loss": 0.5228, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 2.8016806722689074, | |
| "grad_norm": 1.335166451449638, | |
| "learning_rate": 1.3780726086922103e-07, | |
| "loss": 0.5314, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 2.80672268907563, | |
| "grad_norm": 1.2727049723883297, | |
| "learning_rate": 1.3103290768099796e-07, | |
| "loss": 0.4538, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 2.8117647058823527, | |
| "grad_norm": 1.4233653924829766, | |
| "learning_rate": 1.244270750234333e-07, | |
| "loss": 0.4768, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 2.8168067226890754, | |
| "grad_norm": 1.4089563114452142, | |
| "learning_rate": 1.1798999153196433e-07, | |
| "loss": 0.4543, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 2.821848739495798, | |
| "grad_norm": 1.3596745441590257, | |
| "learning_rate": 1.1172188000142803e-07, | |
| "loss": 0.5016, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 2.8268907563025207, | |
| "grad_norm": 1.3375081145484837, | |
| "learning_rate": 1.0562295737834738e-07, | |
| "loss": 0.47, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 2.831932773109244, | |
| "grad_norm": 1.3797076618818533, | |
| "learning_rate": 9.969343475342285e-08, | |
| "loss": 0.4762, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 2.8369747899159665, | |
| "grad_norm": 1.4014527371585839, | |
| "learning_rate": 9.393351735422773e-08, | |
| "loss": 0.4606, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 2.842016806722689, | |
| "grad_norm": 1.317969883356561, | |
| "learning_rate": 8.834340453810375e-08, | |
| "loss": 0.4353, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 2.847058823529412, | |
| "grad_norm": 1.3062183016322855, | |
| "learning_rate": 8.29232897852611e-08, | |
| "loss": 0.3857, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 2.8521008403361345, | |
| "grad_norm": 1.3280320137002732, | |
| "learning_rate": 7.76733606920832e-08, | |
| "loss": 0.4572, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 2.857142857142857, | |
| "grad_norm": 1.4128418670110612, | |
| "learning_rate": 7.259379896463248e-08, | |
| "loss": 0.4476, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 2.86218487394958, | |
| "grad_norm": 1.3977595292294513, | |
| "learning_rate": 6.768478041236037e-08, | |
| "loss": 0.4436, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 2.8672268907563025, | |
| "grad_norm": 1.3855652086248782, | |
| "learning_rate": 6.294647494202444e-08, | |
| "loss": 0.4346, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 2.872268907563025, | |
| "grad_norm": 1.3251986287781006, | |
| "learning_rate": 5.8379046551807486e-08, | |
| "loss": 0.493, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 2.877310924369748, | |
| "grad_norm": 1.32087943884219, | |
| "learning_rate": 5.398265332563935e-08, | |
| "loss": 0.4551, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 2.8823529411764706, | |
| "grad_norm": 1.2437729277991256, | |
| "learning_rate": 4.975744742772848e-08, | |
| "loss": 0.4098, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 2.8873949579831932, | |
| "grad_norm": 1.340919476266603, | |
| "learning_rate": 4.5703575097292286e-08, | |
| "loss": 0.4726, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 2.892436974789916, | |
| "grad_norm": 1.2461844948007363, | |
| "learning_rate": 4.182117664349783e-08, | |
| "loss": 0.449, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 2.8974789915966386, | |
| "grad_norm": 1.3240662502351237, | |
| "learning_rate": 3.8110386440605164e-08, | |
| "loss": 0.4603, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 2.9025210084033612, | |
| "grad_norm": 1.3494315545656852, | |
| "learning_rate": 3.457133292331494e-08, | |
| "loss": 0.5058, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 2.907563025210084, | |
| "grad_norm": 1.3389143724686245, | |
| "learning_rate": 3.120413858232474e-08, | |
| "loss": 0.4578, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 2.9126050420168066, | |
| "grad_norm": 1.344475790060752, | |
| "learning_rate": 2.8008919960090253e-08, | |
| "loss": 0.5347, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 2.9176470588235293, | |
| "grad_norm": 1.388286539991785, | |
| "learning_rate": 2.4985787646788497e-08, | |
| "loss": 0.4792, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 2.9226890756302524, | |
| "grad_norm": 1.4667343155241181, | |
| "learning_rate": 2.2134846276494205e-08, | |
| "loss": 0.4854, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 2.927731092436975, | |
| "grad_norm": 1.393293250138424, | |
| "learning_rate": 1.9456194523554404e-08, | |
| "loss": 0.4796, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 2.9327731092436977, | |
| "grad_norm": 1.3210976282362301, | |
| "learning_rate": 1.69499250991767e-08, | |
| "loss": 0.4465, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 2.9378151260504204, | |
| "grad_norm": 1.3544687735071852, | |
| "learning_rate": 1.4616124748217387e-08, | |
| "loss": 0.5223, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 2.942857142857143, | |
| "grad_norm": 1.467595755846224, | |
| "learning_rate": 1.2454874246181081e-08, | |
| "loss": 0.6671, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 2.9478991596638657, | |
| "grad_norm": 1.3671723526105932, | |
| "learning_rate": 1.0466248396424072e-08, | |
| "loss": 0.4499, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 2.9529411764705884, | |
| "grad_norm": 1.4167636187504142, | |
| "learning_rate": 8.650316027566386e-09, | |
| "loss": 0.4873, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 2.957983193277311, | |
| "grad_norm": 1.220474765102595, | |
| "learning_rate": 7.007139991108136e-09, | |
| "loss": 0.4043, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 2.9630252100840337, | |
| "grad_norm": 1.3733660106334655, | |
| "learning_rate": 5.536777159254603e-09, | |
| "loss": 0.4793, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 2.9680672268907564, | |
| "grad_norm": 1.3544611708705747, | |
| "learning_rate": 4.239278422948911e-09, | |
| "loss": 0.4953, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 2.973109243697479, | |
| "grad_norm": 1.4589364978859505, | |
| "learning_rate": 3.1146886901090024e-09, | |
| "loss": 0.4547, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 2.9781512605042018, | |
| "grad_norm": 1.3938123480231057, | |
| "learning_rate": 2.1630468840738716e-09, | |
| "loss": 0.4115, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 2.9831932773109244, | |
| "grad_norm": 1.3511479563562372, | |
| "learning_rate": 1.3843859422574269e-09, | |
| "loss": 0.4926, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 2.988235294117647, | |
| "grad_norm": 1.445464043641677, | |
| "learning_rate": 7.787328150071771e-10, | |
| "loss": 0.5346, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 2.9932773109243698, | |
| "grad_norm": 1.5785257738352532, | |
| "learning_rate": 3.4610846467109106e-10, | |
| "loss": 0.5032, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 2.9983193277310924, | |
| "grad_norm": 1.305339383484568, | |
| "learning_rate": 8.652786487484133e-11, | |
| "loss": 0.4666, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 2.9983193277310924, | |
| "step": 594, | |
| "total_flos": 4.726427205490442e+17, | |
| "train_loss": 0.7082312573688199, | |
| "train_runtime": 63951.2458, | |
| "train_samples_per_second": 0.447, | |
| "train_steps_per_second": 0.009 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 594, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.726427205490442e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |