{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.921671018276763,
  "global_step": 57000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.09,
      "learning_rate": 1.9825935596170586e-05,
      "loss": 2.4238,
      "step": 500
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.9651871192341167e-05,
      "loss": 2.1067,
      "step": 1000
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.947780678851175e-05,
      "loss": 1.9034,
      "step": 1500
    },
    {
      "epoch": 0.35,
      "learning_rate": 1.9303742384682335e-05,
      "loss": 1.8061,
      "step": 2000
    },
    {
      "epoch": 0.44,
      "learning_rate": 1.9129677980852916e-05,
      "loss": 1.7317,
      "step": 2500
    },
    {
      "epoch": 0.52,
      "learning_rate": 1.89556135770235e-05,
      "loss": 1.6795,
      "step": 3000
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.8781549173194084e-05,
      "loss": 1.6346,
      "step": 3500
    },
    {
      "epoch": 0.7,
      "learning_rate": 1.8607484769364665e-05,
      "loss": 1.5838,
      "step": 4000
    },
    {
      "epoch": 0.78,
      "learning_rate": 1.843342036553525e-05,
      "loss": 1.5477,
      "step": 4500
    },
    {
      "epoch": 0.87,
      "learning_rate": 1.8259355961705833e-05,
      "loss": 1.5084,
      "step": 5000
    },
    {
      "epoch": 0.96,
      "learning_rate": 1.8085291557876417e-05,
      "loss": 1.4765,
      "step": 5500
    },
    {
      "epoch": 1.0,
      "eval_loss": 1.2889001369476318,
      "eval_runtime": 523.3258,
      "eval_samples_per_second": 126.487,
      "eval_steps_per_second": 1.978,
      "step": 5745
    },
    {
      "epoch": 1.04,
      "learning_rate": 1.7911227154046998e-05,
      "loss": 1.4277,
      "step": 6000
    },
    {
      "epoch": 1.13,
      "learning_rate": 1.7737162750217582e-05,
      "loss": 1.3842,
      "step": 6500
    },
    {
      "epoch": 1.22,
      "learning_rate": 1.7563098346388167e-05,
      "loss": 1.3659,
      "step": 7000
    },
    {
      "epoch": 1.31,
      "learning_rate": 1.7389033942558747e-05,
      "loss": 1.346,
      "step": 7500
    },
    {
      "epoch": 1.39,
      "learning_rate": 1.721496953872933e-05,
      "loss": 1.3252,
      "step": 8000
    },
    {
      "epoch": 1.48,
      "learning_rate": 1.7040905134899916e-05,
      "loss": 1.3148,
      "step": 8500
    },
    {
      "epoch": 1.57,
      "learning_rate": 1.6866840731070497e-05,
      "loss": 1.3071,
      "step": 9000
    },
    {
      "epoch": 1.65,
      "learning_rate": 1.669277632724108e-05,
      "loss": 1.287,
      "step": 9500
    },
    {
      "epoch": 1.74,
      "learning_rate": 1.6518711923411665e-05,
      "loss": 1.2802,
      "step": 10000
    },
    {
      "epoch": 1.83,
      "learning_rate": 1.6344647519582246e-05,
      "loss": 1.2637,
      "step": 10500
    },
    {
      "epoch": 1.91,
      "learning_rate": 1.617058311575283e-05,
      "loss": 1.2451,
      "step": 11000
    },
    {
      "epoch": 2.0,
      "eval_loss": 1.1364690065383911,
      "eval_runtime": 527.405,
      "eval_samples_per_second": 125.509,
      "eval_steps_per_second": 1.962,
      "step": 11490
    },
    {
      "epoch": 2.0,
      "learning_rate": 1.599651871192341e-05,
      "loss": 1.2384,
      "step": 11500
    },
    {
      "epoch": 2.09,
      "learning_rate": 1.5822454308093995e-05,
      "loss": 1.1927,
      "step": 12000
    },
    {
      "epoch": 2.18,
      "learning_rate": 1.564838990426458e-05,
      "loss": 1.1753,
      "step": 12500
    },
    {
      "epoch": 2.26,
      "learning_rate": 1.5474325500435163e-05,
      "loss": 1.1781,
      "step": 13000
    },
    {
      "epoch": 2.35,
      "learning_rate": 1.5300261096605747e-05,
      "loss": 1.1705,
      "step": 13500
    },
    {
      "epoch": 2.44,
      "learning_rate": 1.512619669277633e-05,
      "loss": 1.1653,
      "step": 14000
    },
    {
      "epoch": 2.52,
      "learning_rate": 1.4952132288946912e-05,
      "loss": 1.1541,
      "step": 14500
    },
    {
      "epoch": 2.61,
      "learning_rate": 1.4778067885117495e-05,
      "loss": 1.1485,
      "step": 15000
    },
    {
      "epoch": 2.7,
      "learning_rate": 1.4604003481288079e-05,
      "loss": 1.1479,
      "step": 15500
    },
    {
      "epoch": 2.79,
      "learning_rate": 1.4429939077458661e-05,
      "loss": 1.1399,
      "step": 16000
    },
    {
      "epoch": 2.87,
      "learning_rate": 1.4255874673629244e-05,
      "loss": 1.1355,
      "step": 16500
    },
    {
      "epoch": 2.96,
      "learning_rate": 1.4081810269799826e-05,
      "loss": 1.1291,
      "step": 17000
    },
    {
      "epoch": 3.0,
      "eval_loss": 1.0703651905059814,
      "eval_runtime": 523.7516,
      "eval_samples_per_second": 126.384,
      "eval_steps_per_second": 1.976,
      "step": 17235
    },
    {
      "epoch": 3.05,
      "learning_rate": 1.390774586597041e-05,
      "loss": 1.1075,
      "step": 17500
    },
    {
      "epoch": 3.13,
      "learning_rate": 1.3733681462140993e-05,
      "loss": 1.0931,
      "step": 18000
    },
    {
      "epoch": 3.22,
      "learning_rate": 1.3559617058311576e-05,
      "loss": 1.0864,
      "step": 18500
    },
    {
      "epoch": 3.31,
      "learning_rate": 1.338555265448216e-05,
      "loss": 1.0822,
      "step": 19000
    },
    {
      "epoch": 3.39,
      "learning_rate": 1.3211488250652742e-05,
      "loss": 1.0847,
      "step": 19500
    },
    {
      "epoch": 3.48,
      "learning_rate": 1.3037423846823325e-05,
      "loss": 1.0795,
      "step": 20000
    },
    {
      "epoch": 3.57,
      "learning_rate": 1.2863359442993907e-05,
      "loss": 1.0704,
      "step": 20500
    },
    {
      "epoch": 3.66,
      "learning_rate": 1.2689295039164491e-05,
      "loss": 1.072,
      "step": 21000
    },
    {
      "epoch": 3.74,
      "learning_rate": 1.2515230635335076e-05,
      "loss": 1.0704,
      "step": 21500
    },
    {
      "epoch": 3.83,
      "learning_rate": 1.234116623150566e-05,
      "loss": 1.0623,
      "step": 22000
    },
    {
      "epoch": 3.92,
      "learning_rate": 1.2167101827676242e-05,
      "loss": 1.0657,
      "step": 22500
    },
    {
      "epoch": 4.0,
      "eval_loss": 1.0373072624206543,
      "eval_runtime": 524.067,
      "eval_samples_per_second": 126.308,
      "eval_steps_per_second": 1.975,
      "step": 22980
    },
    {
      "epoch": 4.0,
      "learning_rate": 1.1993037423846825e-05,
      "loss": 1.0612,
      "step": 23000
    },
    {
      "epoch": 4.09,
      "learning_rate": 1.1818973020017407e-05,
      "loss": 1.0267,
      "step": 23500
    },
    {
      "epoch": 4.18,
      "learning_rate": 1.1644908616187991e-05,
      "loss": 1.035,
      "step": 24000
    },
    {
      "epoch": 4.26,
      "learning_rate": 1.1470844212358574e-05,
      "loss": 1.0298,
      "step": 24500
    },
    {
      "epoch": 4.35,
      "learning_rate": 1.1296779808529156e-05,
      "loss": 1.0265,
      "step": 25000
    },
    {
      "epoch": 4.44,
      "learning_rate": 1.112271540469974e-05,
      "loss": 1.0337,
      "step": 25500
    },
    {
      "epoch": 4.53,
      "learning_rate": 1.0948651000870323e-05,
      "loss": 1.0265,
      "step": 26000
    },
    {
      "epoch": 4.61,
      "learning_rate": 1.0774586597040905e-05,
      "loss": 1.0264,
      "step": 26500
    },
    {
      "epoch": 4.7,
      "learning_rate": 1.0600522193211488e-05,
      "loss": 1.0181,
      "step": 27000
    },
    {
      "epoch": 4.79,
      "learning_rate": 1.0426457789382072e-05,
      "loss": 1.021,
      "step": 27500
    },
    {
      "epoch": 4.87,
      "learning_rate": 1.0252393385552655e-05,
      "loss": 1.0249,
      "step": 28000
    },
    {
      "epoch": 4.96,
      "learning_rate": 1.0078328981723237e-05,
      "loss": 1.0205,
      "step": 28500
    },
    {
      "epoch": 5.0,
      "eval_loss": 1.0189781188964844,
      "eval_runtime": 521.0752,
      "eval_samples_per_second": 127.033,
      "eval_steps_per_second": 1.986,
      "step": 28725
    },
    {
      "epoch": 5.05,
      "learning_rate": 9.904264577893821e-06,
      "loss": 1.0044,
      "step": 29000
    },
    {
      "epoch": 5.13,
      "learning_rate": 9.730200174064405e-06,
      "loss": 0.9887,
      "step": 29500
    },
    {
      "epoch": 5.22,
      "learning_rate": 9.556135770234988e-06,
      "loss": 0.993,
      "step": 30000
    },
    {
      "epoch": 5.31,
      "learning_rate": 9.38207136640557e-06,
      "loss": 0.9946,
      "step": 30500
    },
    {
      "epoch": 5.4,
      "learning_rate": 9.208006962576153e-06,
      "loss": 0.9898,
      "step": 31000
    },
    {
      "epoch": 5.48,
      "learning_rate": 9.033942558746737e-06,
      "loss": 0.9902,
      "step": 31500
    },
    {
      "epoch": 5.57,
      "learning_rate": 8.85987815491732e-06,
      "loss": 0.9899,
      "step": 32000
    },
    {
      "epoch": 5.66,
      "learning_rate": 8.685813751087904e-06,
      "loss": 0.9884,
      "step": 32500
    },
    {
      "epoch": 5.74,
      "learning_rate": 8.511749347258486e-06,
      "loss": 0.9906,
      "step": 33000
    },
    {
      "epoch": 5.83,
      "learning_rate": 8.33768494342907e-06,
      "loss": 0.9856,
      "step": 33500
    },
    {
      "epoch": 5.92,
      "learning_rate": 8.163620539599653e-06,
      "loss": 0.9923,
      "step": 34000
    },
    {
      "epoch": 6.0,
      "eval_loss": 1.0085190534591675,
      "eval_runtime": 521.5738,
      "eval_samples_per_second": 126.912,
      "eval_steps_per_second": 1.984,
      "step": 34470
    },
    {
      "epoch": 6.01,
      "learning_rate": 7.989556135770235e-06,
      "loss": 0.9843,
      "step": 34500
    },
    {
      "epoch": 6.09,
      "learning_rate": 7.815491731940818e-06,
      "loss": 0.9674,
      "step": 35000
    },
    {
      "epoch": 6.18,
      "learning_rate": 7.641427328111402e-06,
      "loss": 0.9647,
      "step": 35500
    },
    {
      "epoch": 6.27,
      "learning_rate": 7.4673629242819845e-06,
      "loss": 0.9664,
      "step": 36000
    },
    {
      "epoch": 6.35,
      "learning_rate": 7.293298520452569e-06,
      "loss": 0.9689,
      "step": 36500
    },
    {
      "epoch": 6.44,
      "learning_rate": 7.119234116623151e-06,
      "loss": 0.9668,
      "step": 37000
    },
    {
      "epoch": 6.53,
      "learning_rate": 6.9451697127937345e-06,
      "loss": 0.9617,
      "step": 37500
    },
    {
      "epoch": 6.61,
      "learning_rate": 6.771105308964318e-06,
      "loss": 0.962,
      "step": 38000
    },
    {
      "epoch": 6.7,
      "learning_rate": 6.5970409051349e-06,
      "loss": 0.9633,
      "step": 38500
    },
    {
      "epoch": 6.79,
      "learning_rate": 6.422976501305484e-06,
      "loss": 0.9611,
      "step": 39000
    },
    {
      "epoch": 6.88,
      "learning_rate": 6.248912097476066e-06,
      "loss": 0.9617,
      "step": 39500
    },
    {
      "epoch": 6.96,
      "learning_rate": 6.0748476936466495e-06,
      "loss": 0.959,
      "step": 40000
    },
    {
      "epoch": 7.0,
      "eval_loss": 1.0030972957611084,
      "eval_runtime": 523.5292,
      "eval_samples_per_second": 126.438,
      "eval_steps_per_second": 1.977,
      "step": 40215
    },
    {
      "epoch": 7.05,
      "learning_rate": 5.900783289817232e-06,
      "loss": 0.9511,
      "step": 40500
    },
    {
      "epoch": 7.14,
      "learning_rate": 5.726718885987816e-06,
      "loss": 0.9432,
      "step": 41000
    },
    {
      "epoch": 7.22,
      "learning_rate": 5.5526544821583995e-06,
      "loss": 0.942,
      "step": 41500
    },
    {
      "epoch": 7.31,
      "learning_rate": 5.378590078328983e-06,
      "loss": 0.9415,
      "step": 42000
    },
    {
      "epoch": 7.4,
      "learning_rate": 5.204525674499565e-06,
      "loss": 0.9427,
      "step": 42500
    },
    {
      "epoch": 7.48,
      "learning_rate": 5.030461270670149e-06,
      "loss": 0.9477,
      "step": 43000
    },
    {
      "epoch": 7.57,
      "learning_rate": 4.856396866840731e-06,
      "loss": 0.9439,
      "step": 43500
    },
    {
      "epoch": 7.66,
      "learning_rate": 4.682332463011314e-06,
      "loss": 0.945,
      "step": 44000
    },
    {
      "epoch": 7.75,
      "learning_rate": 4.508268059181898e-06,
      "loss": 0.9456,
      "step": 44500
    },
    {
      "epoch": 7.83,
      "learning_rate": 4.334203655352481e-06,
      "loss": 0.9436,
      "step": 45000
    },
    {
      "epoch": 7.92,
      "learning_rate": 4.1601392515230636e-06,
      "loss": 0.9447,
      "step": 45500
    },
    {
      "epoch": 8.0,
      "eval_loss": 1.0001778602600098,
      "eval_runtime": 520.9721,
      "eval_samples_per_second": 127.059,
      "eval_steps_per_second": 1.987,
      "step": 45960
    },
    {
      "epoch": 8.01,
      "learning_rate": 3.986074847693647e-06,
      "loss": 0.9459,
      "step": 46000
    },
    {
      "epoch": 8.09,
      "learning_rate": 3.8120104438642302e-06,
      "loss": 0.9265,
      "step": 46500
    },
    {
      "epoch": 8.18,
      "learning_rate": 3.637946040034813e-06,
      "loss": 0.9271,
      "step": 47000
    },
    {
      "epoch": 8.27,
      "learning_rate": 3.463881636205396e-06,
      "loss": 0.9286,
      "step": 47500
    },
    {
      "epoch": 8.36,
      "learning_rate": 3.2898172323759794e-06,
      "loss": 0.9302,
      "step": 48000
    },
    {
      "epoch": 8.44,
      "learning_rate": 3.1157528285465627e-06,
      "loss": 0.933,
      "step": 48500
    },
    {
      "epoch": 8.53,
      "learning_rate": 2.9416884247171456e-06,
      "loss": 0.9272,
      "step": 49000
    },
    {
      "epoch": 8.62,
      "learning_rate": 2.7676240208877285e-06,
      "loss": 0.9266,
      "step": 49500
    },
    {
      "epoch": 8.7,
      "learning_rate": 2.5935596170583114e-06,
      "loss": 0.9294,
      "step": 50000
    },
    {
      "epoch": 8.79,
      "learning_rate": 2.4194952132288948e-06,
      "loss": 0.9312,
      "step": 50500
    },
    {
      "epoch": 8.88,
      "learning_rate": 2.245430809399478e-06,
      "loss": 0.9306,
      "step": 51000
    },
    {
      "epoch": 8.96,
      "learning_rate": 2.071366405570061e-06,
      "loss": 0.9278,
      "step": 51500
    },
    {
      "epoch": 9.0,
      "eval_loss": 0.9995460510253906,
      "eval_runtime": 522.976,
      "eval_samples_per_second": 126.572,
      "eval_steps_per_second": 1.979,
      "step": 51705
    },
    {
      "epoch": 9.05,
      "learning_rate": 1.8973020017406443e-06,
      "loss": 0.92,
      "step": 52000
    },
    {
      "epoch": 9.14,
      "learning_rate": 1.7232375979112272e-06,
      "loss": 0.9178,
      "step": 52500
    },
    {
      "epoch": 9.23,
      "learning_rate": 1.5491731940818103e-06,
      "loss": 0.9189,
      "step": 53000
    },
    {
      "epoch": 9.31,
      "learning_rate": 1.3751087902523935e-06,
      "loss": 0.9164,
      "step": 53500
    },
    {
      "epoch": 9.4,
      "learning_rate": 1.2010443864229766e-06,
      "loss": 0.9186,
      "step": 54000
    },
    {
      "epoch": 9.49,
      "learning_rate": 1.0269799825935597e-06,
      "loss": 0.9199,
      "step": 54500
    },
    {
      "epoch": 9.57,
      "learning_rate": 8.529155787641428e-07,
      "loss": 0.918,
      "step": 55000
    },
    {
      "epoch": 9.66,
      "learning_rate": 6.788511749347259e-07,
      "loss": 0.9229,
      "step": 55500
    },
    {
      "epoch": 9.75,
      "learning_rate": 5.04786771105309e-07,
      "loss": 0.9203,
      "step": 56000
    },
    {
      "epoch": 9.83,
      "learning_rate": 3.3072236727589213e-07,
      "loss": 0.9202,
      "step": 56500
    },
    {
      "epoch": 9.92,
      "learning_rate": 1.566579634464752e-07,
      "loss": 0.9168,
      "step": 57000
    }
  ],
  "max_steps": 57450,
  "num_train_epochs": 10,
  "total_flos": 8.267810851042099e+18,
  "trial_name": null,
  "trial_params": null
}