| { | |
| "best_metric": 0.8761239051818848, | |
| "best_model_checkpoint": "./tst-translation-output/checkpoint-3000", | |
| "epoch": 5.465310459996964, | |
| "eval_steps": 1500, | |
| "global_step": 9000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 5e-05, | |
| "loss": 1.562, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.956224829276834e-05, | |
| "loss": 1.118, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.912449658553668e-05, | |
| "loss": 1.014, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "eval_bleu": 5.9697, | |
| "eval_gen_len": 18.2307, | |
| "eval_loss": 0.9561266303062439, | |
| "eval_runtime": 1131.8043, | |
| "eval_samples_per_second": 11.638, | |
| "eval_steps_per_second": 1.455, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 4.868674487830503e-05, | |
| "loss": 0.8557, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 4.824899317107337e-05, | |
| "loss": 0.7997, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 4.781124146384171e-05, | |
| "loss": 0.7888, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "eval_bleu": 10.9388, | |
| "eval_gen_len": 17.4752, | |
| "eval_loss": 0.8761239051818848, | |
| "eval_runtime": 1004.0048, | |
| "eval_samples_per_second": 13.119, | |
| "eval_steps_per_second": 1.64, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 4.737348975661005e-05, | |
| "loss": 0.705, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 4.69357380493784e-05, | |
| "loss": 0.61, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 4.649798634214674e-05, | |
| "loss": 0.6166, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "eval_bleu": 11.0165, | |
| "eval_gen_len": 17.1855, | |
| "eval_loss": 0.8778244256973267, | |
| "eval_runtime": 987.8488, | |
| "eval_samples_per_second": 13.334, | |
| "eval_steps_per_second": 1.667, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 4.606023463491508e-05, | |
| "loss": 0.5985, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 3.34, | |
| "learning_rate": 4.5622482927683416e-05, | |
| "loss": 0.4585, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "learning_rate": 4.518473122045176e-05, | |
| "loss": 0.474, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "eval_bleu": 11.8779, | |
| "eval_gen_len": 17.1838, | |
| "eval_loss": 0.9008126258850098, | |
| "eval_runtime": 981.3519, | |
| "eval_samples_per_second": 13.422, | |
| "eval_steps_per_second": 1.678, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 3.95, | |
| "learning_rate": 4.474697951322011e-05, | |
| "loss": 0.4858, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "learning_rate": 4.430922780598845e-05, | |
| "loss": 0.3623, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 4.55, | |
| "learning_rate": 4.387147609875679e-05, | |
| "loss": 0.3559, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 4.55, | |
| "eval_bleu": 12.6024, | |
| "eval_gen_len": 17.3179, | |
| "eval_loss": 0.9503829479217529, | |
| "eval_runtime": 990.8364, | |
| "eval_samples_per_second": 13.294, | |
| "eval_steps_per_second": 1.662, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "learning_rate": 4.3433724391525126e-05, | |
| "loss": 0.371, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 5.16, | |
| "learning_rate": 4.299597268429347e-05, | |
| "loss": 0.3053, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 5.47, | |
| "learning_rate": 4.255822097706181e-05, | |
| "loss": 0.2588, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 5.47, | |
| "eval_bleu": 13.4321, | |
| "eval_gen_len": 17.1999, | |
| "eval_loss": 0.9977866411209106, | |
| "eval_runtime": 986.567, | |
| "eval_samples_per_second": 13.351, | |
| "eval_steps_per_second": 1.669, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 5.47, | |
| "step": 9000, | |
| "total_flos": 1.248266746331136e+18, | |
| "train_loss": 0.6522123430040148, | |
| "train_runtime": 40792.4155, | |
| "train_samples_per_second": 90.421, | |
| "train_steps_per_second": 1.412 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 57610, | |
| "num_train_epochs": 35, | |
| "save_steps": 1500, | |
| "total_flos": 1.248266746331136e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |