| { | |
| "best_metric": 1.0901482105255127, | |
| "best_model_checkpoint": "/kaggle/output/checkpoint-16000", | |
| "epoch": 0.6926336375488917, | |
| "eval_steps": 1000, | |
| "global_step": 17000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 2.7777777777777777e-11, | |
| "loss": 1.1078, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 2.7750000000000004e-08, | |
| "loss": 1.128, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_accuracy": 0.330938123752495, | |
| "eval_loss": 1.1083118915557861, | |
| "eval_runtime": 12.3979, | |
| "eval_samples_per_second": 404.1, | |
| "eval_steps_per_second": 50.573, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 5.5527777777777784e-08, | |
| "loss": 1.1164, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_accuracy": 0.3323353293413174, | |
| "eval_loss": 1.102672815322876, | |
| "eval_runtime": 12.2861, | |
| "eval_samples_per_second": 407.778, | |
| "eval_steps_per_second": 51.033, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 8.327777777777778e-08, | |
| "loss": 1.1158, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_accuracy": 0.34211576846307384, | |
| "eval_loss": 1.1024774312973022, | |
| "eval_runtime": 12.3746, | |
| "eval_samples_per_second": 404.863, | |
| "eval_steps_per_second": 50.668, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 1.1105555555555557e-07, | |
| "loss": 1.1115, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_accuracy": 0.34890219560878244, | |
| "eval_loss": 1.1012382507324219, | |
| "eval_runtime": 12.5405, | |
| "eval_samples_per_second": 399.505, | |
| "eval_steps_per_second": 49.998, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 1.3880555555555558e-07, | |
| "loss": 1.1141, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_accuracy": 0.3534930139720559, | |
| "eval_loss": 1.1002192497253418, | |
| "eval_runtime": 12.3001, | |
| "eval_samples_per_second": 407.314, | |
| "eval_steps_per_second": 50.975, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.6658333333333335e-07, | |
| "loss": 1.112, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_accuracy": 0.3524950099800399, | |
| "eval_loss": 1.100056767463684, | |
| "eval_runtime": 12.4062, | |
| "eval_samples_per_second": 403.831, | |
| "eval_steps_per_second": 50.539, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 1.9433333333333334e-07, | |
| "loss": 1.1115, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_accuracy": 0.34331337325349304, | |
| "eval_loss": 1.098986029624939, | |
| "eval_runtime": 12.4456, | |
| "eval_samples_per_second": 402.551, | |
| "eval_steps_per_second": 50.379, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 2.2211111111111114e-07, | |
| "loss": 1.109, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_accuracy": 0.3347305389221557, | |
| "eval_loss": 1.098678469657898, | |
| "eval_runtime": 12.5978, | |
| "eval_samples_per_second": 397.688, | |
| "eval_steps_per_second": 49.771, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 2.4986111111111113e-07, | |
| "loss": 1.1117, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_accuracy": 0.34311377245508984, | |
| "eval_loss": 1.0984419584274292, | |
| "eval_runtime": 12.313, | |
| "eval_samples_per_second": 406.886, | |
| "eval_steps_per_second": 50.922, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.776388888888889e-07, | |
| "loss": 1.1081, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "eval_accuracy": 0.35429141716566864, | |
| "eval_loss": 1.0968743562698364, | |
| "eval_runtime": 12.4088, | |
| "eval_samples_per_second": 403.744, | |
| "eval_steps_per_second": 50.528, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 3.053888888888889e-07, | |
| "loss": 1.107, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "eval_accuracy": 0.3722554890219561, | |
| "eval_loss": 1.0950665473937988, | |
| "eval_runtime": 12.5775, | |
| "eval_samples_per_second": 398.33, | |
| "eval_steps_per_second": 49.851, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 3.331666666666667e-07, | |
| "loss": 1.1057, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_accuracy": 0.37544910179640717, | |
| "eval_loss": 1.0940738916397095, | |
| "eval_runtime": 12.5186, | |
| "eval_samples_per_second": 400.203, | |
| "eval_steps_per_second": 50.085, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 3.609166666666667e-07, | |
| "loss": 1.1069, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "eval_accuracy": 0.3401197604790419, | |
| "eval_loss": 1.0937162637710571, | |
| "eval_runtime": 12.2594, | |
| "eval_samples_per_second": 408.666, | |
| "eval_steps_per_second": 51.144, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 3.886944444444445e-07, | |
| "loss": 1.1046, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "eval_accuracy": 0.37604790419161677, | |
| "eval_loss": 1.0926916599273682, | |
| "eval_runtime": 12.256, | |
| "eval_samples_per_second": 408.78, | |
| "eval_steps_per_second": 51.159, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.164444444444445e-07, | |
| "loss": 1.1046, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "eval_accuracy": 0.39201596806387223, | |
| "eval_loss": 1.0912976264953613, | |
| "eval_runtime": 12.5019, | |
| "eval_samples_per_second": 400.74, | |
| "eval_steps_per_second": 50.152, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.442222222222223e-07, | |
| "loss": 1.1052, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "eval_accuracy": 0.393812375249501, | |
| "eval_loss": 1.0901482105255127, | |
| "eval_runtime": 12.3407, | |
| "eval_samples_per_second": 405.973, | |
| "eval_steps_per_second": 50.807, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.7197222222222224e-07, | |
| "loss": 1.1011, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "eval_accuracy": 0.3932135728542914, | |
| "eval_loss": 1.0901678800582886, | |
| "eval_runtime": 12.3644, | |
| "eval_samples_per_second": 405.197, | |
| "eval_steps_per_second": 50.71, | |
| "step": 17000 | |
| } | |
| ], | |
| "logging_steps": 1000, | |
| "max_steps": 10000000, | |
| "num_train_epochs": 408, | |
| "save_steps": 1000, | |
| "total_flos": 5923475030016000.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |