{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 720, "global_step": 7198, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13892747985551543, "grad_norm": 0.595678985118866, "learning_rate": 0.00039537834583680656, "loss": 0.376, "step": 500 }, { "epoch": 0.2000555709919422, "eval_loss": 0.30611157417297363, "eval_runtime": 21.4927, "eval_samples_per_second": 23.264, "eval_steps_per_second": 2.931, "step": 720 }, { "epoch": 0.27785495971103086, "grad_norm": 0.7439117431640625, "learning_rate": 0.0003907474298416227, "loss": 0.2956, "step": 1000 }, { "epoch": 0.4001111419838844, "eval_loss": 0.271937757730484, "eval_runtime": 21.3424, "eval_samples_per_second": 23.428, "eval_steps_per_second": 2.952, "step": 1440 }, { "epoch": 0.41678243956654626, "grad_norm": 0.5364288091659546, "learning_rate": 0.00038611651384643886, "loss": 0.2658, "step": 1500 }, { "epoch": 0.5557099194220617, "grad_norm": 0.5661336779594421, "learning_rate": 0.000381485597851255, "loss": 0.2461, "step": 2000 }, { "epoch": 0.6001667129758266, "eval_loss": 0.2515988349914551, "eval_runtime": 21.3216, "eval_samples_per_second": 23.45, "eval_steps_per_second": 2.955, "step": 2160 }, { "epoch": 0.6946373992775771, "grad_norm": 0.4893731474876404, "learning_rate": 0.00037685468185607115, "loss": 0.2307, "step": 2500 }, { "epoch": 0.8002222839677688, "eval_loss": 0.22781750559806824, "eval_runtime": 21.3004, "eval_samples_per_second": 23.474, "eval_steps_per_second": 2.958, "step": 2880 }, { "epoch": 0.8335648791330925, "grad_norm": 0.5245664119720459, "learning_rate": 0.0003722237658608873, "loss": 0.2205, "step": 3000 }, { "epoch": 0.972492358988608, "grad_norm": 0.7262536287307739, "learning_rate": 0.00036759284986570345, "loss": 0.2154, "step": 3500 }, { "epoch": 1.000277854959711, "eval_loss": 0.21747416257858276, "eval_runtime": 21.4198, "eval_samples_per_second": 23.343, "eval_steps_per_second": 2.941, "step": 3600 }, { "epoch": 1.1114198388441234, "grad_norm": 0.5650269389152527, "learning_rate": 0.0003629619338705196, "loss": 0.1815, "step": 4000 }, { "epoch": 1.2003334259516532, "eval_loss": 0.2159462422132492, "eval_runtime": 21.4142, "eval_samples_per_second": 23.349, "eval_steps_per_second": 2.942, "step": 4320 }, { "epoch": 1.2503473186996388, "grad_norm": 0.6974398493766785, "learning_rate": 0.00035833101787533574, "loss": 0.1813, "step": 4500 }, { "epoch": 1.3892747985551543, "grad_norm": 0.6206701993942261, "learning_rate": 0.0003537001018801519, "loss": 0.1785, "step": 5000 }, { "epoch": 1.4003889969435954, "eval_loss": 0.20631669461727142, "eval_runtime": 21.3451, "eval_samples_per_second": 23.425, "eval_steps_per_second": 2.952, "step": 5040 }, { "epoch": 1.5282022784106695, "grad_norm": 0.6988590359687805, "learning_rate": 0.00034906918588496804, "loss": 0.1748, "step": 5500 }, { "epoch": 1.6004445679355377, "eval_loss": 0.20530453324317932, "eval_runtime": 21.3207, "eval_samples_per_second": 23.451, "eval_steps_per_second": 2.955, "step": 5760 }, { "epoch": 1.667129758266185, "grad_norm": 0.5506817698478699, "learning_rate": 0.0003444382698897842, "loss": 0.1715, "step": 6000 }, { "epoch": 1.8005001389274797, "eval_loss": 0.19710968434810638, "eval_runtime": 21.2884, "eval_samples_per_second": 23.487, "eval_steps_per_second": 2.959, "step": 6480 }, { "epoch": 1.8060572381217006, "grad_norm": 0.464895099401474, "learning_rate": 0.0003398073538946004, "loss": 0.1678, "step": 6500 }, { "epoch": 1.9449847179772157, "grad_norm": 0.6984584331512451, "learning_rate": 0.0003351764378994165, "loss": 0.1664, "step": 7000 } ], "logging_steps": 500, "max_steps": 43188, "num_input_tokens_seen": 0, "num_train_epochs": 12, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.732735090256773e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }