| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.4043807919123842, | |
| "eval_steps": 15, | |
| "global_step": 240, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02527379949452401, | |
| "grad_norm": 14.990800857543945, | |
| "learning_rate": 9.999802884287873e-06, | |
| "loss": 2.5024, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.02527379949452401, | |
| "eval_loss": 2.251873016357422, | |
| "eval_runtime": 2381.2021, | |
| "eval_samples_per_second": 0.519, | |
| "eval_steps_per_second": 0.13, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.05054759898904802, | |
| "grad_norm": 11.093191146850586, | |
| "learning_rate": 9.996846459432971e-06, | |
| "loss": 2.0154, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.05054759898904802, | |
| "eval_loss": 1.9771775007247925, | |
| "eval_runtime": 2379.4231, | |
| "eval_samples_per_second": 0.52, | |
| "eval_steps_per_second": 0.13, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.07582139848357203, | |
| "grad_norm": 10.310027122497559, | |
| "learning_rate": 9.990344375946395e-06, | |
| "loss": 1.8536, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.07582139848357203, | |
| "eval_loss": 1.8929402828216553, | |
| "eval_runtime": 2379.7201, | |
| "eval_samples_per_second": 0.52, | |
| "eval_steps_per_second": 0.13, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.10109519797809605, | |
| "grad_norm": 14.144546508789062, | |
| "learning_rate": 9.980301247571758e-06, | |
| "loss": 1.8062, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.10109519797809605, | |
| "eval_loss": 1.8586353063583374, | |
| "eval_runtime": 2379.9138, | |
| "eval_samples_per_second": 0.52, | |
| "eval_steps_per_second": 0.13, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.12636899747262004, | |
| "grad_norm": 12.928871154785156, | |
| "learning_rate": 9.966724200704695e-06, | |
| "loss": 1.8739, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.12636899747262004, | |
| "eval_loss": 1.8373581171035767, | |
| "eval_runtime": 2379.1727, | |
| "eval_samples_per_second": 0.52, | |
| "eval_steps_per_second": 0.13, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.15164279696714406, | |
| "grad_norm": 11.413592338562012, | |
| "learning_rate": 9.94962286933613e-06, | |
| "loss": 1.8687, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.15164279696714406, | |
| "eval_loss": 1.8189234733581543, | |
| "eval_runtime": 2379.5736, | |
| "eval_samples_per_second": 0.52, | |
| "eval_steps_per_second": 0.13, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.17691659646166807, | |
| "grad_norm": 10.060262680053711, | |
| "learning_rate": 9.929009388216183e-06, | |
| "loss": 1.749, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.17691659646166807, | |
| "eval_loss": 1.8106799125671387, | |
| "eval_runtime": 2379.2612, | |
| "eval_samples_per_second": 0.52, | |
| "eval_steps_per_second": 0.13, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.2021903959561921, | |
| "grad_norm": 11.168642044067383, | |
| "learning_rate": 9.904898384243608e-06, | |
| "loss": 1.826, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.2021903959561921, | |
| "eval_loss": 1.8040649890899658, | |
| "eval_runtime": 2380.0282, | |
| "eval_samples_per_second": 0.52, | |
| "eval_steps_per_second": 0.13, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.22746419545071608, | |
| "grad_norm": 9.17194938659668, | |
| "learning_rate": 9.877306966086854e-06, | |
| "loss": 1.7828, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.22746419545071608, | |
| "eval_loss": 1.7994695901870728, | |
| "eval_runtime": 2379.6821, | |
| "eval_samples_per_second": 0.52, | |
| "eval_steps_per_second": 0.13, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.2527379949452401, | |
| "grad_norm": 9.84255599975586, | |
| "learning_rate": 9.846254712044102e-06, | |
| "loss": 1.7225, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.2527379949452401, | |
| "eval_loss": 1.7961242198944092, | |
| "eval_runtime": 2379.9202, | |
| "eval_samples_per_second": 0.52, | |
| "eval_steps_per_second": 0.13, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.2780117944397641, | |
| "grad_norm": 8.864015579223633, | |
| "learning_rate": 9.811763656150912e-06, | |
| "loss": 1.8227, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.2780117944397641, | |
| "eval_loss": 1.7934980392456055, | |
| "eval_runtime": 2379.8003, | |
| "eval_samples_per_second": 0.52, | |
| "eval_steps_per_second": 0.13, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.3032855939342881, | |
| "grad_norm": 10.471166610717773, | |
| "learning_rate": 9.773858272545329e-06, | |
| "loss": 1.7436, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.3032855939342881, | |
| "eval_loss": 1.791121006011963, | |
| "eval_runtime": 2379.8892, | |
| "eval_samples_per_second": 0.52, | |
| "eval_steps_per_second": 0.13, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.32855939342881213, | |
| "grad_norm": 12.403428077697754, | |
| "learning_rate": 9.732565458101545e-06, | |
| "loss": 1.843, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.32855939342881213, | |
| "eval_loss": 1.7891260385513306, | |
| "eval_runtime": 2379.2135, | |
| "eval_samples_per_second": 0.52, | |
| "eval_steps_per_second": 0.13, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.35383319292333615, | |
| "grad_norm": 10.464547157287598, | |
| "learning_rate": 9.687914513344432e-06, | |
| "loss": 1.7454, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.35383319292333615, | |
| "eval_loss": 1.78617262840271, | |
| "eval_runtime": 2378.8348, | |
| "eval_samples_per_second": 0.52, | |
| "eval_steps_per_second": 0.13, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.37910699241786017, | |
| "grad_norm": 8.834742546081543, | |
| "learning_rate": 9.639937121658492e-06, | |
| "loss": 1.7015, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.37910699241786017, | |
| "eval_loss": 1.7838687896728516, | |
| "eval_runtime": 2379.7979, | |
| "eval_samples_per_second": 0.52, | |
| "eval_steps_per_second": 0.13, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.4043807919123842, | |
| "grad_norm": 8.59927749633789, | |
| "learning_rate": 9.588667326805996e-06, | |
| "loss": 1.7009, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.4043807919123842, | |
| "eval_loss": 1.7831588983535767, | |
| "eval_runtime": 2379.7143, | |
| "eval_samples_per_second": 0.52, | |
| "eval_steps_per_second": 0.13, | |
| "step": 240 | |
| } | |
| ], | |
| "logging_steps": 15, | |
| "max_steps": 1779, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 15, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.4792379146940416e+16, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |