|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.791208791208791, |
|
"eval_steps": 500, |
|
"global_step": 110, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.43956043956043955, |
|
"grad_norm": 2.0971016883850098, |
|
"learning_rate": 1.6363636363636365e-06, |
|
"loss": 0.7587, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.8791208791208791, |
|
"grad_norm": 0.8449636101722717, |
|
"learning_rate": 1.967948701396356e-06, |
|
"loss": 0.691, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.3076923076923077, |
|
"grad_norm": 0.6521225571632385, |
|
"learning_rate": 1.8412535328311812e-06, |
|
"loss": 0.627, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.7472527472527473, |
|
"grad_norm": 0.580456554889679, |
|
"learning_rate": 1.6305526670845225e-06, |
|
"loss": 0.6099, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.1758241758241756, |
|
"grad_norm": 0.6621115207672119, |
|
"learning_rate": 1.3568862215918717e-06, |
|
"loss": 0.5569, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.6153846153846154, |
|
"grad_norm": 0.4708077311515808, |
|
"learning_rate": 1.0475819158237424e-06, |
|
"loss": 0.5164, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 3.043956043956044, |
|
"grad_norm": 0.6353986859321594, |
|
"learning_rate": 7.335261863099651e-07, |
|
"loss": 0.4935, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 3.4835164835164836, |
|
"grad_norm": 0.653141975402832, |
|
"learning_rate": 4.460799361338897e-07, |
|
"loss": 0.4535, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 3.9230769230769234, |
|
"grad_norm": 0.6834583878517151, |
|
"learning_rate": 2.139469052572127e-07, |
|
"loss": 0.4811, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 4.351648351648351, |
|
"grad_norm": 0.584416925907135, |
|
"learning_rate": 6.030737921409168e-08, |
|
"loss": 0.4414, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 4.791208791208791, |
|
"grad_norm": 0.6029403805732727, |
|
"learning_rate": 5.034576168149174e-10, |
|
"loss": 0.4517, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 4.791208791208791, |
|
"step": 110, |
|
"total_flos": 1.8378852146977178e+17, |
|
"train_loss": 0.552842664718628, |
|
"train_runtime": 1381.2939, |
|
"train_samples_per_second": 5.267, |
|
"train_steps_per_second": 0.08 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 110, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 250, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.8378852146977178e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|