{
  "best_global_step": 201,
  "best_metric": 0.6686656671664168,
  "best_model_checkpoint": "./results_kcbert_grouped_balanced/checkpoint-201",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 603,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.24875621890547264,
      "grad_norm": 4.725838661193848,
      "learning_rate": 2.45e-05,
      "loss": 0.6238,
      "step": 50
    },
    {
      "epoch": 0.4975124378109453,
      "grad_norm": 3.734846353530884,
      "learning_rate": 4.9500000000000004e-05,
      "loss": 0.459,
      "step": 100
    },
    {
      "epoch": 0.746268656716418,
      "grad_norm": 3.5573644638061523,
      "learning_rate": 4.5129224652087475e-05,
      "loss": 0.38,
      "step": 150
    },
    {
      "epoch": 0.9950248756218906,
      "grad_norm": 5.902843475341797,
      "learning_rate": 4.015904572564613e-05,
      "loss": 0.3466,
      "step": 200
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.7653927813163482,
      "eval_f1": 0.6686656671664168,
      "eval_loss": 0.5385204553604126,
      "eval_runtime": 7.8505,
      "eval_samples_per_second": 239.986,
      "eval_steps_per_second": 3.821,
      "step": 201
    },
    {
      "epoch": 1.243781094527363,
      "grad_norm": 4.441750526428223,
      "learning_rate": 3.5188866799204776e-05,
      "loss": 0.2598,
      "step": 250
    },
    {
      "epoch": 1.4925373134328357,
      "grad_norm": 4.32944393157959,
      "learning_rate": 3.0218687872763416e-05,
      "loss": 0.2311,
      "step": 300
    },
    {
      "epoch": 1.7412935323383083,
      "grad_norm": 3.940319538116455,
      "learning_rate": 2.524850894632207e-05,
      "loss": 0.2388,
      "step": 350
    },
    {
      "epoch": 1.9900497512437811,
      "grad_norm": 6.620518207550049,
      "learning_rate": 2.0278330019880718e-05,
      "loss": 0.2314,
      "step": 400
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.7494692144373672,
      "eval_f1": 0.6424242424242425,
      "eval_loss": 0.6471742987632751,
      "eval_runtime": 7.5879,
      "eval_samples_per_second": 248.289,
      "eval_steps_per_second": 3.954,
      "step": 402
    },
    {
      "epoch": 2.2388059701492535,
      "grad_norm": 2.4657232761383057,
      "learning_rate": 1.5308151093439365e-05,
      "loss": 0.1475,
      "step": 450
    },
    {
      "epoch": 2.487562189054726,
      "grad_norm": 4.68269157409668,
      "learning_rate": 1.0337972166998012e-05,
      "loss": 0.1217,
      "step": 500
    },
    {
      "epoch": 2.7363184079601988,
      "grad_norm": 6.081990718841553,
      "learning_rate": 5.3677932405566604e-06,
      "loss": 0.1172,
      "step": 550
    },
    {
      "epoch": 2.9850746268656714,
      "grad_norm": 6.272706031799316,
      "learning_rate": 3.9761431411530813e-07,
      "loss": 0.1335,
      "step": 600
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.7611464968152867,
      "eval_f1": 0.6347402597402597,
      "eval_loss": 0.842448890209198,
      "eval_runtime": 7.5784,
      "eval_samples_per_second": 248.601,
      "eval_steps_per_second": 3.959,
      "step": 603
    }
  ],
  "logging_steps": 50,
  "max_steps": 603,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.2477246786125352e+16,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}