{ "best_metric": 1.4907110929489136, "best_model_checkpoint": "checkpoints/Llama-3.1-8B-Instruct/hatedemics_rag-0402-custom_loss/checkpoint-150", "epoch": 0.43509789702683105, "eval_steps": 10, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.029006526468455404, "eval_loss": 2.542835235595703, "eval_runtime": 2442.4047, "eval_samples_per_second": 0.564, "eval_steps_per_second": 0.141, "step": 10 }, { "epoch": 0.05801305293691081, "grad_norm": 0.5312708616256714, "learning_rate": 1.153846153846154e-05, "loss": 2.4943, "step": 20 }, { "epoch": 0.05801305293691081, "eval_loss": 2.384974718093872, "eval_runtime": 2440.2318, "eval_samples_per_second": 0.565, "eval_steps_per_second": 0.141, "step": 20 }, { "epoch": 0.08701957940536621, "eval_loss": 2.191277027130127, "eval_runtime": 2442.7783, "eval_samples_per_second": 0.564, "eval_steps_per_second": 0.141, "step": 30 }, { "epoch": 0.11602610587382162, "grad_norm": 0.5317983031272888, "learning_rate": 2.307692307692308e-05, "loss": 2.2195, "step": 40 }, { "epoch": 0.11602610587382162, "eval_loss": 1.9987460374832153, "eval_runtime": 2442.7258, "eval_samples_per_second": 0.564, "eval_steps_per_second": 0.141, "step": 40 }, { "epoch": 0.145032632342277, "eval_loss": 1.7984939813613892, "eval_runtime": 2437.4111, "eval_samples_per_second": 0.565, "eval_steps_per_second": 0.142, "step": 50 }, { "epoch": 0.17403915881073242, "grad_norm": 0.5333729386329651, "learning_rate": 2.999829729005125e-05, "loss": 1.8587, "step": 60 }, { "epoch": 0.17403915881073242, "eval_loss": 1.639336109161377, "eval_runtime": 2434.3583, "eval_samples_per_second": 0.566, "eval_steps_per_second": 0.142, "step": 60 }, { "epoch": 0.20304568527918782, "eval_loss": 1.5912648439407349, "eval_runtime": 2435.7205, "eval_samples_per_second": 0.566, "eval_steps_per_second": 0.142, "step": 70 }, { "epoch": 0.23205221174764323, "grad_norm": 0.43636104464530945, "learning_rate": 2.9979146242283087e-05, "loss": 1.6584, "step": 80 }, { "epoch": 0.23205221174764323, "eval_loss": 1.567818522453308, "eval_runtime": 2435.2495, "eval_samples_per_second": 0.566, "eval_steps_per_second": 0.142, "step": 80 }, { "epoch": 0.2610587382160986, "eval_loss": 1.549891471862793, "eval_runtime": 2435.1297, "eval_samples_per_second": 0.566, "eval_steps_per_second": 0.142, "step": 90 }, { "epoch": 0.290065264684554, "grad_norm": 0.3220229148864746, "learning_rate": 2.9938743021113326e-05, "loss": 1.5901, "step": 100 }, { "epoch": 0.290065264684554, "eval_loss": 1.5360591411590576, "eval_runtime": 2434.2214, "eval_samples_per_second": 0.566, "eval_steps_per_second": 0.142, "step": 100 }, { "epoch": 0.31907179115300943, "eval_loss": 1.5227614641189575, "eval_runtime": 2432.2291, "eval_samples_per_second": 0.567, "eval_steps_per_second": 0.142, "step": 110 }, { "epoch": 0.34807831762146485, "grad_norm": 0.3948887586593628, "learning_rate": 2.987714494998674e-05, "loss": 1.557, "step": 120 }, { "epoch": 0.34807831762146485, "eval_loss": 1.511211633682251, "eval_runtime": 2432.4757, "eval_samples_per_second": 0.567, "eval_steps_per_second": 0.142, "step": 120 }, { "epoch": 0.3770848440899202, "eval_loss": 1.5033425092697144, "eval_runtime": 2433.8386, "eval_samples_per_second": 0.566, "eval_steps_per_second": 0.142, "step": 130 }, { "epoch": 0.40609137055837563, "grad_norm": 0.5044500231742859, "learning_rate": 2.9794439423262625e-05, "loss": 1.4977, "step": 140 }, { "epoch": 0.40609137055837563, "eval_loss": 1.4966373443603516, "eval_runtime": 2433.7306, "eval_samples_per_second": 0.566, "eval_steps_per_second": 0.142, "step": 140 }, { "epoch": 0.43509789702683105, "eval_loss": 1.4907110929489136, "eval_runtime": 2434.4621, "eval_samples_per_second": 0.566, "eval_steps_per_second": 0.142, "step": 150 } ], "logging_steps": 20, "max_steps": 1720, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 10, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.3876451196928e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }