| { | |
| "best_metric": 0.49999999700000003, | |
| "best_model_checkpoint": "checkpoint/cross_encoder_20250522_full_data/checkpoint-4633", | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 4633, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 2.436934471130371, | |
| "learning_rate": 4.315925766076824e-12, | |
| "loss": 0.7375, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 1.8707695007324219, | |
| "learning_rate": 8.631851532153648e-10, | |
| "loss": 0.7326, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 1.4507315158843994, | |
| "learning_rate": 1.7263703064307296e-09, | |
| "loss": 0.734, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 4.8174872398376465, | |
| "learning_rate": 2.589555459646094e-09, | |
| "loss": 0.731, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 0.8691744208335876, | |
| "learning_rate": 3.452740612861459e-09, | |
| "loss": 0.7356, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 1.7719039916992188, | |
| "learning_rate": 4.315925766076823e-09, | |
| "loss": 0.7286, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 2.3143906593322754, | |
| "learning_rate": 5.179110919292188e-09, | |
| "loss": 0.7302, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 2.4967010021209717, | |
| "learning_rate": 6.042296072507553e-09, | |
| "loss": 0.7318, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 3.2115557193756104, | |
| "learning_rate": 6.905481225722918e-09, | |
| "loss": 0.7322, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 2.6317012310028076, | |
| "learning_rate": 7.768666378938282e-09, | |
| "loss": 0.7288, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 1.578616738319397, | |
| "learning_rate": 8.631851532153647e-09, | |
| "loss": 0.727, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 2.6788175106048584, | |
| "learning_rate": 9.495036685369011e-09, | |
| "loss": 0.7236, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 1.1162360906600952, | |
| "learning_rate": 9.99960892371536e-09, | |
| "loss": 0.7234, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 3.412252426147461, | |
| "learning_rate": 9.995454119562455e-09, | |
| "loss": 0.7273, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 2.049833297729492, | |
| "learning_rate": 9.986762270880315e-09, | |
| "loss": 0.7216, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 2.14269757270813, | |
| "learning_rate": 9.973541271907098e-09, | |
| "loss": 0.7236, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 3.2950925827026367, | |
| "learning_rate": 9.955803130412195e-09, | |
| "loss": 0.7257, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 1.6116753816604614, | |
| "learning_rate": 9.933563956790353e-09, | |
| "loss": 0.7202, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 2.2606937885284424, | |
| "learning_rate": 9.906843949429669e-09, | |
| "loss": 0.718, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 3.41159725189209, | |
| "learning_rate": 9.875667376366706e-09, | |
| "loss": 0.717, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 2.007009983062744, | |
| "learning_rate": 9.840062553245418e-09, | |
| "loss": 0.7156, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 2.2354674339294434, | |
| "learning_rate": 9.800061817599912e-09, | |
| "loss": 0.7138, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 3.80654239654541, | |
| "learning_rate": 9.755701499484371e-09, | |
| "loss": 0.7144, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 5.391232490539551, | |
| "learning_rate": 9.707021888476834e-09, | |
| "loss": 0.7088, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_f2": 0.49999999700000003, | |
| "eval_loss": 0.7074111700057983, | |
| "eval_precision": 0.16666666666666666, | |
| "eval_recall": 1.0, | |
| "eval_runtime": 1108.1527, | |
| "eval_samples_per_second": 108.462, | |
| "eval_steps_per_second": 1.695, | |
| "step": 4633 | |
| } | |
| ], | |
| "logging_steps": 200, | |
| "max_steps": 23165, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "total_flos": 7.801698556418458e+16, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |