qwencoder_rq4_tsp / trainer_state.json
Easonnoway's picture
Upload folder using huggingface_hub
3ae4b94 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 147,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.20408163265306123,
"grad_norm": 23.69936180114746,
"learning_rate": 1.2e-06,
"logits/chosen": -1.049736738204956,
"logits/rejected": -1.3097896575927734,
"logps/chosen": -594.0164184570312,
"logps/rejected": -527.3587036132812,
"loss": 0.6756,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": 0.049942269921302795,
"rewards/margins": 0.03757396712899208,
"rewards/rejected": 0.012368302792310715,
"step": 10
},
{
"epoch": 0.40816326530612246,
"grad_norm": 4.33872652053833,
"learning_rate": 1.9954719225730845e-06,
"logits/chosen": -1.0503668785095215,
"logits/rejected": -1.2729942798614502,
"logps/chosen": -548.0631103515625,
"logps/rejected": -502.37054443359375,
"loss": 0.3614,
"rewards/accuracies": 0.948437511920929,
"rewards/chosen": 1.080135464668274,
"rewards/margins": 1.3379336595535278,
"rewards/rejected": -0.2577982246875763,
"step": 20
},
{
"epoch": 0.6122448979591837,
"grad_norm": 1.2732926607131958,
"learning_rate": 1.945000818714668e-06,
"logits/chosen": -1.0236886739730835,
"logits/rejected": -1.1413735151290894,
"logps/chosen": -566.3969116210938,
"logps/rejected": -538.9616088867188,
"loss": 0.0815,
"rewards/accuracies": 0.9609375,
"rewards/chosen": 2.209315538406372,
"rewards/margins": 6.348783493041992,
"rewards/rejected": -4.139468193054199,
"step": 30
},
{
"epoch": 0.8163265306122449,
"grad_norm": 0.6404849290847778,
"learning_rate": 1.8412535328311812e-06,
"logits/chosen": -1.033827781677246,
"logits/rejected": -1.052042841911316,
"logps/chosen": -512.7654418945312,
"logps/rejected": -603.2037353515625,
"loss": 0.0676,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": 1.7590594291687012,
"rewards/margins": 13.065619468688965,
"rewards/rejected": -11.306559562683105,
"step": 40
},
{
"epoch": 1.0204081632653061,
"grad_norm": 0.993448793888092,
"learning_rate": 1.690079011482112e-06,
"logits/chosen": -1.0401982069015503,
"logits/rejected": -1.0194432735443115,
"logps/chosen": -548.8553466796875,
"logps/rejected": -660.1163330078125,
"loss": 0.0565,
"rewards/accuracies": 0.9546875357627869,
"rewards/chosen": 1.041262149810791,
"rewards/margins": 17.123653411865234,
"rewards/rejected": -16.0823917388916,
"step": 50
},
{
"epoch": 1.2244897959183674,
"grad_norm": 1.5318107604980469,
"learning_rate": 1.5e-06,
"logits/chosen": -1.053700566291809,
"logits/rejected": -1.0167734622955322,
"logps/chosen": -581.4273681640625,
"logps/rejected": -730.9420776367188,
"loss": 0.046,
"rewards/accuracies": 0.9640624523162842,
"rewards/chosen": 0.5817679762840271,
"rewards/margins": 21.025224685668945,
"rewards/rejected": -20.443456649780273,
"step": 60
},
{
"epoch": 1.4285714285714286,
"grad_norm": 0.9771612882614136,
"learning_rate": 1.2817325568414297e-06,
"logits/chosen": -1.0788830518722534,
"logits/rejected": -1.027891993522644,
"logps/chosen": -565.0576782226562,
"logps/rejected": -717.9182739257812,
"loss": 0.0494,
"rewards/accuracies": 0.9515625238418579,
"rewards/chosen": -0.2791057229042053,
"rewards/margins": 21.86798858642578,
"rewards/rejected": -22.1470947265625,
"step": 70
},
{
"epoch": 1.6326530612244898,
"grad_norm": 0.9185511469841003,
"learning_rate": 1.0475819158237424e-06,
"logits/chosen": -1.095476746559143,
"logits/rejected": -1.0277090072631836,
"logps/chosen": -589.8963623046875,
"logps/rejected": -739.2642822265625,
"loss": 0.052,
"rewards/accuracies": 0.957812488079071,
"rewards/chosen": -0.22568494081497192,
"rewards/margins": 22.34253692626953,
"rewards/rejected": -22.56822395324707,
"step": 80
},
{
"epoch": 1.836734693877551,
"grad_norm": 0.8473263382911682,
"learning_rate": 8.107487556395901e-07,
"logits/chosen": -1.094291090965271,
"logits/rejected": -1.0417732000350952,
"logps/chosen": -553.2315673828125,
"logps/rejected": -720.135986328125,
"loss": 0.0472,
"rewards/accuracies": 0.9609375596046448,
"rewards/chosen": -0.06711739301681519,
"rewards/margins": 23.08963966369629,
"rewards/rejected": -23.15675926208496,
"step": 90
},
{
"epoch": 2.0408163265306123,
"grad_norm": 0.14011289179325104,
"learning_rate": 5.845849869981136e-07,
"logits/chosen": -1.116294264793396,
"logits/rejected": -1.0436064004898071,
"logps/chosen": -572.4153442382812,
"logps/rejected": -768.2971801757812,
"loss": 0.0356,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.18042321503162384,
"rewards/margins": 25.167421340942383,
"rewards/rejected": -25.347843170166016,
"step": 100
},
{
"epoch": 2.2448979591836733,
"grad_norm": 0.14527460932731628,
"learning_rate": 3.818410137793947e-07,
"logits/chosen": -1.1352490186691284,
"logits/rejected": -1.0526891946792603,
"logps/chosen": -590.7538452148438,
"logps/rejected": -767.3261108398438,
"loss": 0.043,
"rewards/accuracies": 0.9593750238418579,
"rewards/chosen": -0.6409277319908142,
"rewards/margins": 24.502239227294922,
"rewards/rejected": -25.14316749572754,
"step": 110
},
{
"epoch": 2.4489795918367347,
"grad_norm": 0.25144967436790466,
"learning_rate": 2.139469052572127e-07,
"logits/chosen": -1.1227790117263794,
"logits/rejected": -1.0495781898498535,
"logps/chosen": -569.977294921875,
"logps/rejected": -750.4077758789062,
"loss": 0.0422,
"rewards/accuracies": 0.953125,
"rewards/chosen": -0.6355734467506409,
"rewards/margins": 23.878570556640625,
"rewards/rejected": -24.514142990112305,
"step": 120
},
{
"epoch": 2.6530612244897958,
"grad_norm": 0.33485284447669983,
"learning_rate": 9.036800464548156e-08,
"logits/chosen": -1.1463559865951538,
"logits/rejected": -1.0605792999267578,
"logps/chosen": -559.5640258789062,
"logps/rejected": -743.02001953125,
"loss": 0.0409,
"rewards/accuracies": 0.9578125476837158,
"rewards/chosen": -0.5392616391181946,
"rewards/margins": 25.962358474731445,
"rewards/rejected": -26.501623153686523,
"step": 130
},
{
"epoch": 2.857142857142857,
"grad_norm": 0.08462539315223694,
"learning_rate": 1.807130273729329e-08,
"logits/chosen": -1.1435989141464233,
"logits/rejected": -1.0657496452331543,
"logps/chosen": -560.4498291015625,
"logps/rejected": -757.8974609375,
"loss": 0.0363,
"rewards/accuracies": 0.964062511920929,
"rewards/chosen": -0.5970851182937622,
"rewards/margins": 25.187143325805664,
"rewards/rejected": -25.784229278564453,
"step": 140
},
{
"epoch": 3.0,
"step": 147,
"total_flos": 9.728979632799089e+17,
"train_loss": 0.11137714680461656,
"train_runtime": 2917.8588,
"train_samples_per_second": 3.22,
"train_steps_per_second": 0.05
}
],
"logging_steps": 10,
"max_steps": 147,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 250,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 9.728979632799089e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}