{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 147,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.20408163265306123,
      "grad_norm": 23.69936180114746,
      "learning_rate": 1.2e-06,
      "logits/chosen": -1.049736738204956,
      "logits/rejected": -1.3097896575927734,
      "logps/chosen": -594.0164184570312,
      "logps/rejected": -527.3587036132812,
      "loss": 0.6756,
      "rewards/accuracies": 0.518750011920929,
      "rewards/chosen": 0.049942269921302795,
      "rewards/margins": 0.03757396712899208,
      "rewards/rejected": 0.012368302792310715,
      "step": 10
    },
    {
      "epoch": 0.40816326530612246,
      "grad_norm": 4.33872652053833,
      "learning_rate": 1.9954719225730845e-06,
      "logits/chosen": -1.0503668785095215,
      "logits/rejected": -1.2729942798614502,
      "logps/chosen": -548.0631103515625,
      "logps/rejected": -502.37054443359375,
      "loss": 0.3614,
      "rewards/accuracies": 0.948437511920929,
      "rewards/chosen": 1.080135464668274,
      "rewards/margins": 1.3379336595535278,
      "rewards/rejected": -0.2577982246875763,
      "step": 20
    },
    {
      "epoch": 0.6122448979591837,
      "grad_norm": 1.2732926607131958,
      "learning_rate": 1.945000818714668e-06,
      "logits/chosen": -1.0236886739730835,
      "logits/rejected": -1.1413735151290894,
      "logps/chosen": -566.3969116210938,
      "logps/rejected": -538.9616088867188,
      "loss": 0.0815,
      "rewards/accuracies": 0.9609375,
      "rewards/chosen": 2.209315538406372,
      "rewards/margins": 6.348783493041992,
      "rewards/rejected": -4.139468193054199,
      "step": 30
    },
    {
      "epoch": 0.8163265306122449,
      "grad_norm": 0.6404849290847778,
      "learning_rate": 1.8412535328311812e-06,
      "logits/chosen": -1.033827781677246,
      "logits/rejected": -1.052042841911316,
      "logps/chosen": -512.7654418945312,
      "logps/rejected": -603.2037353515625,
      "loss": 0.0676,
      "rewards/accuracies": 0.9624999761581421,
      "rewards/chosen": 1.7590594291687012,
      "rewards/margins": 13.065619468688965,
      "rewards/rejected": -11.306559562683105,
      "step": 40
    },
    {
      "epoch": 1.0204081632653061,
      "grad_norm": 0.993448793888092,
      "learning_rate": 1.690079011482112e-06,
      "logits/chosen": -1.0401982069015503,
      "logits/rejected": -1.0194432735443115,
      "logps/chosen": -548.8553466796875,
      "logps/rejected": -660.1163330078125,
      "loss": 0.0565,
      "rewards/accuracies": 0.9546875357627869,
      "rewards/chosen": 1.041262149810791,
      "rewards/margins": 17.123653411865234,
      "rewards/rejected": -16.0823917388916,
      "step": 50
    },
    {
      "epoch": 1.2244897959183674,
      "grad_norm": 1.5318107604980469,
      "learning_rate": 1.5e-06,
      "logits/chosen": -1.053700566291809,
      "logits/rejected": -1.0167734622955322,
      "logps/chosen": -581.4273681640625,
      "logps/rejected": -730.9420776367188,
      "loss": 0.046,
      "rewards/accuracies": 0.9640624523162842,
      "rewards/chosen": 0.5817679762840271,
      "rewards/margins": 21.025224685668945,
      "rewards/rejected": -20.443456649780273,
      "step": 60
    },
    {
      "epoch": 1.4285714285714286,
      "grad_norm": 0.9771612882614136,
      "learning_rate": 1.2817325568414297e-06,
      "logits/chosen": -1.0788830518722534,
      "logits/rejected": -1.027891993522644,
      "logps/chosen": -565.0576782226562,
      "logps/rejected": -717.9182739257812,
      "loss": 0.0494,
      "rewards/accuracies": 0.9515625238418579,
      "rewards/chosen": -0.2791057229042053,
      "rewards/margins": 21.86798858642578,
      "rewards/rejected": -22.1470947265625,
      "step": 70
    },
    {
      "epoch": 1.6326530612244898,
      "grad_norm": 0.9185511469841003,
      "learning_rate": 1.0475819158237424e-06,
      "logits/chosen": -1.095476746559143,
      "logits/rejected": -1.0277090072631836,
      "logps/chosen": -589.8963623046875,
      "logps/rejected": -739.2642822265625,
      "loss": 0.052,
      "rewards/accuracies": 0.957812488079071,
      "rewards/chosen": -0.22568494081497192,
      "rewards/margins": 22.34253692626953,
      "rewards/rejected": -22.56822395324707,
      "step": 80
    },
    {
      "epoch": 1.836734693877551,
      "grad_norm": 0.8473263382911682,
      "learning_rate": 8.107487556395901e-07,
      "logits/chosen": -1.094291090965271,
      "logits/rejected": -1.0417732000350952,
      "logps/chosen": -553.2315673828125,
      "logps/rejected": -720.135986328125,
      "loss": 0.0472,
      "rewards/accuracies": 0.9609375596046448,
      "rewards/chosen": -0.06711739301681519,
      "rewards/margins": 23.08963966369629,
      "rewards/rejected": -23.15675926208496,
      "step": 90
    },
    {
      "epoch": 2.0408163265306123,
      "grad_norm": 0.14011289179325104,
      "learning_rate": 5.845849869981136e-07,
      "logits/chosen": -1.116294264793396,
      "logits/rejected": -1.0436064004898071,
      "logps/chosen": -572.4153442382812,
      "logps/rejected": -768.2971801757812,
      "loss": 0.0356,
      "rewards/accuracies": 0.96875,
      "rewards/chosen": -0.18042321503162384,
      "rewards/margins": 25.167421340942383,
      "rewards/rejected": -25.347843170166016,
      "step": 100
    },
    {
      "epoch": 2.2448979591836733,
      "grad_norm": 0.14527460932731628,
      "learning_rate": 3.818410137793947e-07,
      "logits/chosen": -1.1352490186691284,
      "logits/rejected": -1.0526891946792603,
      "logps/chosen": -590.7538452148438,
      "logps/rejected": -767.3261108398438,
      "loss": 0.043,
      "rewards/accuracies": 0.9593750238418579,
      "rewards/chosen": -0.6409277319908142,
      "rewards/margins": 24.502239227294922,
      "rewards/rejected": -25.14316749572754,
      "step": 110
    },
    {
      "epoch": 2.4489795918367347,
      "grad_norm": 0.25144967436790466,
      "learning_rate": 2.139469052572127e-07,
      "logits/chosen": -1.1227790117263794,
      "logits/rejected": -1.0495781898498535,
      "logps/chosen": -569.977294921875,
      "logps/rejected": -750.4077758789062,
      "loss": 0.0422,
      "rewards/accuracies": 0.953125,
      "rewards/chosen": -0.6355734467506409,
      "rewards/margins": 23.878570556640625,
      "rewards/rejected": -24.514142990112305,
      "step": 120
    },
    {
      "epoch": 2.6530612244897958,
      "grad_norm": 0.33485284447669983,
      "learning_rate": 9.036800464548156e-08,
      "logits/chosen": -1.1463559865951538,
      "logits/rejected": -1.0605792999267578,
      "logps/chosen": -559.5640258789062,
      "logps/rejected": -743.02001953125,
      "loss": 0.0409,
      "rewards/accuracies": 0.9578125476837158,
      "rewards/chosen": -0.5392616391181946,
      "rewards/margins": 25.962358474731445,
      "rewards/rejected": -26.501623153686523,
      "step": 130
    },
    {
      "epoch": 2.857142857142857,
      "grad_norm": 0.08462539315223694,
      "learning_rate": 1.807130273729329e-08,
      "logits/chosen": -1.1435989141464233,
      "logits/rejected": -1.0657496452331543,
      "logps/chosen": -560.4498291015625,
      "logps/rejected": -757.8974609375,
      "loss": 0.0363,
      "rewards/accuracies": 0.964062511920929,
      "rewards/chosen": -0.5970851182937622,
      "rewards/margins": 25.187143325805664,
      "rewards/rejected": -25.784229278564453,
      "step": 140
    },
    {
      "epoch": 3.0,
      "step": 147,
      "total_flos": 9.728979632799089e+17,
      "train_loss": 0.11137714680461656,
      "train_runtime": 2917.8588,
      "train_samples_per_second": 3.22,
      "train_steps_per_second": 0.05
    }
  ],
  "logging_steps": 10,
  "max_steps": 147,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 250,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 9.728979632799089e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}