| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9955022488755623, | |
| "eval_steps": 500, | |
| "global_step": 166, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.002083333333333337, | |
| "completions/max_length": 414.3, | |
| "completions/max_terminated_length": 390.7, | |
| "completions/mean_length": 96.92292022705078, | |
| "completions/mean_terminated_length": 95.08196258544922, | |
| "completions/min_length": 43.4, | |
| "completions/min_terminated_length": 43.4, | |
| "epoch": 0.05997001499250375, | |
| "frac_reward_zero_std": 0.10000000149011612, | |
| "grad_norm": 0.875, | |
| "kl": 0.04828977584838867, | |
| "learning_rate": 1.9855293386108995e-05, | |
| "loss": -0.0894, | |
| "num_tokens": 164459.0, | |
| "reward": 0.25649446398019793, | |
| "reward_std": 0.12709882631897926, | |
| "rewards/_accuracy_reward/mean": 0.25024444460868833, | |
| "rewards/_accuracy_reward/std": 0.16934245973825454, | |
| "rewards/_format_reward/mean": 0.00625, | |
| "rewards/_format_reward/std": 0.02446230351924896, | |
| "step": 10 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 405.9, | |
| "completions/max_terminated_length": 405.9, | |
| "completions/mean_length": 115.59583740234375, | |
| "completions/mean_terminated_length": 115.59583740234375, | |
| "completions/min_length": 65.6, | |
| "completions/min_terminated_length": 65.6, | |
| "epoch": 0.1199400299850075, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.8359375, | |
| "kl": 0.0818756103515625, | |
| "learning_rate": 1.936044737814273e-05, | |
| "loss": 0.0581, | |
| "num_tokens": 338097.0, | |
| "reward": 0.36916170418262484, | |
| "reward_std": 0.13012803941965104, | |
| "rewards/_accuracy_reward/mean": 0.36916169822216033, | |
| "rewards/_accuracy_reward/std": 0.1557157054543495, | |
| "rewards/_format_reward/mean": 0.0, | |
| "rewards/_format_reward/std": 0.0, | |
| "step": 20 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 269.2, | |
| "completions/max_terminated_length": 269.2, | |
| "completions/mean_length": 85.4083351135254, | |
| "completions/mean_terminated_length": 85.4083351135254, | |
| "completions/min_length": 57.7, | |
| "completions/min_terminated_length": 57.7, | |
| "epoch": 0.17991004497751126, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.6484375, | |
| "kl": 0.102984619140625, | |
| "learning_rate": 1.8531342035272768e-05, | |
| "loss": 0.0225, | |
| "num_tokens": 497557.0, | |
| "reward": 0.4350260511040688, | |
| "reward_std": 0.14344265162944794, | |
| "rewards/_accuracy_reward/mean": 0.43502604514360427, | |
| "rewards/_accuracy_reward/std": 0.1680240161716938, | |
| "rewards/_format_reward/mean": 0.0, | |
| "rewards/_format_reward/std": 0.0, | |
| "step": 30 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 198.7, | |
| "completions/max_terminated_length": 198.7, | |
| "completions/mean_length": 76.06458435058593, | |
| "completions/mean_terminated_length": 76.06458435058593, | |
| "completions/min_length": 44.7, | |
| "completions/min_terminated_length": 44.7, | |
| "epoch": 0.239880059970015, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.63671875, | |
| "kl": 0.11517333984375, | |
| "learning_rate": 1.7397584510798208e-05, | |
| "loss": 0.0064, | |
| "num_tokens": 652196.0, | |
| "reward": 0.4085416719317436, | |
| "reward_std": 0.13231892064213752, | |
| "rewards/_accuracy_reward/mean": 0.4085416689515114, | |
| "rewards/_accuracy_reward/std": 0.1608368895947933, | |
| "rewards/_format_reward/mean": 0.0, | |
| "rewards/_format_reward/std": 0.0, | |
| "step": 40 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 265.6, | |
| "completions/max_terminated_length": 265.6, | |
| "completions/mean_length": 89.46666946411133, | |
| "completions/mean_terminated_length": 89.46666946411133, | |
| "completions/min_length": 62.4, | |
| "completions/min_terminated_length": 62.4, | |
| "epoch": 0.29985007496251875, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.392578125, | |
| "kl": 0.118402099609375, | |
| "learning_rate": 1.5999661014486956e-05, | |
| "loss": 0.0506, | |
| "num_tokens": 813532.0, | |
| "reward": 0.5038281351327896, | |
| "reward_std": 0.12968316152691842, | |
| "rewards/_accuracy_reward/mean": 0.5038281202316284, | |
| "rewards/_accuracy_reward/std": 0.16127740293741227, | |
| "rewards/_format_reward/mean": 0.0, | |
| "rewards/_format_reward/std": 0.0, | |
| "step": 50 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.002083333333333337, | |
| "completions/max_length": 364.6, | |
| "completions/max_terminated_length": 304.4, | |
| "completions/mean_length": 93.76042022705079, | |
| "completions/mean_terminated_length": 91.8341781616211, | |
| "completions/min_length": 62.7, | |
| "completions/min_terminated_length": 62.7, | |
| "epoch": 0.3598200899550225, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.455078125, | |
| "kl": 0.288519287109375, | |
| "learning_rate": 1.4387491059717653e-05, | |
| "loss": 0.0338, | |
| "num_tokens": 976497.0, | |
| "reward": 0.4925865650177002, | |
| "reward_std": 0.12617484703660012, | |
| "rewards/_accuracy_reward/mean": 0.492586562037468, | |
| "rewards/_accuracy_reward/std": 0.15444535091519357, | |
| "rewards/_format_reward/mean": 0.0, | |
| "rewards/_format_reward/std": 0.0, | |
| "step": 60 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 265.3, | |
| "completions/max_terminated_length": 265.3, | |
| "completions/mean_length": 85.07291946411132, | |
| "completions/mean_terminated_length": 85.07291946411132, | |
| "completions/min_length": 70.0, | |
| "completions/min_terminated_length": 70.0, | |
| "epoch": 0.4197901049475262, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.451171875, | |
| "kl": 0.089068603515625, | |
| "learning_rate": 1.2618644849608068e-05, | |
| "loss": 0.0247, | |
| "num_tokens": 1135460.0, | |
| "reward": 0.4750963538885117, | |
| "reward_std": 0.12951767966151237, | |
| "rewards/_accuracy_reward/mean": 0.4750963240861893, | |
| "rewards/_accuracy_reward/std": 0.1608477719128132, | |
| "rewards/_format_reward/mean": 0.0, | |
| "rewards/_format_reward/std": 0.0, | |
| "step": 70 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 284.3, | |
| "completions/max_terminated_length": 284.3, | |
| "completions/mean_length": 87.73958587646484, | |
| "completions/mean_terminated_length": 87.73958587646484, | |
| "completions/min_length": 69.4, | |
| "completions/min_terminated_length": 69.4, | |
| "epoch": 0.47976011994003, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4140625, | |
| "kl": 0.07869873046875, | |
| "learning_rate": 1.075628745884457e-05, | |
| "loss": 0.0442, | |
| "num_tokens": 1295727.0, | |
| "reward": 0.5100168704986572, | |
| "reward_std": 0.15252956375479698, | |
| "rewards/_accuracy_reward/mean": 0.5100168436765671, | |
| "rewards/_accuracy_reward/std": 0.17259212732315063, | |
| "rewards/_format_reward/mean": 0.0, | |
| "rewards/_format_reward/std": 0.0, | |
| "step": 80 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 259.8, | |
| "completions/max_terminated_length": 259.8, | |
| "completions/mean_length": 88.41250228881836, | |
| "completions/mean_terminated_length": 88.41250228881836, | |
| "completions/min_length": 64.1, | |
| "completions/min_terminated_length": 64.1, | |
| "epoch": 0.5397301349325337, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.66796875, | |
| "kl": 0.17242431640625, | |
| "learning_rate": 8.866923223987303e-06, | |
| "loss": 0.027, | |
| "num_tokens": 1456149.0, | |
| "reward": 0.4691749334335327, | |
| "reward_std": 0.11657274290919303, | |
| "rewards/_accuracy_reward/mean": 0.4691749155521393, | |
| "rewards/_accuracy_reward/std": 0.13906535133719444, | |
| "rewards/_format_reward/mean": 0.0, | |
| "rewards/_format_reward/std": 0.0, | |
| "step": 90 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 338.8, | |
| "completions/max_terminated_length": 338.8, | |
| "completions/mean_length": 94.3062530517578, | |
| "completions/mean_terminated_length": 94.3062530517578, | |
| "completions/min_length": 59.2, | |
| "completions/min_terminated_length": 59.2, | |
| "epoch": 0.5997001499250375, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.59375, | |
| "kl": 0.106280517578125, | |
| "learning_rate": 7.018020889533348e-06, | |
| "loss": -0.0246, | |
| "num_tokens": 1619424.0, | |
| "reward": 0.5008379817008972, | |
| "reward_std": 0.1251222789287567, | |
| "rewards/_accuracy_reward/mean": 0.5008379787206649, | |
| "rewards/_accuracy_reward/std": 0.14559592306613922, | |
| "rewards/_format_reward/mean": 0.0, | |
| "rewards/_format_reward/std": 0.0, | |
| "step": 100 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 404.7, | |
| "completions/max_terminated_length": 404.7, | |
| "completions/mean_length": 115.0875030517578, | |
| "completions/mean_terminated_length": 115.0875030517578, | |
| "completions/min_length": 51.4, | |
| "completions/min_terminated_length": 51.4, | |
| "epoch": 0.6596701649175413, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.353515625, | |
| "kl": 0.0727294921875, | |
| "learning_rate": 5.2756043152032934e-06, | |
| "loss": -0.0641, | |
| "num_tokens": 1792722.0, | |
| "reward": 0.4739863067865372, | |
| "reward_std": 0.13204658553004264, | |
| "rewards/_accuracy_reward/mean": 0.47398627996444703, | |
| "rewards/_accuracy_reward/std": 0.15842494517564773, | |
| "rewards/_format_reward/mean": 0.0, | |
| "rewards/_format_reward/std": 0.0, | |
| "step": 110 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.002083333333333337, | |
| "completions/max_length": 517.4, | |
| "completions/max_terminated_length": 435.3, | |
| "completions/mean_length": 136.24375381469727, | |
| "completions/mean_terminated_length": 134.30962219238282, | |
| "completions/min_length": 51.6, | |
| "completions/min_terminated_length": 51.6, | |
| "epoch": 0.719640179910045, | |
| "frac_reward_zero_std": 0.01666666716337204, | |
| "grad_norm": 0.279296875, | |
| "kl": 0.0670654296875, | |
| "learning_rate": 3.7018947797172864e-06, | |
| "loss": -0.0695, | |
| "num_tokens": 1976103.0, | |
| "reward": 0.5253316760063171, | |
| "reward_std": 0.12880267389118671, | |
| "rewards/_accuracy_reward/mean": 0.5253316521644592, | |
| "rewards/_accuracy_reward/std": 0.17076537311077117, | |
| "rewards/_format_reward/mean": 0.0, | |
| "rewards/_format_reward/std": 0.0, | |
| "step": 120 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 522.1, | |
| "completions/max_terminated_length": 522.1, | |
| "completions/mean_length": 152.3375045776367, | |
| "completions/mean_terminated_length": 152.3375045776367, | |
| "completions/min_length": 46.7, | |
| "completions/min_terminated_length": 46.7, | |
| "epoch": 0.7796101949025487, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.298828125, | |
| "kl": 0.063446044921875, | |
| "learning_rate": 2.353089073828255e-06, | |
| "loss": -0.0654, | |
| "num_tokens": 2167713.0, | |
| "reward": 0.48647034764289854, | |
| "reward_std": 0.1461639277637005, | |
| "rewards/_accuracy_reward/mean": 0.48647033274173734, | |
| "rewards/_accuracy_reward/std": 0.17206955328583717, | |
| "rewards/_format_reward/mean": 0.0, | |
| "rewards/_format_reward/std": 0.0, | |
| "step": 130 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.002083333333333337, | |
| "completions/max_length": 562.7, | |
| "completions/max_terminated_length": 500.5, | |
| "completions/mean_length": 158.33542022705078, | |
| "completions/mean_terminated_length": 156.5459243774414, | |
| "completions/min_length": 50.7, | |
| "completions/min_terminated_length": 50.7, | |
| "epoch": 0.8395802098950524, | |
| "frac_reward_zero_std": 0.03333333432674408, | |
| "grad_norm": 0.5, | |
| "kl": 0.06429443359375, | |
| "learning_rate": 1.2773527263780626e-06, | |
| "loss": -0.0735, | |
| "num_tokens": 2362058.0, | |
| "reward": 0.4694186806678772, | |
| "reward_std": 0.13372117429971694, | |
| "rewards/_accuracy_reward/mean": 0.46941866278648375, | |
| "rewards/_accuracy_reward/std": 0.1658677004277706, | |
| "rewards/_format_reward/mean": 0.0, | |
| "rewards/_format_reward/std": 0.0, | |
| "step": 140 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.004166666666666674, | |
| "completions/max_length": 636.7, | |
| "completions/max_terminated_length": 532.8, | |
| "completions/mean_length": 168.87708892822266, | |
| "completions/mean_terminated_length": 165.28546447753905, | |
| "completions/min_length": 42.2, | |
| "completions/min_terminated_length": 42.2, | |
| "epoch": 0.8995502248875562, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.232421875, | |
| "kl": 0.085772705078125, | |
| "learning_rate": 5.131000247938367e-07, | |
| "loss": -0.074, | |
| "num_tokens": 2561559.0, | |
| "reward": 0.5166842222213746, | |
| "reward_std": 0.15563009977340697, | |
| "rewards/_accuracy_reward/mean": 0.5166841924190522, | |
| "rewards/_accuracy_reward/std": 0.1897404298186302, | |
| "rewards/_format_reward/mean": 0.0, | |
| "rewards/_format_reward/std": 0.0, | |
| "step": 150 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.004166666666666674, | |
| "completions/max_length": 578.3, | |
| "completions/max_terminated_length": 469.4, | |
| "completions/mean_length": 176.32083740234376, | |
| "completions/mean_terminated_length": 172.7707046508789, | |
| "completions/min_length": 57.6, | |
| "completions/min_terminated_length": 57.6, | |
| "epoch": 0.95952023988006, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.310546875, | |
| "kl": 0.0620758056640625, | |
| "learning_rate": 8.762225008062675e-08, | |
| "loss": -0.0572, | |
| "num_tokens": 2764345.0, | |
| "reward": 0.4916923582553864, | |
| "reward_std": 0.14141732677817345, | |
| "rewards/_accuracy_reward/mean": 0.49169233739376067, | |
| "rewards/_accuracy_reward/std": 0.17603871822357178, | |
| "rewards/_format_reward/mean": 0.0, | |
| "rewards/_format_reward/std": 0.0, | |
| "step": 160 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 447.6666666666667, | |
| "completions/max_terminated_length": 447.6666666666667, | |
| "completions/mean_length": 160.1840337117513, | |
| "completions/mean_terminated_length": 160.1840337117513, | |
| "completions/min_length": 49.333333333333336, | |
| "completions/min_terminated_length": 49.333333333333336, | |
| "epoch": 0.9955022488755623, | |
| "frac_reward_zero_std": 0.0, | |
| "kl": 0.069488525390625, | |
| "num_tokens": 2881422.0, | |
| "reward": 0.49811801811059314, | |
| "reward_std": 0.12225283433993657, | |
| "rewards/_accuracy_reward/mean": 0.4981180081764857, | |
| "rewards/_accuracy_reward/std": 0.14967897906899452, | |
| "rewards/_format_reward/mean": 0.0, | |
| "rewards/_format_reward/std": 0.0, | |
| "step": 166, | |
| "total_flos": 0.0, | |
| "train_loss": -0.017725162387612355, | |
| "train_runtime": 3869.3247, | |
| "train_samples_per_second": 0.258, | |
| "train_steps_per_second": 0.043 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 166, | |
| "num_input_tokens_seen": 2881422, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 12, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |