diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,60034 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.40008001600320064, + "eval_steps": 500, + "global_step": 2000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1200.0, + "completions/max_terminated_length": 1200.0, + "completions/mean_length": 933.5, + "completions/mean_terminated_length": 933.5, + "completions/min_length": 747.0, + "completions/min_terminated_length": 747.0, + "epoch": 0.00020004000800160032, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7719647798746982, + "kl": 0.002201080322265625, + "learning_rate": 0.0, + "loss": -0.0057, + "num_tokens": 38512.0, + "reward": 0.0, + "reward_std": 0.9647135734558105, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.060370187077817924, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10503200815284514, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12141145226353545, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1258.0, + "completions/max_terminated_length": 1258.0, + "completions/mean_length": 1106.0625, + "completions/mean_terminated_length": 1106.0625, + "completions/min_length": 914.0, + "completions/min_terminated_length": 914.0, + "epoch": 0.00040008001600320064, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9700291501296006, + "kl": 0.002105712890625, + "learning_rate": 2e-09, + "loss": 0.0239, + "num_tokens": 80777.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8185614943504333, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.014317208923658092, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13695925671090664, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06763190130459203, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 2 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1171.0, + "completions/max_terminated_length": 1171.0, + "completions/mean_length": 1078.625, + "completions/mean_terminated_length": 1078.625, + "completions/min_length": 901.0, + "completions/min_terminated_length": 901.0, + "epoch": 0.000600120024004801, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.947513734144849, + "kl": 0.0017986297607421875, + "learning_rate": 4e-09, + "loss": 0.0119, + "num_tokens": 124019.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0482888221740723, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.006112259756802029, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06127776955270601, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07290277645477444, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 3 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1226.0, + "completions/max_terminated_length": 1226.0, + "completions/mean_length": 1032.4375, + "completions/mean_terminated_length": 1032.4375, + "completions/min_length": 893.0, + "completions/min_terminated_length": 893.0, + "epoch": 0.0008001600320064013, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0119546122469014, + "kl": 0.001750946044921875, + "learning_rate": 6e-09, + "loss": -0.0208, + "num_tokens": 158706.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0440306663513184, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.022592957667906335, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05565147373855221, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0850925422157591, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 4 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1246.0, + "completions/max_terminated_length": 1246.0, + "completions/mean_length": 1080.5625, + "completions/mean_terminated_length": 1080.5625, + "completions/min_length": 959.0, + "completions/min_terminated_length": 959.0, + "epoch": 0.0010002000400080016, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9737080568144156, + "kl": 0.00146484375, + "learning_rate": 8e-09, + "loss": 0.0093, + "num_tokens": 201219.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.027381420135498, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.01994477547151707, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16656435467473105, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10327955589886446, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 5 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1176.0, + "completions/max_terminated_length": 1176.0, + "completions/mean_length": 978.75, + "completions/mean_terminated_length": 978.75, + "completions/min_length": 847.0, + "completions/min_terminated_length": 847.0, + "epoch": 0.001200240048009602, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9917665085449117, + "kl": 0.0015659332275390625, + "learning_rate": 1e-08, + "loss": 0.0181, + "num_tokens": 249207.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.809004545211792, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.008728696416791854, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07015606727046673, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1264911064067352, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 6 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1232.0, + "completions/mean_length": 1206.9375, + "completions/mean_terminated_length": 1031.0999755859375, + "completions/min_length": 935.0, + "completions/min_terminated_length": 935.0, + "epoch": 0.0014002800560112022, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.639379068886793, + "kl": 0.0007863044738769531, + "learning_rate": 1.2e-08, + "loss": 0.0449, + "num_tokens": 302078.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8771082162857056, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.3502630592566262, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12749047493722304, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10461569884316813, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 7 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1343.0, + "completions/max_terminated_length": 1343.0, + "completions/mean_length": 1099.3125, + "completions/mean_terminated_length": 1099.3125, + "completions/min_length": 981.0, + "completions/min_terminated_length": 981.0, + "epoch": 0.0016003200640128026, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.243196093659085, + "kl": 0.002201080322265625, + "learning_rate": 1.4e-08, + "loss": -0.0365, + "num_tokens": 343099.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9199880361557007, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.002734912511273082, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.20623099641453918, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07649739768026005, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 8 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1331.0, + "completions/mean_length": 1183.0625, + "completions/mean_terminated_length": 1161.933349609375, + "completions/min_length": 978.0, + "completions/min_terminated_length": 978.0, + "epoch": 0.0018003600720144029, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.920180620371179, + "kl": 0.00189971923828125, + "learning_rate": 1.6e-08, + "loss": -0.0167, + "num_tokens": 383164.0, + "reward": 0.0, + "reward_std": 0.8865110874176025, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.003208096101791111, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.038060780202601126, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.107496769977314, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 9 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1268.0, + "completions/max_terminated_length": 1268.0, + "completions/mean_length": 1010.375, + "completions/mean_terminated_length": 1010.375, + "completions/min_length": 810.0, + "completions/min_terminated_length": 810.0, + "epoch": 0.002000400080016003, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.3311884305234587, + "kl": 0.0005748271942138672, + "learning_rate": 1.8e-08, + "loss": -0.0162, + "num_tokens": 423010.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9406280517578125, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.032047581619717706, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07062985630141067, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.057089922571845, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 10 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1440.0, + "completions/max_terminated_length": 1440.0, + "completions/mean_length": 1025.125, + "completions/mean_terminated_length": 1025.125, + "completions/min_length": 823.0, + "completions/min_terminated_length": 823.0, + "epoch": 0.0022004400880176033, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.460943003159714, + "kl": 0.002651214599609375, + "learning_rate": 2e-08, + "loss": -0.0158, + "num_tokens": 462852.0, + "reward": 0.0, + "reward_std": 0.7455190420150757, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0985167328964536, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12336587533374523, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.102469507659596, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 11 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1304.0, + "completions/max_terminated_length": 1304.0, + "completions/mean_length": 921.5, + "completions/mean_terminated_length": 921.5, + "completions/min_length": 623.0, + "completions/min_terminated_length": 623.0, + "epoch": 0.002400480096019204, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.709115716804453, + "kl": 0.00206756591796875, + "learning_rate": 2.2e-08, + "loss": -0.0447, + "num_tokens": 501836.0, + "reward": 1.862645149230957e-08, + "reward_std": 1.03127121925354, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.862645149230957e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.12132059480818284, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07080041624827665, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08388704928078614, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 12 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1462.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 949.3125, + "completions/mean_terminated_length": 949.3125, + "completions/min_length": 19.0, + "completions/min_terminated_length": 19.0, + "epoch": 0.002600520104020804, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7319401805313674, + "kl": 0.002208709716796875, + "learning_rate": 2.4e-08, + "loss": -0.1311, + "num_tokens": 539449.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9264252185821533, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05212208944218129, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0894238264480926, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1954576775256058, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 13 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1207.0, + "completions/max_terminated_length": 1207.0, + "completions/mean_length": 953.875, + "completions/mean_terminated_length": 953.875, + "completions/min_length": 705.0, + "completions/min_terminated_length": 705.0, + "epoch": 0.0028005601120224045, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.638759552858246, + "kl": 0.0021190643310546875, + "learning_rate": 2.5999999999999998e-08, + "loss": -0.0365, + "num_tokens": 581103.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8865999579429626, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06966433804127876, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08722335434657477, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11674600476945511, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 14 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1437.0, + "completions/mean_length": 1344.8125, + "completions/mean_terminated_length": 1309.0, + "completions/min_length": 1104.0, + "completions/min_terminated_length": 1104.0, + "epoch": 0.0030006001200240046, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9915602396660868, + "kl": 0.0021572113037109375, + "learning_rate": 2.8e-08, + "loss": -0.0449, + "num_tokens": 631348.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0497987270355225, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.171379321673656, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08012907504237765, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8250000000000001, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1261979632400061, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 15 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1172.0, + "completions/max_terminated_length": 1172.0, + "completions/mean_length": 945.5625, + "completions/mean_terminated_length": 945.5625, + "completions/min_length": 676.0, + "completions/min_terminated_length": 676.0, + "epoch": 0.003200640128025605, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6298293932054704, + "kl": 0.0025787353515625, + "learning_rate": 3e-08, + "loss": -0.0539, + "num_tokens": 674037.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.4462427496910095, + "rewards/wordcountpos_reward_nokeypoint/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.20244106528332742, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.28045652603820376, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10174405069512348, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 16 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1450.0, + "completions/max_terminated_length": 1450.0, + "completions/mean_length": 1122.875, + "completions/mean_terminated_length": 1122.875, + "completions/min_length": 890.0, + "completions/min_terminated_length": 890.0, + "epoch": 0.0034006801360272052, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4539224563032147, + "kl": 0.00266265869140625, + "learning_rate": 3.2e-08, + "loss": -0.0576, + "num_tokens": 720891.0, + "reward": 0.0, + "reward_std": 0.8166095018386841, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.13926108902150022, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06225757268562033, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14548768561863465, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 17 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1418.0, + "completions/mean_length": 1235.5, + "completions/mean_terminated_length": 1147.3333740234375, + "completions/min_length": 925.0, + "completions/min_terminated_length": 925.0, + "epoch": 0.0036007201440288058, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.310244506439362, + "kl": 0.002460479736328125, + "learning_rate": 3.4e-08, + "loss": 0.0082, + "num_tokens": 755787.0, + "reward": 0.0, + "reward_std": 0.8688819408416748, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1928178499337109, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05919134825103573, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07252075054258098, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 18 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1460.0, + "completions/max_terminated_length": 1460.0, + "completions/mean_length": 1096.75, + "completions/mean_terminated_length": 1096.75, + "completions/min_length": 664.0, + "completions/min_terminated_length": 664.0, + "epoch": 0.003800760152030406, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.580429681066231, + "kl": 0.002483367919921875, + "learning_rate": 3.6e-08, + "loss": 0.0656, + "num_tokens": 802655.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0053932666778564, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.059552301980649744, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.30641248650761815, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0768596604689834, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 19 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1456.0, + "completions/mean_length": 1172.0, + "completions/mean_terminated_length": 1150.1334228515625, + "completions/min_length": 880.0, + "completions/min_terminated_length": 880.0, + "epoch": 0.004000800160032006, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.912839765387738, + "kl": 0.002101898193359375, + "learning_rate": 3.7999999999999996e-08, + "loss": -0.045, + "num_tokens": 855487.0, + "reward": 0.0, + "reward_std": 0.8540974259376526, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.2323384938262261, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.19145014437057972, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 20 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1200.0, + "completions/max_terminated_length": 1200.0, + "completions/mean_length": 1026.9375, + "completions/mean_terminated_length": 1026.9375, + "completions/min_length": 787.0, + "completions/min_terminated_length": 787.0, + "epoch": 0.004200840168033607, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.173125547057575, + "kl": 0.00208282470703125, + "learning_rate": 4e-08, + "loss": -0.0428, + "num_tokens": 899862.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7571918964385986, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08694783307115611, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08979933585957603, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08933913745655644, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 21 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1194.0, + "completions/max_terminated_length": 1194.0, + "completions/mean_length": 993.6875, + "completions/mean_terminated_length": 993.6875, + "completions/min_length": 785.0, + "completions/min_terminated_length": 785.0, + "epoch": 0.004400880176035207, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4233976593566995, + "kl": 0.00220489501953125, + "learning_rate": 4.2e-08, + "loss": -0.0554, + "num_tokens": 939233.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9667501449584961, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07716698075489657, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09278685814087667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14343665526661614, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 22 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1240.0, + "completions/max_terminated_length": 1240.0, + "completions/mean_length": 1024.3125, + "completions/mean_terminated_length": 1024.3125, + "completions/min_length": 859.0, + "completions/min_terminated_length": 859.0, + "epoch": 0.004600920184036807, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9053635341144766, + "kl": 0.00281524658203125, + "learning_rate": 4.4e-08, + "loss": 0.0084, + "num_tokens": 991270.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0313401222229004, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.040713052141186264, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03877196229122596, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09583937179043481, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 23 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1350.375, + "completions/mean_terminated_length": 1234.0, + "completions/min_length": 830.0, + "completions/min_terminated_length": 830.0, + "epoch": 0.004800960192038408, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8347583387707997, + "kl": 0.002254486083984375, + "learning_rate": 4.5999999999999995e-08, + "loss": -0.0079, + "num_tokens": 1042404.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7025319337844849, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11384987220223106, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.22231476342583645, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.15104573749303493, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 24 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1395.0, + "completions/mean_length": 1350.875, + "completions/mean_terminated_length": 1201.75, + "completions/min_length": 868.0, + "completions/min_terminated_length": 868.0, + "epoch": 0.005001000200040008, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1749644907675294, + "kl": 0.001194000244140625, + "learning_rate": 4.8e-08, + "loss": -0.0055, + "num_tokens": 1088570.0, + "reward": 0.0, + "reward_std": 0.6684867143630981, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.14066539836457012, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16119678885969496, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10576003586036263, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 25 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1412.0, + "completions/mean_length": 1140.5625, + "completions/mean_terminated_length": 1089.21435546875, + "completions/min_length": 815.0, + "completions/min_terminated_length": 815.0, + "epoch": 0.005201040208041608, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.518899223397213, + "kl": 0.00257110595703125, + "learning_rate": 5e-08, + "loss": -0.0119, + "num_tokens": 1131747.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8927016258239746, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04656408911965942, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11998447423999102, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08606629658238706, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 26 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1421.0, + "completions/max_terminated_length": 1421.0, + "completions/mean_length": 1140.0, + "completions/mean_terminated_length": 1140.0, + "completions/min_length": 912.0, + "completions/min_terminated_length": 912.0, + "epoch": 0.0054010802160432084, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2133112282805523, + "kl": 0.0018939971923828125, + "learning_rate": 5.1999999999999996e-08, + "loss": -0.002, + "num_tokens": 1171675.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9350282549858093, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.05008900726608281, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1325755136633137, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11080513425729775, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 27 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1171.0, + "completions/mean_length": 959.5, + "completions/mean_terminated_length": 923.4667358398438, + "completions/min_length": 709.0, + "completions/min_terminated_length": 709.0, + "epoch": 0.005601120224044809, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9687888917833605, + "kl": 0.001415252685546875, + "learning_rate": 5.3999999999999994e-08, + "loss": -0.0533, + "num_tokens": 1221051.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.8977407813072205, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.01360270260072841, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.47423726257210935, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10036968702787749, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 28 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1430.0, + "completions/max_terminated_length": 1430.0, + "completions/mean_length": 1161.3125, + "completions/mean_terminated_length": 1161.3125, + "completions/min_length": 906.0, + "completions/min_terminated_length": 906.0, + "epoch": 0.0058011602320464095, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9668321588460183, + "kl": 0.0017681121826171875, + "learning_rate": 5.6e-08, + "loss": -0.034, + "num_tokens": 1269072.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9734693169593811, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.017117204331721984, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.18530350760353362, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1261979632400061, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 29 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1344.0, + "completions/mean_length": 1144.375, + "completions/mean_terminated_length": 1120.666748046875, + "completions/min_length": 898.0, + "completions/min_terminated_length": 898.0, + "epoch": 0.006001200240048009, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9016791362845042, + "kl": 0.00154876708984375, + "learning_rate": 5.8e-08, + "loss": -0.0033, + "num_tokens": 1312670.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.34135910868644714, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.059032232409145964, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.19352585984224857, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13709958532503408, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 30 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1453.0, + "completions/mean_length": 1150.625, + "completions/mean_terminated_length": 1127.3333740234375, + "completions/min_length": 895.0, + "completions/min_terminated_length": 895.0, + "epoch": 0.00620124024804961, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0496838081361175, + "kl": 0.0020847320556640625, + "learning_rate": 6e-08, + "loss": -0.0489, + "num_tokens": 1363440.0, + "reward": 0.0, + "reward_std": 0.9318310022354126, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.15187980872432078, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2133875566422585, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07097208632298363, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 31 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1417.0, + "completions/mean_length": 959.5625, + "completions/mean_terminated_length": 923.5333862304688, + "completions/min_length": 533.0, + "completions/min_terminated_length": 533.0, + "epoch": 0.00640128025605121, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8778473593349827, + "kl": 0.003116607666015625, + "learning_rate": 6.2e-08, + "loss": 0.0158, + "num_tokens": 1408097.0, + "reward": 0.0, + "reward_std": 0.7282787561416626, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.15725799418815217, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14265897243063286, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12141145226353543, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 32 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1328.0, + "completions/max_terminated_length": 1328.0, + "completions/mean_length": 1139.375, + "completions/mean_terminated_length": 1139.375, + "completions/min_length": 19.0, + "completions/min_terminated_length": 19.0, + "epoch": 0.006601320264052811, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.440073226029727, + "kl": 0.001972198486328125, + "learning_rate": 6.4e-08, + "loss": -0.1614, + "num_tokens": 1460247.0, + "reward": -1.1175870895385742e-08, + "reward_std": 0.9736117124557495, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.013071816189455791, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06890520839383243, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14375905768565217, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 33 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1142.0, + "completions/max_terminated_length": 1142.0, + "completions/mean_length": 993.5, + "completions/mean_terminated_length": 993.5, + "completions/min_length": 754.0, + "completions/min_terminated_length": 754.0, + "epoch": 0.0068013602720544105, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.791683855749169, + "kl": 0.0024662017822265625, + "learning_rate": 6.6e-08, + "loss": -0.0334, + "num_tokens": 1512439.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8932263851165771, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.23288021425492458, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.28652649548820835, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08606629658238706, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 34 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1327.0, + "completions/max_terminated_length": 1327.0, + "completions/mean_length": 1066.75, + "completions/mean_terminated_length": 1066.75, + "completions/min_length": 907.0, + "completions/min_terminated_length": 907.0, + "epoch": 0.007001400280056011, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9261123737911374, + "kl": 0.001560211181640625, + "learning_rate": 6.8e-08, + "loss": -0.0125, + "num_tokens": 1550771.0, + "reward": 0.0, + "reward_std": 0.9625250101089478, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.17647650317207003, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.19073882526080158, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09651328828101766, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 35 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1409.0, + "completions/mean_length": 1306.75, + "completions/mean_terminated_length": 1262.1539306640625, + "completions/min_length": 1016.0, + "completions/min_terminated_length": 1016.0, + "epoch": 0.0072014402880576115, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5330741888418538, + "kl": 0.001483917236328125, + "learning_rate": 7e-08, + "loss": -0.0391, + "num_tokens": 1598575.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8229151964187622, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.023932399757228686, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14268957856232678, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11155467020454343, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 36 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1332.0, + "completions/max_terminated_length": 1332.0, + "completions/mean_length": 959.5625, + "completions/mean_terminated_length": 959.5625, + "completions/min_length": 701.0, + "completions/min_terminated_length": 701.0, + "epoch": 0.007401480296059212, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.28524385962073, + "kl": 0.001827239990234375, + "learning_rate": 7.2e-08, + "loss": -0.0014, + "num_tokens": 1631184.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.8785633444786072, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04434556450553164, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06365564022927482, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1067187372905475, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 37 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1345.6875, + "completions/mean_terminated_length": 1310.0770263671875, + "completions/min_length": 907.0, + "completions/min_terminated_length": 907.0, + "epoch": 0.007601520304060812, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.651061619895391, + "kl": 0.0015621185302734375, + "learning_rate": 7.399999999999999e-08, + "loss": -0.0194, + "num_tokens": 1681243.0, + "reward": 0.0, + "reward_std": 0.9295207262039185, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07356850194079113, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04459485097101169, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10602235962635781, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 38 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1172.0, + "completions/max_terminated_length": 1172.0, + "completions/mean_length": 1056.5, + "completions/mean_terminated_length": 1056.5, + "completions/min_length": 933.0, + "completions/min_terminated_length": 933.0, + "epoch": 0.007801560312062412, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.295057360786906, + "kl": 0.00225830078125, + "learning_rate": 7.599999999999999e-08, + "loss": 0.0201, + "num_tokens": 1724459.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6720481514930725, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07260523340387169, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2706783918209044, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.133263870794973, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 39 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1322.6875, + "completions/mean_terminated_length": 1281.769287109375, + "completions/min_length": 1155.0, + "completions/min_terminated_length": 1155.0, + "epoch": 0.008001600320064013, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.521059085351725, + "kl": 0.00140380859375, + "learning_rate": 7.8e-08, + "loss": 0.0072, + "num_tokens": 1780710.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0237410068511963, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.3799412038122201, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2840200336642274, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07290277645477447, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 40 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1372.0, + "completions/max_terminated_length": 1372.0, + "completions/mean_length": 1202.8125, + "completions/mean_terminated_length": 1202.8125, + "completions/min_length": 952.0, + "completions/min_terminated_length": 952.0, + "epoch": 0.008201640328065612, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.441659300519132, + "kl": 0.0014057159423828125, + "learning_rate": 8e-08, + "loss": -0.0188, + "num_tokens": 1812651.0, + "reward": 4.470348358154297e-08, + "reward_std": 0.9012058973312378, + "rewards/wordcountpos_reward_nokeypoint/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.003170744883113894, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08994321344632451, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05146016078626404, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 41 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1478.0, + "completions/mean_length": 1316.1875, + "completions/mean_terminated_length": 1273.769287109375, + "completions/min_length": 1040.0, + "completions/min_terminated_length": 1040.0, + "epoch": 0.008401680336067214, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9121936102993957, + "kl": 0.00201416015625, + "learning_rate": 8.2e-08, + "loss": 0.0245, + "num_tokens": 1848406.0, + "reward": 0.0, + "reward_std": 0.9400888085365295, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03415649649582079, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03593302889683083, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08255189164891874, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 42 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1441.0, + "completions/mean_length": 1206.75, + "completions/mean_terminated_length": 1187.2000732421875, + "completions/min_length": 940.0, + "completions/min_terminated_length": 940.0, + "epoch": 0.008601720344068814, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.772229110144138, + "kl": 0.0019207000732421875, + "learning_rate": 8.4e-08, + "loss": -0.0367, + "num_tokens": 1901682.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8670361042022705, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0868613987165086, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09832479870998065, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07781745019952502, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 43 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1426.0, + "completions/mean_length": 1172.0625, + "completions/mean_terminated_length": 1096.3846435546875, + "completions/min_length": 858.0, + "completions/min_terminated_length": 858.0, + "epoch": 0.008801760352070413, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.616292704461705, + "kl": 0.0013418197631835938, + "learning_rate": 8.599999999999999e-08, + "loss": 0.0588, + "num_tokens": 1947163.0, + "reward": 0.0, + "reward_std": 0.688073992729187, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07901685473120568, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.23982536903084375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08027729719194864, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 44 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1226.625, + "completions/mean_terminated_length": 1187.571533203125, + "completions/min_length": 868.0, + "completions/min_terminated_length": 868.0, + "epoch": 0.009001800360072015, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.412260855856314, + "kl": 0.0016884803771972656, + "learning_rate": 8.8e-08, + "loss": -0.0306, + "num_tokens": 1993093.0, + "reward": 0.0, + "reward_std": 1.0023939609527588, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.028434693746305088, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04462157779593071, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08333333333333336, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 45 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1300.0, + "completions/max_terminated_length": 1300.0, + "completions/mean_length": 995.6875, + "completions/mean_terminated_length": 995.6875, + "completions/min_length": 753.0, + "completions/min_terminated_length": 753.0, + "epoch": 0.009201840368073614, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.569068200907718, + "kl": 0.0022411346435546875, + "learning_rate": 9e-08, + "loss": 0.0029, + "num_tokens": 2039288.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9607493877410889, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04518690239242903, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07859208444533065, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09388724521901161, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 46 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1385.0, + "completions/mean_length": 1050.9375, + "completions/mean_terminated_length": 1021.0000610351562, + "completions/min_length": 838.0, + "completions/min_terminated_length": 838.0, + "epoch": 0.009401880376075216, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.416615193495999, + "kl": 0.002109527587890625, + "learning_rate": 9.199999999999999e-08, + "loss": -0.0756, + "num_tokens": 2079111.0, + "reward": 2.60770320892334e-08, + "reward_std": 1.0435000658035278, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.60770320892334e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10087334826624465, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07135589712346369, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11344765475923412, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 47 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1423.0, + "completions/max_terminated_length": 1423.0, + "completions/mean_length": 1227.125, + "completions/mean_terminated_length": 1227.125, + "completions/min_length": 917.0, + "completions/min_terminated_length": 917.0, + "epoch": 0.009601920384076815, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.148342191663946, + "kl": 0.00241851806640625, + "learning_rate": 9.4e-08, + "loss": -0.0304, + "num_tokens": 2129089.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.965854287147522, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11521688668307693, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14581381624697703, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07698003589195011, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 48 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1180.0, + "completions/max_terminated_length": 1180.0, + "completions/mean_length": 1011.9375, + "completions/mean_terminated_length": 1011.9375, + "completions/min_length": 892.0, + "completions/min_terminated_length": 892.0, + "epoch": 0.009801960392078415, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9076360635952785, + "kl": 0.0014190673828125, + "learning_rate": 9.6e-08, + "loss": -0.0021, + "num_tokens": 2166104.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6169154644012451, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.21058342884268705, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16667790176792996, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10878112581387149, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 49 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1325.0, + "completions/mean_length": 1246.75, + "completions/mean_terminated_length": 1131.6363525390625, + "completions/min_length": 924.0, + "completions/min_terminated_length": 924.0, + "epoch": 0.010002000400080016, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4828478920399304, + "kl": 0.0011320114135742188, + "learning_rate": 9.8e-08, + "loss": 0.0262, + "num_tokens": 2214876.0, + "reward": 0.0, + "reward_std": 0.7879159450531006, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1415329345310017, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12674041289538696, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08421753138505422, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 50 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1422.0, + "completions/max_terminated_length": 1422.0, + "completions/mean_length": 1046.5625, + "completions/mean_terminated_length": 1046.5625, + "completions/min_length": 860.0, + "completions/min_terminated_length": 860.0, + "epoch": 0.010202040408081616, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8582940676309567, + "kl": 0.0018634796142578125, + "learning_rate": 1e-07, + "loss": -0.0394, + "num_tokens": 2266365.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.007798671722412, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.058036587707642424, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1802991911177658, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07187952884282608, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 51 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1420.0, + "completions/mean_length": 1133.75, + "completions/mean_terminated_length": 1109.3333740234375, + "completions/min_length": 668.0, + "completions/min_terminated_length": 668.0, + "epoch": 0.010402080416083216, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.39741509929014, + "kl": 0.002422332763671875, + "learning_rate": 1.0199999999999999e-07, + "loss": -0.0296, + "num_tokens": 2309233.0, + "reward": 0.0, + "reward_std": 0.9326049089431763, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.05665291776239778, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09305745304672738, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06831300510639735, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 52 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1455.0, + "completions/max_terminated_length": 1455.0, + "completions/mean_length": 1193.1875, + "completions/mean_terminated_length": 1193.1875, + "completions/min_length": 893.0, + "completions/min_terminated_length": 893.0, + "epoch": 0.010602120424084817, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.368445836057777, + "kl": 0.00213623046875, + "learning_rate": 1.0399999999999999e-07, + "loss": -0.0024, + "num_tokens": 2362556.0, + "reward": 0.0, + "reward_std": 0.7186998128890991, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.20127997495680922, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10541975778710927, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11013459778666117, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 53 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1256.0, + "completions/max_terminated_length": 1256.0, + "completions/mean_length": 986.6875, + "completions/mean_terminated_length": 986.6875, + "completions/min_length": 760.0, + "completions/min_terminated_length": 760.0, + "epoch": 0.010802160432086417, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6795849759838397, + "kl": 0.0025177001953125, + "learning_rate": 1.06e-07, + "loss": -0.016, + "num_tokens": 2411727.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5343930721282959, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.023478160202542394, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.19179916073168654, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.6583333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12141145226353543, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 54 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1176.0, + "completions/mean_length": 960.1875, + "completions/mean_terminated_length": 924.2000732421875, + "completions/min_length": 612.0, + "completions/min_terminated_length": 612.0, + "epoch": 0.011002200440088018, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0264936182606723, + "kl": 0.001659393310546875, + "learning_rate": 1.0799999999999999e-07, + "loss": 0.027, + "num_tokens": 2452242.0, + "reward": 0.0, + "reward_std": 0.8035188913345337, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.01157511922244489, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13682142550971602, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08681611046941137, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 55 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1329.0, + "completions/max_terminated_length": 1329.0, + "completions/mean_length": 1090.8125, + "completions/mean_terminated_length": 1090.8125, + "completions/min_length": 954.0, + "completions/min_terminated_length": 954.0, + "epoch": 0.011202240448089618, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8845862911827638, + "kl": 0.0012149810791015625, + "learning_rate": 1.0999999999999999e-07, + "loss": -0.028, + "num_tokens": 2489119.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8766696453094482, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.12431232959123606, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0626447251015014, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.200554786086551, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 56 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1426.0, + "completions/mean_length": 1158.8125, + "completions/mean_terminated_length": 1045.0833740234375, + "completions/min_length": 737.0, + "completions/min_terminated_length": 737.0, + "epoch": 0.011402280456091218, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.081442915950309, + "kl": 0.0018939971923828125, + "learning_rate": 1.12e-07, + "loss": -0.0256, + "num_tokens": 2530388.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5026324391365051, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.011911896628029887, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1459156946116194, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11642832797715323, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 57 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1423.0, + "completions/mean_length": 1163.5625, + "completions/mean_terminated_length": 961.7000122070312, + "completions/min_length": 669.0, + "completions/min_terminated_length": 669.0, + "epoch": 0.011602320464092819, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.468627420322096, + "kl": 0.00238800048828125, + "learning_rate": 1.14e-07, + "loss": 0.0766, + "num_tokens": 2573037.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0578970909118652, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08778626847415458, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1533522580093028, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10036968702787749, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 58 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1154.0, + "completions/max_terminated_length": 1154.0, + "completions/mean_length": 1030.0, + "completions/mean_terminated_length": 1030.0, + "completions/min_length": 827.0, + "completions/min_terminated_length": 827.0, + "epoch": 0.011802360472094419, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.901361992787647, + "kl": 0.001529693603515625, + "learning_rate": 1.16e-07, + "loss": -0.0358, + "num_tokens": 2620301.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8717843890190125, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.018730657388011406, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09668335435687728, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05288001793018132, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 59 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1454.0, + "completions/max_terminated_length": 1454.0, + "completions/mean_length": 1184.625, + "completions/mean_terminated_length": 1184.625, + "completions/min_length": 993.0, + "completions/min_terminated_length": 993.0, + "epoch": 0.012002400480096018, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.98510892636235, + "kl": 0.0019855499267578125, + "learning_rate": 1.1799999999999998e-07, + "loss": 0.0035, + "num_tokens": 2672319.0, + "reward": 0.0, + "reward_std": 0.690589427947998, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.12133663452897447, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06143257286595526, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1060223596263578, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 60 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1258.0, + "completions/max_terminated_length": 1258.0, + "completions/mean_length": 1002.0, + "completions/mean_terminated_length": 1002.0, + "completions/min_length": 791.0, + "completions/min_terminated_length": 791.0, + "epoch": 0.01220244048809762, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8946491354693116, + "kl": 0.001689910888671875, + "learning_rate": 1.2e-07, + "loss": 0.0428, + "num_tokens": 2714511.0, + "reward": 0.0, + "reward_std": 0.7043496370315552, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.10076848212543327, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1521964903695193, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09888264649460886, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 61 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1426.0, + "completions/max_terminated_length": 1426.0, + "completions/mean_length": 1030.3125, + "completions/mean_terminated_length": 1030.3125, + "completions/min_length": 787.0, + "completions/min_terminated_length": 787.0, + "epoch": 0.01240248049609922, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.48263710027003, + "kl": 0.0016574859619140625, + "learning_rate": 1.2199999999999998e-07, + "loss": -0.0038, + "num_tokens": 2757108.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0403692722320557, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09020156153601697, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08121491262762001, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06652763279965646, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 62 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1300.0, + "completions/max_terminated_length": 1300.0, + "completions/mean_length": 1107.75, + "completions/mean_terminated_length": 1107.75, + "completions/min_length": 955.0, + "completions/min_terminated_length": 955.0, + "epoch": 0.01260252050410082, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.730815358251627, + "kl": 0.0015583038330078125, + "learning_rate": 1.24e-07, + "loss": -0.0208, + "num_tokens": 2797248.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8903665542602539, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.030498948575200937, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.038456874873685085, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08421753138505424, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 63 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1068.0, + "completions/max_terminated_length": 1068.0, + "completions/mean_length": 871.625, + "completions/mean_terminated_length": 871.625, + "completions/min_length": 641.0, + "completions/min_terminated_length": 641.0, + "epoch": 0.01280256051210242, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.168871630586243, + "kl": 0.0031280517578125, + "learning_rate": 1.26e-07, + "loss": -0.0233, + "num_tokens": 2836242.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9966598749160767, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1153473865203158, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1747651374723548, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10576003586036263, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 64 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1377.0, + "completions/max_terminated_length": 1377.0, + "completions/mean_length": 1152.375, + "completions/mean_terminated_length": 1152.375, + "completions/min_length": 923.0, + "completions/min_terminated_length": 923.0, + "epoch": 0.01300260052010402, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.131854118899087, + "kl": 0.001987457275390625, + "learning_rate": 1.28e-07, + "loss": 0.0027, + "num_tokens": 2877480.0, + "reward": 0.0, + "reward_std": 1.0128731727600098, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.01041096436918126, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10277206050089882, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.048686449556014776, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327954292297363, + "step": 65 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1440.0, + "completions/mean_length": 1254.375, + "completions/mean_terminated_length": 1219.2857666015625, + "completions/min_length": 837.0, + "completions/min_terminated_length": 837.0, + "epoch": 0.013202640528105622, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3834783058864892, + "kl": 0.0026702880859375, + "learning_rate": 1.3e-07, + "loss": -0.0093, + "num_tokens": 2931390.0, + "reward": 0.0, + "reward_std": 0.9515336751937866, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.3025348592566154, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.3117181872259639, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12049281521534187, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 66 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1314.0, + "completions/max_terminated_length": 1314.0, + "completions/mean_length": 1019.5625, + "completions/mean_terminated_length": 1019.5625, + "completions/min_length": 527.0, + "completions/min_terminated_length": 527.0, + "epoch": 0.013402680536107221, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2297117502633985, + "kl": 0.0019702911376953125, + "learning_rate": 1.32e-07, + "loss": -0.0433, + "num_tokens": 2964903.0, + "reward": 0.0, + "reward_std": 0.9942666292190552, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08347932348977771, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.18285898771082015, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12583057392117916, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327954292297363, + "step": 67 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1150.125, + "completions/mean_terminated_length": 1126.800048828125, + "completions/min_length": 876.0, + "completions/min_terminated_length": 876.0, + "epoch": 0.013602720544108821, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0437488755195994, + "kl": 0.0019359588623046875, + "learning_rate": 1.34e-07, + "loss": 0.0221, + "num_tokens": 3010425.0, + "reward": 0.0, + "reward_std": 0.7881048321723938, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05801769698266319, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.058825236551608436, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11547005383792515, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 68 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1460.0, + "completions/mean_length": 1338.1875, + "completions/mean_terminated_length": 1315.071533203125, + "completions/min_length": 1068.0, + "completions/min_terminated_length": 1068.0, + "epoch": 0.013802760552110422, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3766570550287893, + "kl": 0.0014514923095703125, + "learning_rate": 1.36e-07, + "loss": -0.0172, + "num_tokens": 3056684.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.5337771773338318, + "rewards/wordcountpos_reward_nokeypoint/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.05006284016847701, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09892041438133183, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14981470036162822, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 69 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1255.0, + "completions/max_terminated_length": 1255.0, + "completions/mean_length": 1097.25, + "completions/mean_terminated_length": 1097.25, + "completions/min_length": 19.0, + "completions/min_terminated_length": 19.0, + "epoch": 0.014002800560112022, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.3293103174512186, + "kl": 0.0005540847778320312, + "learning_rate": 1.3800000000000002e-07, + "loss": -0.1162, + "num_tokens": 3102800.0, + "reward": 0.0, + "reward_std": 0.9220105409622192, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.23931886025007632, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15095697232665106, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.19085577257690145, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 70 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1416.0, + "completions/max_terminated_length": 1416.0, + "completions/mean_length": 1109.4375, + "completions/mean_terminated_length": 1109.4375, + "completions/min_length": 926.0, + "completions/min_terminated_length": 926.0, + "epoch": 0.014202840568113623, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.460324649255111, + "kl": 0.00244140625, + "learning_rate": 1.4e-07, + "loss": 0.062, + "num_tokens": 3152463.0, + "reward": 3.725290298461914e-08, + "reward_std": 1.0309052467346191, + "rewards/wordcountpos_reward_nokeypoint/mean": 3.725290298461914e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.02202785983342131, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13594954247056723, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1112221667221529, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 71 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1134.3125, + "completions/mean_terminated_length": 1012.4166870117188, + "completions/min_length": 674.0, + "completions/min_terminated_length": 674.0, + "epoch": 0.014402880576115223, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4056558085692576, + "kl": 0.002231597900390625, + "learning_rate": 1.4199999999999997e-07, + "loss": -0.0528, + "num_tokens": 3202924.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5503576397895813, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.21634874647010213, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15936996831414466, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7333333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12171612389003694, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 72 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1326.0, + "completions/mean_length": 1198.5, + "completions/mean_terminated_length": 1178.4000244140625, + "completions/min_length": 1031.0, + "completions/min_terminated_length": 1031.0, + "epoch": 0.014602920584116823, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.209106059063876, + "kl": 0.0022430419921875, + "learning_rate": 1.44e-07, + "loss": -0.0002, + "num_tokens": 3249820.0, + "reward": 0.0, + "reward_std": 0.8450759649276733, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.12377202890728928, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12727755874468963, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0582141639885766, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 73 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1364.0, + "completions/max_terminated_length": 1364.0, + "completions/mean_length": 1112.6875, + "completions/mean_terminated_length": 1112.6875, + "completions/min_length": 924.0, + "completions/min_terminated_length": 924.0, + "epoch": 0.014802960592118424, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.680252967419727, + "kl": 0.0006761550903320312, + "learning_rate": 1.4599999999999998e-07, + "loss": -0.0596, + "num_tokens": 3288863.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0031185150146484, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1257700114201481, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12998677455689395, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08766518798921948, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 74 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1288.75, + "completions/mean_terminated_length": 1274.666748046875, + "completions/min_length": 1053.0, + "completions/min_terminated_length": 1053.0, + "epoch": 0.015003000600120024, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4886238116688695, + "kl": 0.0014753341674804688, + "learning_rate": 1.4799999999999998e-07, + "loss": -0.0187, + "num_tokens": 3342627.0, + "reward": 3.725290298461914e-08, + "reward_std": 1.0341912508010864, + "rewards/wordcountpos_reward_nokeypoint/mean": 3.725290298461914e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.012068172100911908, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11227672871348723, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.047919685895217376, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 75 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1226.0, + "completions/max_terminated_length": 1226.0, + "completions/mean_length": 1149.5, + "completions/mean_terminated_length": 1149.5, + "completions/min_length": 983.0, + "completions/min_terminated_length": 983.0, + "epoch": 0.015203040608121624, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4394586124322513, + "kl": 0.0012445449829101562, + "learning_rate": 1.5e-07, + "loss": -0.0057, + "num_tokens": 3380435.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0103185176849365, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10564662814966255, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11587973298104114, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11213417888437976, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 76 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1397.0, + "completions/max_terminated_length": 1397.0, + "completions/mean_length": 1114.5, + "completions/mean_terminated_length": 1114.5, + "completions/min_length": 758.0, + "completions/min_terminated_length": 758.0, + "epoch": 0.015403080616123225, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.993422637331136, + "kl": 0.002017974853515625, + "learning_rate": 1.5199999999999998e-07, + "loss": 0.0024, + "num_tokens": 3427051.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9753294587135315, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.023196725239383164, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09098198106310386, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11894598836509011, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 77 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1441.0, + "completions/mean_length": 1124.75, + "completions/mean_terminated_length": 954.1818237304688, + "completions/min_length": 784.0, + "completions/min_terminated_length": 784.0, + "epoch": 0.015603120624124825, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6597090123385714, + "kl": 0.0011911392211914062, + "learning_rate": 1.54e-07, + "loss": 0.0549, + "num_tokens": 3472943.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9673945307731628, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0880722454629059, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11644698133474465, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09737289911202954, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 78 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1342.0, + "completions/mean_length": 996.3125, + "completions/mean_terminated_length": 962.7333984375, + "completions/min_length": 740.0, + "completions/min_terminated_length": 740.0, + "epoch": 0.015803160632126424, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5152277019461295, + "kl": 0.0015716552734375, + "learning_rate": 1.56e-07, + "loss": -0.0622, + "num_tokens": 3520980.0, + "reward": 0.0, + "reward_std": 0.9413830041885376, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.31226981210882, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.28077676341104196, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.6583333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1308094458023239, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 79 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1210.0, + "completions/max_terminated_length": 1210.0, + "completions/mean_length": 1027.0625, + "completions/mean_terminated_length": 1027.0625, + "completions/min_length": 866.0, + "completions/min_terminated_length": 866.0, + "epoch": 0.016003200640128026, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8937025102105784, + "kl": 0.002643585205078125, + "learning_rate": 1.5799999999999999e-07, + "loss": -0.0254, + "num_tokens": 3562757.0, + "reward": 1.1175870895385742e-08, + "reward_std": 1.0436766147613525, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.2480143040078986, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.19663042195673877, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13655822255780922, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 80 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1317.0, + "completions/max_terminated_length": 1317.0, + "completions/mean_length": 1113.125, + "completions/mean_terminated_length": 1113.125, + "completions/min_length": 759.0, + "completions/min_terminated_length": 759.0, + "epoch": 0.016203240648129627, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4230502934886378, + "kl": 0.002368927001953125, + "learning_rate": 1.6e-07, + "loss": 0.0162, + "num_tokens": 3605215.0, + "reward": 0.0, + "reward_std": 0.9536545276641846, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.13286669991764097, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13969254201253317, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07649739768026005, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 81 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1200.0, + "completions/max_terminated_length": 1200.0, + "completions/mean_length": 1039.6875, + "completions/mean_terminated_length": 1039.6875, + "completions/min_length": 664.0, + "completions/min_terminated_length": 664.0, + "epoch": 0.016403280656131225, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.759228895726515, + "kl": 0.0006964206695556641, + "learning_rate": 1.62e-07, + "loss": 0.0192, + "num_tokens": 3644586.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7567745447158813, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08478803610308866, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1273275572743379, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09428090415820634, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 82 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1480.0, + "completions/mean_length": 1211.5, + "completions/mean_terminated_length": 1144.923095703125, + "completions/min_length": 870.0, + "completions/min_terminated_length": 870.0, + "epoch": 0.016603320664132826, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2639637413338476, + "kl": 0.0028839111328125, + "learning_rate": 1.64e-07, + "loss": -0.01, + "num_tokens": 3687810.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9528352618217468, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.12091797407046247, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07287331125528733, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14782371884055634, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 83 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1497.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1187.25, + "completions/mean_terminated_length": 1187.25, + "completions/min_length": 857.0, + "completions/min_terminated_length": 857.0, + "epoch": 0.016803360672134428, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.784818514753396, + "kl": 0.0017223358154296875, + "learning_rate": 1.66e-07, + "loss": -0.0338, + "num_tokens": 3738070.0, + "reward": 0.0, + "reward_std": 1.0022386312484741, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05383379077881438, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14100679263725346, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14950535726806533, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 84 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1208.0, + "completions/max_terminated_length": 1208.0, + "completions/mean_length": 998.3125, + "completions/mean_terminated_length": 998.3125, + "completions/min_length": 822.0, + "completions/min_terminated_length": 822.0, + "epoch": 0.017003400680136026, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.818355919986007, + "kl": 0.00293731689453125, + "learning_rate": 1.68e-07, + "loss": -0.0159, + "num_tokens": 3788403.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.4327883720397949, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.23391750740877001, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2731600049351636, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14981470036162822, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 85 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1485.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1131.5625, + "completions/mean_terminated_length": 1131.5625, + "completions/min_length": 843.0, + "completions/min_terminated_length": 843.0, + "epoch": 0.017203440688137627, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.228348657710386, + "kl": 0.0021457672119140625, + "learning_rate": 1.7000000000000001e-07, + "loss": -0.0144, + "num_tokens": 3823596.0, + "reward": 0.0, + "reward_std": 0.7043069005012512, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.010809423473430578, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10339233882083214, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08153617692869926, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 86 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1360.0, + "completions/max_terminated_length": 1360.0, + "completions/mean_length": 1016.375, + "completions/mean_terminated_length": 1016.375, + "completions/min_length": 766.0, + "completions/min_terminated_length": 766.0, + "epoch": 0.01740348069613923, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2973811406420985, + "kl": 0.0020198822021484375, + "learning_rate": 1.7199999999999998e-07, + "loss": 0.0337, + "num_tokens": 3864098.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5692154169082642, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.19652216857664326, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.31648427261962436, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8250000000000001, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06831300510639735, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 87 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1481.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1054.9375, + "completions/mean_terminated_length": 1054.9375, + "completions/min_length": 797.0, + "completions/min_terminated_length": 797.0, + "epoch": 0.017603520704140826, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9318574571404867, + "kl": 0.0020236968994140625, + "learning_rate": 1.7399999999999997e-07, + "loss": 0.017, + "num_tokens": 3903089.0, + "reward": 0.0, + "reward_std": 0.9250479340553284, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.22215025130296348, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2529218125653046, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10741060020797313, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 88 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1416.0, + "completions/mean_length": 1340.125, + "completions/mean_terminated_length": 1267.45458984375, + "completions/min_length": 1141.0, + "completions/min_terminated_length": 1141.0, + "epoch": 0.017803560712142428, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.703036478546929, + "kl": 0.00138092041015625, + "learning_rate": 1.76e-07, + "loss": 0.0143, + "num_tokens": 3955043.0, + "reward": 0.0, + "reward_std": 0.8516822457313538, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.17080231731619674, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.40000003681444624, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.056927504255331086, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 89 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1249.0, + "completions/max_terminated_length": 1249.0, + "completions/mean_length": 987.0625, + "completions/mean_terminated_length": 987.0625, + "completions/min_length": 19.0, + "completions/min_terminated_length": 19.0, + "epoch": 0.01800360072014403, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.422184882562855, + "kl": 0.00254058837890625, + "learning_rate": 1.7799999999999998e-07, + "loss": -0.1466, + "num_tokens": 4001996.0, + "reward": 0.0, + "reward_std": 0.9406076669692993, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05037954888395703, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06877238174017485, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.16233253479155635, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 90 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1443.0, + "completions/mean_length": 1106.125, + "completions/mean_terminated_length": 1079.86669921875, + "completions/min_length": 553.0, + "completions/min_terminated_length": 553.0, + "epoch": 0.01820364072814563, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.23771096045299, + "kl": 0.0021343231201171875, + "learning_rate": 1.8e-07, + "loss": -0.0162, + "num_tokens": 4035414.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0494256019592285, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.010962955942807348, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05591232342434406, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08243965245133132, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 91 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1396.0, + "completions/max_terminated_length": 1396.0, + "completions/mean_length": 1019.125, + "completions/mean_terminated_length": 1019.125, + "completions/min_length": 657.0, + "completions/min_terminated_length": 657.0, + "epoch": 0.01840368073614723, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8565079125266095, + "kl": 0.002712249755859375, + "learning_rate": 1.82e-07, + "loss": -0.0634, + "num_tokens": 4086824.0, + "reward": -2.2351741790771484e-08, + "reward_std": 0.9201100468635559, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.2067635298229599, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11893440395112716, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.18733017000097074, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 92 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1169.0, + "completions/max_terminated_length": 1169.0, + "completions/mean_length": 1023.625, + "completions/mean_terminated_length": 1023.625, + "completions/min_length": 878.0, + "completions/min_terminated_length": 878.0, + "epoch": 0.01860372074414883, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.119549283932735, + "kl": 0.002094268798828125, + "learning_rate": 1.8399999999999998e-07, + "loss": -0.0135, + "num_tokens": 4138258.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0452443361282349, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05138822953250199, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07020534673356732, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09803627446568497, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 93 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1142.0, + "completions/max_terminated_length": 1142.0, + "completions/mean_length": 923.1875, + "completions/mean_terminated_length": 923.1875, + "completions/min_length": 814.0, + "completions/min_terminated_length": 814.0, + "epoch": 0.01880376075215043, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4271022007439886, + "kl": 0.0011415481567382812, + "learning_rate": 1.86e-07, + "loss": -0.0032, + "num_tokens": 4186813.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9798460006713867, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06235097255989256, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.061489151488012705, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06885303726590962, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 94 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1439.0, + "completions/mean_length": 1236.625, + "completions/mean_terminated_length": 1199.0, + "completions/min_length": 895.0, + "completions/min_terminated_length": 895.0, + "epoch": 0.01900380076015203, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3449868463013073, + "kl": 0.0016994476318359375, + "learning_rate": 1.88e-07, + "loss": -0.0701, + "num_tokens": 4227735.0, + "reward": 0.0, + "reward_std": 0.7144107222557068, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09088323801650731, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1523025454954215, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09888264649460887, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 95 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1236.0, + "completions/max_terminated_length": 1236.0, + "completions/mean_length": 1121.25, + "completions/mean_terminated_length": 1121.25, + "completions/min_length": 909.0, + "completions/min_terminated_length": 909.0, + "epoch": 0.01920384076815363, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4646123303928267, + "kl": 0.002414703369140625, + "learning_rate": 1.8999999999999998e-07, + "loss": -0.0313, + "num_tokens": 4271459.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9233419895172119, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08350078903867811, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07441077855336868, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09583937179043478, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 96 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1287.0, + "completions/max_terminated_length": 1287.0, + "completions/mean_length": 1195.875, + "completions/mean_terminated_length": 1195.875, + "completions/min_length": 1028.0, + "completions/min_terminated_length": 1028.0, + "epoch": 0.019403880776155232, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.831621178623153, + "kl": 0.001422882080078125, + "learning_rate": 1.92e-07, + "loss": -0.0122, + "num_tokens": 4306681.0, + "reward": 0.0, + "reward_std": 0.8419162034988403, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.417639011507551, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2529486387007145, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8250000000000001, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06382847385042256, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 97 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1277.125, + "completions/mean_terminated_length": 1175.8182373046875, + "completions/min_length": 893.0, + "completions/min_terminated_length": 893.0, + "epoch": 0.01960392078415683, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.111339088546679, + "kl": 0.00119781494140625, + "learning_rate": 1.94e-07, + "loss": 0.0001, + "num_tokens": 4358779.0, + "reward": 0.0, + "reward_std": 0.6361056566238403, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0735804582041904, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0953366817116122, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11674600476945508, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 98 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1316.0, + "completions/mean_length": 1072.375, + "completions/mean_terminated_length": 1043.86669921875, + "completions/min_length": 886.0, + "completions/min_terminated_length": 886.0, + "epoch": 0.01980396079215843, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.390518761292935, + "kl": 0.001556396484375, + "learning_rate": 1.96e-07, + "loss": 0.0091, + "num_tokens": 4400785.0, + "reward": 0.0, + "reward_std": 0.9252045750617981, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03935403158647792, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04212183157786884, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.2260367062947799, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 99 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1201.0, + "completions/max_terminated_length": 1201.0, + "completions/mean_length": 956.5625, + "completions/mean_terminated_length": 956.5625, + "completions/min_length": 606.0, + "completions/min_terminated_length": 606.0, + "epoch": 0.020004000800160033, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.599057389794248, + "kl": 0.00238037109375, + "learning_rate": 1.98e-07, + "loss": -0.0136, + "num_tokens": 4437986.0, + "reward": 0.0, + "reward_std": 0.9181259870529175, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.055012230371236905, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06994835053862222, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07503085784948504, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 100 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 898.4375, + "completions/mean_terminated_length": 858.3333740234375, + "completions/min_length": 529.0, + "completions/min_terminated_length": 529.0, + "epoch": 0.02020404080816163, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8256292146473236, + "kl": 0.0017032623291015625, + "learning_rate": 2e-07, + "loss": 0.0343, + "num_tokens": 4476577.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.8039265275001526, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05151014601291972, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04408967295598983, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1449776483411099, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 101 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1401.0, + "completions/max_terminated_length": 1401.0, + "completions/mean_length": 1135.125, + "completions/mean_terminated_length": 1135.125, + "completions/min_length": 754.0, + "completions/min_terminated_length": 754.0, + "epoch": 0.020404080816163232, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.439019368964733, + "kl": 0.0011501312255859375, + "learning_rate": 2.02e-07, + "loss": -0.026, + "num_tokens": 4516499.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.6389927864074707, + "rewards/wordcountpos_reward_nokeypoint/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04704247258319095, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07820345947927132, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11122216672215289, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 102 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1217.0, + "completions/max_terminated_length": 1217.0, + "completions/mean_length": 806.0, + "completions/mean_terminated_length": 806.0, + "completions/min_length": 652.0, + "completions/min_terminated_length": 652.0, + "epoch": 0.020604120824164834, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.066473410110339, + "kl": 0.0023288726806640625, + "learning_rate": 2.0399999999999997e-07, + "loss": -0.0574, + "num_tokens": 4553507.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9908996820449829, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0725467910768109, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.059368499088882754, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10852547064066473, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 103 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1320.0, + "completions/max_terminated_length": 1320.0, + "completions/mean_length": 1056.1875, + "completions/mean_terminated_length": 1056.1875, + "completions/min_length": 776.0, + "completions/min_terminated_length": 776.0, + "epoch": 0.02080416083216643, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2756858436905505, + "kl": 0.002315521240234375, + "learning_rate": 2.06e-07, + "loss": -0.0564, + "num_tokens": 4590774.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6627820134162903, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1277671924387987, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.22142948274790322, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.093392838174146, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 104 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1224.375, + "completions/mean_terminated_length": 1206.0001220703125, + "completions/min_length": 914.0, + "completions/min_terminated_length": 914.0, + "epoch": 0.021004200840168033, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.040247563421175, + "kl": 0.0020389556884765625, + "learning_rate": 2.0799999999999998e-07, + "loss": -0.0356, + "num_tokens": 4645068.0, + "reward": 0.0, + "reward_std": 0.9710835814476013, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.18450119519964633, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13553921798673735, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.6625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09878896324620104, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 105 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1379.0, + "completions/max_terminated_length": 1379.0, + "completions/mean_length": 1190.375, + "completions/mean_terminated_length": 1190.375, + "completions/min_length": 1050.0, + "completions/min_terminated_length": 1050.0, + "epoch": 0.021204240848169634, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4114896776830324, + "kl": 0.0011091232299804688, + "learning_rate": 2.0999999999999997e-07, + "loss": 0.0036, + "num_tokens": 4688146.0, + "reward": 0.0, + "reward_std": 1.0165187120437622, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10239954928882158, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09192922296005414, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0683130051063973, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 106 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1290.0, + "completions/max_terminated_length": 1290.0, + "completions/mean_length": 1106.9375, + "completions/mean_terminated_length": 1106.9375, + "completions/min_length": 1003.0, + "completions/min_terminated_length": 1003.0, + "epoch": 0.021404280856171236, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.217945466555581, + "kl": 0.0020904541015625, + "learning_rate": 2.12e-07, + "loss": -0.0005, + "num_tokens": 4730609.0, + "reward": 0.0, + "reward_std": 1.058624267578125, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11009174375107376, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07414826560296199, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09737289911202954, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 107 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1308.0, + "completions/max_terminated_length": 1308.0, + "completions/mean_length": 1010.6875, + "completions/mean_terminated_length": 1010.6875, + "completions/min_length": 772.0, + "completions/min_terminated_length": 772.0, + "epoch": 0.021604320864172834, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.650202527417449, + "kl": 0.0011157989501953125, + "learning_rate": 2.1399999999999998e-07, + "loss": -0.0137, + "num_tokens": 4767716.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9467637538909912, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0014805761159808134, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06740961265803053, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08944271909999159, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 108 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1253.0, + "completions/max_terminated_length": 1253.0, + "completions/mean_length": 953.0, + "completions/mean_terminated_length": 953.0, + "completions/min_length": 661.0, + "completions/min_terminated_length": 661.0, + "epoch": 0.021804360872174435, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8464020230735514, + "kl": 0.0017271041870117188, + "learning_rate": 2.1599999999999998e-07, + "loss": -0.0099, + "num_tokens": 4797740.0, + "reward": 7.450580596923828e-09, + "reward_std": 0.9999340772628784, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.09972553549905454, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10180440064472594, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1067187372905475, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 109 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1368.0, + "completions/mean_length": 1201.75, + "completions/mean_terminated_length": 1159.1429443359375, + "completions/min_length": 987.0, + "completions/min_terminated_length": 987.0, + "epoch": 0.022004400880176037, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.469119442175247, + "kl": 0.0025482177734375, + "learning_rate": 2.18e-07, + "loss": -0.0394, + "num_tokens": 4839336.0, + "reward": -3.3527612686157227e-08, + "reward_std": 1.0515577793121338, + "rewards/wordcountpos_reward_nokeypoint/mean": -3.3527612686157227e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.01646182267996693, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06707359977659033, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07781745019952502, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 110 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1192.0, + "completions/max_terminated_length": 1192.0, + "completions/mean_length": 953.0, + "completions/mean_terminated_length": 953.0, + "completions/min_length": 778.0, + "completions/min_terminated_length": 778.0, + "epoch": 0.022204440888177635, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.814179464106817, + "kl": 0.00247955322265625, + "learning_rate": 2.1999999999999998e-07, + "loss": -0.0489, + "num_tokens": 4870600.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.7192301154136658, + "rewards/wordcountpos_reward_nokeypoint/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08624717206443891, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10146628962386675, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12641788434189793, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 111 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1292.625, + "completions/mean_terminated_length": 1131.3333740234375, + "completions/min_length": 1002.0, + "completions/min_terminated_length": 1002.0, + "epoch": 0.022404480896179236, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.727416200993016, + "kl": 0.001590728759765625, + "learning_rate": 2.22e-07, + "loss": -0.0265, + "num_tokens": 4923450.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0415483713150024, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.12210845506493943, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14157810684964042, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12852438880818895, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 112 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1373.0, + "completions/mean_length": 1182.9375, + "completions/mean_terminated_length": 1161.800048828125, + "completions/min_length": 886.0, + "completions/min_terminated_length": 886.0, + "epoch": 0.022604520904180837, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3421126178069467, + "kl": 0.002933502197265625, + "learning_rate": 2.24e-07, + "loss": 0.031, + "num_tokens": 4972985.0, + "reward": 0.0, + "reward_std": 0.7647777199745178, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.15655819741182744, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17146423243178308, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07588978362901863, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 113 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1395.0, + "completions/mean_length": 1060.0625, + "completions/mean_terminated_length": 1030.7333984375, + "completions/min_length": 804.0, + "completions/min_terminated_length": 804.0, + "epoch": 0.022804560912182435, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1128360224221474, + "kl": 0.0019474029541015625, + "learning_rate": 2.2599999999999999e-07, + "loss": -0.0113, + "num_tokens": 5022946.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.9326044321060181, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.17625573941547293, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.22522647972597215, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08421753138505425, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 114 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1468.0, + "completions/max_terminated_length": 1468.0, + "completions/mean_length": 1110.25, + "completions/mean_terminated_length": 1110.25, + "completions/min_length": 858.0, + "completions/min_terminated_length": 858.0, + "epoch": 0.023004600920184037, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4379765323832006, + "kl": 0.00273895263671875, + "learning_rate": 2.28e-07, + "loss": 0.0192, + "num_tokens": 5074582.0, + "reward": 0.0, + "reward_std": 0.8919834494590759, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09774678992930491, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08916756895014041, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1287403358472941, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 115 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1176.0, + "completions/max_terminated_length": 1176.0, + "completions/mean_length": 938.5, + "completions/mean_terminated_length": 938.5, + "completions/min_length": 719.0, + "completions/min_terminated_length": 719.0, + "epoch": 0.023204640928185638, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2314555885641583, + "kl": 0.00226593017578125, + "learning_rate": 2.3e-07, + "loss": 0.0052, + "num_tokens": 5114766.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9206539392471313, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06139309860035621, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1932167714606914, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09428090415820636, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 116 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1401.0, + "completions/mean_length": 1139.9375, + "completions/mean_terminated_length": 1115.933349609375, + "completions/min_length": 941.0, + "completions/min_terminated_length": 941.0, + "epoch": 0.023404680936187236, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0480942171178995, + "kl": 0.0019397735595703125, + "learning_rate": 2.32e-07, + "loss": 0.0247, + "num_tokens": 5157573.0, + "reward": 0.0, + "reward_std": 0.6737322211265564, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.011396892898005516, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.062452579884054844, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04533823502911816, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 117 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1215.0, + "completions/max_terminated_length": 1215.0, + "completions/mean_length": 985.9375, + "completions/mean_terminated_length": 985.9375, + "completions/min_length": 756.0, + "completions/min_terminated_length": 756.0, + "epoch": 0.023604720944188837, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.115186735875751, + "kl": 0.0016689300537109375, + "learning_rate": 2.34e-07, + "loss": -0.0085, + "num_tokens": 5194284.0, + "reward": 0.0, + "reward_std": 1.0652129650115967, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03587191485107324, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1297703932123198, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.057089922571845, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 118 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1442.0, + "completions/mean_length": 1190.0625, + "completions/mean_terminated_length": 1169.4000244140625, + "completions/min_length": 804.0, + "completions/min_terminated_length": 804.0, + "epoch": 0.02380476095219044, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4569353114482646, + "kl": 0.0014524459838867188, + "learning_rate": 2.3599999999999997e-07, + "loss": -0.0444, + "num_tokens": 5236189.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9756671786308289, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.12130040655263016, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16120256388117143, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07252075054258099, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 119 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1457.0, + "completions/mean_length": 1172.0, + "completions/mean_terminated_length": 1150.1334228515625, + "completions/min_length": 953.0, + "completions/min_terminated_length": 953.0, + "epoch": 0.024004800960192037, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1943476904981063, + "kl": 0.00254058837890625, + "learning_rate": 2.38e-07, + "loss": -0.0198, + "num_tokens": 5282541.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0564508438110352, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.18219084988001652, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.20890345838515526, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11603000888978232, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 120 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1349.0, + "completions/max_terminated_length": 1349.0, + "completions/mean_length": 962.25, + "completions/mean_terminated_length": 962.25, + "completions/min_length": 543.0, + "completions/min_terminated_length": 543.0, + "epoch": 0.024204840968193638, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.928634025208207, + "kl": 0.0005166530609130859, + "learning_rate": 2.4e-07, + "loss": -0.0065, + "num_tokens": 5319017.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0305774211883545, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.054799385461577504, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05175088393696206, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8250000000000001, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.072520750542581, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 121 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1311.0, + "completions/max_terminated_length": 1311.0, + "completions/mean_length": 1035.0, + "completions/mean_terminated_length": 1035.0, + "completions/min_length": 824.0, + "completions/min_terminated_length": 824.0, + "epoch": 0.02440488097619524, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.440007402560148, + "kl": 0.002166748046875, + "learning_rate": 2.4199999999999997e-07, + "loss": -0.0095, + "num_tokens": 5351649.0, + "reward": 0.0, + "reward_std": 0.935604989528656, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0422275703684044, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.047534931848648904, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10101338378503963, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327954292297363, + "step": 122 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1369.8125, + "completions/mean_terminated_length": 1268.5555419921875, + "completions/min_length": 1044.0, + "completions/min_terminated_length": 1044.0, + "epoch": 0.02460492098419684, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2432466831765003, + "kl": 0.0014743804931640625, + "learning_rate": 2.4399999999999996e-07, + "loss": -0.0116, + "num_tokens": 5404158.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5487322807312012, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.046378109727992264, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.151058200931393, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1355373393953503, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 123 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1415.0, + "completions/mean_length": 1114.9375, + "completions/mean_terminated_length": 986.5833740234375, + "completions/min_length": 761.0, + "completions/min_terminated_length": 761.0, + "epoch": 0.02480496099219844, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.739323966803266, + "kl": 0.00278472900390625, + "learning_rate": 2.46e-07, + "loss": -0.0122, + "num_tokens": 5442221.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.903695821762085, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.3610522249424556, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2974894563977306, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10101338378503963, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 124 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1309.0, + "completions/max_terminated_length": 1309.0, + "completions/mean_length": 1040.125, + "completions/mean_terminated_length": 1040.125, + "completions/min_length": 720.0, + "completions/min_terminated_length": 720.0, + "epoch": 0.02500500100020004, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.8162193919206604, + "kl": 0.000804901123046875, + "learning_rate": 2.48e-07, + "loss": -0.0381, + "num_tokens": 5485223.0, + "reward": 1.1175870895385742e-08, + "reward_std": 0.9530208706855774, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.34772052804404113, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13257182938315787, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11155467020454342, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 125 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1265.0, + "completions/mean_length": 1087.6875, + "completions/mean_terminated_length": 1028.7857666015625, + "completions/min_length": 816.0, + "completions/min_terminated_length": 816.0, + "epoch": 0.02520504100820164, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.851570323702548, + "kl": 0.0013904571533203125, + "learning_rate": 2.5e-07, + "loss": -0.042, + "num_tokens": 5527746.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9834113717079163, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08967989093769949, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12245030784775242, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09878896324620105, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 126 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1263.0, + "completions/max_terminated_length": 1263.0, + "completions/mean_length": 1033.0625, + "completions/mean_terminated_length": 1033.0625, + "completions/min_length": 790.0, + "completions/min_terminated_length": 790.0, + "epoch": 0.02540508101620324, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3664084460928216, + "kl": 0.002353668212890625, + "learning_rate": 2.52e-07, + "loss": 0.0063, + "num_tokens": 5572483.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0424232482910156, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.012147464235358464, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.02332293616657532, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10945995377982529, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 127 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1276.0, + "completions/max_terminated_length": 1276.0, + "completions/mean_length": 1145.5, + "completions/mean_terminated_length": 1145.5, + "completions/min_length": 1057.0, + "completions/min_terminated_length": 1057.0, + "epoch": 0.02560512102420484, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.933421035604269, + "kl": 0.0017948150634765625, + "learning_rate": 2.5399999999999997e-07, + "loss": -0.0071, + "num_tokens": 5613811.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9627362489700317, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.013127951179170648, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08426997582660087, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14343665526661614, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 128 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1447.0, + "completions/mean_length": 1147.875, + "completions/mean_terminated_length": 1124.4000244140625, + "completions/min_length": 988.0, + "completions/min_terminated_length": 988.0, + "epoch": 0.025805161032206442, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.587050456079953, + "kl": 0.0024871826171875, + "learning_rate": 2.56e-07, + "loss": 0.0031, + "num_tokens": 5657801.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0255975723266602, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08524312216116556, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07870469282881147, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07252075054258096, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 129 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1357.0, + "completions/max_terminated_length": 1357.0, + "completions/mean_length": 1125.5, + "completions/mean_terminated_length": 1125.5, + "completions/min_length": 762.0, + "completions/min_terminated_length": 762.0, + "epoch": 0.02600520104020804, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.35625081811361, + "kl": 0.002758026123046875, + "learning_rate": 2.58e-07, + "loss": -0.0447, + "num_tokens": 5705817.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9048060178756714, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.047632249005290034, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10328618981621764, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11080513425729777, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 130 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1146.5625, + "completions/mean_terminated_length": 1123.0, + "completions/min_length": 749.0, + "completions/min_terminated_length": 749.0, + "epoch": 0.026205241048209642, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.869142131338689, + "kl": 0.002849578857421875, + "learning_rate": 2.6e-07, + "loss": -0.0234, + "num_tokens": 5750274.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9122779369354248, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.12021544508838079, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.048423129201913495, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 131 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1437.0, + "completions/mean_length": 1230.8125, + "completions/mean_terminated_length": 1192.357177734375, + "completions/min_length": 985.0, + "completions/min_terminated_length": 985.0, + "epoch": 0.026405281056211243, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7278690086246944, + "kl": 0.00189971923828125, + "learning_rate": 2.62e-07, + "loss": 0.0128, + "num_tokens": 5794815.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0520426034927368, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0031429081318171525, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.154257756853433, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11855612829185828, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 132 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1230.0, + "completions/max_terminated_length": 1230.0, + "completions/mean_length": 962.25, + "completions/mean_terminated_length": 962.25, + "completions/min_length": 797.0, + "completions/min_terminated_length": 797.0, + "epoch": 0.02660532106421284, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7973540665658105, + "kl": 0.002803802490234375, + "learning_rate": 2.64e-07, + "loss": -0.0326, + "num_tokens": 5835011.0, + "reward": 1.1175870895385742e-08, + "reward_std": 1.0408565998077393, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.21631827117369679, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0882261240966692, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1387777332977422, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 133 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1180.0, + "completions/max_terminated_length": 1180.0, + "completions/mean_length": 984.1875, + "completions/mean_terminated_length": 984.1875, + "completions/min_length": 817.0, + "completions/min_terminated_length": 817.0, + "epoch": 0.026805361072214443, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4535662463230197, + "kl": 0.002410888671875, + "learning_rate": 2.66e-07, + "loss": -0.0291, + "num_tokens": 5865382.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8922386169433594, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.042849649319625545, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0716234336285192, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08681611046941139, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 134 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1128.0, + "completions/mean_length": 1001.9375, + "completions/mean_terminated_length": 968.7333984375, + "completions/min_length": 792.0, + "completions/min_terminated_length": 792.0, + "epoch": 0.027005401080216044, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.729203934898098, + "kl": 0.002277374267578125, + "learning_rate": 2.68e-07, + "loss": 0.0154, + "num_tokens": 5899757.0, + "reward": -3.725290298461914e-09, + "reward_std": 1.0213439464569092, + "rewards/wordcountpos_reward_nokeypoint/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.018471621557600987, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05215553147920067, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08766518798921945, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 135 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1274.0, + "completions/max_terminated_length": 1274.0, + "completions/mean_length": 940.9375, + "completions/mean_terminated_length": 940.9375, + "completions/min_length": 574.0, + "completions/min_terminated_length": 574.0, + "epoch": 0.027205441088217642, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7662056530807404, + "kl": 0.0022373199462890625, + "learning_rate": 2.7e-07, + "loss": -0.0255, + "num_tokens": 5928820.0, + "reward": 0.0, + "reward_std": 0.7778316736221313, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.009987125047538115, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07771466589535221, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11855612829185827, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 136 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1208.0, + "completions/max_terminated_length": 1208.0, + "completions/mean_length": 984.6875, + "completions/mean_terminated_length": 984.6875, + "completions/min_length": 591.0, + "completions/min_terminated_length": 591.0, + "epoch": 0.027405481096219243, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8778634802302565, + "kl": 0.0016765594482421875, + "learning_rate": 2.72e-07, + "loss": -0.0022, + "num_tokens": 5968135.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.717841386795044, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.14415607115399753, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.19216428514364028, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11674600476945511, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 137 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1419.0, + "completions/mean_length": 1262.0, + "completions/mean_terminated_length": 1228.0, + "completions/min_length": 977.0, + "completions/min_terminated_length": 977.0, + "epoch": 0.027605521104220845, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1449175274717125, + "kl": 0.0023365020751953125, + "learning_rate": 2.74e-07, + "loss": -0.0025, + "num_tokens": 6013559.0, + "reward": -1.1175870895385742e-08, + "reward_std": 1.0567677021026611, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.006156517234583223, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10481386785626404, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09428090415820636, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 138 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1448.0, + "completions/mean_length": 1144.75, + "completions/mean_terminated_length": 1121.0667724609375, + "completions/min_length": 876.0, + "completions/min_terminated_length": 876.0, + "epoch": 0.027805561112222446, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8870840662740713, + "kl": 0.00183868408203125, + "learning_rate": 2.7600000000000004e-07, + "loss": -0.0238, + "num_tokens": 6059603.0, + "reward": 0.0, + "reward_std": 0.6442309617996216, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0021321244824853564, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.19182110881591302, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07084150279686703, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 139 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1492.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1125.8125, + "completions/mean_terminated_length": 1125.8125, + "completions/min_length": 887.0, + "completions/min_terminated_length": 887.0, + "epoch": 0.028005601120224044, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.400740293367391, + "kl": 0.0021343231201171875, + "learning_rate": 2.7800000000000003e-07, + "loss": -0.0407, + "num_tokens": 6093672.0, + "reward": 0.0, + "reward_std": 0.9701550006866455, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07195084690562202, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07943066350594068, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11155467020454342, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 140 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 955.125, + "completions/mean_terminated_length": 955.125, + "completions/min_length": 583.0, + "completions/min_terminated_length": 583.0, + "epoch": 0.028205641128225645, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.103589540549013, + "kl": 0.002353668212890625, + "learning_rate": 2.8e-07, + "loss": -0.068, + "num_tokens": 6133498.0, + "reward": 0.0, + "reward_std": 1.0092287063598633, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05306497247682408, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.060638045052417475, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08062257748298553, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 141 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1459.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1143.8125, + "completions/mean_terminated_length": 1143.8125, + "completions/min_length": 770.0, + "completions/min_terminated_length": 770.0, + "epoch": 0.028405681136227247, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.961174452433921, + "kl": 0.001911163330078125, + "learning_rate": 2.8199999999999996e-07, + "loss": -0.0036, + "num_tokens": 6181207.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8430895805358887, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0416463153034868, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04222214683396056, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08027729719194866, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 142 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1457.0, + "completions/mean_length": 1235.25, + "completions/mean_terminated_length": 1076.4000244140625, + "completions/min_length": 895.0, + "completions/min_terminated_length": 895.0, + "epoch": 0.028605721144228845, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8153659439487866, + "kl": 0.001880645751953125, + "learning_rate": 2.8399999999999995e-07, + "loss": -0.0282, + "num_tokens": 6239139.0, + "reward": 0.0, + "reward_std": 0.5499482154846191, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11872169519563383, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.22892203981623763, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11603000888978234, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 143 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1398.0, + "completions/mean_length": 1239.5, + "completions/mean_terminated_length": 1202.2857666015625, + "completions/min_length": 1023.0, + "completions/min_terminated_length": 1023.0, + "epoch": 0.028805761152230446, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7400807031037258, + "kl": 0.00170135498046875, + "learning_rate": 2.8599999999999994e-07, + "loss": -0.0089, + "num_tokens": 6287019.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8104555606842041, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03120041606761964, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12674146076576076, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04554200340426487, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 144 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1110.1875, + "completions/mean_terminated_length": 1084.2000732421875, + "completions/min_length": 795.0, + "completions/min_terminated_length": 795.0, + "epoch": 0.029005801160232048, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.479077246814682, + "kl": 0.00264739990234375, + "learning_rate": 2.88e-07, + "loss": 0.0222, + "num_tokens": 6331078.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0622413158416748, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0229781680726173, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.062386417495850256, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12285191326386657, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 145 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1494.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1160.4375, + "completions/mean_terminated_length": 1160.4375, + "completions/min_length": 948.0, + "completions/min_terminated_length": 948.0, + "epoch": 0.029205841168233646, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1874866949163727, + "kl": 0.002490997314453125, + "learning_rate": 2.9e-07, + "loss": -0.0288, + "num_tokens": 6376357.0, + "reward": 0.0, + "reward_std": 0.9845371246337891, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.13919063538212495, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07286609695086006, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.16487930490266264, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 146 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1064.0, + "completions/max_terminated_length": 1064.0, + "completions/mean_length": 871.75, + "completions/mean_terminated_length": 871.75, + "completions/min_length": 703.0, + "completions/min_terminated_length": 703.0, + "epoch": 0.029405881176235247, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1800480978254995, + "kl": 0.0016918182373046875, + "learning_rate": 2.9199999999999997e-07, + "loss": -0.0344, + "num_tokens": 6403521.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0003687143325806, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.038310864890444205, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06477880086334199, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11538983843829065, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 147 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1267.0, + "completions/max_terminated_length": 1267.0, + "completions/mean_length": 1142.9375, + "completions/mean_terminated_length": 1142.9375, + "completions/min_length": 1056.0, + "completions/min_terminated_length": 1056.0, + "epoch": 0.02960592118423685, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6663401011683368, + "kl": 0.0015869140625, + "learning_rate": 2.9399999999999996e-07, + "loss": -0.0032, + "num_tokens": 6437480.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.737164318561554, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.13898396243960146, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.22064230841627436, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08734775114237132, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 148 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1320.0, + "completions/max_terminated_length": 1320.0, + "completions/mean_length": 1151.625, + "completions/mean_terminated_length": 1151.625, + "completions/min_length": 927.0, + "completions/min_terminated_length": 927.0, + "epoch": 0.029805961192238446, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4561136671838875, + "kl": 0.002590179443359375, + "learning_rate": 2.9599999999999995e-07, + "loss": 0.0096, + "num_tokens": 6486834.0, + "reward": 4.470348358154297e-08, + "reward_std": 1.0135126113891602, + "rewards/wordcountpos_reward_nokeypoint/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.029865231061422456, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.27045368139859627, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08432740427115676, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 149 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1426.0, + "completions/max_terminated_length": 1426.0, + "completions/mean_length": 1215.125, + "completions/mean_terminated_length": 1215.125, + "completions/min_length": 921.0, + "completions/min_terminated_length": 921.0, + "epoch": 0.030006001200240048, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.932674466070943, + "kl": 0.0016689300537109375, + "learning_rate": 2.98e-07, + "loss": -0.0259, + "num_tokens": 6528132.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8692072629928589, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0760869396096592, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06954083658000401, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09016445879408155, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 150 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1315.0, + "completions/max_terminated_length": 1315.0, + "completions/mean_length": 1121.375, + "completions/mean_terminated_length": 1121.375, + "completions/min_length": 844.0, + "completions/min_terminated_length": 844.0, + "epoch": 0.03020604120824165, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3384051828635357, + "kl": 0.00246429443359375, + "learning_rate": 3e-07, + "loss": 0.0344, + "num_tokens": 6575970.0, + "reward": 0.0, + "reward_std": 0.9283574819564819, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.13139106463530237, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1719223926543388, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06666666666666667, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 151 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1278.0, + "completions/mean_length": 1104.0625, + "completions/mean_terminated_length": 1077.666748046875, + "completions/min_length": 783.0, + "completions/min_terminated_length": 783.0, + "epoch": 0.030406081216243247, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3780648291146407, + "kl": 0.0013179779052734375, + "learning_rate": 3.02e-07, + "loss": -0.0482, + "num_tokens": 6620923.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9227230548858643, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.006320500617849806, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0742709894478399, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10327955589886446, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 152 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1204.3125, + "completions/mean_terminated_length": 1184.60009765625, + "completions/min_length": 701.0, + "completions/min_terminated_length": 701.0, + "epoch": 0.03060612122424485, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.429780348500396, + "kl": 0.002513885498046875, + "learning_rate": 3.0399999999999997e-07, + "loss": -0.0488, + "num_tokens": 6665552.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0536808967590332, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07594127186262835, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06207535927619286, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13333333333333336, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 153 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1393.0, + "completions/max_terminated_length": 1393.0, + "completions/mean_length": 1020.0625, + "completions/mean_terminated_length": 1020.0625, + "completions/min_length": 777.0, + "completions/min_terminated_length": 777.0, + "epoch": 0.03080616123224645, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.65903581367956, + "kl": 0.0024871826171875, + "learning_rate": 3.0599999999999996e-07, + "loss": 0.0171, + "num_tokens": 6701785.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.98410964012146, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.05213859802825146, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08251904375556517, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12285191326386657, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 154 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1428.0, + "completions/max_terminated_length": 1428.0, + "completions/mean_length": 1092.6875, + "completions/mean_terminated_length": 1092.6875, + "completions/min_length": 730.0, + "completions/min_terminated_length": 730.0, + "epoch": 0.03100620124024805, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.425707140554762, + "kl": 0.002292633056640625, + "learning_rate": 3.08e-07, + "loss": 0.0088, + "num_tokens": 6743212.0, + "reward": 0.0, + "reward_std": 0.9829471111297607, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.007110529448843771, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03322805088735759, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04013864859597433, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 155 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1295.0, + "completions/max_terminated_length": 1295.0, + "completions/mean_length": 1023.125, + "completions/mean_terminated_length": 1023.125, + "completions/min_length": 732.0, + "completions/min_terminated_length": 732.0, + "epoch": 0.03120624124824965, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.08648663636759, + "kl": 0.001857757568359375, + "learning_rate": 3.1e-07, + "loss": -0.0498, + "num_tokens": 6792990.0, + "reward": 0.0, + "reward_std": 0.8458909392356873, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.13510182761265527, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2074490733494405, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08850612031567838, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 156 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1284.0, + "completions/max_terminated_length": 1284.0, + "completions/mean_length": 1112.4375, + "completions/mean_terminated_length": 1112.4375, + "completions/min_length": 727.0, + "completions/min_terminated_length": 727.0, + "epoch": 0.03140628125625125, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0180095481876434, + "kl": 0.0018253326416015625, + "learning_rate": 3.12e-07, + "loss": -0.0277, + "num_tokens": 6834357.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.8750590085983276, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.12479203936522901, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09918608245611617, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08388704928078611, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 157 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1410.0, + "completions/mean_length": 1082.3125, + "completions/mean_terminated_length": 1054.4666748046875, + "completions/min_length": 860.0, + "completions/min_terminated_length": 860.0, + "epoch": 0.03160632126425285, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5526674150978033, + "kl": 0.002490997314453125, + "learning_rate": 3.14e-07, + "loss": -0.1137, + "num_tokens": 6885306.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.982924222946167, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.271622239225961, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1237625837967818, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12285191326386657, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 158 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1299.875, + "completions/mean_terminated_length": 1271.2857666015625, + "completions/min_length": 955.0, + "completions/min_terminated_length": 955.0, + "epoch": 0.03180636127225445, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.998187346663724, + "kl": 0.0019512176513671875, + "learning_rate": 3.1599999999999997e-07, + "loss": 0.0157, + "num_tokens": 6927624.0, + "reward": 0.0, + "reward_std": 0.5777603387832642, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08521017330200366, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08768187776495252, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09878896324620104, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 159 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1372.0, + "completions/mean_length": 1130.1875, + "completions/mean_terminated_length": 1077.357177734375, + "completions/min_length": 460.0, + "completions/min_terminated_length": 460.0, + "epoch": 0.03200640128025605, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.22203115373172, + "kl": 0.0017070770263671875, + "learning_rate": 3.18e-07, + "loss": 0.0261, + "num_tokens": 6983835.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.4904226064682007, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.01936994616669139, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.031239188211028744, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14950535726806533, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 160 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1384.0, + "completions/mean_length": 1117.5, + "completions/mean_terminated_length": 888.0, + "completions/min_length": 716.0, + "completions/min_terminated_length": 716.0, + "epoch": 0.03220644128825765, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6325474477667656, + "kl": 0.001049041748046875, + "learning_rate": 3.2e-07, + "loss": 0.0223, + "num_tokens": 7036443.0, + "reward": 0.0, + "reward_std": 0.9139224290847778, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.15922273806850681, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08008253401355062, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07302967433402216, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 161 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1127.0, + "completions/max_terminated_length": 1127.0, + "completions/mean_length": 1035.5, + "completions/mean_terminated_length": 1035.5, + "completions/min_length": 887.0, + "completions/min_terminated_length": 887.0, + "epoch": 0.032406481296259254, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.089791155813099, + "kl": 0.0013055801391601562, + "learning_rate": 3.22e-07, + "loss": 0.0039, + "num_tokens": 7071003.0, + "reward": 0.0, + "reward_std": 0.9833001494407654, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.02803451837443741, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12026210598870715, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08734775114237131, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 162 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1120.0, + "completions/max_terminated_length": 1120.0, + "completions/mean_length": 1004.375, + "completions/mean_terminated_length": 1004.375, + "completions/min_length": 809.0, + "completions/min_terminated_length": 809.0, + "epoch": 0.03260652130426085, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.788946188808364, + "kl": 0.0007534027099609375, + "learning_rate": 3.24e-07, + "loss": -0.0377, + "num_tokens": 7113345.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6196906566619873, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06862655907481806, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09210954585001063, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11538983843829065, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 163 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1042.0, + "completions/max_terminated_length": 1042.0, + "completions/mean_length": 913.875, + "completions/mean_terminated_length": 913.875, + "completions/min_length": 771.0, + "completions/min_terminated_length": 771.0, + "epoch": 0.03280656131226245, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4327317898805902, + "kl": 0.0008561611175537109, + "learning_rate": 3.26e-07, + "loss": 0.0048, + "num_tokens": 7139855.0, + "reward": 0.0, + "reward_std": 0.5437282919883728, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0053283216876329415, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06376503063907243, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08766518798921946, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 164 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1489.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1187.0, + "completions/mean_terminated_length": 1187.0, + "completions/min_length": 915.0, + "completions/min_terminated_length": 915.0, + "epoch": 0.033006601320264055, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8697910800491115, + "kl": 0.00180816650390625, + "learning_rate": 3.28e-07, + "loss": 0.052, + "num_tokens": 7193887.0, + "reward": 0.0, + "reward_std": 0.5625689029693604, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0898076815274327, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1598284341758666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11409872268574493, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 165 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1437.0, + "completions/max_terminated_length": 1437.0, + "completions/mean_length": 991.125, + "completions/mean_terminated_length": 991.125, + "completions/min_length": 542.0, + "completions/min_terminated_length": 542.0, + "epoch": 0.03320664132826565, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4102872473477635, + "kl": 0.002285003662109375, + "learning_rate": 3.3e-07, + "loss": -0.014, + "num_tokens": 7236121.0, + "reward": -4.470348358154297e-08, + "reward_std": 1.0143083333969116, + "rewards/wordcountpos_reward_nokeypoint/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.17912678090567397, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15857592133662246, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08944271909999159, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 166 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 931.0, + "completions/max_terminated_length": 931.0, + "completions/mean_length": 794.5625, + "completions/mean_terminated_length": 794.5625, + "completions/min_length": 661.0, + "completions/min_terminated_length": 661.0, + "epoch": 0.03340668133626725, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.738175077947658, + "kl": 0.0018177032470703125, + "learning_rate": 3.32e-07, + "loss": 0.0323, + "num_tokens": 7263026.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.523534893989563, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04418185265265868, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.041938982459963456, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059472994182545036, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 167 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1290.0, + "completions/max_terminated_length": 1290.0, + "completions/mean_length": 956.125, + "completions/mean_terminated_length": 956.125, + "completions/min_length": 793.0, + "completions/min_terminated_length": 793.0, + "epoch": 0.033606721344268856, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.132496141587113, + "kl": 0.003063201904296875, + "learning_rate": 3.34e-07, + "loss": -0.04, + "num_tokens": 7294484.0, + "reward": 0.0, + "reward_std": 0.9157199859619141, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.02825486143881847, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03706761999641554, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1167460047694551, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 168 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1350.0, + "completions/mean_length": 1105.25, + "completions/mean_terminated_length": 1078.933349609375, + "completions/min_length": 874.0, + "completions/min_terminated_length": 874.0, + "epoch": 0.033806761352270454, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3789610996379498, + "kl": 0.0012750625610351562, + "learning_rate": 3.36e-07, + "loss": 0.042, + "num_tokens": 7334448.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.42565929889678955, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.00499537308584153, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07763949852446665, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08606629658238706, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 169 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1430.0, + "completions/max_terminated_length": 1430.0, + "completions/mean_length": 971.375, + "completions/mean_terminated_length": 971.375, + "completions/min_length": 619.0, + "completions/min_terminated_length": 619.0, + "epoch": 0.03400680136027205, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6117083796260867, + "kl": 0.002315521240234375, + "learning_rate": 3.38e-07, + "loss": -0.0257, + "num_tokens": 7384446.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.31007200479507446, + "rewards/wordcountpos_reward_nokeypoint/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06738330247126917, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.31749505306075715, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.15869840952317446, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 170 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1432.0, + "completions/max_terminated_length": 1432.0, + "completions/mean_length": 1102.125, + "completions/mean_terminated_length": 1102.125, + "completions/min_length": 839.0, + "completions/min_terminated_length": 839.0, + "epoch": 0.034206841368273656, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6427120342410944, + "kl": 0.002803802490234375, + "learning_rate": 3.4000000000000003e-07, + "loss": 0.0506, + "num_tokens": 7428808.0, + "reward": 0.0, + "reward_std": 0.7787526845932007, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.014685974827549282, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11111564539742785, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7083333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1261979632400061, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 171 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1424.0, + "completions/max_terminated_length": 1424.0, + "completions/mean_length": 1085.5, + "completions/mean_terminated_length": 1085.5, + "completions/min_length": 765.0, + "completions/min_terminated_length": 765.0, + "epoch": 0.034406881376275254, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5753514173833696, + "kl": 0.0015048980712890625, + "learning_rate": 3.42e-07, + "loss": -0.0011, + "num_tokens": 7476688.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9988751411437988, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.11364034930612663, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12574912862548052, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1308094458023239, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 172 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1496.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1225.125, + "completions/mean_terminated_length": 1225.125, + "completions/min_length": 948.0, + "completions/min_terminated_length": 948.0, + "epoch": 0.03460692138427685, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8394401575520467, + "kl": 0.00177001953125, + "learning_rate": 3.4399999999999996e-07, + "loss": -0.0321, + "num_tokens": 7518426.0, + "reward": 0.0, + "reward_std": 0.9847222566604614, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03596319864615712, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1791744572928041, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9791666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.03191423692521126, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 173 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1361.0, + "completions/max_terminated_length": 1361.0, + "completions/mean_length": 1191.75, + "completions/mean_terminated_length": 1191.75, + "completions/min_length": 978.0, + "completions/min_terminated_length": 978.0, + "epoch": 0.03480696139227846, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9495457997720567, + "kl": 0.0018405914306640625, + "learning_rate": 3.4599999999999995e-07, + "loss": -0.0173, + "num_tokens": 7564862.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.5561766028404236, + "rewards/wordcountpos_reward_nokeypoint/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0061557687854108195, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.264735549686655, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10246950765959602, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 174 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1324.0, + "completions/mean_length": 1288.9375, + "completions/mean_terminated_length": 1124.77783203125, + "completions/min_length": 913.0, + "completions/min_terminated_length": 913.0, + "epoch": 0.035007001400280055, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.114228993887127, + "kl": 0.001224517822265625, + "learning_rate": 3.4799999999999994e-07, + "loss": -0.0033, + "num_tokens": 7623085.0, + "reward": 0.0, + "reward_std": 0.6006279587745667, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06419899482054885, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10876533993195055, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0666666666666667, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 175 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1425.0, + "completions/max_terminated_length": 1425.0, + "completions/mean_length": 1089.875, + "completions/mean_terminated_length": 1089.875, + "completions/min_length": 903.0, + "completions/min_terminated_length": 903.0, + "epoch": 0.03520704140828165, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0212182640478344, + "kl": 0.001834869384765625, + "learning_rate": 3.5e-07, + "loss": 0.0234, + "num_tokens": 7664443.0, + "reward": 0.0, + "reward_std": 0.9223772287368774, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.2059037192343417, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.24065282799024, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07588978362901862, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 176 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1434.0, + "completions/mean_length": 1390.75, + "completions/mean_terminated_length": 1281.5, + "completions/min_length": 1183.0, + "completions/min_terminated_length": 1183.0, + "epoch": 0.03540708141628326, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.974056223843701, + "kl": 0.002490997314453125, + "learning_rate": 3.52e-07, + "loss": 0.0155, + "num_tokens": 7718407.0, + "reward": -1.1175870895385742e-08, + "reward_std": 1.0572954416275024, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.005201436286234839, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.039752390545626626, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10183501544346313, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 177 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1479.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1162.1875, + "completions/mean_terminated_length": 1162.1875, + "completions/min_length": 835.0, + "completions/min_terminated_length": 835.0, + "epoch": 0.035607121424284856, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.772445147816875, + "kl": 0.0016803741455078125, + "learning_rate": 3.5399999999999997e-07, + "loss": -0.0493, + "num_tokens": 7760778.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.76764976978302, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.16005479996599672, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15348936192987436, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12524050936172842, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 178 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1354.0, + "completions/max_terminated_length": 1354.0, + "completions/mean_length": 1016.0625, + "completions/mean_terminated_length": 1016.0625, + "completions/min_length": 620.0, + "completions/min_terminated_length": 620.0, + "epoch": 0.03580716143228646, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9539479467640706, + "kl": 0.0027618408203125, + "learning_rate": 3.5599999999999996e-07, + "loss": -0.063, + "num_tokens": 7794211.0, + "reward": -3.725290298461914e-09, + "reward_std": 1.0678188800811768, + "rewards/wordcountpos_reward_nokeypoint/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.004017468616367143, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.033759968740323935, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14168300559373406, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 179 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1206.0, + "completions/max_terminated_length": 1206.0, + "completions/mean_length": 1019.875, + "completions/mean_terminated_length": 1019.875, + "completions/min_length": 789.0, + "completions/min_terminated_length": 789.0, + "epoch": 0.03600720144028806, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.856438223356495, + "kl": 0.002658843994140625, + "learning_rate": 3.5799999999999995e-07, + "loss": -0.0177, + "num_tokens": 7834753.0, + "reward": 0.0, + "reward_std": 0.4452708959579468, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.17910333566989842, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.21425650967660279, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14089659985908765, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 180 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1149.0, + "completions/max_terminated_length": 1149.0, + "completions/mean_length": 914.9375, + "completions/mean_terminated_length": 914.9375, + "completions/min_length": 652.0, + "completions/min_terminated_length": 652.0, + "epoch": 0.036207241448289657, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.768105683526148, + "kl": 0.0020294189453125, + "learning_rate": 3.6e-07, + "loss": -0.0253, + "num_tokens": 7865120.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9534052610397339, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05397492617303175, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06319673488746749, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1264911064067352, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 181 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1192.0, + "completions/max_terminated_length": 1192.0, + "completions/mean_length": 1001.8125, + "completions/mean_terminated_length": 1001.8125, + "completions/min_length": 818.0, + "completions/min_terminated_length": 818.0, + "epoch": 0.03640728145629126, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5466670198653243, + "kl": 0.002094268798828125, + "learning_rate": 3.62e-07, + "loss": -0.0517, + "num_tokens": 7900781.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0178545713424683, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07014063942226302, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08371767834847682, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14240006242195888, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 182 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1084.0, + "completions/max_terminated_length": 1084.0, + "completions/mean_length": 910.0, + "completions/mean_terminated_length": 910.0, + "completions/min_length": 753.0, + "completions/min_terminated_length": 753.0, + "epoch": 0.03660732146429286, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.120683339644871, + "kl": 0.001560211181640625, + "learning_rate": 3.64e-07, + "loss": 0.0297, + "num_tokens": 7930333.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0622828006744385, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08381550619831163, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04507353420948678, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13526380260918403, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 183 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1159.0, + "completions/max_terminated_length": 1159.0, + "completions/mean_length": 890.375, + "completions/mean_terminated_length": 890.375, + "completions/min_length": 707.0, + "completions/min_terminated_length": 707.0, + "epoch": 0.03680736147229446, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.274316693961824, + "kl": 0.002895355224609375, + "learning_rate": 3.6599999999999997e-07, + "loss": -0.0104, + "num_tokens": 7961835.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9619171619415283, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04164492656820182, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10650089044342628, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10886621079036349, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 184 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1153.0, + "completions/max_terminated_length": 1153.0, + "completions/mean_length": 967.1875, + "completions/mean_terminated_length": 967.1875, + "completions/min_length": 775.0, + "completions/min_terminated_length": 775.0, + "epoch": 0.03700740148029606, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.943732941436741, + "kl": 0.0023822784423828125, + "learning_rate": 3.6799999999999996e-07, + "loss": -0.0224, + "num_tokens": 8008966.0, + "reward": 0.0, + "reward_std": 0.9330922961235046, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09236415035233463, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.087074012006948, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11080513425729777, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 185 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 918.0, + "completions/max_terminated_length": 918.0, + "completions/mean_length": 795.25, + "completions/mean_terminated_length": 795.25, + "completions/min_length": 730.0, + "completions/min_terminated_length": 730.0, + "epoch": 0.03720744148829766, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3593025167293753, + "kl": 0.0018241405487060547, + "learning_rate": 3.7e-07, + "loss": -0.0014, + "num_tokens": 8033874.0, + "reward": 0.0, + "reward_std": 0.9045600295066833, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.14919061977394385, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07502485833630256, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07781745019952502, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 186 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1156.0, + "completions/max_terminated_length": 1156.0, + "completions/mean_length": 928.5, + "completions/mean_terminated_length": 928.5, + "completions/min_length": 843.0, + "completions/min_terminated_length": 843.0, + "epoch": 0.03740748149629926, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7270227900818957, + "kl": 0.00257110595703125, + "learning_rate": 3.72e-07, + "loss": -0.0007, + "num_tokens": 8069962.0, + "reward": 0.0, + "reward_std": 0.43980270624160767, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.011540626254184491, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0816678336600803, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.16599866130651644, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 187 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1072.0, + "completions/max_terminated_length": 1072.0, + "completions/mean_length": 876.8125, + "completions/mean_terminated_length": 876.8125, + "completions/min_length": 571.0, + "completions/min_terminated_length": 571.0, + "epoch": 0.03760752150430086, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.893457503180745, + "kl": 0.0014133453369140625, + "learning_rate": 3.74e-07, + "loss": -0.0024, + "num_tokens": 8118807.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0275522470474243, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03851334153737305, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0383907078416112, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09259629622222519, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 188 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1412.0, + "completions/mean_length": 1143.4375, + "completions/mean_terminated_length": 1119.666748046875, + "completions/min_length": 907.0, + "completions/min_terminated_length": 907.0, + "epoch": 0.03780756151230246, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.463697284468848, + "kl": 0.0025787353515625, + "learning_rate": 3.76e-07, + "loss": -0.0107, + "num_tokens": 8159822.0, + "reward": 0.0, + "reward_std": 0.8558562994003296, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.13301973814933657, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10383502070380055, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12141145226353543, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 189 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1038.0, + "completions/max_terminated_length": 1038.0, + "completions/mean_length": 866.625, + "completions/mean_terminated_length": 866.625, + "completions/min_length": 709.0, + "completions/min_terminated_length": 709.0, + "epoch": 0.03800760152030406, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.1795004412830865, + "kl": 0.002422332763671875, + "learning_rate": 3.7799999999999997e-07, + "loss": -0.0339, + "num_tokens": 8182168.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0267484188079834, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.011716645950759094, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.02872126723048205, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07302967433402215, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 190 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1223.0, + "completions/max_terminated_length": 1223.0, + "completions/mean_length": 1052.4375, + "completions/mean_terminated_length": 1052.4375, + "completions/min_length": 877.0, + "completions/min_terminated_length": 877.0, + "epoch": 0.038207641528305664, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.114582517316078, + "kl": 0.00159454345703125, + "learning_rate": 3.7999999999999996e-07, + "loss": 0.0102, + "num_tokens": 8224135.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9151319265365601, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0010001520596073475, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13826575692633394, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06763190130459203, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 191 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1142.0, + "completions/max_terminated_length": 1142.0, + "completions/mean_length": 945.6875, + "completions/mean_terminated_length": 945.6875, + "completions/min_length": 784.0, + "completions/min_terminated_length": 784.0, + "epoch": 0.03840768153630726, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5139309708989375, + "kl": 0.002056121826171875, + "learning_rate": 3.82e-07, + "loss": -0.0108, + "num_tokens": 8265706.0, + "reward": 0.0, + "reward_std": 0.8298763632774353, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.033584089821501864, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11656761280892308, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09651328828101766, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 192 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1446.0, + "completions/max_terminated_length": 1446.0, + "completions/mean_length": 1172.875, + "completions/mean_terminated_length": 1172.875, + "completions/min_length": 886.0, + "completions/min_terminated_length": 886.0, + "epoch": 0.03860772154430886, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8395379027600196, + "kl": 0.0016803741455078125, + "learning_rate": 3.84e-07, + "loss": 0.0053, + "num_tokens": 8317744.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0340901613235474, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.15159783870947832, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06652847993546569, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1029203215725281, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 193 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1346.0, + "completions/mean_length": 1233.3125, + "completions/mean_terminated_length": 1195.21435546875, + "completions/min_length": 1092.0, + "completions/min_terminated_length": 1092.0, + "epoch": 0.038807761552310464, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0756107850121874, + "kl": 0.0020999908447265625, + "learning_rate": 3.86e-07, + "loss": 0.0477, + "num_tokens": 8371181.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0659716129302979, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.018575599287467926, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13688421613647894, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12164002752505568, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 194 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1470.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1088.0, + "completions/mean_terminated_length": 1088.0, + "completions/min_length": 812.0, + "completions/min_terminated_length": 812.0, + "epoch": 0.03900780156031206, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.649792854030946, + "kl": 0.00153350830078125, + "learning_rate": 3.88e-07, + "loss": -0.0261, + "num_tokens": 8412829.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0503056049346924, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11662646391803742, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10160251350895681, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13709958532503408, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 195 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1298.0, + "completions/max_terminated_length": 1298.0, + "completions/mean_length": 1042.375, + "completions/mean_terminated_length": 1042.375, + "completions/min_length": 837.0, + "completions/min_terminated_length": 837.0, + "epoch": 0.03920784156831366, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.555739401995227, + "kl": 0.0020809173583984375, + "learning_rate": 3.8999999999999997e-07, + "loss": -0.0217, + "num_tokens": 8448883.0, + "reward": 0.0, + "reward_std": 0.9704576730728149, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09422958908293387, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0920857952907729, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0938872452190116, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 196 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1406.0, + "completions/max_terminated_length": 1406.0, + "completions/mean_length": 1101.125, + "completions/mean_terminated_length": 1101.125, + "completions/min_length": 699.0, + "completions/min_terminated_length": 699.0, + "epoch": 0.039407881576315265, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3401743235399985, + "kl": 0.002193450927734375, + "learning_rate": 3.92e-07, + "loss": -0.0105, + "num_tokens": 8486525.0, + "reward": 0.0, + "reward_std": 0.9778169393539429, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.035431867707859485, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10797814806237127, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10576003586036263, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 197 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1492.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1212.375, + "completions/mean_terminated_length": 1212.375, + "completions/min_length": 1025.0, + "completions/min_terminated_length": 1025.0, + "epoch": 0.03960792158431686, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.8375373902582843, + "kl": 0.0008540153503417969, + "learning_rate": 3.94e-07, + "loss": -0.0201, + "num_tokens": 8530019.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.7465068101882935, + "rewards/wordcountpos_reward_nokeypoint/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.02561642762876406, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11348618673557807, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07490735018081411, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 198 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1461.0, + "completions/mean_length": 1389.8125, + "completions/mean_terminated_length": 1248.1429443359375, + "completions/min_length": 624.0, + "completions/min_terminated_length": 624.0, + "epoch": 0.03980796159231846, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.157060744011228, + "kl": 0.002292633056640625, + "learning_rate": 3.96e-07, + "loss": -0.0066, + "num_tokens": 8586728.0, + "reward": 0.0, + "reward_std": 0.6975679993629456, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.029670595629294742, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.054809964274482204, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14168300559373406, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 199 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1056.0, + "completions/max_terminated_length": 1056.0, + "completions/mean_length": 915.6875, + "completions/mean_terminated_length": 915.6875, + "completions/min_length": 769.0, + "completions/min_terminated_length": 769.0, + "epoch": 0.040008001600320066, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.892537160853922, + "kl": 0.002124786376953125, + "learning_rate": 3.98e-07, + "loss": -0.0147, + "num_tokens": 8626755.0, + "reward": 0.0, + "reward_std": 1.0048415660858154, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04446400488652367, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07410787343771846, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07503085784948502, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 200 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1422.0, + "completions/mean_length": 1159.125, + "completions/mean_terminated_length": 1136.4000244140625, + "completions/min_length": 958.0, + "completions/min_terminated_length": 958.0, + "epoch": 0.040208041608321664, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7884062699751135, + "kl": 0.002773284912109375, + "learning_rate": 4e-07, + "loss": -0.0172, + "num_tokens": 8676661.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.4169219732284546, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08515229747346062, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09224316258108724, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08421753138505425, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 201 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1383.0, + "completions/mean_length": 1117.75, + "completions/mean_terminated_length": 1063.1429443359375, + "completions/min_length": 784.0, + "completions/min_terminated_length": 784.0, + "epoch": 0.04040808161632326, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.312055208535012, + "kl": 0.0023651123046875, + "learning_rate": 4.02e-07, + "loss": -0.0165, + "num_tokens": 8719793.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0479657649993896, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.2125977953416231, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14079846345799585, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13270686158262923, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 202 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1460.0, + "completions/mean_length": 1251.3125, + "completions/mean_terminated_length": 1168.416748046875, + "completions/min_length": 733.0, + "completions/min_terminated_length": 733.0, + "epoch": 0.04060812162432487, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3304858995223743, + "kl": 0.002346038818359375, + "learning_rate": 4.04e-07, + "loss": -0.0243, + "num_tokens": 8768054.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8664104342460632, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.006140615913568188, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1262301590573947, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11344765475923412, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 203 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1252.3125, + "completions/mean_terminated_length": 1139.727294921875, + "completions/min_length": 974.0, + "completions/min_terminated_length": 974.0, + "epoch": 0.040808161632326465, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3712143311840173, + "kl": 0.0011091232299804688, + "learning_rate": 4.06e-07, + "loss": -0.0019, + "num_tokens": 8818563.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8902668952941895, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.010660692981607899, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1348418403840958, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09269623828717427, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 204 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1481.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1019.25, + "completions/mean_terminated_length": 1019.25, + "completions/min_length": 657.0, + "completions/min_terminated_length": 657.0, + "epoch": 0.04100820164032806, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5050201239171814, + "kl": 0.00254058837890625, + "learning_rate": 4.0799999999999995e-07, + "loss": -0.038, + "num_tokens": 8857719.0, + "reward": 0.0, + "reward_std": 0.8508757948875427, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08287538486344614, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09066915842404275, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10470416879457554, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 205 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1306.125, + "completions/mean_terminated_length": 1293.2000732421875, + "completions/min_length": 996.0, + "completions/min_terminated_length": 996.0, + "epoch": 0.04120824164832967, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5523040541236806, + "kl": 0.001651763916015625, + "learning_rate": 4.0999999999999994e-07, + "loss": -0.0607, + "num_tokens": 8904145.0, + "reward": 0.0, + "reward_std": 0.6867795586585999, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.11007451804158204, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10727902969453933, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08027729719194865, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 206 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1457.0, + "completions/mean_length": 1244.5, + "completions/mean_terminated_length": 1159.3333740234375, + "completions/min_length": 1033.0, + "completions/min_terminated_length": 1033.0, + "epoch": 0.041408281656331265, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3388796341002362, + "kl": 0.001289963722229004, + "learning_rate": 4.12e-07, + "loss": -0.0133, + "num_tokens": 8954449.0, + "reward": 7.450580596923828e-09, + "reward_std": 0.9390036463737488, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.037894060887900434, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.20933075255750253, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12641788434189796, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 207 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1174.0625, + "completions/mean_terminated_length": 1152.3333740234375, + "completions/min_length": 925.0, + "completions/min_terminated_length": 925.0, + "epoch": 0.04160832166433286, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2331826453383643, + "kl": 0.002422332763671875, + "learning_rate": 4.14e-07, + "loss": 0.0264, + "num_tokens": 8993554.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8411836624145508, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06784248858472236, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1480953003258581, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05947299418254506, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 208 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1460.0, + "completions/mean_length": 1152.4375, + "completions/mean_terminated_length": 994.45458984375, + "completions/min_length": 271.0, + "completions/min_terminated_length": 271.0, + "epoch": 0.04180836167233447, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8432318765100657, + "kl": 0.0018672943115234375, + "learning_rate": 4.1599999999999997e-07, + "loss": -0.1326, + "num_tokens": 9040545.0, + "reward": 0.0, + "reward_std": 0.7641099095344543, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0551617647793354, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16650149330039557, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1102186379345533, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 209 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 932.0, + "completions/max_terminated_length": 932.0, + "completions/mean_length": 841.9375, + "completions/mean_terminated_length": 841.9375, + "completions/min_length": 719.0, + "completions/min_terminated_length": 719.0, + "epoch": 0.042008401680336066, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.9503350291539121, + "kl": 0.00011467933654785156, + "learning_rate": 4.1799999999999996e-07, + "loss": -0.0102, + "num_tokens": 9078304.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0452624559402466, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0013474647209354112, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05638855779500174, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05692750425533111, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 210 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1478.0, + "completions/max_terminated_length": 1478.0, + "completions/mean_length": 1167.0625, + "completions/mean_terminated_length": 1167.0625, + "completions/min_length": 895.0, + "completions/min_terminated_length": 895.0, + "epoch": 0.04220844168833767, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8785571002746018, + "kl": 0.0017242431640625, + "learning_rate": 4.1999999999999995e-07, + "loss": -0.0142, + "num_tokens": 9130265.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9150549173355103, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.028976761363457233, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07249823460286393, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1276569477008451, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 211 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1468.0, + "completions/max_terminated_length": 1468.0, + "completions/mean_length": 1159.125, + "completions/mean_terminated_length": 1159.125, + "completions/min_length": 594.0, + "completions/min_terminated_length": 594.0, + "epoch": 0.04240848169633927, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9308579129709322, + "kl": 0.0017242431640625, + "learning_rate": 4.2199999999999994e-07, + "loss": -0.0336, + "num_tokens": 9179923.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.8918459415435791, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.00013447584597701956, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.35560873162651774, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13526380260918403, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 212 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1292.0, + "completions/max_terminated_length": 1292.0, + "completions/mean_length": 1033.5, + "completions/mean_terminated_length": 1033.5, + "completions/min_length": 848.0, + "completions/min_terminated_length": 848.0, + "epoch": 0.04260852170434087, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4253404704791017, + "kl": 0.0023441314697265625, + "learning_rate": 4.24e-07, + "loss": -0.0692, + "num_tokens": 9220851.0, + "reward": 0.0, + "reward_std": 0.8730344176292419, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.061001731357271625, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1578657276486732, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0894427190999916, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 213 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1121.0, + "completions/max_terminated_length": 1121.0, + "completions/mean_length": 980.375, + "completions/mean_terminated_length": 980.375, + "completions/min_length": 728.0, + "completions/min_terminated_length": 728.0, + "epoch": 0.04280856171234247, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.858402334782274, + "kl": 0.0013270378112792969, + "learning_rate": 4.26e-07, + "loss": 0.0202, + "num_tokens": 9261409.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7836552262306213, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.082179030869751, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07348151873042426, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1172998689652263, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 214 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1090.9375, + "completions/mean_terminated_length": 1032.5, + "completions/min_length": 698.0, + "completions/min_terminated_length": 698.0, + "epoch": 0.04300860172034407, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4162063157992324, + "kl": 0.002346038818359375, + "learning_rate": 4.2799999999999997e-07, + "loss": 0.003, + "num_tokens": 9305648.0, + "reward": 0.0, + "reward_std": 0.9420455694198608, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0692983128258255, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0747415968787502, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11674600476945511, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 215 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1171.0, + "completions/max_terminated_length": 1171.0, + "completions/mean_length": 880.5, + "completions/mean_terminated_length": 880.5, + "completions/min_length": 613.0, + "completions/min_terminated_length": 613.0, + "epoch": 0.04320864172834567, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9565859649679247, + "kl": 0.0024433135986328125, + "learning_rate": 4.2999999999999996e-07, + "loss": -0.0558, + "num_tokens": 9342288.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.026890754699707, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05489900836076578, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06558008123257113, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.15299479536052005, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 216 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1201.375, + "completions/mean_terminated_length": 1181.4666748046875, + "completions/min_length": 877.0, + "completions/min_terminated_length": 877.0, + "epoch": 0.04340868173634727, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3023387457564217, + "kl": 0.0011529922485351562, + "learning_rate": 4.3199999999999995e-07, + "loss": 0.022, + "num_tokens": 9376150.0, + "reward": 2.2351741790771484e-08, + "reward_std": 0.9103466868400574, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.15538569539202937, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.28305115083333093, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.15752718754175363, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 217 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1085.0, + "completions/max_terminated_length": 1085.0, + "completions/mean_length": 885.875, + "completions/mean_terminated_length": 885.875, + "completions/min_length": 801.0, + "completions/min_terminated_length": 801.0, + "epoch": 0.04360872174434887, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.537963270474684, + "kl": 0.0010976791381835938, + "learning_rate": 4.34e-07, + "loss": 0.0068, + "num_tokens": 9412804.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9205169677734375, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06382075172663658, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.029654530114218824, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14599594109020572, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 218 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1447.0, + "completions/mean_length": 1225.375, + "completions/mean_terminated_length": 1162.0, + "completions/min_length": 848.0, + "completions/min_terminated_length": 848.0, + "epoch": 0.04380876175235047, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2530470631287494, + "kl": 0.0009889602661132812, + "learning_rate": 4.36e-07, + "loss": -0.0145, + "num_tokens": 9471962.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.523733913898468, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.019829517398796077, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10234363669460969, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07503085784948506, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 219 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1308.0, + "completions/mean_length": 1149.0, + "completions/mean_terminated_length": 1098.857177734375, + "completions/min_length": 895.0, + "completions/min_terminated_length": 895.0, + "epoch": 0.04400880176035207, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1859625943660568, + "kl": 0.00229644775390625, + "learning_rate": 4.38e-07, + "loss": -0.0488, + "num_tokens": 9519210.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.47792327404022217, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.29968231651025223, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.23980986217448877, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09388724521901161, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 220 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1230.0, + "completions/max_terminated_length": 1230.0, + "completions/mean_length": 965.3125, + "completions/mean_terminated_length": 965.3125, + "completions/min_length": 758.0, + "completions/min_terminated_length": 758.0, + "epoch": 0.04420884176835367, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3319354414464986, + "kl": 0.0022258758544921875, + "learning_rate": 4.3999999999999997e-07, + "loss": -0.0124, + "num_tokens": 9568295.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6336479187011719, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06862283966138173, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11566767398133283, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.17867827491311283, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 221 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1042.0, + "completions/max_terminated_length": 1042.0, + "completions/mean_length": 846.5, + "completions/mean_terminated_length": 846.5, + "completions/min_length": 621.0, + "completions/min_terminated_length": 621.0, + "epoch": 0.04440888177635527, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.5940347146191565, + "kl": 0.002979278564453125, + "learning_rate": 4.4199999999999996e-07, + "loss": 0.0011, + "num_tokens": 9601455.0, + "reward": 0.0, + "reward_std": 0.9030362963676453, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.15548470773781897, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.19535398613666274, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.6583333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.19302657656203526, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 222 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1241.0, + "completions/max_terminated_length": 1241.0, + "completions/mean_length": 1098.375, + "completions/mean_terminated_length": 1098.375, + "completions/min_length": 959.0, + "completions/min_terminated_length": 959.0, + "epoch": 0.044608921784356874, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5634856972444595, + "kl": 0.002712249755859375, + "learning_rate": 4.44e-07, + "loss": -0.0313, + "num_tokens": 9651021.0, + "reward": 0.0, + "reward_std": 0.8934721946716309, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.2773988374292751, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.33343266948464734, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06666666666666671, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 223 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1280.0, + "completions/max_terminated_length": 1280.0, + "completions/mean_length": 807.0, + "completions/mean_terminated_length": 807.0, + "completions/min_length": 608.0, + "completions/min_terminated_length": 608.0, + "epoch": 0.04480896179235847, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8646908682046908, + "kl": 0.002239227294921875, + "learning_rate": 4.46e-07, + "loss": 0.042, + "num_tokens": 9695893.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0253026485443115, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.15497356785656796, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.33208755826771225, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.16815997674172586, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 224 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1434.0, + "completions/max_terminated_length": 1434.0, + "completions/mean_length": 1176.8125, + "completions/mean_terminated_length": 1176.8125, + "completions/min_length": 986.0, + "completions/min_terminated_length": 986.0, + "epoch": 0.04500900180036007, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.007336532818445, + "kl": 0.001667022705078125, + "learning_rate": 4.48e-07, + "loss": -0.0016, + "num_tokens": 9737514.0, + "reward": 0.0, + "reward_std": 0.8165589570999146, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07263423218266918, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2554293894790988, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11213417888437975, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 225 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1142.0, + "completions/max_terminated_length": 1142.0, + "completions/mean_length": 1023.5, + "completions/mean_terminated_length": 1023.5, + "completions/min_length": 895.0, + "completions/min_terminated_length": 895.0, + "epoch": 0.045209041808361675, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.143660594505062, + "kl": 0.0018329620361328125, + "learning_rate": 4.5e-07, + "loss": 0.0219, + "num_tokens": 9778434.0, + "reward": 0.0, + "reward_std": 0.46018385887145996, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.05405496853728381, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17267060861994463, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1548595540529595, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 226 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1430.0, + "completions/max_terminated_length": 1430.0, + "completions/mean_length": 1167.875, + "completions/mean_terminated_length": 1167.875, + "completions/min_length": 889.0, + "completions/min_terminated_length": 889.0, + "epoch": 0.04540908181636327, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.541906094620699, + "kl": 0.0015106201171875, + "learning_rate": 4.5199999999999997e-07, + "loss": -0.0518, + "num_tokens": 9826832.0, + "reward": -3.725290298461914e-09, + "reward_std": 1.0159473419189453, + "rewards/wordcountpos_reward_nokeypoint/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.12502466892099923, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.24723218933464056, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11474609652039006, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 227 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1433.0, + "completions/mean_length": 1125.25, + "completions/mean_terminated_length": 1100.2667236328125, + "completions/min_length": 796.0, + "completions/min_terminated_length": 796.0, + "epoch": 0.04560912182436487, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6380052995573613, + "kl": 0.0025787353515625, + "learning_rate": 4.54e-07, + "loss": -0.0567, + "num_tokens": 9868436.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.810700535774231, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08288718015503932, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08549122413189665, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7374999999999999, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10461569884316813, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 228 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1364.0, + "completions/max_terminated_length": 1364.0, + "completions/mean_length": 1067.3125, + "completions/mean_terminated_length": 1067.3125, + "completions/min_length": 862.0, + "completions/min_terminated_length": 862.0, + "epoch": 0.045809161832366475, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8883904956468958, + "kl": 0.0016040802001953125, + "learning_rate": 4.56e-07, + "loss": -0.0607, + "num_tokens": 9910985.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9811691045761108, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.056602730797416824, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0766100188330585, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13763881881375054, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 229 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1391.0, + "completions/max_terminated_length": 1391.0, + "completions/mean_length": 1017.3125, + "completions/mean_terminated_length": 1017.3125, + "completions/min_length": 717.0, + "completions/min_terminated_length": 717.0, + "epoch": 0.04600920184036807, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.99892163247479, + "kl": 0.00310516357421875, + "learning_rate": 4.58e-07, + "loss": 0.0284, + "num_tokens": 9951550.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9637755155563354, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.14657808865223987, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.3213728750324189, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1517795672580372, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 230 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1069.0, + "completions/max_terminated_length": 1069.0, + "completions/mean_length": 955.3125, + "completions/mean_terminated_length": 955.3125, + "completions/min_length": 914.0, + "completions/min_terminated_length": 914.0, + "epoch": 0.04620924184836967, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.17963447326155, + "kl": 0.0015716552734375, + "learning_rate": 4.6e-07, + "loss": 0.0047, + "num_tokens": 9989179.0, + "reward": 0.0, + "reward_std": 0.650810182094574, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.3142218827453241, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.20112549564817764, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07187952884282608, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 231 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1079.0, + "completions/max_terminated_length": 1079.0, + "completions/mean_length": 952.3125, + "completions/mean_terminated_length": 952.3125, + "completions/min_length": 708.0, + "completions/min_terminated_length": 708.0, + "epoch": 0.046409281856371276, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5622464344856266, + "kl": 0.0021839141845703125, + "learning_rate": 4.62e-07, + "loss": -0.0391, + "num_tokens": 10028048.0, + "reward": 0.0, + "reward_std": 0.8484879732131958, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.044956607492688996, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.23633638410277347, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04367387557118565, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 232 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1341.0, + "completions/max_terminated_length": 1341.0, + "completions/mean_length": 1148.6875, + "completions/mean_terminated_length": 1148.6875, + "completions/min_length": 943.0, + "completions/min_terminated_length": 943.0, + "epoch": 0.046609321864372874, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3261127341425984, + "kl": 0.002223968505859375, + "learning_rate": 4.64e-07, + "loss": -0.0115, + "num_tokens": 10076459.0, + "reward": 0.0, + "reward_std": 0.897235631942749, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04034816652205139, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1605389226839477, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10327955589886446, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 233 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1107.0, + "completions/max_terminated_length": 1107.0, + "completions/mean_length": 889.5, + "completions/mean_terminated_length": 889.5, + "completions/min_length": 588.0, + "completions/min_terminated_length": 588.0, + "epoch": 0.04680936187237447, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.96686554637501, + "kl": 0.0023040771484375, + "learning_rate": 4.66e-07, + "loss": -0.0617, + "num_tokens": 10102675.0, + "reward": -4.470348358154297e-08, + "reward_std": 0.9673017263412476, + "rewards/wordcountpos_reward_nokeypoint/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08220673258008306, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09802446852023158, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10671873729054748, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 234 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1055.0, + "completions/max_terminated_length": 1055.0, + "completions/mean_length": 916.5625, + "completions/mean_terminated_length": 916.5625, + "completions/min_length": 781.0, + "completions/min_terminated_length": 781.0, + "epoch": 0.04700940188037608, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5163690458793933, + "kl": 0.0011548995971679688, + "learning_rate": 4.68e-07, + "loss": -0.0213, + "num_tokens": 10145620.0, + "reward": 0.0, + "reward_std": 0.5395073890686035, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.12540322515245947, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1448322589030256, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11792967144619462, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 235 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1321.8125, + "completions/mean_terminated_length": 1214.9000244140625, + "completions/min_length": 1032.0, + "completions/min_terminated_length": 1032.0, + "epoch": 0.047209441888377675, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.310633500930069, + "kl": 0.002460479736328125, + "learning_rate": 4.6999999999999995e-07, + "loss": -0.0006, + "num_tokens": 10192633.0, + "reward": 0.0, + "reward_std": 0.6647611856460571, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.12619763972331316, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.3269676463231041, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10749676997731401, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 236 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1183.0, + "completions/max_terminated_length": 1183.0, + "completions/mean_length": 1029.0, + "completions/mean_terminated_length": 1029.0, + "completions/min_length": 743.0, + "completions/min_terminated_length": 743.0, + "epoch": 0.04740948189637927, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.725677345447223, + "kl": 0.0026092529296875, + "learning_rate": 4.7199999999999994e-07, + "loss": -0.0083, + "num_tokens": 10229865.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9369378089904785, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1933315344419013, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07077252392183027, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07290277645477444, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 237 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1245.0, + "completions/max_terminated_length": 1245.0, + "completions/mean_length": 960.25, + "completions/mean_terminated_length": 960.25, + "completions/min_length": 807.0, + "completions/min_terminated_length": 807.0, + "epoch": 0.04760952190438088, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.361171838586012, + "kl": 0.001861572265625, + "learning_rate": 4.7399999999999993e-07, + "loss": 0.0219, + "num_tokens": 10279181.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0442225933074951, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08875852092612156, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07092377599855854, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08243965245133131, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327954292297363, + "step": 238 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1409.0, + "completions/max_terminated_length": 1409.0, + "completions/mean_length": 1173.4375, + "completions/mean_terminated_length": 1173.4375, + "completions/min_length": 905.0, + "completions/min_terminated_length": 905.0, + "epoch": 0.047809561912382476, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.933342237022181, + "kl": 0.002105712890625, + "learning_rate": 4.76e-07, + "loss": -0.029, + "num_tokens": 10331980.0, + "reward": 0.0, + "reward_std": 0.8224970102310181, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.24699288053489815, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.19048379559775705, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10532137766186217, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 239 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1383.0, + "completions/max_terminated_length": 1383.0, + "completions/mean_length": 1057.3125, + "completions/mean_terminated_length": 1057.3125, + "completions/min_length": 773.0, + "completions/min_terminated_length": 773.0, + "epoch": 0.048009601920384073, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3077117279460415, + "kl": 0.002109527587890625, + "learning_rate": 4.779999999999999e-07, + "loss": -0.0322, + "num_tokens": 10377841.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9543654918670654, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.12452094718739255, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11410595519407349, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09108400680852978, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 240 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1170.0, + "completions/mean_length": 1055.5625, + "completions/mean_terminated_length": 1025.933349609375, + "completions/min_length": 865.0, + "completions/min_terminated_length": 865.0, + "epoch": 0.04820964192838568, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.1787849759855293, + "kl": 0.0008411407470703125, + "learning_rate": 4.8e-07, + "loss": 0.047, + "num_tokens": 10421122.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.792097806930542, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.14866288144743384, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16832163974708378, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.17299111516469834, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 241 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1408.0, + "completions/max_terminated_length": 1408.0, + "completions/mean_length": 1006.0625, + "completions/mean_terminated_length": 1006.0625, + "completions/min_length": 770.0, + "completions/min_terminated_length": 770.0, + "epoch": 0.048409681936387276, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6703302629381387, + "kl": 0.002452850341796875, + "learning_rate": 4.82e-07, + "loss": -0.0818, + "num_tokens": 10464251.0, + "reward": 0.0, + "reward_std": 0.8565043210983276, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.12685269116702483, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.24558355155901537, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11213417888437975, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 242 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1195.0, + "completions/max_terminated_length": 1195.0, + "completions/mean_length": 869.875, + "completions/mean_terminated_length": 869.875, + "completions/min_length": 674.0, + "completions/min_terminated_length": 674.0, + "epoch": 0.04860972194438888, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.67617998135893, + "kl": 0.0012769699096679688, + "learning_rate": 4.839999999999999e-07, + "loss": -0.0137, + "num_tokens": 10496297.0, + "reward": 0.0, + "reward_std": 0.5240008234977722, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04355294872353595, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11656327233608009, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11417984514369006, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 243 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1079.0, + "completions/max_terminated_length": 1079.0, + "completions/mean_length": 1039.3125, + "completions/mean_terminated_length": 1039.3125, + "completions/min_length": 966.0, + "completions/min_terminated_length": 966.0, + "epoch": 0.04880976195239048, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.0619065096583717, + "kl": 0.0003324151039123535, + "learning_rate": 4.86e-07, + "loss": 0.0012, + "num_tokens": 10540174.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.04690420627594, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.020412582360573226, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.052809767477860384, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10101338378503963, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 244 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1325.4375, + "completions/mean_terminated_length": 1267.25, + "completions/min_length": 964.0, + "completions/min_terminated_length": 964.0, + "epoch": 0.04900980196039208, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5184797801444665, + "kl": 0.0016956329345703125, + "learning_rate": 4.879999999999999e-07, + "loss": -0.0042, + "num_tokens": 10589757.0, + "reward": 0.0, + "reward_std": 0.7827122807502747, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09245345437936417, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12240614643473521, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12382783747337807, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 245 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1387.0, + "completions/max_terminated_length": 1387.0, + "completions/mean_length": 1038.5625, + "completions/mean_terminated_length": 1038.5625, + "completions/min_length": 837.0, + "completions/min_terminated_length": 837.0, + "epoch": 0.04920984196839368, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5515334276856474, + "kl": 0.00110626220703125, + "learning_rate": 4.9e-07, + "loss": -0.0582, + "num_tokens": 10627022.0, + "reward": 0.0, + "reward_std": 0.6431183218955994, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.054071785414875985, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10073384258515902, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09888264649460886, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 246 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1329.0, + "completions/mean_length": 1052.375, + "completions/mean_terminated_length": 1022.5333862304688, + "completions/min_length": 670.0, + "completions/min_terminated_length": 670.0, + "epoch": 0.04940988197639528, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8893900795680008, + "kl": 0.0014591217041015625, + "learning_rate": 4.92e-07, + "loss": -0.0354, + "num_tokens": 10661388.0, + "reward": 0.0, + "reward_std": 0.6269555687904358, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.13438965898694297, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1560246172398219, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.15910630036178586, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 247 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1382.0, + "completions/max_terminated_length": 1382.0, + "completions/mean_length": 1057.0625, + "completions/mean_terminated_length": 1057.0625, + "completions/min_length": 665.0, + "completions/min_terminated_length": 665.0, + "epoch": 0.04960992198439688, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9257407009312564, + "kl": 0.00196075439453125, + "learning_rate": 4.94e-07, + "loss": -0.0484, + "num_tokens": 10709293.0, + "reward": 0.0, + "reward_std": 0.7428077459335327, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.019288408190148067, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07437734421236523, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1287403358472941, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 248 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1246.8125, + "completions/mean_terminated_length": 1188.3846435546875, + "completions/min_length": 946.0, + "completions/min_terminated_length": 946.0, + "epoch": 0.04980996199239848, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8261479365990994, + "kl": 0.0019073486328125, + "learning_rate": 4.96e-07, + "loss": 0.0202, + "num_tokens": 10749074.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0199456214904785, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.359686599785524, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10111299181683772, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08255189164891873, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 249 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1472.0, + "completions/mean_length": 1336.4375, + "completions/mean_terminated_length": 1298.6923828125, + "completions/min_length": 1092.0, + "completions/min_terminated_length": 1092.0, + "epoch": 0.05001000200040008, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.520582858858388, + "kl": 0.0018444061279296875, + "learning_rate": 4.979999999999999e-07, + "loss": 0.0164, + "num_tokens": 10802161.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5609039068222046, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03305295539384437, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.062664505437289, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8500000000000001, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04554200340426487, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 250 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1380.0, + "completions/max_terminated_length": 1380.0, + "completions/mean_length": 1037.3125, + "completions/mean_terminated_length": 1037.3125, + "completions/min_length": 701.0, + "completions/min_terminated_length": 701.0, + "epoch": 0.05021004200840168, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8348180784687735, + "kl": 0.001255035400390625, + "learning_rate": 5e-07, + "loss": -0.0729, + "num_tokens": 10837198.0, + "reward": 0.0, + "reward_std": 0.5964027643203735, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03502872983625095, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13784751341144977, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1128748897706693, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 251 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1273.0, + "completions/max_terminated_length": 1273.0, + "completions/mean_length": 964.25, + "completions/mean_terminated_length": 964.25, + "completions/min_length": 686.0, + "completions/min_terminated_length": 686.0, + "epoch": 0.05041008201640328, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8278640277663496, + "kl": 0.0012826919555664062, + "learning_rate": 5.02e-07, + "loss": -0.0086, + "num_tokens": 10882410.0, + "reward": 0.0, + "reward_std": 0.8011411428451538, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.17035550865208646, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0653736074773378, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06831300510639733, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 252 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1417.0, + "completions/mean_length": 1185.875, + "completions/mean_terminated_length": 1164.933349609375, + "completions/min_length": 866.0, + "completions/min_terminated_length": 866.0, + "epoch": 0.05061012202440488, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1069781900664526, + "kl": 0.0012874603271484375, + "learning_rate": 5.04e-07, + "loss": -0.0277, + "num_tokens": 10928176.0, + "reward": 0.0, + "reward_std": 1.0323574542999268, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.2018410081938735, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.22079036175032507, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05708992257184504, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 253 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1390.0, + "completions/mean_length": 1167.3125, + "completions/mean_terminated_length": 1145.1334228515625, + "completions/min_length": 915.0, + "completions/min_terminated_length": 915.0, + "epoch": 0.05081016203240648, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8835873062302553, + "kl": 0.0020618438720703125, + "learning_rate": 5.06e-07, + "loss": 0.0269, + "num_tokens": 10972637.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.6265268325805664, + "rewards/wordcountpos_reward_nokeypoint/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1061607120847878, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1162306656735699, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1377060745318193, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 254 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1338.0, + "completions/mean_terminated_length": 1264.3636474609375, + "completions/min_length": 864.0, + "completions/min_terminated_length": 864.0, + "epoch": 0.051010202040408084, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9757735346893304, + "kl": 0.001953125, + "learning_rate": 5.079999999999999e-07, + "loss": 0.0156, + "num_tokens": 11023829.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9831536412239075, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0036830140515540883, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06032776164396613, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07781745019952502, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 255 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1473.0, + "completions/mean_length": 1266.8125, + "completions/mean_terminated_length": 1126.9000244140625, + "completions/min_length": 769.0, + "completions/min_terminated_length": 769.0, + "epoch": 0.05121024204840968, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.022661637083397, + "kl": 0.002368927001953125, + "learning_rate": 5.1e-07, + "loss": -0.003, + "num_tokens": 11080282.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.33710700273513794, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.021526741527980495, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.19701088983895643, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09388724521901162, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 256 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1409.0, + "completions/mean_length": 1181.75, + "completions/mean_terminated_length": 1108.3077392578125, + "completions/min_length": 809.0, + "completions/min_terminated_length": 809.0, + "epoch": 0.05141028205641128, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4540106376970505, + "kl": 0.00251007080078125, + "learning_rate": 5.12e-07, + "loss": -0.0006, + "num_tokens": 11133734.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9926777482032776, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.011624446906072386, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1214033007247737, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1134476547592341, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 257 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1417.0, + "completions/max_terminated_length": 1417.0, + "completions/mean_length": 925.5, + "completions/mean_terminated_length": 925.5, + "completions/min_length": 647.0, + "completions/min_terminated_length": 647.0, + "epoch": 0.051610322064412885, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6889648678463893, + "kl": 0.00144195556640625, + "learning_rate": 5.14e-07, + "loss": -0.0189, + "num_tokens": 11172814.0, + "reward": 0.0, + "reward_std": 0.8538609743118286, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07693073889883913, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17259852315341095, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07888106377466156, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 258 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1213.0, + "completions/mean_length": 1131.375, + "completions/mean_terminated_length": 1046.3077392578125, + "completions/min_length": 866.0, + "completions/min_terminated_length": 866.0, + "epoch": 0.05181036207241448, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.834749368216442, + "kl": 0.0017528533935546875, + "learning_rate": 5.16e-07, + "loss": -0.001, + "num_tokens": 11215644.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0220551490783691, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.042019139748222016, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07036877046997668, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13957607775504183, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 259 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1275.0, + "completions/max_terminated_length": 1275.0, + "completions/mean_length": 1012.0, + "completions/mean_terminated_length": 1012.0, + "completions/min_length": 856.0, + "completions/min_terminated_length": 856.0, + "epoch": 0.05201040208041608, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.362685565848312, + "kl": 0.00206756591796875, + "learning_rate": 5.18e-07, + "loss": 0.0154, + "num_tokens": 11260852.0, + "reward": 2.2351741790771484e-08, + "reward_std": 0.9945588111877441, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09666566902768368, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.028874339858427012, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09428090415820635, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 260 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1082.0, + "completions/max_terminated_length": 1082.0, + "completions/mean_length": 880.9375, + "completions/mean_terminated_length": 880.9375, + "completions/min_length": 682.0, + "completions/min_terminated_length": 682.0, + "epoch": 0.052210442088417686, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.603566233370556, + "kl": 0.0025196075439453125, + "learning_rate": 5.2e-07, + "loss": -0.0589, + "num_tokens": 11309499.0, + "reward": 0.0, + "reward_std": 0.8537737131118774, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.13169218716842834, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.22508293636203333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.6791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1067187372905475, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 261 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1151.0, + "completions/max_terminated_length": 1151.0, + "completions/mean_length": 893.0, + "completions/mean_terminated_length": 893.0, + "completions/min_length": 740.0, + "completions/min_terminated_length": 740.0, + "epoch": 0.052410482096419284, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.2159169441534, + "kl": 0.002834320068359375, + "learning_rate": 5.22e-07, + "loss": 0.0, + "num_tokens": 11353563.0, + "reward": 0.0, + "reward_std": 0.837824285030365, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0619729839503517, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05680923676339889, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11147163731607215, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 262 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1077.0, + "completions/max_terminated_length": 1077.0, + "completions/mean_length": 891.6875, + "completions/mean_terminated_length": 891.6875, + "completions/min_length": 601.0, + "completions/min_terminated_length": 601.0, + "epoch": 0.05261052210442088, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.95636741411391, + "kl": 0.002643585205078125, + "learning_rate": 5.24e-07, + "loss": -0.029, + "num_tokens": 11391654.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.6188561916351318, + "rewards/wordcountpos_reward_nokeypoint/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -3.107396955948083e-05, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10063639832534998, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08255189164891873, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 263 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1416.0, + "completions/mean_length": 1285.25, + "completions/mean_terminated_length": 1235.6923828125, + "completions/min_length": 964.0, + "completions/min_terminated_length": 964.0, + "epoch": 0.052810562112422486, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8869233315078073, + "kl": 0.0020008087158203125, + "learning_rate": 5.26e-07, + "loss": 0.0152, + "num_tokens": 11439818.0, + "reward": -1.862645149230957e-08, + "reward_std": 1.0445475578308105, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.862645149230957e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.09433074880359507, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13756588284184068, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09496588081262931, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 264 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1209.0, + "completions/max_terminated_length": 1209.0, + "completions/mean_length": 1009.625, + "completions/mean_terminated_length": 1009.625, + "completions/min_length": 833.0, + "completions/min_terminated_length": 833.0, + "epoch": 0.053010602120424084, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9122155512519123, + "kl": 0.00188446044921875, + "learning_rate": 5.28e-07, + "loss": -0.0021, + "num_tokens": 11484508.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9582520127296448, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1278037967770834, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09566374216084292, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06191391873668904, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 265 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1473.0, + "completions/mean_length": 1264.375, + "completions/mean_terminated_length": 1248.666748046875, + "completions/min_length": 1032.0, + "completions/min_terminated_length": 1032.0, + "epoch": 0.05321064212842568, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2042435744888706, + "kl": 0.002613067626953125, + "learning_rate": 5.3e-07, + "loss": -0.0144, + "num_tokens": 11535010.0, + "reward": 0.0, + "reward_std": 1.0437947511672974, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05266281503051963, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09178369479125485, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08333333333333336, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 266 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1289.75, + "completions/mean_terminated_length": 1194.181884765625, + "completions/min_length": 349.0, + "completions/min_terminated_length": 349.0, + "epoch": 0.05341068213642729, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.10150657460499, + "kl": 0.0021038055419921875, + "learning_rate": 5.32e-07, + "loss": 0.0488, + "num_tokens": 11592406.0, + "reward": 0.0, + "reward_std": 0.9735476970672607, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06015756663661117, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06616969365831783, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13526380260918403, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 267 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1271.0, + "completions/max_terminated_length": 1271.0, + "completions/mean_length": 979.75, + "completions/mean_terminated_length": 979.75, + "completions/min_length": 646.0, + "completions/min_terminated_length": 646.0, + "epoch": 0.053610722144428885, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1050031808411487, + "kl": 0.0017948150634765625, + "learning_rate": 5.34e-07, + "loss": -0.0288, + "num_tokens": 11629186.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9993117451667786, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.009436205153035806, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11669165608897808, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08388704928078614, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 268 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1025.0, + "completions/max_terminated_length": 1025.0, + "completions/mean_length": 772.5, + "completions/mean_terminated_length": 772.5, + "completions/min_length": 540.0, + "completions/min_terminated_length": 540.0, + "epoch": 0.05381076215243048, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.437725661364779, + "kl": 0.0015411376953125, + "learning_rate": 5.36e-07, + "loss": 0.0126, + "num_tokens": 11653066.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8250758647918701, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.012144148904458368, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06541621200516519, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10809803506625451, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 269 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1258.0, + "completions/mean_length": 1131.5625, + "completions/mean_terminated_length": 1078.9285888671875, + "completions/min_length": 541.0, + "completions/min_terminated_length": 541.0, + "epoch": 0.05401080216043209, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.426230579021578, + "kl": 0.003326416015625, + "learning_rate": 5.38e-07, + "loss": 0.0175, + "num_tokens": 11706131.0, + "reward": 0.0, + "reward_std": 0.930101215839386, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0993167483462124, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10376718290130939, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12382783747337808, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 270 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1344.0, + "completions/max_terminated_length": 1344.0, + "completions/mean_length": 964.4375, + "completions/mean_terminated_length": 964.4375, + "completions/min_length": 767.0, + "completions/min_terminated_length": 767.0, + "epoch": 0.054210842168433686, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7367350247173627, + "kl": 0.001506805419921875, + "learning_rate": 5.4e-07, + "loss": 0.0066, + "num_tokens": 11739682.0, + "reward": -1.1175870895385742e-08, + "reward_std": 0.99496990442276, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.035646403917762214, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0484009337014269, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1299572579307862, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 271 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1437.0, + "completions/max_terminated_length": 1437.0, + "completions/mean_length": 1060.9375, + "completions/mean_terminated_length": 1060.9375, + "completions/min_length": 874.0, + "completions/min_terminated_length": 874.0, + "epoch": 0.054410882176435284, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8220763591606066, + "kl": 0.00287628173828125, + "learning_rate": 5.420000000000001e-07, + "loss": -0.0723, + "num_tokens": 11781441.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8516342639923096, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.011676102456567687, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16454248243913985, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7083333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11385500851066223, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 272 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1244.0, + "completions/max_terminated_length": 1244.0, + "completions/mean_length": 992.75, + "completions/mean_terminated_length": 992.75, + "completions/min_length": 757.0, + "completions/min_terminated_length": 757.0, + "epoch": 0.05461092218443689, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.925023858705948, + "kl": 0.002910614013671875, + "learning_rate": 5.44e-07, + "loss": -0.018, + "num_tokens": 11820573.0, + "reward": 0.0, + "reward_std": 0.4371957778930664, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.130630632362241, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11928979932631983, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.19734346820820914, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 273 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1163.0, + "completions/mean_length": 1018.875, + "completions/mean_terminated_length": 986.800048828125, + "completions/min_length": 761.0, + "completions/min_terminated_length": 761.0, + "epoch": 0.05481096219243849, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5352284089811103, + "kl": 0.0010995864868164062, + "learning_rate": 5.46e-07, + "loss": 0.0256, + "num_tokens": 11870251.0, + "reward": 0.0, + "reward_std": 0.7385746240615845, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.18482610258649762, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11311684781886092, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09496588081262934, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 274 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1445.0, + "completions/mean_length": 1132.9375, + "completions/mean_terminated_length": 1108.4666748046875, + "completions/min_length": 914.0, + "completions/min_terminated_length": 914.0, + "epoch": 0.05501100220044009, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.084991912488546, + "kl": 0.002109527587890625, + "learning_rate": 5.48e-07, + "loss": -0.0236, + "num_tokens": 11912178.0, + "reward": 0.0, + "reward_std": 0.8492661714553833, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.05531761891264852, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17571676582864101, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08850612031567837, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 275 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1312.0, + "completions/max_terminated_length": 1312.0, + "completions/mean_length": 1011.0625, + "completions/mean_terminated_length": 1011.0625, + "completions/min_length": 806.0, + "completions/min_terminated_length": 806.0, + "epoch": 0.05521104220844169, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2787570744475474, + "kl": 0.002166748046875, + "learning_rate": 5.5e-07, + "loss": 0.0135, + "num_tokens": 11948971.0, + "reward": 0.0, + "reward_std": 0.8487585783004761, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10105516252525043, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17572498614524742, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14504150108516198, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 276 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1472.0, + "completions/mean_length": 1152.0625, + "completions/mean_terminated_length": 1102.357177734375, + "completions/min_length": 906.0, + "completions/min_terminated_length": 906.0, + "epoch": 0.05541108221644329, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.283834336463753, + "kl": 0.00211334228515625, + "learning_rate": 5.520000000000001e-07, + "loss": -0.0583, + "num_tokens": 11999660.0, + "reward": 0.0, + "reward_std": 0.5701693296432495, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.012782180273826316, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.26824588318710374, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12102953419784838, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 277 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1105.0, + "completions/max_terminated_length": 1105.0, + "completions/mean_length": 829.875, + "completions/mean_terminated_length": 829.875, + "completions/min_length": 555.0, + "completions/min_terminated_length": 555.0, + "epoch": 0.05561112222444489, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.791363160894401, + "kl": 0.0023956298828125, + "learning_rate": 5.54e-07, + "loss": -0.0564, + "num_tokens": 12040610.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8365106582641602, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.017401852500042607, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04819169943324448, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13743685418725538, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 278 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1376.0, + "completions/max_terminated_length": 1376.0, + "completions/mean_length": 1025.75, + "completions/mean_terminated_length": 1025.75, + "completions/min_length": 715.0, + "completions/min_terminated_length": 715.0, + "epoch": 0.05581116223244649, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0901722388430035, + "kl": 0.0017795562744140625, + "learning_rate": 5.560000000000001e-07, + "loss": -0.025, + "num_tokens": 12076142.0, + "reward": 0.0, + "reward_std": 0.7258594036102295, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07768453761851564, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07550196526484584, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8250000000000001, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09388724521901161, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 279 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1446.0, + "completions/max_terminated_length": 1446.0, + "completions/mean_length": 950.5625, + "completions/mean_terminated_length": 950.5625, + "completions/min_length": 569.0, + "completions/min_terminated_length": 569.0, + "epoch": 0.05601120224044809, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.444205993739112, + "kl": 0.00226593017578125, + "learning_rate": 5.58e-07, + "loss": 0.0108, + "num_tokens": 12108871.0, + "reward": 0.0, + "reward_std": 0.40904662013053894, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.12021089723569597, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13061780171047072, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09727776191382576, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 280 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1373.0, + "completions/max_terminated_length": 1373.0, + "completions/mean_length": 949.9375, + "completions/mean_terminated_length": 949.9375, + "completions/min_length": 771.0, + "completions/min_terminated_length": 771.0, + "epoch": 0.05621124224844969, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5198140052545877, + "kl": 0.001922607421875, + "learning_rate": 5.6e-07, + "loss": -0.0849, + "num_tokens": 12145838.0, + "reward": 0.0, + "reward_std": 0.9237408638000488, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.12416114541329581, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.058956096038005674, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10809803506625451, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 281 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1287.0, + "completions/mean_length": 1141.1875, + "completions/mean_terminated_length": 1117.2667236328125, + "completions/min_length": 915.0, + "completions/min_terminated_length": 915.0, + "epoch": 0.05641128225645129, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9842314056687163, + "kl": 0.002635955810546875, + "learning_rate": 5.620000000000001e-07, + "loss": -0.0029, + "num_tokens": 12196553.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9228922128677368, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08432372448754188, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15695100642775364, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.15957118462605638, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 282 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1372.0, + "completions/mean_length": 1149.8125, + "completions/mean_terminated_length": 1069.0, + "completions/min_length": 845.0, + "completions/min_terminated_length": 845.0, + "epoch": 0.05661132226445289, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3276634417902247, + "kl": 0.002750396728515625, + "learning_rate": 5.639999999999999e-07, + "loss": 0.0312, + "num_tokens": 12239390.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6609470844268799, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.02406181625793237, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14823623699754873, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0806225774829855, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 283 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1352.0, + "completions/max_terminated_length": 1352.0, + "completions/mean_length": 1034.125, + "completions/mean_terminated_length": 1034.125, + "completions/min_length": 864.0, + "completions/min_terminated_length": 864.0, + "epoch": 0.056811362272454494, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.60694379958285, + "kl": 0.0020275115966796875, + "learning_rate": 5.66e-07, + "loss": -0.033, + "num_tokens": 12282904.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0592902898788452, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.13684123294495323, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0486424085332824, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11917929226045819, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 284 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1468.0, + "completions/mean_length": 1131.5, + "completions/mean_terminated_length": 1106.933349609375, + "completions/min_length": 943.0, + "completions/min_terminated_length": 943.0, + "epoch": 0.05701140228045609, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4573586410890407, + "kl": 0.002803802490234375, + "learning_rate": 5.679999999999999e-07, + "loss": -0.0088, + "num_tokens": 12332376.0, + "reward": 0.0, + "reward_std": 0.6352803707122803, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06363172487843827, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08408547966808964, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12049281521534187, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 285 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.75, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1483.0, + "completions/mean_terminated_length": 1432.0, + "completions/min_length": 1325.0, + "completions/min_terminated_length": 1325.0, + "epoch": 0.05721144228845769, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5022094812135087, + "kl": 0.0015926361083984375, + "learning_rate": 5.699999999999999e-07, + "loss": -0.0058, + "num_tokens": 12386272.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0257747173309326, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09231233947723255, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11164721791566083, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1308094458023239, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 286 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1424.0, + "completions/max_terminated_length": 1424.0, + "completions/mean_length": 1120.875, + "completions/mean_terminated_length": 1120.875, + "completions/min_length": 837.0, + "completions/min_terminated_length": 837.0, + "epoch": 0.057411482296459294, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0151868489265325, + "kl": 0.002079010009765625, + "learning_rate": 5.719999999999999e-07, + "loss": -0.0328, + "num_tokens": 12438302.0, + "reward": 0.0, + "reward_std": 0.5784468054771423, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.20618790862583702, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.4110752585511411, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 287 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1375.0, + "completions/max_terminated_length": 1375.0, + "completions/mean_length": 1032.4375, + "completions/mean_terminated_length": 1032.4375, + "completions/min_length": 716.0, + "completions/min_terminated_length": 716.0, + "epoch": 0.05761152230446089, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.527583992114679, + "kl": 0.0019855499267578125, + "learning_rate": 5.739999999999999e-07, + "loss": -0.0303, + "num_tokens": 12477021.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.935353696346283, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 1.7962884575418395e-05, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05281440812660723, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.6666666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12171612389003693, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 288 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1382.0, + "completions/max_terminated_length": 1382.0, + "completions/mean_length": 1170.0625, + "completions/mean_terminated_length": 1170.0625, + "completions/min_length": 978.0, + "completions/min_terminated_length": 978.0, + "epoch": 0.05781156231246249, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.869084640016092, + "kl": 0.0022640228271484375, + "learning_rate": 5.76e-07, + "loss": 0.0159, + "num_tokens": 12524222.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.013793706893921, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03239298209074214, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.3494680701262261, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13214750456578045, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 289 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1289.0, + "completions/max_terminated_length": 1289.0, + "completions/mean_length": 1073.625, + "completions/mean_terminated_length": 1073.625, + "completions/min_length": 796.0, + "completions/min_terminated_length": 796.0, + "epoch": 0.058011602320464095, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4761628439899535, + "kl": 0.002346038818359375, + "learning_rate": 5.779999999999999e-07, + "loss": 0.0149, + "num_tokens": 12564688.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8878797292709351, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.22045632196158538, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11910808414194844, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08027729719194866, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 290 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1288.0, + "completions/max_terminated_length": 1288.0, + "completions/mean_length": 1055.75, + "completions/mean_terminated_length": 1055.75, + "completions/min_length": 946.0, + "completions/min_terminated_length": 946.0, + "epoch": 0.05821164232846569, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.71904649292869, + "kl": 0.0017299652099609375, + "learning_rate": 5.8e-07, + "loss": -0.013, + "num_tokens": 12599772.0, + "reward": 0.0, + "reward_std": 0.7823508977890015, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0060593920103744585, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1761011153138442, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08766518798921946, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 291 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1293.0, + "completions/max_terminated_length": 1293.0, + "completions/mean_length": 940.6875, + "completions/mean_terminated_length": 940.6875, + "completions/min_length": 674.0, + "completions/min_terminated_length": 674.0, + "epoch": 0.05841168233646729, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.610882110276839, + "kl": 0.00290679931640625, + "learning_rate": 5.819999999999999e-07, + "loss": 0.0016, + "num_tokens": 12639735.0, + "reward": 0.0, + "reward_std": 0.9638223648071289, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06447266653575609, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12632283263609748, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07685966046898342, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 292 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1322.25, + "completions/mean_terminated_length": 1184.0, + "completions/min_length": 912.0, + "completions/min_terminated_length": 912.0, + "epoch": 0.058611722344468896, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.530403790018538, + "kl": 0.00264739990234375, + "learning_rate": 5.839999999999999e-07, + "loss": 0.0095, + "num_tokens": 12688435.0, + "reward": 0.0, + "reward_std": 0.8152444958686829, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.09120402131936467, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09401847566514875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1308094458023239, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 293 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1193.0, + "completions/max_terminated_length": 1193.0, + "completions/mean_length": 1000.375, + "completions/mean_terminated_length": 1000.375, + "completions/min_length": 781.0, + "completions/min_terminated_length": 781.0, + "epoch": 0.058811762352470494, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2984940855860843, + "kl": 0.0022125244140625, + "learning_rate": 5.86e-07, + "loss": -0.0174, + "num_tokens": 12732009.0, + "reward": 0.0, + "reward_std": 1.0639880895614624, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08867643319230327, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06479687744912609, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11122216672215288, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 294 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1460.0, + "completions/max_terminated_length": 1460.0, + "completions/mean_length": 941.6875, + "completions/mean_terminated_length": 941.6875, + "completions/min_length": 680.0, + "completions/min_terminated_length": 680.0, + "epoch": 0.05901180236047209, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7628855107708175, + "kl": 0.002689361572265625, + "learning_rate": 5.879999999999999e-07, + "loss": -0.0625, + "num_tokens": 12779228.0, + "reward": 0.0, + "reward_std": 1.0690325498580933, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04085695177208304, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12960716771368663, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0938872452190116, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 295 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1056.0, + "completions/max_terminated_length": 1056.0, + "completions/mean_length": 879.75, + "completions/mean_terminated_length": 879.75, + "completions/min_length": 776.0, + "completions/min_terminated_length": 776.0, + "epoch": 0.0592118423684737, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.129177650003512, + "kl": 0.0015687942504882812, + "learning_rate": 5.9e-07, + "loss": 0.0057, + "num_tokens": 12810424.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9240412712097168, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.02228385703434772, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12262723299807689, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06309898162000302, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 296 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1155.875, + "completions/mean_terminated_length": 1132.933349609375, + "completions/min_length": 966.0, + "completions/min_terminated_length": 966.0, + "epoch": 0.059411882376475295, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2531548186719865, + "kl": 0.002559661865234375, + "learning_rate": 5.919999999999999e-07, + "loss": 0.0252, + "num_tokens": 12861918.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.046846628189087, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07768023934548039, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11334877198392224, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10470416879457553, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 297 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1459.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1170.4375, + "completions/mean_terminated_length": 1170.4375, + "completions/min_length": 935.0, + "completions/min_terminated_length": 935.0, + "epoch": 0.05961192238447689, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.538009129155931, + "kl": 0.002559661865234375, + "learning_rate": 5.939999999999999e-07, + "loss": 0.0262, + "num_tokens": 12905301.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.84452885389328, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.21064688946281468, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.25688576738706526, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 298 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1262.0, + "completions/max_terminated_length": 1262.0, + "completions/mean_length": 1048.8125, + "completions/mean_terminated_length": 1048.8125, + "completions/min_length": 891.0, + "completions/min_terminated_length": 891.0, + "epoch": 0.0598119623924785, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.939450497986225, + "kl": 0.0019702911376953125, + "learning_rate": 5.96e-07, + "loss": -0.0289, + "num_tokens": 12948866.0, + "reward": 0.0, + "reward_std": 0.9196957349777222, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09438453072047849, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09272003166904187, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11155467020454343, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 299 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1493.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1247.1875, + "completions/mean_terminated_length": 1247.1875, + "completions/min_length": 955.0, + "completions/min_terminated_length": 955.0, + "epoch": 0.060012002400480095, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5073854534634794, + "kl": 0.001346588134765625, + "learning_rate": 5.979999999999999e-07, + "loss": 0.0329, + "num_tokens": 13001221.0, + "reward": 0.0, + "reward_std": 0.7860163450241089, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10779184760040528, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17939361440031695, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10671873729054748, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 300 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1063.0, + "completions/mean_length": 911.8125, + "completions/mean_terminated_length": 872.6000366210938, + "completions/min_length": 592.0, + "completions/min_terminated_length": 592.0, + "epoch": 0.06021204240848169, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5997168694845207, + "kl": 0.0026073455810546875, + "learning_rate": 6e-07, + "loss": -0.0023, + "num_tokens": 13051866.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9218506813049316, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.10554587144111773, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.23538209851668673, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13333333333333336, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 301 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1407.0, + "completions/mean_length": 1090.6875, + "completions/mean_terminated_length": 1032.21435546875, + "completions/min_length": 800.0, + "completions/min_terminated_length": 800.0, + "epoch": 0.0604120824164833, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.428253123628704, + "kl": 0.0018215179443359375, + "learning_rate": 6.019999999999999e-07, + "loss": 0.0683, + "num_tokens": 13095117.0, + "reward": 0.0, + "reward_std": 0.5788823962211609, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06365042046609755, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06862711058584961, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 302 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1203.0, + "completions/max_terminated_length": 1203.0, + "completions/mean_length": 1007.0, + "completions/mean_terminated_length": 1007.0, + "completions/min_length": 890.0, + "completions/min_terminated_length": 890.0, + "epoch": 0.060612122424484896, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8126256761033277, + "kl": 0.0015935897827148438, + "learning_rate": 6.04e-07, + "loss": 0.0, + "num_tokens": 13129589.0, + "reward": 0.0, + "reward_std": 0.6733622550964355, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04073307552236484, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0492556381690748, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08027729719194865, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 303 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1320.0, + "completions/max_terminated_length": 1320.0, + "completions/mean_length": 1048.625, + "completions/mean_terminated_length": 1048.625, + "completions/min_length": 731.0, + "completions/min_terminated_length": 731.0, + "epoch": 0.060812162432486494, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8847733679248653, + "kl": 0.001735687255859375, + "learning_rate": 6.06e-07, + "loss": -0.0104, + "num_tokens": 13166831.0, + "reward": 0.0, + "reward_std": 0.6622277498245239, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.030048633966937896, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09215729191919243, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0787635937708768, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 304 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1453.0, + "completions/mean_length": 1294.875, + "completions/mean_terminated_length": 1281.2000732421875, + "completions/min_length": 978.0, + "completions/min_terminated_length": 978.0, + "epoch": 0.0610122024404881, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9447118255866194, + "kl": 0.002262115478515625, + "learning_rate": 6.079999999999999e-07, + "loss": -0.0037, + "num_tokens": 13216093.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9884958267211914, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0729248226068816, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09867954764425028, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11603000888978234, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 305 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1404.0, + "completions/max_terminated_length": 1404.0, + "completions/mean_length": 1209.9375, + "completions/mean_terminated_length": 1209.9375, + "completions/min_length": 1055.0, + "completions/min_terminated_length": 1055.0, + "epoch": 0.0612122424484897, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.78100199923086, + "kl": 0.0018062591552734375, + "learning_rate": 6.1e-07, + "loss": 0.0339, + "num_tokens": 13259324.0, + "reward": 0.0, + "reward_std": 0.7120941281318665, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.012373758205588115, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1184791436042197, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12102953419784838, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 306 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1342.0, + "completions/max_terminated_length": 1342.0, + "completions/mean_length": 1068.875, + "completions/mean_terminated_length": 1068.875, + "completions/min_length": 877.0, + "completions/min_terminated_length": 877.0, + "epoch": 0.0614122824564913, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.937061563990967, + "kl": 0.002109527587890625, + "learning_rate": 6.119999999999999e-07, + "loss": 0.0023, + "num_tokens": 13301170.0, + "reward": 0.0, + "reward_std": 0.9270023107528687, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10116701730733584, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.23314572707355258, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08388704928078611, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 307 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1089.0, + "completions/max_terminated_length": 1089.0, + "completions/mean_length": 977.0, + "completions/mean_terminated_length": 977.0, + "completions/min_length": 892.0, + "completions/min_terminated_length": 892.0, + "epoch": 0.0616123224644929, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.177583639708097, + "kl": 0.003101348876953125, + "learning_rate": 6.14e-07, + "loss": 0.0034, + "num_tokens": 13335458.0, + "reward": 0.0, + "reward_std": 1.0414657592773438, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03895137290728059, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03340112417156588, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14700718047466632, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 308 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1290.0, + "completions/max_terminated_length": 1290.0, + "completions/mean_length": 1037.875, + "completions/mean_terminated_length": 1037.875, + "completions/min_length": 747.0, + "completions/min_terminated_length": 747.0, + "epoch": 0.0618123624724945, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.029195053745548, + "kl": 0.0021038055419921875, + "learning_rate": 6.16e-07, + "loss": -0.0021, + "num_tokens": 13384640.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0558497905731201, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09395981856993788, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06904762787050416, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14240006242195888, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 309 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1413.0, + "completions/mean_length": 1187.25, + "completions/mean_terminated_length": 1142.571533203125, + "completions/min_length": 973.0, + "completions/min_terminated_length": 973.0, + "epoch": 0.0620124024804961, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7757741053105756, + "kl": 0.00174713134765625, + "learning_rate": 6.18e-07, + "loss": -0.0006, + "num_tokens": 13436692.0, + "reward": 0.0, + "reward_std": 0.8142619132995605, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10531223742272522, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05840604954113515, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.16049691355057039, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 310 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1248.0, + "completions/max_terminated_length": 1248.0, + "completions/mean_length": 1056.625, + "completions/mean_terminated_length": 1056.625, + "completions/min_length": 904.0, + "completions/min_terminated_length": 904.0, + "epoch": 0.0622124424884977, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9094759011377422, + "kl": 0.002422332763671875, + "learning_rate": 6.2e-07, + "loss": -0.0161, + "num_tokens": 13468686.0, + "reward": 0.0, + "reward_std": 0.9734148383140564, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.17153646419986524, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06026513076610012, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08027729719194865, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 311 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1265.0, + "completions/mean_terminated_length": 1210.769287109375, + "completions/min_length": 862.0, + "completions/min_terminated_length": 862.0, + "epoch": 0.0624124824964993, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.168780423773623, + "kl": 0.002361297607421875, + "learning_rate": 6.219999999999999e-07, + "loss": 0.0041, + "num_tokens": 13521390.0, + "reward": 0.0, + "reward_std": 0.8670443892478943, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0929775233065675, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09299083701324247, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06540472290116194, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 312 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1225.0, + "completions/max_terminated_length": 1225.0, + "completions/mean_length": 1020.0, + "completions/mean_terminated_length": 1020.0, + "completions/min_length": 646.0, + "completions/min_terminated_length": 646.0, + "epoch": 0.0626125225045009, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.960781600501938, + "kl": 0.0016078948974609375, + "learning_rate": 6.24e-07, + "loss": 0.0318, + "num_tokens": 13569598.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9484236836433411, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.062070568038496685, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03660297221137235, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04999999999999999, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 313 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1430.0, + "completions/max_terminated_length": 1430.0, + "completions/mean_length": 1017.1875, + "completions/mean_terminated_length": 1017.1875, + "completions/min_length": 647.0, + "completions/min_terminated_length": 647.0, + "epoch": 0.0628125625125025, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.779357098212893, + "kl": 0.003082275390625, + "learning_rate": 6.26e-07, + "loss": -0.0508, + "num_tokens": 13624497.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0094335079193115, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.045723084426186014, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14984218134514646, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07503085784948506, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 314 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1274.0, + "completions/max_terminated_length": 1274.0, + "completions/mean_length": 1090.9375, + "completions/mean_terminated_length": 1090.9375, + "completions/min_length": 816.0, + "completions/min_terminated_length": 816.0, + "epoch": 0.0630126025205041, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.726879598583714, + "kl": 0.0016622543334960938, + "learning_rate": 6.28e-07, + "loss": -0.0422, + "num_tokens": 13660488.0, + "reward": 0.0, + "reward_std": 0.7584260106086731, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.09830465457804201, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06323443440430934, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12141145226353545, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 315 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1437.0, + "completions/mean_length": 1116.6875, + "completions/mean_terminated_length": 988.9166870117188, + "completions/min_length": 705.0, + "completions/min_terminated_length": 705.0, + "epoch": 0.0632126425285057, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4499430752651574, + "kl": 0.002643585205078125, + "learning_rate": 6.3e-07, + "loss": -0.032, + "num_tokens": 13714547.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.034705638885498, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07133341894230141, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14901396307360357, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10292032157252813, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 316 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1361.0, + "completions/mean_length": 1080.0, + "completions/mean_terminated_length": 1052.0, + "completions/min_length": 810.0, + "completions/min_terminated_length": 810.0, + "epoch": 0.0634126825365073, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3305727618467285, + "kl": 0.002620697021484375, + "learning_rate": 6.319999999999999e-07, + "loss": -0.0326, + "num_tokens": 13755627.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8016319274902344, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08625898400442711, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17684575589343907, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07097208632298363, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 317 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1406.0, + "completions/max_terminated_length": 1406.0, + "completions/mean_length": 991.5625, + "completions/mean_terminated_length": 991.5625, + "completions/min_length": 672.0, + "completions/min_terminated_length": 672.0, + "epoch": 0.0636127225445089, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.930629017500912, + "kl": 0.0019245147705078125, + "learning_rate": 6.34e-07, + "loss": -0.0116, + "num_tokens": 13796020.0, + "reward": 0.0, + "reward_std": 1.0283989906311035, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.058567554810386606, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06035585819742332, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09888264649460886, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 318 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1204.0, + "completions/max_terminated_length": 1204.0, + "completions/mean_length": 932.9375, + "completions/mean_terminated_length": 932.9375, + "completions/min_length": 740.0, + "completions/min_terminated_length": 740.0, + "epoch": 0.0638127625525105, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.638799159556754, + "kl": 0.002391815185546875, + "learning_rate": 6.36e-07, + "loss": -0.0443, + "num_tokens": 13838067.0, + "reward": 0.0, + "reward_std": 0.7847166061401367, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1052325526709335, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05052333138997594, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0666666666666667, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 319 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1385.0, + "completions/max_terminated_length": 1385.0, + "completions/mean_length": 1119.6875, + "completions/mean_terminated_length": 1119.6875, + "completions/min_length": 619.0, + "completions/min_terminated_length": 619.0, + "epoch": 0.0640128025605121, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9297521691610875, + "kl": 0.002017974853515625, + "learning_rate": 6.38e-07, + "loss": -0.0465, + "num_tokens": 13874902.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5946656465530396, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.058210771125527236, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1984957397168126, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08606629658238704, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 320 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1427.0, + "completions/mean_length": 1154.125, + "completions/mean_terminated_length": 1131.0667724609375, + "completions/min_length": 776.0, + "completions/min_terminated_length": 776.0, + "epoch": 0.0642128425685137, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.006980024894368, + "kl": 0.002017974853515625, + "learning_rate": 6.4e-07, + "loss": 0.0163, + "num_tokens": 13920976.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0422186851501465, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.009697391016161815, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06159667741284857, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07781745019952503, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 321 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1335.0, + "completions/max_terminated_length": 1335.0, + "completions/mean_length": 1063.25, + "completions/mean_terminated_length": 1063.25, + "completions/min_length": 858.0, + "completions/min_terminated_length": 858.0, + "epoch": 0.0644128825765153, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.79919816263666, + "kl": 0.002696990966796875, + "learning_rate": 6.42e-07, + "loss": 0.0181, + "num_tokens": 13970252.0, + "reward": 0.0, + "reward_std": 0.8310703039169312, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.2708716425539972, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.30101033324130905, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09574271077563382, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 322 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1327.0, + "completions/max_terminated_length": 1327.0, + "completions/mean_length": 1099.5625, + "completions/mean_terminated_length": 1099.5625, + "completions/min_length": 915.0, + "completions/min_terminated_length": 915.0, + "epoch": 0.06461292258451691, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5521873298212494, + "kl": 0.00286102294921875, + "learning_rate": 6.44e-07, + "loss": -0.0001, + "num_tokens": 14013693.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9920013546943665, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06858390387799396, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06286390421149182, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10318986456114841, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 323 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1036.0, + "completions/max_terminated_length": 1036.0, + "completions/mean_length": 809.375, + "completions/mean_terminated_length": 809.375, + "completions/min_length": 639.0, + "completions/min_terminated_length": 639.0, + "epoch": 0.06481296259251851, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.510516944170776, + "kl": 0.002910614013671875, + "learning_rate": 6.46e-07, + "loss": -0.0281, + "num_tokens": 14048467.0, + "reward": 0.0, + "reward_std": 0.9303197860717773, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07148114017204495, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07153671002932546, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13492110177323527, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 324 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1366.0, + "completions/max_terminated_length": 1366.0, + "completions/mean_length": 1065.1875, + "completions/mean_terminated_length": 1065.1875, + "completions/min_length": 721.0, + "completions/min_terminated_length": 721.0, + "epoch": 0.0650130026005201, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.675818276115073, + "kl": 0.002651214599609375, + "learning_rate": 6.48e-07, + "loss": 0.0448, + "num_tokens": 14089854.0, + "reward": 0.0, + "reward_std": 0.9601966142654419, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.20044351200671048, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.21996093691288607, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07969850595746354, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 325 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1208.0, + "completions/max_terminated_length": 1208.0, + "completions/mean_length": 954.0625, + "completions/mean_terminated_length": 954.0625, + "completions/min_length": 657.0, + "completions/min_terminated_length": 657.0, + "epoch": 0.0652130426085217, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.061262775713941, + "kl": 0.0031890869140625, + "learning_rate": 6.5e-07, + "loss": -0.0271, + "num_tokens": 14137895.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0219199657440186, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.016097936024214877, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09409175624689142, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14168300559373406, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 326 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1187.5, + "completions/mean_terminated_length": 1142.857177734375, + "completions/min_length": 911.0, + "completions/min_terminated_length": 911.0, + "epoch": 0.0654130826165233, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6267539978653143, + "kl": 0.0014667510986328125, + "learning_rate": 6.52e-07, + "loss": 0.0136, + "num_tokens": 14184807.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0660490989685059, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.05274131067546654, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1711346826472948, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07888106377466156, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 327 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1368.0, + "completions/max_terminated_length": 1368.0, + "completions/mean_length": 1198.1875, + "completions/mean_terminated_length": 1198.1875, + "completions/min_length": 1032.0, + "completions/min_terminated_length": 1032.0, + "epoch": 0.0656131226245249, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.0409215394938145, + "kl": 0.0007524490356445312, + "learning_rate": 6.54e-07, + "loss": -0.011, + "num_tokens": 14229682.0, + "reward": -3.725290298461914e-09, + "reward_std": 1.0571057796478271, + "rewards/wordcountpos_reward_nokeypoint/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0549153607488499, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03071917681316496, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.047919685895217376, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 328 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1412.0, + "completions/mean_length": 926.25, + "completions/mean_terminated_length": 888.0000610351562, + "completions/min_length": 743.0, + "completions/min_terminated_length": 743.0, + "epoch": 0.06581316263252651, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0457874334641826, + "kl": 0.0021457672119140625, + "learning_rate": 6.56e-07, + "loss": 0.0301, + "num_tokens": 14271782.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7471452951431274, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1359737330376989, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09591620614971735, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.15957118462605638, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 329 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1299.9375, + "completions/mean_terminated_length": 1286.60009765625, + "completions/min_length": 958.0, + "completions/min_terminated_length": 958.0, + "epoch": 0.06601320264052811, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4720371393890512, + "kl": 0.001430511474609375, + "learning_rate": 6.58e-07, + "loss": -0.045, + "num_tokens": 14315309.0, + "reward": 0.0, + "reward_std": 0.8704723119735718, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05032646892378682, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05778096186242541, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0758897836290186, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 330 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1454.0, + "completions/max_terminated_length": 1454.0, + "completions/mean_length": 1064.5625, + "completions/mean_terminated_length": 1064.5625, + "completions/min_length": 802.0, + "completions/min_terminated_length": 802.0, + "epoch": 0.06621324264852971, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6538470196076043, + "kl": 0.003032684326171875, + "learning_rate": 6.6e-07, + "loss": -0.021, + "num_tokens": 14360414.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9068003296852112, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.12131599779117519, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17296909863838933, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10878112581387149, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 331 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1301.0, + "completions/max_terminated_length": 1301.0, + "completions/mean_length": 1139.4375, + "completions/mean_terminated_length": 1139.4375, + "completions/min_length": 943.0, + "completions/min_terminated_length": 943.0, + "epoch": 0.0664132826565313, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.029129394721588, + "kl": 0.00246429443359375, + "learning_rate": 6.62e-07, + "loss": -0.0367, + "num_tokens": 14407693.0, + "reward": 0.0, + "reward_std": 0.6307567358016968, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.017176072239826326, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14227986320210273, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10610965676722954, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 332 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1214.0, + "completions/mean_length": 1116.5625, + "completions/mean_terminated_length": 1091.0, + "completions/min_length": 950.0, + "completions/min_terminated_length": 950.0, + "epoch": 0.0666133226645329, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2282435737165915, + "kl": 0.002735137939453125, + "learning_rate": 6.64e-07, + "loss": -0.0247, + "num_tokens": 14449494.0, + "reward": 0.0, + "reward_std": 0.7950897216796875, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06577992406830349, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08576251644547134, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08333333333333333, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 333 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1327.0, + "completions/max_terminated_length": 1327.0, + "completions/mean_length": 851.0, + "completions/mean_terminated_length": 851.0, + "completions/min_length": 355.0, + "completions/min_terminated_length": 355.0, + "epoch": 0.0668133626725345, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7235356040223193, + "kl": 0.001922607421875, + "learning_rate": 6.66e-07, + "loss": -0.0738, + "num_tokens": 14493022.0, + "reward": 0.0, + "reward_std": 0.7034953236579895, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04846933824637148, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05509958356550029, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09803627446568497, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 334 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1243.0, + "completions/max_terminated_length": 1243.0, + "completions/mean_length": 1056.0, + "completions/mean_terminated_length": 1056.0, + "completions/min_length": 662.0, + "completions/min_terminated_length": 662.0, + "epoch": 0.06701340268053611, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1336913746336963, + "kl": 0.0015172958374023438, + "learning_rate": 6.68e-07, + "loss": -0.026, + "num_tokens": 14527846.0, + "reward": 0.0, + "reward_std": 0.844134509563446, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07533597659404187, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07499799360405916, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09651328828101764, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 335 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1201.0, + "completions/max_terminated_length": 1201.0, + "completions/mean_length": 1057.125, + "completions/mean_terminated_length": 1057.125, + "completions/min_length": 899.0, + "completions/min_terminated_length": 899.0, + "epoch": 0.06721344268853771, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6699185146250417, + "kl": 0.0015163421630859375, + "learning_rate": 6.7e-07, + "loss": -0.0053, + "num_tokens": 14578880.0, + "reward": 0.0, + "reward_std": 0.45521780848503113, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04925938586371086, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2106401494803362, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09953596037316065, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 336 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1086.0, + "completions/max_terminated_length": 1086.0, + "completions/mean_length": 941.75, + "completions/mean_terminated_length": 941.75, + "completions/min_length": 636.0, + "completions/min_terminated_length": 636.0, + "epoch": 0.06741348269653931, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.013173954844018, + "kl": 0.0017604827880859375, + "learning_rate": 6.72e-07, + "loss": 0.0078, + "num_tokens": 14629660.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0198047161102295, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06393410692468167, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1518267675516182, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08595864638818419, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 337 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1203.0, + "completions/mean_length": 1055.5625, + "completions/mean_terminated_length": 1025.933349609375, + "completions/min_length": 880.0, + "completions/min_terminated_length": 880.0, + "epoch": 0.06761352270454091, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0109714238892376, + "kl": 0.0022144317626953125, + "learning_rate": 6.74e-07, + "loss": -0.064, + "num_tokens": 14670277.0, + "reward": 0.0, + "reward_std": 0.705368161201477, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05579697628776519, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.22257236145978596, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10174405069512346, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 338 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1414.375, + "completions/mean_terminated_length": 1347.77783203125, + "completions/min_length": 845.0, + "completions/min_terminated_length": 845.0, + "epoch": 0.0678135627125425, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5545739123179767, + "kl": 0.0016918182373046875, + "learning_rate": 6.76e-07, + "loss": -0.0, + "num_tokens": 14719467.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6121293902397156, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.12296810274635994, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08833306944024044, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08681611046941136, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 339 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1431.0, + "completions/max_terminated_length": 1431.0, + "completions/mean_length": 1233.3125, + "completions/mean_terminated_length": 1233.3125, + "completions/min_length": 911.0, + "completions/min_terminated_length": 911.0, + "epoch": 0.0680136027205441, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7368474886244423, + "kl": 0.0017642974853515625, + "learning_rate": 6.78e-07, + "loss": 0.014, + "num_tokens": 14769112.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7724797129631042, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.11151077087494858, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09105827265003404, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14298407059684812, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 340 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1434.0, + "completions/mean_length": 1179.0, + "completions/mean_terminated_length": 1157.60009765625, + "completions/min_length": 930.0, + "completions/min_terminated_length": 930.0, + "epoch": 0.06821364272854571, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.035724534093981, + "kl": 0.002117156982421875, + "learning_rate": 6.800000000000001e-07, + "loss": -0.0288, + "num_tokens": 14811792.0, + "reward": 0.0, + "reward_std": 0.705655574798584, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.17518688187787623, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.26961736339109976, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06426219440409445, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 341 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1372.0, + "completions/max_terminated_length": 1372.0, + "completions/mean_length": 1025.125, + "completions/mean_terminated_length": 1025.125, + "completions/min_length": 714.0, + "completions/min_terminated_length": 714.0, + "epoch": 0.06841368273654731, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9296209708251375, + "kl": 0.001819610595703125, + "learning_rate": 6.82e-07, + "loss": -0.0273, + "num_tokens": 14861866.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9582629799842834, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1623747637989587, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10498263402473496, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.133263870794973, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 342 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1461.0, + "completions/mean_length": 1238.5, + "completions/mean_terminated_length": 1221.0667724609375, + "completions/min_length": 1029.0, + "completions/min_terminated_length": 1029.0, + "epoch": 0.06861372274454891, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.259909282822568, + "kl": 0.002445220947265625, + "learning_rate": 6.84e-07, + "loss": 0.008, + "num_tokens": 14904522.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7319600582122803, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.2371651317276755, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1588665884099907, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07391185942027818, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 343 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1223.0, + "completions/max_terminated_length": 1223.0, + "completions/mean_length": 954.9375, + "completions/mean_terminated_length": 954.9375, + "completions/min_length": 844.0, + "completions/min_terminated_length": 844.0, + "epoch": 0.06881376275255051, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.908827350409384, + "kl": 0.0020351409912109375, + "learning_rate": 6.86e-07, + "loss": -0.0165, + "num_tokens": 14946593.0, + "reward": 0.0, + "reward_std": 0.9777045249938965, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04932851739813763, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06136731818439633, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13158576980363348, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 344 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1447.0, + "completions/mean_length": 1288.25, + "completions/mean_terminated_length": 1217.666748046875, + "completions/min_length": 790.0, + "completions/min_terminated_length": 790.0, + "epoch": 0.0690138027605521, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.881248742874211, + "kl": 0.0023784637451171875, + "learning_rate": 6.879999999999999e-07, + "loss": -0.0014, + "num_tokens": 15000629.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9439976215362549, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0936918066922655, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1700552441482114, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13957607775504183, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 345 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1350.5, + "completions/mean_terminated_length": 1300.666748046875, + "completions/min_length": 1101.0, + "completions/min_terminated_length": 1101.0, + "epoch": 0.0692138427685537, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.816949229971803, + "kl": 0.002552032470703125, + "learning_rate": 6.9e-07, + "loss": -0.0174, + "num_tokens": 15045149.0, + "reward": 0.0, + "reward_std": 0.7812479734420776, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.10805725296754637, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1527770035618729, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 346 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1361.0, + "completions/mean_length": 1156.0, + "completions/mean_terminated_length": 1133.0667724609375, + "completions/min_length": 949.0, + "completions/min_terminated_length": 949.0, + "epoch": 0.06941388277655532, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5288820398732517, + "kl": 0.0013675689697265625, + "learning_rate": 6.919999999999999e-07, + "loss": -0.0431, + "num_tokens": 15089813.0, + "reward": 0.0, + "reward_std": 0.8824272751808167, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05163611558067876, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05578371492384978, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08432740427115679, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 347 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1275.625, + "completions/mean_terminated_length": 1243.571533203125, + "completions/min_length": 1012.0, + "completions/min_terminated_length": 1012.0, + "epoch": 0.06961392278455691, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7077493957068413, + "kl": 0.002986907958984375, + "learning_rate": 6.939999999999999e-07, + "loss": 0.0624, + "num_tokens": 15139119.0, + "reward": 0.0, + "reward_std": 1.0427207946777344, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.16726241420703053, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08145118095912635, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.6833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1813529401164726, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 348 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1426.0, + "completions/mean_length": 1242.6875, + "completions/mean_terminated_length": 1205.9285888671875, + "completions/min_length": 921.0, + "completions/min_terminated_length": 921.0, + "epoch": 0.06981396279255851, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1193249410242814, + "kl": 0.002666473388671875, + "learning_rate": 6.959999999999999e-07, + "loss": -0.0272, + "num_tokens": 15187850.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9013919830322266, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05184845586729065, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09439807649180176, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0620632890834175, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 349 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1441.0, + "completions/mean_length": 1213.3125, + "completions/mean_terminated_length": 1194.2000732421875, + "completions/min_length": 1058.0, + "completions/min_terminated_length": 1058.0, + "epoch": 0.07001400280056011, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9172405680981237, + "kl": 0.002223968505859375, + "learning_rate": 6.979999999999999e-07, + "loss": -0.0447, + "num_tokens": 15230823.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9998873472213745, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0406282715045708, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03867874358719472, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09388724521901161, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 350 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 1151.9375, + "completions/mean_terminated_length": 993.727294921875, + "completions/min_length": 603.0, + "completions/min_terminated_length": 603.0, + "epoch": 0.07021404280856171, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.87144856498964, + "kl": 0.0019588470458984375, + "learning_rate": 7e-07, + "loss": -0.0081, + "num_tokens": 15273830.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8936978578567505, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05882835543892733, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12352255316920203, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 351 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1406.0, + "completions/mean_length": 1208.125, + "completions/mean_terminated_length": 1188.666748046875, + "completions/min_length": 948.0, + "completions/min_terminated_length": 948.0, + "epoch": 0.0704140828165633, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.485886749827238, + "kl": 0.0015850067138671875, + "learning_rate": 7.019999999999999e-07, + "loss": 0.0105, + "num_tokens": 15318424.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0468785762786865, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.2517797020931366, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12360030026997942, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09428090415820636, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 352 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1270.0, + "completions/mean_length": 1167.1875, + "completions/mean_terminated_length": 1145.0, + "completions/min_length": 1020.0, + "completions/min_terminated_length": 1020.0, + "epoch": 0.07061412282456492, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0597257794216643, + "kl": 0.002948760986328125, + "learning_rate": 7.04e-07, + "loss": 0.0068, + "num_tokens": 15364843.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.974082887172699, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.14529218681599296, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1558113762047311, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.22026919557332234, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 353 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1496.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1218.5, + "completions/mean_terminated_length": 1218.5, + "completions/min_length": 1042.0, + "completions/min_terminated_length": 1042.0, + "epoch": 0.07081416283256652, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.535095596619962, + "kl": 0.0018138885498046875, + "learning_rate": 7.059999999999999e-07, + "loss": -0.0206, + "num_tokens": 15416267.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0632013082504272, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05752101356266756, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10237021027478785, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875000000000001, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07969850595746354, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 354 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1030.0, + "completions/max_terminated_length": 1030.0, + "completions/mean_length": 943.5625, + "completions/mean_terminated_length": 943.5625, + "completions/min_length": 841.0, + "completions/min_terminated_length": 841.0, + "epoch": 0.07101420284056811, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.531911505952056, + "kl": 0.0007543563842773438, + "learning_rate": 7.079999999999999e-07, + "loss": -0.0088, + "num_tokens": 15447140.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9054348468780518, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.010061282361986203, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0808916388713053, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0718795288428261, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 355 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1371.0, + "completions/mean_length": 1072.75, + "completions/mean_terminated_length": 1011.71435546875, + "completions/min_length": 831.0, + "completions/min_terminated_length": 831.0, + "epoch": 0.07121424284856971, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.06662363006217, + "kl": 0.00211334228515625, + "learning_rate": 7.1e-07, + "loss": -0.0233, + "num_tokens": 15498048.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6050138473510742, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1334309841342444, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09555002125162401, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08734775114237132, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 356 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1370.0, + "completions/mean_length": 1307.0625, + "completions/mean_terminated_length": 1279.5, + "completions/min_length": 957.0, + "completions/min_terminated_length": 957.0, + "epoch": 0.07141428285657131, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1408757938831973, + "kl": 0.0017604827880859375, + "learning_rate": 7.119999999999999e-07, + "loss": -0.0355, + "num_tokens": 15546457.0, + "reward": 0.0, + "reward_std": 1.0633103847503662, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09629598365552976, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06200736582161165, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1002773930432755, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 357 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1253.0, + "completions/max_terminated_length": 1253.0, + "completions/mean_length": 1032.875, + "completions/mean_terminated_length": 1032.875, + "completions/min_length": 812.0, + "completions/min_terminated_length": 812.0, + "epoch": 0.07161432286457292, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5200336027926404, + "kl": 0.002655029296875, + "learning_rate": 7.14e-07, + "loss": -0.0531, + "num_tokens": 15597159.0, + "reward": 0.0, + "reward_std": 0.7375060319900513, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04998268172778838, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09819825713856337, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08933913745655644, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 358 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1168.0, + "completions/max_terminated_length": 1168.0, + "completions/mean_length": 992.0, + "completions/mean_terminated_length": 992.0, + "completions/min_length": 839.0, + "completions/min_terminated_length": 839.0, + "epoch": 0.07181436287257452, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.663470257183742, + "kl": 0.00310516357421875, + "learning_rate": 7.159999999999999e-07, + "loss": -0.0059, + "num_tokens": 15639647.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9935119152069092, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.3483290628806609, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.22867036957442013, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07490735018081414, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 359 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1406.0, + "completions/mean_length": 1149.0625, + "completions/mean_terminated_length": 1125.666748046875, + "completions/min_length": 811.0, + "completions/min_terminated_length": 811.0, + "epoch": 0.07201440288057612, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2038059104342653, + "kl": 0.002460479736328125, + "learning_rate": 7.179999999999999e-07, + "loss": -0.0122, + "num_tokens": 15688760.0, + "reward": 0.0, + "reward_std": 0.876205563545227, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.12511466261993245, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08827074223902567, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0850925422157591, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 360 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1232.0, + "completions/max_terminated_length": 1232.0, + "completions/mean_length": 933.8125, + "completions/mean_terminated_length": 933.8125, + "completions/min_length": 19.0, + "completions/min_terminated_length": 19.0, + "epoch": 0.07221444288857772, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.083074148576818, + "kl": 0.00205230712890625, + "learning_rate": 7.2e-07, + "loss": -0.1463, + "num_tokens": 15737637.0, + "reward": 0.0, + "reward_std": 1.0291200876235962, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.38407346997094094, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16171858816453336, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.19148542155126763, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 361 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1469.0, + "completions/max_terminated_length": 1469.0, + "completions/mean_length": 1092.5, + "completions/mean_terminated_length": 1092.5, + "completions/min_length": 892.0, + "completions/min_terminated_length": 892.0, + "epoch": 0.07241448289657931, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5410087118063465, + "kl": 0.0016307830810546875, + "learning_rate": 7.219999999999999e-07, + "loss": 0.0456, + "num_tokens": 15778485.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.38191983103752136, + "rewards/wordcountpos_reward_nokeypoint/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.11336768403166607, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17835124708512148, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09583937179043478, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 362 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1367.0, + "completions/max_terminated_length": 1367.0, + "completions/mean_length": 1082.0625, + "completions/mean_terminated_length": 1082.0625, + "completions/min_length": 833.0, + "completions/min_terminated_length": 833.0, + "epoch": 0.07261452290458091, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4751409578576347, + "kl": 0.002613067626953125, + "learning_rate": 7.24e-07, + "loss": -0.0078, + "num_tokens": 15820214.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0028159618377686, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.13521388928891098, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08122136834306855, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1403039029577766, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 363 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1175.0, + "completions/max_terminated_length": 1175.0, + "completions/mean_length": 986.4375, + "completions/mean_terminated_length": 986.4375, + "completions/min_length": 760.0, + "completions/min_terminated_length": 760.0, + "epoch": 0.07281456291258252, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.846324158363804, + "kl": 0.00278472900390625, + "learning_rate": 7.259999999999999e-07, + "loss": 0.0096, + "num_tokens": 15850141.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.4547792077064514, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.24853806768033587, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07709225281838042, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09179284245476838, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 364 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1401.0, + "completions/max_terminated_length": 1401.0, + "completions/mean_length": 1132.3125, + "completions/mean_terminated_length": 1132.3125, + "completions/min_length": 889.0, + "completions/min_terminated_length": 889.0, + "epoch": 0.07301460292058412, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1035643550752336, + "kl": 0.002361297607421875, + "learning_rate": 7.28e-07, + "loss": -0.0212, + "num_tokens": 15894786.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6630457043647766, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07665848635857765, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.055994000329082756, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09953596037316068, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 365 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1468.0, + "completions/mean_length": 1287.125, + "completions/mean_terminated_length": 1272.933349609375, + "completions/min_length": 1059.0, + "completions/min_terminated_length": 1059.0, + "epoch": 0.07321464292858572, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1845270793640035, + "kl": 0.0029449462890625, + "learning_rate": 7.3e-07, + "loss": -0.015, + "num_tokens": 15939772.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9745171070098877, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.020137842112887324, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.049156351693108113, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09803627446568496, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 366 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1380.0, + "completions/mean_length": 1339.6875, + "completions/mean_terminated_length": 1179.375, + "completions/min_length": 969.0, + "completions/min_terminated_length": 969.0, + "epoch": 0.07341468293658732, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.393099933036064, + "kl": 0.001800537109375, + "learning_rate": 7.319999999999999e-07, + "loss": 0.0009, + "num_tokens": 15984031.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.530692458152771, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.22610506924534357, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.3942636541117855, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06382847385042252, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 367 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1314.0, + "completions/max_terminated_length": 1314.0, + "completions/mean_length": 1175.8125, + "completions/mean_terminated_length": 1175.8125, + "completions/min_length": 1076.0, + "completions/min_terminated_length": 1076.0, + "epoch": 0.07361472294458891, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.3253744753909082, + "kl": 0.000598907470703125, + "learning_rate": 7.34e-07, + "loss": -0.0023, + "num_tokens": 16020300.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5411139726638794, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.004682378120607981, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07929864242136896, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.047919685895217376, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 368 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1471.0, + "completions/mean_length": 1215.6875, + "completions/mean_terminated_length": 1086.45458984375, + "completions/min_length": 972.0, + "completions/min_terminated_length": 972.0, + "epoch": 0.07381476295259051, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4899929218235646, + "kl": 0.001880645751953125, + "learning_rate": 7.359999999999999e-07, + "loss": 0.0244, + "num_tokens": 16071863.0, + "reward": 0.0, + "reward_std": 0.8033138513565063, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.12324959361052476, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14115236832989717, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.18678567634829202, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 369 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 872.0, + "completions/max_terminated_length": 872.0, + "completions/mean_length": 730.0, + "completions/mean_terminated_length": 730.0, + "completions/min_length": 620.0, + "completions/min_terminated_length": 620.0, + "epoch": 0.07401480296059212, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.718977421886424, + "kl": 0.001270294189453125, + "learning_rate": 7.38e-07, + "loss": 0.0038, + "num_tokens": 16098111.0, + "reward": 0.0, + "reward_std": 0.8377746343612671, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.025951983877257688, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17353772109226326, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1276569477008451, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 370 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1400.0, + "completions/max_terminated_length": 1400.0, + "completions/mean_length": 1076.25, + "completions/mean_terminated_length": 1076.25, + "completions/min_length": 776.0, + "completions/min_terminated_length": 776.0, + "epoch": 0.07421484296859372, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5410250077679892, + "kl": 0.002910614013671875, + "learning_rate": 7.4e-07, + "loss": -0.0394, + "num_tokens": 16137091.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.9029546976089478, + "rewards/wordcountpos_reward_nokeypoint/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.10377052385274668, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06820259161748492, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07649739768026002, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 371 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1412.0, + "completions/mean_length": 1162.4375, + "completions/mean_terminated_length": 1084.5384521484375, + "completions/min_length": 849.0, + "completions/min_terminated_length": 849.0, + "epoch": 0.07441488297659532, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3230005009291017, + "kl": 0.002902984619140625, + "learning_rate": 7.42e-07, + "loss": -0.0702, + "num_tokens": 16170450.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.579163134098053, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.16478741748650536, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17152136734504211, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13957607775504183, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 372 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1356.0, + "completions/mean_length": 1225.4375, + "completions/mean_terminated_length": 1207.1334228515625, + "completions/min_length": 913.0, + "completions/min_terminated_length": 913.0, + "epoch": 0.07461492298459692, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1882596959059293, + "kl": 0.0024166107177734375, + "learning_rate": 7.44e-07, + "loss": -0.0447, + "num_tokens": 16213385.0, + "reward": 0.0, + "reward_std": 0.8411151766777039, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0058458092411815986, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.01902710442234823, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.23025750480390106, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 373 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1360.0, + "completions/max_terminated_length": 1360.0, + "completions/mean_length": 1139.75, + "completions/mean_terminated_length": 1139.75, + "completions/min_length": 817.0, + "completions/min_terminated_length": 817.0, + "epoch": 0.07481496299259852, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4106630607811095, + "kl": 0.002552032470703125, + "learning_rate": 7.459999999999999e-07, + "loss": -0.0152, + "num_tokens": 16253189.0, + "reward": 0.0, + "reward_std": 0.8517966270446777, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03763152396368936, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08245404462152776, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09179284245476839, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 374 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1295.0, + "completions/max_terminated_length": 1295.0, + "completions/mean_length": 1121.4375, + "completions/mean_terminated_length": 1121.4375, + "completions/min_length": 886.0, + "completions/min_terminated_length": 886.0, + "epoch": 0.07501500300060011, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4556745340762682, + "kl": 0.00292205810546875, + "learning_rate": 7.48e-07, + "loss": -0.0016, + "num_tokens": 16293692.0, + "reward": 0.0, + "reward_std": 0.888988733291626, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.043497548504990026, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14118493847072852, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 375 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1429.0, + "completions/mean_length": 1191.3125, + "completions/mean_terminated_length": 1147.21435546875, + "completions/min_length": 988.0, + "completions/min_terminated_length": 988.0, + "epoch": 0.07521504300860173, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.553757274454384, + "kl": 0.00337982177734375, + "learning_rate": 7.5e-07, + "loss": 0.0181, + "num_tokens": 16347793.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8260334134101868, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1392011822166792, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04421730938549221, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13305526559931294, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 376 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 994.0, + "completions/mean_length": 896.3125, + "completions/mean_terminated_length": 856.0667114257812, + "completions/min_length": 702.0, + "completions/min_terminated_length": 702.0, + "epoch": 0.07541508301660332, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.57904104466087, + "kl": 0.0025177001953125, + "learning_rate": 7.52e-07, + "loss": -0.0318, + "num_tokens": 16399558.0, + "reward": 0.0, + "reward_std": 0.3901280164718628, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.011001371137415054, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.20970208290152467, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.16233253479155638, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 377 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1262.0, + "completions/max_terminated_length": 1262.0, + "completions/mean_length": 1040.5625, + "completions/mean_terminated_length": 1040.5625, + "completions/min_length": 782.0, + "completions/min_terminated_length": 782.0, + "epoch": 0.07561512302460492, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6399626681618407, + "kl": 0.0014743804931640625, + "learning_rate": 7.54e-07, + "loss": 0.0261, + "num_tokens": 16434423.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9672821760177612, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.032353029408212566, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04858101742043645, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08777074514725111, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 378 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1075.0, + "completions/max_terminated_length": 1075.0, + "completions/mean_length": 888.375, + "completions/mean_terminated_length": 888.375, + "completions/min_length": 747.0, + "completions/min_terminated_length": 747.0, + "epoch": 0.07581516303260652, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.384665928003429, + "kl": 0.002716064453125, + "learning_rate": 7.559999999999999e-07, + "loss": -0.0229, + "num_tokens": 16473797.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7845102548599243, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04443783342349225, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12494204747572885, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.03626037527129048, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 379 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1314.0, + "completions/mean_length": 1150.8125, + "completions/mean_terminated_length": 1127.533447265625, + "completions/min_length": 843.0, + "completions/min_terminated_length": 843.0, + "epoch": 0.07601520304060812, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3731676456819244, + "kl": 0.002655029296875, + "learning_rate": 7.58e-07, + "loss": -0.0104, + "num_tokens": 16523850.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0610871315002441, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.24424745335877693, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15834873813631684, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10945995377982526, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 380 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1050.0, + "completions/max_terminated_length": 1050.0, + "completions/mean_length": 927.0625, + "completions/mean_terminated_length": 927.0625, + "completions/min_length": 753.0, + "completions/min_terminated_length": 753.0, + "epoch": 0.07621524304860972, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.269036080290833, + "kl": 0.0019435882568359375, + "learning_rate": 7.599999999999999e-07, + "loss": -0.0336, + "num_tokens": 16558451.0, + "reward": 0.0, + "reward_std": 1.0197536945343018, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.17701880427346592, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1995681821471707, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07187952884282611, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 381 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1180.0, + "completions/max_terminated_length": 1180.0, + "completions/mean_length": 973.75, + "completions/mean_terminated_length": 973.75, + "completions/min_length": 701.0, + "completions/min_terminated_length": 701.0, + "epoch": 0.07641528305661133, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.195959256858216, + "kl": 0.003448486328125, + "learning_rate": 7.62e-07, + "loss": -0.0079, + "num_tokens": 16597327.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.4973295331001282, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.05045498480440446, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14454463575108265, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14446581038560777, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 382 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1496.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1229.75, + "completions/mean_terminated_length": 1229.75, + "completions/min_length": 956.0, + "completions/min_terminated_length": 956.0, + "epoch": 0.07661532306461293, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0930389086903913, + "kl": 0.002651214599609375, + "learning_rate": 7.64e-07, + "loss": -0.0007, + "num_tokens": 16644115.0, + "reward": 0.0, + "reward_std": 0.8874791860580444, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.20088843223801536, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06546184080999841, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11080513425729777, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 383 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1277.0, + "completions/max_terminated_length": 1277.0, + "completions/mean_length": 1138.25, + "completions/mean_terminated_length": 1138.25, + "completions/min_length": 912.0, + "completions/min_terminated_length": 912.0, + "epoch": 0.07681536307261452, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.350044956544824, + "kl": 0.0023250579833984375, + "learning_rate": 7.66e-07, + "loss": -0.0503, + "num_tokens": 16688543.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.005187749862671, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.10416489113196986, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06975711450953909, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08734775114237132, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 384 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1443.0, + "completions/mean_length": 1229.125, + "completions/mean_terminated_length": 1166.615478515625, + "completions/min_length": 922.0, + "completions/min_terminated_length": 922.0, + "epoch": 0.07701540308061612, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4214950284217345, + "kl": 0.002971649169921875, + "learning_rate": 7.68e-07, + "loss": -0.0012, + "num_tokens": 16736081.0, + "reward": 0.0, + "reward_std": 0.8239242434501648, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05289205845945407, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1445035649402757, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12041594578792297, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 385 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1401.0, + "completions/mean_length": 1173.25, + "completions/mean_terminated_length": 1151.4666748046875, + "completions/min_length": 819.0, + "completions/min_terminated_length": 819.0, + "epoch": 0.07721544308861772, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.034052749402521, + "kl": 0.00218963623046875, + "learning_rate": 7.699999999999999e-07, + "loss": 0.0306, + "num_tokens": 16773477.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9007006883621216, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06588522434933086, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05217985228126274, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08681611046941136, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 386 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1141.5, + "completions/mean_terminated_length": 1117.60009765625, + "completions/min_length": 866.0, + "completions/min_terminated_length": 866.0, + "epoch": 0.07741548309661932, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9382591204580604, + "kl": 0.00231170654296875, + "learning_rate": 7.72e-07, + "loss": 0.027, + "num_tokens": 16820581.0, + "reward": 0.0, + "reward_std": 0.78981614112854, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.02701970798811732, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06001692693809581, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06871842709362767, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 387 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1321.0, + "completions/max_terminated_length": 1321.0, + "completions/mean_length": 1101.75, + "completions/mean_terminated_length": 1101.75, + "completions/min_length": 918.0, + "completions/min_terminated_length": 918.0, + "epoch": 0.07761552310462093, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1950152570986083, + "kl": 0.001728057861328125, + "learning_rate": 7.74e-07, + "loss": 0.0101, + "num_tokens": 16858961.0, + "reward": 0.0, + "reward_std": 0.8525640964508057, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.018676410506978595, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06587479808858962, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07290277645477444, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 388 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1322.0, + "completions/max_terminated_length": 1322.0, + "completions/mean_length": 1117.875, + "completions/mean_terminated_length": 1117.875, + "completions/min_length": 885.0, + "completions/min_terminated_length": 885.0, + "epoch": 0.07781556311262253, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.478633859862951, + "kl": 0.0016613006591796875, + "learning_rate": 7.76e-07, + "loss": 0.0112, + "num_tokens": 16893911.0, + "reward": 0.0, + "reward_std": 0.8226579427719116, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.011502220478344798, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1263008350194181, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04849589520621153, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 389 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1452.0, + "completions/max_terminated_length": 1452.0, + "completions/mean_length": 1016.6875, + "completions/mean_terminated_length": 1016.6875, + "completions/min_length": 873.0, + "completions/min_terminated_length": 873.0, + "epoch": 0.07801560312062412, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3021699943322287, + "kl": 0.00189208984375, + "learning_rate": 7.78e-07, + "loss": -0.0114, + "num_tokens": 16934746.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8973481059074402, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09215509609728414, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09834580743684312, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08388704928078612, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 390 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1309.9375, + "completions/mean_terminated_length": 1297.2667236328125, + "completions/min_length": 949.0, + "completions/min_terminated_length": 949.0, + "epoch": 0.07821564312862572, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.236403541934124, + "kl": 0.001438140869140625, + "learning_rate": 7.799999999999999e-07, + "loss": -0.0052, + "num_tokens": 16974697.0, + "reward": 0.0, + "reward_std": 0.4473966956138611, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.024258376209800665, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12976547694527227, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09803627446568498, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 391 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1411.0, + "completions/mean_length": 1251.3125, + "completions/mean_terminated_length": 1193.923095703125, + "completions/min_length": 853.0, + "completions/min_terminated_length": 853.0, + "epoch": 0.07841568313662732, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1317508146857502, + "kl": 0.0026702880859375, + "learning_rate": 7.82e-07, + "loss": -0.0271, + "num_tokens": 17019174.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9151982069015503, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.15214568289075245, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.20871152690741301, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 392 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1495.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 994.3125, + "completions/mean_terminated_length": 994.3125, + "completions/min_length": 704.0, + "completions/min_terminated_length": 704.0, + "epoch": 0.07861572314462893, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6187114622023238, + "kl": 0.00209808349609375, + "learning_rate": 7.84e-07, + "loss": 0.008, + "num_tokens": 17058915.0, + "reward": 0.0, + "reward_std": 0.8971644639968872, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.17128341262372673, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07485880797814844, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10576003586036263, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 393 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1335.0, + "completions/mean_length": 1175.8125, + "completions/mean_terminated_length": 1154.2000732421875, + "completions/min_length": 937.0, + "completions/min_terminated_length": 937.0, + "epoch": 0.07881576315263053, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.286206502832412, + "kl": 0.00289154052734375, + "learning_rate": 7.86e-07, + "loss": -0.0074, + "num_tokens": 17111224.0, + "reward": -4.470348358154297e-08, + "reward_std": 1.0330140590667725, + "rewards/wordcountpos_reward_nokeypoint/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03091208982560134, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07072870832550031, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09737289911202954, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 394 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1357.0, + "completions/mean_length": 1151.875, + "completions/mean_terminated_length": 1102.1429443359375, + "completions/min_length": 851.0, + "completions/min_terminated_length": 851.0, + "epoch": 0.07901580316063213, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1604006665726008, + "kl": 0.002899169921875, + "learning_rate": 7.88e-07, + "loss": 0.0411, + "num_tokens": 17150686.0, + "reward": 5.587935447692871e-09, + "reward_std": 1.064286708831787, + "rewards/wordcountpos_reward_nokeypoint/mean": 5.587935447692871e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.13267867972290034, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1302227202910091, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11385500851066223, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 395 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1303.0, + "completions/max_terminated_length": 1303.0, + "completions/mean_length": 995.5, + "completions/mean_terminated_length": 995.5, + "completions/min_length": 675.0, + "completions/min_terminated_length": 675.0, + "epoch": 0.07921584316863373, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.095099186775773, + "kl": 0.0035400390625, + "learning_rate": 7.9e-07, + "loss": -0.0347, + "num_tokens": 17200422.0, + "reward": 3.725290298461914e-08, + "reward_std": 1.0586769580841064, + "rewards/wordcountpos_reward_nokeypoint/mean": 3.725290298461914e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.34886983537656674, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10289489759502005, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1167460047694551, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 396 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1283.0, + "completions/max_terminated_length": 1283.0, + "completions/mean_length": 1186.125, + "completions/mean_terminated_length": 1186.125, + "completions/min_length": 1043.0, + "completions/min_terminated_length": 1043.0, + "epoch": 0.07941588317663532, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6635807975107553, + "kl": 0.001630544662475586, + "learning_rate": 7.92e-07, + "loss": 0.0055, + "num_tokens": 17249032.0, + "reward": 0.0, + "reward_std": 0.7198959589004517, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.17843944403988235, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.3926896870590636, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10470416879457554, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 397 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1228.0, + "completions/max_terminated_length": 1228.0, + "completions/mean_length": 997.8125, + "completions/mean_terminated_length": 997.8125, + "completions/min_length": 794.0, + "completions/min_terminated_length": 794.0, + "epoch": 0.07961592318463692, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.899247129113273, + "kl": 0.00313568115234375, + "learning_rate": 7.94e-07, + "loss": -0.042, + "num_tokens": 17288621.0, + "reward": 0.0, + "reward_std": 0.6787030696868896, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07945241964422634, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08352879664384727, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13443985299781488, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 398 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1238.0, + "completions/max_terminated_length": 1238.0, + "completions/mean_length": 1015.8125, + "completions/mean_terminated_length": 1015.8125, + "completions/min_length": 835.0, + "completions/min_terminated_length": 835.0, + "epoch": 0.07981596319263853, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5889954453030275, + "kl": 0.0015311241149902344, + "learning_rate": 7.96e-07, + "loss": -0.0147, + "num_tokens": 17337162.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7482526302337646, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.006666440560326907, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.24041276003925396, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12852438880818895, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 399 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1410.0, + "completions/max_terminated_length": 1410.0, + "completions/mean_length": 1017.5, + "completions/mean_terminated_length": 1017.5, + "completions/min_length": 631.0, + "completions/min_terminated_length": 631.0, + "epoch": 0.08001600320064013, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7030786885195934, + "kl": 0.00168609619140625, + "learning_rate": 7.98e-07, + "loss": -0.121, + "num_tokens": 17381378.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9090695977210999, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07337536278215936, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.20930546960589405, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12816366850994054, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 400 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1130.0, + "completions/max_terminated_length": 1130.0, + "completions/mean_length": 917.375, + "completions/mean_terminated_length": 917.375, + "completions/min_length": 661.0, + "completions/min_terminated_length": 661.0, + "epoch": 0.08021604320864173, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.420754841252008, + "kl": 0.00273895263671875, + "learning_rate": 8e-07, + "loss": 0.013, + "num_tokens": 17423936.0, + "reward": 0.0, + "reward_std": 0.5550462007522583, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.16573521728966306, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.180927235638784, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11674600476945511, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 401 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1352.0, + "completions/max_terminated_length": 1352.0, + "completions/mean_length": 1133.25, + "completions/mean_terminated_length": 1133.25, + "completions/min_length": 997.0, + "completions/min_terminated_length": 997.0, + "epoch": 0.08041608321664333, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.041485476184688, + "kl": 0.0014190673828125, + "learning_rate": 8.02e-07, + "loss": 0.006, + "num_tokens": 17466236.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5227797031402588, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03363721150833076, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0913767824530837, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10878112581387148, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 402 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1364.0, + "completions/max_terminated_length": 1364.0, + "completions/mean_length": 1050.5, + "completions/mean_terminated_length": 1050.5, + "completions/min_length": 651.0, + "completions/min_terminated_length": 651.0, + "epoch": 0.08061612322464493, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2224425723385086, + "kl": 0.00247955322265625, + "learning_rate": 8.04e-07, + "loss": -0.0147, + "num_tokens": 17509860.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9569826126098633, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09752289569591145, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06131117630336059, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.18413964105375955, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 403 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1415.0, + "completions/max_terminated_length": 1415.0, + "completions/mean_length": 986.5625, + "completions/mean_terminated_length": 986.5625, + "completions/min_length": 816.0, + "completions/min_terminated_length": 816.0, + "epoch": 0.08081616323264652, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4548080754987525, + "kl": 0.0012731552124023438, + "learning_rate": 8.06e-07, + "loss": 0.0081, + "num_tokens": 17548701.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6584672331809998, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.23156963918237533, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15072063634629068, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11013459778666118, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 404 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1276.0, + "completions/max_terminated_length": 1276.0, + "completions/mean_length": 881.0, + "completions/mean_terminated_length": 881.0, + "completions/min_length": 539.0, + "completions/min_terminated_length": 539.0, + "epoch": 0.08101620324064814, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.236537148316597, + "kl": 0.002147674560546875, + "learning_rate": 8.08e-07, + "loss": -0.0287, + "num_tokens": 17577557.0, + "reward": -2.2351741790771484e-08, + "reward_std": 0.9946941137313843, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.02484727451504519, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08060781438902628, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11855612829185827, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 405 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1439.0, + "completions/max_terminated_length": 1439.0, + "completions/mean_length": 1155.875, + "completions/mean_terminated_length": 1155.875, + "completions/min_length": 903.0, + "completions/min_terminated_length": 903.0, + "epoch": 0.08121624324864973, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.999871931298176, + "kl": 0.0019435882568359375, + "learning_rate": 8.1e-07, + "loss": 0.0124, + "num_tokens": 17619819.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0222320556640625, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.32441248335841333, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17023623505272245, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.047919685895217376, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 406 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1300.0, + "completions/max_terminated_length": 1300.0, + "completions/mean_length": 1141.5625, + "completions/mean_terminated_length": 1141.5625, + "completions/min_length": 1009.0, + "completions/min_terminated_length": 1009.0, + "epoch": 0.08141628325665133, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.8030449971526794, + "kl": 0.00037103891372680664, + "learning_rate": 8.12e-07, + "loss": -0.0057, + "num_tokens": 17663756.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0037096738815308, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06158806885666013, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07558006441232386, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06191391873668902, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 407 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1177.0, + "completions/max_terminated_length": 1177.0, + "completions/mean_length": 1044.125, + "completions/mean_terminated_length": 1044.125, + "completions/min_length": 817.0, + "completions/min_terminated_length": 817.0, + "epoch": 0.08161632326465293, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4634080534307867, + "kl": 0.001407623291015625, + "learning_rate": 8.14e-07, + "loss": 0.0124, + "num_tokens": 17696854.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0214093923568726, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03395244908986234, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06130993142669272, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10671873729054747, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 408 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1207.0, + "completions/max_terminated_length": 1207.0, + "completions/mean_length": 971.3125, + "completions/mean_terminated_length": 971.3125, + "completions/min_length": 673.0, + "completions/min_terminated_length": 673.0, + "epoch": 0.08181636327265453, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.621446840263658, + "kl": 0.002834320068359375, + "learning_rate": 8.159999999999999e-07, + "loss": -0.0062, + "num_tokens": 17750331.0, + "reward": 0.0, + "reward_std": 0.7559704780578613, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.050230305204890885, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05498905285730148, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.675, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.19148542155126763, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 409 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1241.0, + "completions/max_terminated_length": 1241.0, + "completions/mean_length": 1019.8125, + "completions/mean_terminated_length": 1019.8125, + "completions/min_length": 708.0, + "completions/min_terminated_length": 708.0, + "epoch": 0.08201640328065612, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.749171876115079, + "kl": 0.00315093994140625, + "learning_rate": 8.179999999999999e-07, + "loss": -0.0437, + "num_tokens": 17789184.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.3914664387702942, + "rewards/wordcountpos_reward_nokeypoint/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.12148463937477451, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1803372009214367, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13158576980363348, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 410 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1176.0, + "completions/max_terminated_length": 1176.0, + "completions/mean_length": 880.9375, + "completions/mean_terminated_length": 880.9375, + "completions/min_length": 568.0, + "completions/min_terminated_length": 568.0, + "epoch": 0.08221644328865774, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1144036102420447, + "kl": 0.0019817352294921875, + "learning_rate": 8.199999999999999e-07, + "loss": -0.0215, + "num_tokens": 17837279.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8302940726280212, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1309736194066905, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08043243056909671, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14446581038560777, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 411 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1362.0, + "completions/max_terminated_length": 1362.0, + "completions/mean_length": 1088.75, + "completions/mean_terminated_length": 1088.75, + "completions/min_length": 834.0, + "completions/min_terminated_length": 834.0, + "epoch": 0.08241648329665933, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.643957330633729, + "kl": 0.002887725830078125, + "learning_rate": 8.219999999999999e-07, + "loss": -0.0105, + "num_tokens": 17877531.0, + "reward": 0.0, + "reward_std": 1.0634948015213013, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05726769038489176, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03970987414530704, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12816366850994057, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 412 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1300.0, + "completions/max_terminated_length": 1300.0, + "completions/mean_length": 1084.375, + "completions/mean_terminated_length": 1084.375, + "completions/min_length": 905.0, + "completions/min_terminated_length": 905.0, + "epoch": 0.08261652330466093, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.637478591054848, + "kl": 0.0028228759765625, + "learning_rate": 8.24e-07, + "loss": 0.0069, + "num_tokens": 17915849.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8930952548980713, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.10698215069248931, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10981259420930861, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11155467020454343, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 413 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1462.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1129.125, + "completions/mean_terminated_length": 1129.125, + "completions/min_length": 943.0, + "completions/min_terminated_length": 943.0, + "epoch": 0.08281656331266253, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0036327909774077, + "kl": 0.0021991729736328125, + "learning_rate": 8.259999999999999e-07, + "loss": -0.0022, + "num_tokens": 17958907.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5580933094024658, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.008165972299001463, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15816839036044025, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08850612031567838, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 414 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1303.0, + "completions/max_terminated_length": 1303.0, + "completions/mean_length": 1032.6875, + "completions/mean_terminated_length": 1032.6875, + "completions/min_length": 620.0, + "completions/min_terminated_length": 620.0, + "epoch": 0.08301660332066413, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.936434377008649, + "kl": 0.0018520355224609375, + "learning_rate": 8.28e-07, + "loss": 0.0037, + "num_tokens": 18008126.0, + "reward": 0.0, + "reward_std": 1.0575085878372192, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.14918556227403107, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.058750697306502744, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06382847385042252, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 415 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1478.0, + "completions/mean_length": 1299.1875, + "completions/mean_terminated_length": 1143.0, + "completions/min_length": 1011.0, + "completions/min_terminated_length": 1011.0, + "epoch": 0.08321664332866573, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.476312520363112, + "kl": 0.0020999908447265625, + "learning_rate": 8.299999999999999e-07, + "loss": -0.0248, + "num_tokens": 18051489.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0246864557266235, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.022439466809862203, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0669653966593066, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11080513425729777, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 416 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1317.0, + "completions/max_terminated_length": 1317.0, + "completions/mean_length": 1070.3125, + "completions/mean_terminated_length": 1070.3125, + "completions/min_length": 823.0, + "completions/min_terminated_length": 823.0, + "epoch": 0.08341668333666734, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4934653059987744, + "kl": 0.00345611572265625, + "learning_rate": 8.319999999999999e-07, + "loss": 0.0108, + "num_tokens": 18091894.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.041778326034546, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04493135857898509, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07901438590366901, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0843274042711568, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 417 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1357.0, + "completions/max_terminated_length": 1357.0, + "completions/mean_length": 1129.0, + "completions/mean_terminated_length": 1129.0, + "completions/min_length": 977.0, + "completions/min_terminated_length": 977.0, + "epoch": 0.08361672334466894, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4117493543495057, + "kl": 0.001445770263671875, + "learning_rate": 8.34e-07, + "loss": -0.0156, + "num_tokens": 18129366.0, + "reward": 0.0, + "reward_std": 0.2700986862182617, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1229835187716443, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.19435526332833966, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1519624710005487, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 418 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1152.4375, + "completions/mean_terminated_length": 1072.2308349609375, + "completions/min_length": 629.0, + "completions/min_terminated_length": 629.0, + "epoch": 0.08381676335267053, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0129096458756113, + "kl": 0.002338409423828125, + "learning_rate": 8.359999999999999e-07, + "loss": -0.0661, + "num_tokens": 18178301.0, + "reward": 0.0, + "reward_std": 0.9663434028625488, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.019135580300574113, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.048976242898843864, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11894598836509011, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 419 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1488.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1135.4375, + "completions/mean_terminated_length": 1135.4375, + "completions/min_length": 896.0, + "completions/min_terminated_length": 896.0, + "epoch": 0.08401680336067213, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2965741192222557, + "kl": 0.002895355224609375, + "learning_rate": 8.38e-07, + "loss": -0.0385, + "num_tokens": 18222236.0, + "reward": 0.0, + "reward_std": 1.0163602828979492, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10272187684481446, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10833558914314495, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09727776191382574, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 420 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1291.0, + "completions/max_terminated_length": 1291.0, + "completions/mean_length": 1050.3125, + "completions/mean_terminated_length": 1050.3125, + "completions/min_length": 731.0, + "completions/min_terminated_length": 731.0, + "epoch": 0.08421684336867373, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.082869366574932, + "kl": 0.0021343231201171875, + "learning_rate": 8.399999999999999e-07, + "loss": -0.041, + "num_tokens": 18272409.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.016232967376709, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1429447878161333, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07313975134521543, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05443310539518174, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 421 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1331.0, + "completions/mean_length": 1164.3125, + "completions/mean_terminated_length": 1141.933349609375, + "completions/min_length": 932.0, + "completions/min_terminated_length": 932.0, + "epoch": 0.08441688337667534, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7866526497972113, + "kl": 0.002391815185546875, + "learning_rate": 8.419999999999999e-07, + "loss": -0.0213, + "num_tokens": 18315702.0, + "reward": 0.0, + "reward_std": 0.8058360815048218, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04732018448534996, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.050793277402758276, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11279282877125756, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 422 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1379.0, + "completions/max_terminated_length": 1379.0, + "completions/mean_length": 1149.9375, + "completions/mean_terminated_length": 1149.9375, + "completions/min_length": 826.0, + "completions/min_terminated_length": 826.0, + "epoch": 0.08461692338467694, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5976911628865813, + "kl": 0.0013952255249023438, + "learning_rate": 8.439999999999999e-07, + "loss": -0.0074, + "num_tokens": 18352653.0, + "reward": 0.0, + "reward_std": 0.9133906364440918, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08116164491390734, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11516877693274562, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0620632890834175, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 423 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1357.0, + "completions/mean_length": 1257.875, + "completions/mean_terminated_length": 1177.166748046875, + "completions/min_length": 880.0, + "completions/min_terminated_length": 880.0, + "epoch": 0.08481696339267854, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7280187659960444, + "kl": 0.0020542144775390625, + "learning_rate": 8.459999999999999e-07, + "loss": 0.0286, + "num_tokens": 18400083.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.05845308303833, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.012948470140161737, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03558585178384421, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0709720863229836, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 424 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1445.0, + "completions/mean_length": 1084.1875, + "completions/mean_terminated_length": 1056.4666748046875, + "completions/min_length": 856.0, + "completions/min_terminated_length": 856.0, + "epoch": 0.08501700340068014, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.63114094107899, + "kl": 0.003170013427734375, + "learning_rate": 8.48e-07, + "loss": 0.049, + "num_tokens": 18449294.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.44955748319625854, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1717042452126114, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.18979790496754526, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.2057956555708887, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 425 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1142.0, + "completions/max_terminated_length": 1142.0, + "completions/mean_length": 857.6875, + "completions/mean_terminated_length": 857.6875, + "completions/min_length": 589.0, + "completions/min_terminated_length": 589.0, + "epoch": 0.08521704340868173, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.384805061362684, + "kl": 0.00354766845703125, + "learning_rate": 8.499999999999999e-07, + "loss": -0.0382, + "num_tokens": 18485217.0, + "reward": 0.0, + "reward_std": 0.7694041132926941, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10723959676683871, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15290558834269466, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11603000888978234, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 426 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1280.0, + "completions/max_terminated_length": 1280.0, + "completions/mean_length": 1113.4375, + "completions/mean_terminated_length": 1113.4375, + "completions/min_length": 912.0, + "completions/min_terminated_length": 912.0, + "epoch": 0.08541708341668333, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3029050848375134, + "kl": 0.002471923828125, + "learning_rate": 8.52e-07, + "loss": -0.0022, + "num_tokens": 18522984.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5375534892082214, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.2072404026175129, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.23733631881861345, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 427 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1471.0, + "completions/mean_length": 1315.9375, + "completions/mean_terminated_length": 1254.5833740234375, + "completions/min_length": 980.0, + "completions/min_terminated_length": 980.0, + "epoch": 0.08561712342468494, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2305532301247464, + "kl": 0.0034027099609375, + "learning_rate": 8.539999999999999e-07, + "loss": -0.026, + "num_tokens": 18576783.0, + "reward": 0.0, + "reward_std": 0.6829123497009277, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.2465701925850947, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2970959727588953, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0938872452190116, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 428 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1328.0, + "completions/max_terminated_length": 1328.0, + "completions/mean_length": 1011.375, + "completions/mean_terminated_length": 1011.375, + "completions/min_length": 536.0, + "completions/min_terminated_length": 536.0, + "epoch": 0.08581716343268654, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.839006035113483, + "kl": 0.003082275390625, + "learning_rate": 8.559999999999999e-07, + "loss": -0.0135, + "num_tokens": 18620541.0, + "reward": -3.725290298461914e-09, + "reward_std": 1.0192748308181763, + "rewards/wordcountpos_reward_nokeypoint/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.09918847296590731, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07758009669297937, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12641788434189793, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 429 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1446.0, + "completions/mean_length": 1295.6875, + "completions/mean_terminated_length": 1136.77783203125, + "completions/min_length": 896.0, + "completions/min_terminated_length": 896.0, + "epoch": 0.08601720344068814, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.191377322939786, + "kl": 0.00302886962890625, + "learning_rate": 8.58e-07, + "loss": -0.0383, + "num_tokens": 18664904.0, + "reward": 0.0, + "reward_std": 0.8099937438964844, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06771898324317603, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07362824428008644, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10945995377982527, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 430 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1476.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1027.4375, + "completions/mean_terminated_length": 1027.4375, + "completions/min_length": 19.0, + "completions/min_terminated_length": 19.0, + "epoch": 0.08621724344868974, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5136020887750203, + "kl": 0.0024089813232421875, + "learning_rate": 8.599999999999999e-07, + "loss": -0.209, + "num_tokens": 18702615.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6291576623916626, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.027735005751688357, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14256079699212532, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1663886570207993, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 431 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1235.0, + "completions/mean_length": 1118.625, + "completions/mean_terminated_length": 1093.2000732421875, + "completions/min_length": 929.0, + "completions/min_terminated_length": 929.0, + "epoch": 0.08641728345669134, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4308260011259017, + "kl": 0.0020389556884765625, + "learning_rate": 8.62e-07, + "loss": 0.003, + "num_tokens": 18743577.0, + "reward": 0.0, + "reward_std": 0.6564137935638428, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06122107892212338, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2025369742296428, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09888264649460884, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 432 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1409.0, + "completions/mean_length": 1193.8125, + "completions/mean_terminated_length": 1150.071533203125, + "completions/min_length": 926.0, + "completions/min_terminated_length": 926.0, + "epoch": 0.08661732346469293, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3486586074243285, + "kl": 0.003326416015625, + "learning_rate": 8.639999999999999e-07, + "loss": -0.0619, + "num_tokens": 18794766.0, + "reward": 0.0, + "reward_std": 0.6573572158813477, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.13650770934729628, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03117331463292723, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11894598836509011, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 433 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1438.0, + "completions/mean_length": 1144.375, + "completions/mean_terminated_length": 1093.571533203125, + "completions/min_length": 800.0, + "completions/min_terminated_length": 800.0, + "epoch": 0.08681736347269454, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.055796989416608, + "kl": 0.002643585205078125, + "learning_rate": 8.659999999999999e-07, + "loss": -0.0316, + "num_tokens": 18842580.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8981505036354065, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.043314965583023175, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0577463302925992, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1112221667221529, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 434 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1408.0, + "completions/max_terminated_length": 1408.0, + "completions/mean_length": 1004.9375, + "completions/mean_terminated_length": 1004.9375, + "completions/min_length": 812.0, + "completions/min_terminated_length": 812.0, + "epoch": 0.08701740348069614, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9671250382622265, + "kl": 0.0024204254150390625, + "learning_rate": 8.68e-07, + "loss": 0.0479, + "num_tokens": 18873155.0, + "reward": 0.0, + "reward_std": 1.0349515676498413, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.031187903872911704, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04224473521716988, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08777074514725111, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 435 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1324.1875, + "completions/mean_terminated_length": 1265.5833740234375, + "completions/min_length": 968.0, + "completions/min_terminated_length": 968.0, + "epoch": 0.08721744348869774, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.25086079176688, + "kl": 0.00354766845703125, + "learning_rate": 8.699999999999999e-07, + "loss": -0.0031, + "num_tokens": 18926014.0, + "reward": 0.0, + "reward_std": 0.9402385950088501, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.281398758290155, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.3133089123958753, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14599594109020572, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 436 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1453.0, + "completions/max_terminated_length": 1453.0, + "completions/mean_length": 1081.9375, + "completions/mean_terminated_length": 1081.9375, + "completions/min_length": 707.0, + "completions/min_terminated_length": 707.0, + "epoch": 0.08741748349669934, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.636050026623721, + "kl": 0.00286865234375, + "learning_rate": 8.72e-07, + "loss": 0.0417, + "num_tokens": 18974741.0, + "reward": 0.0, + "reward_std": 1.0009396076202393, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.004177888701300788, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17964409402453185, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07084150279686706, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 437 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1161.125, + "completions/mean_terminated_length": 1138.533447265625, + "completions/min_length": 869.0, + "completions/min_terminated_length": 869.0, + "epoch": 0.08761752350470094, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0983920093240505, + "kl": 0.002155303955078125, + "learning_rate": 8.739999999999999e-07, + "loss": 0.0406, + "num_tokens": 19015071.0, + "reward": 0.0, + "reward_std": 0.5684026479721069, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04618015309949244, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12473226504881324, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09878896324620107, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 438 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1227.0, + "completions/max_terminated_length": 1227.0, + "completions/mean_length": 1042.9375, + "completions/mean_terminated_length": 1042.9375, + "completions/min_length": 902.0, + "completions/min_terminated_length": 902.0, + "epoch": 0.08781756351270253, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5996885294180254, + "kl": 0.0014286041259765625, + "learning_rate": 8.76e-07, + "loss": -0.0085, + "num_tokens": 19058134.0, + "reward": 0.0, + "reward_std": 0.5913543701171875, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.33805343559066664, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.3218654507549297, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08595864638818418, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 439 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1252.4375, + "completions/mean_terminated_length": 1169.916748046875, + "completions/min_length": 983.0, + "completions/min_terminated_length": 983.0, + "epoch": 0.08801760352070415, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1866481131170246, + "kl": 0.0029449462890625, + "learning_rate": 8.78e-07, + "loss": -0.0011, + "num_tokens": 19103213.0, + "reward": -1.862645149230957e-08, + "reward_std": 1.0595558881759644, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.862645149230957e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.002720492633116048, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.18868683130536254, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.17384539747207065, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 440 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1295.0, + "completions/mean_length": 1131.5, + "completions/mean_terminated_length": 1106.933349609375, + "completions/min_length": 942.0, + "completions/min_terminated_length": 942.0, + "epoch": 0.08821764352870574, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.555414665024897, + "kl": 0.0034637451171875, + "learning_rate": 8.799999999999999e-07, + "loss": 0.0527, + "num_tokens": 19153445.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7351013422012329, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10544927321754721, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11094337198612815, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13526380260918403, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 441 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1189.8125, + "completions/mean_terminated_length": 1145.5, + "completions/min_length": 892.0, + "completions/min_terminated_length": 892.0, + "epoch": 0.08841768353670734, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5639364893224563, + "kl": 0.0033721923828125, + "learning_rate": 8.82e-07, + "loss": -0.0483, + "num_tokens": 19205738.0, + "reward": 0.0, + "reward_std": 0.7321063876152039, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.044144641568097484, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10462008955049681, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13045504405165223, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 442 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1163.0, + "completions/max_terminated_length": 1163.0, + "completions/mean_length": 1009.0625, + "completions/mean_terminated_length": 1009.0625, + "completions/min_length": 798.0, + "completions/min_terminated_length": 798.0, + "epoch": 0.08861772354470894, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6718455395975216, + "kl": 0.00275421142578125, + "learning_rate": 8.839999999999999e-07, + "loss": 0.0127, + "num_tokens": 19245843.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6130286455154419, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.060144906977294535, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09759898330288866, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0859586463881842, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 443 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1434.0, + "completions/max_terminated_length": 1434.0, + "completions/mean_length": 937.5, + "completions/mean_terminated_length": 937.5, + "completions/min_length": 653.0, + "completions/min_terminated_length": 653.0, + "epoch": 0.08881776355271054, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4114516565869093, + "kl": 0.0023937225341796875, + "learning_rate": 8.86e-07, + "loss": 0.029, + "num_tokens": 19285667.0, + "reward": 0.0, + "reward_std": 0.9947754144668579, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.19121377848362264, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.24392585571722813, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06309898162000305, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 444 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1390.1875, + "completions/mean_terminated_length": 1324.300048828125, + "completions/min_length": 1106.0, + "completions/min_terminated_length": 1106.0, + "epoch": 0.08901780356071214, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.962434891260953, + "kl": 0.003154754638671875, + "learning_rate": 8.88e-07, + "loss": -0.0115, + "num_tokens": 19333230.0, + "reward": 0.0, + "reward_std": 0.8739463090896606, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0733786174421881, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13273925690111882, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11344765475923412, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 445 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1102.75, + "completions/mean_terminated_length": 1011.0769653320312, + "completions/min_length": 645.0, + "completions/min_terminated_length": 645.0, + "epoch": 0.08921784356871375, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.404045630724902, + "kl": 0.0019817352294921875, + "learning_rate": 8.9e-07, + "loss": -0.0586, + "num_tokens": 19381890.0, + "reward": 4.470348358154297e-08, + "reward_std": 1.043419599533081, + "rewards/wordcountpos_reward_nokeypoint/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10776034358629694, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0638190497950426, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.103905227473387, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 446 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1464.0, + "completions/mean_length": 1248.125, + "completions/mean_terminated_length": 1212.1429443359375, + "completions/min_length": 925.0, + "completions/min_terminated_length": 925.0, + "epoch": 0.08941788357671535, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.379986018306052, + "kl": 0.00348663330078125, + "learning_rate": 8.92e-07, + "loss": -0.0477, + "num_tokens": 19437676.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9292985200881958, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.22213794444992524, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.24217371950263036, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11344765475923412, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 447 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1499.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1144.25, + "completions/mean_terminated_length": 1144.25, + "completions/min_length": 876.0, + "completions/min_terminated_length": 876.0, + "epoch": 0.08961792358471694, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2476156738272026, + "kl": 0.002902984619140625, + "learning_rate": 8.939999999999999e-07, + "loss": -0.0151, + "num_tokens": 19489128.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.7667519450187683, + "rewards/wordcountpos_reward_nokeypoint/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1443491896323461, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07668062129530388, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11729986896522632, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 448 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1413.0, + "completions/max_terminated_length": 1413.0, + "completions/mean_length": 1060.125, + "completions/mean_terminated_length": 1060.125, + "completions/min_length": 908.0, + "completions/min_terminated_length": 908.0, + "epoch": 0.08981796359271854, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.732197329397532, + "kl": 0.0020160675048828125, + "learning_rate": 8.96e-07, + "loss": -0.012, + "num_tokens": 19537674.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0598664283752441, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.053291017574544464, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1456795666694438, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.18733017000097074, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 449 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1255.125, + "completions/mean_terminated_length": 1220.1429443359375, + "completions/min_length": 971.0, + "completions/min_terminated_length": 971.0, + "epoch": 0.09001800360072014, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0950509504442882, + "kl": 0.0030364990234375, + "learning_rate": 8.98e-07, + "loss": 0.0077, + "num_tokens": 19585252.0, + "reward": 0.0, + "reward_std": 0.8989287614822388, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.12960246843168424, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10988530752470509, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12224747213928167, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 450 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1128.5, + "completions/mean_terminated_length": 1103.7333984375, + "completions/min_length": 731.0, + "completions/min_terminated_length": 731.0, + "epoch": 0.09021804360872174, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.015852427187334, + "kl": 0.003154754638671875, + "learning_rate": 9e-07, + "loss": -0.0202, + "num_tokens": 19636932.0, + "reward": 0.0, + "reward_std": 1.0336642265319824, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03968288294511931, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15158925823467087, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1060223596263578, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 451 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1375.0, + "completions/mean_length": 1103.875, + "completions/mean_terminated_length": 1047.2857666015625, + "completions/min_length": 816.0, + "completions/min_terminated_length": 816.0, + "epoch": 0.09041808361672335, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2278648268406456, + "kl": 0.002777099609375, + "learning_rate": 9.02e-07, + "loss": -0.034, + "num_tokens": 19687986.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.45276084542274475, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04990093050477666, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.19615265445970886, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11603000888978234, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 452 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1170.0, + "completions/max_terminated_length": 1170.0, + "completions/mean_length": 931.5625, + "completions/mean_terminated_length": 931.5625, + "completions/min_length": 743.0, + "completions/min_terminated_length": 743.0, + "epoch": 0.09061812362472495, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.912024049485071, + "kl": 0.0039215087890625, + "learning_rate": 9.039999999999999e-07, + "loss": 0.0033, + "num_tokens": 19724451.0, + "reward": 0.0, + "reward_std": 0.852126955986023, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.01152569743157354, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0896161272523587, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09098229375970789, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 453 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1439.0, + "completions/mean_length": 1059.375, + "completions/mean_terminated_length": 1030.0, + "completions/min_length": 766.0, + "completions/min_terminated_length": 766.0, + "epoch": 0.09081816363272655, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8887672609305937, + "kl": 0.0020599365234375, + "learning_rate": 9.06e-07, + "loss": -0.0039, + "num_tokens": 19780489.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.8596885204315186, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03538029113745441, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10396173057881597, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.6625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.056927504255331086, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 454 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1424.0, + "completions/mean_length": 1122.6875, + "completions/mean_terminated_length": 1068.7857666015625, + "completions/min_length": 880.0, + "completions/min_terminated_length": 880.0, + "epoch": 0.09101820364072814, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0847096107502736, + "kl": 0.0023193359375, + "learning_rate": 9.08e-07, + "loss": -0.0466, + "num_tokens": 19819444.0, + "reward": 0.0, + "reward_std": 0.534292995929718, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07500114892407513, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07859274509673148, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.047919685895217376, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 455 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1260.0, + "completions/max_terminated_length": 1260.0, + "completions/mean_length": 1041.4375, + "completions/mean_terminated_length": 1041.4375, + "completions/min_length": 601.0, + "completions/min_terminated_length": 601.0, + "epoch": 0.09121824364872974, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7398241309513205, + "kl": 0.003147125244140625, + "learning_rate": 9.1e-07, + "loss": -0.02, + "num_tokens": 19863563.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6010160446166992, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11425433188437849, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.29367875530810283, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11979921473804349, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 456 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1388.0, + "completions/max_terminated_length": 1388.0, + "completions/mean_length": 1074.875, + "completions/mean_terminated_length": 1074.875, + "completions/min_length": 807.0, + "completions/min_terminated_length": 807.0, + "epoch": 0.09141828365673135, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2106720648980986, + "kl": 0.0018463134765625, + "learning_rate": 9.12e-07, + "loss": -0.043, + "num_tokens": 19905465.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0157217979431152, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.024467985040520647, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06779349475047793, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0806225774829855, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 457 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1231.0, + "completions/max_terminated_length": 1231.0, + "completions/mean_length": 963.3125, + "completions/mean_terminated_length": 963.3125, + "completions/min_length": 746.0, + "completions/min_terminated_length": 746.0, + "epoch": 0.09161832366473295, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1161943896134017, + "kl": 0.0022182464599609375, + "learning_rate": 9.14e-07, + "loss": -0.0366, + "num_tokens": 19939022.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0053585767745972, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.021567989583877345, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07738873711883265, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06440611887195308, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 458 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1067.0, + "completions/max_terminated_length": 1067.0, + "completions/mean_length": 852.5625, + "completions/mean_terminated_length": 852.5625, + "completions/min_length": 545.0, + "completions/min_terminated_length": 545.0, + "epoch": 0.09181836367273455, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5914246569345667, + "kl": 0.002593994140625, + "learning_rate": 9.16e-07, + "loss": 0.0258, + "num_tokens": 19976895.0, + "reward": 0.0, + "reward_std": 0.5595604181289673, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0837292582778635, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15246166932108757, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06426219440409447, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 459 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1325.0625, + "completions/mean_terminated_length": 1245.5455322265625, + "completions/min_length": 1045.0, + "completions/min_terminated_length": 1045.0, + "epoch": 0.09201840368073615, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.084965292754874, + "kl": 0.0013685226440429688, + "learning_rate": 9.18e-07, + "loss": 0.0132, + "num_tokens": 20019272.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8049581050872803, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.22229142210792657, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1942882409971822, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10461569884316811, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 460 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1132.0, + "completions/max_terminated_length": 1132.0, + "completions/mean_length": 989.875, + "completions/mean_terminated_length": 989.875, + "completions/min_length": 772.0, + "completions/min_terminated_length": 772.0, + "epoch": 0.09221844368873774, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4977429710574333, + "kl": 0.0028133392333984375, + "learning_rate": 9.2e-07, + "loss": -0.0262, + "num_tokens": 20074238.0, + "reward": 0.0, + "reward_std": 0.7537282109260559, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.16626841699978454, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.054224932091615025, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11538983843829065, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 461 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1372.625, + "completions/mean_terminated_length": 1330.166748046875, + "completions/min_length": 1130.0, + "completions/min_terminated_length": 1130.0, + "epoch": 0.09241848369673934, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8360776044644553, + "kl": 0.002838134765625, + "learning_rate": 9.22e-07, + "loss": -0.002, + "num_tokens": 20126448.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9394620656967163, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05912984590978623, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1101141072241061, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0906764700582363, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 462 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1482.0, + "completions/mean_length": 1204.25, + "completions/mean_terminated_length": 1069.8182373046875, + "completions/min_length": 656.0, + "completions/min_terminated_length": 656.0, + "epoch": 0.09261852370474095, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.254196484704798, + "kl": 0.002971649169921875, + "learning_rate": 9.24e-07, + "loss": 0.0152, + "num_tokens": 20178444.0, + "reward": 0.0, + "reward_std": 0.7293630838394165, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06495063908903544, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06924083543432369, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08606629658238704, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 463 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1277.0, + "completions/max_terminated_length": 1277.0, + "completions/mean_length": 1048.4375, + "completions/mean_terminated_length": 1048.4375, + "completions/min_length": 836.0, + "completions/min_terminated_length": 836.0, + "epoch": 0.09281856371274255, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.0796030385660345, + "kl": 0.00304412841796875, + "learning_rate": 9.26e-07, + "loss": 0.0054, + "num_tokens": 20219347.0, + "reward": 2.2351741790771484e-08, + "reward_std": 0.9942997694015503, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.038448003666824944, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06121320201795772, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07290277645477446, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 464 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1244.0, + "completions/mean_terminated_length": 1226.933349609375, + "completions/min_length": 891.0, + "completions/min_terminated_length": 891.0, + "epoch": 0.09301860372074415, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4418533078416953, + "kl": 0.0018062591552734375, + "learning_rate": 9.28e-07, + "loss": 0.0255, + "num_tokens": 20264163.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0325134992599487, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1582219423585269, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09122417248584347, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05426273532033234, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 465 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1020.0, + "completions/max_terminated_length": 1020.0, + "completions/mean_length": 846.625, + "completions/mean_terminated_length": 846.625, + "completions/min_length": 692.0, + "completions/min_terminated_length": 692.0, + "epoch": 0.09321864372874575, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.100922253889518, + "kl": 0.00402069091796875, + "learning_rate": 9.3e-07, + "loss": -0.0014, + "num_tokens": 20304405.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0563747882843018, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12041594578792297, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 466 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1225.0, + "completions/max_terminated_length": 1225.0, + "completions/mean_length": 1091.3125, + "completions/mean_terminated_length": 1091.3125, + "completions/min_length": 932.0, + "completions/min_terminated_length": 932.0, + "epoch": 0.09341868373674735, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.14665363757736, + "kl": 0.0026836395263671875, + "learning_rate": 9.32e-07, + "loss": 0.0068, + "num_tokens": 20343562.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0117460489273071, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.015996276101343774, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05127021331862779, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09803627446568497, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 467 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1135.375, + "completions/mean_terminated_length": 1111.0667724609375, + "completions/min_length": 804.0, + "completions/min_terminated_length": 804.0, + "epoch": 0.09361872374474894, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6990588222441145, + "kl": 0.002300262451171875, + "learning_rate": 9.34e-07, + "loss": 0.0426, + "num_tokens": 20387024.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8402442932128906, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.030969377343633926, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10508969922521105, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1112221667221529, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 468 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1441.0, + "completions/mean_length": 1249.875, + "completions/mean_terminated_length": 1233.2000732421875, + "completions/min_length": 1077.0, + "completions/min_terminated_length": 1077.0, + "epoch": 0.09381876375275056, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8212656433065106, + "kl": 0.0023937225341796875, + "learning_rate": 9.36e-07, + "loss": 0.0115, + "num_tokens": 20420798.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5525354743003845, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.034404767166634515, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.089297153505736, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.053748384988656986, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 469 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1439.0, + "completions/mean_length": 1265.625, + "completions/mean_terminated_length": 1250.0001220703125, + "completions/min_length": 1055.0, + "completions/min_terminated_length": 1055.0, + "epoch": 0.09401880376075215, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.8369435602822006, + "kl": 0.0011162757873535156, + "learning_rate": 9.379999999999998e-07, + "loss": 0.0031, + "num_tokens": 20464096.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9793186187744141, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.09802202445412954, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13556453715277944, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11666666666666668, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 470 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1484.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1194.0, + "completions/mean_terminated_length": 1194.0, + "completions/min_length": 943.0, + "completions/min_terminated_length": 943.0, + "epoch": 0.09421884376875375, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5297123370935894, + "kl": 0.002414703369140625, + "learning_rate": 9.399999999999999e-07, + "loss": -0.0033, + "num_tokens": 20511936.0, + "reward": 0.0, + "reward_std": 0.5992674827575684, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04358718795495399, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08769611235849503, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0718795288428261, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 471 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1444.0, + "completions/mean_length": 1193.8125, + "completions/mean_terminated_length": 1173.4000244140625, + "completions/min_length": 895.0, + "completions/min_terminated_length": 895.0, + "epoch": 0.09441888377675535, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7700386572306157, + "kl": 0.0018291473388671875, + "learning_rate": 9.419999999999999e-07, + "loss": -0.0479, + "num_tokens": 20555253.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9105622172355652, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.039836987977426574, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09423908333424968, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.975, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059004080210452226, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 472 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1433.0, + "completions/max_terminated_length": 1433.0, + "completions/mean_length": 966.625, + "completions/mean_terminated_length": 966.625, + "completions/min_length": 711.0, + "completions/min_terminated_length": 711.0, + "epoch": 0.09461892378475695, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3696946075582592, + "kl": 0.0029296875, + "learning_rate": 9.439999999999999e-07, + "loss": -0.0165, + "num_tokens": 20603495.0, + "reward": 0.0, + "reward_std": 0.8268293142318726, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07942092353116599, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1382669272126821, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08766518798921946, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 473 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1207.0, + "completions/max_terminated_length": 1207.0, + "completions/mean_length": 1075.4375, + "completions/mean_terminated_length": 1075.4375, + "completions/min_length": 938.0, + "completions/min_terminated_length": 938.0, + "epoch": 0.09481896379275855, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8090668433629027, + "kl": 0.002105712890625, + "learning_rate": 9.459999999999999e-07, + "loss": -0.0111, + "num_tokens": 20649270.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0460559129714966, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.2627320161521229, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11047382520213471, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10036968702787746, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 474 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1242.0, + "completions/max_terminated_length": 1242.0, + "completions/mean_length": 963.375, + "completions/mean_terminated_length": 963.375, + "completions/min_length": 816.0, + "completions/min_terminated_length": 816.0, + "epoch": 0.09501900380076016, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0250782155393487, + "kl": 0.002239227294921875, + "learning_rate": 9.479999999999999e-07, + "loss": -0.0246, + "num_tokens": 20690932.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0486993789672852, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04572647180775243, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12817775546647148, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11147163731607215, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 475 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1288.0, + "completions/mean_length": 1055.875, + "completions/mean_terminated_length": 1026.2667236328125, + "completions/min_length": 913.0, + "completions/min_terminated_length": 913.0, + "epoch": 0.09521904380876176, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.6492443166601372, + "kl": 0.0008287429809570312, + "learning_rate": 9.499999999999999e-07, + "loss": -0.0134, + "num_tokens": 20740058.0, + "reward": 7.450580596923828e-09, + "reward_std": 0.986407458782196, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04781515227113272, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06083298255177104, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07649739768026005, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 476 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1337.0, + "completions/max_terminated_length": 1337.0, + "completions/mean_length": 1112.375, + "completions/mean_terminated_length": 1112.375, + "completions/min_length": 711.0, + "completions/min_terminated_length": 711.0, + "epoch": 0.09541908381676335, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.400666622583683, + "kl": 0.003688812255859375, + "learning_rate": 9.52e-07, + "loss": -0.019, + "num_tokens": 20777768.0, + "reward": 0.0, + "reward_std": 0.8533346652984619, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.031901442705434914, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.061766347225733015, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08243965245133132, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 477 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1315.5, + "completions/mean_terminated_length": 1272.923095703125, + "completions/min_length": 1099.0, + "completions/min_terminated_length": 1099.0, + "epoch": 0.09561912382476495, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.579518804935378, + "kl": 0.00250244140625, + "learning_rate": 9.539999999999999e-07, + "loss": -0.0335, + "num_tokens": 20836096.0, + "reward": 0.0, + "reward_std": 1.0130741596221924, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.023494353485545214, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06898882103188379, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09651328828101766, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 478 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1447.0, + "completions/mean_length": 1441.0, + "completions/mean_terminated_length": 1382.0, + "completions/min_length": 1220.0, + "completions/min_terminated_length": 1220.0, + "epoch": 0.09581916383276655, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.3490409496699796, + "kl": 0.0007042884826660156, + "learning_rate": 9.559999999999998e-07, + "loss": -0.0085, + "num_tokens": 20873408.0, + "reward": 0.0, + "reward_std": 1.0190702676773071, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09999842238580771, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14145639420637546, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.03626037527129048, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 479 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1270.0, + "completions/max_terminated_length": 1270.0, + "completions/mean_length": 828.375, + "completions/mean_terminated_length": 828.375, + "completions/min_length": 418.0, + "completions/min_terminated_length": 418.0, + "epoch": 0.09601920384076815, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.3559138034342695, + "kl": 0.002716064453125, + "learning_rate": 9.58e-07, + "loss": -0.1349, + "num_tokens": 20902710.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.45492836833000183, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.00816485018231171, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14463525261550336, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09699179041242312, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 480 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1400.0, + "completions/max_terminated_length": 1400.0, + "completions/mean_length": 1163.3125, + "completions/mean_terminated_length": 1163.3125, + "completions/min_length": 996.0, + "completions/min_terminated_length": 996.0, + "epoch": 0.09621924384876976, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.684850061147518, + "kl": 0.002490997314453125, + "learning_rate": 9.6e-07, + "loss": -0.017, + "num_tokens": 20940523.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0007050037384033, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.027572861330370728, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07365154029500963, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06540472290116194, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 481 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1416.0, + "completions/max_terminated_length": 1416.0, + "completions/mean_length": 1211.0625, + "completions/mean_terminated_length": 1211.0625, + "completions/min_length": 1067.0, + "completions/min_terminated_length": 1067.0, + "epoch": 0.09641928385677136, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.245531613054143, + "kl": 0.0016412734985351562, + "learning_rate": 9.619999999999999e-07, + "loss": 0.0143, + "num_tokens": 20977436.0, + "reward": 1.1175870895385742e-08, + "reward_std": 1.064976453781128, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10133032225691331, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05106790126460632, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059004080210452226, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 482 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1233.0, + "completions/max_terminated_length": 1233.0, + "completions/mean_length": 1031.6875, + "completions/mean_terminated_length": 1031.6875, + "completions/min_length": 805.0, + "completions/min_terminated_length": 805.0, + "epoch": 0.09661932386477295, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.606933025483554, + "kl": 0.0017147064208984375, + "learning_rate": 9.64e-07, + "loss": -0.0011, + "num_tokens": 21019391.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8919689655303955, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07148904532040858, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2759363420855015, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.046943622609505783, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 483 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1185.0, + "completions/max_terminated_length": 1185.0, + "completions/mean_length": 922.625, + "completions/mean_terminated_length": 922.625, + "completions/min_length": 572.0, + "completions/min_terminated_length": 572.0, + "epoch": 0.09681936387277455, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.153361115824132, + "kl": 0.004058837890625, + "learning_rate": 9.66e-07, + "loss": -0.0303, + "num_tokens": 21056465.0, + "reward": 0.0, + "reward_std": 0.6710330843925476, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0544508213551106, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07999621449571175, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10602235962635778, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 484 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1462.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1181.8125, + "completions/mean_terminated_length": 1181.8125, + "completions/min_length": 894.0, + "completions/min_terminated_length": 894.0, + "epoch": 0.09701940388077615, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1560198868975435, + "kl": 0.002315521240234375, + "learning_rate": 9.679999999999999e-07, + "loss": 0.0039, + "num_tokens": 21101590.0, + "reward": 0.0, + "reward_std": 0.6944078207015991, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09024849348165284, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08738228665773225, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10852547064066473, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 485 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1288.0, + "completions/mean_length": 1149.9375, + "completions/mean_terminated_length": 1069.1539306640625, + "completions/min_length": 796.0, + "completions/min_terminated_length": 796.0, + "epoch": 0.09721944388877776, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0972542563746086, + "kl": 0.00353240966796875, + "learning_rate": 9.7e-07, + "loss": -0.0652, + "num_tokens": 21145229.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9358017444610596, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0879453528831644, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2750851512085748, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09339283817414604, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 486 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1423.0, + "completions/max_terminated_length": 1423.0, + "completions/mean_length": 1109.75, + "completions/mean_terminated_length": 1109.75, + "completions/min_length": 820.0, + "completions/min_terminated_length": 820.0, + "epoch": 0.09741948389677936, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3873187484852014, + "kl": 0.00335693359375, + "learning_rate": 9.72e-07, + "loss": -0.0415, + "num_tokens": 21195353.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.3974721431732178, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05609821004010421, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08509025760393403, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.6666666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.21221931353445905, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 487 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1222.0, + "completions/max_terminated_length": 1222.0, + "completions/mean_length": 1098.5625, + "completions/mean_terminated_length": 1098.5625, + "completions/min_length": 922.0, + "completions/min_terminated_length": 922.0, + "epoch": 0.09761952390478096, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4043971156662445, + "kl": 0.0019989013671875, + "learning_rate": 9.74e-07, + "loss": -0.0151, + "num_tokens": 21228842.0, + "reward": 0.0, + "reward_std": 0.4804677963256836, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05359424943886227, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1573412183351483, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07302967433402213, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 488 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1258.0, + "completions/max_terminated_length": 1258.0, + "completions/mean_length": 982.75, + "completions/mean_terminated_length": 982.75, + "completions/min_length": 713.0, + "completions/min_terminated_length": 713.0, + "epoch": 0.09781956391278256, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.142800647050454, + "kl": 0.0046234130859375, + "learning_rate": 9.759999999999998e-07, + "loss": -0.0215, + "num_tokens": 21272238.0, + "reward": 0.0, + "reward_std": 0.7192783355712891, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04184543731466083, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15366468368786598, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1264911064067352, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 489 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1486.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1351.125, + "completions/mean_terminated_length": 1351.125, + "completions/min_length": 865.0, + "completions/min_terminated_length": 865.0, + "epoch": 0.09801960392078415, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7956970421265415, + "kl": 0.002758026123046875, + "learning_rate": 9.78e-07, + "loss": -0.001, + "num_tokens": 21325168.0, + "reward": 0.0, + "reward_std": 0.9630328416824341, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06133110902133967, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11450130456923002, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11894598836509011, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 490 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1428.0, + "completions/mean_length": 1102.875, + "completions/mean_terminated_length": 1046.1429443359375, + "completions/min_length": 757.0, + "completions/min_terminated_length": 757.0, + "epoch": 0.09821964392878575, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.971435795821087, + "kl": 0.003330230712890625, + "learning_rate": 9.8e-07, + "loss": -0.0489, + "num_tokens": 21378750.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0276873111724854, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.13955802596732428, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0710789655902767, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1403039029577766, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 491 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1376.0, + "completions/max_terminated_length": 1376.0, + "completions/mean_length": 1060.125, + "completions/mean_terminated_length": 1060.125, + "completions/min_length": 841.0, + "completions/min_terminated_length": 841.0, + "epoch": 0.09841968393678736, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1138054061245315, + "kl": 0.002716064453125, + "learning_rate": 9.819999999999999e-07, + "loss": 0.0284, + "num_tokens": 21413280.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0450648069381714, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03897970309063924, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10105623689655886, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11147163731607214, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327954292297363, + "step": 492 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1301.0, + "completions/mean_length": 1097.3125, + "completions/mean_terminated_length": 1070.4666748046875, + "completions/min_length": 882.0, + "completions/min_terminated_length": 882.0, + "epoch": 0.09861972394478896, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4274429602967666, + "kl": 0.00360870361328125, + "learning_rate": 9.84e-07, + "loss": 0.0134, + "num_tokens": 21452973.0, + "reward": 0.0, + "reward_std": 0.8456502556800842, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.000353917079170788, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06271584041930198, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1308094458023239, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 493 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1446.0, + "completions/mean_length": 1186.4375, + "completions/mean_terminated_length": 1141.6429443359375, + "completions/min_length": 773.0, + "completions/min_terminated_length": 773.0, + "epoch": 0.09881976395279056, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4581411370915776, + "kl": 0.003719329833984375, + "learning_rate": 9.86e-07, + "loss": -0.0225, + "num_tokens": 21492348.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9169107675552368, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1018585935509318, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11139656856413209, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06382847385042255, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 494 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1214.0, + "completions/max_terminated_length": 1214.0, + "completions/mean_length": 1022.6875, + "completions/mean_terminated_length": 1022.6875, + "completions/min_length": 878.0, + "completions/min_terminated_length": 878.0, + "epoch": 0.09901980396079216, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2594209693531373, + "kl": 0.0013952255249023438, + "learning_rate": 9.88e-07, + "loss": -0.0249, + "num_tokens": 21527567.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9248940348625183, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.14727495210245917, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06260786152053013, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07698003589195011, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 495 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1322.9375, + "completions/mean_terminated_length": 1263.916748046875, + "completions/min_length": 932.0, + "completions/min_terminated_length": 932.0, + "epoch": 0.09921984396879376, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0971715948099154, + "kl": 0.003597259521484375, + "learning_rate": 9.9e-07, + "loss": 0.0007, + "num_tokens": 21566974.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6724504828453064, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.16632127041476494, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.25973171213567325, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09803627446568496, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 496 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1173.0, + "completions/max_terminated_length": 1173.0, + "completions/mean_length": 1099.8125, + "completions/mean_terminated_length": 1099.8125, + "completions/min_length": 800.0, + "completions/min_terminated_length": 800.0, + "epoch": 0.09941988397679535, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.167364222099895, + "kl": 0.0014548301696777344, + "learning_rate": 9.92e-07, + "loss": 0.0213, + "num_tokens": 21599851.0, + "reward": 0.0, + "reward_std": 0.9200087785720825, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04228207521481956, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10645866811128699, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.055611083361076445, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 497 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1197.6875, + "completions/mean_terminated_length": 1154.5, + "completions/min_length": 853.0, + "completions/min_terminated_length": 853.0, + "epoch": 0.09961992398479697, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.579863678006256, + "kl": 0.002346038818359375, + "learning_rate": 9.94e-07, + "loss": -0.076, + "num_tokens": 21652342.0, + "reward": 0.0, + "reward_std": 0.9505132436752319, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.14901117234025874, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09984730791981034, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06978803887752091, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 498 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1384.0, + "completions/max_terminated_length": 1384.0, + "completions/mean_length": 1078.4375, + "completions/mean_terminated_length": 1078.4375, + "completions/min_length": 907.0, + "completions/min_terminated_length": 907.0, + "epoch": 0.09981996399279856, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7344499678236343, + "kl": 0.0029754638671875, + "learning_rate": 9.959999999999999e-07, + "loss": -0.0128, + "num_tokens": 21685581.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0071659088134766, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04710042257438442, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.052391292770577046, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06426219440409446, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 499 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1075.0, + "completions/max_terminated_length": 1075.0, + "completions/mean_length": 872.1875, + "completions/mean_terminated_length": 872.1875, + "completions/min_length": 705.0, + "completions/min_terminated_length": 705.0, + "epoch": 0.10002000400080016, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.760833791954958, + "kl": 0.003582000732421875, + "learning_rate": 9.98e-07, + "loss": -0.0137, + "num_tokens": 21715272.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.8917542695999146, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11682712074508111, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08304438227337775, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.17191729277636836, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 500 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1438.0, + "completions/mean_length": 1208.0, + "completions/mean_terminated_length": 1188.533447265625, + "completions/min_length": 929.0, + "completions/min_terminated_length": 929.0, + "epoch": 0.10022004400880176, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2826802167133593, + "kl": 0.004047393798828125, + "learning_rate": 1e-06, + "loss": -0.0189, + "num_tokens": 21751192.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.033771276473999, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06585917682270193, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06795892535563894, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05426273532033234, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 501 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1356.0, + "completions/max_terminated_length": 1356.0, + "completions/mean_length": 1003.9375, + "completions/mean_terminated_length": 1003.9375, + "completions/min_length": 799.0, + "completions/min_terminated_length": 799.0, + "epoch": 0.10042008401680336, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.127396395790428, + "kl": 0.002971649169921875, + "learning_rate": 9.999998902889782e-07, + "loss": -0.0193, + "num_tokens": 21790759.0, + "reward": 3.725290298461914e-08, + "reward_std": 1.0647034645080566, + "rewards/wordcountpos_reward_nokeypoint/mean": 3.725290298461914e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.014324276691609726, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15714187148696138, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13984117975602023, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 502 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1437.0, + "completions/max_terminated_length": 1437.0, + "completions/mean_length": 1079.1875, + "completions/mean_terminated_length": 1079.1875, + "completions/min_length": 787.0, + "completions/min_terminated_length": 787.0, + "epoch": 0.10062012402480495, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5541990034508486, + "kl": 0.00396728515625, + "learning_rate": 9.999995611559667e-07, + "loss": 0.01, + "num_tokens": 21831194.0, + "reward": 0.0, + "reward_std": 0.9026003479957581, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.15854462233266234, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1931846665175436, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08243965245133132, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 503 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1197.5625, + "completions/mean_terminated_length": 1154.357177734375, + "completions/min_length": 861.0, + "completions/min_terminated_length": 861.0, + "epoch": 0.10082016403280657, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2710347167879306, + "kl": 0.00386810302734375, + "learning_rate": 9.999990126011257e-07, + "loss": 0.0072, + "num_tokens": 21877451.0, + "reward": -2.2351741790771484e-08, + "reward_std": 0.9809114933013916, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.02245946263088769, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06402570255243588, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07252075054258099, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 504 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1471.0, + "completions/mean_length": 1181.1875, + "completions/mean_terminated_length": 989.9000244140625, + "completions/min_length": 763.0, + "completions/min_terminated_length": 763.0, + "epoch": 0.10102020404080816, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3117741267051564, + "kl": 0.00395965576171875, + "learning_rate": 9.999982446247225e-07, + "loss": -0.0326, + "num_tokens": 21926166.0, + "reward": 0.0, + "reward_std": 0.7769395112991333, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10517685273687306, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.02916698542786899, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13957607775504183, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 505 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 896.0, + "completions/max_terminated_length": 896.0, + "completions/mean_length": 796.4375, + "completions/mean_terminated_length": 796.4375, + "completions/min_length": 622.0, + "completions/min_terminated_length": 622.0, + "epoch": 0.10122024404880976, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9905966830352444, + "kl": 0.0031337738037109375, + "learning_rate": 9.999972572271322e-07, + "loss": -0.0592, + "num_tokens": 21964949.0, + "reward": 0.0, + "reward_std": 0.906674861907959, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1461541631448373, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14782544715207366, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13709958532503408, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 506 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1397.0, + "completions/mean_length": 1134.1875, + "completions/mean_terminated_length": 1109.800048828125, + "completions/min_length": 944.0, + "completions/min_terminated_length": 944.0, + "epoch": 0.10142028405681136, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3727715703033887, + "kl": 0.0018405914306640625, + "learning_rate": 9.999960504088355e-07, + "loss": -0.0287, + "num_tokens": 22014064.0, + "reward": 0.0, + "reward_std": 0.7468199729919434, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11144887413027003, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17857670551458904, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12292725943057185, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 507 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1181.8125, + "completions/mean_terminated_length": 1108.3846435546875, + "completions/min_length": 906.0, + "completions/min_terminated_length": 906.0, + "epoch": 0.10162032406481296, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1115599843728625, + "kl": 0.003803253173828125, + "learning_rate": 9.999946241704217e-07, + "loss": -0.0391, + "num_tokens": 22064053.0, + "reward": 0.0, + "reward_std": 1.0208356380462646, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.14078214699313157, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.3054601387900435, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09269623828717427, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 508 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1224.0, + "completions/max_terminated_length": 1224.0, + "completions/mean_length": 914.6875, + "completions/mean_terminated_length": 914.6875, + "completions/min_length": 644.0, + "completions/min_terminated_length": 644.0, + "epoch": 0.10182036407281456, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7115527057802185, + "kl": 0.0030384063720703125, + "learning_rate": 9.999929785125855e-07, + "loss": 0.0375, + "num_tokens": 22098552.0, + "reward": 0.0, + "reward_std": 0.7375361323356628, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.02763799478019314, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.029700657451691294, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.18519259244445038, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 509 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1425.0, + "completions/mean_length": 1114.0625, + "completions/mean_terminated_length": 1088.3333740234375, + "completions/min_length": 757.0, + "completions/min_terminated_length": 757.0, + "epoch": 0.10202040408081617, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4740466802246437, + "kl": 0.00406646728515625, + "learning_rate": 9.999911134361297e-07, + "loss": -0.0157, + "num_tokens": 22148521.0, + "reward": 0.0, + "reward_std": 0.8847533464431763, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.3027529520602966, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.3136313357732767, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13270686158262923, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 510 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1113.5, + "completions/mean_terminated_length": 1113.5, + "completions/min_length": 811.0, + "completions/min_terminated_length": 811.0, + "epoch": 0.10222044408881777, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.322538750511683, + "kl": 0.0043315887451171875, + "learning_rate": 9.999890289419633e-07, + "loss": -0.0179, + "num_tokens": 22193097.0, + "reward": -1.1175870895385742e-08, + "reward_std": 1.0619033575057983, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09469066695890645, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.039791053252346574, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12224747213928168, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 511 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1351.0, + "completions/max_terminated_length": 1351.0, + "completions/mean_length": 1108.875, + "completions/mean_terminated_length": 1108.875, + "completions/min_length": 683.0, + "completions/min_terminated_length": 683.0, + "epoch": 0.10242048409681936, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4949994964351316, + "kl": 0.00380706787109375, + "learning_rate": 9.999867250311034e-07, + "loss": -0.0208, + "num_tokens": 22241735.0, + "reward": 0.0, + "reward_std": 0.9236515760421753, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.23691775505348905, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1929106039135632, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11666666666666668, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 512 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1307.0, + "completions/mean_length": 1181.875, + "completions/mean_terminated_length": 1160.666748046875, + "completions/min_length": 904.0, + "completions/min_terminated_length": 904.0, + "epoch": 0.10262052410482096, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3074140530322804, + "kl": 0.0020389556884765625, + "learning_rate": 9.999842017046729e-07, + "loss": -0.0148, + "num_tokens": 22289389.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9659587740898132, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06631300462510295, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11209856705338912, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06309898162000303, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 513 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1255.0, + "completions/max_terminated_length": 1255.0, + "completions/mean_length": 1038.3125, + "completions/mean_terminated_length": 1038.3125, + "completions/min_length": 861.0, + "completions/min_terminated_length": 861.0, + "epoch": 0.10282056411282256, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8766525128156166, + "kl": 0.004852294921875, + "learning_rate": 9.999814589639024e-07, + "loss": -0.0356, + "num_tokens": 22340890.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0018025636672974, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1058415083510815, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09087182712193063, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1261979632400061, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 514 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1331.0, + "completions/max_terminated_length": 1331.0, + "completions/mean_length": 1083.625, + "completions/mean_terminated_length": 1083.625, + "completions/min_length": 894.0, + "completions/min_terminated_length": 894.0, + "epoch": 0.10302060412082416, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6714501381209903, + "kl": 0.00371551513671875, + "learning_rate": 9.99978496810129e-07, + "loss": -0.0591, + "num_tokens": 22387172.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8851035237312317, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03898584096679297, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.036543256943346765, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13326387079497304, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 515 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1398.0, + "completions/max_terminated_length": 1398.0, + "completions/mean_length": 1096.625, + "completions/mean_terminated_length": 1096.625, + "completions/min_length": 873.0, + "completions/min_terminated_length": 873.0, + "epoch": 0.10322064412882577, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.670217920551818, + "kl": 0.0030670166015625, + "learning_rate": 9.999753152447975e-07, + "loss": -0.0482, + "num_tokens": 22424182.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6973052024841309, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.011355264786232881, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15259214421986964, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09651328828101764, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 516 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1312.0, + "completions/mean_length": 1384.25, + "completions/mean_terminated_length": 1191.3333740234375, + "completions/min_length": 1091.0, + "completions/min_terminated_length": 1091.0, + "epoch": 0.10342068413682737, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.378655056951866, + "kl": 0.0024471282958984375, + "learning_rate": 9.999719142694587e-07, + "loss": -0.0408, + "num_tokens": 22472842.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7642661333084106, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.010973279727663125, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08383221807077684, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13109227736669002, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 517 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1408.0, + "completions/mean_length": 1161.625, + "completions/mean_terminated_length": 958.6000366210938, + "completions/min_length": 701.0, + "completions/min_terminated_length": 701.0, + "epoch": 0.10362072414482897, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.505555440037528, + "kl": 0.002750396728515625, + "learning_rate": 9.999682938857714e-07, + "loss": -0.0174, + "num_tokens": 22510708.0, + "reward": 0.0, + "reward_std": 0.42126840353012085, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03333007967139788, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.040532804402139036, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.18358568490953675, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 518 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1085.0, + "completions/max_terminated_length": 1085.0, + "completions/mean_length": 905.6875, + "completions/mean_terminated_length": 905.6875, + "completions/min_length": 767.0, + "completions/min_terminated_length": 767.0, + "epoch": 0.10382076415283056, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3305587518231863, + "kl": 0.0032501220703125, + "learning_rate": 9.999644540955006e-07, + "loss": 0.0056, + "num_tokens": 22549495.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9963435530662537, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1518720412607482, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14669125478997336, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14807405554629052, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 519 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1213.0, + "completions/max_terminated_length": 1213.0, + "completions/mean_length": 1032.4375, + "completions/mean_terminated_length": 1032.4375, + "completions/min_length": 775.0, + "completions/min_terminated_length": 775.0, + "epoch": 0.10402080416083216, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.466529974969054, + "kl": 0.0042572021484375, + "learning_rate": 9.99960394900519e-07, + "loss": -0.0277, + "num_tokens": 22589486.0, + "reward": 0.0, + "reward_std": 0.9371652007102966, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03459109503285297, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09432073966576701, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10852547064066473, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 520 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1441.0, + "completions/mean_length": 1206.3125, + "completions/mean_terminated_length": 1186.7333984375, + "completions/min_length": 886.0, + "completions/min_terminated_length": 886.0, + "epoch": 0.10422084416883377, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.543986502982465, + "kl": 0.00431060791015625, + "learning_rate": 9.999561163028054e-07, + "loss": -0.004, + "num_tokens": 22632259.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.068537950515747, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.021598039974713393, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10230942820685852, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.15293426329272616, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 521 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1454.0, + "completions/mean_length": 1255.4375, + "completions/mean_terminated_length": 1220.5, + "completions/min_length": 942.0, + "completions/min_terminated_length": 942.0, + "epoch": 0.10442088417683537, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3295230320301257, + "kl": 0.0041046142578125, + "learning_rate": 9.999516183044463e-07, + "loss": -0.0301, + "num_tokens": 22681202.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.063791275024414, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03890405480454996, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04543502314872267, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11080513425729775, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 522 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1352.0, + "completions/max_terminated_length": 1352.0, + "completions/mean_length": 969.5, + "completions/mean_terminated_length": 969.5, + "completions/min_length": 754.0, + "completions/min_terminated_length": 754.0, + "epoch": 0.10462092418483697, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2857151520086796, + "kl": 0.003620147705078125, + "learning_rate": 9.99946900907635e-07, + "loss": -0.0098, + "num_tokens": 22721914.0, + "reward": 2.60770320892334e-08, + "reward_std": 0.9086049795150757, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.60770320892334e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.2152356531907636, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12251239205586248, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.102469507659596, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 523 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1244.0, + "completions/max_terminated_length": 1244.0, + "completions/mean_length": 1093.375, + "completions/mean_terminated_length": 1093.375, + "completions/min_length": 958.0, + "completions/min_terminated_length": 958.0, + "epoch": 0.10482096419283857, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1591991161359387, + "kl": 0.0040912628173828125, + "learning_rate": 9.999419641146717e-07, + "loss": 0.0022, + "num_tokens": 22768872.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0097687244415283, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05711870069197811, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.32976142140666576, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10532137766186216, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 524 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1423.0, + "completions/max_terminated_length": 1423.0, + "completions/mean_length": 1142.5625, + "completions/mean_terminated_length": 1142.5625, + "completions/min_length": 756.0, + "completions/min_terminated_length": 756.0, + "epoch": 0.10502100420084017, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2336235677768457, + "kl": 0.00302886962890625, + "learning_rate": 9.999368079279633e-07, + "loss": -0.0308, + "num_tokens": 22809145.0, + "reward": 0.0, + "reward_std": 0.5164128541946411, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11118397166253532, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10070406391458832, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09496588081262931, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 525 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1445.0, + "completions/mean_length": 990.3125, + "completions/mean_terminated_length": 956.3333740234375, + "completions/min_length": 659.0, + "completions/min_terminated_length": 659.0, + "epoch": 0.10522104420884176, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1813665273522598, + "kl": 0.0016756057739257812, + "learning_rate": 9.999314323500245e-07, + "loss": -0.05, + "num_tokens": 22852614.0, + "reward": 0.0, + "reward_std": 0.9530852437019348, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.17502530438323433, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.18232174040917098, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06763190130459203, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 526 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1415.0, + "completions/mean_length": 1003.25, + "completions/mean_terminated_length": 932.2857666015625, + "completions/min_length": 418.0, + "completions/min_terminated_length": 418.0, + "epoch": 0.10542108421684337, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4415811656494935, + "kl": 0.004055023193359375, + "learning_rate": 9.99925837383476e-07, + "loss": -0.034, + "num_tokens": 22884378.0, + "reward": 0.0, + "reward_std": 0.5097564458847046, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06277058437843437, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07400391047408177, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.17469550228474265, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 527 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1230.0, + "completions/max_terminated_length": 1230.0, + "completions/mean_length": 925.9375, + "completions/mean_terminated_length": 925.9375, + "completions/min_length": 716.0, + "completions/min_terminated_length": 716.0, + "epoch": 0.10562112422484497, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.002472058086025, + "kl": 0.00274658203125, + "learning_rate": 9.999200230310464e-07, + "loss": -0.0727, + "num_tokens": 22912169.0, + "reward": 0.0, + "reward_std": 0.6466852426528931, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.028328763870241254, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1262519720770018, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0969917904124231, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 528 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1336.0, + "completions/mean_length": 1368.25, + "completions/mean_terminated_length": 1198.857177734375, + "completions/min_length": 1106.0, + "completions/min_terminated_length": 1106.0, + "epoch": 0.10582116423284657, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8388924914530578, + "kl": 0.00418853759765625, + "learning_rate": 9.999139892955702e-07, + "loss": -0.0022, + "num_tokens": 22969221.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.4881129860877991, + "rewards/wordcountpos_reward_nokeypoint/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.11810764593268408, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11750768581802086, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10390522747338703, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 529 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1294.0625, + "completions/mean_terminated_length": 1225.416748046875, + "completions/min_length": 878.0, + "completions/min_terminated_length": 878.0, + "epoch": 0.10602120424084817, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8482498699266325, + "kl": 0.003875732421875, + "learning_rate": 9.999077361799901e-07, + "loss": -0.0626, + "num_tokens": 23022566.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.44140541553497314, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.009542758743325615, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15561681859843132, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12405196043952266, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 530 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1374.0, + "completions/mean_length": 1066.8125, + "completions/mean_terminated_length": 1037.933349609375, + "completions/min_length": 840.0, + "completions/min_terminated_length": 840.0, + "epoch": 0.10622124424884977, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.9042295594147611, + "kl": 0.0022029876708984375, + "learning_rate": 9.999012636873545e-07, + "loss": -0.0084, + "num_tokens": 23059907.0, + "reward": 0.0, + "reward_std": 0.5950965881347656, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08079857032495062, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06996881901254871, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.070841502796867, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 531 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1281.4375, + "completions/mean_terminated_length": 1182.0909423828125, + "completions/min_length": 862.0, + "completions/min_terminated_length": 862.0, + "epoch": 0.10642128425685136, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.8442792801532126, + "kl": 0.0019817352294921875, + "learning_rate": 9.9989457182082e-07, + "loss": 0.0017, + "num_tokens": 23102770.0, + "reward": 0.0, + "reward_std": 0.6525969505310059, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.01337994855165649, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11546365751617711, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12164002752505566, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 532 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1390.0, + "completions/mean_terminated_length": 1324.0, + "completions/min_length": 1013.0, + "completions/min_terminated_length": 1013.0, + "epoch": 0.10662132426485298, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.0911933495086146, + "kl": 0.00165557861328125, + "learning_rate": 9.998876605836494e-07, + "loss": -0.0251, + "num_tokens": 23150082.0, + "reward": 0.0, + "reward_std": 0.5944397449493408, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.040939646958946016, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06648271671334266, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08681611046941135, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 533 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1391.0, + "completions/max_terminated_length": 1391.0, + "completions/mean_length": 1112.5625, + "completions/mean_terminated_length": 1112.5625, + "completions/min_length": 894.0, + "completions/min_terminated_length": 894.0, + "epoch": 0.10682136427285457, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.148853539598023, + "kl": 0.00371551513671875, + "learning_rate": 9.998805299792124e-07, + "loss": 0.0012, + "num_tokens": 23188043.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.3106329143047333, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.24831863042698804, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.22348484856434186, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10532137766186216, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 534 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1219.0, + "completions/mean_length": 1084.25, + "completions/mean_terminated_length": 1056.533447265625, + "completions/min_length": 879.0, + "completions/min_terminated_length": 879.0, + "epoch": 0.10702140428085617, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.601531797962958, + "kl": 0.003932952880859375, + "learning_rate": 9.998731800109863e-07, + "loss": -0.0499, + "num_tokens": 23238095.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8337744474411011, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0835135410685546, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12141949678911151, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05947299418254506, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 535 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1464.0, + "completions/max_terminated_length": 1464.0, + "completions/mean_length": 1007.5, + "completions/mean_terminated_length": 1007.5, + "completions/min_length": 803.0, + "completions/min_terminated_length": 803.0, + "epoch": 0.10722144428885777, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5813553117891965, + "kl": 0.00455474853515625, + "learning_rate": 9.998656106825547e-07, + "loss": -0.0129, + "num_tokens": 23280775.0, + "reward": 2.60770320892334e-08, + "reward_std": 0.9939677715301514, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.60770320892334e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0727815281320766, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13843623287263193, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10174405069512348, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 536 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1282.4375, + "completions/mean_terminated_length": 1151.9000244140625, + "completions/min_length": 847.0, + "completions/min_terminated_length": 847.0, + "epoch": 0.10742148429685937, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4013721427541292, + "kl": 0.0046844482421875, + "learning_rate": 9.998578219976087e-07, + "loss": 0.0293, + "num_tokens": 23326558.0, + "reward": 0.0, + "reward_std": 0.8520996570587158, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06707116019875366, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.3357327819256247, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 537 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1340.0, + "completions/max_terminated_length": 1340.0, + "completions/mean_length": 1086.5625, + "completions/mean_terminated_length": 1086.5625, + "completions/min_length": 666.0, + "completions/min_terminated_length": 666.0, + "epoch": 0.10762152430486097, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.932989269859236, + "kl": 0.00489044189453125, + "learning_rate": 9.998498139599457e-07, + "loss": -0.0524, + "num_tokens": 23376671.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5374624133110046, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.00946854553663501, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1254460088190595, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7083333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10576003586036263, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 538 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1434.0, + "completions/max_terminated_length": 1434.0, + "completions/mean_length": 1313.375, + "completions/mean_terminated_length": 1313.375, + "completions/min_length": 1153.0, + "completions/min_terminated_length": 1153.0, + "epoch": 0.10782156431286258, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7842120510976742, + "kl": 0.00365447998046875, + "learning_rate": 9.99841586573471e-07, + "loss": -0.0098, + "num_tokens": 23428461.0, + "reward": 0.0, + "reward_std": 1.034376621246338, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05141166112492662, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07461743003737732, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10749676997731401, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327954292297363, + "step": 539 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1480.0, + "completions/max_terminated_length": 1480.0, + "completions/mean_length": 1228.625, + "completions/mean_terminated_length": 1228.625, + "completions/min_length": 980.0, + "completions/min_terminated_length": 980.0, + "epoch": 0.10802160432086418, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.517815798298583, + "kl": 0.00492095947265625, + "learning_rate": 9.998331398421957e-07, + "loss": -0.0526, + "num_tokens": 23463311.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8852605223655701, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.00445444465810794, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.24468727562240217, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11474609652039004, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 540 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1358.0, + "completions/max_terminated_length": 1358.0, + "completions/mean_length": 1039.1875, + "completions/mean_terminated_length": 1039.1875, + "completions/min_length": 749.0, + "completions/min_terminated_length": 749.0, + "epoch": 0.10822164432886577, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3221040260129726, + "kl": 0.004192352294921875, + "learning_rate": 9.99824473770239e-07, + "loss": -0.0023, + "num_tokens": 23495018.0, + "reward": 0.0, + "reward_std": 0.7817463278770447, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.12732565159658416, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0700493522564605, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08766518798921946, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 541 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1109.0, + "completions/mean_length": 1142.25, + "completions/mean_terminated_length": 927.6000366210938, + "completions/min_length": 780.0, + "completions/min_terminated_length": 780.0, + "epoch": 0.10842168433686737, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3688152302722205, + "kl": 0.004535675048828125, + "learning_rate": 9.99815588361826e-07, + "loss": -0.014, + "num_tokens": 23545078.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5076950192451477, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.10090473153466618, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08377535284831496, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10878112581387149, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 542 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1180.0, + "completions/max_terminated_length": 1180.0, + "completions/mean_length": 953.5, + "completions/mean_terminated_length": 953.5, + "completions/min_length": 733.0, + "completions/min_terminated_length": 733.0, + "epoch": 0.10862172434486897, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.314924402669171, + "kl": 0.00534820556640625, + "learning_rate": 9.998064836212896e-07, + "loss": -0.0368, + "num_tokens": 23578190.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0238995552062988, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.016850655938095017, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05304063406652044, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10809803506625451, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 543 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1437.0, + "completions/mean_length": 1240.625, + "completions/mean_terminated_length": 1154.166748046875, + "completions/min_length": 951.0, + "completions/min_terminated_length": 951.0, + "epoch": 0.10882176435287057, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8229180851133986, + "kl": 0.00388336181640625, + "learning_rate": 9.997971595530694e-07, + "loss": -0.0768, + "num_tokens": 23619664.0, + "reward": 0.0, + "reward_std": 1.0188395977020264, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09778676321456771, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05730280370950804, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1060223596263578, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 544 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1199.0, + "completions/max_terminated_length": 1199.0, + "completions/mean_length": 892.6875, + "completions/mean_terminated_length": 892.6875, + "completions/min_length": 706.0, + "completions/min_terminated_length": 706.0, + "epoch": 0.10902180436087218, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.470896467765749, + "kl": 0.00403594970703125, + "learning_rate": 9.997876161617116e-07, + "loss": -0.0227, + "num_tokens": 23668883.0, + "reward": -2.2351741790771484e-08, + "reward_std": 0.9376604557037354, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.034683988843343255, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09589431346206127, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14782371884055637, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 545 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1298.0, + "completions/max_terminated_length": 1298.0, + "completions/mean_length": 1037.125, + "completions/mean_terminated_length": 1037.125, + "completions/min_length": 786.0, + "completions/min_terminated_length": 786.0, + "epoch": 0.10922184436887378, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3976836890084163, + "kl": 0.0042724609375, + "learning_rate": 9.997778534518698e-07, + "loss": -0.0268, + "num_tokens": 23702261.0, + "reward": 0.0, + "reward_std": 0.9394770860671997, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10070166616905439, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11614660164081918, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.067631901304592, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 546 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1399.0, + "completions/mean_length": 1252.8125, + "completions/mean_terminated_length": 1236.3333740234375, + "completions/min_length": 970.0, + "completions/min_terminated_length": 970.0, + "epoch": 0.10942188437687538, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0128310503650813, + "kl": 0.003795623779296875, + "learning_rate": 9.99767871428304e-07, + "loss": 0.0068, + "num_tokens": 23754234.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5945786833763123, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.034996085486985316, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09759714274400597, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1261979632400061, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 547 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1260.0, + "completions/max_terminated_length": 1260.0, + "completions/mean_length": 1081.25, + "completions/mean_terminated_length": 1081.25, + "completions/min_length": 993.0, + "completions/min_terminated_length": 993.0, + "epoch": 0.10962192438487697, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7705847291893746, + "kl": 0.002796173095703125, + "learning_rate": 9.997576700958821e-07, + "loss": -0.0174, + "num_tokens": 23793222.0, + "reward": 0.0, + "reward_std": 0.8656362295150757, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0058663719978308436, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10879588622743583, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875000000000001, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.052880017930181294, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 548 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1290.0, + "completions/max_terminated_length": 1290.0, + "completions/mean_length": 1049.8125, + "completions/mean_terminated_length": 1049.8125, + "completions/min_length": 759.0, + "completions/min_terminated_length": 759.0, + "epoch": 0.10982196439287857, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.629146848030375, + "kl": 0.00566864013671875, + "learning_rate": 9.99747249459578e-07, + "loss": -0.0118, + "num_tokens": 23828419.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8004570007324219, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.030755935214515452, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10426174133566694, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09953596037316066, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 549 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1344.0, + "completions/max_terminated_length": 1344.0, + "completions/mean_length": 1141.9375, + "completions/mean_terminated_length": 1141.9375, + "completions/min_length": 913.0, + "completions/min_terminated_length": 913.0, + "epoch": 0.11002200440088018, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.487893573324628, + "kl": 0.005035400390625, + "learning_rate": 9.997366095244725e-07, + "loss": -0.0162, + "num_tokens": 23872234.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8072513341903687, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08765597597209498, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1773586823768135, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08606629658238707, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 550 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1310.0, + "completions/max_terminated_length": 1310.0, + "completions/mean_length": 1119.3125, + "completions/mean_terminated_length": 1119.3125, + "completions/min_length": 969.0, + "completions/min_terminated_length": 969.0, + "epoch": 0.11022204440888178, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7581838063256248, + "kl": 0.00524139404296875, + "learning_rate": 9.997257502957542e-07, + "loss": 0.0145, + "num_tokens": 23909575.0, + "reward": 0.0, + "reward_std": 1.0273160934448242, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.003538858607537901, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03429993509260952, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10183501544346313, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 551 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1356.0, + "completions/mean_length": 1150.4375, + "completions/mean_terminated_length": 1069.769287109375, + "completions/min_length": 753.0, + "completions/min_terminated_length": 753.0, + "epoch": 0.11042208441688338, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2932117774252037, + "kl": 0.004024505615234375, + "learning_rate": 9.997146717787177e-07, + "loss": 0.0421, + "num_tokens": 23951454.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.9031859636306763, + "rewards/wordcountpos_reward_nokeypoint/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08583483191419812, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0591200727542733, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06885303726590962, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 552 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1227.0, + "completions/mean_length": 1152.125, + "completions/mean_terminated_length": 1071.84619140625, + "completions/min_length": 946.0, + "completions/min_terminated_length": 946.0, + "epoch": 0.11062212442488498, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.258379208259791, + "kl": 0.00445556640625, + "learning_rate": 9.997033739787652e-07, + "loss": 0.0431, + "num_tokens": 23992792.0, + "reward": 0.0, + "reward_std": 0.7376552820205688, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.13358870805155706, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.042831126788857976, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11603000888978234, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 553 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1359.0, + "completions/max_terminated_length": 1359.0, + "completions/mean_length": 1139.9375, + "completions/mean_terminated_length": 1139.9375, + "completions/min_length": 947.0, + "completions/min_terminated_length": 947.0, + "epoch": 0.11082216443288657, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.387138122659551, + "kl": 0.0028324127197265625, + "learning_rate": 9.996918569014055e-07, + "loss": 0.034, + "num_tokens": 24026519.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.40994346141815186, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1986320276817004, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17886299990588356, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.03442651863295481, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 554 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 905.0, + "completions/max_terminated_length": 905.0, + "completions/mean_length": 797.4375, + "completions/mean_terminated_length": 797.4375, + "completions/min_length": 563.0, + "completions/min_terminated_length": 563.0, + "epoch": 0.11102220444088817, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4366228313162646, + "kl": 0.003536224365234375, + "learning_rate": 9.996801205522545e-07, + "loss": 0.0101, + "num_tokens": 24055854.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5550634264945984, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04299462926512924, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10826708302916341, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08777074514725114, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 555 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1398.0, + "completions/mean_length": 1127.5625, + "completions/mean_terminated_length": 1102.7333984375, + "completions/min_length": 837.0, + "completions/min_terminated_length": 837.0, + "epoch": 0.11122224444888978, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8627431372961913, + "kl": 0.003215789794921875, + "learning_rate": 9.996681649370347e-07, + "loss": -0.0141, + "num_tokens": 24097479.0, + "reward": 0.0, + "reward_std": 0.6904380321502686, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.13360701746330514, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11396910750393119, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.060705726131767716, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 556 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1265.125, + "completions/mean_terminated_length": 1231.571533203125, + "completions/min_length": 946.0, + "completions/min_terminated_length": 946.0, + "epoch": 0.11142228445689138, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7190387107283334, + "kl": 0.00356292724609375, + "learning_rate": 9.996559900615756e-07, + "loss": -0.0359, + "num_tokens": 24147553.0, + "reward": 0.0, + "reward_std": 0.7589155435562134, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06401253127958095, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06485729425014723, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12285191326386657, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 557 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1453.0, + "completions/mean_length": 1030.4375, + "completions/mean_terminated_length": 999.1333618164062, + "completions/min_length": 643.0, + "completions/min_terminated_length": 643.0, + "epoch": 0.11162232446489298, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.218298169673787, + "kl": 0.00634002685546875, + "learning_rate": 9.996435959318142e-07, + "loss": -0.0705, + "num_tokens": 24200120.0, + "reward": 0.0, + "reward_std": 0.9369520545005798, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04120574477960039, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10657011736912425, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11385500851066223, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 558 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1353.0, + "completions/mean_length": 1191.0625, + "completions/mean_terminated_length": 1170.4666748046875, + "completions/min_length": 788.0, + "completions/min_terminated_length": 788.0, + "epoch": 0.11182236447289458, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6912941342770518, + "kl": 0.003925323486328125, + "learning_rate": 9.996309825537934e-07, + "loss": -0.0665, + "num_tokens": 24243161.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5855206251144409, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03209337575557279, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.24529228737940076, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13045504405165223, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 559 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1283.0, + "completions/max_terminated_length": 1283.0, + "completions/mean_length": 1027.0, + "completions/mean_terminated_length": 1027.0, + "completions/min_length": 879.0, + "completions/min_terminated_length": 879.0, + "epoch": 0.11202240448089618, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.006516109716043, + "kl": 0.00428009033203125, + "learning_rate": 9.996181499336637e-07, + "loss": -0.037, + "num_tokens": 24281313.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0119245052337646, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11472290092546181, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03492957985001679, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08777074514725113, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 560 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1279.25, + "completions/mean_terminated_length": 1264.533447265625, + "completions/min_length": 1039.0, + "completions/min_terminated_length": 1039.0, + "epoch": 0.11222244448889777, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.907339174919706, + "kl": 0.004383087158203125, + "learning_rate": 9.996050980776829e-07, + "loss": -0.0291, + "num_tokens": 24323189.0, + "reward": 0.0, + "reward_std": 0.7085694074630737, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07587732162399607, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06039769402593903, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08777074514725111, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 561 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1312.0, + "completions/max_terminated_length": 1312.0, + "completions/mean_length": 960.1875, + "completions/mean_terminated_length": 960.1875, + "completions/min_length": 733.0, + "completions/min_terminated_length": 733.0, + "epoch": 0.11242248449689939, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.43405949404868, + "kl": 0.00487518310546875, + "learning_rate": 9.995918269922143e-07, + "loss": -0.0215, + "num_tokens": 24361048.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0065057277679443, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.007411520221589781, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08579792695994504, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0909822937597079, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 562 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1373.0, + "completions/max_terminated_length": 1373.0, + "completions/mean_length": 1137.3125, + "completions/mean_terminated_length": 1137.3125, + "completions/min_length": 804.0, + "completions/min_terminated_length": 804.0, + "epoch": 0.11262252450490098, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.938779272693812, + "kl": 0.0027561187744140625, + "learning_rate": 9.995783366837291e-07, + "loss": -0.0127, + "num_tokens": 24395309.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9113973379135132, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0637428618877009, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08694916035360521, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07876359377087681, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 563 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1279.0, + "completions/max_terminated_length": 1279.0, + "completions/mean_length": 1028.0625, + "completions/mean_terminated_length": 1028.0625, + "completions/min_length": 782.0, + "completions/min_terminated_length": 782.0, + "epoch": 0.11282256451290258, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9517605505957807, + "kl": 0.005462646484375, + "learning_rate": 9.995646271588058e-07, + "loss": -0.0551, + "num_tokens": 24442206.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0042849779129028, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.052810815437219744, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0642457772698567, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.15533714826025885, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 564 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1391.0, + "completions/mean_length": 1297.25, + "completions/mean_terminated_length": 1250.4615478515625, + "completions/min_length": 1006.0, + "completions/min_terminated_length": 1006.0, + "epoch": 0.11302260452090418, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.96886034398946, + "kl": 0.00457763671875, + "learning_rate": 9.995506984241287e-07, + "loss": -0.043, + "num_tokens": 24485426.0, + "reward": 0.0, + "reward_std": 0.7237389087677002, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.016109274003575355, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07852561884334874, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1080980350662545, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 565 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1329.0, + "completions/mean_length": 1222.5625, + "completions/mean_terminated_length": 1204.0667724609375, + "completions/min_length": 1038.0, + "completions/min_terminated_length": 1038.0, + "epoch": 0.11322264452890578, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.345280475953298, + "kl": 0.005157470703125, + "learning_rate": 9.995365504864897e-07, + "loss": -0.0187, + "num_tokens": 24535275.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9915969371795654, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.061705132080139585, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07332570584582773, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08153617692869923, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 566 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1332.25, + "completions/mean_terminated_length": 1293.5384521484375, + "completions/min_length": 1128.0, + "completions/min_terminated_length": 1128.0, + "epoch": 0.11342268453690738, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.947961737920906, + "kl": 0.00479888916015625, + "learning_rate": 9.995221833527873e-07, + "loss": -0.0082, + "num_tokens": 24585951.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.8035404682159424, + "rewards/wordcountpos_reward_nokeypoint/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.01547179518300102, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.044044331453578806, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11792967144619462, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 567 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1461.0, + "completions/mean_length": 1306.0625, + "completions/mean_terminated_length": 1278.357177734375, + "completions/min_length": 1188.0, + "completions/min_terminated_length": 1188.0, + "epoch": 0.11362272454490899, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.315282742034719, + "kl": 0.0034656524658203125, + "learning_rate": 9.995075970300272e-07, + "loss": -0.0154, + "num_tokens": 24629072.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9222784042358398, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.02811932584717915, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04982858267195667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1355373393953503, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 568 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1449.0, + "completions/mean_length": 1248.375, + "completions/mean_terminated_length": 1190.3077392578125, + "completions/min_length": 1022.0, + "completions/min_terminated_length": 1022.0, + "epoch": 0.11382276455291059, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.591174011891405, + "kl": 0.0036468505859375, + "learning_rate": 9.994927915253217e-07, + "loss": -0.0046, + "num_tokens": 24661222.0, + "reward": 0.0, + "reward_std": 1.015749454498291, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.018588614891532823, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06499807815729092, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.056927504255331086, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 569 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1331.3125, + "completions/mean_terminated_length": 1254.6363525390625, + "completions/min_length": 977.0, + "completions/min_terminated_length": 977.0, + "epoch": 0.11402280456091218, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.673508453833841, + "kl": 0.00510406494140625, + "learning_rate": 9.994777668458896e-07, + "loss": -0.0156, + "num_tokens": 24717723.0, + "reward": 0.0, + "reward_std": 0.5909874439239502, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11309062611536627, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11092010788169006, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11385500851066223, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 570 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1206.4375, + "completions/mean_terminated_length": 1186.86669921875, + "completions/min_length": 1007.0, + "completions/min_terminated_length": 1007.0, + "epoch": 0.11422284456891378, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6397499742526698, + "kl": 0.0040035247802734375, + "learning_rate": 9.994625229990577e-07, + "loss": 0.0014, + "num_tokens": 24759114.0, + "reward": 0.0, + "reward_std": 1.0300233364105225, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04905451093329143, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0782337030652068, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059472994182545036, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 571 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1399.0, + "completions/max_terminated_length": 1399.0, + "completions/mean_length": 1055.5625, + "completions/mean_terminated_length": 1055.5625, + "completions/min_length": 885.0, + "completions/min_terminated_length": 885.0, + "epoch": 0.11442288457691538, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.86547898320535, + "kl": 0.0049896240234375, + "learning_rate": 9.994470599922585e-07, + "loss": -0.0308, + "num_tokens": 24808091.0, + "reward": 3.725290298461914e-08, + "reward_std": 1.0354986190795898, + "rewards/wordcountpos_reward_nokeypoint/mean": 3.725290298461914e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.060983145960216076, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11611899903037905, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06885303726590962, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 572 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1494.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1168.25, + "completions/mean_terminated_length": 1168.25, + "completions/min_length": 993.0, + "completions/min_terminated_length": 993.0, + "epoch": 0.11462292458491698, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.715427949312685, + "kl": 0.003696441650390625, + "learning_rate": 9.99431377833032e-07, + "loss": -0.0033, + "num_tokens": 24856527.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9450145959854126, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04317301327183206, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.054569041232292365, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08027729719194866, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327954292297363, + "step": 573 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1401.0, + "completions/mean_length": 1277.625, + "completions/mean_terminated_length": 1226.3077392578125, + "completions/min_length": 1044.0, + "completions/min_terminated_length": 1044.0, + "epoch": 0.11482296459291859, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.866726706186063, + "kl": 0.004169464111328125, + "learning_rate": 9.994154765290247e-07, + "loss": -0.0165, + "num_tokens": 24899705.0, + "reward": 0.0, + "reward_std": 0.6241518259048462, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08814166251516527, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09838406184126738, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04999999999999999, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 574 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1307.375, + "completions/mean_terminated_length": 1279.857177734375, + "completions/min_length": 1066.0, + "completions/min_terminated_length": 1066.0, + "epoch": 0.11502300460092019, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4183441003660056, + "kl": 0.003437042236328125, + "learning_rate": 9.993993560879905e-07, + "loss": -0.0178, + "num_tokens": 24939991.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6513309478759766, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.021691466967316858, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0879249960233631, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08509254221575907, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 575 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1208.0, + "completions/max_terminated_length": 1208.0, + "completions/mean_length": 967.5625, + "completions/mean_terminated_length": 967.5625, + "completions/min_length": 787.0, + "completions/min_terminated_length": 787.0, + "epoch": 0.11522304460892178, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.49036835333137, + "kl": 0.0052337646484375, + "learning_rate": 9.993830165177895e-07, + "loss": -0.021, + "num_tokens": 24979288.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0439430475234985, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07866124944367195, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06868336104508681, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06885303726590962, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 576 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1236.125, + "completions/mean_terminated_length": 1198.4285888671875, + "completions/min_length": 970.0, + "completions/min_terminated_length": 970.0, + "epoch": 0.11542308461692338, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2549714209267715, + "kl": 0.00518798828125, + "learning_rate": 9.99366457826389e-07, + "loss": -0.0099, + "num_tokens": 25024114.0, + "reward": 0.0, + "reward_std": 0.9939181804656982, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.11844269150989473, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0948811185634652, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09727776191382574, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 577 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1395.0, + "completions/max_terminated_length": 1395.0, + "completions/mean_length": 1013.25, + "completions/mean_terminated_length": 1013.25, + "completions/min_length": 748.0, + "completions/min_terminated_length": 748.0, + "epoch": 0.11562312462492498, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.001481271580753, + "kl": 0.0052032470703125, + "learning_rate": 9.99349680021863e-07, + "loss": -0.013, + "num_tokens": 25061102.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7736286520957947, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.014401995212977454, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07562345774634081, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.17758670287225067, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 578 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1353.0, + "completions/mean_length": 1048.375, + "completions/mean_terminated_length": 1018.2667236328125, + "completions/min_length": 785.0, + "completions/min_terminated_length": 785.0, + "epoch": 0.11582316463292658, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4440059066264412, + "kl": 0.003841400146484375, + "learning_rate": 9.993326831123928e-07, + "loss": 0.0001, + "num_tokens": 25105364.0, + "reward": 0.0, + "reward_std": 0.4735858142375946, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08790474995296349, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.24339754606736771, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14375905768565217, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 579 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1378.0, + "completions/max_terminated_length": 1378.0, + "completions/mean_length": 1009.5625, + "completions/mean_terminated_length": 1009.5625, + "completions/min_length": 721.0, + "completions/min_terminated_length": 721.0, + "epoch": 0.11602320464092819, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.522417845054057, + "kl": 0.0054473876953125, + "learning_rate": 9.993154671062658e-07, + "loss": -0.0283, + "num_tokens": 25140965.0, + "reward": 0.0, + "reward_std": 0.9013688564300537, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.16554983573635193, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2161718066273339, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08073734277593311, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 580 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1383.0, + "completions/mean_length": 1165.375, + "completions/mean_terminated_length": 1143.0667724609375, + "completions/min_length": 818.0, + "completions/min_terminated_length": 818.0, + "epoch": 0.11622324464892979, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.675041669107841, + "kl": 0.00646209716796875, + "learning_rate": 9.992980320118768e-07, + "loss": -0.0314, + "num_tokens": 25193435.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0339778661727905, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.029334463802556263, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11420723144147464, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1354690069789096, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 581 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1393.0, + "completions/max_terminated_length": 1393.0, + "completions/mean_length": 1014.0, + "completions/mean_terminated_length": 1014.0, + "completions/min_length": 806.0, + "completions/min_terminated_length": 806.0, + "epoch": 0.11642328465693139, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7500094672933484, + "kl": 0.00394439697265625, + "learning_rate": 9.992803778377272e-07, + "loss": -0.0286, + "num_tokens": 25228779.0, + "reward": 0.0, + "reward_std": 1.0260323286056519, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07671554862728096, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1585148514257079, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.060705726131767695, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 582 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1481.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1092.5, + "completions/mean_terminated_length": 1092.5, + "completions/min_length": 946.0, + "completions/min_terminated_length": 946.0, + "epoch": 0.11662332466493298, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1607869081219, + "kl": 0.0040740966796875, + "learning_rate": 9.99262504592425e-07, + "loss": -0.0356, + "num_tokens": 25262339.0, + "reward": 0.0, + "reward_std": 0.7288249135017395, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08052769067391248, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.105683565344081, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07876359377087681, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 583 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1349.0, + "completions/mean_length": 1028.0, + "completions/mean_terminated_length": 960.5714721679688, + "completions/min_length": 608.0, + "completions/min_terminated_length": 608.0, + "epoch": 0.11682336467293458, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0196576920030216, + "kl": 0.003833770751953125, + "learning_rate": 9.99244412284686e-07, + "loss": -0.0175, + "num_tokens": 25304547.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.015641689300537, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06936211215207988, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.058660626044895886, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875000000000001, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.067631901304592, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 584 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1485.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1171.125, + "completions/mean_terminated_length": 1171.125, + "completions/min_length": 928.0, + "completions/min_terminated_length": 928.0, + "epoch": 0.1170234046809362, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4560507869439587, + "kl": 0.00525665283203125, + "learning_rate": 9.99226100923331e-07, + "loss": -0.0118, + "num_tokens": 25352197.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0596541166305542, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.16328226974250587, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07497533161679454, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.15776212754932312, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 585 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1355.0, + "completions/max_terminated_length": 1355.0, + "completions/mean_length": 1110.5625, + "completions/mean_terminated_length": 1110.5625, + "completions/min_length": 832.0, + "completions/min_terminated_length": 832.0, + "epoch": 0.11722344468893779, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5909418955503707, + "kl": 0.00540924072265625, + "learning_rate": 9.992075705172898e-07, + "loss": -0.0328, + "num_tokens": 25396846.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9294216632843018, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.019478869620439834, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07644852532565499, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1102186379345533, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 586 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1400.0, + "completions/max_terminated_length": 1400.0, + "completions/mean_length": 1126.3125, + "completions/mean_terminated_length": 1126.3125, + "completions/min_length": 844.0, + "completions/min_terminated_length": 844.0, + "epoch": 0.11742348469693939, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.8870055135644894, + "kl": 0.002902984619140625, + "learning_rate": 9.991888210755972e-07, + "loss": 0.0012, + "num_tokens": 25444803.0, + "reward": 3.725290298461914e-08, + "reward_std": 1.0528329610824585, + "rewards/wordcountpos_reward_nokeypoint/mean": 3.725290298461914e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08022907070515117, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05876301646241347, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09496588081262934, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 587 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1399.0, + "completions/max_terminated_length": 1399.0, + "completions/mean_length": 1008.75, + "completions/mean_terminated_length": 1008.75, + "completions/min_length": 793.0, + "completions/min_terminated_length": 793.0, + "epoch": 0.11762352470494099, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4417836400216504, + "kl": 0.0025768280029296875, + "learning_rate": 9.991698526073957e-07, + "loss": -0.0425, + "num_tokens": 25476567.0, + "reward": -3.725290298461914e-09, + "reward_std": 1.05305016040802, + "rewards/wordcountpos_reward_nokeypoint/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06785723411651867, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1802715122196813, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10470416879457553, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 588 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1451.0, + "completions/max_terminated_length": 1451.0, + "completions/mean_length": 1252.25, + "completions/mean_terminated_length": 1252.25, + "completions/min_length": 1012.0, + "completions/min_terminated_length": 1012.0, + "epoch": 0.11782356471294259, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0942635687004376, + "kl": 0.00479888916015625, + "learning_rate": 9.991506651219344e-07, + "loss": -0.0112, + "num_tokens": 25527387.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.5886082053184509, + "rewards/wordcountpos_reward_nokeypoint/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1198411036152227, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.21099335000179173, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06666666666666667, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 589 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1453.0, + "completions/mean_length": 1060.0, + "completions/mean_terminated_length": 958.4615478515625, + "completions/min_length": 636.0, + "completions/min_terminated_length": 636.0, + "epoch": 0.11802360472094418, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2743730506356763, + "kl": 0.0054931640625, + "learning_rate": 9.991312586285694e-07, + "loss": -0.0288, + "num_tokens": 25569115.0, + "reward": 0.0, + "reward_std": 1.0064506530761719, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03221333898893953, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08556474418044693, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1128748897706693, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 590 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1369.0, + "completions/max_terminated_length": 1369.0, + "completions/mean_length": 954.5625, + "completions/mean_terminated_length": 954.5625, + "completions/min_length": 573.0, + "completions/min_terminated_length": 573.0, + "epoch": 0.1182236447289458, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5550623078504398, + "kl": 0.0028972625732421875, + "learning_rate": 9.99111633136763e-07, + "loss": -0.0432, + "num_tokens": 25599900.0, + "reward": 0.0, + "reward_std": 1.0231075286865234, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07360493653423918, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07744687371294956, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13045504405165223, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 591 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1243.125, + "completions/mean_terminated_length": 1183.84619140625, + "completions/min_length": 956.0, + "completions/min_terminated_length": 956.0, + "epoch": 0.1184236847369474, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3963425972098547, + "kl": 0.00571441650390625, + "learning_rate": 9.99091788656085e-07, + "loss": -0.0367, + "num_tokens": 25651094.0, + "reward": 0.0, + "reward_std": 0.6927897930145264, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.14448588031357737, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09620913756591844, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08777074514725111, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 592 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1283.0, + "completions/max_terminated_length": 1283.0, + "completions/mean_length": 1149.25, + "completions/mean_terminated_length": 1149.25, + "completions/min_length": 946.0, + "completions/min_terminated_length": 946.0, + "epoch": 0.11862372474494899, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8403653150448442, + "kl": 0.00572967529296875, + "learning_rate": 9.990717251962117e-07, + "loss": -0.007, + "num_tokens": 25699994.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.48180317878723145, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1851437767755659, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11220407848293427, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11800816042090449, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 593 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1452.0, + "completions/mean_length": 1113.0, + "completions/mean_terminated_length": 1087.2000732421875, + "completions/min_length": 814.0, + "completions/min_terminated_length": 814.0, + "epoch": 0.11882376475295059, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2255160518918586, + "kl": 0.00443267822265625, + "learning_rate": 9.990514427669258e-07, + "loss": -0.0652, + "num_tokens": 25739994.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.47799575328826904, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.02597729689006805, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11422443584222822, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13957607775504183, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 594 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1391.0, + "completions/max_terminated_length": 1391.0, + "completions/mean_length": 1122.625, + "completions/mean_terminated_length": 1122.625, + "completions/min_length": 895.0, + "completions/min_terminated_length": 895.0, + "epoch": 0.11902380476095219, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.476223747680068, + "kl": 0.005889892578125, + "learning_rate": 9.990309413781174e-07, + "loss": 0.0077, + "num_tokens": 25781588.0, + "reward": 0.0, + "reward_std": 0.48329535126686096, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10020383990726535, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10991144443734088, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.161245154965971, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 595 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1461.0, + "completions/max_terminated_length": 1461.0, + "completions/mean_length": 1064.5, + "completions/mean_terminated_length": 1064.5, + "completions/min_length": 744.0, + "completions/min_terminated_length": 744.0, + "epoch": 0.11922384476895379, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.629395660100588, + "kl": 0.00727081298828125, + "learning_rate": 9.99010221039783e-07, + "loss": -0.0263, + "num_tokens": 25827964.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.770063042640686, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.00031189153840511206, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2155032014673473, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10852547064066473, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 596 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1452.0, + "completions/max_terminated_length": 1452.0, + "completions/mean_length": 1167.3125, + "completions/mean_terminated_length": 1167.3125, + "completions/min_length": 792.0, + "completions/min_terminated_length": 792.0, + "epoch": 0.1194238847769554, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4586730129743746, + "kl": 0.00669097900390625, + "learning_rate": 9.989892817620258e-07, + "loss": -0.0317, + "num_tokens": 25871361.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9988396167755127, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.15137551408950772, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15227082201822864, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11122216672215288, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 597 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1353.0, + "completions/max_terminated_length": 1353.0, + "completions/mean_length": 1174.0, + "completions/mean_terminated_length": 1174.0, + "completions/min_length": 933.0, + "completions/min_terminated_length": 933.0, + "epoch": 0.119623924784957, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7251511789996354, + "kl": 0.0054473876953125, + "learning_rate": 9.989681235550562e-07, + "loss": -0.0011, + "num_tokens": 25914089.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5388116240501404, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.10465316060801556, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.22535503960445624, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0683130051063973, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 598 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1040.0, + "completions/max_terminated_length": 1040.0, + "completions/mean_length": 925.75, + "completions/mean_terminated_length": 925.75, + "completions/min_length": 874.0, + "completions/min_terminated_length": 874.0, + "epoch": 0.11982396479295859, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.201523306927925, + "kl": 0.0007207393646240234, + "learning_rate": 9.989467464291908e-07, + "loss": 0.0069, + "num_tokens": 25945933.0, + "reward": 0.0, + "reward_std": 0.8759870529174805, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06514297546874186, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11523914835637897, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10036968702787748, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 599 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1493.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1139.8125, + "completions/mean_terminated_length": 1139.8125, + "completions/min_length": 882.0, + "completions/min_terminated_length": 882.0, + "epoch": 0.12002400480096019, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.829571763516663, + "kl": 0.004703521728515625, + "learning_rate": 9.989251503948531e-07, + "loss": -0.0551, + "num_tokens": 25986546.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9363263249397278, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.01664230216676736, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08594457935511501, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10292032157252809, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 600 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1305.0, + "completions/max_terminated_length": 1305.0, + "completions/mean_length": 904.5, + "completions/mean_terminated_length": 904.5, + "completions/min_length": 645.0, + "completions/min_terminated_length": 645.0, + "epoch": 0.12022404480896179, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.0961448660006456, + "kl": 0.002521514892578125, + "learning_rate": 9.989033354625734e-07, + "loss": -0.0241, + "num_tokens": 26019082.0, + "reward": 0.0, + "reward_std": 0.4185102581977844, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07915263034935441, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.22344262169749157, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0718795288428261, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 601 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1413.0, + "completions/mean_length": 1283.5625, + "completions/mean_terminated_length": 1185.181884765625, + "completions/min_length": 695.0, + "completions/min_terminated_length": 695.0, + "epoch": 0.12042408481696339, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.064674286345393, + "kl": 0.00510406494140625, + "learning_rate": 9.988813016429892e-07, + "loss": -0.0929, + "num_tokens": 26064507.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5430575013160706, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07289756556048649, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12236983353286, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14168300559373406, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 602 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1185.0, + "completions/max_terminated_length": 1185.0, + "completions/mean_length": 893.75, + "completions/mean_terminated_length": 893.75, + "completions/min_length": 707.0, + "completions/min_terminated_length": 707.0, + "epoch": 0.120624124824965, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.277637243374027, + "kl": 0.005615234375, + "learning_rate": 9.98859048946844e-07, + "loss": 0.0137, + "num_tokens": 26114511.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.993501603603363, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.019711565324998157, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2557488865973168, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07097208632298363, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 603 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1322.0, + "completions/mean_length": 1224.5625, + "completions/mean_terminated_length": 1059.300048828125, + "completions/min_length": 979.0, + "completions/min_terminated_length": 979.0, + "epoch": 0.1208241648329666, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3398220873726987, + "kl": 0.002533435821533203, + "learning_rate": 9.98836577384988e-07, + "loss": -0.0338, + "num_tokens": 26160504.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5166664719581604, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10609441032354046, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10443920352585405, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.17363222093882275, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 604 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1478.0, + "completions/mean_length": 1104.25, + "completions/mean_terminated_length": 1077.86669921875, + "completions/min_length": 645.0, + "completions/min_terminated_length": 645.0, + "epoch": 0.1210242048409682, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7309265976378105, + "kl": 0.004505157470703125, + "learning_rate": 9.98813886968379e-07, + "loss": -0.0113, + "num_tokens": 26212020.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9883939027786255, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08878612809611347, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0878919455559622, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10945995377982527, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 605 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1478.0, + "completions/mean_length": 1247.0, + "completions/mean_terminated_length": 1210.857177734375, + "completions/min_length": 754.0, + "completions/min_terminated_length": 754.0, + "epoch": 0.12122424484896979, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.42614529282555, + "kl": 0.0037689208984375, + "learning_rate": 9.987909777080804e-07, + "loss": 0.0094, + "num_tokens": 26247724.0, + "reward": 0.0, + "reward_std": 0.8376979827880859, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04468498487633004, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07305444606520843, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08388704928078611, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 606 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1155.9375, + "completions/mean_terminated_length": 1133.0, + "completions/min_length": 827.0, + "completions/min_terminated_length": 827.0, + "epoch": 0.12142428485697139, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.347719607033237, + "kl": 0.005279541015625, + "learning_rate": 9.987678496152636e-07, + "loss": -0.0457, + "num_tokens": 26300955.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5218381285667419, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04519739471438044, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08264831841884078, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11409872268574492, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 607 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1384.0, + "completions/max_terminated_length": 1384.0, + "completions/mean_length": 1101.0, + "completions/mean_terminated_length": 1101.0, + "completions/min_length": 948.0, + "completions/min_terminated_length": 948.0, + "epoch": 0.12162432486497299, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.540102830079853, + "kl": 0.00632476806640625, + "learning_rate": 9.987445027012051e-07, + "loss": 0.0181, + "num_tokens": 26340667.0, + "reward": 0.0, + "reward_std": 1.053479790687561, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.007137844783453309, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07749954901566297, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10671873729054748, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 608 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1463.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1221.375, + "completions/mean_terminated_length": 1221.375, + "completions/min_length": 949.0, + "completions/min_terminated_length": 949.0, + "epoch": 0.1218243648729746, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9587969181142517, + "kl": 0.004627227783203125, + "learning_rate": 9.987209369772897e-07, + "loss": 0.015, + "num_tokens": 26383905.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0063270330429077, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.3623655576860605, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.23045337617194547, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.053748384988656986, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 609 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1185.0, + "completions/max_terminated_length": 1185.0, + "completions/mean_length": 1029.375, + "completions/mean_terminated_length": 1029.375, + "completions/min_length": 943.0, + "completions/min_terminated_length": 943.0, + "epoch": 0.1220244048809762, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.412895928125548, + "kl": 0.0021080970764160156, + "learning_rate": 9.986971524550076e-07, + "loss": -0.0006, + "num_tokens": 26425015.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9252477884292603, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0895748173078858, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04652688816846951, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06666666666666667, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 610 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1291.0, + "completions/max_terminated_length": 1291.0, + "completions/mean_length": 948.75, + "completions/mean_terminated_length": 948.75, + "completions/min_length": 705.0, + "completions/min_terminated_length": 705.0, + "epoch": 0.1222244448889778, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4406472755390896, + "kl": 0.00506591796875, + "learning_rate": 9.986731491459567e-07, + "loss": -0.0136, + "num_tokens": 26468723.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.028116226196289, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1233627011564904, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10962112977228022, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09583937179043481, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 611 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1284.0, + "completions/max_terminated_length": 1284.0, + "completions/mean_length": 977.25, + "completions/mean_terminated_length": 977.25, + "completions/min_length": 747.0, + "completions/min_terminated_length": 747.0, + "epoch": 0.1224244848969794, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7665558854693466, + "kl": 0.0060882568359375, + "learning_rate": 9.986489270618406e-07, + "loss": -0.0134, + "num_tokens": 26510575.0, + "reward": 0.0, + "reward_std": 0.6778950691223145, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.030260753447459814, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.18662664584453867, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06978803887752091, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 612 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1249.125, + "completions/mean_terminated_length": 1135.0909423828125, + "completions/min_length": 802.0, + "completions/min_terminated_length": 802.0, + "epoch": 0.12262452490498099, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7198826429030305, + "kl": 0.004375457763671875, + "learning_rate": 9.986244862144706e-07, + "loss": 0.0211, + "num_tokens": 26554433.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7551993727684021, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07978609411857758, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09874051214197357, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07391185942027817, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 613 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1234.0, + "completions/max_terminated_length": 1234.0, + "completions/mean_length": 935.3125, + "completions/mean_terminated_length": 935.3125, + "completions/min_length": 601.0, + "completions/min_terminated_length": 601.0, + "epoch": 0.1228245649129826, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8081502041440376, + "kl": 0.0057830810546875, + "learning_rate": 9.98599826615764e-07, + "loss": -0.0485, + "num_tokens": 26583926.0, + "reward": 4.470348358154297e-08, + "reward_std": 0.9424110651016235, + "rewards/wordcountpos_reward_nokeypoint/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05482243236237163, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06686826361146926, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09953596037316066, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 614 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1275.0, + "completions/max_terminated_length": 1275.0, + "completions/mean_length": 1038.5, + "completions/mean_terminated_length": 1038.5, + "completions/min_length": 716.0, + "completions/min_terminated_length": 716.0, + "epoch": 0.1230246049209842, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.699407966865149, + "kl": 0.0057220458984375, + "learning_rate": 9.985749482777447e-07, + "loss": -0.0125, + "num_tokens": 26624622.0, + "reward": 0.0, + "reward_std": 0.8152978420257568, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03805779154540149, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13769957700471797, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10183501544346314, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 615 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1307.0, + "completions/max_terminated_length": 1307.0, + "completions/mean_length": 1049.875, + "completions/mean_terminated_length": 1049.875, + "completions/min_length": 859.0, + "completions/min_terminated_length": 859.0, + "epoch": 0.1232246449289858, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.888763902577557, + "kl": 0.005046844482421875, + "learning_rate": 9.985498512125438e-07, + "loss": -0.0451, + "num_tokens": 26656468.0, + "reward": -1.1175870895385742e-08, + "reward_std": 1.0169572830200195, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06604244748052165, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08553438704975365, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11547005383792518, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 616 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1407.0, + "completions/max_terminated_length": 1407.0, + "completions/mean_length": 1126.1875, + "completions/mean_terminated_length": 1126.1875, + "completions/min_length": 863.0, + "completions/min_terminated_length": 863.0, + "epoch": 0.1234246849369874, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.161928636666797, + "kl": 0.0028934478759765625, + "learning_rate": 9.985245354323985e-07, + "loss": -0.0063, + "num_tokens": 26696071.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9981516599655151, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0498879173682473, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06296038339902778, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0769800358919501, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 617 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1424.0, + "completions/mean_length": 1266.0, + "completions/mean_terminated_length": 1212.0, + "completions/min_length": 958.0, + "completions/min_terminated_length": 958.0, + "epoch": 0.123624724944989, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2288376291596865, + "kl": 0.00543975830078125, + "learning_rate": 9.984990009496531e-07, + "loss": -0.0473, + "num_tokens": 26741423.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0556341409683228, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.19059163528804623, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06209942369169989, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0894427190999916, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327954292297363, + "step": 618 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1453.0, + "completions/max_terminated_length": 1453.0, + "completions/mean_length": 1183.6875, + "completions/mean_terminated_length": 1183.6875, + "completions/min_length": 853.0, + "completions/min_terminated_length": 853.0, + "epoch": 0.12382476495299059, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.895755755681718, + "kl": 0.0053253173828125, + "learning_rate": 9.984732477767583e-07, + "loss": -0.0511, + "num_tokens": 26788298.0, + "reward": 0.0, + "reward_std": 0.8443827033042908, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.11827427374056512, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07913952550252429, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08777074514725111, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 619 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1399.0, + "completions/max_terminated_length": 1399.0, + "completions/mean_length": 1059.1875, + "completions/mean_terminated_length": 1059.1875, + "completions/min_length": 749.0, + "completions/min_terminated_length": 749.0, + "epoch": 0.1240248049609922, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6110107169155197, + "kl": 0.0055999755859375, + "learning_rate": 9.984472759262715e-07, + "loss": -0.0473, + "num_tokens": 26824045.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0512335300445557, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04818371086878444, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07452283561952634, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10852547064066471, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 620 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1419.0, + "completions/mean_length": 1307.625, + "completions/mean_terminated_length": 1243.5, + "completions/min_length": 945.0, + "completions/min_terminated_length": 945.0, + "epoch": 0.1242248449689938, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.916156516236884, + "kl": 0.005889892578125, + "learning_rate": 9.984210854108563e-07, + "loss": -0.0279, + "num_tokens": 26875431.0, + "reward": 0.0, + "reward_std": 0.8875646591186523, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.057919834302254246, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.19651987519619907, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05821416398857661, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 621 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1354.0, + "completions/max_terminated_length": 1354.0, + "completions/mean_length": 1031.9375, + "completions/mean_terminated_length": 1031.9375, + "completions/min_length": 691.0, + "completions/min_terminated_length": 691.0, + "epoch": 0.1244248849769954, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2424587298719514, + "kl": 0.00518798828125, + "learning_rate": 9.98394676243284e-07, + "loss": 0.0246, + "num_tokens": 26909846.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5794577598571777, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03098296782423319, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04294107022767224, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059004080210452226, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 622 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1104.6875, + "completions/mean_terminated_length": 1078.3333740234375, + "completions/min_length": 650.0, + "completions/min_terminated_length": 650.0, + "epoch": 0.124624924984997, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.304650055363134, + "kl": 0.004749298095703125, + "learning_rate": 9.983680484364312e-07, + "loss": 0.0228, + "num_tokens": 26959977.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0239720344543457, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.01092215009453331, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.34218848260106816, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11287488977066931, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 623 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1066.0, + "completions/max_terminated_length": 1066.0, + "completions/mean_length": 931.8125, + "completions/mean_terminated_length": 931.8125, + "completions/min_length": 790.0, + "completions/min_terminated_length": 790.0, + "epoch": 0.1248249649929986, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.469250328590308, + "kl": 0.003383636474609375, + "learning_rate": 9.98341202003282e-07, + "loss": -0.0356, + "num_tokens": 26999910.0, + "reward": 0.0, + "reward_std": 0.3521076738834381, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06280117093258375, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11811636695922913, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.17363222093882275, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 624 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1241.0, + "completions/mean_length": 1062.3125, + "completions/mean_terminated_length": 1033.1334228515625, + "completions/min_length": 703.0, + "completions/min_terminated_length": 703.0, + "epoch": 0.1250250050010002, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5508063535944676, + "kl": 0.00563812255859375, + "learning_rate": 9.983141369569269e-07, + "loss": -0.0099, + "num_tokens": 27046355.0, + "reward": 0.0, + "reward_std": 0.9169567823410034, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.17230403252701795, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15422876841939218, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10246950765959599, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 625 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1221.0, + "completions/mean_length": 1115.25, + "completions/mean_terminated_length": 1026.4615478515625, + "completions/min_length": 844.0, + "completions/min_terminated_length": 844.0, + "epoch": 0.1252250450090018, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8180183975886024, + "kl": 0.0044708251953125, + "learning_rate": 9.982868533105628e-07, + "loss": 0.0226, + "num_tokens": 27093863.0, + "reward": 0.0, + "reward_std": 0.8674072027206421, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.13401340599009082, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10395403956717941, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11642832797715322, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 626 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1477.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1173.5, + "completions/mean_terminated_length": 1173.5, + "completions/min_length": 808.0, + "completions/min_terminated_length": 808.0, + "epoch": 0.1254250850170034, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8918796026670246, + "kl": 0.004749298095703125, + "learning_rate": 9.982593510774934e-07, + "loss": 0.0059, + "num_tokens": 27146583.0, + "reward": 0.0, + "reward_std": 0.6927990913391113, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.041310604860439123, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06337618219899739, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09339283817414602, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 627 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1171.0, + "completions/max_terminated_length": 1171.0, + "completions/mean_length": 969.0, + "completions/mean_terminated_length": 969.0, + "completions/min_length": 753.0, + "completions/min_terminated_length": 753.0, + "epoch": 0.125625125025005, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0835441841794613, + "kl": 0.004032135009765625, + "learning_rate": 9.98231630271129e-07, + "loss": -0.0371, + "num_tokens": 27183303.0, + "reward": 0.0, + "reward_std": 0.6685020923614502, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.009301357377332238, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05842590493951457, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08027729719194866, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 628 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1189.0, + "completions/max_terminated_length": 1189.0, + "completions/mean_length": 1092.3125, + "completions/mean_terminated_length": 1092.3125, + "completions/min_length": 978.0, + "completions/min_terminated_length": 978.0, + "epoch": 0.12582516503300661, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8652034756082987, + "kl": 0.003936767578125, + "learning_rate": 9.982036909049862e-07, + "loss": 0.0046, + "num_tokens": 27224180.0, + "reward": -3.725290298461914e-09, + "reward_std": 1.0457714796066284, + "rewards/wordcountpos_reward_nokeypoint/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05116629095822893, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.037710725411601226, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08933913745655643, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 629 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1439.0, + "completions/mean_length": 1119.5625, + "completions/mean_terminated_length": 1065.21435546875, + "completions/min_length": 918.0, + "completions/min_terminated_length": 918.0, + "epoch": 0.1260252050410082, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.690242611223263, + "kl": 0.00698089599609375, + "learning_rate": 9.981755329926885e-07, + "loss": -0.0419, + "num_tokens": 27266493.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0554372072219849, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10134946845178117, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06360253226447367, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10101338378503963, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 630 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1473.0, + "completions/mean_length": 1272.0, + "completions/mean_terminated_length": 1239.4285888671875, + "completions/min_length": 1011.0, + "completions/min_terminated_length": 1011.0, + "epoch": 0.1262252450490098, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9965986653338987, + "kl": 0.00458526611328125, + "learning_rate": 9.981471565479657e-07, + "loss": -0.0075, + "num_tokens": 27320149.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0582287311553955, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.010073050034854841, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07817315334938715, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12816366850994057, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 631 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1382.0, + "completions/max_terminated_length": 1382.0, + "completions/mean_length": 1141.5, + "completions/mean_terminated_length": 1141.5, + "completions/min_length": 1009.0, + "completions/min_terminated_length": 1009.0, + "epoch": 0.1264252850570114, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0934783268199233, + "kl": 0.005706787109375, + "learning_rate": 9.981185615846547e-07, + "loss": -0.0139, + "num_tokens": 27361613.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.012229323387146, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07425680231484104, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15023117927612134, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05692750425533111, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 632 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1200.0, + "completions/mean_length": 1266.9375, + "completions/mean_terminated_length": 1033.875, + "completions/min_length": 810.0, + "completions/min_terminated_length": 810.0, + "epoch": 0.126625325065013, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4837218023712695, + "kl": 0.00356292724609375, + "learning_rate": 9.980897481166977e-07, + "loss": -0.0089, + "num_tokens": 27412180.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.35236042737960815, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07425677444644505, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15905100918728327, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09388724521901157, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 633 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1390.0, + "completions/max_terminated_length": 1390.0, + "completions/mean_length": 1152.3125, + "completions/mean_terminated_length": 1152.3125, + "completions/min_length": 1016.0, + "completions/min_terminated_length": 1016.0, + "epoch": 0.1268253650730146, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.950852099929956, + "kl": 0.00543975830078125, + "learning_rate": 9.980607161581453e-07, + "loss": -0.0163, + "num_tokens": 27450001.0, + "reward": 0.0, + "reward_std": 1.0638015270233154, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.02089668702731837, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08656675317860754, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06666666666666665, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 634 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1081.0, + "completions/max_terminated_length": 1081.0, + "completions/mean_length": 972.0, + "completions/mean_terminated_length": 972.0, + "completions/min_length": 903.0, + "completions/min_terminated_length": 903.0, + "epoch": 0.1270254050810162, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.3157538236284405, + "kl": 0.001164555549621582, + "learning_rate": 9.980314657231528e-07, + "loss": -0.001, + "num_tokens": 27479617.0, + "reward": 0.0, + "reward_std": 0.609226644039154, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.01513700606094018, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15739308986767286, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 635 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1236.0, + "completions/max_terminated_length": 1236.0, + "completions/mean_length": 1022.3125, + "completions/mean_terminated_length": 1022.3125, + "completions/min_length": 883.0, + "completions/min_terminated_length": 883.0, + "epoch": 0.1272254450890178, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1971483670717507, + "kl": 0.004791259765625, + "learning_rate": 9.980019968259832e-07, + "loss": -0.0177, + "num_tokens": 27515798.0, + "reward": 0.0, + "reward_std": 0.9419280290603638, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.14448547554433472, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1495136940946624, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059004080210452226, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 636 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1492.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1109.5, + "completions/mean_terminated_length": 1109.5, + "completions/min_length": 911.0, + "completions/min_terminated_length": 911.0, + "epoch": 0.1274254850970194, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.738210832286256, + "kl": 0.00467681884765625, + "learning_rate": 9.979723094810057e-07, + "loss": -0.027, + "num_tokens": 27552310.0, + "reward": 0.0, + "reward_std": 0.9167577028274536, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08047438277269055, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.21250687243612923, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05426273532033234, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 637 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1170.0, + "completions/mean_length": 1054.1875, + "completions/mean_terminated_length": 905.5833740234375, + "completions/min_length": 19.0, + "completions/min_terminated_length": 19.0, + "epoch": 0.127625525105021, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7901642488833516, + "kl": 0.005462646484375, + "learning_rate": 9.979424037026958e-07, + "loss": -0.1879, + "num_tokens": 27597401.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8539779186248779, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.12956929428776817, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12988084880393289, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.18135294011647257, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 638 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1306.0, + "completions/max_terminated_length": 1306.0, + "completions/mean_length": 1051.9375, + "completions/mean_terminated_length": 1051.9375, + "completions/min_length": 731.0, + "completions/min_terminated_length": 731.0, + "epoch": 0.1278255651130226, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.679666861567111, + "kl": 0.0070037841796875, + "learning_rate": 9.979122795056359e-07, + "loss": -0.0452, + "num_tokens": 27645816.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8172924518585205, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04477841041012783, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.23686921266455804, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1253144193766372, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 639 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1245.0, + "completions/max_terminated_length": 1245.0, + "completions/mean_length": 1078.6875, + "completions/mean_terminated_length": 1078.6875, + "completions/min_length": 868.0, + "completions/min_terminated_length": 868.0, + "epoch": 0.1280256051210242, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7915228591813137, + "kl": 0.004665374755859375, + "learning_rate": 9.978819369045144e-07, + "loss": 0.0107, + "num_tokens": 27695435.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0063811540603638, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05732069882084855, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07624878260343174, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07084150279686702, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 640 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1084.0, + "completions/max_terminated_length": 1084.0, + "completions/mean_length": 789.3125, + "completions/mean_terminated_length": 789.3125, + "completions/min_length": 665.0, + "completions/min_terminated_length": 665.0, + "epoch": 0.12822564512902582, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8211177062446713, + "kl": 0.003814697265625, + "learning_rate": 9.978513759141268e-07, + "loss": 0.0118, + "num_tokens": 27734616.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0331488847732544, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07125581989051658, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05797193814975494, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1567612007930345, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 641 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1438.0, + "completions/max_terminated_length": 1438.0, + "completions/mean_length": 1243.0, + "completions/mean_terminated_length": 1243.0, + "completions/min_length": 1043.0, + "completions/min_terminated_length": 1043.0, + "epoch": 0.1284256851370274, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2615739613617944, + "kl": 0.006103515625, + "learning_rate": 9.978205965493745e-07, + "loss": -0.017, + "num_tokens": 27781936.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9979515075683594, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08411283486252094, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0818134189395154, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11385500851066221, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 642 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1469.0, + "completions/max_terminated_length": 1469.0, + "completions/mean_length": 1109.8125, + "completions/mean_terminated_length": 1109.8125, + "completions/min_length": 863.0, + "completions/min_terminated_length": 863.0, + "epoch": 0.128625725145029, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.232241053688641, + "kl": 0.00582122802734375, + "learning_rate": 9.97789598825266e-07, + "loss": -0.0032, + "num_tokens": 27830397.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9317643642425537, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10133046439590387, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1035504997041372, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08333333333333336, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 643 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1386.0, + "completions/mean_length": 1107.9375, + "completions/mean_terminated_length": 1081.800048828125, + "completions/min_length": 763.0, + "completions/min_terminated_length": 763.0, + "epoch": 0.1288257651530306, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.463750160768034, + "kl": 0.00690460205078125, + "learning_rate": 9.977583827569156e-07, + "loss": -0.0558, + "num_tokens": 27883148.0, + "reward": 0.0, + "reward_std": 0.44865548610687256, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06047525401532876, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07032185709974872, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1264911064067352, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 644 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1382.0, + "completions/mean_length": 1198.75, + "completions/mean_terminated_length": 1178.666748046875, + "completions/min_length": 859.0, + "completions/min_terminated_length": 859.0, + "epoch": 0.1290258051610322, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1588005366183647, + "kl": 0.0062103271484375, + "learning_rate": 9.977269483595446e-07, + "loss": -0.0018, + "num_tokens": 27930776.0, + "reward": 0.0, + "reward_std": 0.6461268067359924, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03667932327598234, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.27892049772314054, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07876359377087681, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 645 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1212.5, + "completions/mean_terminated_length": 1193.3333740234375, + "completions/min_length": 950.0, + "completions/min_terminated_length": 950.0, + "epoch": 0.12922584516903382, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.74234514067827, + "kl": 0.0043182373046875, + "learning_rate": 9.976952956484806e-07, + "loss": 0.0258, + "num_tokens": 27978152.0, + "reward": 0.0, + "reward_std": 0.8872358798980713, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.060518165563727654, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.18991954341403014, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1134476547592341, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 646 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1433.0, + "completions/max_terminated_length": 1433.0, + "completions/mean_length": 1092.1875, + "completions/mean_terminated_length": 1092.1875, + "completions/min_length": 733.0, + "completions/min_terminated_length": 733.0, + "epoch": 0.1294258851770354, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6014452660987506, + "kl": 0.0042266845703125, + "learning_rate": 9.976634246391574e-07, + "loss": -0.0284, + "num_tokens": 28020283.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0326790809631348, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.3160293728071413, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10711886499684645, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09803627446568497, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 647 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1422.0, + "completions/mean_length": 1338.5625, + "completions/mean_terminated_length": 1284.75, + "completions/min_length": 1137.0, + "completions/min_terminated_length": 1137.0, + "epoch": 0.12962592518503702, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.7264811575426493, + "kl": 0.0025615692138671875, + "learning_rate": 9.976313353471158e-07, + "loss": -0.0103, + "num_tokens": 28066124.0, + "reward": 0.0, + "reward_std": 0.643837034702301, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.178209496173775, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1284186941544956, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08027729719194862, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 648 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1402.0, + "completions/mean_length": 1196.9375, + "completions/mean_terminated_length": 1059.181884765625, + "completions/min_length": 634.0, + "completions/min_terminated_length": 634.0, + "epoch": 0.1298259651930386, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4047760869469945, + "kl": 0.0066070556640625, + "learning_rate": 9.975990277880021e-07, + "loss": 0.0056, + "num_tokens": 28115243.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6812397837638855, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1330093065646394, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17315410635521936, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0926962382871743, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 649 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1421.0, + "completions/mean_length": 1143.125, + "completions/mean_terminated_length": 1119.3333740234375, + "completions/min_length": 884.0, + "completions/min_terminated_length": 884.0, + "epoch": 0.1300260052010402, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.31756506944612, + "kl": 0.00640106201171875, + "learning_rate": 9.9756650197757e-07, + "loss": 0.0491, + "num_tokens": 28165013.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8418999910354614, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07031888394179135, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1578589301302356, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06666666666666665, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 650 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1326.0, + "completions/max_terminated_length": 1326.0, + "completions/mean_length": 1128.9375, + "completions/mean_terminated_length": 1128.9375, + "completions/min_length": 874.0, + "completions/min_terminated_length": 874.0, + "epoch": 0.1302260452090418, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.557557575773277, + "kl": 0.005340576171875, + "learning_rate": 9.975337579316792e-07, + "loss": -0.0164, + "num_tokens": 28209572.0, + "reward": 0.0, + "reward_std": 0.8302733302116394, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.17168243877887407, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10611735199932913, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09803627446568497, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 651 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1204.0, + "completions/mean_length": 1115.375, + "completions/mean_terminated_length": 1089.7333984375, + "completions/min_length": 952.0, + "completions/min_terminated_length": 952.0, + "epoch": 0.1304260852170434, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.9513903543520423, + "kl": 0.00276947021484375, + "learning_rate": 9.975007956662958e-07, + "loss": -0.0131, + "num_tokens": 28249994.0, + "reward": 0.0, + "reward_std": 0.9597549438476562, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.049972755414017654, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07943527848563371, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04554200340426487, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 652 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1148.0, + "completions/max_terminated_length": 1148.0, + "completions/mean_length": 904.8125, + "completions/mean_terminated_length": 904.8125, + "completions/min_length": 771.0, + "completions/min_terminated_length": 771.0, + "epoch": 0.13062612522504502, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.336635203506321, + "kl": 0.00508880615234375, + "learning_rate": 9.974676151974924e-07, + "loss": -0.0079, + "num_tokens": 28297039.0, + "reward": 0.0, + "reward_std": 0.9825433492660522, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04329907163608994, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05167572313904247, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14343665526661614, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 653 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1469.0, + "completions/mean_length": 1269.75, + "completions/mean_terminated_length": 1193.0, + "completions/min_length": 892.0, + "completions/min_terminated_length": 892.0, + "epoch": 0.1308261652330466, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6121345438666475, + "kl": 0.0065765380859375, + "learning_rate": 9.97434216541448e-07, + "loss": 0.0469, + "num_tokens": 28342923.0, + "reward": 0.0, + "reward_std": 1.0025177001953125, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.12831354634731246, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2113758723361091, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1338185615204685, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 654 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1431.0, + "completions/max_terminated_length": 1431.0, + "completions/mean_length": 1145.9375, + "completions/mean_terminated_length": 1145.9375, + "completions/min_length": 921.0, + "completions/min_terminated_length": 921.0, + "epoch": 0.13102620524104822, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7524767509344406, + "kl": 0.003810882568359375, + "learning_rate": 9.974005997144479e-07, + "loss": 0.0048, + "num_tokens": 28381770.0, + "reward": 0.0, + "reward_std": 0.8087669014930725, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.18759170421697663, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15838629867822, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04849589520621153, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 655 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1197.1875, + "completions/mean_terminated_length": 1127.3077392578125, + "completions/min_length": 770.0, + "completions/min_terminated_length": 770.0, + "epoch": 0.1312262452490498, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.19192461735081, + "kl": 0.00664520263671875, + "learning_rate": 9.973667647328835e-07, + "loss": -0.0478, + "num_tokens": 28420517.0, + "reward": 0.0, + "reward_std": 0.6893986463546753, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03677050210913922, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09127432147565094, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08681611046941137, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 656 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1371.0, + "completions/max_terminated_length": 1371.0, + "completions/mean_length": 1022.0625, + "completions/mean_terminated_length": 1022.0625, + "completions/min_length": 723.0, + "completions/min_terminated_length": 723.0, + "epoch": 0.1314262852570514, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.433750998670967, + "kl": 0.0059814453125, + "learning_rate": 9.973327116132535e-07, + "loss": -0.0354, + "num_tokens": 28466366.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0237822532653809, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04712423081424379, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06259693587611385, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04127594582445935, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 657 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1457.0, + "completions/max_terminated_length": 1457.0, + "completions/mean_length": 1147.0, + "completions/mean_terminated_length": 1147.0, + "completions/min_length": 870.0, + "completions/min_terminated_length": 870.0, + "epoch": 0.13162632526505302, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3730000764118873, + "kl": 0.00586700439453125, + "learning_rate": 9.972984403721617e-07, + "loss": 0.0247, + "num_tokens": 28508710.0, + "reward": 0.0, + "reward_std": 0.6254897713661194, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.20515316431974193, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09050024656417859, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0938872452190116, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 658 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1286.0, + "completions/max_terminated_length": 1286.0, + "completions/mean_length": 1041.9375, + "completions/mean_terminated_length": 1041.9375, + "completions/min_length": 751.0, + "completions/min_terminated_length": 751.0, + "epoch": 0.1318263652730546, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.867071326953477, + "kl": 0.0040340423583984375, + "learning_rate": 9.972639510263196e-07, + "loss": -0.0392, + "num_tokens": 28546149.0, + "reward": 7.450580596923828e-09, + "reward_std": 0.9719338417053223, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.12298577774218296, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05766504685085986, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.077817450199525, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 659 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1341.6875, + "completions/mean_terminated_length": 1269.727294921875, + "completions/min_length": 1027.0, + "completions/min_terminated_length": 1027.0, + "epoch": 0.13202640528105622, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3468575879166615, + "kl": 0.006378173828125, + "learning_rate": 9.972292435925436e-07, + "loss": 0.0204, + "num_tokens": 28592064.0, + "reward": 0.0, + "reward_std": 0.751152515411377, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.036839011347422054, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.20617132317273607, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 660 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1384.0, + "completions/max_terminated_length": 1384.0, + "completions/mean_length": 1143.125, + "completions/mean_terminated_length": 1143.125, + "completions/min_length": 935.0, + "completions/min_terminated_length": 935.0, + "epoch": 0.1322264452890578, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0606750430856398, + "kl": 0.005634307861328125, + "learning_rate": 9.971943180877578e-07, + "loss": -0.0192, + "num_tokens": 28632242.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0254310369491577, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08756746546360628, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07399304006665296, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07187952884282611, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 661 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1468.0, + "completions/mean_length": 1115.375, + "completions/mean_terminated_length": 1089.7333984375, + "completions/min_length": 848.0, + "completions/min_terminated_length": 848.0, + "epoch": 0.13242648529705942, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6180938607117983, + "kl": 0.00644683837890625, + "learning_rate": 9.97159174528992e-07, + "loss": -0.0081, + "num_tokens": 28682136.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.060869574546814, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05026425765334284, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08304656055867149, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10036968702787749, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 662 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1407.0, + "completions/max_terminated_length": 1407.0, + "completions/mean_length": 982.8125, + "completions/mean_terminated_length": 982.8125, + "completions/min_length": 740.0, + "completions/min_terminated_length": 740.0, + "epoch": 0.132626525305061, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.812557113810195, + "kl": 0.0064544677734375, + "learning_rate": 9.97123812933382e-07, + "loss": -0.008, + "num_tokens": 28720717.0, + "reward": 0.0, + "reward_std": 0.8558617234230042, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.31846720686355845, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.402163174057049, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10101338378503963, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 663 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1374.0, + "completions/mean_length": 1217.0625, + "completions/mean_terminated_length": 1151.769287109375, + "completions/min_length": 857.0, + "completions/min_terminated_length": 857.0, + "epoch": 0.1328265653130626, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2873502241286885, + "kl": 0.0054779052734375, + "learning_rate": 9.970882333181705e-07, + "loss": -0.0228, + "num_tokens": 28760982.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0136096477508545, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.009623731878162548, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.031156022527572533, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1189459883650901, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 664 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1386.0, + "completions/max_terminated_length": 1386.0, + "completions/mean_length": 998.0625, + "completions/mean_terminated_length": 998.0625, + "completions/min_length": 687.0, + "completions/min_terminated_length": 687.0, + "epoch": 0.13302660532106422, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.593821308588659, + "kl": 0.0032196044921875, + "learning_rate": 9.970524357007062e-07, + "loss": -0.0084, + "num_tokens": 28793599.0, + "reward": 0.0, + "reward_std": 0.9129513502120972, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.054247659817022446, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09463480230332581, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05443310539518172, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 665 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1395.8125, + "completions/mean_terminated_length": 1348.45458984375, + "completions/min_length": 1100.0, + "completions/min_terminated_length": 1100.0, + "epoch": 0.1332266453290658, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.74644032802705, + "kl": 0.00547027587890625, + "learning_rate": 9.970164200984443e-07, + "loss": -0.0214, + "num_tokens": 28844308.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6745178699493408, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.2579351781390133, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13201281345099775, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1060223596263578, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 666 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1419.0, + "completions/mean_length": 1117.6875, + "completions/mean_terminated_length": 1063.071533203125, + "completions/min_length": 765.0, + "completions/min_terminated_length": 765.0, + "epoch": 0.13342668533706742, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1300480013024474, + "kl": 0.00490570068359375, + "learning_rate": 9.96980186528946e-07, + "loss": 0.0278, + "num_tokens": 28883887.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8094056844711304, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06710406483953377, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14654803825055976, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08027729719194866, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 667 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1450.0, + "completions/mean_length": 1195.25, + "completions/mean_terminated_length": 1056.727294921875, + "completions/min_length": 753.0, + "completions/min_terminated_length": 753.0, + "epoch": 0.133626725345069, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.090744279468452, + "kl": 0.00595855712890625, + "learning_rate": 9.969437350098792e-07, + "loss": 0.0185, + "num_tokens": 28933699.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9273002743721008, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.052077207261709994, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12224194509205406, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14089659985908765, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 668 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1229.0, + "completions/max_terminated_length": 1229.0, + "completions/mean_length": 984.1875, + "completions/mean_terminated_length": 984.1875, + "completions/min_length": 853.0, + "completions/min_terminated_length": 853.0, + "epoch": 0.13382676535307061, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.351142427004019, + "kl": 0.00676727294921875, + "learning_rate": 9.969070655590176e-07, + "loss": -0.0092, + "num_tokens": 28976222.0, + "reward": 0.0, + "reward_std": 0.5140662789344788, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11679943785541615, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10707143748958273, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1101345977866612, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 669 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1469.0, + "completions/mean_length": 1385.0, + "completions/mean_terminated_length": 1332.727294921875, + "completions/min_length": 1262.0, + "completions/min_terminated_length": 1262.0, + "epoch": 0.13402680536107223, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.301305746845824, + "kl": 0.00363922119140625, + "learning_rate": 9.968701781942416e-07, + "loss": -0.019, + "num_tokens": 29027278.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9843964576721191, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.18253325727065503, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14544812834804952, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07888106377466153, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 670 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1216.0, + "completions/max_terminated_length": 1216.0, + "completions/mean_length": 869.3125, + "completions/mean_terminated_length": 869.3125, + "completions/min_length": 609.0, + "completions/min_terminated_length": 609.0, + "epoch": 0.1342268453690738, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4858948399402414, + "kl": 0.004314422607421875, + "learning_rate": 9.968330729335373e-07, + "loss": -0.0633, + "num_tokens": 29061699.0, + "reward": 0.0, + "reward_std": 0.8186737298965454, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03810847887693084, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1266103838619376, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0850925422157591, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 671 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1325.0, + "completions/max_terminated_length": 1325.0, + "completions/mean_length": 1085.3125, + "completions/mean_terminated_length": 1085.3125, + "completions/min_length": 875.0, + "completions/min_terminated_length": 875.0, + "epoch": 0.13442688537707542, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.400723794588815, + "kl": 0.0061187744140625, + "learning_rate": 9.967957497949977e-07, + "loss": -0.012, + "num_tokens": 29111944.0, + "reward": 0.0, + "reward_std": 0.6915768384933472, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.22673878354978683, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.39680330099751093, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09418264367902597, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 672 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1168.5625, + "completions/mean_terminated_length": 1146.4666748046875, + "completions/min_length": 868.0, + "completions/min_terminated_length": 868.0, + "epoch": 0.134626925385077, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.321871718617164, + "kl": 0.0060882568359375, + "learning_rate": 9.967582087968216e-07, + "loss": 0.0457, + "num_tokens": 29162817.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7206494212150574, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.11605824635524017, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1546805901732413, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.107496769977314, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 673 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1437.0, + "completions/mean_length": 1280.625, + "completions/mean_terminated_length": 1266.0001220703125, + "completions/min_length": 1115.0, + "completions/min_terminated_length": 1115.0, + "epoch": 0.13482696539307862, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.531425942730242, + "kl": 0.00426483154296875, + "learning_rate": 9.967204499573144e-07, + "loss": -0.0077, + "num_tokens": 29199891.0, + "reward": 0.0, + "reward_std": 1.0492463111877441, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.19788437592797192, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10743085603204024, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.03415650255319865, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 674 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1443.0, + "completions/mean_length": 1215.0, + "completions/mean_terminated_length": 1196.0001220703125, + "completions/min_length": 928.0, + "completions/min_terminated_length": 928.0, + "epoch": 0.1350270054010802, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.776205005220771, + "kl": 0.004131317138671875, + "learning_rate": 9.96682473294887e-07, + "loss": -0.0126, + "num_tokens": 29243811.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.647058367729187, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1373933982033831, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1438363513245413, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875000000000001, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0758897836290186, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 675 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1424.0, + "completions/max_terminated_length": 1424.0, + "completions/mean_length": 1152.0625, + "completions/mean_terminated_length": 1152.0625, + "completions/min_length": 941.0, + "completions/min_terminated_length": 941.0, + "epoch": 0.13522704540908181, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1550086565305375, + "kl": 0.004695892333984375, + "learning_rate": 9.966442788280575e-07, + "loss": 0.0368, + "num_tokens": 29288796.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.9364603757858276, + "rewards/wordcountpos_reward_nokeypoint/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1830312870446314, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13908995394962276, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04849589520621153, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 676 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1450.0, + "completions/max_terminated_length": 1450.0, + "completions/mean_length": 1214.3125, + "completions/mean_terminated_length": 1214.3125, + "completions/min_length": 938.0, + "completions/min_terminated_length": 938.0, + "epoch": 0.13542708541708343, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3816261208351506, + "kl": 0.00681304931640625, + "learning_rate": 9.966058665754494e-07, + "loss": -0.0219, + "num_tokens": 29342345.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9890535473823547, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.2099985622698682, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13817896289990586, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.15962919996504865, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 677 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1083.0, + "completions/max_terminated_length": 1083.0, + "completions/mean_length": 932.125, + "completions/mean_terminated_length": 932.125, + "completions/min_length": 783.0, + "completions/min_terminated_length": 783.0, + "epoch": 0.135627125425085, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.191002939930253, + "kl": 0.00403594970703125, + "learning_rate": 9.965672365557928e-07, + "loss": 0.0237, + "num_tokens": 29382219.0, + "reward": 0.0, + "reward_std": 1.0514026880264282, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0016922883668195328, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.018678696814675603, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.045338235029118136, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 678 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1119.0, + "completions/max_terminated_length": 1119.0, + "completions/mean_length": 884.0, + "completions/mean_terminated_length": 884.0, + "completions/min_length": 652.0, + "completions/min_terminated_length": 652.0, + "epoch": 0.13582716543308662, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.304297302193887, + "kl": 0.00766754150390625, + "learning_rate": 9.96528388787924e-07, + "loss": 0.0141, + "num_tokens": 29420419.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5308505296707153, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05014583971379952, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16314630191742416, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11080513425729775, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 679 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1425.0, + "completions/mean_length": 1237.875, + "completions/mean_terminated_length": 1118.727294921875, + "completions/min_length": 861.0, + "completions/min_terminated_length": 861.0, + "epoch": 0.1360272054410882, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.760882760688646, + "kl": 0.004039764404296875, + "learning_rate": 9.964893232907847e-07, + "loss": 0.038, + "num_tokens": 29463657.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9468058347702026, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.01320557987956474, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08716688723317809, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1549193338482967, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 680 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1480.0, + "completions/max_terminated_length": 1480.0, + "completions/mean_length": 947.625, + "completions/mean_terminated_length": 947.625, + "completions/min_length": 596.0, + "completions/min_terminated_length": 596.0, + "epoch": 0.13622724544908982, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.567726963684965, + "kl": 0.0057220458984375, + "learning_rate": 9.964500400834242e-07, + "loss": 0.0312, + "num_tokens": 29509363.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9575223922729492, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.28524878694479133, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15252326026361782, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13492110177323527, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 681 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1373.0, + "completions/max_terminated_length": 1373.0, + "completions/mean_length": 1010.9375, + "completions/mean_terminated_length": 1010.9375, + "completions/min_length": 698.0, + "completions/min_terminated_length": 698.0, + "epoch": 0.13642728545709143, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9765045596778337, + "kl": 0.0052947998046875, + "learning_rate": 9.964105391849968e-07, + "loss": -0.0525, + "num_tokens": 29542586.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5600380301475525, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.21208169322571246, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16131597006969806, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11855612829185827, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 682 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1277.0, + "completions/max_terminated_length": 1277.0, + "completions/mean_length": 1075.625, + "completions/mean_terminated_length": 1075.625, + "completions/min_length": 882.0, + "completions/min_terminated_length": 882.0, + "epoch": 0.136627325465093, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4273925948815336, + "kl": 0.0061187744140625, + "learning_rate": 9.963708206147635e-07, + "loss": 0.0086, + "num_tokens": 29593340.0, + "reward": 0.0, + "reward_std": 0.7174463272094727, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.02625763890127869, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03735142728587546, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12641788434189793, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 683 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1433.0, + "completions/max_terminated_length": 1433.0, + "completions/mean_length": 1028.8125, + "completions/mean_terminated_length": 1028.8125, + "completions/min_length": 743.0, + "completions/min_terminated_length": 743.0, + "epoch": 0.13682736547309463, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2703675275416035, + "kl": 0.0059814453125, + "learning_rate": 9.96330884392091e-07, + "loss": -0.0519, + "num_tokens": 29623129.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.000617265701294, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.2904197697262102, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13304201900010412, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07969850595746354, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 684 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1107.0, + "completions/mean_terminated_length": 928.3636474609375, + "completions/min_length": 590.0, + "completions/min_terminated_length": 590.0, + "epoch": 0.1370274054810962, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5199971629898785, + "kl": 0.005828857421875, + "learning_rate": 9.962907305364528e-07, + "loss": -0.0367, + "num_tokens": 29662609.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.4203028082847595, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.28853384428244494, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.29782299666221923, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.15776212754932312, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 685 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1382.0, + "completions/max_terminated_length": 1382.0, + "completions/mean_length": 1128.375, + "completions/mean_terminated_length": 1128.375, + "completions/min_length": 865.0, + "completions/min_terminated_length": 865.0, + "epoch": 0.13722744548909782, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.967431003743017, + "kl": 0.011135101318359375, + "learning_rate": 9.962503590674276e-07, + "loss": -0.0062, + "num_tokens": 29706959.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9590449333190918, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04405635519873275, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11264049311624566, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09496588081262934, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 686 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1175.0, + "completions/max_terminated_length": 1175.0, + "completions/mean_length": 950.0, + "completions/mean_terminated_length": 950.0, + "completions/min_length": 820.0, + "completions/min_terminated_length": 820.0, + "epoch": 0.13742748549709943, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.899582154692882, + "kl": 0.005199432373046875, + "learning_rate": 9.962097700047008e-07, + "loss": -0.013, + "num_tokens": 29756079.0, + "reward": 0.0, + "reward_std": 0.9595593214035034, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.004895878865537357, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1348831690747819, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13957607775504186, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 687 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1255.3125, + "completions/mean_terminated_length": 1239.0001220703125, + "completions/min_length": 1151.0, + "completions/min_terminated_length": 1151.0, + "epoch": 0.13762752550510102, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2075418464703707, + "kl": 0.00360870361328125, + "learning_rate": 9.96168963368064e-07, + "loss": 0.0247, + "num_tokens": 29794100.0, + "reward": -1.862645149230957e-08, + "reward_std": 1.0583150386810303, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.862645149230957e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.15579212777514156, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05038377291764615, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09016445879408155, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 688 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1188.0, + "completions/max_terminated_length": 1188.0, + "completions/mean_length": 1058.9375, + "completions/mean_terminated_length": 1058.9375, + "completions/min_length": 886.0, + "completions/min_terminated_length": 886.0, + "epoch": 0.13782756551310263, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.122933151054921, + "kl": 0.00440216064453125, + "learning_rate": 9.96127939177415e-07, + "loss": 0.0286, + "num_tokens": 29827595.0, + "reward": 0.0, + "reward_std": 0.7575008869171143, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.009994229056342317, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.3285113723005558, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0906764700582363, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 689 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1351.0, + "completions/max_terminated_length": 1351.0, + "completions/mean_length": 1177.3125, + "completions/mean_terminated_length": 1177.3125, + "completions/min_length": 1011.0, + "completions/min_terminated_length": 1011.0, + "epoch": 0.1380276055211042, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2997832146864177, + "kl": 0.00385284423828125, + "learning_rate": 9.960866974527567e-07, + "loss": 0.014, + "num_tokens": 29868664.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.421240895986557, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.030924999827479822, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.045231391354160315, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13221755360572018, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 690 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1374.0, + "completions/max_terminated_length": 1374.0, + "completions/mean_length": 1108.3125, + "completions/mean_terminated_length": 1108.3125, + "completions/min_length": 754.0, + "completions/min_terminated_length": 754.0, + "epoch": 0.13822764552910582, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6618935446707206, + "kl": 0.004383087158203125, + "learning_rate": 9.960452382141992e-07, + "loss": -0.0504, + "num_tokens": 29915941.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7892509698867798, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06525174154559012, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06057267144472161, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09418264367902596, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 691 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1459.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1232.3125, + "completions/mean_terminated_length": 1232.3125, + "completions/min_length": 935.0, + "completions/min_terminated_length": 935.0, + "epoch": 0.1384276855371074, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1209002607869714, + "kl": 0.002765655517578125, + "learning_rate": 9.960035614819581e-07, + "loss": -0.004, + "num_tokens": 29959010.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9350792169570923, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04713232736604106, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1070643058933692, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12141145226353543, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 692 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1326.0, + "completions/max_terminated_length": 1326.0, + "completions/mean_length": 1000.5, + "completions/mean_terminated_length": 1000.5, + "completions/min_length": 772.0, + "completions/min_terminated_length": 772.0, + "epoch": 0.13862772554510902, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3083790547905085, + "kl": 0.00525665283203125, + "learning_rate": 9.959616672763551e-07, + "loss": 0.006, + "num_tokens": 30000874.0, + "reward": 0.0, + "reward_std": 0.9944514036178589, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.18590486705434334, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09123139313655158, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06652763279965647, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 693 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1299.0, + "completions/max_terminated_length": 1299.0, + "completions/mean_length": 989.8125, + "completions/mean_terminated_length": 989.8125, + "completions/min_length": 750.0, + "completions/min_terminated_length": 750.0, + "epoch": 0.13882776555311063, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3026760875724084, + "kl": 0.00528717041015625, + "learning_rate": 9.959195556178182e-07, + "loss": 0.0054, + "num_tokens": 30040079.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6982542872428894, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.09971926987406864, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12111544225767598, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08062257748298551, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 694 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1221.9375, + "completions/mean_terminated_length": 1203.4000244140625, + "completions/min_length": 1022.0, + "completions/min_terminated_length": 1022.0, + "epoch": 0.13902780556111222, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.307788364006199, + "kl": 0.00731658935546875, + "learning_rate": 9.95877226526881e-07, + "loss": -0.0053, + "num_tokens": 30085110.0, + "reward": 0.0, + "reward_std": 0.7320248484611511, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08591086702878646, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1397023303824348, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07490735018081413, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 695 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1310.0, + "completions/max_terminated_length": 1310.0, + "completions/mean_length": 1017.625, + "completions/mean_terminated_length": 1017.625, + "completions/min_length": 717.0, + "completions/min_terminated_length": 717.0, + "epoch": 0.13922784556911383, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3331092001587757, + "kl": 0.00560760498046875, + "learning_rate": 9.958346800241833e-07, + "loss": -0.0136, + "num_tokens": 30126720.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0280733108520508, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.011815086575757965, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0368515551906012, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10809803506625451, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 696 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1322.0, + "completions/mean_length": 1124.5625, + "completions/mean_terminated_length": 1099.533447265625, + "completions/min_length": 783.0, + "completions/min_terminated_length": 783.0, + "epoch": 0.1394278855771154, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1154212224386626, + "kl": 0.005794525146484375, + "learning_rate": 9.957919161304714e-07, + "loss": 0.0525, + "num_tokens": 30178081.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9317577481269836, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0313519456983477, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09195840493169644, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07490735018081411, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 697 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1245.0625, + "completions/mean_terminated_length": 1208.6429443359375, + "completions/min_length": 959.0, + "completions/min_terminated_length": 959.0, + "epoch": 0.13962792558511702, + "frac_reward_zero_std": 0.0, + "grad_norm": 30.35985423000838, + "kl": 0.07567596435546875, + "learning_rate": 9.957489348665968e-07, + "loss": -0.0394, + "num_tokens": 30229890.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0507616996765137, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0351050502915286, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09605954759874305, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07649739768026002, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 698 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1423.0, + "completions/mean_length": 1217.0625, + "completions/mean_terminated_length": 1198.2000732421875, + "completions/min_length": 953.0, + "completions/min_terminated_length": 953.0, + "epoch": 0.13982796559311864, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7385054359157692, + "kl": 0.005828857421875, + "learning_rate": 9.957057362535175e-07, + "loss": -0.0015, + "num_tokens": 30277163.0, + "reward": 0.0, + "reward_std": 0.9701701402664185, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.052455064625939345, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13992483548617754, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05163977794943221, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 699 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1443.0, + "completions/mean_length": 1182.75, + "completions/mean_terminated_length": 1161.60009765625, + "completions/min_length": 1027.0, + "completions/min_terminated_length": 1027.0, + "epoch": 0.14002800560112022, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.696104416778322, + "kl": 0.003940582275390625, + "learning_rate": 9.956623203122972e-07, + "loss": -0.0187, + "num_tokens": 30328615.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0267882347106934, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.13213191348131975, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1512098166126736, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10390522747338701, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 700 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1481.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1151.6875, + "completions/mean_terminated_length": 1151.6875, + "completions/min_length": 760.0, + "completions/min_terminated_length": 760.0, + "epoch": 0.14022804560912183, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.749171860381066, + "kl": 0.00548553466796875, + "learning_rate": 9.956186870641057e-07, + "loss": 0.0398, + "num_tokens": 30367194.0, + "reward": 0.0, + "reward_std": 0.6777411699295044, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.014806572740289084, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07206339183306193, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12345339501504503, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 701 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1446.0, + "completions/mean_length": 1362.5625, + "completions/mean_terminated_length": 1342.9285888671875, + "completions/min_length": 1218.0, + "completions/min_terminated_length": 1218.0, + "epoch": 0.14042808561712342, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7398970839179038, + "kl": 0.00604248046875, + "learning_rate": 9.955748365302192e-07, + "loss": -0.0301, + "num_tokens": 30414291.0, + "reward": 0.0, + "reward_std": 0.7893999814987183, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10613039538396016, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15704723741447935, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875000000000001, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.093392838174146, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 702 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1462.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1030.125, + "completions/mean_terminated_length": 1030.125, + "completions/min_length": 856.0, + "completions/min_terminated_length": 856.0, + "epoch": 0.14062812562512503, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7981574131802, + "kl": 0.0074310302734375, + "learning_rate": 9.955307687320188e-07, + "loss": -0.017, + "num_tokens": 30456933.0, + "reward": 0.0, + "reward_std": 0.838640570640564, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.10724093495186068, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03554579721809367, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.16238956361284543, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 703 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1260.0, + "completions/max_terminated_length": 1260.0, + "completions/mean_length": 946.875, + "completions/mean_terminated_length": 946.875, + "completions/min_length": 778.0, + "completions/min_terminated_length": 778.0, + "epoch": 0.1408281656331266, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.187393528292029, + "kl": 0.007720947265625, + "learning_rate": 9.954864836909928e-07, + "loss": 0.007, + "num_tokens": 30495931.0, + "reward": 0.0, + "reward_std": 0.7913191318511963, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.029447235881958987, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.18564369835748223, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08243965245133132, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 704 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1350.0, + "completions/max_terminated_length": 1350.0, + "completions/mean_length": 1095.3125, + "completions/mean_terminated_length": 1095.3125, + "completions/min_length": 19.0, + "completions/min_terminated_length": 19.0, + "epoch": 0.14102820564112822, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.664257406786109, + "kl": 0.004726409912109375, + "learning_rate": 9.954419814287342e-07, + "loss": -0.145, + "num_tokens": 30544016.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0103096961975098, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.11382237374096846, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09403065984833035, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.16487930490266264, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 705 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1418.0, + "completions/max_terminated_length": 1418.0, + "completions/mean_length": 1135.25, + "completions/mean_terminated_length": 1135.25, + "completions/min_length": 808.0, + "completions/min_terminated_length": 808.0, + "epoch": 0.14122824564912984, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.533696261233303, + "kl": 0.0092926025390625, + "learning_rate": 9.953972619669427e-07, + "loss": -0.0162, + "num_tokens": 30592116.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8147860765457153, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0006157461126693152, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1345453020743536, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11666666666666668, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 706 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1353.0, + "completions/max_terminated_length": 1353.0, + "completions/mean_length": 1077.75, + "completions/mean_terminated_length": 1077.75, + "completions/min_length": 920.0, + "completions/min_terminated_length": 920.0, + "epoch": 0.14142828565713142, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.984473266327546, + "kl": 0.00585174560546875, + "learning_rate": 9.953523253274238e-07, + "loss": -0.0093, + "num_tokens": 30634536.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.000441074371338, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04417017032408094, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12759710598853538, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08681611046941137, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 707 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1411.0, + "completions/mean_length": 1350.0, + "completions/mean_terminated_length": 1157.1429443359375, + "completions/min_length": 1054.0, + "completions/min_terminated_length": 1054.0, + "epoch": 0.14162832566513303, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.467348257985941, + "kl": 0.0017185211181640625, + "learning_rate": 9.953071715320888e-07, + "loss": 0.0113, + "num_tokens": 30685184.0, + "reward": 0.0, + "reward_std": 1.0236318111419678, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1417305952897173, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06187070727059217, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.045338235029118136, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 708 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1457.0, + "completions/mean_length": 1280.5625, + "completions/mean_terminated_length": 1229.923095703125, + "completions/min_length": 935.0, + "completions/min_terminated_length": 935.0, + "epoch": 0.14182836567313462, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2269902606631655, + "kl": 0.0069122314453125, + "learning_rate": 9.952618006029548e-07, + "loss": 0.0074, + "num_tokens": 30726433.0, + "reward": 0.0, + "reward_std": 0.9788329005241394, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08842183463750468, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10552344360223014, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1517795672580372, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 709 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1368.0, + "completions/max_terminated_length": 1368.0, + "completions/mean_length": 1108.625, + "completions/mean_terminated_length": 1108.625, + "completions/min_length": 982.0, + "completions/min_terminated_length": 982.0, + "epoch": 0.14202840568113623, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4431517162303, + "kl": 0.004543304443359375, + "learning_rate": 9.95216212562145e-07, + "loss": -0.0308, + "num_tokens": 30774443.0, + "reward": -9.313225746154785e-09, + "reward_std": 1.0673320293426514, + "rewards/wordcountpos_reward_nokeypoint/mean": -9.313225746154785e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.059024106027506414, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12949683306214418, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09388724521901158, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 710 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1391.0, + "completions/mean_length": 1143.5, + "completions/mean_terminated_length": 929.6000366210938, + "completions/min_length": 640.0, + "completions/min_terminated_length": 640.0, + "epoch": 0.14222844568913784, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.350326354195779, + "kl": 0.004940032958984375, + "learning_rate": 9.951704074318883e-07, + "loss": -0.0838, + "num_tokens": 30821411.0, + "reward": 1.862645149230957e-08, + "reward_std": 0.9558383226394653, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.862645149230957e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1125055670907381, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05895765222583878, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11538983843829065, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 711 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1295.0, + "completions/mean_length": 1120.5, + "completions/mean_terminated_length": 1095.2000732421875, + "completions/min_length": 847.0, + "completions/min_terminated_length": 847.0, + "epoch": 0.14242848569713942, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.837552825479169, + "kl": 0.00978851318359375, + "learning_rate": 9.951243852345196e-07, + "loss": -0.0294, + "num_tokens": 30870179.0, + "reward": 0.0, + "reward_std": 0.8432515859603882, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.3913692599451631, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09312295472385707, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10174405069512349, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 712 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1428.6875, + "completions/mean_terminated_length": 1337.0, + "completions/min_length": 1065.0, + "completions/min_terminated_length": 1065.0, + "epoch": 0.14262852570514103, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.228416425704413, + "kl": 0.00745391845703125, + "learning_rate": 9.95078145992479e-07, + "loss": 0.0074, + "num_tokens": 30919214.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0472122430801392, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.15631057278346205, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.22177450832799514, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10576003586036262, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 713 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1431.0, + "completions/max_terminated_length": 1431.0, + "completions/mean_length": 1190.4375, + "completions/mean_terminated_length": 1190.4375, + "completions/min_length": 1019.0, + "completions/min_terminated_length": 1019.0, + "epoch": 0.14282856571314262, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.065302593954059, + "kl": 0.00466156005859375, + "learning_rate": 9.950316897283137e-07, + "loss": -0.0001, + "num_tokens": 30964693.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6828963756561279, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.019938109645834604, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08746284539205995, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0925962962222252, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 714 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1277.0, + "completions/max_terminated_length": 1277.0, + "completions/mean_length": 1097.0625, + "completions/mean_terminated_length": 1097.0625, + "completions/min_length": 827.0, + "completions/min_terminated_length": 827.0, + "epoch": 0.14302860572114423, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.967628032387131, + "kl": 0.005245208740234375, + "learning_rate": 9.949850164646756e-07, + "loss": -0.0258, + "num_tokens": 31011318.0, + "reward": 3.3527612686157227e-08, + "reward_std": 1.0190335512161255, + "rewards/wordcountpos_reward_nokeypoint/mean": 3.3527612686157227e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.12271564064506636, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1355268482172753, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12141145226353543, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 715 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1423.0, + "completions/max_terminated_length": 1423.0, + "completions/mean_length": 1077.6875, + "completions/mean_terminated_length": 1077.6875, + "completions/min_length": 832.0, + "completions/min_terminated_length": 832.0, + "epoch": 0.14322864572914584, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9498087325711277, + "kl": 0.0068206787109375, + "learning_rate": 9.949381262243225e-07, + "loss": -0.0288, + "num_tokens": 31053665.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.6639367341995239, + "rewards/wordcountpos_reward_nokeypoint/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.16631769719255687, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17485560336080977, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0769800358919501, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 716 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1287.0, + "completions/mean_length": 1007.4375, + "completions/mean_terminated_length": 974.6000366210938, + "completions/min_length": 669.0, + "completions/min_terminated_length": 669.0, + "epoch": 0.14342868573714743, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.670849082381008, + "kl": 0.0072174072265625, + "learning_rate": 9.94891019030119e-07, + "loss": -0.0674, + "num_tokens": 31090584.0, + "reward": 0.0, + "reward_std": 0.9717199802398682, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03913433907480912, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2035607041322989, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10886621079036349, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 717 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1344.0, + "completions/mean_length": 1169.375, + "completions/mean_terminated_length": 1122.1429443359375, + "completions/min_length": 772.0, + "completions/min_terminated_length": 772.0, + "epoch": 0.14362872574514904, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8773255302194314, + "kl": 0.004238128662109375, + "learning_rate": 9.948436949050343e-07, + "loss": 0.0256, + "num_tokens": 31136414.0, + "reward": 0.0, + "reward_std": 0.8156179189682007, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.02287684698472807, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09080949912730368, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059004080210452226, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 718 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1392.0, + "completions/max_terminated_length": 1392.0, + "completions/mean_length": 958.9375, + "completions/mean_terminated_length": 958.9375, + "completions/min_length": 637.0, + "completions/min_terminated_length": 637.0, + "epoch": 0.14382876575315062, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.926820253715189, + "kl": 0.00473785400390625, + "learning_rate": 9.94796153872144e-07, + "loss": -0.0213, + "num_tokens": 31184557.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9861009120941162, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07893338552436974, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08368398681104343, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.056927504255331086, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 719 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1276.0, + "completions/max_terminated_length": 1276.0, + "completions/mean_length": 975.0625, + "completions/mean_terminated_length": 975.0625, + "completions/min_length": 698.0, + "completions/min_terminated_length": 698.0, + "epoch": 0.14402880576115223, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8323599155477104, + "kl": 0.007537841796875, + "learning_rate": 9.947483959546293e-07, + "loss": -0.0643, + "num_tokens": 31224230.0, + "reward": 0.0, + "reward_std": 0.9133445024490356, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.044019861790586416, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08928290790564751, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1275843947266976, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 720 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1189.0, + "completions/max_terminated_length": 1189.0, + "completions/mean_length": 1037.125, + "completions/mean_terminated_length": 1037.125, + "completions/min_length": 784.0, + "completions/min_terminated_length": 784.0, + "epoch": 0.14422884576915382, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7106352599650907, + "kl": 0.00618743896484375, + "learning_rate": 9.94700421175777e-07, + "loss": -0.0278, + "num_tokens": 31269592.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.47162216901779175, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.12957283641522815, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.41716303599489996, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13743685418725538, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 721 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1436.0, + "completions/mean_length": 1369.0, + "completions/mean_terminated_length": 1309.45458984375, + "completions/min_length": 1196.0, + "completions/min_terminated_length": 1196.0, + "epoch": 0.14442888577715543, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1732925864508474, + "kl": 0.0070343017578125, + "learning_rate": 9.946522295589801e-07, + "loss": -0.0292, + "num_tokens": 31319472.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5713779330253601, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.2566548110806487, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2885192163367105, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12164002752505566, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 722 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1247.0, + "completions/max_terminated_length": 1247.0, + "completions/mean_length": 945.125, + "completions/mean_terminated_length": 945.125, + "completions/min_length": 610.0, + "completions/min_terminated_length": 610.0, + "epoch": 0.14462892578515704, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1506850450057597, + "kl": 0.00653076171875, + "learning_rate": 9.94603821127737e-07, + "loss": -0.0467, + "num_tokens": 31355234.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9842181205749512, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.13297543687445793, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.057581715446731595, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06440611887195305, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 723 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1279.0, + "completions/max_terminated_length": 1279.0, + "completions/mean_length": 879.5625, + "completions/mean_terminated_length": 879.5625, + "completions/min_length": 684.0, + "completions/min_terminated_length": 684.0, + "epoch": 0.14482896579315863, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.538436581893375, + "kl": 0.00739288330078125, + "learning_rate": 9.945551959056518e-07, + "loss": 0.0053, + "num_tokens": 31404139.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.9817670583724976, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08170680280346966, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06787838544442702, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1088662107903635, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 724 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1253.0, + "completions/mean_length": 1272.125, + "completions/mean_terminated_length": 1044.25, + "completions/min_length": 786.0, + "completions/min_terminated_length": 786.0, + "epoch": 0.14502900580116024, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3515678520831083, + "kl": 0.0052490234375, + "learning_rate": 9.945063539164344e-07, + "loss": -0.0563, + "num_tokens": 31460429.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.877171516418457, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04367600190173569, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07695484247692534, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10576003586036263, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 725 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1337.375, + "completions/mean_terminated_length": 1326.533447265625, + "completions/min_length": 1154.0, + "completions/min_terminated_length": 1154.0, + "epoch": 0.14522904580916182, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.009846018209793, + "kl": 0.0072784423828125, + "learning_rate": 9.944572951839003e-07, + "loss": -0.0279, + "num_tokens": 31510307.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8750674724578857, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06367617379762731, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07767073324869139, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07649739768026002, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 726 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1410.0, + "completions/mean_length": 1246.0, + "completions/mean_terminated_length": 1161.3333740234375, + "completions/min_length": 975.0, + "completions/min_terminated_length": 975.0, + "epoch": 0.14542908581716343, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7190245314661285, + "kl": 0.006122589111328125, + "learning_rate": 9.94408019731971e-07, + "loss": -0.0469, + "num_tokens": 31565035.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9171417951583862, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.17555355700822342, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16886412789127483, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08850612031567838, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 727 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1416.0, + "completions/mean_length": 1242.1875, + "completions/mean_terminated_length": 1225.0001220703125, + "completions/min_length": 919.0, + "completions/min_terminated_length": 919.0, + "epoch": 0.14562912582516505, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5005651208521535, + "kl": 0.0074615478515625, + "learning_rate": 9.94358527584673e-07, + "loss": -0.0354, + "num_tokens": 31608198.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.8134673833847046, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.028166855399243865, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17946802816189822, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12881223774390613, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 728 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1403.0, + "completions/mean_length": 1266.8125, + "completions/mean_terminated_length": 1126.9000244140625, + "completions/min_length": 874.0, + "completions/min_terminated_length": 874.0, + "epoch": 0.14582916583316663, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8134387247463897, + "kl": 0.00542449951171875, + "learning_rate": 9.943088187661394e-07, + "loss": -0.0533, + "num_tokens": 31658027.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7811755537986755, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.2670093741500464, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.7305057014908032, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07084150279686703, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 729 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1063.6875, + "completions/mean_terminated_length": 1034.60009765625, + "completions/min_length": 820.0, + "completions/min_terminated_length": 820.0, + "epoch": 0.14602920584116824, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4213141662100206, + "kl": 0.0061798095703125, + "learning_rate": 9.94258893300608e-07, + "loss": -0.0028, + "num_tokens": 31699678.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9375150203704834, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.01588280916560092, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.024792250719790104, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11287488977066928, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 730 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1403.0, + "completions/max_terminated_length": 1403.0, + "completions/mean_length": 1172.1875, + "completions/mean_terminated_length": 1172.1875, + "completions/min_length": 818.0, + "completions/min_terminated_length": 818.0, + "epoch": 0.14622924584916983, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2233455359455028, + "kl": 0.00698089599609375, + "learning_rate": 9.942087512124232e-07, + "loss": -0.0321, + "num_tokens": 31736857.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8587762117385864, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04076599163099255, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09641476912387797, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09179284245476838, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 731 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1460.0, + "completions/max_terminated_length": 1460.0, + "completions/mean_length": 1053.5, + "completions/mean_terminated_length": 1053.5, + "completions/min_length": 863.0, + "completions/min_terminated_length": 863.0, + "epoch": 0.14642928585717144, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6521433535500916, + "kl": 0.0085906982421875, + "learning_rate": 9.94158392526034e-07, + "loss": -0.0079, + "num_tokens": 31769073.0, + "reward": 0.0, + "reward_std": 0.6770071983337402, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.024186928155500654, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.023987919539044122, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09016445879408157, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 732 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1136.0, + "completions/mean_length": 965.75, + "completions/mean_terminated_length": 930.1333618164062, + "completions/min_length": 789.0, + "completions/min_terminated_length": 789.0, + "epoch": 0.14662932586517302, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5125226545478108, + "kl": 0.00667572021484375, + "learning_rate": 9.941078172659955e-07, + "loss": -0.0126, + "num_tokens": 31806861.0, + "reward": 0.0, + "reward_std": 0.8564249277114868, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0953710710736443, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11269460710579629, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1354690069789096, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 733 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1305.0, + "completions/max_terminated_length": 1305.0, + "completions/mean_length": 1102.1875, + "completions/mean_terminated_length": 1102.1875, + "completions/min_length": 888.0, + "completions/min_terminated_length": 888.0, + "epoch": 0.14682936587317463, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.13886795106335, + "kl": 0.00605010986328125, + "learning_rate": 9.94057025456969e-07, + "loss": -0.0215, + "num_tokens": 31844368.0, + "reward": 0.0, + "reward_std": 0.6366941928863525, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07029382384808258, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11677101773917868, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059004080210452226, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 734 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1230.0625, + "completions/mean_terminated_length": 1167.769287109375, + "completions/min_length": 890.0, + "completions/min_terminated_length": 890.0, + "epoch": 0.14702940588117624, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5780686405415087, + "kl": 0.00701141357421875, + "learning_rate": 9.940060171237204e-07, + "loss": 0.0372, + "num_tokens": 31889593.0, + "reward": 0.0, + "reward_std": 0.998370885848999, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1388523574519755, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.18637172549688774, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.19626135258506328, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 735 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1442.0, + "completions/max_terminated_length": 1442.0, + "completions/mean_length": 993.1875, + "completions/mean_terminated_length": 993.1875, + "completions/min_length": 782.0, + "completions/min_terminated_length": 782.0, + "epoch": 0.14722944588917783, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2351531159554616, + "kl": 0.004566192626953125, + "learning_rate": 9.939547922911215e-07, + "loss": -0.0685, + "num_tokens": 31933836.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5374400019645691, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.041750725342076016, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08142273442833754, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07698003589195011, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 736 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1243.0, + "completions/max_terminated_length": 1243.0, + "completions/mean_length": 985.0, + "completions/mean_terminated_length": 985.0, + "completions/min_length": 806.0, + "completions/min_terminated_length": 806.0, + "epoch": 0.14742948589717944, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2320909987327595, + "kl": 0.0057373046875, + "learning_rate": 9.9390335098415e-07, + "loss": 0.0122, + "num_tokens": 31975308.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0251682996749878, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.12854668047654308, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0884451319471674, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1067187372905475, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 737 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1437.0, + "completions/mean_length": 1064.875, + "completions/mean_terminated_length": 1035.86669921875, + "completions/min_length": 841.0, + "completions/min_terminated_length": 841.0, + "epoch": 0.14762952590518102, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3049168920426406, + "kl": 0.00457000732421875, + "learning_rate": 9.938516932278888e-07, + "loss": -0.0405, + "num_tokens": 32010914.0, + "reward": 0.0, + "reward_std": 0.6919175386428833, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0702407611547459, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.21060990925894468, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11122216672215289, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 738 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1360.0, + "completions/mean_length": 1035.625, + "completions/mean_terminated_length": 1004.666748046875, + "completions/min_length": 876.0, + "completions/min_terminated_length": 876.0, + "epoch": 0.14782956591318264, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1949320476301915, + "kl": 0.00310516357421875, + "learning_rate": 9.937998190475266e-07, + "loss": -0.0415, + "num_tokens": 32045324.0, + "reward": -1.30385160446167e-08, + "reward_std": 1.067802906036377, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.30385160446167e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.023707976138399672, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09108694682505451, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09388724521901158, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 739 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1490.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1058.625, + "completions/mean_terminated_length": 1058.625, + "completions/min_length": 749.0, + "completions/min_terminated_length": 749.0, + "epoch": 0.14802960592118425, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0917664513375316, + "kl": 0.00547027587890625, + "learning_rate": 9.937477284683574e-07, + "loss": -0.0314, + "num_tokens": 32092422.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8205986618995667, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07763743925917398, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10680970350171977, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07781745019952503, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 740 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1397.0, + "completions/mean_length": 1187.375, + "completions/mean_terminated_length": 1166.533447265625, + "completions/min_length": 993.0, + "completions/min_terminated_length": 993.0, + "epoch": 0.14822964592918583, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5441910406212482, + "kl": 0.003604888916015625, + "learning_rate": 9.936954215157807e-07, + "loss": 0.0029, + "num_tokens": 32123452.0, + "reward": -4.470348358154297e-08, + "reward_std": 0.996324360370636, + "rewards/wordcountpos_reward_nokeypoint/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.20806528154501341, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06981764301142866, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.026874192494328493, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 741 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1161.0, + "completions/max_terminated_length": 1161.0, + "completions/mean_length": 986.75, + "completions/mean_terminated_length": 986.75, + "completions/min_length": 830.0, + "completions/min_terminated_length": 830.0, + "epoch": 0.14842968593718744, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.769636504060677, + "kl": 0.006927490234375, + "learning_rate": 9.936428982153017e-07, + "loss": 0.0249, + "num_tokens": 32150472.0, + "reward": 0.0, + "reward_std": 1.054905652999878, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.01667657010513239, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03370447146809725, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10470416879457554, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 742 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1431.0, + "completions/mean_length": 1216.5, + "completions/mean_terminated_length": 1197.60009765625, + "completions/min_length": 884.0, + "completions/min_terminated_length": 884.0, + "epoch": 0.14862972594518903, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5911904394301932, + "kl": 0.0059986114501953125, + "learning_rate": 9.935901585925309e-07, + "loss": -0.045, + "num_tokens": 32195392.0, + "reward": 0.0, + "reward_std": 0.9420871734619141, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.024348562434065267, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.23966794193815882, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1338185615204685, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 743 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1371.0, + "completions/mean_length": 1250.5625, + "completions/mean_terminated_length": 1056.5555419921875, + "completions/min_length": 788.0, + "completions/min_terminated_length": 788.0, + "epoch": 0.14882976595319064, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1858741312678687, + "kl": 0.00605010986328125, + "learning_rate": 9.935372026731847e-07, + "loss": 0.0374, + "num_tokens": 32250801.0, + "reward": 0.0, + "reward_std": 0.6659972667694092, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.050644572836808066, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08661014042162246, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12164002752505566, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 744 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1324.0, + "completions/max_terminated_length": 1324.0, + "completions/mean_length": 1102.1875, + "completions/mean_terminated_length": 1102.1875, + "completions/min_length": 845.0, + "completions/min_terminated_length": 845.0, + "epoch": 0.14902980596119225, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.638189010963646, + "kl": 0.00533294677734375, + "learning_rate": 9.934840304830843e-07, + "loss": 0.0103, + "num_tokens": 32296636.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.39348071813583374, + "rewards/wordcountpos_reward_nokeypoint/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0680132212117319, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16696970450628243, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875000000000001, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07187952884282608, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 745 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1362.0, + "completions/mean_length": 1258.3125, + "completions/mean_terminated_length": 1177.75, + "completions/min_length": 1045.0, + "completions/min_terminated_length": 1045.0, + "epoch": 0.14922984596919384, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.951411442350124, + "kl": 0.0076141357421875, + "learning_rate": 9.934306420481567e-07, + "loss": -0.0137, + "num_tokens": 32341377.0, + "reward": 0.0, + "reward_std": 0.7884678840637207, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.017359971067529966, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12142974491738584, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1270024788326182, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 746 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1469.0, + "completions/mean_length": 1315.25, + "completions/mean_terminated_length": 1302.933349609375, + "completions/min_length": 971.0, + "completions/min_terminated_length": 971.0, + "epoch": 0.14942988597719545, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0979191117845373, + "kl": 0.00823211669921875, + "learning_rate": 9.933770373944344e-07, + "loss": 0.0008, + "num_tokens": 32396629.0, + "reward": 0.0, + "reward_std": 0.7308996319770813, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.19060462611040446, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.3820413368997012, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.046943622609505783, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 747 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1400.0, + "completions/mean_length": 1198.875, + "completions/mean_terminated_length": 1178.800048828125, + "completions/min_length": 800.0, + "completions/min_terminated_length": 800.0, + "epoch": 0.14962992598519703, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3685676904084882, + "kl": 0.00772857666015625, + "learning_rate": 9.933232165480555e-07, + "loss": -0.0118, + "num_tokens": 32440899.0, + "reward": -1.1175870895385742e-08, + "reward_std": 1.0398492813110352, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.10356247515174918, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0391417913664289, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07685966046898336, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 748 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1467.0, + "completions/mean_length": 1234.9375, + "completions/mean_terminated_length": 1217.2667236328125, + "completions/min_length": 1059.0, + "completions/min_terminated_length": 1059.0, + "epoch": 0.14982996599319864, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8892868718156812, + "kl": 0.00518035888671875, + "learning_rate": 9.932691795352632e-07, + "loss": -0.0056, + "num_tokens": 32485418.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8583910465240479, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.140432944501357, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07026342284672203, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05443310539518172, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 749 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1381.0, + "completions/max_terminated_length": 1381.0, + "completions/mean_length": 1168.25, + "completions/mean_terminated_length": 1168.25, + "completions/min_length": 905.0, + "completions/min_terminated_length": 905.0, + "epoch": 0.15003000600120023, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.358903918145326, + "kl": 0.00490570068359375, + "learning_rate": 9.93214926382406e-07, + "loss": -0.0195, + "num_tokens": 32540862.0, + "reward": 0.0, + "reward_std": 0.8871325254440308, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.024492673861437372, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08263117206735236, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059004080210452226, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 750 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1414.0, + "completions/mean_length": 1129.1875, + "completions/mean_terminated_length": 1104.4666748046875, + "completions/min_length": 926.0, + "completions/min_terminated_length": 926.0, + "epoch": 0.15023004600920184, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.954527963101841, + "kl": 0.00524139404296875, + "learning_rate": 9.931604571159382e-07, + "loss": -0.0546, + "num_tokens": 32588097.0, + "reward": 0.0, + "reward_std": 0.8041890263557434, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.051861888827194905, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10934680212163495, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08073734277593311, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 751 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1101.0, + "completions/mean_length": 1062.1875, + "completions/mean_terminated_length": 916.25, + "completions/min_length": 669.0, + "completions/min_terminated_length": 669.0, + "epoch": 0.15043008601720345, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.97518042729978, + "kl": 0.005008697509765625, + "learning_rate": 9.931057717624192e-07, + "loss": -0.003, + "num_tokens": 32635508.0, + "reward": 0.0, + "reward_std": 0.6873092651367188, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.13059523142876245, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0790549830602253, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09388724521901161, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 752 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1338.0, + "completions/max_terminated_length": 1338.0, + "completions/mean_length": 1081.5625, + "completions/mean_terminated_length": 1081.5625, + "completions/min_length": 599.0, + "completions/min_terminated_length": 599.0, + "epoch": 0.15063012602520504, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6197000500171184, + "kl": 0.00475311279296875, + "learning_rate": 9.930508703485136e-07, + "loss": -0.0061, + "num_tokens": 32678149.0, + "reward": 0.0, + "reward_std": 0.926202118396759, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0003749036190119229, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09791193673002267, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07302967433402215, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 753 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1383.0, + "completions/max_terminated_length": 1383.0, + "completions/mean_length": 1104.6875, + "completions/mean_terminated_length": 1104.6875, + "completions/min_length": 858.0, + "completions/min_terminated_length": 858.0, + "epoch": 0.15083016603320665, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.144561315107978, + "kl": 0.006256103515625, + "learning_rate": 9.929957529009918e-07, + "loss": -0.0412, + "num_tokens": 32716496.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.736321210861206, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.00748988397017919, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.23786326727976279, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.103905227473387, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 754 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1373.0, + "completions/mean_length": 1131.5, + "completions/mean_terminated_length": 1046.4615478515625, + "completions/min_length": 911.0, + "completions/min_terminated_length": 911.0, + "epoch": 0.15103020604120823, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2641090350764523, + "kl": 0.0068206787109375, + "learning_rate": 9.929404194467294e-07, + "loss": -0.0066, + "num_tokens": 32756656.0, + "reward": 0.0, + "reward_std": 0.7997984886169434, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.15181479415554391, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.34701128739565174, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07649739768026002, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 755 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1384.0, + "completions/mean_length": 1205.25, + "completions/mean_terminated_length": 1185.60009765625, + "completions/min_length": 973.0, + "completions/min_terminated_length": 973.0, + "epoch": 0.15123024604920984, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.898258548781875, + "kl": 0.0064697265625, + "learning_rate": 9.92884870012707e-07, + "loss": 0.0003, + "num_tokens": 32803252.0, + "reward": 0.0, + "reward_std": 0.9694674015045166, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.5061100551384478, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.41022376086854195, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06540472290116194, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327954292297363, + "step": 756 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1250.0, + "completions/max_terminated_length": 1250.0, + "completions/mean_length": 1001.25, + "completions/mean_terminated_length": 1001.25, + "completions/min_length": 719.0, + "completions/min_terminated_length": 719.0, + "epoch": 0.15143028605721146, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.061240154218926, + "kl": 0.00476837158203125, + "learning_rate": 9.92829104626011e-07, + "loss": 0.033, + "num_tokens": 32836480.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0107663869857788, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03261500191050867, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08883937130429634, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.038248698840130005, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 757 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1281.0, + "completions/max_terminated_length": 1281.0, + "completions/mean_length": 902.875, + "completions/mean_terminated_length": 902.875, + "completions/min_length": 656.0, + "completions/min_terminated_length": 656.0, + "epoch": 0.15163032606521304, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.080097067338614, + "kl": 0.004573822021484375, + "learning_rate": 9.927731233138326e-07, + "loss": -0.0001, + "num_tokens": 32873582.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0344139337539673, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0011426899947020097, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07295954268631258, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09418264367902597, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 758 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1320.0, + "completions/max_terminated_length": 1320.0, + "completions/mean_length": 1142.9375, + "completions/mean_terminated_length": 1142.9375, + "completions/min_length": 993.0, + "completions/min_terminated_length": 993.0, + "epoch": 0.15183036607321465, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2179295574328735, + "kl": 0.00699615478515625, + "learning_rate": 9.927169261034687e-07, + "loss": -0.0362, + "num_tokens": 32918909.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.015416145324707, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.053786456756096744, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0741510121914005, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07084150279686703, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 759 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1157.75, + "completions/mean_terminated_length": 1134.933349609375, + "completions/min_length": 669.0, + "completions/min_terminated_length": 669.0, + "epoch": 0.15203040608121624, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5870951686228563, + "kl": 0.008148193359375, + "learning_rate": 9.926605130223215e-07, + "loss": -0.0518, + "num_tokens": 32974185.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9708069562911987, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.22729692548750394, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16184535030895186, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1128748897706693, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 760 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1115.0, + "completions/mean_length": 955.0, + "completions/mean_terminated_length": 918.6666870117188, + "completions/min_length": 687.0, + "completions/min_terminated_length": 687.0, + "epoch": 0.15223044608921785, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.853400056103315, + "kl": 0.00800323486328125, + "learning_rate": 9.926038840978979e-07, + "loss": 0.0146, + "num_tokens": 33023897.0, + "reward": -4.470348358154297e-08, + "reward_std": 0.9366490840911865, + "rewards/wordcountpos_reward_nokeypoint/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05323784396413278, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1348365430706454, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12041594578792297, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 761 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1478.0, + "completions/mean_length": 1173.0, + "completions/mean_terminated_length": 1151.2000732421875, + "completions/min_length": 947.0, + "completions/min_terminated_length": 947.0, + "epoch": 0.15243048609721943, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0734235565393626, + "kl": 0.00672149658203125, + "learning_rate": 9.925470393578105e-07, + "loss": -0.0054, + "num_tokens": 33075017.0, + "reward": 0.0, + "reward_std": 0.38984519243240356, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.12773261197053415, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10601090252237405, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1258305739211792, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 762 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1204.9375, + "completions/mean_terminated_length": 1106.5833740234375, + "completions/min_length": 806.0, + "completions/min_terminated_length": 806.0, + "epoch": 0.15263052610522104, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3734213375847037, + "kl": 0.00731658935546875, + "learning_rate": 9.924899788297773e-07, + "loss": -0.0173, + "num_tokens": 33123816.0, + "reward": 0.0, + "reward_std": 0.7235683798789978, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11299995393406564, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13727834786331822, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1520233900132184, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 763 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1229.0, + "completions/max_terminated_length": 1229.0, + "completions/mean_length": 1001.9375, + "completions/mean_terminated_length": 1001.9375, + "completions/min_length": 746.0, + "completions/min_terminated_length": 746.0, + "epoch": 0.15283056611322265, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.652365972093981, + "kl": 0.00841522216796875, + "learning_rate": 9.924327025416213e-07, + "loss": 0.0008, + "num_tokens": 33161743.0, + "reward": 0.0, + "reward_std": 0.8722985982894897, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.00881726031549879, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1595948445584055, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0969917904124231, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 764 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1366.0, + "completions/mean_length": 1171.125, + "completions/mean_terminated_length": 1149.2000732421875, + "completions/min_length": 741.0, + "completions/min_terminated_length": 741.0, + "epoch": 0.15303060612122424, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.685052221058177, + "kl": 0.006195068359375, + "learning_rate": 9.9237521052127e-07, + "loss": 0.0228, + "num_tokens": 33204529.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.43554380536079407, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.010032841505091629, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15156202303047603, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06426219440409445, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 765 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1370.0, + "completions/max_terminated_length": 1370.0, + "completions/mean_length": 1076.5625, + "completions/mean_terminated_length": 1076.5625, + "completions/min_length": 723.0, + "completions/min_terminated_length": 723.0, + "epoch": 0.15323064612922585, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9482935940781347, + "kl": 0.00818634033203125, + "learning_rate": 9.923175027967577e-07, + "loss": 0.0256, + "num_tokens": 33237130.0, + "reward": 0.0, + "reward_std": 1.0508638620376587, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.01937865768122604, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04309226135852725, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08595864638818418, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 766 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1291.0, + "completions/max_terminated_length": 1291.0, + "completions/mean_length": 1073.75, + "completions/mean_terminated_length": 1073.75, + "completions/min_length": 869.0, + "completions/min_terminated_length": 869.0, + "epoch": 0.15343068613722743, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.864822570552068, + "kl": 0.0089111328125, + "learning_rate": 9.922595793962223e-07, + "loss": 0.0487, + "num_tokens": 33283526.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9642419815063477, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.013578638438402861, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1463298405173718, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06831300510639733, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 767 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1338.0, + "completions/mean_length": 1070.0625, + "completions/mean_terminated_length": 1041.4000244140625, + "completions/min_length": 733.0, + "completions/min_terminated_length": 733.0, + "epoch": 0.15363072614522905, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4199485839330546, + "kl": 0.0095062255859375, + "learning_rate": 9.92201440347908e-07, + "loss": -0.0239, + "num_tokens": 33323927.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9198828935623169, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03293617278560085, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07412663927104304, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09179284245476838, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 768 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1278.0625, + "completions/mean_terminated_length": 1226.84619140625, + "completions/min_length": 904.0, + "completions/min_terminated_length": 904.0, + "epoch": 0.15383076615323066, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.95663604985395, + "kl": 0.00688934326171875, + "learning_rate": 9.921430856801631e-07, + "loss": -0.0355, + "num_tokens": 33370680.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0689175128936768, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03543056951325031, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05894154651387662, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1261979632400061, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 769 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1407.0, + "completions/max_terminated_length": 1407.0, + "completions/mean_length": 1164.125, + "completions/mean_terminated_length": 1164.125, + "completions/min_length": 897.0, + "completions/min_terminated_length": 897.0, + "epoch": 0.15403080616123224, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.119748892971239, + "kl": 0.0080108642578125, + "learning_rate": 9.92084515421442e-07, + "loss": -0.0199, + "num_tokens": 33421418.0, + "reward": 0.0, + "reward_std": 0.9716382026672363, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03217759122950855, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04984218148388002, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10461569884316811, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 770 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1207.0, + "completions/max_terminated_length": 1207.0, + "completions/mean_length": 911.9375, + "completions/mean_terminated_length": 911.9375, + "completions/min_length": 539.0, + "completions/min_terminated_length": 539.0, + "epoch": 0.15423084616923385, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.756101073777376, + "kl": 0.0082855224609375, + "learning_rate": 9.920257296003035e-07, + "loss": -0.046, + "num_tokens": 33449201.0, + "reward": 0.0, + "reward_std": 0.8316950798034668, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06717952672526306, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09453465753575925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08243965245133131, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 771 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1461.0, + "completions/mean_length": 1197.9375, + "completions/mean_terminated_length": 1060.6363525390625, + "completions/min_length": 801.0, + "completions/min_terminated_length": 801.0, + "epoch": 0.15443088617723544, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0657068420357785, + "kl": 0.00637054443359375, + "learning_rate": 9.919667282454123e-07, + "loss": 0.005, + "num_tokens": 33494088.0, + "reward": 0.0, + "reward_std": 0.5227149724960327, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03324158160346786, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15887932441282998, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.15869840952317446, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 772 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1495.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1273.375, + "completions/mean_terminated_length": 1273.375, + "completions/min_length": 1081.0, + "completions/min_terminated_length": 1081.0, + "epoch": 0.15463092618523705, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.280686016194293, + "kl": 0.0072021484375, + "learning_rate": 9.919075113855374e-07, + "loss": 0.0416, + "num_tokens": 33533078.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.604564368724823, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08797005591116247, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06072818612494271, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07969850595746353, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 773 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1454.0, + "completions/max_terminated_length": 1454.0, + "completions/mean_length": 1154.5625, + "completions/mean_terminated_length": 1154.5625, + "completions/min_length": 1016.0, + "completions/min_terminated_length": 1016.0, + "epoch": 0.15483096619323863, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8837101880746916, + "kl": 0.006786346435546875, + "learning_rate": 9.918480790495533e-07, + "loss": 0.036, + "num_tokens": 33575079.0, + "reward": 0.0, + "reward_std": 1.0064867734909058, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.2053843915170293, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2066955543636377, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11917929226045819, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 774 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1448.0, + "completions/mean_length": 1224.125, + "completions/mean_terminated_length": 1205.7333984375, + "completions/min_length": 927.0, + "completions/min_terminated_length": 927.0, + "epoch": 0.15503100620124025, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.181826768183982, + "kl": 0.00696563720703125, + "learning_rate": 9.917884312664395e-07, + "loss": -0.005, + "num_tokens": 33619809.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9997962713241577, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1630534015681988, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.22248840349236212, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14401645996461915, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 775 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1489.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1127.375, + "completions/mean_terminated_length": 1127.375, + "completions/min_length": 834.0, + "completions/min_terminated_length": 834.0, + "epoch": 0.15523104620924186, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.361335433379365, + "kl": 0.00743865966796875, + "learning_rate": 9.917285680652805e-07, + "loss": 0.0083, + "num_tokens": 33660479.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.39689069986343384, + "rewards/wordcountpos_reward_nokeypoint/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04092311533595326, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.29191309327049, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06440611887195305, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 776 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1288.3125, + "completions/mean_terminated_length": 1239.4615478515625, + "completions/min_length": 1063.0, + "completions/min_terminated_length": 1063.0, + "epoch": 0.15543108621724344, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.237501315405986, + "kl": 0.007293701171875, + "learning_rate": 9.916684894752659e-07, + "loss": -0.0157, + "num_tokens": 33705340.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9043306112289429, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.02318048789497351, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.18141310275143102, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.03415650255319865, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 777 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1452.0, + "completions/mean_length": 1223.625, + "completions/mean_terminated_length": 1131.5, + "completions/min_length": 856.0, + "completions/min_terminated_length": 856.0, + "epoch": 0.15563112622524505, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.814818763669433, + "kl": 0.00611114501953125, + "learning_rate": 9.916081955256902e-07, + "loss": -0.0198, + "num_tokens": 33748286.0, + "reward": 0.0, + "reward_std": 0.6055250763893127, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.003060370255131041, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09875244910738828, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9791666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04013864859597431, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 778 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1274.0, + "completions/mean_length": 1065.5625, + "completions/mean_terminated_length": 1036.60009765625, + "completions/min_length": 916.0, + "completions/min_terminated_length": 916.0, + "epoch": 0.15583116623324664, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3966935050125087, + "kl": 0.004917144775390625, + "learning_rate": 9.915476862459529e-07, + "loss": 0.0379, + "num_tokens": 33780095.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8389561772346497, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03865006788096502, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09408483556828917, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.15371932093796678, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 779 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1484.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1246.8125, + "completions/mean_terminated_length": 1246.8125, + "completions/min_length": 1106.0, + "completions/min_terminated_length": 1106.0, + "epoch": 0.15603120624124825, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3671830930285696, + "kl": 0.00748443603515625, + "learning_rate": 9.91486961665559e-07, + "loss": -0.0288, + "num_tokens": 33830924.0, + "reward": 3.725290298461914e-08, + "reward_std": 1.0593838691711426, + "rewards/wordcountpos_reward_nokeypoint/mean": 3.725290298461914e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04087585845395357, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06264876112796951, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14851112939963645, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 780 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1331.0, + "completions/mean_length": 1134.625, + "completions/mean_terminated_length": 1110.2667236328125, + "completions/min_length": 935.0, + "completions/min_terminated_length": 935.0, + "epoch": 0.15623124624924986, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.568377457994513, + "kl": 0.009033203125, + "learning_rate": 9.914260218141179e-07, + "loss": 0.0014, + "num_tokens": 33877734.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9850832223892212, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.14619480175297947, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2616790634402095, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1264911064067352, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 781 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1403.0, + "completions/max_terminated_length": 1403.0, + "completions/mean_length": 976.5, + "completions/mean_terminated_length": 976.5, + "completions/min_length": 648.0, + "completions/min_terminated_length": 648.0, + "epoch": 0.15643128625725145, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.081810440320315, + "kl": 0.00664520263671875, + "learning_rate": 9.913648667213438e-07, + "loss": -0.0059, + "num_tokens": 33916198.0, + "reward": -4.470348358154297e-08, + "reward_std": 0.9184173345565796, + "rewards/wordcountpos_reward_nokeypoint/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07676184030427725, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11111057466850525, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09388724521901161, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 782 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1356.8125, + "completions/mean_terminated_length": 1323.769287109375, + "completions/min_length": 1243.0, + "completions/min_terminated_length": 1243.0, + "epoch": 0.15663132626525306, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.032639749654524, + "kl": 0.005687713623046875, + "learning_rate": 9.913034964170567e-07, + "loss": 0.0038, + "num_tokens": 33956211.0, + "reward": 0.0, + "reward_std": 0.42658573389053345, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.16175966388950253, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11201656407408808, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04216370213557838, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 783 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1426.0, + "completions/max_terminated_length": 1426.0, + "completions/mean_length": 1190.1875, + "completions/mean_terminated_length": 1190.1875, + "completions/min_length": 1003.0, + "completions/min_terminated_length": 1003.0, + "epoch": 0.15683136627325464, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8667455630781693, + "kl": 0.0072021484375, + "learning_rate": 9.912419109311807e-07, + "loss": -0.0229, + "num_tokens": 34009614.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9915661811828613, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.14471241041467195, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2738521107169115, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059628479399994404, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 784 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1434.0, + "completions/mean_length": 1361.1875, + "completions/mean_terminated_length": 1253.2222900390625, + "completions/min_length": 1024.0, + "completions/min_terminated_length": 1024.0, + "epoch": 0.15703140628125625, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.445421283326054, + "kl": 0.00970458984375, + "learning_rate": 9.911801102937455e-07, + "loss": -0.0464, + "num_tokens": 34060209.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9363152980804443, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07183485320267857, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10109096928354773, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11547005383792518, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 785 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1442.0, + "completions/mean_length": 1251.5, + "completions/mean_terminated_length": 1234.933349609375, + "completions/min_length": 884.0, + "completions/min_terminated_length": 884.0, + "epoch": 0.15723144628925786, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7670674092304615, + "kl": 0.00661468505859375, + "learning_rate": 9.91118094534885e-07, + "loss": -0.0058, + "num_tokens": 34109017.0, + "reward": -3.725290298461914e-08, + "reward_std": 0.968751847743988, + "rewards/wordcountpos_reward_nokeypoint/mean": -3.725290298461914e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08767639631556301, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17406890189712465, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.21204471925271556, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 786 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1341.375, + "completions/mean_terminated_length": 1246.2000732421875, + "completions/min_length": 979.0, + "completions/min_terminated_length": 979.0, + "epoch": 0.15743148629725945, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.115119937107194, + "kl": 0.0084075927734375, + "learning_rate": 9.910558636848384e-07, + "loss": -0.0179, + "num_tokens": 34156823.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9126564264297485, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.13055048491435658, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0808738472237123, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09699179041242309, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 787 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1211.0, + "completions/max_terminated_length": 1211.0, + "completions/mean_length": 965.25, + "completions/mean_terminated_length": 965.25, + "completions/min_length": 705.0, + "completions/min_terminated_length": 705.0, + "epoch": 0.15763152630526106, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.193323212039577, + "kl": 0.007854461669921875, + "learning_rate": 9.909934177739502e-07, + "loss": -0.0105, + "num_tokens": 34202867.0, + "reward": 0.0, + "reward_std": 0.9859888553619385, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.21240318652396928, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2956439997129123, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12041594578792297, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 788 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1181.0, + "completions/max_terminated_length": 1181.0, + "completions/mean_length": 1050.625, + "completions/mean_terminated_length": 1050.625, + "completions/min_length": 778.0, + "completions/min_terminated_length": 778.0, + "epoch": 0.15783156631326264, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.474023612319744, + "kl": 0.0058441162109375, + "learning_rate": 9.909307568326686e-07, + "loss": 0.0186, + "num_tokens": 34251733.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6692025661468506, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.009389641832331793, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12408772127750711, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05443310539518172, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 789 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1431.0, + "completions/mean_length": 1161.9375, + "completions/mean_terminated_length": 1113.6429443359375, + "completions/min_length": 791.0, + "completions/min_terminated_length": 791.0, + "epoch": 0.15803160632126426, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3394578784503306, + "kl": 0.00800323486328125, + "learning_rate": 9.90867880891548e-07, + "loss": -0.006, + "num_tokens": 34295764.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9364528059959412, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.16614565395751318, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07605982896576363, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11344765475923412, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 790 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1161.0, + "completions/mean_length": 1039.75, + "completions/mean_terminated_length": 1009.0667114257812, + "completions/min_length": 784.0, + "completions/min_terminated_length": 784.0, + "epoch": 0.15823164632926584, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6996014492131883, + "kl": 0.0085296630859375, + "learning_rate": 9.908047899812468e-07, + "loss": -0.0141, + "num_tokens": 34336824.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0472294092178345, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07735671876699568, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06783622910713447, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0697880388775209, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 791 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1252.5625, + "completions/mean_terminated_length": 1170.0833740234375, + "completions/min_length": 1000.0, + "completions/min_terminated_length": 1000.0, + "epoch": 0.15843168633726745, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1393459091976545, + "kl": 0.00846099853515625, + "learning_rate": 9.907414841325283e-07, + "loss": -0.0341, + "num_tokens": 34389361.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8758847713470459, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.022616161611815344, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05667518403946425, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11538983843829065, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 792 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1360.0, + "completions/max_terminated_length": 1360.0, + "completions/mean_length": 1185.1875, + "completions/mean_terminated_length": 1185.1875, + "completions/min_length": 774.0, + "completions/min_terminated_length": 774.0, + "epoch": 0.15863172634526906, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5143062099391424, + "kl": 0.00518798828125, + "learning_rate": 9.906779633762606e-07, + "loss": -0.0404, + "num_tokens": 34432604.0, + "reward": 0.0, + "reward_std": 0.6717320680618286, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03544805438469065, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06069556758555572, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10602235962635778, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 793 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1120.4375, + "completions/mean_terminated_length": 1095.1334228515625, + "completions/min_length": 769.0, + "completions/min_terminated_length": 769.0, + "epoch": 0.15883176635327065, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4378189735324924, + "kl": 0.00798797607421875, + "learning_rate": 9.906142277434172e-07, + "loss": -0.0192, + "num_tokens": 34479379.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.068108081817627, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.14812654223992536, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07243193523670573, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10327955589886445, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 794 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1432.0, + "completions/mean_length": 1215.4375, + "completions/mean_terminated_length": 1174.7857666015625, + "completions/min_length": 1032.0, + "completions/min_terminated_length": 1032.0, + "epoch": 0.15903180636127226, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.759717551204494, + "kl": 0.00724029541015625, + "learning_rate": 9.905502772650754e-07, + "loss": 0.0003, + "num_tokens": 34524130.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6662622690200806, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.09560596157515877, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13304465777896696, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10886621079036349, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 795 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1114.0, + "completions/max_terminated_length": 1114.0, + "completions/mean_length": 925.125, + "completions/mean_terminated_length": 925.125, + "completions/min_length": 796.0, + "completions/min_terminated_length": 796.0, + "epoch": 0.15923184636927384, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7893669413777684, + "kl": 0.004566192626953125, + "learning_rate": 9.904861119724178e-07, + "loss": -0.023, + "num_tokens": 34563596.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8998886346817017, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04291776571534934, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03871793391489929, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8250000000000001, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07649739768026004, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 796 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1317.0, + "completions/max_terminated_length": 1317.0, + "completions/mean_length": 1183.3125, + "completions/mean_terminated_length": 1183.3125, + "completions/min_length": 891.0, + "completions/min_terminated_length": 891.0, + "epoch": 0.15943188637727546, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.482195085289091, + "kl": 0.004825592041015625, + "learning_rate": 9.904217318967318e-07, + "loss": -0.0087, + "num_tokens": 34603201.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0187772512435913, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03135400288331924, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08366029520462358, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0906764700582363, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 797 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1443.0, + "completions/max_terminated_length": 1443.0, + "completions/mean_length": 1153.0625, + "completions/mean_terminated_length": 1153.0625, + "completions/min_length": 933.0, + "completions/min_terminated_length": 933.0, + "epoch": 0.15963192638527707, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8745084813548116, + "kl": 0.0096435546875, + "learning_rate": 9.903571370694094e-07, + "loss": -0.0212, + "num_tokens": 34654218.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0295226573944092, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.035020938796827085, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06167459940928411, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10610965676722953, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 798 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1347.0, + "completions/mean_length": 1123.25, + "completions/mean_terminated_length": 1098.1334228515625, + "completions/min_length": 890.0, + "completions/min_terminated_length": 890.0, + "epoch": 0.15983196639327865, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2496467672577594, + "kl": 0.0087738037109375, + "learning_rate": 9.902923275219475e-07, + "loss": 0.012, + "num_tokens": 34685494.0, + "reward": 0.0, + "reward_std": 0.36454010009765625, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07893134333648368, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12055457144305563, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08027729719194865, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 799 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1376.0, + "completions/max_terminated_length": 1376.0, + "completions/mean_length": 1071.5, + "completions/mean_terminated_length": 1071.5, + "completions/min_length": 930.0, + "completions/min_terminated_length": 930.0, + "epoch": 0.16003200640128026, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.780903983936385, + "kl": 0.00830841064453125, + "learning_rate": 9.902273032859472e-07, + "loss": -0.0116, + "num_tokens": 34718014.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6860511302947998, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0785245440126617, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08109980032788537, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13221755360572016, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 800 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1272.0, + "completions/max_terminated_length": 1272.0, + "completions/mean_length": 1045.5, + "completions/mean_terminated_length": 1045.5, + "completions/min_length": 854.0, + "completions/min_terminated_length": 854.0, + "epoch": 0.16023204640928185, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.743555421310866, + "kl": 0.008941650390625, + "learning_rate": 9.90162064393115e-07, + "loss": -0.0051, + "num_tokens": 34749990.0, + "reward": 0.0, + "reward_std": 0.8740478754043579, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04026578523630691, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06494368137723174, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11409872268574492, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 801 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1441.0, + "completions/mean_length": 1202.375, + "completions/mean_terminated_length": 1067.0909423828125, + "completions/min_length": 844.0, + "completions/min_terminated_length": 844.0, + "epoch": 0.16043208641728346, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.359016023148994, + "kl": 0.00675201416015625, + "learning_rate": 9.900966108752614e-07, + "loss": 0.0051, + "num_tokens": 34800372.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9605590105056763, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.10028848270176513, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05884875612108117, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11792967144619461, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 802 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1303.0, + "completions/max_terminated_length": 1303.0, + "completions/mean_length": 1040.25, + "completions/mean_terminated_length": 1040.25, + "completions/min_length": 771.0, + "completions/min_terminated_length": 771.0, + "epoch": 0.16063212642528504, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.741694436245569, + "kl": 0.00801849365234375, + "learning_rate": 9.900309427643018e-07, + "loss": -0.0644, + "num_tokens": 34842136.0, + "reward": 0.0, + "reward_std": 0.6025787591934204, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.43464781211653325, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2108530792996841, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1253144193766372, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 803 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1263.0, + "completions/max_terminated_length": 1263.0, + "completions/mean_length": 1050.625, + "completions/mean_terminated_length": 1050.625, + "completions/min_length": 865.0, + "completions/min_terminated_length": 865.0, + "epoch": 0.16083216643328666, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.812292180451614, + "kl": 0.0086212158203125, + "learning_rate": 9.899650600922566e-07, + "loss": -0.0148, + "num_tokens": 34889874.0, + "reward": 0.0, + "reward_std": 0.7824389934539795, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.30156719063713133, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.23580077093050295, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11213417888437976, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 804 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1360.0, + "completions/max_terminated_length": 1360.0, + "completions/mean_length": 1025.875, + "completions/mean_terminated_length": 1025.875, + "completions/min_length": 739.0, + "completions/min_terminated_length": 739.0, + "epoch": 0.16103220644128827, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.473122197132115, + "kl": 0.007568359375, + "learning_rate": 9.8989896289125e-07, + "loss": -0.0663, + "num_tokens": 34929160.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5794689655303955, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11139500237699422, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14732565416920215, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1287403358472941, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 805 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1134.75, + "completions/mean_terminated_length": 1082.571533203125, + "completions/min_length": 762.0, + "completions/min_terminated_length": 762.0, + "epoch": 0.16123224644928985, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9884776376819895, + "kl": 0.00555419921875, + "learning_rate": 9.898326511935117e-07, + "loss": -0.0386, + "num_tokens": 34975612.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9754001498222351, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.034681113030809056, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08997737546805432, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.15752718754175363, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 806 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1426.0, + "completions/max_terminated_length": 1426.0, + "completions/mean_length": 1063.0, + "completions/mean_terminated_length": 1063.0, + "completions/min_length": 765.0, + "completions/min_terminated_length": 765.0, + "epoch": 0.16143228645729146, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.512297518336623, + "kl": 0.00760650634765625, + "learning_rate": 9.897661250313755e-07, + "loss": -0.0611, + "num_tokens": 35010468.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9274194836616516, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06201752052590838, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08405576681726241, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09016445879408155, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 807 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1108.1875, + "completions/mean_terminated_length": 977.5833740234375, + "completions/min_length": 19.0, + "completions/min_terminated_length": 19.0, + "epoch": 0.16163232646529305, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8148378585212543, + "kl": 0.00733184814453125, + "learning_rate": 9.896993844372794e-07, + "loss": -0.2571, + "num_tokens": 35049327.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6777615547180176, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.24676165730307167, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2188402761975332, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.2356629734112617, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 808 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1477.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1075.0625, + "completions/mean_terminated_length": 1075.0625, + "completions/min_length": 868.0, + "completions/min_terminated_length": 868.0, + "epoch": 0.16183236647329466, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8314450055066054, + "kl": 0.0064697265625, + "learning_rate": 9.896324294437672e-07, + "loss": 0.0332, + "num_tokens": 35092216.0, + "reward": 0.0, + "reward_std": 0.7016310691833496, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.045998559282501066, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.22738143107141526, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07097208632298362, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 809 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1353.0, + "completions/max_terminated_length": 1353.0, + "completions/mean_length": 1083.0625, + "completions/mean_terminated_length": 1083.0625, + "completions/min_length": 856.0, + "completions/min_terminated_length": 856.0, + "epoch": 0.16203240648129627, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0666945198852895, + "kl": 0.00753021240234375, + "learning_rate": 9.895652600834859e-07, + "loss": -0.0255, + "num_tokens": 35125857.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9258521795272827, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09261886251609024, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07371272739914915, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.052880017930181294, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 810 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1316.0, + "completions/mean_length": 1286.5, + "completions/mean_terminated_length": 1120.4444580078125, + "completions/min_length": 895.0, + "completions/min_terminated_length": 895.0, + "epoch": 0.16223244648929785, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.279096573549635, + "kl": 0.0078125, + "learning_rate": 9.894978763891879e-07, + "loss": -0.0225, + "num_tokens": 35178265.0, + "reward": 1.1175870895385742e-08, + "reward_std": 1.0682893991470337, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.009212380495398857, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07452208819949857, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05163977794943221, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 811 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1371.0, + "completions/mean_length": 1053.5, + "completions/mean_terminated_length": 1023.7333984375, + "completions/min_length": 698.0, + "completions/min_terminated_length": 698.0, + "epoch": 0.16243248649729947, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8211830670987705, + "kl": 0.0086517333984375, + "learning_rate": 9.894302783937296e-07, + "loss": -0.0225, + "num_tokens": 35219217.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8062123656272888, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.15693224044479243, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.20906316893504656, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10318986456114841, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 812 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1214.0, + "completions/max_terminated_length": 1214.0, + "completions/mean_length": 1000.9375, + "completions/mean_terminated_length": 1000.9375, + "completions/min_length": 807.0, + "completions/min_terminated_length": 807.0, + "epoch": 0.16263252650530105, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.291883167406542, + "kl": 0.00690460205078125, + "learning_rate": 9.89362466130072e-07, + "loss": -0.0339, + "num_tokens": 35260976.0, + "reward": 0.0, + "reward_std": 1.0558667182922363, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08005587287929072, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04988272146803043, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1166666666666667, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 813 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1049.8125, + "completions/mean_terminated_length": 1019.800048828125, + "completions/min_length": 569.0, + "completions/min_terminated_length": 569.0, + "epoch": 0.16283256651330266, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2001029396500256, + "kl": 0.0071868896484375, + "learning_rate": 9.892944396312812e-07, + "loss": 0.0023, + "num_tokens": 35299629.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7333624958992004, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09645966934163323, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15767353436860396, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08681611046941136, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 814 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1362.0, + "completions/max_terminated_length": 1362.0, + "completions/mean_length": 1139.0625, + "completions/mean_terminated_length": 1139.0625, + "completions/min_length": 894.0, + "completions/min_terminated_length": 894.0, + "epoch": 0.16303260652130427, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.43696382349229, + "kl": 0.0025997161865234375, + "learning_rate": 9.892261989305264e-07, + "loss": 0.0066, + "num_tokens": 35348702.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5213609933853149, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.14989592844137306, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12982545268228052, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1465024333004847, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 815 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1362.0, + "completions/max_terminated_length": 1362.0, + "completions/mean_length": 1104.5, + "completions/mean_terminated_length": 1104.5, + "completions/min_length": 766.0, + "completions/min_terminated_length": 766.0, + "epoch": 0.16323264652930586, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7079619113295723, + "kl": 0.0100250244140625, + "learning_rate": 9.891577440610827e-07, + "loss": -0.029, + "num_tokens": 35402222.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6934175491333008, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.18902397851350802, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.20340154818502032, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07290277645477444, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 816 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1473.0, + "completions/mean_length": 1146.75, + "completions/mean_terminated_length": 1123.2000732421875, + "completions/min_length": 851.0, + "completions/min_terminated_length": 851.0, + "epoch": 0.16343268653730747, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2778287316073604, + "kl": 0.00804901123046875, + "learning_rate": 9.89089075056329e-07, + "loss": 0.0284, + "num_tokens": 35446746.0, + "reward": 5.587935447692871e-09, + "reward_std": 0.942658543586731, + "rewards/wordcountpos_reward_nokeypoint/mean": 5.587935447692871e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05139867284391413, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0632950411425262, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08933913745655643, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 817 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1480.0, + "completions/max_terminated_length": 1480.0, + "completions/mean_length": 1045.25, + "completions/mean_terminated_length": 1045.25, + "completions/min_length": 753.0, + "completions/min_terminated_length": 753.0, + "epoch": 0.16363272654530905, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6243133612533707, + "kl": 0.007965087890625, + "learning_rate": 9.890201919497482e-07, + "loss": -0.0589, + "num_tokens": 35488598.0, + "reward": 0.0, + "reward_std": 1.0557827949523926, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.024561530521624342, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08872345912825362, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1287403358472941, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 818 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1332.0, + "completions/max_terminated_length": 1332.0, + "completions/mean_length": 1090.4375, + "completions/mean_terminated_length": 1090.4375, + "completions/min_length": 824.0, + "completions/min_terminated_length": 824.0, + "epoch": 0.16383276655331067, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1114670713532253, + "kl": 0.00765228271484375, + "learning_rate": 9.889510947749282e-07, + "loss": -0.0167, + "num_tokens": 35529933.0, + "reward": 0.0, + "reward_std": 0.9344456791877747, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.19292748820234057, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07658439262379411, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0769800358919501, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 819 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1268.0, + "completions/max_terminated_length": 1268.0, + "completions/mean_length": 1002.5, + "completions/mean_terminated_length": 1002.5, + "completions/min_length": 635.0, + "completions/min_terminated_length": 635.0, + "epoch": 0.16403280656131225, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6382237364611356, + "kl": 0.005252838134765625, + "learning_rate": 9.888817835655614e-07, + "loss": -0.0343, + "num_tokens": 35575189.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.7334702610969543, + "rewards/wordcountpos_reward_nokeypoint/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.14138915260290325, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13017485972639709, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10610965676722954, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 820 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1394.0, + "completions/mean_length": 1105.625, + "completions/mean_terminated_length": 1079.3333740234375, + "completions/min_length": 904.0, + "completions/min_terminated_length": 904.0, + "epoch": 0.16423284656931386, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7361510537264895, + "kl": 0.0059661865234375, + "learning_rate": 9.888122583554438e-07, + "loss": -0.0289, + "num_tokens": 35614527.0, + "reward": 0.0, + "reward_std": 1.0479291677474976, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.022803305175872764, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08965774943686244, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08062257748298551, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 821 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1386.0, + "completions/mean_length": 1154.625, + "completions/mean_terminated_length": 1039.5, + "completions/min_length": 795.0, + "completions/min_terminated_length": 795.0, + "epoch": 0.16443288657731547, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.608215247005147, + "kl": 0.007781982421875, + "learning_rate": 9.887425191784765e-07, + "loss": 0.0399, + "num_tokens": 35655449.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8624235391616821, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.022192670492736006, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06344521818593721, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07290277645477444, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 822 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1223.0, + "completions/max_terminated_length": 1223.0, + "completions/mean_length": 965.5625, + "completions/mean_terminated_length": 965.5625, + "completions/min_length": 846.0, + "completions/min_terminated_length": 846.0, + "epoch": 0.16463292658531706, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9889887837012936, + "kl": 0.006404876708984375, + "learning_rate": 9.886725660686647e-07, + "loss": 0.0421, + "num_tokens": 35690034.0, + "reward": 0.0, + "reward_std": 0.5994852781295776, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.055061707998821804, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10986532546002227, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10101338378503964, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 823 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1297.0, + "completions/max_terminated_length": 1297.0, + "completions/mean_length": 1056.6875, + "completions/mean_terminated_length": 1056.6875, + "completions/min_length": 866.0, + "completions/min_terminated_length": 866.0, + "epoch": 0.16483296659331867, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4597049188507816, + "kl": 0.00799560546875, + "learning_rate": 9.886023990601176e-07, + "loss": 0.0165, + "num_tokens": 35729117.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9168305993080139, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.2230824095960781, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06668983338904615, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08027729719194864, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 824 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1450.0, + "completions/mean_length": 1174.6875, + "completions/mean_terminated_length": 1153.0, + "completions/min_length": 849.0, + "completions/min_terminated_length": 849.0, + "epoch": 0.16503300660132025, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3323927515700595, + "kl": 0.009552001953125, + "learning_rate": 9.88532018187049e-07, + "loss": 0.0062, + "num_tokens": 35775520.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8842302560806274, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.42825760328032625, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1169658856984427, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059004080210452226, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 825 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1318.5, + "completions/mean_terminated_length": 1306.4000244140625, + "completions/min_length": 1067.0, + "completions/min_terminated_length": 1067.0, + "epoch": 0.16523304660932187, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0057427596281268, + "kl": 0.0080718994140625, + "learning_rate": 9.884614234837772e-07, + "loss": 0.0035, + "num_tokens": 35835944.0, + "reward": 0.0, + "reward_std": 0.9171786308288574, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1757702313480788, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17928020162396907, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09269623828717428, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 826 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1438.0, + "completions/mean_length": 1192.6875, + "completions/mean_terminated_length": 1148.7857666015625, + "completions/min_length": 953.0, + "completions/min_terminated_length": 953.0, + "epoch": 0.16543308661732348, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.093387651186379, + "kl": 0.009613037109375, + "learning_rate": 9.88390614984724e-07, + "loss": -0.0004, + "num_tokens": 35885731.0, + "reward": 0.0, + "reward_std": 0.7221215963363647, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03401007937106059, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1988495068208532, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09388724521901161, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 827 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1475.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1099.25, + "completions/mean_terminated_length": 1099.25, + "completions/min_length": 868.0, + "completions/min_terminated_length": 868.0, + "epoch": 0.16563312662532506, + "frac_reward_zero_std": 0.5, + "grad_norm": 2.7533058700415625, + "kl": 0.00743865966796875, + "learning_rate": 9.883195927244165e-07, + "loss": 0.0338, + "num_tokens": 35925927.0, + "reward": -1.1175870895385742e-08, + "reward_std": 0.7544520497322083, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08925812783456655, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11201947099427631, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0957427107756338, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 828 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1429.0, + "completions/max_terminated_length": 1429.0, + "completions/mean_length": 1066.625, + "completions/mean_terminated_length": 1066.625, + "completions/min_length": 898.0, + "completions/min_terminated_length": 898.0, + "epoch": 0.16583316663332667, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3018309093458234, + "kl": 0.008636474609375, + "learning_rate": 9.882483567374851e-07, + "loss": 0.0105, + "num_tokens": 35967217.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7812833786010742, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.061880545617319996, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10819436239611038, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06426219440409447, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 829 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1243.0, + "completions/max_terminated_length": 1243.0, + "completions/mean_length": 1011.75, + "completions/mean_terminated_length": 1011.75, + "completions/min_length": 771.0, + "completions/min_terminated_length": 771.0, + "epoch": 0.16603320664132826, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.343286943292407, + "kl": 0.0067901611328125, + "learning_rate": 9.881769070586648e-07, + "loss": 0.0109, + "num_tokens": 36011069.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.6294483542442322, + "rewards/wordcountpos_reward_nokeypoint/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.006772433205476166, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11284171263162518, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08388704928078611, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 830 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1279.1875, + "completions/mean_terminated_length": 1247.6429443359375, + "completions/min_length": 776.0, + "completions/min_terminated_length": 776.0, + "epoch": 0.16623324664932987, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.101862606869446, + "kl": 0.00774383544921875, + "learning_rate": 9.881052437227952e-07, + "loss": -0.0429, + "num_tokens": 36057432.0, + "reward": 0.0, + "reward_std": 0.3894191384315491, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.043331886845955105, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06211845971180102, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10036968702787745, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 831 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1368.0, + "completions/max_terminated_length": 1368.0, + "completions/mean_length": 1073.1875, + "completions/mean_terminated_length": 1073.1875, + "completions/min_length": 907.0, + "completions/min_terminated_length": 907.0, + "epoch": 0.16643328665733145, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.367723271042293, + "kl": 0.0087890625, + "learning_rate": 9.88033366764819e-07, + "loss": -0.01, + "num_tokens": 36100571.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9726666212081909, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.005864086922198277, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.18586201291112522, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.055611083361076424, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 832 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1337.0, + "completions/max_terminated_length": 1337.0, + "completions/mean_length": 1056.4375, + "completions/mean_terminated_length": 1056.4375, + "completions/min_length": 765.0, + "completions/min_terminated_length": 765.0, + "epoch": 0.16663332666533306, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6729384708848776, + "kl": 0.0082244873046875, + "learning_rate": 9.879612762197843e-07, + "loss": 0.0054, + "num_tokens": 36137290.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9158831834793091, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.017987464148763455, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0809297025532757, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10318986456114838, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 833 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1430.0, + "completions/mean_length": 1164.5625, + "completions/mean_terminated_length": 1142.2000732421875, + "completions/min_length": 960.0, + "completions/min_terminated_length": 960.0, + "epoch": 0.16683336667333468, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3959417596942134, + "kl": 0.00826263427734375, + "learning_rate": 9.878889721228426e-07, + "loss": -0.0288, + "num_tokens": 36172011.0, + "reward": 0.0, + "reward_std": 0.7522633671760559, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11518993174983623, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1861932242552626, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12049281521534187, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 834 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1427.0, + "completions/mean_length": 1328.875, + "completions/mean_terminated_length": 1195.77783203125, + "completions/min_length": 994.0, + "completions/min_terminated_length": 994.0, + "epoch": 0.16703340668133626, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.36429759284482, + "kl": 0.009124755859375, + "learning_rate": 9.878164545092496e-07, + "loss": -0.0126, + "num_tokens": 36226961.0, + "reward": 0.0, + "reward_std": 0.6982765793800354, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.002902388733717595, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07996762510803353, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1276569477008451, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 835 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1424.0, + "completions/mean_length": 1178.9375, + "completions/mean_terminated_length": 1157.533447265625, + "completions/min_length": 566.0, + "completions/min_terminated_length": 566.0, + "epoch": 0.16723344668933787, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.027184543398103, + "kl": 0.00777435302734375, + "learning_rate": 9.877437234143653e-07, + "loss": -0.0748, + "num_tokens": 36268296.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7765026092529297, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03562064045504098, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.29277420002674215, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08734775114237132, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 836 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1471.0, + "completions/max_terminated_length": 1471.0, + "completions/mean_length": 1189.9375, + "completions/mean_terminated_length": 1189.9375, + "completions/min_length": 958.0, + "completions/min_terminated_length": 958.0, + "epoch": 0.16743348669733946, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2834656964420774, + "kl": 0.00832366943359375, + "learning_rate": 9.876707788736539e-07, + "loss": -0.0096, + "num_tokens": 36313831.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.818612813949585, + "rewards/wordcountpos_reward_nokeypoint/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11436864902765208, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1686770936283077, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10610965676722953, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 837 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1387.0, + "completions/max_terminated_length": 1387.0, + "completions/mean_length": 1186.375, + "completions/mean_terminated_length": 1186.375, + "completions/min_length": 995.0, + "completions/min_terminated_length": 995.0, + "epoch": 0.16763352670534107, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.155444733731613, + "kl": 0.007537841796875, + "learning_rate": 9.87597620922683e-07, + "loss": 0.0069, + "num_tokens": 36359725.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8623005151748657, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06490289006329082, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13402244467299132, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08734775114237131, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327954292297363, + "step": 838 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1480.0, + "completions/max_terminated_length": 1480.0, + "completions/mean_length": 1144.0625, + "completions/mean_terminated_length": 1144.0625, + "completions/min_length": 915.0, + "completions/min_terminated_length": 915.0, + "epoch": 0.16783356671334268, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.783147071163588, + "kl": 0.00771331787109375, + "learning_rate": 9.875242495971252e-07, + "loss": -0.0003, + "num_tokens": 36402718.0, + "reward": 0.0, + "reward_std": 0.8208498358726501, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.20070554232517693, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2402086650796176, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07649739768026004, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 839 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1375.0, + "completions/mean_length": 1112.0, + "completions/mean_terminated_length": 1086.1334228515625, + "completions/min_length": 908.0, + "completions/min_terminated_length": 908.0, + "epoch": 0.16803360672134426, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3307495182102635, + "kl": 0.0099029541015625, + "learning_rate": 9.874506649327567e-07, + "loss": -0.0596, + "num_tokens": 36446030.0, + "reward": 0.0, + "reward_std": 0.6286139488220215, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05774607662959445, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06594304017024773, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07252075054258096, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 840 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1282.0, + "completions/max_terminated_length": 1282.0, + "completions/mean_length": 1089.75, + "completions/mean_terminated_length": 1089.75, + "completions/min_length": 892.0, + "completions/min_terminated_length": 892.0, + "epoch": 0.16823364672934588, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.408237031685119, + "kl": 0.0106048583984375, + "learning_rate": 9.873768669654575e-07, + "loss": -0.0048, + "num_tokens": 36484042.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.832362711429596, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.032724225377560874, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.079537647534649, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10809803506625451, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 841 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1406.0, + "completions/max_terminated_length": 1406.0, + "completions/mean_length": 1154.0625, + "completions/mean_terminated_length": 1154.0625, + "completions/min_length": 1025.0, + "completions/min_terminated_length": 1025.0, + "epoch": 0.16843368673734746, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8060061707217065, + "kl": 0.00742340087890625, + "learning_rate": 9.873028557312117e-07, + "loss": -0.0054, + "num_tokens": 36529747.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7076513767242432, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1843039624836495, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12008135932813427, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1355373393953503, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 842 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1261.0, + "completions/max_terminated_length": 1261.0, + "completions/mean_length": 1061.75, + "completions/mean_terminated_length": 1061.75, + "completions/min_length": 847.0, + "completions/min_terminated_length": 847.0, + "epoch": 0.16863372674534907, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.750426092245313, + "kl": 0.0093841552734375, + "learning_rate": 9.872286312661077e-07, + "loss": 0.0079, + "num_tokens": 36578543.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8663616180419922, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.013090345708628246, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12473123990142519, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11792967144619461, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 843 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1480.0, + "completions/max_terminated_length": 1480.0, + "completions/mean_length": 1284.875, + "completions/mean_terminated_length": 1284.875, + "completions/min_length": 1027.0, + "completions/min_terminated_length": 1027.0, + "epoch": 0.16883376675335068, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.827123684643078, + "kl": 0.0032825469970703125, + "learning_rate": 9.87154193606338e-07, + "loss": -0.0313, + "num_tokens": 36618061.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.8792611956596375, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.16321980998800995, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.34476542402340676, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.026874192494328493, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 844 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1106.0, + "completions/mean_length": 1216.0625, + "completions/mean_terminated_length": 932.125, + "completions/min_length": 851.0, + "completions/min_terminated_length": 851.0, + "epoch": 0.16903380676135227, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.21344273476029, + "kl": 0.0105438232421875, + "learning_rate": 9.87079542788198e-07, + "loss": -0.0072, + "num_tokens": 36673366.0, + "reward": 0.0, + "reward_std": 0.7901817560195923, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07258893761008879, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06203911386643339, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.16055459438389727, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 845 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1374.0, + "completions/mean_length": 1189.25, + "completions/mean_terminated_length": 1085.666748046875, + "completions/min_length": 887.0, + "completions/min_terminated_length": 887.0, + "epoch": 0.16923384676935388, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4539359325906074, + "kl": 0.009979248046875, + "learning_rate": 9.870046788480884e-07, + "loss": 0.0243, + "num_tokens": 36724642.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9189920425415039, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.061064005583259334, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07308359735476408, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1299572579307862, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 846 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1325.25, + "completions/mean_terminated_length": 1300.2857666015625, + "completions/min_length": 1087.0, + "completions/min_terminated_length": 1087.0, + "epoch": 0.16943388677735546, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.24042644991032, + "kl": 0.0053253173828125, + "learning_rate": 9.86929601822513e-07, + "loss": -0.0092, + "num_tokens": 36768158.0, + "reward": 0.0, + "reward_std": 0.7679414749145508, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10803238983210992, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09092389450046273, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06440611887195305, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 847 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1404.0, + "completions/max_terminated_length": 1404.0, + "completions/mean_length": 1114.9375, + "completions/mean_terminated_length": 1114.9375, + "completions/min_length": 951.0, + "completions/min_terminated_length": 951.0, + "epoch": 0.16963392678535708, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.8482794220289844, + "kl": 0.00505828857421875, + "learning_rate": 9.868543117480798e-07, + "loss": -0.0213, + "num_tokens": 36805309.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7276599407196045, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.019094211035097963, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06556301438207472, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05443310539518172, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 848 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1214.625, + "completions/mean_terminated_length": 1173.857177734375, + "completions/min_length": 794.0, + "completions/min_terminated_length": 794.0, + "epoch": 0.16983396679335866, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.70316608616716, + "kl": 0.0126800537109375, + "learning_rate": 9.867788086615001e-07, + "loss": -0.0567, + "num_tokens": 36855119.0, + "reward": 0.0, + "reward_std": 0.6580266356468201, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1422490084056368, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.25757334447932034, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0806225774829855, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 849 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1108.0, + "completions/max_terminated_length": 1108.0, + "completions/mean_length": 950.25, + "completions/mean_terminated_length": 950.25, + "completions/min_length": 676.0, + "completions/min_terminated_length": 676.0, + "epoch": 0.17003400680136027, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.669839773946094, + "kl": 0.01055908203125, + "learning_rate": 9.867030925995905e-07, + "loss": -0.0356, + "num_tokens": 36902931.0, + "reward": 0.0, + "reward_std": 0.8709691762924194, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.007469921457128557, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0917620569283761, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1261979632400061, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 850 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1414.0, + "completions/mean_length": 1184.5625, + "completions/mean_terminated_length": 1163.533447265625, + "completions/min_length": 900.0, + "completions/min_terminated_length": 900.0, + "epoch": 0.17023404680936188, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.148444293691691, + "kl": 0.00687408447265625, + "learning_rate": 9.866271635992694e-07, + "loss": -0.0334, + "num_tokens": 36950972.0, + "reward": 0.0, + "reward_std": 1.0551235675811768, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0295399032613098, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06043486100799506, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12382783747337808, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 851 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1186.0, + "completions/mean_length": 1005.6875, + "completions/mean_terminated_length": 935.0714721679688, + "completions/min_length": 309.0, + "completions/min_terminated_length": 309.0, + "epoch": 0.17043408681736347, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.697311363131722, + "kl": 0.004730224609375, + "learning_rate": 9.86551021697561e-07, + "loss": -0.1387, + "num_tokens": 36998119.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.026976466178894, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06893055971616258, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0609283766831921, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08606629658238706, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 852 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1378.0, + "completions/mean_length": 1219.5625, + "completions/mean_terminated_length": 1179.5, + "completions/min_length": 931.0, + "completions/min_terminated_length": 931.0, + "epoch": 0.17063412682536508, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8307871860035534, + "kl": 0.00637054443359375, + "learning_rate": 9.864746669315918e-07, + "loss": 0.0436, + "num_tokens": 37045848.0, + "reward": 0.0, + "reward_std": 0.7970679402351379, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08304047663362761, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09555877645698178, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06191391873668902, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 853 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1395.0, + "completions/mean_length": 1377.125, + "completions/mean_terminated_length": 1281.5555419921875, + "completions/min_length": 1100.0, + "completions/min_terminated_length": 1100.0, + "epoch": 0.17083416683336666, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0950773125164037, + "kl": 0.0083770751953125, + "learning_rate": 9.863980993385931e-07, + "loss": -0.0369, + "num_tokens": 37091154.0, + "reward": -3.725290298461914e-09, + "reward_std": 1.0587300062179565, + "rewards/wordcountpos_reward_nokeypoint/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1033090988011864, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1567798197771411, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08850612031567835, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 854 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1310.25, + "completions/mean_terminated_length": 1224.0, + "completions/min_length": 1099.0, + "completions/min_terminated_length": 1099.0, + "epoch": 0.17103420684136827, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6321413813683794, + "kl": 0.0057220458984375, + "learning_rate": 9.863213189558996e-07, + "loss": 0.009, + "num_tokens": 37134670.0, + "reward": 0.0, + "reward_std": 0.4178122580051422, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.12852319629475872, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12504796925343295, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08027729719194864, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 855 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1367.0, + "completions/max_terminated_length": 1367.0, + "completions/mean_length": 1124.375, + "completions/mean_terminated_length": 1124.375, + "completions/min_length": 897.0, + "completions/min_terminated_length": 897.0, + "epoch": 0.1712342468493699, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.777062576065207, + "kl": 0.00716400146484375, + "learning_rate": 9.862443258209496e-07, + "loss": -0.0139, + "num_tokens": 37180052.0, + "reward": 0.0, + "reward_std": 0.7992645502090454, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04605038498629939, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16724237421239915, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04849589520621153, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 856 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1367.0, + "completions/max_terminated_length": 1367.0, + "completions/mean_length": 1081.5625, + "completions/mean_terminated_length": 1081.5625, + "completions/min_length": 750.0, + "completions/min_terminated_length": 750.0, + "epoch": 0.17143428685737147, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2063787193805, + "kl": 0.008758544921875, + "learning_rate": 9.861671199712855e-07, + "loss": -0.0656, + "num_tokens": 37224573.0, + "reward": 0.0, + "reward_std": 1.0172406435012817, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07486230542074833, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11395108012647531, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08243965245133132, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 857 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1343.5, + "completions/mean_terminated_length": 1272.3636474609375, + "completions/min_length": 1051.0, + "completions/min_terminated_length": 1051.0, + "epoch": 0.17163432686537308, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8398093783534963, + "kl": 0.00812530517578125, + "learning_rate": 9.86089701444553e-07, + "loss": -0.0252, + "num_tokens": 37268765.0, + "reward": 0.0, + "reward_std": 0.8919476866722107, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06640774927238419, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09331697918155128, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13743685418725538, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 858 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1299.0, + "completions/max_terminated_length": 1299.0, + "completions/mean_length": 982.1875, + "completions/mean_terminated_length": 982.1875, + "completions/min_length": 832.0, + "completions/min_terminated_length": 832.0, + "epoch": 0.17183436687337467, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9556655747110967, + "kl": 0.0068817138671875, + "learning_rate": 9.86012070278502e-07, + "loss": 0.0023, + "num_tokens": 37312960.0, + "reward": 0.0, + "reward_std": 0.28659990429878235, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.18334866973569697, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2111212777170177, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.16843506277010845, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 859 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1448.0, + "completions/mean_length": 1219.8125, + "completions/mean_terminated_length": 1201.1334228515625, + "completions/min_length": 966.0, + "completions/min_terminated_length": 966.0, + "epoch": 0.17203440688137628, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.077315426920646, + "kl": 0.0103912353515625, + "learning_rate": 9.859342265109856e-07, + "loss": -0.0157, + "num_tokens": 37362581.0, + "reward": 0.0, + "reward_std": 0.9002656936645508, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.020307035898096015, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.28453479921084823, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.975, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059004080210452226, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 860 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1456.0, + "completions/mean_length": 1344.0, + "completions/mean_terminated_length": 1250.4000244140625, + "completions/min_length": 754.0, + "completions/min_terminated_length": 754.0, + "epoch": 0.17223444688937786, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.461530956447996, + "kl": 0.0089874267578125, + "learning_rate": 9.858561701799606e-07, + "loss": -0.0236, + "num_tokens": 37419165.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.022255301475525, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.026097812252073596, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07524574933575466, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07649739768026002, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 861 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1463.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1220.125, + "completions/mean_terminated_length": 1220.125, + "completions/min_length": 1009.0, + "completions/min_terminated_length": 1009.0, + "epoch": 0.17243448689737947, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3758946028006243, + "kl": 0.010223388671875, + "learning_rate": 9.85777901323488e-07, + "loss": 0.0065, + "num_tokens": 37463247.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0287668704986572, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10621541171124901, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.22187622833246926, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10036968702787746, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 862 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1418.0, + "completions/mean_length": 1045.25, + "completions/mean_terminated_length": 940.3077392578125, + "completions/min_length": 629.0, + "completions/min_terminated_length": 629.0, + "epoch": 0.17263452690538109, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.99289813289387, + "kl": 0.0065155029296875, + "learning_rate": 9.856994199797317e-07, + "loss": 0.0058, + "num_tokens": 37504715.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.047146201133728, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0667741284166943, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05469646703792685, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.094182643679026, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 863 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1348.0, + "completions/max_terminated_length": 1348.0, + "completions/mean_length": 1201.25, + "completions/mean_terminated_length": 1201.25, + "completions/min_length": 981.0, + "completions/min_terminated_length": 981.0, + "epoch": 0.17283456691338267, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.508728723042557, + "kl": 0.0056610107421875, + "learning_rate": 9.8562072618696e-07, + "loss": -0.022, + "num_tokens": 37539423.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9104820489883423, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.047384054804184755, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09754622306291379, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06540472290116194, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 864 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1401.0, + "completions/max_terminated_length": 1401.0, + "completions/mean_length": 1220.8125, + "completions/mean_terminated_length": 1220.8125, + "completions/min_length": 1018.0, + "completions/min_terminated_length": 1018.0, + "epoch": 0.17303460692138428, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.343242244857981, + "kl": 0.009735107421875, + "learning_rate": 9.85541819983544e-07, + "loss": -0.0218, + "num_tokens": 37581788.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7178727984428406, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.19841156388682188, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2763074894862204, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875000000000001, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07969850595746354, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 865 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1452.0, + "completions/mean_length": 1423.0, + "completions/mean_terminated_length": 1376.800048828125, + "completions/min_length": 1337.0, + "completions/min_terminated_length": 1337.0, + "epoch": 0.17323464692938587, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1651402909256707, + "kl": 0.003879547119140625, + "learning_rate": 9.854627014079588e-07, + "loss": 0.0027, + "num_tokens": 37627500.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8123623132705688, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0253158977593169, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.19701330448340798, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09999999999999999, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 866 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1421.0, + "completions/mean_length": 1050.75, + "completions/mean_terminated_length": 1020.800048828125, + "completions/min_length": 864.0, + "completions/min_terminated_length": 864.0, + "epoch": 0.17343468693738748, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9250663259044747, + "kl": 0.0115203857421875, + "learning_rate": 9.853833704987831e-07, + "loss": -0.0147, + "num_tokens": 37677888.0, + "reward": 0.0, + "reward_std": 0.959006667137146, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.20700801270118893, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.23924352864792184, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10610965676722955, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 867 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1129.0, + "completions/max_terminated_length": 1129.0, + "completions/mean_length": 952.8125, + "completions/mean_terminated_length": 952.8125, + "completions/min_length": 724.0, + "completions/min_terminated_length": 724.0, + "epoch": 0.1736347269453891, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5495141094766103, + "kl": 0.004241943359375, + "learning_rate": 9.85303827294699e-07, + "loss": 0.0109, + "num_tokens": 37714965.0, + "reward": -3.725290298461914e-09, + "reward_std": 1.0442442893981934, + "rewards/wordcountpos_reward_nokeypoint/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.103513037528249, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06624351717658117, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08388704928078611, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 868 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1269.625, + "completions/mean_terminated_length": 1254.2667236328125, + "completions/min_length": 978.0, + "completions/min_terminated_length": 978.0, + "epoch": 0.17383476695339067, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.115621881051362, + "kl": 0.0094146728515625, + "learning_rate": 9.852240718344919e-07, + "loss": -0.0013, + "num_tokens": 37764735.0, + "reward": 0.0, + "reward_std": 0.5563172101974487, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.2388392804796545, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.3121971967511285, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08933913745655642, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 869 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1448.0, + "completions/max_terminated_length": 1448.0, + "completions/mean_length": 1105.9375, + "completions/mean_terminated_length": 1105.9375, + "completions/min_length": 781.0, + "completions/min_terminated_length": 781.0, + "epoch": 0.17403480696139229, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1459885929136417, + "kl": 0.0084991455078125, + "learning_rate": 9.85144104157051e-07, + "loss": -0.0747, + "num_tokens": 37817526.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.8768314123153687, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.023986318729213682, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05388809332891866, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09651328828101764, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 870 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1210.8125, + "completions/mean_terminated_length": 1144.0770263671875, + "completions/min_length": 922.0, + "completions/min_terminated_length": 922.0, + "epoch": 0.17423484696939387, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3001079893579455, + "kl": 0.005245208740234375, + "learning_rate": 9.85063924301369e-07, + "loss": 0.0074, + "num_tokens": 37860363.0, + "reward": 0.0, + "reward_std": 0.932608962059021, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.023728208785942836, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15082244196254238, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07290277645477444, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 871 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1314.0, + "completions/mean_length": 1052.0625, + "completions/mean_terminated_length": 1022.2000732421875, + "completions/min_length": 614.0, + "completions/min_terminated_length": 614.0, + "epoch": 0.17443488697739548, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.195915820101008, + "kl": 0.00827789306640625, + "learning_rate": 9.84983532306542e-07, + "loss": -0.0013, + "num_tokens": 37894156.0, + "reward": -3.725290298461914e-09, + "reward_std": 1.0436437129974365, + "rewards/wordcountpos_reward_nokeypoint/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06240714987449793, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03964697501023904, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06309898162000302, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 872 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1449.0, + "completions/max_terminated_length": 1449.0, + "completions/mean_length": 1130.4375, + "completions/mean_terminated_length": 1130.4375, + "completions/min_length": 786.0, + "completions/min_terminated_length": 786.0, + "epoch": 0.1746349269853971, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3065068473509784, + "kl": 0.0081939697265625, + "learning_rate": 9.849029282117692e-07, + "loss": 0.0006, + "num_tokens": 37937675.0, + "reward": 0.0, + "reward_std": 0.9621764421463013, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11309630852554932, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.303302270464247, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09803627446568496, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 873 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1457.0, + "completions/mean_length": 1255.0625, + "completions/mean_terminated_length": 1238.7333984375, + "completions/min_length": 848.0, + "completions/min_terminated_length": 848.0, + "epoch": 0.17483496699339868, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.949920019484396, + "kl": 0.00666046142578125, + "learning_rate": 9.84822112056354e-07, + "loss": -0.0206, + "num_tokens": 37982540.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.8937065601348877, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.11198279927377197, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15529353385902553, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14605934866804432, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 874 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1361.0, + "completions/mean_length": 1248.6875, + "completions/mean_terminated_length": 1212.7857666015625, + "completions/min_length": 975.0, + "completions/min_terminated_length": 975.0, + "epoch": 0.1750350070014003, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.274539963603609, + "kl": 0.0096282958984375, + "learning_rate": 9.847410838797023e-07, + "loss": -0.009, + "num_tokens": 38025199.0, + "reward": 0.0, + "reward_std": 0.9864178895950317, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08644519422820007, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07640615099228523, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11800816042090449, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 875 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1316.1875, + "completions/mean_terminated_length": 1273.769287109375, + "completions/min_length": 1084.0, + "completions/min_terminated_length": 1084.0, + "epoch": 0.17523504700940187, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4712691486955425, + "kl": 0.00691986083984375, + "learning_rate": 9.846598437213241e-07, + "loss": -0.0017, + "num_tokens": 38072970.0, + "reward": 0.0, + "reward_std": 0.5060663223266602, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10718634624250514, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2520467446097927, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11013459778666118, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 876 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1409.0, + "completions/mean_length": 1286.6875, + "completions/mean_terminated_length": 1237.4615478515625, + "completions/min_length": 994.0, + "completions/min_terminated_length": 994.0, + "epoch": 0.17543508701740348, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0382358336515103, + "kl": 0.0085906982421875, + "learning_rate": 9.845783916208325e-07, + "loss": 0.0054, + "num_tokens": 38113733.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0431209802627563, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08164226932669186, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06280733062046029, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04849589520621153, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 877 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1496.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1108.75, + "completions/mean_terminated_length": 1108.75, + "completions/min_length": 873.0, + "completions/min_terminated_length": 873.0, + "epoch": 0.17563512702540507, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3121574785167547, + "kl": 0.0094451904296875, + "learning_rate": 9.844967276179435e-07, + "loss": -0.0121, + "num_tokens": 38164785.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6377788782119751, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.15475248529195046, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15456635649965375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12641788434189796, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 878 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1337.0, + "completions/mean_length": 1071.125, + "completions/mean_terminated_length": 1009.857177734375, + "completions/min_length": 777.0, + "completions/min_terminated_length": 777.0, + "epoch": 0.17583516703340668, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3924744697634277, + "kl": 0.0080413818359375, + "learning_rate": 9.844148517524772e-07, + "loss": -0.0026, + "num_tokens": 38202915.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.03822660446167, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.18875169391636776, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0927247779510385, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07685966046898339, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 879 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1446.0, + "completions/max_terminated_length": 1446.0, + "completions/mean_length": 1250.3125, + "completions/mean_terminated_length": 1250.3125, + "completions/min_length": 1068.0, + "completions/min_terminated_length": 1068.0, + "epoch": 0.1760352070414083, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7132461621811355, + "kl": 0.00603485107421875, + "learning_rate": 9.843327640643566e-07, + "loss": 0.0062, + "num_tokens": 38242112.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8193296194076538, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.037197923079981834, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1068616913892454, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07649739768026002, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 880 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1464.0, + "completions/mean_length": 1265.0, + "completions/mean_terminated_length": 1231.4285888671875, + "completions/min_length": 962.0, + "completions/min_terminated_length": 962.0, + "epoch": 0.17623524704940988, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8623958208517304, + "kl": 0.00797271728515625, + "learning_rate": 9.842504645936078e-07, + "loss": -0.0242, + "num_tokens": 38295384.0, + "reward": 0.0, + "reward_std": 0.3968231976032257, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.013755895768021302, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2521043951424508, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11474609652039004, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 881 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1379.0, + "completions/mean_length": 1178.125, + "completions/mean_terminated_length": 1132.1429443359375, + "completions/min_length": 965.0, + "completions/min_terminated_length": 965.0, + "epoch": 0.1764352870574115, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1522882145847033, + "kl": 0.00839996337890625, + "learning_rate": 9.84167953380361e-07, + "loss": -0.0032, + "num_tokens": 38338762.0, + "reward": 2.60770320892334e-08, + "reward_std": 1.0676723718643188, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.60770320892334e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09143899574624073, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08637187282054155, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06382847385042252, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 882 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1345.0, + "completions/mean_length": 1162.875, + "completions/mean_terminated_length": 1140.4000244140625, + "completions/min_length": 877.0, + "completions/min_terminated_length": 877.0, + "epoch": 0.17663532706541307, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5603480776481025, + "kl": 0.00539398193359375, + "learning_rate": 9.840852304648481e-07, + "loss": -0.0348, + "num_tokens": 38387488.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0164644718170166, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09730227031228608, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16965302582898772, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10576003586036262, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 883 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1401.0, + "completions/mean_length": 1189.75, + "completions/mean_terminated_length": 1118.1539306640625, + "completions/min_length": 962.0, + "completions/min_terminated_length": 962.0, + "epoch": 0.17683536707341468, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7826428377776384, + "kl": 0.00702667236328125, + "learning_rate": 9.84002295887406e-07, + "loss": 0.0456, + "num_tokens": 38439348.0, + "reward": 0.0, + "reward_std": 0.9678224325180054, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03596685279970509, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1120469981976404, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09737289911202955, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 884 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1358.9375, + "completions/mean_terminated_length": 1294.8182373046875, + "completions/min_length": 1061.0, + "completions/min_terminated_length": 1061.0, + "epoch": 0.1770354070814163, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.866252224943301, + "kl": 0.00882720947265625, + "learning_rate": 9.839191496884736e-07, + "loss": -0.022, + "num_tokens": 38495123.0, + "reward": -4.470348358154297e-08, + "reward_std": 0.9650845527648926, + "rewards/wordcountpos_reward_nokeypoint/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04023239220638179, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08730719685821138, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09388724521901161, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 885 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1497.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1172.3125, + "completions/mean_terminated_length": 1172.3125, + "completions/min_length": 965.0, + "completions/min_terminated_length": 965.0, + "epoch": 0.17723544708941788, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.33477738092492, + "kl": 0.006496429443359375, + "learning_rate": 9.838357919085933e-07, + "loss": -0.0172, + "num_tokens": 38540976.0, + "reward": 0.0, + "reward_std": 0.9561715722084045, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.14081843164026597, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12331283449174278, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10576003586036264, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 886 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1424.0, + "completions/mean_length": 1197.625, + "completions/mean_terminated_length": 1127.84619140625, + "completions/min_length": 924.0, + "completions/min_terminated_length": 924.0, + "epoch": 0.1774354870974195, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6131709581080886, + "kl": 0.003963470458984375, + "learning_rate": 9.83752222588411e-07, + "loss": 0.0213, + "num_tokens": 38594530.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0393660068511963, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0755551307800884, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06744826969560105, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.057089922571845, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 887 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1453.0, + "completions/mean_length": 1107.5625, + "completions/mean_terminated_length": 1081.4000244140625, + "completions/min_length": 775.0, + "completions/min_terminated_length": 775.0, + "epoch": 0.17763552710542108, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.393475082065519, + "kl": 0.0105133056640625, + "learning_rate": 9.836684417686754e-07, + "loss": -0.0101, + "num_tokens": 38647379.0, + "reward": -1.955777406692505e-08, + "reward_std": 0.9992396831512451, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.955777406692505e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.19347958154366995, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07790416105746797, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06382847385042253, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 888 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1337.0, + "completions/max_terminated_length": 1337.0, + "completions/mean_length": 1123.0, + "completions/mean_terminated_length": 1123.0, + "completions/min_length": 929.0, + "completions/min_terminated_length": 929.0, + "epoch": 0.1778355671134227, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.405393376178018, + "kl": 0.010162353515625, + "learning_rate": 9.835844494902381e-07, + "loss": -0.0124, + "num_tokens": 38690907.0, + "reward": 0.0, + "reward_std": 0.8805860280990601, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07943745671797586, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.18726116262935585, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05146016078626404, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 889 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1451.0, + "completions/mean_length": 1197.9375, + "completions/mean_terminated_length": 1128.2308349609375, + "completions/min_length": 979.0, + "completions/min_terminated_length": 979.0, + "epoch": 0.17803560712142427, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7023181439844985, + "kl": 0.0052337646484375, + "learning_rate": 9.835002457940543e-07, + "loss": -0.0119, + "num_tokens": 38742042.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9297127723693848, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.02891154497188913, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11931177467681335, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11642832797715323, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 890 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1252.0, + "completions/max_terminated_length": 1252.0, + "completions/mean_length": 1014.625, + "completions/mean_terminated_length": 1014.625, + "completions/min_length": 849.0, + "completions/min_terminated_length": 849.0, + "epoch": 0.17823564712942588, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6787146488741134, + "kl": 0.008544921875, + "learning_rate": 9.834158307211825e-07, + "loss": 0.0228, + "num_tokens": 38771996.0, + "reward": 0.0, + "reward_std": 0.9131697416305542, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.19346893807181773, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06599584287417616, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05443310539518172, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 891 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1428.0, + "completions/mean_length": 1303.625, + "completions/mean_terminated_length": 1258.3077392578125, + "completions/min_length": 984.0, + "completions/min_terminated_length": 984.0, + "epoch": 0.1784356871374275, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.052195577068716, + "kl": 0.0086669921875, + "learning_rate": 9.833312043127835e-07, + "loss": -0.0209, + "num_tokens": 38810934.0, + "reward": 0.0, + "reward_std": 0.7508441209793091, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.19705324256322065, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.18001558726125702, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.067631901304592, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 892 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1223.0, + "completions/max_terminated_length": 1223.0, + "completions/mean_length": 901.625, + "completions/mean_terminated_length": 901.625, + "completions/min_length": 667.0, + "completions/min_terminated_length": 667.0, + "epoch": 0.17863572714542908, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.009021196429188, + "kl": 0.009979248046875, + "learning_rate": 9.832463666101215e-07, + "loss": 0.0231, + "num_tokens": 38853968.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9962062835693359, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.02587593254178624, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08192674095667744, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07649739768026002, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 893 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1195.0, + "completions/mean_length": 954.125, + "completions/mean_terminated_length": 917.7333984375, + "completions/min_length": 776.0, + "completions/min_terminated_length": 776.0, + "epoch": 0.1788357671534307, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.880899185870084, + "kl": 0.0034580230712890625, + "learning_rate": 9.831613176545637e-07, + "loss": -0.0248, + "num_tokens": 38888506.0, + "reward": 0.0, + "reward_std": 0.6194002628326416, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04227015993810794, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04087831975402885, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07503085784948504, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 894 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1417.0, + "completions/max_terminated_length": 1417.0, + "completions/mean_length": 1215.9375, + "completions/mean_terminated_length": 1215.9375, + "completions/min_length": 924.0, + "completions/min_terminated_length": 924.0, + "epoch": 0.17903580716143228, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.0103727523357398, + "kl": 0.0038471221923828125, + "learning_rate": 9.830760574875806e-07, + "loss": -0.0085, + "num_tokens": 38929057.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0661038160324097, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.11061920228155901, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.056086098735360995, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07888106377466154, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 895 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1457.0, + "completions/mean_length": 1318.1875, + "completions/mean_terminated_length": 1306.0667724609375, + "completions/min_length": 1080.0, + "completions/min_terminated_length": 1080.0, + "epoch": 0.1792358471694339, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.126154437540995, + "kl": 0.004913330078125, + "learning_rate": 9.829905861507453e-07, + "loss": -0.016, + "num_tokens": 38976116.0, + "reward": 0.0, + "reward_std": 0.8505393862724304, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.02575673328057767, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12044294398523625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11080513425729777, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 896 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1245.1875, + "completions/mean_terminated_length": 1160.25, + "completions/min_length": 494.0, + "completions/min_terminated_length": 494.0, + "epoch": 0.1794358871774355, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.946771718830567, + "kl": 0.0076446533203125, + "learning_rate": 9.829049036857338e-07, + "loss": -0.0586, + "num_tokens": 39027639.0, + "reward": 0.0, + "reward_std": 1.0404472351074219, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03152918432704997, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17367881270685215, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0958393717904348, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 897 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1366.0, + "completions/max_terminated_length": 1366.0, + "completions/mean_length": 1091.1875, + "completions/mean_terminated_length": 1091.1875, + "completions/min_length": 799.0, + "completions/min_terminated_length": 799.0, + "epoch": 0.17963592718543708, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9386076081716785, + "kl": 0.00693511962890625, + "learning_rate": 9.82819010134326e-07, + "loss": -0.0389, + "num_tokens": 39073090.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8581688404083252, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.22613725612606111, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13718338502296723, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06309898162000302, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 898 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1048.0, + "completions/max_terminated_length": 1048.0, + "completions/mean_length": 889.1875, + "completions/mean_terminated_length": 889.1875, + "completions/min_length": 643.0, + "completions/min_terminated_length": 643.0, + "epoch": 0.1798359671934387, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8770435866283375, + "kl": 0.00948333740234375, + "learning_rate": 9.827329055384031e-07, + "loss": -0.0303, + "num_tokens": 39112661.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6518764495849609, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.015322789053189702, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11095701742473514, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07649739768026002, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 899 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1461.0, + "completions/max_terminated_length": 1461.0, + "completions/mean_length": 1197.25, + "completions/mean_terminated_length": 1197.25, + "completions/min_length": 802.0, + "completions/min_terminated_length": 802.0, + "epoch": 0.18003600720144028, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6788667397165784, + "kl": 0.00887298583984375, + "learning_rate": 9.826465899399504e-07, + "loss": 0.0197, + "num_tokens": 39164449.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.7706519961357117, + "rewards/wordcountpos_reward_nokeypoint/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.12067941641366851, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0880139527139238, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.056927504255331086, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 900 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1414.0, + "completions/max_terminated_length": 1414.0, + "completions/mean_length": 1237.125, + "completions/mean_terminated_length": 1237.125, + "completions/min_length": 992.0, + "completions/min_terminated_length": 992.0, + "epoch": 0.1802360472094419, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0712004052316586, + "kl": 0.0099334716796875, + "learning_rate": 9.82560063381056e-07, + "loss": 0.0098, + "num_tokens": 39211459.0, + "reward": 0.0, + "reward_std": 0.9258636236190796, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.12523821193739937, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15151256675701302, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07252075054258099, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 901 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1398.0, + "completions/mean_length": 1203.25, + "completions/mean_terminated_length": 1183.4666748046875, + "completions/min_length": 989.0, + "completions/min_terminated_length": 989.0, + "epoch": 0.18043608721744347, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8826433011509933, + "kl": 0.007965087890625, + "learning_rate": 9.824733259039104e-07, + "loss": -0.0291, + "num_tokens": 39253127.0, + "reward": 0.0, + "reward_std": 0.876156210899353, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.17725794022019617, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09653302087616178, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.047919685895217376, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 902 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1289.875, + "completions/mean_terminated_length": 1275.86669921875, + "completions/min_length": 1079.0, + "completions/min_terminated_length": 1079.0, + "epoch": 0.1806361272254451, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7022010188902907, + "kl": 0.00626373291015625, + "learning_rate": 9.823863775508072e-07, + "loss": -0.0369, + "num_tokens": 39301101.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.8673394918441772, + "rewards/wordcountpos_reward_nokeypoint/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0654705551398003, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04290867264630124, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.15628795835228615, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 903 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1357.0, + "completions/max_terminated_length": 1357.0, + "completions/mean_length": 1092.4375, + "completions/mean_terminated_length": 1092.4375, + "completions/min_length": 909.0, + "completions/min_terminated_length": 909.0, + "epoch": 0.1808361672334467, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1791056457049676, + "kl": 0.0101165771484375, + "learning_rate": 9.822992183641429e-07, + "loss": 0.0025, + "num_tokens": 39352068.0, + "reward": 0.0, + "reward_std": 0.7140047550201416, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.061381565103906174, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1718401806622205, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0697880388775209, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 904 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1448.0, + "completions/mean_length": 1349.625, + "completions/mean_terminated_length": 1259.4000244140625, + "completions/min_length": 618.0, + "completions/min_terminated_length": 618.0, + "epoch": 0.18103620724144828, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5078454978666787, + "kl": 0.0071258544921875, + "learning_rate": 9.822118483864167e-07, + "loss": -0.0384, + "num_tokens": 39401102.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.980755090713501, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.2529444652358932, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.23142234377165904, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1246476515504285, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 905 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1207.0, + "completions/max_terminated_length": 1207.0, + "completions/mean_length": 865.375, + "completions/mean_terminated_length": 865.375, + "completions/min_length": 514.0, + "completions/min_terminated_length": 514.0, + "epoch": 0.1812362472494499, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.866792053051406, + "kl": 0.01285552978515625, + "learning_rate": 9.821242676602308e-07, + "loss": -0.1474, + "num_tokens": 39426932.0, + "reward": 0.0, + "reward_std": 0.7918018102645874, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.036223406445504897, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.056963287287344046, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09651328828101764, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 906 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1414.0, + "completions/mean_length": 1194.6875, + "completions/mean_terminated_length": 1174.3333740234375, + "completions/min_length": 995.0, + "completions/min_terminated_length": 995.0, + "epoch": 0.18143628725745148, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4720294605889523, + "kl": 0.011444091796875, + "learning_rate": 9.820364762282896e-07, + "loss": 0.0196, + "num_tokens": 39471271.0, + "reward": 0.0, + "reward_std": 0.6192638874053955, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.2458498179178708, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.3731634518717522, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09428090415820634, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 907 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1135.4375, + "completions/mean_terminated_length": 1051.3077392578125, + "completions/min_length": 804.0, + "completions/min_terminated_length": 804.0, + "epoch": 0.1816363272654531, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0638821741286306, + "kl": 0.0084381103515625, + "learning_rate": 9.819484741334009e-07, + "loss": 0.0147, + "num_tokens": 39506070.0, + "reward": 0.0, + "reward_std": 1.0202088356018066, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.15503415500465156, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.23656614223886027, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08153617692869924, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 908 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1080.0, + "completions/mean_terminated_length": 1052.0, + "completions/min_length": 786.0, + "completions/min_terminated_length": 786.0, + "epoch": 0.1818363672734547, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.350688656764279, + "kl": 0.010986328125, + "learning_rate": 9.818602614184745e-07, + "loss": -0.0323, + "num_tokens": 39549326.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9871019124984741, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.040438437217895154, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0798846848350266, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09179284245476838, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 909 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1289.0, + "completions/max_terminated_length": 1289.0, + "completions/mean_length": 1054.75, + "completions/mean_terminated_length": 1054.75, + "completions/min_length": 800.0, + "completions/min_terminated_length": 800.0, + "epoch": 0.1820364072814563, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9859396930574245, + "kl": 0.00673675537109375, + "learning_rate": 9.817718381265238e-07, + "loss": -0.0142, + "num_tokens": 39590898.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.618232250213623, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06365236748258273, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.046731324978713094, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09269623828717428, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 910 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1222.8125, + "completions/mean_terminated_length": 1204.3333740234375, + "completions/min_length": 1000.0, + "completions/min_terminated_length": 1000.0, + "epoch": 0.1822364472894579, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7285507152795816, + "kl": 0.00714111328125, + "learning_rate": 9.81683204300664e-07, + "loss": 0.0152, + "num_tokens": 39633031.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9757005572319031, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08786287263622707, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0850241313015095, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875000000000001, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08681611046941136, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 911 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1245.0, + "completions/max_terminated_length": 1245.0, + "completions/mean_length": 967.375, + "completions/mean_terminated_length": 967.375, + "completions/min_length": 812.0, + "completions/min_terminated_length": 812.0, + "epoch": 0.18243648729745948, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2614557989564084, + "kl": 0.00618743896484375, + "learning_rate": 9.815943599841138e-07, + "loss": -0.0074, + "num_tokens": 39662357.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0318365097045898, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.010781837953741592, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0473972979225819, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059004080210452226, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 912 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1193.0, + "completions/max_terminated_length": 1193.0, + "completions/mean_length": 927.4375, + "completions/mean_terminated_length": 927.4375, + "completions/min_length": 806.0, + "completions/min_terminated_length": 806.0, + "epoch": 0.1826365273054611, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9219620618013376, + "kl": 0.00904083251953125, + "learning_rate": 9.815053052201938e-07, + "loss": -0.0158, + "num_tokens": 39701772.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9832159280776978, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11637025273967536, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15341756945631047, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.056927504255331086, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 913 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1492.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1144.375, + "completions/mean_terminated_length": 1144.375, + "completions/min_length": 898.0, + "completions/min_terminated_length": 898.0, + "epoch": 0.1828365673134627, + "frac_reward_zero_std": 0.0, + "grad_norm": 12.288456443913548, + "kl": 0.01734161376953125, + "learning_rate": 9.814160400523274e-07, + "loss": -0.0183, + "num_tokens": 39752338.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8585934638977051, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0028792303457087316, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.011516921382834926, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.161245154965971, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 914 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1238.0, + "completions/max_terminated_length": 1238.0, + "completions/mean_length": 1002.375, + "completions/mean_terminated_length": 1002.375, + "completions/min_length": 884.0, + "completions/min_terminated_length": 884.0, + "epoch": 0.1830366073214643, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2279775953770207, + "kl": 0.0080413818359375, + "learning_rate": 9.81326564524041e-07, + "loss": 0.0274, + "num_tokens": 39803400.0, + "reward": 0.0, + "reward_std": 0.5724028944969177, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0445426397746254, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08199383934601435, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04367387557118565, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 915 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1444.0, + "completions/max_terminated_length": 1444.0, + "completions/mean_length": 1131.6875, + "completions/mean_terminated_length": 1131.6875, + "completions/min_length": 781.0, + "completions/min_terminated_length": 781.0, + "epoch": 0.1832366473294659, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9689048223477443, + "kl": 0.00839996337890625, + "learning_rate": 9.81236878678963e-07, + "loss": 0.0143, + "num_tokens": 39838115.0, + "reward": 0.0, + "reward_std": 0.9095580577850342, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.05995154759513249, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07497853324305156, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875000000000001, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07187952884282608, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 916 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1301.8125, + "completions/mean_terminated_length": 1273.5, + "completions/min_length": 1042.0, + "completions/min_terminated_length": 1042.0, + "epoch": 0.18343668733746749, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7248597809138846, + "kl": 0.008514404296875, + "learning_rate": 9.81146982560825e-07, + "loss": -0.0085, + "num_tokens": 39882680.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7266299724578857, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06266052290456348, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07633583062781507, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10101338378503961, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 917 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1468.0, + "completions/mean_length": 1216.5, + "completions/mean_terminated_length": 1197.60009765625, + "completions/min_length": 856.0, + "completions/min_terminated_length": 856.0, + "epoch": 0.1836367273454691, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.12866744925378, + "kl": 0.00757598876953125, + "learning_rate": 9.810568762134602e-07, + "loss": -0.0699, + "num_tokens": 39930232.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5598295331001282, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.020096247496810313, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.23665351923836006, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09727776191382573, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 918 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1456.0, + "completions/mean_length": 1291.0625, + "completions/mean_terminated_length": 1261.21435546875, + "completions/min_length": 1010.0, + "completions/min_terminated_length": 1010.0, + "epoch": 0.18383676735347068, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6449450870618203, + "kl": 0.00824737548828125, + "learning_rate": 9.809665596808052e-07, + "loss": -0.0056, + "num_tokens": 39978473.0, + "reward": 0.0, + "reward_std": 0.875176191329956, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.006312223164889564, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.23675059149024855, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06885303726590962, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 919 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1384.0, + "completions/max_terminated_length": 1384.0, + "completions/mean_length": 1147.9375, + "completions/mean_terminated_length": 1147.9375, + "completions/min_length": 908.0, + "completions/min_terminated_length": 908.0, + "epoch": 0.1840368073614723, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2570397536157816, + "kl": 0.009307861328125, + "learning_rate": 9.808760330068989e-07, + "loss": -0.0057, + "num_tokens": 40022024.0, + "reward": 0.0, + "reward_std": 0.9658756852149963, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09753274531585182, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08309007762327118, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0769800358919501, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 920 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1401.0, + "completions/max_terminated_length": 1401.0, + "completions/mean_length": 1236.6875, + "completions/mean_terminated_length": 1236.6875, + "completions/min_length": 975.0, + "completions/min_terminated_length": 975.0, + "epoch": 0.1842368473694739, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6535744238870214, + "kl": 0.006000518798828125, + "learning_rate": 9.807852962358822e-07, + "loss": -0.0028, + "num_tokens": 40059539.0, + "reward": 0.0, + "reward_std": 0.9066012501716614, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.13751590573361433, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.24890003613056427, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0697880388775209, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 921 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1337.0, + "completions/max_terminated_length": 1337.0, + "completions/mean_length": 1072.25, + "completions/mean_terminated_length": 1072.25, + "completions/min_length": 830.0, + "completions/min_terminated_length": 830.0, + "epoch": 0.1844368873774755, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.600354595253127, + "kl": 0.0132904052734375, + "learning_rate": 9.806943494119989e-07, + "loss": -0.0204, + "num_tokens": 40094615.0, + "reward": 0.0, + "reward_std": 0.9246164560317993, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09152060092945088, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04608923965739499, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.048686449556014776, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 922 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1345.0, + "completions/max_terminated_length": 1345.0, + "completions/mean_length": 1027.6875, + "completions/mean_terminated_length": 1027.6875, + "completions/min_length": 814.0, + "completions/min_terminated_length": 814.0, + "epoch": 0.1846369273854771, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2027474318204474, + "kl": 0.0084991455078125, + "learning_rate": 9.806031925795951e-07, + "loss": -0.0497, + "num_tokens": 40148010.0, + "reward": 0.0, + "reward_std": 0.8698360919952393, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09327799617301942, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1678898990523197, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08027729719194866, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 923 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1292.0, + "completions/max_terminated_length": 1292.0, + "completions/mean_length": 1154.25, + "completions/mean_terminated_length": 1154.25, + "completions/min_length": 920.0, + "completions/min_terminated_length": 920.0, + "epoch": 0.18483696739347868, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5594667416355605, + "kl": 0.0122528076171875, + "learning_rate": 9.805118257831192e-07, + "loss": -0.0031, + "num_tokens": 40202430.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0026524066925049, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08333431079246054, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2173932857997032, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12524050936172845, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 924 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1225.9375, + "completions/mean_terminated_length": 1207.666748046875, + "completions/min_length": 923.0, + "completions/min_terminated_length": 923.0, + "epoch": 0.1850370074014803, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1780376619351354, + "kl": 0.009307861328125, + "learning_rate": 9.804202490671223e-07, + "loss": 0.0079, + "num_tokens": 40247317.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.40284255146980286, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.11206225064686229, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17689362375916232, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0938872452190116, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 925 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1306.0, + "completions/max_terminated_length": 1306.0, + "completions/mean_length": 1097.25, + "completions/mean_terminated_length": 1097.25, + "completions/min_length": 896.0, + "completions/min_terminated_length": 896.0, + "epoch": 0.1852370474094819, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7550471385040347, + "kl": 0.0109405517578125, + "learning_rate": 9.803284624762575e-07, + "loss": -0.016, + "num_tokens": 40290233.0, + "reward": 0.0, + "reward_std": 0.6178953647613525, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.09202901377979988, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12521108335691317, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10741060020797313, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 926 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1420.0, + "completions/max_terminated_length": 1420.0, + "completions/mean_length": 1051.5625, + "completions/mean_terminated_length": 1051.5625, + "completions/min_length": 679.0, + "completions/min_terminated_length": 679.0, + "epoch": 0.1854370874174835, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.771444531335443, + "kl": 0.008575439453125, + "learning_rate": 9.8023646605528e-07, + "loss": -0.0088, + "num_tokens": 40339170.0, + "reward": 0.0, + "reward_std": 0.5166000127792358, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07114539715298727, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15500121400458675, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11474609652039004, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 927 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1314.0, + "completions/max_terminated_length": 1314.0, + "completions/mean_length": 1074.0625, + "completions/mean_terminated_length": 1074.0625, + "completions/min_length": 523.0, + "completions/min_terminated_length": 523.0, + "epoch": 0.1856371274254851, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.464190728912537, + "kl": 0.009857177734375, + "learning_rate": 9.801442598490485e-07, + "loss": -0.0452, + "num_tokens": 40387731.0, + "reward": 0.0, + "reward_std": 0.9182111620903015, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.05386686323181178, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.049336964228021474, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0824396524513313, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 928 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1252.0, + "completions/max_terminated_length": 1252.0, + "completions/mean_length": 1094.1875, + "completions/mean_terminated_length": 1094.1875, + "completions/min_length": 660.0, + "completions/min_terminated_length": 660.0, + "epoch": 0.1858371674334867, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.440765943787231, + "kl": 0.0108489990234375, + "learning_rate": 9.800518439025223e-07, + "loss": -0.024, + "num_tokens": 40440630.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9280741214752197, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05219500309293882, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.3046582496714699, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1270024788326182, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 929 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1453.0, + "completions/mean_length": 1263.1875, + "completions/mean_terminated_length": 1247.4000244140625, + "completions/min_length": 949.0, + "completions/min_terminated_length": 949.0, + "epoch": 0.1860372074414883, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.706496653626478, + "kl": 0.00714874267578125, + "learning_rate": 9.799592182607642e-07, + "loss": 0.0333, + "num_tokens": 40484457.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9917539358139038, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.46817253188337354, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.3322206266323296, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.03849001794597504, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 930 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1428.0, + "completions/mean_length": 1368.625, + "completions/mean_terminated_length": 1289.800048828125, + "completions/min_length": 1187.0, + "completions/min_terminated_length": 1187.0, + "epoch": 0.18623724744948988, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.518514591377267, + "kl": 0.00658416748046875, + "learning_rate": 9.79866382968939e-07, + "loss": 0.0261, + "num_tokens": 40532003.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.43064743280410767, + "rewards/wordcountpos_reward_nokeypoint/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04523339600438975, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2928970115072967, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1010133837850396, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 931 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1450.0, + "completions/mean_length": 1138.125, + "completions/mean_terminated_length": 1086.4285888671875, + "completions/min_length": 988.0, + "completions/min_terminated_length": 988.0, + "epoch": 0.1864372874574915, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.8966775907425826, + "kl": 0.00505828857421875, + "learning_rate": 9.797733380723133e-07, + "loss": 0.0098, + "num_tokens": 40564637.0, + "reward": 0.0, + "reward_std": 0.6018378734588623, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10648945042337953, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15974771232651658, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04216370213557838, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 932 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1191.75, + "completions/mean_terminated_length": 1171.2000732421875, + "completions/min_length": 812.0, + "completions/min_terminated_length": 812.0, + "epoch": 0.1866373274654931, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5536377096582377, + "kl": 0.0083770751953125, + "learning_rate": 9.796800836162565e-07, + "loss": -0.0095, + "num_tokens": 40603889.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.049893856048584, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.005470238615819334, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04423355408737371, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04194352464039305, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 933 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1255.0, + "completions/max_terminated_length": 1255.0, + "completions/mean_length": 1021.75, + "completions/mean_terminated_length": 1021.75, + "completions/min_length": 875.0, + "completions/min_terminated_length": 875.0, + "epoch": 0.1868373674734947, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.539180129531466, + "kl": 0.0138092041015625, + "learning_rate": 9.795866196462397e-07, + "loss": -0.0267, + "num_tokens": 40651669.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.64860999584198, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11072318090415521, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1810821805349052, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 934 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1120.0, + "completions/max_terminated_length": 1120.0, + "completions/mean_length": 874.875, + "completions/mean_terminated_length": 874.875, + "completions/min_length": 693.0, + "completions/min_terminated_length": 693.0, + "epoch": 0.1870374074814963, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8119528212147435, + "kl": 0.01123046875, + "learning_rate": 9.794929462078366e-07, + "loss": 0.0458, + "num_tokens": 40692235.0, + "reward": 0.0, + "reward_std": 0.6395448446273804, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11147434763361323, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15624003195663672, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09574271077563383, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 935 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1358.0, + "completions/mean_length": 1040.25, + "completions/mean_terminated_length": 1009.6000366210938, + "completions/min_length": 687.0, + "completions/min_terminated_length": 687.0, + "epoch": 0.1872374474894979, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.263089424942943, + "kl": 0.00982666015625, + "learning_rate": 9.793990633467225e-07, + "loss": -0.0131, + "num_tokens": 40735615.0, + "reward": 0.0, + "reward_std": 0.7452367544174194, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.3010913441930653, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1588774123048007, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09269623828717428, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 936 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1267.25, + "completions/mean_terminated_length": 1161.45458984375, + "completions/min_length": 894.0, + "completions/min_terminated_length": 894.0, + "epoch": 0.1874374874974995, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.97560213163845, + "kl": 0.0123291015625, + "learning_rate": 9.793049711086754e-07, + "loss": 0.017, + "num_tokens": 40797571.0, + "reward": 0.0, + "reward_std": 0.8600860834121704, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06954919754101105, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11421388835937657, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10461569884316811, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 937 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1480.0, + "completions/mean_length": 1082.1875, + "completions/mean_terminated_length": 985.769287109375, + "completions/min_length": 608.0, + "completions/min_terminated_length": 608.0, + "epoch": 0.1876375275055011, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.03129642959593, + "kl": 0.008739471435546875, + "learning_rate": 9.79210669539575e-07, + "loss": -0.0718, + "num_tokens": 40831598.0, + "reward": 0.0, + "reward_std": 0.4829227924346924, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0440011295446507, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07820597747597491, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08681611046941136, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 938 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1255.0, + "completions/max_terminated_length": 1255.0, + "completions/mean_length": 1104.125, + "completions/mean_terminated_length": 1104.125, + "completions/min_length": 971.0, + "completions/min_terminated_length": 971.0, + "epoch": 0.1878375675135027, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1304516815320125, + "kl": 0.008636474609375, + "learning_rate": 9.791161586854028e-07, + "loss": 0.0155, + "num_tokens": 40879128.0, + "reward": 0.0, + "reward_std": 0.6974177360534668, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.18244860555770204, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13300024976289437, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05426273532033234, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 939 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1316.0, + "completions/max_terminated_length": 1316.0, + "completions/mean_length": 977.125, + "completions/mean_terminated_length": 977.125, + "completions/min_length": 780.0, + "completions/min_terminated_length": 780.0, + "epoch": 0.1880376075215043, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5004444357189115, + "kl": 0.00955963134765625, + "learning_rate": 9.790214385922432e-07, + "loss": -0.0255, + "num_tokens": 40917362.0, + "reward": 0.0, + "reward_std": 0.9495123624801636, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.031105184303021487, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16221178232742428, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09388724521901158, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 940 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1361.0, + "completions/max_terminated_length": 1361.0, + "completions/mean_length": 972.4375, + "completions/mean_terminated_length": 972.4375, + "completions/min_length": 671.0, + "completions/min_terminated_length": 671.0, + "epoch": 0.1882376475295059, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6065166002660374, + "kl": 0.00836944580078125, + "learning_rate": 9.789265093062822e-07, + "loss": -0.0161, + "num_tokens": 40955129.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.04313325881958, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07204259360010484, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10440898037317592, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0909822937597079, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 941 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1335.0, + "completions/max_terminated_length": 1335.0, + "completions/mean_length": 1170.875, + "completions/mean_terminated_length": 1170.875, + "completions/min_length": 880.0, + "completions/min_terminated_length": 880.0, + "epoch": 0.1884376875375075, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.436705776164322, + "kl": 0.0024127960205078125, + "learning_rate": 9.788313708738074e-07, + "loss": -0.0161, + "num_tokens": 40996503.0, + "reward": 7.450580596923828e-09, + "reward_std": 0.9906513690948486, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07578531551537092, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05231166518203754, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.048686449556014755, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 942 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1184.0, + "completions/mean_length": 943.8125, + "completions/mean_terminated_length": 906.7333984375, + "completions/min_length": 616.0, + "completions/min_terminated_length": 616.0, + "epoch": 0.18863772754550912, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.054391357219915, + "kl": 0.009307861328125, + "learning_rate": 9.787360233412088e-07, + "loss": 0.025, + "num_tokens": 41033196.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9015039205551147, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0250079500714186, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06847233372542501, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.24525119397904702, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 943 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1479.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1137.5625, + "completions/mean_terminated_length": 1137.5625, + "completions/min_length": 891.0, + "completions/min_terminated_length": 891.0, + "epoch": 0.1888377675535107, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.606567185012443, + "kl": 0.01007843017578125, + "learning_rate": 9.786404667549785e-07, + "loss": 0.0052, + "num_tokens": 41064133.0, + "reward": 0.0, + "reward_std": 0.4469779431819916, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.10679922102772632, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11662324336202734, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12049281521534187, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 944 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1262.25, + "completions/mean_terminated_length": 1183.0, + "completions/min_length": 1005.0, + "completions/min_terminated_length": 1005.0, + "epoch": 0.1890378075615123, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6326243840885923, + "kl": 0.0084686279296875, + "learning_rate": 9.785447011617101e-07, + "loss": -0.0051, + "num_tokens": 41110681.0, + "reward": 0.0, + "reward_std": 0.4188610017299652, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0683047783372807, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.187981769995476, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08734775114237132, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 945 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1165.0, + "completions/max_terminated_length": 1165.0, + "completions/mean_length": 845.125, + "completions/mean_terminated_length": 845.125, + "completions/min_length": 567.0, + "completions/min_terminated_length": 567.0, + "epoch": 0.1892378475695139, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.858466494692358, + "kl": 0.0095672607421875, + "learning_rate": 9.784487266080995e-07, + "loss": -0.0019, + "num_tokens": 41145379.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0521931648254395, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11170875127937793, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09245985274930393, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07781745019952503, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 946 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1253.0, + "completions/max_terminated_length": 1253.0, + "completions/mean_length": 937.0625, + "completions/mean_terminated_length": 937.0625, + "completions/min_length": 734.0, + "completions/min_terminated_length": 734.0, + "epoch": 0.1894378875775155, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9506632441837377, + "kl": 0.00835418701171875, + "learning_rate": 9.783525431409443e-07, + "loss": -0.0339, + "num_tokens": 41195212.0, + "reward": -9.313225746154785e-09, + "reward_std": 1.0395461320877075, + "rewards/wordcountpos_reward_nokeypoint/mean": -9.313225746154785e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.2680503172491328, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.26102750652780915, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07876359377087681, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 947 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1464.0, + "completions/mean_length": 1134.5625, + "completions/mean_terminated_length": 1050.2308349609375, + "completions/min_length": 840.0, + "completions/min_terminated_length": 840.0, + "epoch": 0.1896379275855171, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7555132083444303, + "kl": 0.0109405517578125, + "learning_rate": 9.78256150807144e-07, + "loss": -0.0278, + "num_tokens": 41247781.0, + "reward": 0.0, + "reward_std": 0.5337927937507629, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.027862594863597177, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03761614694779095, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09418264367902596, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 948 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1499.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1142.5, + "completions/mean_terminated_length": 1142.5, + "completions/min_length": 781.0, + "completions/min_terminated_length": 781.0, + "epoch": 0.1898379675935187, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.524794726151636, + "kl": 0.0106353759765625, + "learning_rate": 9.781595496536997e-07, + "loss": 0.008, + "num_tokens": 41298261.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.858599841594696, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.3597943193360917, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.21375769347679374, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875000000000001, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07969850595746354, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 949 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1383.0, + "completions/mean_length": 1177.0625, + "completions/mean_terminated_length": 1155.533447265625, + "completions/min_length": 930.0, + "completions/min_terminated_length": 930.0, + "epoch": 0.19003800760152031, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8335563800540506, + "kl": 0.00726318359375, + "learning_rate": 9.780627397277149e-07, + "loss": 0.042, + "num_tokens": 41339462.0, + "reward": -5.587935447692871e-09, + "reward_std": 1.0669140815734863, + "rewards/wordcountpos_reward_nokeypoint/mean": -5.587935447692871e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07952830900872705, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0737082232471025, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.060705726131767695, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 950 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1454.0, + "completions/max_terminated_length": 1454.0, + "completions/mean_length": 1158.1875, + "completions/mean_terminated_length": 1158.1875, + "completions/min_length": 975.0, + "completions/min_terminated_length": 975.0, + "epoch": 0.1902380476095219, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.947406583604454, + "kl": 0.00783538818359375, + "learning_rate": 9.779657210763944e-07, + "loss": -0.0392, + "num_tokens": 41378177.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0310821533203125, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.00222743758429243, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.026652822369431825, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04367387557118565, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 951 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1142.875, + "completions/mean_terminated_length": 1119.0667724609375, + "completions/min_length": 837.0, + "completions/min_terminated_length": 837.0, + "epoch": 0.1904380876175235, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.325709489420799, + "kl": 0.00888824462890625, + "learning_rate": 9.778684937470449e-07, + "loss": -0.0224, + "num_tokens": 41419151.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9388270378112793, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03225135097132579, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1834641867847031, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07876359377087681, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 952 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1338.0, + "completions/mean_length": 1193.625, + "completions/mean_terminated_length": 1149.857177734375, + "completions/min_length": 919.0, + "completions/min_terminated_length": 919.0, + "epoch": 0.1906381276255251, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.288024048034387, + "kl": 0.00611114501953125, + "learning_rate": 9.77771057787075e-07, + "loss": 0.0184, + "num_tokens": 41470065.0, + "reward": 0.0, + "reward_std": 0.6729685068130493, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.2115640501212076, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15033117604498242, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09496588081262933, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 953 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1461.0, + "completions/max_terminated_length": 1461.0, + "completions/mean_length": 1125.0625, + "completions/mean_terminated_length": 1125.0625, + "completions/min_length": 951.0, + "completions/min_terminated_length": 951.0, + "epoch": 0.1908381676335267, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2049821952168758, + "kl": 0.01197052001953125, + "learning_rate": 9.776734132439948e-07, + "loss": 0.0319, + "num_tokens": 41513258.0, + "reward": 0.0, + "reward_std": 0.4946305751800537, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1369388503805839, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1412716060368213, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10945995377982527, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 954 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1401.0, + "completions/max_terminated_length": 1401.0, + "completions/mean_length": 1118.4375, + "completions/mean_terminated_length": 1118.4375, + "completions/min_length": 993.0, + "completions/min_terminated_length": 993.0, + "epoch": 0.19103820764152832, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.456623534978109, + "kl": 0.009613037109375, + "learning_rate": 9.775755601654163e-07, + "loss": -0.0097, + "num_tokens": 41558913.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9525743722915649, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06332935397291616, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10356347405520008, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05821416398857659, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 955 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1301.0, + "completions/max_terminated_length": 1301.0, + "completions/mean_length": 1013.9375, + "completions/mean_terminated_length": 1013.9375, + "completions/min_length": 772.0, + "completions/min_terminated_length": 772.0, + "epoch": 0.1912382476495299, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6057619411942494, + "kl": 0.00949859619140625, + "learning_rate": 9.774774985990531e-07, + "loss": -0.046, + "num_tokens": 41591496.0, + "reward": 3.725290298461914e-09, + "reward_std": 1.0645910501480103, + "rewards/wordcountpos_reward_nokeypoint/mean": 3.725290298461914e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.10074975649356142, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.024277452601212772, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875000000000001, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09953596037316065, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 956 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1474.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1139.125, + "completions/mean_terminated_length": 1139.125, + "completions/min_length": 943.0, + "completions/min_terminated_length": 943.0, + "epoch": 0.19143828765753151, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.200672207303196, + "kl": 0.010711669921875, + "learning_rate": 9.773792285927204e-07, + "loss": -0.0044, + "num_tokens": 41633338.0, + "reward": 0.0, + "reward_std": 0.8814581632614136, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06031036030084137, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2629606521288556, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08027729719194864, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 957 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1478.0, + "completions/max_terminated_length": 1478.0, + "completions/mean_length": 1197.125, + "completions/mean_terminated_length": 1197.125, + "completions/min_length": 889.0, + "completions/min_terminated_length": 889.0, + "epoch": 0.1916383276655331, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2148837068309737, + "kl": 0.0099945068359375, + "learning_rate": 9.772807501943352e-07, + "loss": -0.0079, + "num_tokens": 41671204.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.9972952604293823, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09098570250143213, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08747212959756141, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06652763279965644, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 958 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1376.0, + "completions/max_terminated_length": 1376.0, + "completions/mean_length": 1089.0, + "completions/mean_terminated_length": 1089.0, + "completions/min_length": 805.0, + "completions/min_terminated_length": 805.0, + "epoch": 0.1918383676735347, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.674423668299218, + "kl": 0.0117645263671875, + "learning_rate": 9.77182063451916e-07, + "loss": -0.0604, + "num_tokens": 41717236.0, + "reward": 0.0, + "reward_std": 0.962651252746582, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03763276329548526, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08569445713790573, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07969850595746357, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 959 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1140.125, + "completions/mean_terminated_length": 1020.1666870117188, + "completions/min_length": 19.0, + "completions/min_terminated_length": 19.0, + "epoch": 0.1920384076815363, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.620375106225409, + "kl": 0.0095977783203125, + "learning_rate": 9.770831684135825e-07, + "loss": -0.1691, + "num_tokens": 41769798.0, + "reward": 0.0, + "reward_std": 0.6641061305999756, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03491723296680474, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13125788453443268, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1927865832122834, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 960 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1331.0, + "completions/max_terminated_length": 1331.0, + "completions/mean_length": 1074.6875, + "completions/mean_terminated_length": 1074.6875, + "completions/min_length": 872.0, + "completions/min_terminated_length": 872.0, + "epoch": 0.1922384476895379, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.754327387749018, + "kl": 0.0103607177734375, + "learning_rate": 9.76984065127557e-07, + "loss": -0.0299, + "num_tokens": 41816353.0, + "reward": 0.0, + "reward_std": 0.8055334091186523, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.017140279665702262, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.3887971377018499, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059628479399994376, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 961 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1132.75, + "completions/mean_terminated_length": 1080.2857666015625, + "completions/min_length": 976.0, + "completions/min_terminated_length": 976.0, + "epoch": 0.19243848769753952, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.160953343277857, + "kl": 0.0078277587890625, + "learning_rate": 9.768847536421628e-07, + "loss": -0.0087, + "num_tokens": 41858205.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9318591356277466, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1818912497101904, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10839486008352574, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05426273532033234, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 962 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1401.0, + "completions/max_terminated_length": 1401.0, + "completions/mean_length": 1151.0, + "completions/mean_terminated_length": 1151.0, + "completions/min_length": 855.0, + "completions/min_terminated_length": 855.0, + "epoch": 0.1926385277055411, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.247328557574533, + "kl": 0.0108489990234375, + "learning_rate": 9.76785234005824e-07, + "loss": 0.0076, + "num_tokens": 41907261.0, + "reward": 0.0, + "reward_std": 0.5684940218925476, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.15051409852666978, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.22234125778256128, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11155467020454342, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 963 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1455.0, + "completions/mean_length": 1422.5, + "completions/mean_terminated_length": 1293.3333740234375, + "completions/min_length": 1133.0, + "completions/min_terminated_length": 1133.0, + "epoch": 0.1928385677135427, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.221966940319106, + "kl": 0.005573272705078125, + "learning_rate": 9.76685506267067e-07, + "loss": 0.0312, + "num_tokens": 41957109.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0403571128845215, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.17409372512328272, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08097085685895916, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08421753138505424, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 964 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1445.0, + "completions/mean_length": 1192.9375, + "completions/mean_terminated_length": 1172.4666748046875, + "completions/min_length": 827.0, + "completions/min_terminated_length": 827.0, + "epoch": 0.1930386077215443, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.740315737580438, + "kl": 0.0104217529296875, + "learning_rate": 9.765855704745196e-07, + "loss": 0.0192, + "num_tokens": 41995004.0, + "reward": 0.0, + "reward_std": 0.4510999917984009, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.09447278326047223, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2936824442770255, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1138550085106622, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 965 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1437.0, + "completions/mean_length": 1115.75, + "completions/mean_terminated_length": 1027.076904296875, + "completions/min_length": 666.0, + "completions/min_terminated_length": 666.0, + "epoch": 0.1932386477295459, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.264279455386113, + "kl": 0.00753021240234375, + "learning_rate": 9.764854266769112e-07, + "loss": -0.009, + "num_tokens": 42028920.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9628843069076538, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0062894874475163155, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05230585448463504, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059004080210452226, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 966 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1277.0, + "completions/max_terminated_length": 1277.0, + "completions/mean_length": 1127.125, + "completions/mean_terminated_length": 1127.125, + "completions/min_length": 955.0, + "completions/min_terminated_length": 955.0, + "epoch": 0.19343868773754752, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.433407714417339, + "kl": 0.011810302734375, + "learning_rate": 9.763850749230719e-07, + "loss": 0.0134, + "num_tokens": 42069314.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.8609832525253296, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.37223038507757616, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.20349792262793537, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05426273532033234, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 967 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1265.0, + "completions/max_terminated_length": 1265.0, + "completions/mean_length": 1164.6875, + "completions/mean_terminated_length": 1164.6875, + "completions/min_length": 948.0, + "completions/min_terminated_length": 948.0, + "epoch": 0.1936387277455491, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.675865813306238, + "kl": 0.0126190185546875, + "learning_rate": 9.76284515261934e-07, + "loss": -0.0231, + "num_tokens": 42108117.0, + "reward": 0.0, + "reward_std": 0.8446465730667114, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.09494544397733884, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10088500639920513, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10945995377982526, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 968 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1448.0, + "completions/mean_length": 1366.75, + "completions/mean_terminated_length": 1306.181884765625, + "completions/min_length": 1091.0, + "completions/min_terminated_length": 1091.0, + "epoch": 0.19383876775355072, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1947999017212014, + "kl": 0.0108184814453125, + "learning_rate": 9.761837477425306e-07, + "loss": -0.0159, + "num_tokens": 42155121.0, + "reward": 0.0, + "reward_std": 0.8239400386810303, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0926915651230943, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0686311047097951, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12874033584729408, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 969 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1387.0, + "completions/max_terminated_length": 1387.0, + "completions/mean_length": 1060.0625, + "completions/mean_terminated_length": 1060.0625, + "completions/min_length": 961.0, + "completions/min_terminated_length": 961.0, + "epoch": 0.1940388077615523, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4571144168936807, + "kl": 0.00911712646484375, + "learning_rate": 9.760827724139967e-07, + "loss": 0.0116, + "num_tokens": 42203418.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9556577205657959, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.02190254004175716, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.24311190387939974, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11080513425729775, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 970 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1411.0, + "completions/max_terminated_length": 1411.0, + "completions/mean_length": 1138.5625, + "completions/mean_terminated_length": 1138.5625, + "completions/min_length": 885.0, + "completions/min_terminated_length": 885.0, + "epoch": 0.1942388477695539, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.438625481833955, + "kl": 0.011260986328125, + "learning_rate": 9.75981589325568e-07, + "loss": -0.0194, + "num_tokens": 42252819.0, + "reward": 0.0, + "reward_std": 0.8868687748908997, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1767433697687156, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12008460055028242, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12382783747337807, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 971 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1410.0, + "completions/mean_length": 1252.0625, + "completions/mean_terminated_length": 1169.416748046875, + "completions/min_length": 990.0, + "completions/min_terminated_length": 990.0, + "epoch": 0.19443888777755552, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4260732508367506, + "kl": 0.0099945068359375, + "learning_rate": 9.758801985265822e-07, + "loss": -0.0159, + "num_tokens": 42302988.0, + "reward": 2.421438694000244e-08, + "reward_std": 1.0070050954818726, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.421438694000244e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10143348505200173, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15374179472827057, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09259629622222519, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 972 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1257.875, + "completions/mean_terminated_length": 1112.5999755859375, + "completions/min_length": 815.0, + "completions/min_terminated_length": 815.0, + "epoch": 0.1946389277855571, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.973795361388002, + "kl": 0.0089874267578125, + "learning_rate": 9.757786000664776e-07, + "loss": 0.0344, + "num_tokens": 42360226.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0578982830047607, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10818888929033868, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07041372342407186, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059472994182545036, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 973 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1273.0, + "completions/mean_length": 1151.75, + "completions/mean_terminated_length": 1102.0, + "completions/min_length": 930.0, + "completions/min_terminated_length": 930.0, + "epoch": 0.19483896779355872, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.569626658875335, + "kl": 0.00787353515625, + "learning_rate": 9.756767939947943e-07, + "loss": -0.0182, + "num_tokens": 42408502.0, + "reward": 0.0, + "reward_std": 0.685393214225769, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0014840978346672496, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09104157176446635, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07252075054258099, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 974 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1419.0, + "completions/max_terminated_length": 1419.0, + "completions/mean_length": 1143.25, + "completions/mean_terminated_length": 1143.25, + "completions/min_length": 873.0, + "completions/min_terminated_length": 873.0, + "epoch": 0.1950390078015603, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6756792234482822, + "kl": 0.006439208984375, + "learning_rate": 9.755747803611732e-07, + "loss": -0.0248, + "num_tokens": 42448058.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.9865007400512695, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06084973441421046, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06097552711371674, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08388704928078612, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 975 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1480.0, + "completions/max_terminated_length": 1480.0, + "completions/mean_length": 938.4375, + "completions/mean_terminated_length": 938.4375, + "completions/min_length": 685.0, + "completions/min_terminated_length": 685.0, + "epoch": 0.19523904780956192, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9940591741315212, + "kl": 0.0116119384765625, + "learning_rate": 9.754725592153568e-07, + "loss": -0.0226, + "num_tokens": 42485641.0, + "reward": 0.0, + "reward_std": 0.7132540941238403, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.02658746276263584, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.19677106517880646, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11417984514369003, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 976 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1167.125, + "completions/mean_terminated_length": 1144.933349609375, + "completions/min_length": 875.0, + "completions/min_terminated_length": 875.0, + "epoch": 0.1954390878175635, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2434218450715093, + "kl": 0.0102996826171875, + "learning_rate": 9.753701306071882e-07, + "loss": -0.0591, + "num_tokens": 42521995.0, + "reward": 4.470348358154297e-08, + "reward_std": 1.0493853092193604, + "rewards/wordcountpos_reward_nokeypoint/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07820033428933273, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.053572258986499484, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.057089922571845, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 977 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1377.0, + "completions/max_terminated_length": 1377.0, + "completions/mean_length": 1078.1875, + "completions/mean_terminated_length": 1078.1875, + "completions/min_length": 783.0, + "completions/min_terminated_length": 783.0, + "epoch": 0.1956391278255651, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3159730841785144, + "kl": 0.010498046875, + "learning_rate": 9.752674945866127e-07, + "loss": -0.025, + "num_tokens": 42555134.0, + "reward": 2.2351741790771484e-08, + "reward_std": 0.987463116645813, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.021695684849482276, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04046516479113063, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10327955589886445, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 978 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1386.0, + "completions/max_terminated_length": 1386.0, + "completions/mean_length": 1060.5625, + "completions/mean_terminated_length": 1060.5625, + "completions/min_length": 810.0, + "completions/min_terminated_length": 810.0, + "epoch": 0.19583916783356672, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2714642444519413, + "kl": 0.0124359130859375, + "learning_rate": 9.751646512036756e-07, + "loss": -0.0669, + "num_tokens": 42604879.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7809333801269531, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09748517843575066, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2203004912780235, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12345339501504503, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 979 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1118.0, + "completions/max_terminated_length": 1118.0, + "completions/mean_length": 875.3125, + "completions/mean_terminated_length": 875.3125, + "completions/min_length": 768.0, + "completions/min_terminated_length": 768.0, + "epoch": 0.1960392078415683, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9484385615404856, + "kl": 0.005023956298828125, + "learning_rate": 9.750616005085239e-07, + "loss": -0.0227, + "num_tokens": 42633836.0, + "reward": 0.0, + "reward_std": 0.8136897683143616, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.021331420632374254, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04947308308094137, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07252075054258096, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 980 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1416.0, + "completions/mean_length": 1099.6875, + "completions/mean_terminated_length": 1073.0, + "completions/min_length": 704.0, + "completions/min_terminated_length": 704.0, + "epoch": 0.19623924784956992, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.180030347420967, + "kl": 0.0097198486328125, + "learning_rate": 9.749583425514056e-07, + "loss": 0.0492, + "num_tokens": 42669743.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7595632076263428, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.36713809370305, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.24497619292037331, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1060223596263578, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 981 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1333.0, + "completions/mean_length": 1197.0, + "completions/mean_terminated_length": 1176.800048828125, + "completions/min_length": 968.0, + "completions/min_terminated_length": 968.0, + "epoch": 0.1964392878575715, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.132565486638046, + "kl": 0.0091552734375, + "learning_rate": 9.748548773826699e-07, + "loss": 0.0239, + "num_tokens": 42718575.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9750230312347412, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.023824157987842293, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07972079905115823, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09388724521901161, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 982 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1318.0, + "completions/max_terminated_length": 1318.0, + "completions/mean_length": 1023.3125, + "completions/mean_terminated_length": 1023.3125, + "completions/min_length": 868.0, + "completions/min_terminated_length": 868.0, + "epoch": 0.19663932786557312, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9740327606419217, + "kl": 0.0079193115234375, + "learning_rate": 9.747512050527667e-07, + "loss": -0.0061, + "num_tokens": 42751100.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.035020112991333, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03771278861210742, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1334002497821941, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.038248698840130005, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327954292297363, + "step": 983 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1379.0, + "completions/mean_length": 1095.5625, + "completions/mean_terminated_length": 1068.60009765625, + "completions/min_length": 783.0, + "completions/min_terminated_length": 783.0, + "epoch": 0.19683936787357473, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.211749271493051, + "kl": 0.0098724365234375, + "learning_rate": 9.746473256122473e-07, + "loss": 0.0068, + "num_tokens": 42790101.0, + "reward": 0.0, + "reward_std": 0.6845227479934692, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0471636703510205, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0995271738894484, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07391185942027816, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 984 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1448.0, + "completions/mean_length": 1102.5, + "completions/mean_terminated_length": 1010.769287109375, + "completions/min_length": 677.0, + "completions/min_terminated_length": 677.0, + "epoch": 0.1970394078815763, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.977784244485465, + "kl": 0.0094757080078125, + "learning_rate": 9.745432391117634e-07, + "loss": -0.0663, + "num_tokens": 42843253.0, + "reward": 0.0, + "reward_std": 0.8866456747055054, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.24189455267102716, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06653264859992196, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875000000000001, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09016445879408155, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 985 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1316.0, + "completions/mean_length": 1089.4375, + "completions/mean_terminated_length": 1062.0667724609375, + "completions/min_length": 956.0, + "completions/min_terminated_length": 956.0, + "epoch": 0.19723944788957792, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.197773958733782, + "kl": 0.009429931640625, + "learning_rate": 9.744389456020683e-07, + "loss": 0.0217, + "num_tokens": 42893228.0, + "reward": 0.0, + "reward_std": 0.610695481300354, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.018290262474672702, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1469268133224277, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0787635937708768, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 986 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1439.0, + "completions/max_terminated_length": 1439.0, + "completions/mean_length": 1162.6875, + "completions/mean_terminated_length": 1162.6875, + "completions/min_length": 937.0, + "completions/min_terminated_length": 937.0, + "epoch": 0.1974394878975795, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7572444235737734, + "kl": 0.0103607177734375, + "learning_rate": 9.743344451340161e-07, + "loss": -0.0529, + "num_tokens": 42941191.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7924208641052246, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.13676396830957796, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0685336669804793, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1276569477008451, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 987 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1378.4375, + "completions/mean_terminated_length": 1305.5, + "completions/min_length": 838.0, + "completions/min_terminated_length": 838.0, + "epoch": 0.19763952790558112, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.053668265271677, + "kl": 0.0101776123046875, + "learning_rate": 9.742297377585617e-07, + "loss": -0.0423, + "num_tokens": 42997742.0, + "reward": 0.0, + "reward_std": 0.9025582671165466, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04276690000746571, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11890982284557869, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07252075054258096, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 988 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1437.0, + "completions/mean_length": 1323.375, + "completions/mean_terminated_length": 1186.0, + "completions/min_length": 1058.0, + "completions/min_terminated_length": 1058.0, + "epoch": 0.1978395679135827, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2288498171709823, + "kl": 0.009002685546875, + "learning_rate": 9.741248235267608e-07, + "loss": -0.0143, + "num_tokens": 43044924.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9370718002319336, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0002684323498285896, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07545357181330094, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09888264649460886, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 989 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1360.0, + "completions/max_terminated_length": 1360.0, + "completions/mean_length": 1070.0625, + "completions/mean_terminated_length": 1070.0625, + "completions/min_length": 936.0, + "completions/min_terminated_length": 936.0, + "epoch": 0.19803960792158432, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.497265271169296, + "kl": 0.0103302001953125, + "learning_rate": 9.740197024897697e-07, + "loss": -0.0205, + "num_tokens": 43095645.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.41995418071746826, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.02233754288521963, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.22529884065158184, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12382783747337808, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 990 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1475.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1050.1875, + "completions/mean_terminated_length": 1050.1875, + "completions/min_length": 797.0, + "completions/min_terminated_length": 797.0, + "epoch": 0.19823964792958593, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.382732656456988, + "kl": 0.010345458984375, + "learning_rate": 9.739143746988466e-07, + "loss": 0.0257, + "num_tokens": 43135208.0, + "reward": 5.21540641784668e-08, + "reward_std": 1.0562621355056763, + "rewards/wordcountpos_reward_nokeypoint/mean": 5.21540641784668e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.09053993292706639, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07951792195131999, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09953596037316066, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 991 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1324.0, + "completions/max_terminated_length": 1324.0, + "completions/mean_length": 964.0, + "completions/mean_terminated_length": 964.0, + "completions/min_length": 713.0, + "completions/min_terminated_length": 713.0, + "epoch": 0.1984396879375875, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6745210463041618, + "kl": 0.0091705322265625, + "learning_rate": 9.738088402053494e-07, + "loss": -0.0087, + "num_tokens": 43177536.0, + "reward": 4.470348358154297e-08, + "reward_std": 0.9881241321563721, + "rewards/wordcountpos_reward_nokeypoint/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.024922447273417934, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07860341340985724, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1354690069789096, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 992 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1200.0, + "completions/max_terminated_length": 1200.0, + "completions/mean_length": 932.5625, + "completions/mean_terminated_length": 932.5625, + "completions/min_length": 843.0, + "completions/min_terminated_length": 843.0, + "epoch": 0.19863972794558912, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6954212873788164, + "kl": 0.0047473907470703125, + "learning_rate": 9.73703099060737e-07, + "loss": -0.0211, + "num_tokens": 43206209.0, + "reward": 1.30385160446167e-08, + "reward_std": 0.9567482471466064, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.30385160446167e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.14919373303706665, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13949225797760093, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12224747213928167, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 993 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1460.0, + "completions/mean_length": 1100.0625, + "completions/mean_terminated_length": 1042.9285888671875, + "completions/min_length": 821.0, + "completions/min_terminated_length": 821.0, + "epoch": 0.1988397679535907, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3483833336547986, + "kl": 0.009979248046875, + "learning_rate": 9.735971513165697e-07, + "loss": -0.0177, + "num_tokens": 43257082.0, + "reward": 0.0, + "reward_std": 0.8427684307098389, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1617459196830795, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06695121345917691, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10027739304327551, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 994 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1445.0, + "completions/mean_length": 1246.25, + "completions/mean_terminated_length": 1210.0, + "completions/min_length": 878.0, + "completions/min_terminated_length": 878.0, + "epoch": 0.19903980796159232, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9981916053489357, + "kl": 0.009368896484375, + "learning_rate": 9.734909970245076e-07, + "loss": 0.0108, + "num_tokens": 43310254.0, + "reward": -4.470348358154297e-08, + "reward_std": 0.9046894311904907, + "rewards/wordcountpos_reward_nokeypoint/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07925636878234564, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03268525460589114, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12641788434189793, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 995 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1469.0, + "completions/max_terminated_length": 1469.0, + "completions/mean_length": 1127.875, + "completions/mean_terminated_length": 1127.875, + "completions/min_length": 774.0, + "completions/min_terminated_length": 774.0, + "epoch": 0.19923984796959393, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2024324341990837, + "kl": 0.0069732666015625, + "learning_rate": 9.733846362363127e-07, + "loss": -0.038, + "num_tokens": 43350620.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8460608720779419, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.16801700471546355, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10240784611565462, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1067187372905475, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 996 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1320.0, + "completions/max_terminated_length": 1320.0, + "completions/mean_length": 1101.5625, + "completions/mean_terminated_length": 1101.5625, + "completions/min_length": 849.0, + "completions/min_terminated_length": 849.0, + "epoch": 0.19943988797759551, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.335979564837253, + "kl": 0.0099334716796875, + "learning_rate": 9.732780690038464e-07, + "loss": 0.01, + "num_tokens": 43397181.0, + "reward": 0.0, + "reward_std": 0.5404672622680664, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09982381029224718, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.20657161681883776, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10852547064066471, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 997 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1303.0, + "completions/max_terminated_length": 1303.0, + "completions/mean_length": 1113.8125, + "completions/mean_terminated_length": 1113.8125, + "completions/min_length": 916.0, + "completions/min_terminated_length": 916.0, + "epoch": 0.19963992798559713, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6935090331722793, + "kl": 0.0127410888671875, + "learning_rate": 9.731712953790718e-07, + "loss": -0.0673, + "num_tokens": 43447730.0, + "reward": 0.0, + "reward_std": 0.8417454957962036, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.20522667118325094, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13130831325257156, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11979921473804349, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 998 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1429.0, + "completions/max_terminated_length": 1429.0, + "completions/mean_length": 1084.5625, + "completions/mean_terminated_length": 1084.5625, + "completions/min_length": 873.0, + "completions/min_terminated_length": 873.0, + "epoch": 0.1998399679935987, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3783912316398843, + "kl": 0.010528564453125, + "learning_rate": 9.730643154140518e-07, + "loss": -0.0551, + "num_tokens": 43489443.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0159975290298462, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.01495180984596958, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.19976133860523282, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08388704928078612, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 999 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1449.0, + "completions/mean_length": 1262.875, + "completions/mean_terminated_length": 1229.0, + "completions/min_length": 986.0, + "completions/min_terminated_length": 986.0, + "epoch": 0.20004000800160032, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.859793095679796, + "kl": 0.009552001953125, + "learning_rate": 9.729571291609507e-07, + "loss": 0.0052, + "num_tokens": 43537081.0, + "reward": 0.0, + "reward_std": 0.522189199924469, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.23317139358129418, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10371208039921047, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.17701224063135673, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1000 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1476.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1170.875, + "completions/mean_terminated_length": 1170.875, + "completions/min_length": 851.0, + "completions/min_terminated_length": 851.0, + "epoch": 0.20024004800960193, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.8419710619462268, + "kl": 0.00543212890625, + "learning_rate": 9.728497366720326e-07, + "loss": -0.0131, + "num_tokens": 43575063.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9846193790435791, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06969511621727772, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.047172816780908296, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09269623828717428, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1001 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1247.0, + "completions/max_terminated_length": 1247.0, + "completions/mean_length": 948.4375, + "completions/mean_terminated_length": 948.4375, + "completions/min_length": 690.0, + "completions/min_terminated_length": 690.0, + "epoch": 0.20044008801760352, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.565574218972778, + "kl": 0.01324462890625, + "learning_rate": 9.727421379996629e-07, + "loss": -0.019, + "num_tokens": 43618262.0, + "reward": 0.0, + "reward_std": 0.9601762294769287, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0003714771475082663, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12495780132374443, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09699179041242309, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1002 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1438.0, + "completions/mean_length": 1176.5625, + "completions/mean_terminated_length": 1029.5455322265625, + "completions/min_length": 746.0, + "completions/min_terminated_length": 746.0, + "epoch": 0.20064012802560513, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.82928980558373, + "kl": 0.0098876953125, + "learning_rate": 9.72634333196307e-07, + "loss": -0.0321, + "num_tokens": 43671351.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0316529273986816, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.059940935408557786, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1907368103591716, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07302967433402215, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1003 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1465.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 990.8125, + "completions/mean_terminated_length": 990.8125, + "completions/min_length": 697.0, + "completions/min_terminated_length": 697.0, + "epoch": 0.20084016803360671, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8228860143644376, + "kl": 0.013153076171875, + "learning_rate": 9.72526322314531e-07, + "loss": 0.0255, + "num_tokens": 43718724.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6309555172920227, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0034249721109603486, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17554606156401983, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11642832797715322, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1004 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1183.0, + "completions/max_terminated_length": 1183.0, + "completions/mean_length": 982.25, + "completions/mean_terminated_length": 982.25, + "completions/min_length": 812.0, + "completions/min_terminated_length": 812.0, + "epoch": 0.20104020804160833, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.132543100464066, + "kl": 0.0085296630859375, + "learning_rate": 9.724181054070018e-07, + "loss": 0.021, + "num_tokens": 43755208.0, + "reward": 0.0, + "reward_std": 0.665825366973877, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.2531526671607638, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16220551119000678, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07685966046898336, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1005 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1426.0, + "completions/mean_length": 1297.4375, + "completions/mean_terminated_length": 1139.888916015625, + "completions/min_length": 972.0, + "completions/min_terminated_length": 972.0, + "epoch": 0.2012402480496099, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.791966530437936, + "kl": 0.0070343017578125, + "learning_rate": 9.723096825264862e-07, + "loss": 0.0206, + "num_tokens": 43802535.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0470314025878906, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04838828259548067, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08132888546955075, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06382847385042255, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1006 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1391.0, + "completions/max_terminated_length": 1391.0, + "completions/mean_length": 908.5, + "completions/mean_terminated_length": 908.5, + "completions/min_length": 525.0, + "completions/min_terminated_length": 525.0, + "epoch": 0.20144028805761152, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.915030705612659, + "kl": 0.010894775390625, + "learning_rate": 9.722010537258516e-07, + "loss": -0.0005, + "num_tokens": 43837159.0, + "reward": 0.0, + "reward_std": 0.649959146976471, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.14154407368397667, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16013771868741125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08933913745655642, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1007 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1309.0, + "completions/max_terminated_length": 1309.0, + "completions/mean_length": 1014.875, + "completions/mean_terminated_length": 1014.875, + "completions/min_length": 804.0, + "completions/min_terminated_length": 804.0, + "epoch": 0.20164032806561313, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5262361145611534, + "kl": 0.0131988525390625, + "learning_rate": 9.720922190580662e-07, + "loss": -0.052, + "num_tokens": 43882853.0, + "reward": 0.0, + "reward_std": 0.571896493434906, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06045660901158478, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14261610017114626, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1270024788326182, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1008 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1257.875, + "completions/mean_terminated_length": 1241.7333984375, + "completions/min_length": 1027.0, + "completions/min_terminated_length": 1027.0, + "epoch": 0.20184036807361472, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0077145549507422, + "kl": 0.00792694091796875, + "learning_rate": 9.719831785761981e-07, + "loss": 0.0216, + "num_tokens": 43919011.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0469828844070435, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.12970716038537564, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.061574330972651276, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.022771001702132435, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1009 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1429.0, + "completions/mean_terminated_length": 1396.727294921875, + "completions/min_length": 1207.0, + "completions/min_terminated_length": 1207.0, + "epoch": 0.20204040808161633, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7159265153908243, + "kl": 0.0087432861328125, + "learning_rate": 9.71873932333416e-07, + "loss": -0.0216, + "num_tokens": 43963571.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.8990396857261658, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.18705502879522767, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.19554985608433778, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0620632890834175, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1010 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1442.0, + "completions/max_terminated_length": 1442.0, + "completions/mean_length": 1003.25, + "completions/mean_terminated_length": 1003.25, + "completions/min_length": 555.0, + "completions/min_terminated_length": 555.0, + "epoch": 0.2022404480896179, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3400124473527213, + "kl": 0.0104217529296875, + "learning_rate": 9.717644803829886e-07, + "loss": -0.0408, + "num_tokens": 43994087.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0531514883041382, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.037928891115551186, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1184445218564884, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12382783747337808, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1011 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1266.0, + "completions/max_terminated_length": 1266.0, + "completions/mean_length": 1019.5625, + "completions/mean_terminated_length": 1019.5625, + "completions/min_length": 799.0, + "completions/min_terminated_length": 799.0, + "epoch": 0.20244048809761953, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6165396271810306, + "kl": 0.00756072998046875, + "learning_rate": 9.716548227782854e-07, + "loss": 0.0027, + "num_tokens": 44035016.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.8626106977462769, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.02764000549647257, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06735455481763515, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9833333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.029814239699997188, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1012 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1256.5, + "completions/mean_terminated_length": 1240.2667236328125, + "completions/min_length": 1023.0, + "completions/min_terminated_length": 1023.0, + "epoch": 0.20264052810562114, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.272312294844041, + "kl": 0.0119476318359375, + "learning_rate": 9.71544959572776e-07, + "loss": 0.006, + "num_tokens": 44079648.0, + "reward": -3.725290298461914e-09, + "reward_std": 1.065244436264038, + "rewards/wordcountpos_reward_nokeypoint/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07177332250113466, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08014130326883, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0769800358919501, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1013 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1305.0, + "completions/mean_length": 1122.8125, + "completions/mean_terminated_length": 1097.666748046875, + "completions/min_length": 834.0, + "completions/min_terminated_length": 834.0, + "epoch": 0.20284056811362272, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0294131467417653, + "kl": 0.0091552734375, + "learning_rate": 9.7143489082003e-07, + "loss": -0.0334, + "num_tokens": 44126781.0, + "reward": 0.0, + "reward_std": 0.6008857488632202, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.036855379105484025, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0855015164545413, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.200554786086551, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1014 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1109.4375, + "completions/mean_terminated_length": 1083.4000244140625, + "completions/min_length": 801.0, + "completions/min_terminated_length": 801.0, + "epoch": 0.20304060812162433, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2799916289081614, + "kl": 0.0102691650390625, + "learning_rate": 9.713246165737177e-07, + "loss": -0.011, + "num_tokens": 44168964.0, + "reward": 0.0, + "reward_std": 0.373626708984375, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.058529099185145025, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07168238690665123, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10183501544346311, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1015 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1161.0, + "completions/max_terminated_length": 1161.0, + "completions/mean_length": 963.1875, + "completions/mean_terminated_length": 963.1875, + "completions/min_length": 704.0, + "completions/min_terminated_length": 704.0, + "epoch": 0.20324064812962592, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.368852235654075, + "kl": 0.00823211669921875, + "learning_rate": 9.712141368876092e-07, + "loss": -0.0482, + "num_tokens": 44209975.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.4895855784416199, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09052307870675319, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08470499242209457, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0958393717904348, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1016 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1427.0, + "completions/max_terminated_length": 1427.0, + "completions/mean_length": 1109.1875, + "completions/mean_terminated_length": 1109.1875, + "completions/min_length": 760.0, + "completions/min_terminated_length": 760.0, + "epoch": 0.20344068813762753, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.855355257589448, + "kl": 0.0119781494140625, + "learning_rate": 9.711034518155746e-07, + "loss": 0.0139, + "num_tokens": 44258498.0, + "reward": 0.0, + "reward_std": 0.7598207592964172, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0040354546987911525, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16055632582135107, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08595864638818418, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1017 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1369.0, + "completions/max_terminated_length": 1369.0, + "completions/mean_length": 1209.6875, + "completions/mean_terminated_length": 1209.6875, + "completions/min_length": 1073.0, + "completions/min_terminated_length": 1073.0, + "epoch": 0.2036407281456291, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.527229672300949, + "kl": 0.0065155029296875, + "learning_rate": 9.709925614115849e-07, + "loss": -0.0096, + "num_tokens": 44299741.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0009483098983765, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06502766516156026, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04030907004490856, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06426219440409445, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1018 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1215.0, + "completions/max_terminated_length": 1215.0, + "completions/mean_length": 1024.375, + "completions/mean_terminated_length": 1024.375, + "completions/min_length": 833.0, + "completions/min_terminated_length": 833.0, + "epoch": 0.20384076815363072, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.817668011649096, + "kl": 0.0139312744140625, + "learning_rate": 9.708814657297105e-07, + "loss": -0.0195, + "num_tokens": 44345635.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5322309136390686, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.25003029007365096, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.23925696156235238, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1465024333004847, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1019 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1198.0, + "completions/max_terminated_length": 1198.0, + "completions/mean_length": 1034.9375, + "completions/mean_terminated_length": 1034.9375, + "completions/min_length": 934.0, + "completions/min_terminated_length": 934.0, + "epoch": 0.20404080816163234, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.125673662429089, + "kl": 0.0068359375, + "learning_rate": 9.707701648241223e-07, + "loss": 0.0045, + "num_tokens": 44382642.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.989235520362854, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04335612146280112, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03913035055768123, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.053748384988656986, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1020 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1427.0, + "completions/max_terminated_length": 1427.0, + "completions/mean_length": 1174.625, + "completions/mean_terminated_length": 1174.625, + "completions/min_length": 928.0, + "completions/min_terminated_length": 928.0, + "epoch": 0.20424084816963392, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4086670744576915, + "kl": 0.01092529296875, + "learning_rate": 9.706586587490908e-07, + "loss": -0.0122, + "num_tokens": 44425492.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6771136522293091, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.05966448518797445, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12638860758285206, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05163977794943223, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1021 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1209.0, + "completions/max_terminated_length": 1209.0, + "completions/mean_length": 1031.3125, + "completions/mean_terminated_length": 1031.3125, + "completions/min_length": 766.0, + "completions/min_terminated_length": 766.0, + "epoch": 0.20444088817763553, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2288803086646567, + "kl": 0.00986480712890625, + "learning_rate": 9.705469475589875e-07, + "loss": -0.0264, + "num_tokens": 44457001.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0490679740905762, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.025729676392736597, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03325434569491508, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10101338378503961, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1022 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1485.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1043.375, + "completions/mean_terminated_length": 1043.375, + "completions/min_length": 720.0, + "completions/min_terminated_length": 720.0, + "epoch": 0.20464092818563712, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.375591914749362, + "kl": 0.0107269287109375, + "learning_rate": 9.704350313082827e-07, + "loss": -0.0488, + "num_tokens": 44490327.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.063845157623291, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0722687974638197, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10565261151217445, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09737289911202952, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1023 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1448.0, + "completions/mean_length": 1228.875, + "completions/mean_terminated_length": 1138.5, + "completions/min_length": 905.0, + "completions/min_terminated_length": 905.0, + "epoch": 0.20484096819363873, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6547276606173686, + "kl": 0.0263519287109375, + "learning_rate": 9.703229100515476e-07, + "loss": -0.0333, + "num_tokens": 44539725.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9984069466590881, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1788829555084766, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1373481501480394, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09339283817414601, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1024 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1454.0, + "completions/max_terminated_length": 1454.0, + "completions/mean_length": 1004.875, + "completions/mean_terminated_length": 1004.875, + "completions/min_length": 876.0, + "completions/min_terminated_length": 876.0, + "epoch": 0.20504100820164034, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1838291127512783, + "kl": 0.00762939453125, + "learning_rate": 9.702105838434528e-07, + "loss": 0.0059, + "num_tokens": 44569363.0, + "reward": 0.0, + "reward_std": 0.41129815578460693, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.023626303951620804, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0826459527739802, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.070841502796867, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1025 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1216.0, + "completions/mean_length": 1031.8125, + "completions/mean_terminated_length": 1000.6000366210938, + "completions/min_length": 659.0, + "completions/min_terminated_length": 659.0, + "epoch": 0.20524104820964192, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.510349922802455, + "kl": 0.0106201171875, + "learning_rate": 9.700980527387692e-07, + "loss": 0.044, + "num_tokens": 44619960.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0670186281204224, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10107030510198348, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07600418163635711, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08777074514725111, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1026 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1455.0, + "completions/mean_length": 1082.8125, + "completions/mean_terminated_length": 1055.0, + "completions/min_length": 822.0, + "completions/min_terminated_length": 822.0, + "epoch": 0.20544108821764354, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.817149098517448, + "kl": 0.0117950439453125, + "learning_rate": 9.699853167923675e-07, + "loss": -0.0251, + "num_tokens": 44654053.0, + "reward": 0.0, + "reward_std": 0.6953741312026978, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.013300303125940433, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1185025436558316, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09179284245476838, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1027 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1330.0, + "completions/mean_terminated_length": 1197.77783203125, + "completions/min_length": 571.0, + "completions/min_terminated_length": 571.0, + "epoch": 0.20564112822564512, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0973998190059464, + "kl": 0.0101776123046875, + "learning_rate": 9.698723760592182e-07, + "loss": -0.0511, + "num_tokens": 44709893.0, + "reward": 0.0, + "reward_std": 0.7700033187866211, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1580340100388161, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2855475758236729, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09888264649460886, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1028 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1464.0, + "completions/max_terminated_length": 1464.0, + "completions/mean_length": 1189.5, + "completions/mean_terminated_length": 1189.5, + "completions/min_length": 902.0, + "completions/min_terminated_length": 902.0, + "epoch": 0.20584116823364673, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6234392310470827, + "kl": 0.0113983154296875, + "learning_rate": 9.697592305943917e-07, + "loss": -0.0182, + "num_tokens": 44751805.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9414957761764526, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07029692731520787, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06041351147718305, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08153617692869924, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1029 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1387.0, + "completions/mean_length": 1339.5, + "completions/mean_terminated_length": 1266.5455322265625, + "completions/min_length": 910.0, + "completions/min_terminated_length": 910.0, + "epoch": 0.20604120824164832, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6293811154684197, + "kl": 0.00791168212890625, + "learning_rate": 9.696458804530582e-07, + "loss": -0.0024, + "num_tokens": 44798797.0, + "reward": 0.0, + "reward_std": 0.6607445478439331, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.20212746212767635, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16971318702349925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.053748384988656986, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1030 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1328.0, + "completions/max_terminated_length": 1328.0, + "completions/mean_length": 1046.4375, + "completions/mean_terminated_length": 1046.4375, + "completions/min_length": 774.0, + "completions/min_terminated_length": 774.0, + "epoch": 0.20624124824964993, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.634436643563332, + "kl": 0.0120086669921875, + "learning_rate": 9.69532325690488e-07, + "loss": -0.0389, + "num_tokens": 44839508.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0363447666168213, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.025539973690187973, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11574432840788659, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07969850595746354, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1031 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1409.0, + "completions/mean_length": 1198.625, + "completions/mean_terminated_length": 1178.533447265625, + "completions/min_length": 935.0, + "completions/min_terminated_length": 935.0, + "epoch": 0.20644128825765154, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3277411746779038, + "kl": 0.0137786865234375, + "learning_rate": 9.694185663620505e-07, + "loss": 0.0155, + "num_tokens": 44882654.0, + "reward": 0.0, + "reward_std": 0.7885055541992188, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.004345969893362971, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05447451624326258, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.03626037527129048, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1032 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1422.0, + "completions/mean_length": 1337.375, + "completions/mean_terminated_length": 1263.45458984375, + "completions/min_length": 1011.0, + "completions/min_terminated_length": 1011.0, + "epoch": 0.20664132826565312, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7715936249050994, + "kl": 0.011322021484375, + "learning_rate": 9.693046025232158e-07, + "loss": -0.0012, + "num_tokens": 44927916.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8547992706298828, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.02424819206084344, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03611667328450101, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07391185942027816, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1033 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1411.0, + "completions/max_terminated_length": 1411.0, + "completions/mean_length": 1140.625, + "completions/mean_terminated_length": 1140.625, + "completions/min_length": 873.0, + "completions/min_terminated_length": 873.0, + "epoch": 0.20684136827365474, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.597852749804784, + "kl": 0.0127716064453125, + "learning_rate": 9.691904342295527e-07, + "loss": 0.0169, + "num_tokens": 44963782.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.872109055519104, + "rewards/wordcountpos_reward_nokeypoint/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11102292224344264, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0790354002323487, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059004080210452246, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1034 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1494.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1140.875, + "completions/mean_terminated_length": 1140.875, + "completions/min_length": 968.0, + "completions/min_terminated_length": 968.0, + "epoch": 0.20704140828165632, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4023205132046086, + "kl": 0.0088348388671875, + "learning_rate": 9.690760615367303e-07, + "loss": -0.0379, + "num_tokens": 45006484.0, + "reward": 0.0, + "reward_std": 0.6810414791107178, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.21081275331930224, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11723907041606435, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09179284245476838, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1035 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1284.0, + "completions/max_terminated_length": 1284.0, + "completions/mean_length": 1129.625, + "completions/mean_terminated_length": 1129.625, + "completions/min_length": 960.0, + "completions/min_terminated_length": 960.0, + "epoch": 0.20724144828965793, + "frac_reward_zero_std": 0.5, + "grad_norm": 1.354502304767268, + "kl": 0.00531005859375, + "learning_rate": 9.689614845005175e-07, + "loss": -0.011, + "num_tokens": 45042734.0, + "reward": 0.0, + "reward_std": 0.7131551504135132, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.2257934815549915, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09529904043964993, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.975, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04127594582445935, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1036 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 997.0, + "completions/max_terminated_length": 997.0, + "completions/mean_length": 785.5, + "completions/mean_terminated_length": 785.5, + "completions/min_length": 665.0, + "completions/min_terminated_length": 665.0, + "epoch": 0.20744148829765954, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4883622655256246, + "kl": 0.0079193115234375, + "learning_rate": 9.688467031767824e-07, + "loss": -0.0071, + "num_tokens": 45068158.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.7914562225341797, + "rewards/wordcountpos_reward_nokeypoint/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06108224969572095, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.087456492300306, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9833333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.03849001794597504, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1037 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1468.0, + "completions/max_terminated_length": 1468.0, + "completions/mean_length": 1147.3125, + "completions/mean_terminated_length": 1147.3125, + "completions/min_length": 912.0, + "completions/min_terminated_length": 912.0, + "epoch": 0.20764152830566113, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.647005490846882, + "kl": 0.01361083984375, + "learning_rate": 9.687317176214927e-07, + "loss": -0.0026, + "num_tokens": 45119427.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6434769034385681, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1487694723099919, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15969124732093323, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06831300510639733, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1038 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1298.0, + "completions/max_terminated_length": 1298.0, + "completions/mean_length": 1092.0, + "completions/mean_terminated_length": 1092.0, + "completions/min_length": 938.0, + "completions/min_terminated_length": 938.0, + "epoch": 0.20784156831366274, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.637034604361485, + "kl": 0.01016998291015625, + "learning_rate": 9.686165278907162e-07, + "loss": 0.03, + "num_tokens": 45152491.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0341699123382568, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.009960911708034093, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.044298112977873214, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04216370213557838, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1039 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1270.0, + "completions/max_terminated_length": 1270.0, + "completions/mean_length": 1124.375, + "completions/mean_terminated_length": 1124.375, + "completions/min_length": 875.0, + "completions/min_terminated_length": 875.0, + "epoch": 0.20804160832166432, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.249328169540596, + "kl": 0.0107269287109375, + "learning_rate": 9.6850113404062e-07, + "loss": -0.0193, + "num_tokens": 45194017.0, + "reward": 0.0, + "reward_std": 0.9673272371292114, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.004950301719011532, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07144619244202702, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06382847385042252, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1040 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1224.0, + "completions/max_terminated_length": 1224.0, + "completions/mean_length": 976.9375, + "completions/mean_terminated_length": 976.9375, + "completions/min_length": 843.0, + "completions/min_terminated_length": 843.0, + "epoch": 0.20824164832966593, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7783445505103277, + "kl": 0.01318359375, + "learning_rate": 9.683855361274702e-07, + "loss": -0.0113, + "num_tokens": 45240672.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0593981742858887, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03466457436038206, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.134774132258851, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07097208632298359, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1041 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1461.0, + "completions/mean_length": 1402.5625, + "completions/mean_terminated_length": 1358.2728271484375, + "completions/min_length": 1231.0, + "completions/min_terminated_length": 1231.0, + "epoch": 0.20844168833766755, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.392817292939917, + "kl": 0.00795745849609375, + "learning_rate": 9.68269734207633e-07, + "loss": 0.0168, + "num_tokens": 45290761.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0196175575256348, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.054819090761213614, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11357681383538859, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09067647005823629, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1042 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1338.0, + "completions/max_terminated_length": 1338.0, + "completions/mean_length": 1021.9375, + "completions/mean_terminated_length": 1021.9375, + "completions/min_length": 892.0, + "completions/min_terminated_length": 892.0, + "epoch": 0.20864172834566913, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8763553232870938, + "kl": 0.0106201171875, + "learning_rate": 9.681537283375741e-07, + "loss": -0.0102, + "num_tokens": 45338448.0, + "reward": 0.0, + "reward_std": 0.6624785661697388, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03850384587642319, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16773785185587173, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06831300510639733, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1043 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1292.0, + "completions/max_terminated_length": 1292.0, + "completions/mean_length": 1076.125, + "completions/mean_terminated_length": 1076.125, + "completions/min_length": 875.0, + "completions/min_terminated_length": 875.0, + "epoch": 0.20884176835367074, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.823288345620695, + "kl": 0.011505126953125, + "learning_rate": 9.680375185738587e-07, + "loss": -0.0382, + "num_tokens": 45380698.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0442867279052734, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03813790663519937, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.029102599624887263, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09067647005823629, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1044 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1307.0, + "completions/max_terminated_length": 1307.0, + "completions/mean_length": 1128.0625, + "completions/mean_terminated_length": 1128.0625, + "completions/min_length": 940.0, + "completions/min_terminated_length": 940.0, + "epoch": 0.20904180836167233, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.527360577985357, + "kl": 0.003406524658203125, + "learning_rate": 9.67921104973151e-07, + "loss": 0.0057, + "num_tokens": 45422691.0, + "reward": 1.1175870895385742e-08, + "reward_std": 1.058334231376648, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1136344885527813, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.3788332256834741, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0620632890834175, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1045 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1069.375, + "completions/mean_terminated_length": 1040.666748046875, + "completions/min_length": 872.0, + "completions/min_terminated_length": 872.0, + "epoch": 0.20924184836967394, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.033283136376021, + "kl": 0.008819580078125, + "learning_rate": 9.678044875922147e-07, + "loss": -0.0153, + "num_tokens": 45464793.0, + "reward": 0.0, + "reward_std": 0.9009420871734619, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10744375383252625, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07084021518095808, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10246950765959599, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1046 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1375.0, + "completions/max_terminated_length": 1375.0, + "completions/mean_length": 1042.9375, + "completions/mean_terminated_length": 1042.9375, + "completions/min_length": 839.0, + "completions/min_terminated_length": 839.0, + "epoch": 0.20944188837767552, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.395403484704696, + "kl": 0.0076961517333984375, + "learning_rate": 9.67687666487913e-07, + "loss": -0.0041, + "num_tokens": 45515936.0, + "reward": 0.0, + "reward_std": 0.8827536702156067, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.02409196910119206, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2337973575066618, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08255189164891874, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1047 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1446.0, + "completions/mean_length": 1259.75, + "completions/mean_terminated_length": 1179.666748046875, + "completions/min_length": 1017.0, + "completions/min_terminated_length": 1017.0, + "epoch": 0.20964192838567713, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1321210663847547, + "kl": 0.01025390625, + "learning_rate": 9.675706417172084e-07, + "loss": -0.0006, + "num_tokens": 45568372.0, + "reward": 0.0, + "reward_std": 1.0089685916900635, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.11587607189952474, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11813105624064668, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09888264649460886, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1048 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1272.0, + "completions/max_terminated_length": 1272.0, + "completions/mean_length": 889.25, + "completions/mean_terminated_length": 889.25, + "completions/min_length": 607.0, + "completions/min_terminated_length": 607.0, + "epoch": 0.20984196839367875, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.329521342625746, + "kl": 0.0081634521484375, + "learning_rate": 9.674534133371629e-07, + "loss": -0.0669, + "num_tokens": 45600856.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.34936434030532837, + "rewards/wordcountpos_reward_nokeypoint/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10248644660187961, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11119302367533936, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14343665526661614, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1049 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1192.625, + "completions/mean_terminated_length": 1148.71435546875, + "completions/min_length": 900.0, + "completions/min_terminated_length": 900.0, + "epoch": 0.21004200840168033, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3257593603320252, + "kl": 0.0125732421875, + "learning_rate": 9.673359814049372e-07, + "loss": -0.0233, + "num_tokens": 45652874.0, + "reward": 0.0, + "reward_std": 1.001774787902832, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.20387736982207283, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1818093081758727, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07252075054258096, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1050 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1150.0, + "completions/max_terminated_length": 1150.0, + "completions/mean_length": 997.5, + "completions/mean_terminated_length": 997.5, + "completions/min_length": 716.0, + "completions/min_terminated_length": 716.0, + "epoch": 0.21024204840968194, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8069753396134285, + "kl": 0.013031005859375, + "learning_rate": 9.672183459777922e-07, + "loss": -0.0101, + "num_tokens": 45692202.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9666947722434998, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08608627334236407, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08391704871074265, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06666666666666668, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1051 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1216.0, + "completions/max_terminated_length": 1216.0, + "completions/mean_length": 950.0, + "completions/mean_terminated_length": 950.0, + "completions/min_length": 813.0, + "completions/min_terminated_length": 813.0, + "epoch": 0.21044208841768353, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.666866918092883, + "kl": 0.0083465576171875, + "learning_rate": 9.671005071130868e-07, + "loss": -0.0084, + "num_tokens": 45735002.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.8375790119171143, + "rewards/wordcountpos_reward_nokeypoint/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.15688966295896525, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07982133946516463, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09339283817414601, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1052 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1318.0, + "completions/mean_length": 1169.4375, + "completions/mean_terminated_length": 1147.4000244140625, + "completions/min_length": 952.0, + "completions/min_terminated_length": 952.0, + "epoch": 0.21064212842568514, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.595036323639607, + "kl": 0.00667572021484375, + "learning_rate": 9.669824648682805e-07, + "loss": 0.0236, + "num_tokens": 45772089.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6647682785987854, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05555121498450462, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06362406735579079, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09727776191382574, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1053 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1432.0, + "completions/mean_length": 1110.3125, + "completions/mean_terminated_length": 1084.3333740234375, + "completions/min_length": 802.0, + "completions/min_terminated_length": 802.0, + "epoch": 0.21084216843368675, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.603709063250059, + "kl": 0.011444091796875, + "learning_rate": 9.668642193009306e-07, + "loss": -0.0514, + "num_tokens": 45815086.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9259830713272095, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.11765611526505206, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14076202249461273, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11409872268574492, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1054 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1485.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1133.1875, + "completions/mean_terminated_length": 1133.1875, + "completions/min_length": 969.0, + "completions/min_terminated_length": 969.0, + "epoch": 0.21104220844168833, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4688178661509323, + "kl": 0.014801025390625, + "learning_rate": 9.667457704686943e-07, + "loss": -0.0359, + "num_tokens": 45852769.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.7068248987197876, + "rewards/wordcountpos_reward_nokeypoint/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11967898552448158, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11499804176345539, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12292725943057185, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1055 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1284.0, + "completions/max_terminated_length": 1284.0, + "completions/mean_length": 999.9375, + "completions/mean_terminated_length": 999.9375, + "completions/min_length": 787.0, + "completions/min_terminated_length": 787.0, + "epoch": 0.21124224844968995, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.09040611734006, + "kl": 0.014068603515625, + "learning_rate": 9.66627118429328e-07, + "loss": -0.0733, + "num_tokens": 45887136.0, + "reward": 0.0, + "reward_std": 0.6380780935287476, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.14581323995393053, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17543151682892857, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09179284245476836, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1056 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1366.0, + "completions/max_terminated_length": 1366.0, + "completions/mean_length": 1147.0, + "completions/mean_terminated_length": 1147.0, + "completions/min_length": 936.0, + "completions/min_terminated_length": 936.0, + "epoch": 0.21144228845769153, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9934046719229093, + "kl": 0.00910186767578125, + "learning_rate": 9.665082632406872e-07, + "loss": -0.0272, + "num_tokens": 45933368.0, + "reward": 1.1175870895385742e-08, + "reward_std": 1.006459355354309, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04717723181061447, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05689152259515753, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09583937179043478, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1057 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1383.0, + "completions/max_terminated_length": 1383.0, + "completions/mean_length": 1024.0, + "completions/mean_terminated_length": 1024.0, + "completions/min_length": 871.0, + "completions/min_terminated_length": 871.0, + "epoch": 0.21164232846569314, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.048011787305935, + "kl": 0.008453369140625, + "learning_rate": 9.663892049607257e-07, + "loss": -0.0278, + "num_tokens": 45974056.0, + "reward": 7.450580596923828e-09, + "reward_std": 0.9022600054740906, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03061201497075596, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.24110143077497084, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08421753138505422, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1058 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1359.0, + "completions/mean_length": 1239.5, + "completions/mean_terminated_length": 1222.1334228515625, + "completions/min_length": 1063.0, + "completions/min_terminated_length": 1063.0, + "epoch": 0.21184236847369473, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.466194638420697, + "kl": 0.007030487060546875, + "learning_rate": 9.662699436474969e-07, + "loss": 0.0128, + "num_tokens": 46022768.0, + "reward": 0.0, + "reward_std": 0.7785208225250244, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06893153675254804, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1791799542937142, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04849589520621153, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1059 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1403.0, + "completions/mean_length": 1126.5, + "completions/mean_terminated_length": 1101.60009765625, + "completions/min_length": 764.0, + "completions/min_terminated_length": 764.0, + "epoch": 0.21204240848169634, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7715555156780156, + "kl": 0.0120391845703125, + "learning_rate": 9.661504793591536e-07, + "loss": -0.0168, + "num_tokens": 46064280.0, + "reward": 0.0, + "reward_std": 0.732518196105957, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1623715519326982, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13293423545988503, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06871842709362766, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1060 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1467.0, + "completions/mean_length": 1349.4375, + "completions/mean_terminated_length": 1281.0, + "completions/min_length": 1180.0, + "completions/min_terminated_length": 1180.0, + "epoch": 0.21224244848969795, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5826838255580222, + "kl": 0.0127716064453125, + "learning_rate": 9.660308121539469e-07, + "loss": -0.0114, + "num_tokens": 46118335.0, + "reward": 0.0, + "reward_std": 0.6869628429412842, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.13542551615932952, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.23887194173969717, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059004080210452246, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1061 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1447.0, + "completions/mean_length": 1124.0625, + "completions/mean_terminated_length": 1099.0, + "completions/min_length": 759.0, + "completions/min_terminated_length": 759.0, + "epoch": 0.21244248849769953, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.792498524406483, + "kl": 0.013946533203125, + "learning_rate": 9.659109420902268e-07, + "loss": -0.0118, + "num_tokens": 46168544.0, + "reward": 0.0, + "reward_std": 1.0198485851287842, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.12432076816675956, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.34812120558728715, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1408308678285174, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1062 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1245.0, + "completions/mean_length": 1099.625, + "completions/mean_terminated_length": 1072.933349609375, + "completions/min_length": 865.0, + "completions/min_terminated_length": 865.0, + "epoch": 0.21264252850570114, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2604745481129647, + "kl": 0.01073455810546875, + "learning_rate": 9.65790869226443e-07, + "loss": 0.0624, + "num_tokens": 46213986.0, + "reward": 0.0, + "reward_std": 0.8885941505432129, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.13659190596345888, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.20865994960558087, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059004080210452226, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1063 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1109.0, + "completions/max_terminated_length": 1109.0, + "completions/mean_length": 885.6875, + "completions/mean_terminated_length": 885.6875, + "completions/min_length": 656.0, + "completions/min_terminated_length": 656.0, + "epoch": 0.21284256851370273, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4136014472135265, + "kl": 0.01128387451171875, + "learning_rate": 9.65670593621143e-07, + "loss": -0.0182, + "num_tokens": 46252541.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9813805818557739, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.016960870296593957, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15229476253341345, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09727776191382573, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1064 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1434.0, + "completions/mean_length": 1215.9375, + "completions/mean_terminated_length": 1175.357177734375, + "completions/min_length": 984.0, + "completions/min_terminated_length": 984.0, + "epoch": 0.21304260852170434, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.154390534121487, + "kl": 0.0134429931640625, + "learning_rate": 9.655501153329743e-07, + "loss": -0.006, + "num_tokens": 46303692.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9790463447570801, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1445917308726149, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10436244114764336, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09418264367902598, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1065 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1201.0, + "completions/max_terminated_length": 1201.0, + "completions/mean_length": 946.9375, + "completions/mean_terminated_length": 946.9375, + "completions/min_length": 815.0, + "completions/min_terminated_length": 815.0, + "epoch": 0.21324264852970595, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9151964653293665, + "kl": 0.0069732666015625, + "learning_rate": 9.654294344206822e-07, + "loss": -0.007, + "num_tokens": 46352651.0, + "reward": 0.0, + "reward_std": 0.5069700479507446, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11490148867799484, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14596679934349652, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1192569587999888, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1066 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1351.0, + "completions/mean_length": 1356.875, + "completions/mean_terminated_length": 1245.5555419921875, + "completions/min_length": 1148.0, + "completions/min_terminated_length": 1148.0, + "epoch": 0.21344268853770754, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2404941232487854, + "kl": 0.0112762451171875, + "learning_rate": 9.653085509431115e-07, + "loss": 0.0355, + "num_tokens": 46405713.0, + "reward": 0.0, + "reward_std": 0.9002017974853516, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10158454231381638, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04150244806517545, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13957607775504186, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1067 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1344.0, + "completions/max_terminated_length": 1344.0, + "completions/mean_length": 1119.5, + "completions/mean_terminated_length": 1119.5, + "completions/min_length": 835.0, + "completions/min_terminated_length": 835.0, + "epoch": 0.21364272854570915, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.411096594575992, + "kl": 0.004878997802734375, + "learning_rate": 9.651874649592055e-07, + "loss": -0.001, + "num_tokens": 46453073.0, + "reward": 0.0, + "reward_std": 0.991739809513092, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.11874280651141975, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07849646327839517, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11013459778666118, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1068 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1415.0, + "completions/mean_length": 1188.25, + "completions/mean_terminated_length": 1143.71435546875, + "completions/min_length": 869.0, + "completions/min_terminated_length": 869.0, + "epoch": 0.21384276855371073, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1163849991256845, + "kl": 0.0118865966796875, + "learning_rate": 9.650661765280062e-07, + "loss": 0.0329, + "num_tokens": 46500029.0, + "reward": 0.0, + "reward_std": 0.8938359022140503, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11487527762078516, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.22873879829258129, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13770607453181927, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1069 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1377.0, + "completions/max_terminated_length": 1377.0, + "completions/mean_length": 1053.6875, + "completions/mean_terminated_length": 1053.6875, + "completions/min_length": 777.0, + "completions/min_terminated_length": 777.0, + "epoch": 0.21404280856171234, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7155980668072695, + "kl": 0.0088348388671875, + "learning_rate": 9.649446857086547e-07, + "loss": -0.053, + "num_tokens": 46542048.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5443695783615112, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.16448302537500845, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12954544191647036, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.062063289083417524, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1070 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1388.0, + "completions/mean_terminated_length": 1276.0, + "completions/min_length": 903.0, + "completions/min_terminated_length": 903.0, + "epoch": 0.21424284856971396, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7107764339755684, + "kl": 0.0098876953125, + "learning_rate": 9.648229925603898e-07, + "loss": -0.0098, + "num_tokens": 46596624.0, + "reward": 0.0, + "reward_std": 1.0646164417266846, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08562535873418184, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06732947754961068, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09803627446568496, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1071 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1361.0, + "completions/mean_length": 1019.25, + "completions/mean_terminated_length": 987.2000732421875, + "completions/min_length": 800.0, + "completions/min_terminated_length": 800.0, + "epoch": 0.21444288857771554, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.794649440207888, + "kl": 0.01267242431640625, + "learning_rate": 9.647010971425503e-07, + "loss": -0.0308, + "num_tokens": 46647284.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9203571081161499, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07775430339783669, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10096360406175653, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05426273532033234, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1072 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1262.0, + "completions/max_terminated_length": 1262.0, + "completions/mean_length": 1037.25, + "completions/mean_terminated_length": 1037.25, + "completions/min_length": 850.0, + "completions/min_terminated_length": 850.0, + "epoch": 0.21464292858571715, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.589022889845084, + "kl": 0.0101776123046875, + "learning_rate": 9.645789995145727e-07, + "loss": -0.0318, + "num_tokens": 46677888.0, + "reward": 1.1175870895385742e-08, + "reward_std": 1.0404889583587646, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11782862367606806, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10335434816486248, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07252075054258096, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1073 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1455.0, + "completions/mean_length": 1238.875, + "completions/mean_terminated_length": 1221.4666748046875, + "completions/min_length": 1004.0, + "completions/min_terminated_length": 1004.0, + "epoch": 0.21484296859371874, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8713682367988818, + "kl": 0.0100555419921875, + "learning_rate": 9.644566997359924e-07, + "loss": -0.0188, + "num_tokens": 46714182.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0498671531677246, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06622069686692782, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05680757891647722, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059004080210452226, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1074 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1480.0, + "completions/mean_length": 1267.0625, + "completions/mean_terminated_length": 1233.7857666015625, + "completions/min_length": 1062.0, + "completions/min_terminated_length": 1062.0, + "epoch": 0.21504300860172035, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.972720826188326, + "kl": 0.011444091796875, + "learning_rate": 9.643341978664432e-07, + "loss": -0.007, + "num_tokens": 46759655.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9367009401321411, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1769460822217052, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16980396286751362, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08944271909999159, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1075 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1240.0, + "completions/max_terminated_length": 1240.0, + "completions/mean_length": 995.5625, + "completions/mean_terminated_length": 995.5625, + "completions/min_length": 690.0, + "completions/min_terminated_length": 690.0, + "epoch": 0.21524304860972193, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2846410788636224, + "kl": 0.0095977783203125, + "learning_rate": 9.642114939656579e-07, + "loss": -0.0326, + "num_tokens": 46800168.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.8802672624588013, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03567882136467566, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05199254966199888, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08595864638818418, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1076 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1119.0, + "completions/mean_length": 1232.1875, + "completions/mean_terminated_length": 964.375, + "completions/min_length": 801.0, + "completions/min_terminated_length": 801.0, + "epoch": 0.21544308861772354, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1323822213154653, + "kl": 0.01165771484375, + "learning_rate": 9.64088588093467e-07, + "loss": -0.0325, + "num_tokens": 46847707.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0368850231170654, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06320925918944531, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06571742372689934, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09016445879408155, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1077 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1421.0, + "completions/max_terminated_length": 1421.0, + "completions/mean_length": 1166.25, + "completions/mean_terminated_length": 1166.25, + "completions/min_length": 966.0, + "completions/min_terminated_length": 966.0, + "epoch": 0.21564312862572516, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.111601867375634, + "kl": 0.009857177734375, + "learning_rate": 9.639654803098003e-07, + "loss": 0.0231, + "num_tokens": 46885815.0, + "reward": 0.0, + "reward_std": 1.0079350471496582, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.043681808005111684, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07451624061053157, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13381856152046848, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1078 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1401.0, + "completions/mean_length": 1224.0, + "completions/mean_terminated_length": 1205.60009765625, + "completions/min_length": 1004.0, + "completions/min_terminated_length": 1004.0, + "epoch": 0.21584316863372674, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2811907524248296, + "kl": 0.0125732421875, + "learning_rate": 9.638421706746857e-07, + "loss": 0.0103, + "num_tokens": 46931367.0, + "reward": 0.0, + "reward_std": 0.5778778195381165, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.05586923457703461, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08838259088536417, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0758897836290186, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1079 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1242.3125, + "completions/mean_terminated_length": 1225.1334228515625, + "completions/min_length": 990.0, + "completions/min_terminated_length": 990.0, + "epoch": 0.21604320864172835, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6641548444413057, + "kl": 0.0115966796875, + "learning_rate": 9.637186592482493e-07, + "loss": -0.0466, + "num_tokens": 46975148.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9510846138000488, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06566742731212705, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06058836632306303, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9833333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05163977794943221, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1080 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1346.0625, + "completions/mean_terminated_length": 1294.75, + "completions/min_length": 985.0, + "completions/min_terminated_length": 985.0, + "epoch": 0.21624324864972994, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0469311412851576, + "kl": 0.01134490966796875, + "learning_rate": 9.63594946090716e-07, + "loss": -0.0066, + "num_tokens": 47020773.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.049539566040039, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10004487185467921, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10959167189654617, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14240006242195888, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1081 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1333.0, + "completions/max_terminated_length": 1333.0, + "completions/mean_length": 1014.0625, + "completions/mean_terminated_length": 1014.0625, + "completions/min_length": 709.0, + "completions/min_terminated_length": 709.0, + "epoch": 0.21644328865773155, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.912662474337833, + "kl": 0.0155029296875, + "learning_rate": 9.634710312624091e-07, + "loss": -0.0036, + "num_tokens": 47059078.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.946196436882019, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.2520158687783619, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.26172364794294506, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11855612829185828, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1082 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1432.0, + "completions/max_terminated_length": 1432.0, + "completions/mean_length": 1144.4375, + "completions/mean_terminated_length": 1144.4375, + "completions/min_length": 902.0, + "completions/min_terminated_length": 902.0, + "epoch": 0.21664332866573316, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.479165379192721, + "kl": 0.013458251953125, + "learning_rate": 9.633469148237496e-07, + "loss": -0.015, + "num_tokens": 47107285.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8016307950019836, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10501192253464837, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2265930819029568, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05443310539518172, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1083 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1434.0, + "completions/mean_length": 1309.5625, + "completions/mean_terminated_length": 1296.86669921875, + "completions/min_length": 1154.0, + "completions/min_terminated_length": 1154.0, + "epoch": 0.21684336867373474, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.153164994630873, + "kl": 0.0124053955078125, + "learning_rate": 9.632225968352577e-07, + "loss": 0.0235, + "num_tokens": 47152078.0, + "reward": 0.0, + "reward_std": 0.9523670673370361, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.40428535402655563, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12525846232342613, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14446581038560777, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1084 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1028.875, + "completions/mean_terminated_length": 997.4667358398438, + "completions/min_length": 730.0, + "completions/min_terminated_length": 730.0, + "epoch": 0.21704340868173636, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.152623602444593, + "kl": 0.010833740234375, + "learning_rate": 9.63098077357551e-07, + "loss": 0.0433, + "num_tokens": 47189652.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7500772476196289, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.31984110678401195, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1048677572954235, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09388724521901161, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1085 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1137.0, + "completions/max_terminated_length": 1137.0, + "completions/mean_length": 970.3125, + "completions/mean_terminated_length": 970.3125, + "completions/min_length": 816.0, + "completions/min_terminated_length": 816.0, + "epoch": 0.21724344868973794, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8821068262521106, + "kl": 0.0099334716796875, + "learning_rate": 9.62973356451346e-07, + "loss": -0.0005, + "num_tokens": 47229345.0, + "reward": 0.0, + "reward_std": 0.8582053780555725, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09768133982227742, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0895089832016335, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07391185942027817, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1086 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1409.0, + "completions/mean_length": 1407.125, + "completions/mean_terminated_length": 1287.71435546875, + "completions/min_length": 1176.0, + "completions/min_terminated_length": 1176.0, + "epoch": 0.21744348869773955, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.644483967535229, + "kl": 0.01140594482421875, + "learning_rate": 9.62848434177457e-07, + "loss": 0.0069, + "num_tokens": 47285651.0, + "reward": 0.0, + "reward_std": 0.5868078470230103, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.177380249993809, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.27305134687292204, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1087 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1363.0, + "completions/mean_length": 1390.0625, + "completions/mean_terminated_length": 1324.0999755859375, + "completions/min_length": 1248.0, + "completions/min_terminated_length": 1248.0, + "epoch": 0.21764352870574113, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.483324638173167, + "kl": 0.0023956298828125, + "learning_rate": 9.62723310596797e-07, + "loss": 0.0024, + "num_tokens": 47325740.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7219498157501221, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06509345599434, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09299440461534278, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11122216672215289, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1088 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1241.0, + "completions/mean_length": 1236.0, + "completions/mean_terminated_length": 1030.6666259765625, + "completions/min_length": 832.0, + "completions/min_terminated_length": 832.0, + "epoch": 0.21784356871374275, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.602947003401225, + "kl": 0.0089111328125, + "learning_rate": 9.625979857703764e-07, + "loss": 0.0292, + "num_tokens": 47370908.0, + "reward": 0.0, + "reward_std": 0.8420524597167969, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.09896370706332128, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10194200065248624, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06191391873668902, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1089 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1372.0, + "completions/max_terminated_length": 1372.0, + "completions/mean_length": 1037.9375, + "completions/mean_terminated_length": 1037.9375, + "completions/min_length": 859.0, + "completions/min_terminated_length": 859.0, + "epoch": 0.21804360872174436, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5130783761353475, + "kl": 0.012664794921875, + "learning_rate": 9.624724597593045e-07, + "loss": -0.0256, + "num_tokens": 47415195.0, + "reward": 0.0, + "reward_std": 0.9283661842346191, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.057076806691167, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.18232752982438233, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09803627446568494, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1090 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1458.0, + "completions/max_terminated_length": 1458.0, + "completions/mean_length": 1136.1875, + "completions/mean_terminated_length": 1136.1875, + "completions/min_length": 805.0, + "completions/min_terminated_length": 805.0, + "epoch": 0.21824364872974594, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2737396147989672, + "kl": 0.0132598876953125, + "learning_rate": 9.623467326247882e-07, + "loss": -0.013, + "num_tokens": 47459150.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8234089612960815, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.032097039031438346, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.22353360508039455, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08388704928078612, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1091 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1389.0, + "completions/mean_length": 1216.8125, + "completions/mean_terminated_length": 1197.933349609375, + "completions/min_length": 907.0, + "completions/min_terminated_length": 907.0, + "epoch": 0.21844368873774755, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1500485352600958, + "kl": 0.00922393798828125, + "learning_rate": 9.622208044281328e-07, + "loss": -0.0552, + "num_tokens": 47509499.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5720289349555969, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.11402440487314942, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12344577305671763, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13443985299781488, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1092 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1253.0, + "completions/max_terminated_length": 1253.0, + "completions/mean_length": 1047.0, + "completions/mean_terminated_length": 1047.0, + "completions/min_length": 842.0, + "completions/min_terminated_length": 842.0, + "epoch": 0.21864372874574914, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1118452986572374, + "kl": 0.010650634765625, + "learning_rate": 9.62094675230741e-07, + "loss": -0.0058, + "num_tokens": 47542379.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0412813425064087, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08582604651301197, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1275602936814551, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12382783747337808, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1093 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1405.0, + "completions/max_terminated_length": 1405.0, + "completions/mean_length": 1014.0625, + "completions/mean_terminated_length": 1014.0625, + "completions/min_length": 649.0, + "completions/min_terminated_length": 649.0, + "epoch": 0.21884376875375075, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.047537027324401, + "kl": 0.01214599609375, + "learning_rate": 9.619683450941146e-07, + "loss": 0.0154, + "num_tokens": 47578372.0, + "reward": 0.0, + "reward_std": 0.9995240569114685, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05740620079177461, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09298537378810151, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10470416879457553, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1094 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1353.0, + "completions/max_terminated_length": 1353.0, + "completions/mean_length": 992.6875, + "completions/mean_terminated_length": 992.6875, + "completions/min_length": 797.0, + "completions/min_terminated_length": 797.0, + "epoch": 0.21904380876175236, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2750482429435777, + "kl": 0.0106048583984375, + "learning_rate": 9.61841814079852e-07, + "loss": -0.0411, + "num_tokens": 47620367.0, + "reward": -1.1175870895385742e-08, + "reward_std": 1.0418176651000977, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0043283314321644305, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08705208052607362, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14401645996461912, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1095 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1370.0, + "completions/mean_length": 1011.8125, + "completions/mean_terminated_length": 979.2667236328125, + "completions/min_length": 717.0, + "completions/min_terminated_length": 717.0, + "epoch": 0.21924384876975395, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.488606480001311, + "kl": 0.012237548828125, + "learning_rate": 9.61715082249651e-07, + "loss": -0.0202, + "num_tokens": 47661228.0, + "reward": 0.0, + "reward_std": 0.7601935863494873, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08295072152091801, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14621416426290662, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10390522747338701, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1096 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1422.0, + "completions/max_terminated_length": 1422.0, + "completions/mean_length": 1078.75, + "completions/mean_terminated_length": 1078.75, + "completions/min_length": 813.0, + "completions/min_terminated_length": 813.0, + "epoch": 0.21944388877775556, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.258276758188429, + "kl": 0.0152130126953125, + "learning_rate": 9.615881496653062e-07, + "loss": -0.0105, + "num_tokens": 47712152.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9233806133270264, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.026954427290562948, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1536124711708256, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09574271077563383, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327954292297363, + "step": 1097 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1423.0, + "completions/max_terminated_length": 1423.0, + "completions/mean_length": 1105.0, + "completions/mean_terminated_length": 1105.0, + "completions/min_length": 785.0, + "completions/min_terminated_length": 785.0, + "epoch": 0.21964392878575714, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.117758499061851, + "kl": 0.00843048095703125, + "learning_rate": 9.61461016388711e-07, + "loss": 0.0249, + "num_tokens": 47755576.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5264866352081299, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.025472354626657742, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.25543136427636715, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.016666666666666663, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1098 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1398.0, + "completions/mean_length": 1140.8125, + "completions/mean_terminated_length": 1089.5, + "completions/min_length": 716.0, + "completions/min_terminated_length": 716.0, + "epoch": 0.21984396879375875, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6273199012410435, + "kl": 0.011077880859375, + "learning_rate": 9.613336824818555e-07, + "loss": -0.0054, + "num_tokens": 47806909.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9884711503982544, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.016123245995893327, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03184001163321191, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08388704928078611, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1099 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1353.0, + "completions/mean_length": 1155.125, + "completions/mean_terminated_length": 1132.1334228515625, + "completions/min_length": 870.0, + "completions/min_terminated_length": 870.0, + "epoch": 0.22004400880176037, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.940878466939745, + "kl": 0.00766754150390625, + "learning_rate": 9.612061480068286e-07, + "loss": -0.0485, + "num_tokens": 47845487.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.49113819003105164, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04571974689965144, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12865985681098302, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11013459778666118, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1100 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1430.0, + "completions/max_terminated_length": 1430.0, + "completions/mean_length": 1126.5, + "completions/mean_terminated_length": 1126.5, + "completions/min_length": 924.0, + "completions/min_terminated_length": 924.0, + "epoch": 0.22024404880976195, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.320415235317526, + "kl": 0.010162353515625, + "learning_rate": 9.610784130258167e-07, + "loss": -0.0299, + "num_tokens": 47896207.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0269901752471924, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0033157786998193137, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04239518456025495, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11080513425729777, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1101 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1310.0, + "completions/mean_length": 1253.1875, + "completions/mean_terminated_length": 1141.0, + "completions/min_length": 820.0, + "completions/min_terminated_length": 820.0, + "epoch": 0.22044408881776356, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5667252328003847, + "kl": 0.00753021240234375, + "learning_rate": 9.60950477601104e-07, + "loss": -0.0104, + "num_tokens": 47947930.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.592005729675293, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.15989282268496105, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17036392625915958, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09583937179043477, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1102 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1481.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1097.625, + "completions/mean_terminated_length": 1097.625, + "completions/min_length": 798.0, + "completions/min_terminated_length": 798.0, + "epoch": 0.22064412882576515, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7479840501287085, + "kl": 0.008514404296875, + "learning_rate": 9.608223417950724e-07, + "loss": -0.0523, + "num_tokens": 47987964.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5856928825378418, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.010327486548924165, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05139924447699854, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12141145226353543, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1103 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1358.0, + "completions/max_terminated_length": 1358.0, + "completions/mean_length": 1016.1875, + "completions/mean_terminated_length": 1016.1875, + "completions/min_length": 695.0, + "completions/min_terminated_length": 695.0, + "epoch": 0.22084416883376676, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8030987871680115, + "kl": 0.0105438232421875, + "learning_rate": 9.606940056702012e-07, + "loss": -0.0104, + "num_tokens": 48018143.0, + "reward": 0.0, + "reward_std": 0.876544713973999, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0480581032224602, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.054624884868438646, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08850612031567837, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1104 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1495.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1067.1875, + "completions/mean_terminated_length": 1067.1875, + "completions/min_length": 788.0, + "completions/min_terminated_length": 788.0, + "epoch": 0.22104420884176834, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4558844005172027, + "kl": 0.01690673828125, + "learning_rate": 9.60565469289068e-07, + "loss": -0.0069, + "num_tokens": 48069186.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0045816898345947, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0003380703885324911, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10200831954300067, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08734775114237131, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1105 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1263.0, + "completions/mean_length": 1030.6875, + "completions/mean_terminated_length": 999.4000244140625, + "completions/min_length": 598.0, + "completions/min_terminated_length": 598.0, + "epoch": 0.22124424884976995, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4207112625581284, + "kl": 0.011322021484375, + "learning_rate": 9.604367327143478e-07, + "loss": -0.0364, + "num_tokens": 48105605.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0327287912368774, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1731881285623572, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1038455376416315, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07888106377466156, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1106 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1361.0, + "completions/max_terminated_length": 1361.0, + "completions/mean_length": 1085.875, + "completions/mean_terminated_length": 1085.875, + "completions/min_length": 904.0, + "completions/min_terminated_length": 904.0, + "epoch": 0.22144428885777157, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5177046400561167, + "kl": 0.014862060546875, + "learning_rate": 9.603077960088128e-07, + "loss": -0.0478, + "num_tokens": 48148531.0, + "reward": 0.0, + "reward_std": 0.4245275855064392, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07696007975091629, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.24118539409951462, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11642832797715322, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1107 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1145.0, + "completions/max_terminated_length": 1145.0, + "completions/mean_length": 934.0625, + "completions/mean_terminated_length": 934.0625, + "completions/min_length": 717.0, + "completions/min_terminated_length": 717.0, + "epoch": 0.22164432886577315, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.766508355886085, + "kl": 0.0137176513671875, + "learning_rate": 9.601786592353334e-07, + "loss": -0.0187, + "num_tokens": 48191108.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.8047142028808594, + "rewards/wordcountpos_reward_nokeypoint/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.13917665492506706, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0850891592063418, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06831300510639733, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1108 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1367.0, + "completions/mean_length": 1154.3125, + "completions/mean_terminated_length": 1131.2667236328125, + "completions/min_length": 856.0, + "completions/min_terminated_length": 856.0, + "epoch": 0.22184436887377476, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3722571384210616, + "kl": 0.0131683349609375, + "learning_rate": 9.60049322456877e-07, + "loss": -0.0078, + "num_tokens": 48236617.0, + "reward": 0.0, + "reward_std": 0.8497191667556763, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.007459480907043976, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.22938268956279095, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08255189164891871, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1109 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1420.0, + "completions/max_terminated_length": 1420.0, + "completions/mean_length": 1078.3125, + "completions/mean_terminated_length": 1078.3125, + "completions/min_length": 722.0, + "completions/min_terminated_length": 722.0, + "epoch": 0.22204440888177635, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.180807012017664, + "kl": 0.0083465576171875, + "learning_rate": 9.599197857365091e-07, + "loss": 0.0057, + "num_tokens": 48270710.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9500327110290527, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06983787889756439, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1500759763489918, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09803627446568494, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1110 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1173.0, + "completions/max_terminated_length": 1173.0, + "completions/mean_length": 997.25, + "completions/mean_terminated_length": 997.25, + "completions/min_length": 774.0, + "completions/min_terminated_length": 774.0, + "epoch": 0.22224444888977796, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8645739939908856, + "kl": 0.0145263671875, + "learning_rate": 9.597900491373925e-07, + "loss": -0.0427, + "num_tokens": 48301754.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.952698826789856, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.061021025863279614, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06960862304277725, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08421753138505424, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1111 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1321.0, + "completions/max_terminated_length": 1321.0, + "completions/mean_length": 1109.3125, + "completions/mean_terminated_length": 1109.3125, + "completions/min_length": 915.0, + "completions/min_terminated_length": 915.0, + "epoch": 0.22244448889777957, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4994652419303076, + "kl": 0.0069732666015625, + "learning_rate": 9.596601127227868e-07, + "loss": -0.0325, + "num_tokens": 48351711.0, + "reward": 3.725290298461914e-09, + "reward_std": 0.9885015487670898, + "rewards/wordcountpos_reward_nokeypoint/mean": 3.725290298461914e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.16372601187768018, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06854769780888935, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09953596037316068, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1112 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1433.0, + "completions/mean_length": 1169.6875, + "completions/mean_terminated_length": 1147.666748046875, + "completions/min_length": 924.0, + "completions/min_terminated_length": 924.0, + "epoch": 0.22264452890578115, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.530030756345314, + "kl": 0.007953643798828125, + "learning_rate": 9.5952997655605e-07, + "loss": -0.054, + "num_tokens": 48396338.0, + "reward": 0.0, + "reward_std": 0.8982025384902954, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.17229265832807, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10465122971188498, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09574271077563382, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1113 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1426.0, + "completions/mean_length": 1322.4375, + "completions/mean_terminated_length": 1215.9000244140625, + "completions/min_length": 1021.0, + "completions/min_terminated_length": 1021.0, + "epoch": 0.22284456891378276, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5132481769803254, + "kl": 0.00823211669921875, + "learning_rate": 9.59399640700637e-07, + "loss": -0.0087, + "num_tokens": 48443097.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.6414424777030945, + "rewards/wordcountpos_reward_nokeypoint/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1377678063703865, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08114292498576073, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06540472290116194, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1114 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1385.0, + "completions/max_terminated_length": 1385.0, + "completions/mean_length": 1041.375, + "completions/mean_terminated_length": 1041.375, + "completions/min_length": 694.0, + "completions/min_terminated_length": 694.0, + "epoch": 0.22304460892178435, + "frac_reward_zero_std": 0.0, + "grad_norm": 5.925503165264402, + "kl": 0.0349884033203125, + "learning_rate": 9.592691052201002e-07, + "loss": -0.0062, + "num_tokens": 48494551.0, + "reward": 0.0, + "reward_std": 0.9798471927642822, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03514023519520519, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05933535395956627, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06666666666666665, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1115 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1492.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1219.875, + "completions/mean_terminated_length": 1219.875, + "completions/min_length": 1055.0, + "completions/min_terminated_length": 1055.0, + "epoch": 0.22324464892978596, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8791770911168455, + "kl": 0.0095062255859375, + "learning_rate": 9.59138370178089e-07, + "loss": 0.0265, + "num_tokens": 48539493.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.03444242477417, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05935507459682021, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.22630228862816795, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0769800358919501, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1116 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1403.0, + "completions/max_terminated_length": 1403.0, + "completions/mean_length": 1217.625, + "completions/mean_terminated_length": 1217.625, + "completions/min_length": 965.0, + "completions/min_terminated_length": 965.0, + "epoch": 0.22344468893778754, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.344517938051676, + "kl": 0.00637054443359375, + "learning_rate": 9.59007435638351e-07, + "loss": -0.0339, + "num_tokens": 48581791.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.066986322402954, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.01582659280095758, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1001036914915011, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04194352464039305, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1117 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1354.0, + "completions/max_terminated_length": 1354.0, + "completions/mean_length": 1139.0, + "completions/mean_terminated_length": 1139.0, + "completions/min_length": 954.0, + "completions/min_terminated_length": 954.0, + "epoch": 0.22364472894578916, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4824852204407915, + "kl": 0.0121002197265625, + "learning_rate": 9.588763016647298e-07, + "loss": 0.0004, + "num_tokens": 48620487.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9877213835716248, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.13156699955509474, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13233975716741575, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0769800358919501, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1118 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1320.0, + "completions/mean_length": 1088.4375, + "completions/mean_terminated_length": 1029.6429443359375, + "completions/min_length": 569.0, + "completions/min_terminated_length": 569.0, + "epoch": 0.22384476895379077, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5820369813524855, + "kl": 0.0114288330078125, + "learning_rate": 9.587449683211675e-07, + "loss": -0.0455, + "num_tokens": 48661638.0, + "reward": 0.0, + "reward_std": 0.8545340299606323, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.027910316677830074, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1424018190219094, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0906764700582363, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1119 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1431.0, + "completions/max_terminated_length": 1431.0, + "completions/mean_length": 1114.25, + "completions/mean_terminated_length": 1114.25, + "completions/min_length": 783.0, + "completions/min_terminated_length": 783.0, + "epoch": 0.22404480896179235, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.700091274663297, + "kl": 0.013427734375, + "learning_rate": 9.586134356717026e-07, + "loss": -0.0341, + "num_tokens": 48698738.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.6425969004631042, + "rewards/wordcountpos_reward_nokeypoint/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1315340798850865, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2370134719434242, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06666666666666665, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1120 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1295.0, + "completions/mean_terminated_length": 1247.6923828125, + "completions/min_length": 813.0, + "completions/min_terminated_length": 813.0, + "epoch": 0.22424484896979396, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6219279133422666, + "kl": 0.0107574462890625, + "learning_rate": 9.584817037804708e-07, + "loss": 0.0304, + "num_tokens": 48752410.0, + "reward": 0.0, + "reward_std": 0.7700464725494385, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07512105320234355, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1740198171190759, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06652763279965644, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1121 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1205.0, + "completions/max_terminated_length": 1205.0, + "completions/mean_length": 1046.1875, + "completions/mean_terminated_length": 1046.1875, + "completions/min_length": 747.0, + "completions/min_terminated_length": 747.0, + "epoch": 0.22444488897779555, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.569846880322828, + "kl": 0.01263427734375, + "learning_rate": 9.583497727117054e-07, + "loss": 0.0121, + "num_tokens": 48788549.0, + "reward": 0.0, + "reward_std": 0.7647693157196045, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08236978042460191, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09048545062804847, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07490735018081408, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1122 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1219.0, + "completions/mean_terminated_length": 1178.857177734375, + "completions/min_length": 844.0, + "completions/min_terminated_length": 844.0, + "epoch": 0.22464492898579716, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0479160429396663, + "kl": 0.0119171142578125, + "learning_rate": 9.582176425297366e-07, + "loss": -0.0281, + "num_tokens": 48834021.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0297996997833252, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06885615916266123, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0930670313926102, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08734775114237131, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1123 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1220.0, + "completions/max_terminated_length": 1220.0, + "completions/mean_length": 906.25, + "completions/mean_terminated_length": 906.25, + "completions/min_length": 573.0, + "completions/min_terminated_length": 573.0, + "epoch": 0.22484496899379877, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.441092478611498, + "kl": 0.0107879638671875, + "learning_rate": 9.580853132989916e-07, + "loss": -0.0097, + "num_tokens": 48863617.0, + "reward": 0.0, + "reward_std": 0.5824519395828247, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.19552587443342415, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10285594887933885, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09259629622222518, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1124 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1124.0, + "completions/mean_length": 1262.1875, + "completions/mean_terminated_length": 1024.375, + "completions/min_length": 943.0, + "completions/min_terminated_length": 943.0, + "epoch": 0.22504500900180036, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6369224162874776, + "kl": 0.00907135009765625, + "learning_rate": 9.579527850839947e-07, + "loss": 0.0217, + "num_tokens": 48917844.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6254435181617737, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1392824308474724, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1998472445385856, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08509254221575908, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1125 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1218.5, + "completions/mean_terminated_length": 1090.5455322265625, + "completions/min_length": 763.0, + "completions/min_terminated_length": 763.0, + "epoch": 0.22524504900980197, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8784957807624734, + "kl": 0.0106201171875, + "learning_rate": 9.578200579493674e-07, + "loss": -0.0148, + "num_tokens": 48972724.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5425010919570923, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07743631182784011, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11479751784509637, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06652763279965646, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1126 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1287.1875, + "completions/mean_terminated_length": 1256.7857666015625, + "completions/min_length": 1004.0, + "completions/min_terminated_length": 1004.0, + "epoch": 0.22544508901780355, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.671437315580917, + "kl": 0.00952911376953125, + "learning_rate": 9.57687131959828e-07, + "loss": 0.0026, + "num_tokens": 49014103.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.32740527391433716, + "rewards/wordcountpos_reward_nokeypoint/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.033069837079354625, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1769649737256352, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.070841502796867, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1127 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1222.0, + "completions/max_terminated_length": 1222.0, + "completions/mean_length": 1057.125, + "completions/mean_terminated_length": 1057.125, + "completions/min_length": 942.0, + "completions/min_terminated_length": 942.0, + "epoch": 0.22564512902580516, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.89409778352216, + "kl": 0.0133056640625, + "learning_rate": 9.575540071801917e-07, + "loss": 0.0088, + "num_tokens": 49061401.0, + "reward": 0.0, + "reward_std": 0.8976319432258606, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06386601012356706, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15101492101542285, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.102469507659596, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1128 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1235.0, + "completions/mean_length": 1292.6875, + "completions/mean_terminated_length": 1085.375, + "completions/min_length": 985.0, + "completions/min_terminated_length": 985.0, + "epoch": 0.22584516903380678, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.58058677770921, + "kl": 0.00927734375, + "learning_rate": 9.574206836753708e-07, + "loss": 0.0146, + "num_tokens": 49098540.0, + "reward": 5.587935447692871e-09, + "reward_std": 1.0675444602966309, + "rewards/wordcountpos_reward_nokeypoint/mean": 5.587935447692871e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.13890793559999287, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0622157430863391, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06191391873668902, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1129 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1298.0, + "completions/max_terminated_length": 1298.0, + "completions/mean_length": 1121.0625, + "completions/mean_terminated_length": 1121.0625, + "completions/min_length": 874.0, + "completions/min_terminated_length": 874.0, + "epoch": 0.22604520904180836, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.973699068104549, + "kl": 0.0091705322265625, + "learning_rate": 9.572871615103747e-07, + "loss": -0.0316, + "num_tokens": 49151821.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.9231468439102173, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.23257176907616325, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.3034978701342261, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08027729719194864, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1130 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1170.9375, + "completions/mean_terminated_length": 1149.0, + "completions/min_length": 973.0, + "completions/min_terminated_length": 973.0, + "epoch": 0.22624524904980997, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3946860893877258, + "kl": 0.0105438232421875, + "learning_rate": 9.57153440750309e-07, + "loss": -0.0252, + "num_tokens": 49194628.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.45196333527565, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04076312006706048, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.21601948484287653, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09067647005823629, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1131 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1482.0, + "completions/mean_length": 1293.625, + "completions/mean_terminated_length": 1279.86669921875, + "completions/min_length": 1069.0, + "completions/min_terminated_length": 1069.0, + "epoch": 0.22644528905781156, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1018123758622083, + "kl": 0.011627197265625, + "learning_rate": 9.570195214603767e-07, + "loss": -0.0354, + "num_tokens": 49240334.0, + "reward": 0.0, + "reward_std": 0.9945549964904785, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.019393454256384023, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10162972488519495, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.975, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.033333333333333326, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1132 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1440.0, + "completions/mean_length": 1214.875, + "completions/mean_terminated_length": 1195.86669921875, + "completions/min_length": 963.0, + "completions/min_terminated_length": 963.0, + "epoch": 0.22664532906581317, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.133860397163449, + "kl": 0.0110321044921875, + "learning_rate": 9.568854037058776e-07, + "loss": -0.0356, + "num_tokens": 49282372.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.5069957971572876, + "rewards/wordcountpos_reward_nokeypoint/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.027870068462279716, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.265380755674777, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04127594582445935, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1133 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1283.25, + "completions/mean_terminated_length": 1233.2308349609375, + "completions/min_length": 945.0, + "completions/min_terminated_length": 945.0, + "epoch": 0.22684536907381475, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.003385970775244, + "kl": 0.01153564453125, + "learning_rate": 9.567510875522081e-07, + "loss": -0.0311, + "num_tokens": 49332152.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.39044612646102905, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.13263523423085896, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.41490186505698756, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06191391873668902, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1134 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1388.0, + "completions/mean_length": 1237.0625, + "completions/mean_terminated_length": 1176.3846435546875, + "completions/min_length": 985.0, + "completions/min_terminated_length": 985.0, + "epoch": 0.22704540908181636, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.157545647367974, + "kl": 0.010040283203125, + "learning_rate": 9.566165730648613e-07, + "loss": -0.0029, + "num_tokens": 49387313.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9208770394325256, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.02776471038524523, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09338857043851785, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09428090415820636, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1135 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1119.0, + "completions/mean_length": 1172.125, + "completions/mean_terminated_length": 844.25, + "completions/min_length": 753.0, + "completions/min_terminated_length": 753.0, + "epoch": 0.22724544908981797, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5763790302294702, + "kl": 0.005387306213378906, + "learning_rate": 9.56481860309427e-07, + "loss": 0.0095, + "num_tokens": 49422627.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9687747955322266, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09853231614542826, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04216418269981477, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.046943622609505783, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1136 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1390.0, + "completions/mean_length": 1168.25, + "completions/mean_terminated_length": 1146.1334228515625, + "completions/min_length": 1018.0, + "completions/min_terminated_length": 1018.0, + "epoch": 0.22744548909781956, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7281074592215497, + "kl": 0.00879669189453125, + "learning_rate": 9.563469493515917e-07, + "loss": 0.0064, + "num_tokens": 49474903.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0508205890655518, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.010424281829958706, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05025860706209607, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0687184270936277, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1137 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1455.0, + "completions/mean_length": 1252.0625, + "completions/mean_terminated_length": 1235.533447265625, + "completions/min_length": 987.0, + "completions/min_terminated_length": 987.0, + "epoch": 0.22764552910582117, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0331360438512114, + "kl": 0.0104217529296875, + "learning_rate": 9.562118402571387e-07, + "loss": 0.0429, + "num_tokens": 49528816.0, + "reward": 0.0, + "reward_std": 0.7713534832000732, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.12198716665320958, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.19373346520840368, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06191391873668902, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1138 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1405.0, + "completions/max_terminated_length": 1405.0, + "completions/mean_length": 1000.375, + "completions/mean_terminated_length": 1000.375, + "completions/min_length": 634.0, + "completions/min_terminated_length": 634.0, + "epoch": 0.22784556911382275, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5041656234878844, + "kl": 0.0107269287109375, + "learning_rate": 9.56076533091948e-07, + "loss": 0.0449, + "num_tokens": 49575654.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8483834266662598, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.018924604443213817, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2152324037763882, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10610965676722954, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1139 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1349.0, + "completions/mean_length": 1134.75, + "completions/mean_terminated_length": 1110.4000244140625, + "completions/min_length": 794.0, + "completions/min_terminated_length": 794.0, + "epoch": 0.22804560912182437, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.66687573578962, + "kl": 0.00807952880859375, + "learning_rate": 9.559410279219959e-07, + "loss": -0.0044, + "num_tokens": 49627466.0, + "reward": 0.0, + "reward_std": 0.9560683965682983, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.16405512927080454, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08895825992929743, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08388704928078614, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1140 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1388.0, + "completions/mean_length": 1312.875, + "completions/mean_terminated_length": 1227.8182373046875, + "completions/min_length": 1035.0, + "completions/min_terminated_length": 1035.0, + "epoch": 0.22824564912982598, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6558375461483608, + "kl": 0.0111846923828125, + "learning_rate": 9.55805324813355e-07, + "loss": 0.0092, + "num_tokens": 49664600.0, + "reward": 0.0, + "reward_std": 0.8391335010528564, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03344041789260352, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10834814753742375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10671873729054748, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1141 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1461.0, + "completions/max_terminated_length": 1461.0, + "completions/mean_length": 1227.3125, + "completions/mean_terminated_length": 1227.3125, + "completions/min_length": 976.0, + "completions/min_terminated_length": 976.0, + "epoch": 0.22844568913782756, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.9386696437340523, + "kl": 0.00547027587890625, + "learning_rate": 9.55669423832195e-07, + "loss": 0.0015, + "num_tokens": 49707445.0, + "reward": 0.0, + "reward_std": 0.48847872018814087, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08129265174238615, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16813038536404082, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13221755360572016, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1142 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1307.875, + "completions/mean_terminated_length": 1192.5999755859375, + "completions/min_length": 829.0, + "completions/min_terminated_length": 829.0, + "epoch": 0.22864572914582917, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7813941334783205, + "kl": 0.0111846923828125, + "learning_rate": 9.555333250447819e-07, + "loss": -0.0677, + "num_tokens": 49762547.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.011335849761963, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.16394101430470653, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.20175248420653022, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0894427190999916, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1143 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1442.0, + "completions/mean_length": 1188.375, + "completions/mean_terminated_length": 1143.857177734375, + "completions/min_length": 810.0, + "completions/min_terminated_length": 810.0, + "epoch": 0.22884576915383076, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.490145734086335, + "kl": 0.007724761962890625, + "learning_rate": 9.55397028517478e-07, + "loss": 0.0013, + "num_tokens": 49804441.0, + "reward": 7.450580596923828e-09, + "reward_std": 0.9867085814476013, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11435641410220633, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.052720553149164164, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.03849001794597504, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1144 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1470.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1215.3125, + "completions/mean_terminated_length": 1215.3125, + "completions/min_length": 922.0, + "completions/min_terminated_length": 922.0, + "epoch": 0.22904580916183237, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.792851775629051, + "kl": 0.00731658935546875, + "learning_rate": 9.552605343167422e-07, + "loss": -0.04, + "num_tokens": 49852294.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9141248464584351, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1872721717796481, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.28077355435260215, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04999999999999999, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1145 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1260.0, + "completions/max_terminated_length": 1260.0, + "completions/mean_length": 1002.9375, + "completions/mean_terminated_length": 1002.9375, + "completions/min_length": 796.0, + "completions/min_terminated_length": 796.0, + "epoch": 0.22924584916983395, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4520021353627883, + "kl": 0.01153564453125, + "learning_rate": 9.551238425091295e-07, + "loss": -0.027, + "num_tokens": 49891797.0, + "reward": 0.0, + "reward_std": 0.7190994620323181, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06752173594981167, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07048206187525616, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08606629658238706, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1146 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1318.0, + "completions/max_terminated_length": 1318.0, + "completions/mean_length": 1081.5, + "completions/mean_terminated_length": 1081.5, + "completions/min_length": 762.0, + "completions/min_terminated_length": 762.0, + "epoch": 0.22944588917783557, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7108149071641745, + "kl": 0.0130157470703125, + "learning_rate": 9.549869531612918e-07, + "loss": 0.0339, + "num_tokens": 49928301.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.572751522064209, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1546437600456614, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16999409880687466, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06978803887752091, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1147 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1148.0, + "completions/max_terminated_length": 1148.0, + "completions/mean_length": 950.25, + "completions/mean_terminated_length": 950.25, + "completions/min_length": 832.0, + "completions/min_terminated_length": 832.0, + "epoch": 0.22964592918583718, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8027401022576637, + "kl": 0.0091400146484375, + "learning_rate": 9.548498663399764e-07, + "loss": -0.0428, + "num_tokens": 49964249.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.3496186435222626, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11368839045560442, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2682151546720387, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11409872268574492, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1148 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1449.0, + "completions/mean_length": 1302.125, + "completions/mean_terminated_length": 1256.4615478515625, + "completions/min_length": 826.0, + "completions/min_terminated_length": 826.0, + "epoch": 0.22984596919383876, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1958689516621717, + "kl": 0.011962890625, + "learning_rate": 9.54712582112028e-07, + "loss": -0.0218, + "num_tokens": 50017883.0, + "reward": 0.0, + "reward_std": 0.9573728442192078, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.046581253399967576, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08647471787744106, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12881223774390613, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1149 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1418.0, + "completions/max_terminated_length": 1418.0, + "completions/mean_length": 1286.4375, + "completions/mean_terminated_length": 1286.4375, + "completions/min_length": 1082.0, + "completions/min_terminated_length": 1082.0, + "epoch": 0.23004600920184037, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9822520807235375, + "kl": 0.012115478515625, + "learning_rate": 9.545751005443868e-07, + "loss": 0.0108, + "num_tokens": 50067146.0, + "reward": 0.0, + "reward_std": 0.987741231918335, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.061570847621325295, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09790547512901057, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05146016078626404, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1150 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1398.0, + "completions/mean_length": 1137.625, + "completions/mean_terminated_length": 1085.857177734375, + "completions/min_length": 741.0, + "completions/min_terminated_length": 741.0, + "epoch": 0.23024604920984196, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.23904789256914, + "kl": 0.0126953125, + "learning_rate": 9.544374217040894e-07, + "loss": -0.0927, + "num_tokens": 50118948.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9261940717697144, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09784669238788062, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1875200503751609, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09496588081262933, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1151 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1482.0, + "completions/mean_length": 1424.8125, + "completions/mean_terminated_length": 1366.3333740234375, + "completions/min_length": 1073.0, + "completions/min_terminated_length": 1073.0, + "epoch": 0.23044608921784357, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.837185662020484, + "kl": 0.00881195068359375, + "learning_rate": 9.542995456582687e-07, + "loss": 0.0281, + "num_tokens": 50166873.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8210859298706055, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1244707034869137, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10076333554421922, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.052880017930181294, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1152 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1317.0, + "completions/max_terminated_length": 1317.0, + "completions/mean_length": 1135.25, + "completions/mean_terminated_length": 1135.25, + "completions/min_length": 936.0, + "completions/min_terminated_length": 936.0, + "epoch": 0.23064612922584518, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6222700011119287, + "kl": 0.0113525390625, + "learning_rate": 9.541614724741535e-07, + "loss": -0.0172, + "num_tokens": 50208637.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9922845363616943, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07433594432675877, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.20255242243575847, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08255189164891871, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1153 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1451.0, + "completions/mean_length": 1302.375, + "completions/mean_terminated_length": 1236.5, + "completions/min_length": 1022.0, + "completions/min_terminated_length": 1022.0, + "epoch": 0.23084616923384677, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0162171059627987, + "kl": 0.0107421875, + "learning_rate": 9.540232022190694e-07, + "loss": -0.0108, + "num_tokens": 50260659.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8617068529129028, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.01887082345475566, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06060454278989857, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06978803887752091, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1154 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1389.0, + "completions/max_terminated_length": 1389.0, + "completions/mean_length": 1116.5, + "completions/mean_terminated_length": 1116.5, + "completions/min_length": 910.0, + "completions/min_terminated_length": 910.0, + "epoch": 0.23104620924184838, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5334722969400487, + "kl": 0.013275146484375, + "learning_rate": 9.538847349604369e-07, + "loss": -0.04, + "num_tokens": 50298987.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.035852074623108, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08235374399441003, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10349522439544706, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0620632890834175, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1155 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1400.0, + "completions/max_terminated_length": 1400.0, + "completions/mean_length": 1081.5625, + "completions/mean_terminated_length": 1081.5625, + "completions/min_length": 773.0, + "completions/min_terminated_length": 773.0, + "epoch": 0.23124624924984996, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7895443993110094, + "kl": 0.01397705078125, + "learning_rate": 9.53746070765774e-07, + "loss": 0.0174, + "num_tokens": 50352588.0, + "reward": 1.1175870895385742e-08, + "reward_std": 1.0679316520690918, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.23755543324303283, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13037094969372756, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09269623828717428, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1156 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1322.0, + "completions/max_terminated_length": 1322.0, + "completions/mean_length": 1020.125, + "completions/mean_terminated_length": 1020.125, + "completions/min_length": 837.0, + "completions/min_terminated_length": 837.0, + "epoch": 0.23144628925785157, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.34190576159468, + "kl": 0.011077880859375, + "learning_rate": 9.536072097026933e-07, + "loss": 0.0182, + "num_tokens": 50390750.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0360794067382812, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07205422238451778, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1886915704383926, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14291929864761418, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1157 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1306.0, + "completions/max_terminated_length": 1306.0, + "completions/mean_length": 1059.5625, + "completions/mean_terminated_length": 1059.5625, + "completions/min_length": 748.0, + "completions/min_terminated_length": 748.0, + "epoch": 0.23164632926585316, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.221869718688801, + "kl": 0.0098419189453125, + "learning_rate": 9.534681518389045e-07, + "loss": -0.0141, + "num_tokens": 50432879.0, + "reward": 0.0, + "reward_std": 0.7912411689758301, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.013447665928921028, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.23807278127893636, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05163977794943221, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1158 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1440.0, + "completions/mean_length": 1041.0625, + "completions/mean_terminated_length": 975.5000610351562, + "completions/min_length": 464.0, + "completions/min_terminated_length": 464.0, + "epoch": 0.23184636927385477, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.680427230372907, + "kl": 0.01971435546875, + "learning_rate": 9.533288972422126e-07, + "loss": 0.0709, + "num_tokens": 50471704.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9626301527023315, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.007284704286967915, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07334889349610853, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10532137766186214, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1159 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1396.125, + "completions/mean_terminated_length": 1315.3333740234375, + "completions/min_length": 1053.0, + "completions/min_terminated_length": 1053.0, + "epoch": 0.23204640928185638, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.291126224012084, + "kl": 0.013916015625, + "learning_rate": 9.531894459805192e-07, + "loss": 0.0135, + "num_tokens": 50526034.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.7734341025352478, + "rewards/wordcountpos_reward_nokeypoint/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05371085137943549, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07024237595823339, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11155467020454342, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1160 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1481.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1091.5, + "completions/mean_terminated_length": 1091.5, + "completions/min_length": 871.0, + "completions/min_terminated_length": 871.0, + "epoch": 0.23224644928985796, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.797142020655594, + "kl": 0.00714874267578125, + "learning_rate": 9.53049798121821e-07, + "loss": -0.0058, + "num_tokens": 50559938.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.7661815285682678, + "rewards/wordcountpos_reward_nokeypoint/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.17557388627714568, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10976815112624286, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12141145226353543, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1161 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1417.0, + "completions/mean_length": 1139.3125, + "completions/mean_terminated_length": 1087.7857666015625, + "completions/min_length": 786.0, + "completions/min_terminated_length": 786.0, + "epoch": 0.23244648929785958, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4029839556753054, + "kl": 0.0146331787109375, + "learning_rate": 9.52909953734211e-07, + "loss": 0.0283, + "num_tokens": 50607423.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0510642528533936, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.12635546984308957, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14605269871411783, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09428090415820634, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1162 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1216.5, + "completions/mean_terminated_length": 1197.60009765625, + "completions/min_length": 960.0, + "completions/min_terminated_length": 960.0, + "epoch": 0.23264652930586116, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.096832192104855, + "kl": 0.0107421875, + "learning_rate": 9.527699128858779e-07, + "loss": -0.0118, + "num_tokens": 50653007.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9624508619308472, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.09096757040477704, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17360219931373472, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11344765475923412, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1163 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1257.0, + "completions/max_terminated_length": 1257.0, + "completions/mean_length": 902.375, + "completions/mean_terminated_length": 902.375, + "completions/min_length": 451.0, + "completions/min_terminated_length": 451.0, + "epoch": 0.23284656931386277, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.575896834794338, + "kl": 0.00926971435546875, + "learning_rate": 9.526296756451065e-07, + "loss": 0.0168, + "num_tokens": 50682629.0, + "reward": 0.0, + "reward_std": 0.7360181212425232, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.020355508579483334, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05991148055202983, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08734775114237131, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1164 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1209.0, + "completions/max_terminated_length": 1209.0, + "completions/mean_length": 868.75, + "completions/mean_terminated_length": 868.75, + "completions/min_length": 693.0, + "completions/min_terminated_length": 693.0, + "epoch": 0.23304660932186438, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.442416612745571, + "kl": 0.0128173828125, + "learning_rate": 9.524892420802769e-07, + "loss": -0.0365, + "num_tokens": 50731825.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9774985909461975, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.2655529956452961, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.28120500067852555, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09339283817414601, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1165 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1199.1875, + "completions/mean_terminated_length": 1098.916748046875, + "completions/min_length": 863.0, + "completions/min_terminated_length": 863.0, + "epoch": 0.23324664932986597, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3916035949483194, + "kl": 0.0117340087890625, + "learning_rate": 9.523486122598652e-07, + "loss": -0.0896, + "num_tokens": 50784116.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0463393926620483, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.016816855916364917, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07767007869039154, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1112221667221529, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1166 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1413.0, + "completions/mean_length": 1222.6875, + "completions/mean_terminated_length": 1204.2000732421875, + "completions/min_length": 907.0, + "completions/min_terminated_length": 907.0, + "epoch": 0.23344668933786758, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.946210126931088, + "kl": 0.014068603515625, + "learning_rate": 9.522077862524432e-07, + "loss": -0.0015, + "num_tokens": 50836695.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9903689622879028, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.030667226080364755, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14853939813132802, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08509254221575907, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1167 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1391.0, + "completions/max_terminated_length": 1391.0, + "completions/mean_length": 1061.625, + "completions/mean_terminated_length": 1061.625, + "completions/min_length": 946.0, + "completions/min_terminated_length": 946.0, + "epoch": 0.23364672934586916, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.517934789427849, + "kl": 0.0146484375, + "learning_rate": 9.520667641266781e-07, + "loss": -0.0216, + "num_tokens": 50881337.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.9971720576286316, + "rewards/wordcountpos_reward_nokeypoint/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0208736118838848, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05751976002672652, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08243965245133132, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1168 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1419.0, + "completions/mean_length": 1268.5625, + "completions/mean_terminated_length": 1191.416748046875, + "completions/min_length": 966.0, + "completions/min_terminated_length": 966.0, + "epoch": 0.23384676935387078, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.741469083677512, + "kl": 0.0106658935546875, + "learning_rate": 9.519255459513332e-07, + "loss": 0.0058, + "num_tokens": 50934618.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0138280391693115, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08112696328260775, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06455431058397804, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07252075054258098, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1169 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1492.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1095.75, + "completions/mean_terminated_length": 1095.75, + "completions/min_length": 879.0, + "completions/min_terminated_length": 879.0, + "epoch": 0.2340468093618724, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.258546006272156, + "kl": 0.014312744140625, + "learning_rate": 9.517841317952668e-07, + "loss": -0.0078, + "num_tokens": 50977638.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8368469476699829, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.20667613845120308, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12216433894640819, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08333333333333334, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1170 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1429.0, + "completions/max_terminated_length": 1429.0, + "completions/mean_length": 963.8125, + "completions/mean_terminated_length": 963.8125, + "completions/min_length": 725.0, + "completions/min_terminated_length": 725.0, + "epoch": 0.23424684936987397, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.459468097167225, + "kl": 0.0128631591796875, + "learning_rate": 9.516425217274333e-07, + "loss": -0.0314, + "num_tokens": 51006507.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7648836374282837, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.05586602942546922, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08160328741442918, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059004080210452246, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1171 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1385.0, + "completions/max_terminated_length": 1385.0, + "completions/mean_length": 1116.5625, + "completions/mean_terminated_length": 1116.5625, + "completions/min_length": 798.0, + "completions/min_terminated_length": 798.0, + "epoch": 0.23444688937787558, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.923013758640876, + "kl": 0.00649261474609375, + "learning_rate": 9.515007158168826e-07, + "loss": -0.0218, + "num_tokens": 51046892.0, + "reward": 0.0, + "reward_std": 0.5498537421226501, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10630525198845096, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08940876562190353, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.026874192494328493, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1172 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1391.0, + "completions/mean_length": 1191.125, + "completions/mean_terminated_length": 1119.84619140625, + "completions/min_length": 970.0, + "completions/min_terminated_length": 970.0, + "epoch": 0.23464692938587717, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.804759057915188, + "kl": 0.0102996826171875, + "learning_rate": 9.513587141327596e-07, + "loss": 0.0224, + "num_tokens": 51091894.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0450172424316406, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04820224210604462, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12481480413222454, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11538983843829063, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1173 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1353.0, + "completions/max_terminated_length": 1353.0, + "completions/mean_length": 1003.0, + "completions/mean_terminated_length": 1003.0, + "completions/min_length": 755.0, + "completions/min_terminated_length": 755.0, + "epoch": 0.23484696939387878, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.924754675333851, + "kl": 0.0128936767578125, + "learning_rate": 9.512165167443049e-07, + "loss": -0.0355, + "num_tokens": 51132902.0, + "reward": 0.0, + "reward_std": 0.7470074892044067, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.2865050065504879, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.21752064015811598, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04554200340426487, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1174 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1461.0, + "completions/mean_length": 1244.0, + "completions/mean_terminated_length": 1207.4285888671875, + "completions/min_length": 833.0, + "completions/min_terminated_length": 833.0, + "epoch": 0.23504700940188036, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0877137114887434, + "kl": 0.00809478759765625, + "learning_rate": 9.510741237208549e-07, + "loss": -0.042, + "num_tokens": 51180014.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8111889958381653, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.02361024474202384, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14450386372478724, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.016666666666666663, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1175 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1339.0, + "completions/max_terminated_length": 1339.0, + "completions/mean_length": 1095.875, + "completions/mean_terminated_length": 1095.875, + "completions/min_length": 859.0, + "completions/min_terminated_length": 859.0, + "epoch": 0.23524704940988198, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.764678899491785, + "kl": 0.0101318359375, + "learning_rate": 9.509315351318409e-07, + "loss": -0.0188, + "num_tokens": 51216148.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5173150897026062, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.008974982975399826, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2925610593619195, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04999999999999999, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1176 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1261.0, + "completions/max_terminated_length": 1261.0, + "completions/mean_length": 1034.5625, + "completions/mean_terminated_length": 1034.5625, + "completions/min_length": 812.0, + "completions/min_terminated_length": 812.0, + "epoch": 0.2354470894178836, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9952922003869658, + "kl": 0.012725830078125, + "learning_rate": 9.507887510467898e-07, + "loss": -0.0103, + "num_tokens": 51253493.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5666517615318298, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.09244393423004098, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12453028652025595, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10532137766186216, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1177 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1326.0, + "completions/max_terminated_length": 1326.0, + "completions/mean_length": 1107.25, + "completions/mean_terminated_length": 1107.25, + "completions/min_length": 738.0, + "completions/min_terminated_length": 738.0, + "epoch": 0.23564712942588517, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2523029253387175, + "kl": 0.011993408203125, + "learning_rate": 9.506457715353236e-07, + "loss": -0.0377, + "num_tokens": 51294225.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.043263554573059, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0930362139364367, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12081010390869043, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08153617692869924, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1178 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1388.0, + "completions/max_terminated_length": 1388.0, + "completions/mean_length": 1238.6875, + "completions/mean_terminated_length": 1238.6875, + "completions/min_length": 1107.0, + "completions/min_terminated_length": 1107.0, + "epoch": 0.23584716943388678, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3597368702561754, + "kl": 0.0085296630859375, + "learning_rate": 9.505025966671601e-07, + "loss": -0.0054, + "num_tokens": 51341508.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0624487400054932, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.015535637113912813, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08465900148876271, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.03442651863295481, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1179 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1232.0, + "completions/max_terminated_length": 1232.0, + "completions/mean_length": 1083.75, + "completions/mean_terminated_length": 1083.75, + "completions/min_length": 804.0, + "completions/min_terminated_length": 804.0, + "epoch": 0.23604720944188837, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3172324178183197, + "kl": 0.01239013671875, + "learning_rate": 9.503592265121117e-07, + "loss": -0.0456, + "num_tokens": 51373736.0, + "reward": 0.0, + "reward_std": 1.0501863956451416, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.01671924946962281, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07293112880549794, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09583937179043477, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1180 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1395.0, + "completions/max_terminated_length": 1395.0, + "completions/mean_length": 1146.9375, + "completions/mean_terminated_length": 1146.9375, + "completions/min_length": 862.0, + "completions/min_terminated_length": 862.0, + "epoch": 0.23624724944988998, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.483000238064089, + "kl": 0.012451171875, + "learning_rate": 9.502156611400866e-07, + "loss": -0.0182, + "num_tokens": 51425687.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9701535701751709, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.15846762529847488, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07088121892813978, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11642832797715323, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1181 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1210.0, + "completions/max_terminated_length": 1210.0, + "completions/mean_length": 1026.875, + "completions/mean_terminated_length": 1026.875, + "completions/min_length": 811.0, + "completions/min_terminated_length": 811.0, + "epoch": 0.2364472894578916, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4702432221826953, + "kl": 0.01263427734375, + "learning_rate": 9.500719006210877e-07, + "loss": 0.0115, + "num_tokens": 51468677.0, + "reward": 0.0, + "reward_std": 0.6430450677871704, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04978217071847785, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0695076103579662, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8250000000000001, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11642832797715322, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1182 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1342.0, + "completions/mean_length": 1133.875, + "completions/mean_terminated_length": 1109.4666748046875, + "completions/min_length": 901.0, + "completions/min_terminated_length": 901.0, + "epoch": 0.23664732946589317, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6394500762897293, + "kl": 0.00696563720703125, + "learning_rate": 9.499279450252134e-07, + "loss": 0.0189, + "num_tokens": 51518659.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.8804893493652344, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03831252764093982, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08800370332371384, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.061913918736689035, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1183 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1393.0, + "completions/mean_length": 1324.5625, + "completions/mean_terminated_length": 1244.8182373046875, + "completions/min_length": 950.0, + "completions/min_terminated_length": 950.0, + "epoch": 0.2368473694738948, + "frac_reward_zero_std": 0.5, + "grad_norm": 2.195165149351403, + "kl": 0.0124969482421875, + "learning_rate": 9.49783794422657e-07, + "loss": 0.01, + "num_tokens": 51566828.0, + "reward": 2.2351741790771484e-08, + "reward_std": 0.738893449306488, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.3759599966969881, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2873840785815087, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0620632890834175, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1184 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1445.0, + "completions/mean_length": 1183.8125, + "completions/mean_terminated_length": 994.1000366210938, + "completions/min_length": 683.0, + "completions/min_terminated_length": 683.0, + "epoch": 0.23704740948189637, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.633293999354772, + "kl": 0.017486572265625, + "learning_rate": 9.496394488837071e-07, + "loss": -0.0534, + "num_tokens": 51621889.0, + "reward": 0.0, + "reward_std": 0.6940269470214844, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.17121577008649627, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.25765451467280176, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12405196043952266, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1185 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1402.0, + "completions/max_terminated_length": 1402.0, + "completions/mean_length": 1145.25, + "completions/mean_terminated_length": 1145.25, + "completions/min_length": 837.0, + "completions/min_terminated_length": 837.0, + "epoch": 0.23724744948989798, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.789938546822308, + "kl": 0.010223388671875, + "learning_rate": 9.494949084787472e-07, + "loss": -0.0116, + "num_tokens": 51670197.0, + "reward": 0.0, + "reward_std": 0.9995273947715759, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03210011670296026, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.059679967660379106, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06666666666666665, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1186 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1433.0, + "completions/max_terminated_length": 1433.0, + "completions/mean_length": 1117.1875, + "completions/mean_terminated_length": 1117.1875, + "completions/min_length": 975.0, + "completions/min_terminated_length": 975.0, + "epoch": 0.23744748949789957, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.053889791525715, + "kl": 0.009662628173828125, + "learning_rate": 9.493501732782559e-07, + "loss": -0.0545, + "num_tokens": 51712200.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.7754299640655518, + "rewards/wordcountpos_reward_nokeypoint/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.015097652932631968, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13227305093326427, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059004080210452246, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1187 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1454.0, + "completions/mean_length": 1300.9375, + "completions/mean_terminated_length": 1255.0, + "completions/min_length": 1063.0, + "completions/min_terminated_length": 1063.0, + "epoch": 0.23764752950590118, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3398328778261375, + "kl": 0.0125885009765625, + "learning_rate": 9.492052433528065e-07, + "loss": -0.0383, + "num_tokens": 51755431.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0380761623382568, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07342760314409968, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09458056439548311, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.067631901304592, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1188 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1282.3125, + "completions/mean_terminated_length": 1151.7000732421875, + "completions/min_length": 926.0, + "completions/min_terminated_length": 926.0, + "epoch": 0.2378475695139028, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.605622831765575, + "kl": 0.0128326416015625, + "learning_rate": 9.490601187730679e-07, + "loss": -0.0153, + "num_tokens": 51805980.0, + "reward": 0.0, + "reward_std": 0.9944010376930237, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.031922261065070304, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09652818438817634, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09179284245476838, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1189 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1442.0, + "completions/mean_length": 1290.0625, + "completions/mean_terminated_length": 1241.615478515625, + "completions/min_length": 1127.0, + "completions/min_terminated_length": 1127.0, + "epoch": 0.23804760952190437, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6387442530907013, + "kl": 0.01052093505859375, + "learning_rate": 9.489147996098031e-07, + "loss": 0.0002, + "num_tokens": 51851125.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9676609039306641, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.045620631665265345, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05944001104731781, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07649739768026004, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1190 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1335.0, + "completions/max_terminated_length": 1335.0, + "completions/mean_length": 984.0, + "completions/mean_terminated_length": 984.0, + "completions/min_length": 753.0, + "completions/min_terminated_length": 753.0, + "epoch": 0.23824764952990599, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.780610446228049, + "kl": 0.01495361328125, + "learning_rate": 9.487692859338709e-07, + "loss": 0.0723, + "num_tokens": 51889661.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6628699898719788, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07872822847714132, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07982671891520197, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08027729719194865, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1191 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1391.0, + "completions/max_terminated_length": 1391.0, + "completions/mean_length": 1172.9375, + "completions/mean_terminated_length": 1172.9375, + "completions/min_length": 993.0, + "completions/min_terminated_length": 993.0, + "epoch": 0.23844768953790757, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.80041476779887, + "kl": 0.0099945068359375, + "learning_rate": 9.486235778162238e-07, + "loss": -0.037, + "num_tokens": 51932500.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9446781873703003, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.12942814109916057, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04832270595359173, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08388704928078611, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327954292297363, + "step": 1192 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1449.0, + "completions/max_terminated_length": 1449.0, + "completions/mean_length": 1101.375, + "completions/mean_terminated_length": 1101.375, + "completions/min_length": 886.0, + "completions/min_terminated_length": 886.0, + "epoch": 0.23864772954590918, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.0438936241164365, + "kl": 0.00461578369140625, + "learning_rate": 9.484776753279101e-07, + "loss": -0.0532, + "num_tokens": 51973666.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9681024551391602, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.008162143131390052, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12032183952414016, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06978803887752091, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1193 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1221.1875, + "completions/mean_terminated_length": 1128.25, + "completions/min_length": 927.0, + "completions/min_terminated_length": 927.0, + "epoch": 0.2388477695539108, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4160572243336746, + "kl": 0.007965087890625, + "learning_rate": 9.483315785400726e-07, + "loss": 0.0198, + "num_tokens": 52011677.0, + "reward": -9.313225746154785e-09, + "reward_std": 0.9708843231201172, + "rewards/wordcountpos_reward_nokeypoint/mean": -9.313225746154785e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06867031990681963, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.062011876363744924, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.067631901304592, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1194 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1397.0, + "completions/max_terminated_length": 1397.0, + "completions/mean_length": 1210.9375, + "completions/mean_terminated_length": 1210.9375, + "completions/min_length": 987.0, + "completions/min_terminated_length": 987.0, + "epoch": 0.23904780956191238, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.032204580067044, + "kl": 0.010467529296875, + "learning_rate": 9.481852875239485e-07, + "loss": -0.0116, + "num_tokens": 52056204.0, + "reward": 0.0, + "reward_std": 1.0170783996582031, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.16424706633962652, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09456484318412645, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06871842709362766, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1195 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1369.0, + "completions/max_terminated_length": 1369.0, + "completions/mean_length": 1036.5, + "completions/mean_terminated_length": 1036.5, + "completions/min_length": 786.0, + "completions/min_terminated_length": 786.0, + "epoch": 0.239247849569914, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.867211486931057, + "kl": 0.01324462890625, + "learning_rate": 9.480388023508702e-07, + "loss": -0.0306, + "num_tokens": 52099844.0, + "reward": 3.725290298461914e-08, + "reward_std": 1.049767255783081, + "rewards/wordcountpos_reward_nokeypoint/mean": 3.725290298461914e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.186078833223592, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0733938367880594, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1308094458023239, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1196 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1364.0, + "completions/mean_length": 1246.75, + "completions/mean_terminated_length": 1049.77783203125, + "completions/min_length": 948.0, + "completions/min_terminated_length": 948.0, + "epoch": 0.23944788957791557, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.985399080168734, + "kl": 0.0107879638671875, + "learning_rate": 9.478921230922643e-07, + "loss": -0.0004, + "num_tokens": 52152832.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8919453024864197, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.029859304660057574, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06586845576740573, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10174405069512349, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327954292297363, + "step": 1197 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1465.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 1054.4375, + "completions/mean_terminated_length": 1054.4375, + "completions/min_length": 806.0, + "completions/min_terminated_length": 806.0, + "epoch": 0.23964792958591719, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9052627358333165, + "kl": 0.011688232421875, + "learning_rate": 9.477452498196526e-07, + "loss": 0.0242, + "num_tokens": 52194111.0, + "reward": 0.0, + "reward_std": 1.0347249507904053, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09407636585394256, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07231046985980144, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07252075054258099, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1198 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1443.0, + "completions/max_terminated_length": 1443.0, + "completions/mean_length": 1162.75, + "completions/mean_terminated_length": 1162.75, + "completions/min_length": 1006.0, + "completions/min_terminated_length": 1006.0, + "epoch": 0.2398479695939188, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5263278854668156, + "kl": 0.015838623046875, + "learning_rate": 9.475981826046507e-07, + "loss": 0.0327, + "num_tokens": 52239483.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0535037517547607, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05794707101260529, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0730488855790384, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07969850595746354, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1199 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1300.0, + "completions/max_terminated_length": 1300.0, + "completions/mean_length": 1109.375, + "completions/mean_terminated_length": 1109.375, + "completions/min_length": 911.0, + "completions/min_terminated_length": 911.0, + "epoch": 0.24004800960192038, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.475008583475681, + "kl": 0.0120391845703125, + "learning_rate": 9.474509215189696e-07, + "loss": -0.026, + "num_tokens": 52272609.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6601617336273193, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.18615396511579216, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08020945591121918, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059472994182545036, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1200 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1238.8125, + "completions/mean_terminated_length": 1221.4000244140625, + "completions/min_length": 921.0, + "completions/min_terminated_length": 921.0, + "epoch": 0.240248049609922, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0486113977040055, + "kl": 0.0115814208984375, + "learning_rate": 9.473034666344144e-07, + "loss": -0.0071, + "num_tokens": 52319150.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9512143135070801, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09078828777571853, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07952808848551363, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04216370213557838, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1201 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1239.0, + "completions/max_terminated_length": 1239.0, + "completions/mean_length": 972.3125, + "completions/mean_terminated_length": 972.3125, + "completions/min_length": 823.0, + "completions/min_terminated_length": 823.0, + "epoch": 0.24044808961792358, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6937268644283945, + "kl": 0.00687408447265625, + "learning_rate": 9.471558180228846e-07, + "loss": -0.001, + "num_tokens": 52359755.0, + "reward": 0.0, + "reward_std": 0.9846370220184326, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.11826398251229922, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07994061193815942, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14194417264596723, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1202 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1415.0, + "completions/mean_length": 1311.0, + "completions/mean_terminated_length": 1225.0909423828125, + "completions/min_length": 1024.0, + "completions/min_terminated_length": 1024.0, + "epoch": 0.2406481296259252, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9715979117365254, + "kl": 0.0097808837890625, + "learning_rate": 9.470079757563746e-07, + "loss": 0.0129, + "num_tokens": 52415227.0, + "reward": 0.0, + "reward_std": 0.8675866723060608, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.14616954373736896, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.19417035517285725, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08734775114237132, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1203 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1490.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1179.75, + "completions/mean_terminated_length": 1179.75, + "completions/min_length": 867.0, + "completions/min_terminated_length": 867.0, + "epoch": 0.24084816963392677, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3512491326616507, + "kl": 0.0118408203125, + "learning_rate": 9.468599399069729e-07, + "loss": 0.0573, + "num_tokens": 52460839.0, + "reward": 0.0, + "reward_std": 0.8283161520957947, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.016146891614507494, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11817852782003538, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0906764700582363, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1204 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1390.0, + "completions/mean_length": 1078.4375, + "completions/mean_terminated_length": 1050.3333740234375, + "completions/min_length": 573.0, + "completions/min_terminated_length": 573.0, + "epoch": 0.24104820964192838, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.777618692146142, + "kl": 0.007080078125, + "learning_rate": 9.467117105468623e-07, + "loss": -0.0712, + "num_tokens": 52503998.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8827570676803589, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.16176267969981475, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1404042891640454, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1205 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1288.0, + "completions/max_terminated_length": 1288.0, + "completions/mean_length": 964.875, + "completions/mean_terminated_length": 964.875, + "completions/min_length": 663.0, + "completions/min_terminated_length": 663.0, + "epoch": 0.24124824964993, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.712854836750848, + "kl": 0.014373779296875, + "learning_rate": 9.465632877483203e-07, + "loss": -0.0277, + "num_tokens": 52540620.0, + "reward": 0.0, + "reward_std": 0.8569568395614624, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.011834589435428313, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06432164801705738, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08606629658238704, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1206 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1272.0, + "completions/mean_terminated_length": 1168.3636474609375, + "completions/min_length": 966.0, + "completions/min_terminated_length": 966.0, + "epoch": 0.24144828965793158, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.8928807584895684, + "kl": 0.0035800933837890625, + "learning_rate": 9.464146715837185e-07, + "loss": -0.0277, + "num_tokens": 52597004.0, + "reward": 0.0, + "reward_std": 0.9806574583053589, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.178347621395375, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07634789875832494, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04127594582445935, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1207 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1238.0, + "completions/max_terminated_length": 1238.0, + "completions/mean_length": 1101.6875, + "completions/mean_terminated_length": 1101.6875, + "completions/min_length": 707.0, + "completions/min_terminated_length": 707.0, + "epoch": 0.2416483296659332, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.474733757685711, + "kl": 0.0137786865234375, + "learning_rate": 9.462658621255226e-07, + "loss": 0.0077, + "num_tokens": 52639863.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9360576868057251, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10437993952717682, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08491685770341512, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05374838498865701, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1208 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1217.5, + "completions/mean_terminated_length": 1177.1429443359375, + "completions/min_length": 997.0, + "completions/min_terminated_length": 997.0, + "epoch": 0.24184836967393478, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.83895273711735, + "kl": 0.0102386474609375, + "learning_rate": 9.461168594462931e-07, + "loss": -0.0269, + "num_tokens": 52675151.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5193768739700317, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.019543760277669457, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.18860270600857104, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06540472290116194, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1209 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1204.375, + "completions/mean_terminated_length": 1162.1429443359375, + "completions/min_length": 1035.0, + "completions/min_terminated_length": 1035.0, + "epoch": 0.2420484096819364, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1866455882268636, + "kl": 0.0086212158203125, + "learning_rate": 9.459676636186839e-07, + "loss": 0.0277, + "num_tokens": 52721997.0, + "reward": 0.0, + "reward_std": 0.7753390669822693, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.048562005663089915, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06633970386947907, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05426273532033234, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1210 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1424.0, + "completions/mean_length": 1156.8125, + "completions/mean_terminated_length": 1042.416748046875, + "completions/min_length": 764.0, + "completions/min_terminated_length": 764.0, + "epoch": 0.242248449689938, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2507896306020654, + "kl": 0.01312255859375, + "learning_rate": 9.458182747154441e-07, + "loss": -0.0517, + "num_tokens": 52764170.0, + "reward": 0.0, + "reward_std": 0.7809500694274902, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04438737939509834, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06275589109242583, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11800816042090448, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1211 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1135.8125, + "completions/mean_terminated_length": 1111.533447265625, + "completions/min_length": 877.0, + "completions/min_terminated_length": 877.0, + "epoch": 0.24244848969793958, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5614740777233824, + "kl": 0.0129547119140625, + "learning_rate": 9.456686928094162e-07, + "loss": 0.0439, + "num_tokens": 52814727.0, + "reward": 0.0, + "reward_std": 0.8031761646270752, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05799904165533688, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06281482457940048, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08509254221575907, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1212 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1424.0, + "completions/mean_length": 1142.125, + "completions/mean_terminated_length": 1091.0, + "completions/min_length": 683.0, + "completions/min_terminated_length": 683.0, + "epoch": 0.2426485297059412, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7198653641675476, + "kl": 0.00847625732421875, + "learning_rate": 9.455189179735369e-07, + "loss": -0.0444, + "num_tokens": 52854137.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5379201769828796, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0484470242893616, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06223003757785115, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10027739304327549, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1213 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1398.0, + "completions/mean_length": 1258.0625, + "completions/mean_terminated_length": 1177.416748046875, + "completions/min_length": 1068.0, + "completions/min_terminated_length": 1068.0, + "epoch": 0.24284856971394278, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3319839889075107, + "kl": 0.013519287109375, + "learning_rate": 9.453689502808372e-07, + "loss": -0.0359, + "num_tokens": 52906194.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5433708429336548, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03526778600763611, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03642441276404843, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07649739768026002, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1214 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1444.0, + "completions/mean_length": 1186.8125, + "completions/mean_terminated_length": 1165.933349609375, + "completions/min_length": 933.0, + "completions/min_terminated_length": 933.0, + "epoch": 0.2430486097219444, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1776198635596153, + "kl": 0.013275146484375, + "learning_rate": 9.452187898044421e-07, + "loss": 0.0387, + "num_tokens": 52953703.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0513304471969604, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10325991737915406, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08059839353682614, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12464765155042849, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1215 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1351.0, + "completions/max_terminated_length": 1351.0, + "completions/mean_length": 885.625, + "completions/mean_terminated_length": 885.625, + "completions/min_length": 402.0, + "completions/min_terminated_length": 402.0, + "epoch": 0.24324864972994598, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.265971127343422, + "kl": 0.0152435302734375, + "learning_rate": 9.450684366175703e-07, + "loss": -0.0216, + "num_tokens": 52985409.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.044643759727478, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.2350737159782352, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.22005201542828726, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1308094458023239, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1216 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1338.0, + "completions/max_terminated_length": 1338.0, + "completions/mean_length": 1171.1875, + "completions/mean_terminated_length": 1171.1875, + "completions/min_length": 732.0, + "completions/min_terminated_length": 732.0, + "epoch": 0.2434486897379476, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.28747403636103, + "kl": 0.0104827880859375, + "learning_rate": 9.449178907935349e-07, + "loss": -0.0394, + "num_tokens": 53028380.0, + "reward": -3.3527612686157227e-08, + "reward_std": 1.0639660358428955, + "rewards/wordcountpos_reward_nokeypoint/mean": -3.3527612686157227e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04450585881326605, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09542926606147568, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06440611887195305, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1217 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1292.0, + "completions/max_terminated_length": 1292.0, + "completions/mean_length": 1008.875, + "completions/mean_terminated_length": 1008.875, + "completions/min_length": 733.0, + "completions/min_terminated_length": 733.0, + "epoch": 0.2436487297459492, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0654200560113347, + "kl": 0.00815582275390625, + "learning_rate": 9.447671524057427e-07, + "loss": -0.054, + "num_tokens": 53059410.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0280407667160034, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.02520444522148089, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08231323997889545, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09269623828717428, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1218 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1415.0, + "completions/max_terminated_length": 1415.0, + "completions/mean_length": 1199.8125, + "completions/mean_terminated_length": 1199.8125, + "completions/min_length": 1035.0, + "completions/min_terminated_length": 1035.0, + "epoch": 0.24384876975395078, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1823099782985413, + "kl": 0.0121612548828125, + "learning_rate": 9.446162215276942e-07, + "loss": -0.0237, + "num_tokens": 53105887.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8998653292655945, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1902038754507695, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.073023599543687, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07685966046898339, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1219 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1440.0, + "completions/mean_length": 1235.625, + "completions/mean_terminated_length": 1218.0001220703125, + "completions/min_length": 914.0, + "completions/min_terminated_length": 914.0, + "epoch": 0.2440488097619524, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0349614195635692, + "kl": 0.01020050048828125, + "learning_rate": 9.444650982329844e-07, + "loss": -0.0361, + "num_tokens": 53153017.0, + "reward": 0.0, + "reward_std": 0.6303545236587524, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03685984060360664, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.21810477340588655, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07649739768026002, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1220 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1463.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1139.9375, + "completions/mean_terminated_length": 1139.9375, + "completions/min_length": 709.0, + "completions/min_terminated_length": 709.0, + "epoch": 0.24424884976995398, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9518963712227433, + "kl": 0.011077880859375, + "learning_rate": 9.443137825953013e-07, + "loss": -0.0475, + "num_tokens": 53192536.0, + "reward": 0.0, + "reward_std": 0.6459058523178101, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0647601303274762, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0721670257063344, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059004080210452226, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1221 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1352.0, + "completions/mean_length": 1152.125, + "completions/mean_terminated_length": 1128.933349609375, + "completions/min_length": 851.0, + "completions/min_terminated_length": 851.0, + "epoch": 0.2444488897779556, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.439272632842449, + "kl": 0.013519287109375, + "learning_rate": 9.441622746884275e-07, + "loss": 0.0129, + "num_tokens": 53238618.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0618233680725098, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.02437869523901307, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06263784065956741, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11080513425729777, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1222 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1273.1875, + "completions/mean_terminated_length": 1240.7857666015625, + "completions/min_length": 997.0, + "completions/min_terminated_length": 997.0, + "epoch": 0.2446489297859572, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9399872122540067, + "kl": 0.0103912353515625, + "learning_rate": 9.440105745862385e-07, + "loss": 0.0329, + "num_tokens": 53287381.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.5057079792022705, + "rewards/wordcountpos_reward_nokeypoint/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08115420820543624, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05763370513895736, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11417984514369005, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1223 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1454.0, + "completions/max_terminated_length": 1454.0, + "completions/mean_length": 1186.375, + "completions/mean_terminated_length": 1186.375, + "completions/min_length": 916.0, + "completions/min_terminated_length": 916.0, + "epoch": 0.2448489697939588, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.099868425157708, + "kl": 0.012725830078125, + "learning_rate": 9.438586823627042e-07, + "loss": -0.0317, + "num_tokens": 53329507.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7746308445930481, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08928048428029382, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07539116800050547, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08333333333333333, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1224 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1136.4375, + "completions/mean_terminated_length": 1084.5, + "completions/min_length": 765.0, + "completions/min_terminated_length": 765.0, + "epoch": 0.2450490098019604, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7676862319033737, + "kl": 0.017791748046875, + "learning_rate": 9.43706598091888e-07, + "loss": 0.0335, + "num_tokens": 53381762.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6101119518280029, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10197225813259092, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17673971414725653, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06652763279965644, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1225 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1436.0, + "completions/mean_length": 1195.0, + "completions/mean_terminated_length": 1174.666748046875, + "completions/min_length": 987.0, + "completions/min_terminated_length": 987.0, + "epoch": 0.24524904980996198, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.354066889693283, + "kl": 0.0105133056640625, + "learning_rate": 9.435543218479467e-07, + "loss": 0.0178, + "num_tokens": 53429474.0, + "reward": 0.0, + "reward_std": 0.7174580097198486, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.24134201228223356, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1621501263357574, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.15770342536029575, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1226 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1312.0, + "completions/mean_length": 1159.9375, + "completions/mean_terminated_length": 1137.2667236328125, + "completions/min_length": 1024.0, + "completions/min_terminated_length": 1024.0, + "epoch": 0.2454490898179636, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6586350468409226, + "kl": 0.011077880859375, + "learning_rate": 9.43401853705131e-07, + "loss": -0.0199, + "num_tokens": 53482497.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0614392757415771, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06281585625785374, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07066091283233365, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0697880388775209, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1227 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1427.0, + "completions/max_terminated_length": 1427.0, + "completions/mean_length": 1032.3125, + "completions/mean_terminated_length": 1032.3125, + "completions/min_length": 674.0, + "completions/min_terminated_length": 674.0, + "epoch": 0.2456491298259652, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9493077911267647, + "kl": 0.0160369873046875, + "learning_rate": 9.432491937377851e-07, + "loss": -0.0245, + "num_tokens": 53523718.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.962242841720581, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.11053323756596214, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1942167285547152, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10878112581387148, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1228 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1444.0, + "completions/max_terminated_length": 1444.0, + "completions/mean_length": 1170.75, + "completions/mean_terminated_length": 1170.75, + "completions/min_length": 961.0, + "completions/min_terminated_length": 961.0, + "epoch": 0.2458491698339668, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2806413210424243, + "kl": 0.0127105712890625, + "learning_rate": 9.430963420203465e-07, + "loss": -0.0077, + "num_tokens": 53571386.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9899970293045044, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.11115230025604603, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.055995817114852214, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875000000000001, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07187952884282608, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1229 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1409.0, + "completions/mean_length": 1080.1875, + "completions/mean_terminated_length": 1052.2000732421875, + "completions/min_length": 839.0, + "completions/min_terminated_length": 839.0, + "epoch": 0.2460492098419684, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3720528986961504, + "kl": 0.0139617919921875, + "learning_rate": 9.429432986273465e-07, + "loss": 0.0541, + "num_tokens": 53613189.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6104050278663635, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06717777257159771, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12076418540161357, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12816366850994054, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1230 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1339.0, + "completions/mean_length": 1247.875, + "completions/mean_terminated_length": 1231.0667724609375, + "completions/min_length": 1115.0, + "completions/min_terminated_length": 1115.0, + "epoch": 0.24624924984997, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8798407662473204, + "kl": 0.0099334716796875, + "learning_rate": 9.427900636334098e-07, + "loss": -0.0188, + "num_tokens": 53658419.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0036295652389526, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1858635897479361, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06209541573512593, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05146016078626404, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1231 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1283.0, + "completions/max_terminated_length": 1283.0, + "completions/mean_length": 938.0625, + "completions/mean_terminated_length": 938.0625, + "completions/min_length": 749.0, + "completions/min_terminated_length": 749.0, + "epoch": 0.2464492898579716, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.958809183944195, + "kl": 0.01263427734375, + "learning_rate": 9.426366371132546e-07, + "loss": -0.0375, + "num_tokens": 53688268.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8359350562095642, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04389255145634226, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05349317600793901, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06440611887195305, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1232 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1360.0, + "completions/mean_length": 1184.8125, + "completions/mean_terminated_length": 1163.800048828125, + "completions/min_length": 1021.0, + "completions/min_terminated_length": 1021.0, + "epoch": 0.24664932986597318, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7341796661808155, + "kl": 0.00970458984375, + "learning_rate": 9.42483019141692e-07, + "loss": 0.0324, + "num_tokens": 53731473.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9323559999465942, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.022534043851500103, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.061227993166724476, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.047919685895217376, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1233 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1192.0, + "completions/max_terminated_length": 1192.0, + "completions/mean_length": 1040.125, + "completions/mean_terminated_length": 1040.125, + "completions/min_length": 852.0, + "completions/min_terminated_length": 852.0, + "epoch": 0.2468493698739748, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.68192667926883, + "kl": 0.0140838623046875, + "learning_rate": 9.423292097936272e-07, + "loss": -0.0052, + "num_tokens": 53773715.0, + "reward": 0.0, + "reward_std": 0.9097425937652588, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07826289734370692, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1447114894998304, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1189459883650901, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1234 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1289.0, + "completions/mean_length": 1044.875, + "completions/mean_terminated_length": 1014.5333862304688, + "completions/min_length": 827.0, + "completions/min_terminated_length": 827.0, + "epoch": 0.2470494098819764, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8177355023509536, + "kl": 0.013641357421875, + "learning_rate": 9.421752091440581e-07, + "loss": -0.0588, + "num_tokens": 53810497.0, + "reward": 0.0, + "reward_std": 0.8530327081680298, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.2423505425881665, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.060329934135573485, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08606629658238704, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1235 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1300.875, + "completions/mean_terminated_length": 1272.4285888671875, + "completions/min_length": 732.0, + "completions/min_terminated_length": 732.0, + "epoch": 0.247249449889978, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9545013941129223, + "kl": 0.042266845703125, + "learning_rate": 9.420210172680762e-07, + "loss": -0.0034, + "num_tokens": 53862687.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.9057515263557434, + "rewards/wordcountpos_reward_nokeypoint/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.23051783061080316, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.34135662040932935, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875000000000001, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07187952884282608, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1236 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1314.1875, + "completions/mean_terminated_length": 1287.6429443359375, + "completions/min_length": 1111.0, + "completions/min_terminated_length": 1111.0, + "epoch": 0.2474494898979796, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.25881925673143, + "kl": 0.0145416259765625, + "learning_rate": 9.418666342408662e-07, + "loss": 0.0403, + "num_tokens": 53905994.0, + "reward": 0.0, + "reward_std": 0.5742642879486084, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08488932849685016, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09747793302286643, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12102953419784838, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1237 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1432.0, + "completions/mean_length": 1205.5, + "completions/mean_terminated_length": 1107.3333740234375, + "completions/min_length": 843.0, + "completions/min_terminated_length": 843.0, + "epoch": 0.24764952990598119, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.154980814432815, + "kl": 0.0106201171875, + "learning_rate": 9.41712060137706e-07, + "loss": 0.0083, + "num_tokens": 53951466.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.971402645111084, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.13887485175617598, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07764723057209483, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12524050936172842, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1238 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1474.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1205.5, + "completions/mean_terminated_length": 1205.5, + "completions/min_length": 1034.0, + "completions/min_terminated_length": 1034.0, + "epoch": 0.2478495699139828, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9789283130175526, + "kl": 0.0081024169921875, + "learning_rate": 9.415572950339664e-07, + "loss": 0.0081, + "num_tokens": 53989370.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.739425778388977, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08540651552923831, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05512596425346279, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10246950765959599, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1239 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1313.0, + "completions/max_terminated_length": 1313.0, + "completions/mean_length": 985.125, + "completions/mean_terminated_length": 985.125, + "completions/min_length": 777.0, + "completions/min_terminated_length": 777.0, + "epoch": 0.2480496099219844, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0949032679849635, + "kl": 0.0180206298828125, + "learning_rate": 9.414023390051118e-07, + "loss": -0.0317, + "num_tokens": 54020716.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.4895075261592865, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.014143431666919688, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12078984698946178, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11894598836509011, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1240 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1244.6875, + "completions/mean_terminated_length": 1227.666748046875, + "completions/min_length": 820.0, + "completions/min_terminated_length": 820.0, + "epoch": 0.248249649929986, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.092295980091014, + "kl": 0.0142669677734375, + "learning_rate": 9.412471921266994e-07, + "loss": 0.0098, + "num_tokens": 54073767.0, + "reward": 4.470348358154297e-08, + "reward_std": 0.9605109095573425, + "rewards/wordcountpos_reward_nokeypoint/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.20395037063650634, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07777350408963035, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08766518798921945, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1241 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1117.0, + "completions/max_terminated_length": 1117.0, + "completions/mean_length": 922.1875, + "completions/mean_terminated_length": 922.1875, + "completions/min_length": 667.0, + "completions/min_terminated_length": 667.0, + "epoch": 0.2484496899379876, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4960198219347838, + "kl": 0.014190673828125, + "learning_rate": 9.410918544743793e-07, + "loss": 0.0021, + "num_tokens": 54120266.0, + "reward": 3.725290298461914e-09, + "reward_std": 1.0329149961471558, + "rewards/wordcountpos_reward_nokeypoint/mean": 3.725290298461914e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.00019150370630627647, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12903890111907523, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08073734277593311, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1242 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1472.0, + "completions/max_terminated_length": 1472.0, + "completions/mean_length": 1148.75, + "completions/mean_terminated_length": 1148.75, + "completions/min_length": 925.0, + "completions/min_terminated_length": 925.0, + "epoch": 0.2486497299459892, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.109931622652004, + "kl": 0.06345367431640625, + "learning_rate": 9.409363261238952e-07, + "loss": -0.0061, + "num_tokens": 54162334.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.7796798944473267, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.22873651521232263, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.24870591434388237, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12171612389003691, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1243 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1264.25, + "completions/mean_terminated_length": 1185.666748046875, + "completions/min_length": 1062.0, + "completions/min_terminated_length": 1062.0, + "epoch": 0.2488497699539908, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.768524101501841, + "kl": 0.0121917724609375, + "learning_rate": 9.407806071510833e-07, + "loss": -0.0052, + "num_tokens": 54205386.0, + "reward": 0.0, + "reward_std": 0.7409926056861877, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0753059107342979, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10473618855679648, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07252075054258098, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1244 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1013.0, + "completions/max_terminated_length": 1013.0, + "completions/mean_length": 725.625, + "completions/mean_terminated_length": 725.625, + "completions/min_length": 316.0, + "completions/min_terminated_length": 316.0, + "epoch": 0.24904980996199239, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8717959521066, + "kl": 0.0124053955078125, + "learning_rate": 9.406246976318727e-07, + "loss": -0.0575, + "num_tokens": 54231580.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7055568695068359, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.05946655410375935, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04700074320551911, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08734775114237131, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1245 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1430.0, + "completions/mean_length": 1206.9375, + "completions/mean_terminated_length": 1139.3077392578125, + "completions/min_length": 928.0, + "completions/min_terminated_length": 928.0, + "epoch": 0.249249849969994, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.136110492157204, + "kl": 0.0151519775390625, + "learning_rate": 9.40468597642286e-07, + "loss": -0.0236, + "num_tokens": 54271891.0, + "reward": 0.0, + "reward_std": 1.048326849937439, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0030277391995366274, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0825866450176069, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11147163731607215, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1246 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1285.375, + "completions/mean_terminated_length": 1254.71435546875, + "completions/min_length": 859.0, + "completions/min_terminated_length": 859.0, + "epoch": 0.2494498899779956, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.891800606600598, + "kl": 0.0094757080078125, + "learning_rate": 9.403123072584378e-07, + "loss": 0.0054, + "num_tokens": 54316425.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5847506523132324, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.01261071473946541, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.22740363228662477, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.067631901304592, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1247 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1443.0, + "completions/max_terminated_length": 1443.0, + "completions/mean_length": 1201.375, + "completions/mean_terminated_length": 1201.375, + "completions/min_length": 1040.0, + "completions/min_terminated_length": 1040.0, + "epoch": 0.2496499299859972, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0554361505084566, + "kl": 0.012481689453125, + "learning_rate": 9.401558265565363e-07, + "loss": -0.0177, + "num_tokens": 54367159.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8567005395889282, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.012248348092095246, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07042102025637996, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09388724521901161, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1248 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1437.0, + "completions/mean_length": 1221.375, + "completions/mean_terminated_length": 1181.571533203125, + "completions/min_length": 858.0, + "completions/min_terminated_length": 858.0, + "epoch": 0.2498499699939988, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.352096598835339, + "kl": 0.0143280029296875, + "learning_rate": 9.399991556128821e-07, + "loss": 0.0544, + "num_tokens": 54410885.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9322360157966614, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1059824062165977, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.32903875578498126, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11547005383792516, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1249 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1453.0, + "completions/mean_length": 1234.875, + "completions/mean_terminated_length": 1197.0, + "completions/min_length": 971.0, + "completions/min_terminated_length": 971.0, + "epoch": 0.2500500100020004, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1804282920005242, + "kl": 0.01177978515625, + "learning_rate": 9.398422945038687e-07, + "loss": 0.0165, + "num_tokens": 54451691.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0614540576934814, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.10373415661249426, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14778080968629564, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09699179041242308, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1250 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1455.0, + "completions/max_terminated_length": 1455.0, + "completions/mean_length": 1097.4375, + "completions/mean_terminated_length": 1097.4375, + "completions/min_length": 862.0, + "completions/min_terminated_length": 862.0, + "epoch": 0.250250050010002, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.639865946986642, + "kl": 0.015411376953125, + "learning_rate": 9.396852433059822e-07, + "loss": 0.0371, + "num_tokens": 54496994.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7099945545196533, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03823811495200684, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1170961015713795, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10532137766186214, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1251 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1284.0, + "completions/max_terminated_length": 1284.0, + "completions/mean_length": 1165.4375, + "completions/mean_terminated_length": 1165.4375, + "completions/min_length": 925.0, + "completions/min_terminated_length": 925.0, + "epoch": 0.2504500900180036, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1943644329163465, + "kl": 0.0113067626953125, + "learning_rate": 9.395280020958017e-07, + "loss": -0.0229, + "num_tokens": 54538201.0, + "reward": 3.725290298461914e-09, + "reward_std": 0.9786310195922852, + "rewards/wordcountpos_reward_nokeypoint/mean": 3.725290298461914e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10537577306253201, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0698038632189857, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9833333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.029814239699997188, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1252 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1440.0, + "completions/mean_length": 1154.6875, + "completions/mean_terminated_length": 1131.666748046875, + "completions/min_length": 805.0, + "completions/min_terminated_length": 805.0, + "epoch": 0.2506501300260052, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0322462131680576, + "kl": 0.00890350341796875, + "learning_rate": 9.393705709499983e-07, + "loss": -0.0164, + "num_tokens": 54574100.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.3541765511035919, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.15031029678405067, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.27350711500958685, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05163977794943221, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1253 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1309.9375, + "completions/mean_terminated_length": 1223.5455322265625, + "completions/min_length": 793.0, + "completions/min_terminated_length": 793.0, + "epoch": 0.2508501700340068, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3584929751528922, + "kl": 0.01032257080078125, + "learning_rate": 9.392129499453365e-07, + "loss": -0.0048, + "num_tokens": 54620995.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.47694459557533264, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.16913835987888923, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.3061327288733996, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04554200340426487, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1254 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1115.0, + "completions/max_terminated_length": 1115.0, + "completions/mean_length": 920.125, + "completions/mean_terminated_length": 920.125, + "completions/min_length": 667.0, + "completions/min_terminated_length": 667.0, + "epoch": 0.2510502100420084, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7414469962002737, + "kl": 0.01055145263671875, + "learning_rate": 9.390551391586729e-07, + "loss": 0.0405, + "num_tokens": 54655197.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0165935754776, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07641552976876126, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0674252626573512, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.975, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04127594582445935, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1255 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1221.0, + "completions/max_terminated_length": 1221.0, + "completions/mean_length": 1068.8125, + "completions/mean_terminated_length": 1068.8125, + "completions/min_length": 935.0, + "completions/min_terminated_length": 935.0, + "epoch": 0.25125025005001, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3913980662423513, + "kl": 0.004581451416015625, + "learning_rate": 9.388971386669569e-07, + "loss": -0.0069, + "num_tokens": 54688018.0, + "reward": 0.0, + "reward_std": 1.066338300704956, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03868014895406355, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1069739226692894, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.15244914148902494, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1256 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1210.875, + "completions/mean_terminated_length": 1144.1539306640625, + "completions/min_length": 893.0, + "completions/min_terminated_length": 893.0, + "epoch": 0.2514502900580116, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.318268962013941, + "kl": 0.0146026611328125, + "learning_rate": 9.387389485472301e-07, + "loss": 0.0146, + "num_tokens": 54726192.0, + "reward": 0.0, + "reward_std": 0.9853297472000122, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.009920810494358494, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.015697499696447313, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06540472290116194, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1257 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1458.0, + "completions/max_terminated_length": 1458.0, + "completions/mean_length": 1072.0, + "completions/mean_terminated_length": 1072.0, + "completions/min_length": 719.0, + "completions/min_terminated_length": 719.0, + "epoch": 0.25165033006601323, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0809742813493592, + "kl": 0.011993408203125, + "learning_rate": 9.385805688766268e-07, + "loss": 0.0346, + "num_tokens": 54764224.0, + "reward": 0.0, + "reward_std": 0.9851630926132202, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1068799431208477, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.061201023480415374, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.033333333333333326, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1258 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1418.0, + "completions/mean_length": 1180.25, + "completions/mean_terminated_length": 1158.933349609375, + "completions/min_length": 930.0, + "completions/min_terminated_length": 930.0, + "epoch": 0.2518503700740148, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6532574579688313, + "kl": 0.015869140625, + "learning_rate": 9.384219997323734e-07, + "loss": -0.0016, + "num_tokens": 54808956.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0037332773208618, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.3930027159463211, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13156237433323742, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12345339501504503, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1259 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1185.1875, + "completions/mean_terminated_length": 1112.5384521484375, + "completions/min_length": 795.0, + "completions/min_terminated_length": 795.0, + "epoch": 0.2520504100820164, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0797593303167714, + "kl": 0.010040283203125, + "learning_rate": 9.382632411917896e-07, + "loss": -0.0207, + "num_tokens": 54851247.0, + "reward": 0.0, + "reward_std": 0.7204616069793701, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08751511422882009, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08119396813009043, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.03442651863295481, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1260 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1350.0, + "completions/mean_length": 1197.125, + "completions/mean_terminated_length": 1153.857177734375, + "completions/min_length": 997.0, + "completions/min_terminated_length": 997.0, + "epoch": 0.252250450090018, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4416676394475623, + "kl": 0.01458740234375, + "learning_rate": 9.38104293332286e-07, + "loss": -0.0015, + "num_tokens": 54902641.0, + "reward": 0.0, + "reward_std": 0.9836982488632202, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.005099535100076736, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17089507846710492, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.19007795671678931, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1261 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1286.0, + "completions/max_terminated_length": 1286.0, + "completions/mean_length": 1053.9375, + "completions/mean_terminated_length": 1053.9375, + "completions/min_length": 723.0, + "completions/min_terminated_length": 723.0, + "epoch": 0.2524504900980196, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.657046308639508, + "kl": 0.0107879638671875, + "learning_rate": 9.379451562313665e-07, + "loss": -0.0111, + "num_tokens": 54929704.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9905418157577515, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.02096227980973242, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.034818810304306766, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875000000000001, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05821416398857659, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1262 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1348.0, + "completions/max_terminated_length": 1348.0, + "completions/mean_length": 1128.8125, + "completions/mean_terminated_length": 1128.8125, + "completions/min_length": 842.0, + "completions/min_terminated_length": 842.0, + "epoch": 0.25265053010602123, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0087346226126095, + "kl": 0.0109100341796875, + "learning_rate": 9.377858299666274e-07, + "loss": 0.0194, + "num_tokens": 54966173.0, + "reward": 0.0, + "reward_std": 0.7078356742858887, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.10458529552353578, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13042498845931064, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08777074514725108, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1263 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1451.0, + "completions/mean_length": 1120.3125, + "completions/mean_terminated_length": 1066.071533203125, + "completions/min_length": 813.0, + "completions/min_terminated_length": 813.0, + "epoch": 0.2528505701140228, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.579104153449033, + "kl": 0.0125732421875, + "learning_rate": 9.376263146157567e-07, + "loss": -0.0204, + "num_tokens": 55015082.0, + "reward": 0.0, + "reward_std": 0.7294256091117859, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.00879907113162733, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09640545944273621, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09878896324620105, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1264 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1330.0, + "completions/max_terminated_length": 1330.0, + "completions/mean_length": 1104.3125, + "completions/mean_terminated_length": 1104.3125, + "completions/min_length": 870.0, + "completions/min_terminated_length": 870.0, + "epoch": 0.2530506101220244, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.815922766431481, + "kl": 0.00933074951171875, + "learning_rate": 9.374666102565349e-07, + "loss": 0.016, + "num_tokens": 55059751.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.056098461151123, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.18868509876646047, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05477410121249519, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12171612389003694, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1265 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1279.0, + "completions/mean_length": 1130.875, + "completions/mean_terminated_length": 1106.2667236328125, + "completions/min_length": 799.0, + "completions/min_terminated_length": 799.0, + "epoch": 0.253250650130026, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7068121766909363, + "kl": 0.00748443603515625, + "learning_rate": 9.373067169668342e-07, + "loss": 0.0096, + "num_tokens": 55114557.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9098656177520752, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04519905473545071, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12157048542197176, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06426219440409447, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1266 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1490.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1210.1875, + "completions/mean_terminated_length": 1210.1875, + "completions/min_length": 982.0, + "completions/min_terminated_length": 982.0, + "epoch": 0.2534506901380276, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5135850217760063, + "kl": 0.015472412109375, + "learning_rate": 9.3714663482462e-07, + "loss": 0.0269, + "num_tokens": 55160344.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.5098235607147217, + "rewards/wordcountpos_reward_nokeypoint/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03121274086652067, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11427134190228477, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10576003586036263, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1267 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1381.0, + "completions/mean_length": 1187.75, + "completions/mean_terminated_length": 1115.6923828125, + "completions/min_length": 804.0, + "completions/min_terminated_length": 804.0, + "epoch": 0.2536507301460292, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.277436968999846, + "kl": 0.0126953125, + "learning_rate": 9.369863639079483e-07, + "loss": -0.0712, + "num_tokens": 55211388.0, + "reward": 0.0, + "reward_std": 0.9006429314613342, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05929345841095649, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05726361991467606, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11917929226045819, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1268 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1204.4375, + "completions/mean_terminated_length": 1162.21435546875, + "completions/min_length": 926.0, + "completions/min_terminated_length": 926.0, + "epoch": 0.2538507701540308, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.690646215291593, + "kl": 0.00948333740234375, + "learning_rate": 9.368259042949684e-07, + "loss": -0.0606, + "num_tokens": 55265147.0, + "reward": 0.0, + "reward_std": 0.9207109212875366, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.13336369837876255, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14639084322795307, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11287488977066931, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1269 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1456.0, + "completions/mean_length": 1007.9375, + "completions/mean_terminated_length": 975.1333618164062, + "completions/min_length": 600.0, + "completions/min_terminated_length": 600.0, + "epoch": 0.2540508101620324, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.635778649447583, + "kl": 0.0157012939453125, + "learning_rate": 9.366652560639213e-07, + "loss": 0.0004, + "num_tokens": 55308378.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7772260904312134, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.016731103395015275, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.027247289931688377, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11666666666666668, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1270 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1317.0, + "completions/max_terminated_length": 1317.0, + "completions/mean_length": 938.1875, + "completions/mean_terminated_length": 938.1875, + "completions/min_length": 761.0, + "completions/min_terminated_length": 761.0, + "epoch": 0.254250850170034, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5971317456732588, + "kl": 0.0134735107421875, + "learning_rate": 9.36504419293139e-07, + "loss": -0.0341, + "num_tokens": 55350317.0, + "reward": 0.0, + "reward_std": 0.7713504433631897, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1369755817755484, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1925700782003626, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06978803887752091, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1271 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1264.0, + "completions/max_terminated_length": 1264.0, + "completions/mean_length": 1016.875, + "completions/mean_terminated_length": 1016.875, + "completions/min_length": 875.0, + "completions/min_terminated_length": 875.0, + "epoch": 0.2544508901780356, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.962896609147739, + "kl": 0.008724212646484375, + "learning_rate": 9.363433940610473e-07, + "loss": -0.0229, + "num_tokens": 55385147.0, + "reward": 0.0, + "reward_std": 0.5518646240234375, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.11723211739587884, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0788678688549081, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12345339501504503, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1272 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1250.0, + "completions/max_terminated_length": 1250.0, + "completions/mean_length": 991.3125, + "completions/mean_terminated_length": 991.3125, + "completions/min_length": 676.0, + "completions/min_terminated_length": 676.0, + "epoch": 0.2546509301860372, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8342320294937458, + "kl": 0.01763916015625, + "learning_rate": 9.36182180446162e-07, + "loss": -0.0069, + "num_tokens": 55429808.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.8455149531364441, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.20888380701728332, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.35740995273242926, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10602235962635781, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1273 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1434.0, + "completions/mean_length": 1326.125, + "completions/mean_terminated_length": 1221.800048828125, + "completions/min_length": 1115.0, + "completions/min_terminated_length": 1115.0, + "epoch": 0.2548509701940388, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.882408305269296, + "kl": 0.0118408203125, + "learning_rate": 9.360207785270919e-07, + "loss": 0.018, + "num_tokens": 55474858.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0448600053787231, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04113254167647894, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07751971156413977, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10852547064066473, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1274 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1455.0, + "completions/max_terminated_length": 1455.0, + "completions/mean_length": 1221.3125, + "completions/mean_terminated_length": 1221.3125, + "completions/min_length": 998.0, + "completions/min_terminated_length": 998.0, + "epoch": 0.2550510102020404, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1362475809008488, + "kl": 0.0132598876953125, + "learning_rate": 9.358591883825374e-07, + "loss": -0.0042, + "num_tokens": 55526959.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9423317909240723, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04743876047279608, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09517807310831908, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.045338235029118136, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1275 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1273.0, + "completions/max_terminated_length": 1273.0, + "completions/mean_length": 1035.1875, + "completions/mean_terminated_length": 1035.1875, + "completions/min_length": 769.0, + "completions/min_terminated_length": 769.0, + "epoch": 0.255251050210042, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5902298590136197, + "kl": 0.006275177001953125, + "learning_rate": 9.356974100912905e-07, + "loss": -0.0309, + "num_tokens": 55559802.0, + "reward": 0.0, + "reward_std": 0.7188621163368225, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1839374185906732, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07328841261575203, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06382847385042252, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1276 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1266.0, + "completions/max_terminated_length": 1266.0, + "completions/mean_length": 1050.1875, + "completions/mean_terminated_length": 1050.1875, + "completions/min_length": 797.0, + "completions/min_terminated_length": 797.0, + "epoch": 0.25545109021804363, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.047631956187371, + "kl": 0.0120391845703125, + "learning_rate": 9.355354437322349e-07, + "loss": 0.0057, + "num_tokens": 55601693.0, + "reward": -4.470348358154297e-08, + "reward_std": 0.9923591017723083, + "rewards/wordcountpos_reward_nokeypoint/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0156493823410628, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09808535094747943, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08734775114237132, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1277 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1484.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 901.625, + "completions/mean_terminated_length": 901.625, + "completions/min_length": 422.0, + "completions/min_terminated_length": 422.0, + "epoch": 0.2556511302260452, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.965598507628534, + "kl": 0.0116119384765625, + "learning_rate": 9.353732893843463e-07, + "loss": 0.1055, + "num_tokens": 55627711.0, + "reward": 0.0, + "reward_std": 0.7974559664726257, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.029207024759345637, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.054133397929865, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13977495139343474, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1278 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1443.0, + "completions/mean_length": 1171.25, + "completions/mean_terminated_length": 1095.3846435546875, + "completions/min_length": 856.0, + "completions/min_terminated_length": 856.0, + "epoch": 0.2558511702340468, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.781892414506334, + "kl": 0.0122833251953125, + "learning_rate": 9.352109471266921e-07, + "loss": 0.024, + "num_tokens": 55673491.0, + "reward": -3.725290298461914e-09, + "reward_std": 0.9901776313781738, + "rewards/wordcountpos_reward_nokeypoint/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09980925627166456, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06161923461874671, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0620632890834175, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1279 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1453.0, + "completions/mean_length": 1278.125, + "completions/mean_terminated_length": 1145.0, + "completions/min_length": 951.0, + "completions/min_terminated_length": 951.0, + "epoch": 0.2560512102420484, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7374192406869247, + "kl": 0.0115966796875, + "learning_rate": 9.350484170384305e-07, + "loss": -0.0207, + "num_tokens": 55726741.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5881100296974182, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.058862094366562145, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.23083490500078466, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09067647005823629, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1280 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1420.0, + "completions/mean_length": 1285.875, + "completions/mean_terminated_length": 1255.2857666015625, + "completions/min_length": 942.0, + "completions/min_terminated_length": 942.0, + "epoch": 0.25625125025005, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.898504235212053, + "kl": 0.0119781494140625, + "learning_rate": 9.348856991988124e-07, + "loss": -0.0616, + "num_tokens": 55781699.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8732249736785889, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08880605096880252, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0470121423297169, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11279282877125754, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1281 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1187.5625, + "completions/mean_terminated_length": 1000.1000366210938, + "completions/min_length": 779.0, + "completions/min_terminated_length": 779.0, + "epoch": 0.25645129025805163, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.364446514029598, + "kl": 0.00995635986328125, + "learning_rate": 9.347227936871798e-07, + "loss": -0.0376, + "num_tokens": 55824156.0, + "reward": 0.0, + "reward_std": 0.917456865310669, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09663904719928909, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06452782048504827, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1308094458023239, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1282 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1312.0, + "completions/mean_length": 1227.0625, + "completions/mean_terminated_length": 1188.071533203125, + "completions/min_length": 1033.0, + "completions/min_terminated_length": 1033.0, + "epoch": 0.2566513302660532, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.988575395919066, + "kl": 0.0157318115234375, + "learning_rate": 9.345597005829659e-07, + "loss": -0.007, + "num_tokens": 55863829.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.5025292038917542, + "rewards/wordcountpos_reward_nokeypoint/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0036147807459967606, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11403443207296036, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.975, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059004080210452226, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327954292297363, + "step": 1283 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1371.0, + "completions/mean_length": 1090.4375, + "completions/mean_terminated_length": 1063.1334228515625, + "completions/min_length": 777.0, + "completions/min_terminated_length": 777.0, + "epoch": 0.2568513702740548, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6341694245178155, + "kl": 0.0121002197265625, + "learning_rate": 9.343964199656958e-07, + "loss": -0.0498, + "num_tokens": 55900444.0, + "reward": 0.0, + "reward_std": 0.6826701760292053, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.31457080782743124, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17814521897705315, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05163977794943221, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327954292297363, + "step": 1284 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1425.0, + "completions/mean_length": 1180.1875, + "completions/mean_terminated_length": 1134.5, + "completions/min_length": 968.0, + "completions/min_terminated_length": 968.0, + "epoch": 0.2570514102820564, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.035200603899983, + "kl": 0.0118560791015625, + "learning_rate": 9.342329519149857e-07, + "loss": -0.008, + "num_tokens": 55942975.0, + "reward": 0.0, + "reward_std": 0.7832263708114624, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.044028199832397495, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10125697869298697, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.060705726131767695, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1285 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1143.0, + "completions/max_terminated_length": 1143.0, + "completions/mean_length": 938.75, + "completions/mean_terminated_length": 938.75, + "completions/min_length": 727.0, + "completions/min_terminated_length": 727.0, + "epoch": 0.257251450290058, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9576026765332863, + "kl": 0.019927978515625, + "learning_rate": 9.340692965105436e-07, + "loss": 0.0163, + "num_tokens": 55991003.0, + "reward": 0.0, + "reward_std": 0.8953949213027954, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06115289820184245, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10000277326566105, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08777074514725114, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1286 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1470.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1255.4375, + "completions/mean_terminated_length": 1255.4375, + "completions/min_length": 1065.0, + "completions/min_terminated_length": 1065.0, + "epoch": 0.25745149029805964, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.096336939617325, + "kl": 0.013763427734375, + "learning_rate": 9.339054538321684e-07, + "loss": -0.0171, + "num_tokens": 56040738.0, + "reward": 1.862645149230957e-08, + "reward_std": 1.002617597579956, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.862645149230957e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.16117463222767492, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1462118199193502, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06540472290116194, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1287 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1211.0, + "completions/max_terminated_length": 1211.0, + "completions/mean_length": 983.6875, + "completions/mean_terminated_length": 983.6875, + "completions/min_length": 707.0, + "completions/min_terminated_length": 707.0, + "epoch": 0.2576515303060612, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4961260665182947, + "kl": 0.0120086669921875, + "learning_rate": 9.337414239597508e-07, + "loss": -0.0141, + "num_tokens": 56076325.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9737988710403442, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.049717106821487306, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05294628817962337, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12285191326386657, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1288 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1429.0, + "completions/max_terminated_length": 1429.0, + "completions/mean_length": 1100.5625, + "completions/mean_terminated_length": 1100.5625, + "completions/min_length": 826.0, + "completions/min_terminated_length": 826.0, + "epoch": 0.2578515703140628, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3612620579283967, + "kl": 0.0141754150390625, + "learning_rate": 9.335772069732721e-07, + "loss": -0.079, + "num_tokens": 56122686.0, + "reward": 0.0, + "reward_std": 0.7122688889503479, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.01754355521315733, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07009856654827226, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07781745019952503, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1289 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1280.0, + "completions/max_terminated_length": 1280.0, + "completions/mean_length": 1077.9375, + "completions/mean_terminated_length": 1077.9375, + "completions/min_length": 928.0, + "completions/min_terminated_length": 928.0, + "epoch": 0.2580516103220644, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8895980622506086, + "kl": 0.010467529296875, + "learning_rate": 9.334128029528056e-07, + "loss": -0.0146, + "num_tokens": 56165197.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.01045560836792, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10002061606217912, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14507345848193037, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.053748384988656986, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1290 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1081.0, + "completions/mean_length": 855.5625, + "completions/mean_terminated_length": 812.6000366210938, + "completions/min_length": 562.0, + "completions/min_terminated_length": 562.0, + "epoch": 0.25825165033006603, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.466832196476743, + "kl": 0.01043701171875, + "learning_rate": 9.332482119785154e-07, + "loss": -0.0894, + "num_tokens": 56207822.0, + "reward": 0.0, + "reward_std": 0.9924743175506592, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05124872520236903, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05598982607455689, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14446581038560777, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1291 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1347.0, + "completions/max_terminated_length": 1347.0, + "completions/mean_length": 1092.5625, + "completions/mean_terminated_length": 1092.5625, + "completions/min_length": 833.0, + "completions/min_terminated_length": 833.0, + "epoch": 0.25845169033806764, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3103648027363173, + "kl": 0.0127105712890625, + "learning_rate": 9.330834341306568e-07, + "loss": 0.0225, + "num_tokens": 56243007.0, + "reward": 0.0, + "reward_std": 0.853863000869751, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08782901753524505, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05664993838101348, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09651328828101763, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1292 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1404.0, + "completions/mean_length": 1185.9375, + "completions/mean_terminated_length": 1165.0, + "completions/min_length": 860.0, + "completions/min_terminated_length": 860.0, + "epoch": 0.2586517303460692, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.484132894994341, + "kl": 0.0126800537109375, + "learning_rate": 9.329184694895761e-07, + "loss": -0.0293, + "num_tokens": 56283926.0, + "reward": 0.0, + "reward_std": 0.8243103623390198, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.061789103598527156, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.24311752469623626, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0697880388775209, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1293 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1383.0, + "completions/max_terminated_length": 1383.0, + "completions/mean_length": 998.6875, + "completions/mean_terminated_length": 998.6875, + "completions/min_length": 720.0, + "completions/min_terminated_length": 720.0, + "epoch": 0.2588517703540708, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3377592463050307, + "kl": 0.0142059326171875, + "learning_rate": 9.327533181357108e-07, + "loss": -0.0151, + "num_tokens": 56330433.0, + "reward": 0.0, + "reward_std": 1.0442612171173096, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04669353850396883, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07494987076174393, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.057089922571845, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1294 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1419.0, + "completions/mean_length": 1365.625, + "completions/mean_terminated_length": 1285.0, + "completions/min_length": 1082.0, + "completions/min_terminated_length": 1082.0, + "epoch": 0.2590518103620724, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.190583353339365, + "kl": 0.0143585205078125, + "learning_rate": 9.325879801495896e-07, + "loss": -0.0115, + "num_tokens": 56378203.0, + "reward": 0.0, + "reward_std": 0.771960973739624, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1596142335952996, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2370239843365725, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08333333333333334, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1295 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1203.0, + "completions/max_terminated_length": 1203.0, + "completions/mean_length": 1012.0, + "completions/mean_terminated_length": 1012.0, + "completions/min_length": 921.0, + "completions/min_terminated_length": 921.0, + "epoch": 0.25925185037007403, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.418319196793221, + "kl": 0.01015472412109375, + "learning_rate": 9.32422455611832e-07, + "loss": 0.001, + "num_tokens": 56412051.0, + "reward": 0.0, + "reward_std": 0.41568315029144287, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08634685888530487, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16896584437002507, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10532137766186213, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1296 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1442.0, + "completions/max_terminated_length": 1442.0, + "completions/mean_length": 1170.25, + "completions/mean_terminated_length": 1170.25, + "completions/min_length": 935.0, + "completions/min_terminated_length": 935.0, + "epoch": 0.2594518903780756, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3392955121610997, + "kl": 0.01251220703125, + "learning_rate": 9.322567446031485e-07, + "loss": 0.0207, + "num_tokens": 56463799.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.015886902809143, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.013236193982189167, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1525590578151914, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09016445879408155, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1297 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1163.0, + "completions/max_terminated_length": 1163.0, + "completions/mean_length": 986.75, + "completions/mean_terminated_length": 986.75, + "completions/min_length": 811.0, + "completions/min_terminated_length": 811.0, + "epoch": 0.2596519303860772, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.426965607406823, + "kl": 0.00897216796875, + "learning_rate": 9.320908472043405e-07, + "loss": 0.0148, + "num_tokens": 56503547.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0688046216964722, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.22310511979027212, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2855760292993534, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14707015206910487, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1298 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1229.0, + "completions/mean_terminated_length": 1210.933349609375, + "completions/min_length": 1003.0, + "completions/min_terminated_length": 1003.0, + "epoch": 0.2598519703940788, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1596585316439216, + "kl": 0.014892578125, + "learning_rate": 9.319247634963005e-07, + "loss": -0.007, + "num_tokens": 56548387.0, + "reward": -3.3527612686157227e-08, + "reward_std": 0.9706915616989136, + "rewards/wordcountpos_reward_nokeypoint/mean": -3.3527612686157227e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.050495697712348034, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1136792936651789, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09108400680852977, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1299 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1433.0, + "completions/mean_length": 1279.875, + "completions/mean_terminated_length": 1229.0770263671875, + "completions/min_length": 936.0, + "completions/min_terminated_length": 936.0, + "epoch": 0.2600520104020804, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8058397738395287, + "kl": 0.01468658447265625, + "learning_rate": 9.317584935600112e-07, + "loss": 0.0032, + "num_tokens": 56600857.0, + "reward": 0.0, + "reward_std": 0.8875187039375305, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06051126431581041, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11474191482987793, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10532137766186214, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1300 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1184.0, + "completions/max_terminated_length": 1184.0, + "completions/mean_length": 988.75, + "completions/mean_terminated_length": 988.75, + "completions/min_length": 863.0, + "completions/min_terminated_length": 863.0, + "epoch": 0.26025205041008204, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7582527384570694, + "kl": 0.0146026611328125, + "learning_rate": 9.315920374765473e-07, + "loss": 0.0233, + "num_tokens": 56641933.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8171082735061646, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.3626351221539215, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13936567819905546, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09179284245476838, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1301 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1490.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1186.0625, + "completions/mean_terminated_length": 1186.0625, + "completions/min_length": 985.0, + "completions/min_terminated_length": 985.0, + "epoch": 0.2604520904180836, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.642971366860192, + "kl": 0.00962066650390625, + "learning_rate": 9.314253953270729e-07, + "loss": -0.0038, + "num_tokens": 56684918.0, + "reward": -1.862645149230957e-09, + "reward_std": 1.0036709308624268, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.862645149230957e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.004563104533401587, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08169153233378387, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06666666666666667, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1302 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1459.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1202.0, + "completions/mean_terminated_length": 1202.0, + "completions/min_length": 1067.0, + "completions/min_terminated_length": 1067.0, + "epoch": 0.2606521304260852, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.871203268096461, + "kl": 0.01080322265625, + "learning_rate": 9.312585671928438e-07, + "loss": -0.0068, + "num_tokens": 56729782.0, + "reward": 0.0, + "reward_std": 0.7305346727371216, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08653971931405695, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15199492080008528, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.070841502796867, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1303 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1437.0, + "completions/mean_length": 1114.4375, + "completions/mean_terminated_length": 1059.357177734375, + "completions/min_length": 947.0, + "completions/min_terminated_length": 947.0, + "epoch": 0.2608521704340868, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.25388706823242, + "kl": 0.00589752197265625, + "learning_rate": 9.31091553155206e-07, + "loss": 0.0221, + "num_tokens": 56774229.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.931647777557373, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08619139168770305, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06596874988774026, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04554200340426487, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1304 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.75, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1453.0, + "completions/mean_length": 1472.0625, + "completions/mean_terminated_length": 1388.25, + "completions/min_length": 1305.0, + "completions/min_terminated_length": 1305.0, + "epoch": 0.26105221044208843, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6314825142033302, + "kl": 0.01023101806640625, + "learning_rate": 9.309243532955965e-07, + "loss": -0.0222, + "num_tokens": 56831910.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0551811456680298, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.009611739798220473, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.040677577957056676, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12765694770084507, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1305 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1376.0, + "completions/max_terminated_length": 1376.0, + "completions/mean_length": 1187.375, + "completions/mean_terminated_length": 1187.375, + "completions/min_length": 1019.0, + "completions/min_terminated_length": 1019.0, + "epoch": 0.26125225045009004, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1472618211331578, + "kl": 0.009918212890625, + "learning_rate": 9.307569676955427e-07, + "loss": -0.0302, + "num_tokens": 56872284.0, + "reward": 0.0, + "reward_std": 0.8624413013458252, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11490164886238587, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.23946899728696322, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07097208632298362, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1306 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1276.8125, + "completions/mean_terminated_length": 1244.9285888671875, + "completions/min_length": 829.0, + "completions/min_terminated_length": 829.0, + "epoch": 0.2614522904580916, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8236352040181933, + "kl": 0.010406494140625, + "learning_rate": 9.305893964366622e-07, + "loss": -0.0759, + "num_tokens": 56928961.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0201424360275269, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.18762916248648429, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.23527859631488385, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10809803506625451, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1307 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1416.75, + "completions/mean_terminated_length": 1352.0, + "completions/min_length": 1165.0, + "completions/min_terminated_length": 1165.0, + "epoch": 0.2616523304660932, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3247740142658775, + "kl": 0.0153045654296875, + "learning_rate": 9.30421639600664e-07, + "loss": 0.0111, + "num_tokens": 56980741.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0233627557754517, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.12054198949357664, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08969046568272315, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13333333333333333, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1308 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1215.0, + "completions/max_terminated_length": 1215.0, + "completions/mean_length": 1070.6875, + "completions/mean_terminated_length": 1070.6875, + "completions/min_length": 959.0, + "completions/min_terminated_length": 959.0, + "epoch": 0.2618523704740948, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7560859745497597, + "kl": 0.01300048828125, + "learning_rate": 9.302536972693468e-07, + "loss": 0.0008, + "num_tokens": 57014576.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5737980604171753, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06659232002698756, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05602160889049203, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06540472290116194, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1309 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1418.0, + "completions/max_terminated_length": 1418.0, + "completions/mean_length": 1176.0, + "completions/mean_terminated_length": 1176.0, + "completions/min_length": 950.0, + "completions/min_terminated_length": 950.0, + "epoch": 0.26205241048209643, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.6215495912480082, + "kl": 0.00882720947265625, + "learning_rate": 9.300855695246001e-07, + "loss": -0.011, + "num_tokens": 57055648.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8348129391670227, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.11076130610890005, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10614807407530995, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08766518798921945, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1310 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1237.625, + "completions/mean_terminated_length": 1220.1334228515625, + "completions/min_length": 993.0, + "completions/min_terminated_length": 993.0, + "epoch": 0.26225245049009804, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9994850460970297, + "kl": 0.01016998291015625, + "learning_rate": 9.299172564484037e-07, + "loss": -0.0248, + "num_tokens": 57100962.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0505247116088867, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.02733783934524784, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10133861017410127, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05443310539518172, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1311 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1450.0, + "completions/max_terminated_length": 1450.0, + "completions/mean_length": 1052.9375, + "completions/mean_terminated_length": 1052.9375, + "completions/min_length": 760.0, + "completions/min_terminated_length": 760.0, + "epoch": 0.2624524904980996, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.739217524706013, + "kl": 0.0135650634765625, + "learning_rate": 9.297487581228278e-07, + "loss": -0.0268, + "num_tokens": 57133897.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9666324853897095, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.02998346674742797, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0520993499910976, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05146016078626404, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1312 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1214.0, + "completions/max_terminated_length": 1214.0, + "completions/mean_length": 988.5625, + "completions/mean_terminated_length": 988.5625, + "completions/min_length": 778.0, + "completions/min_terminated_length": 778.0, + "epoch": 0.2626525305061012, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.021754128163401, + "kl": 0.008457183837890625, + "learning_rate": 9.295800746300333e-07, + "loss": -0.0072, + "num_tokens": 57180554.0, + "reward": 0.0, + "reward_std": 0.8228437304496765, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.2244581816108898, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0852714676442901, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1408308678285174, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1313 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1286.0, + "completions/max_terminated_length": 1286.0, + "completions/mean_length": 978.75, + "completions/mean_terminated_length": 978.75, + "completions/min_length": 813.0, + "completions/min_terminated_length": 813.0, + "epoch": 0.2628525705141028, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.238436150106498, + "kl": 0.01300048828125, + "learning_rate": 9.294112060522707e-07, + "loss": -0.033, + "num_tokens": 57229814.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0397770404815674, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.137161452149235, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1826385777786303, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12345339501504503, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1314 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1395.0, + "completions/max_terminated_length": 1395.0, + "completions/mean_length": 1073.125, + "completions/mean_terminated_length": 1073.125, + "completions/min_length": 760.0, + "completions/min_terminated_length": 760.0, + "epoch": 0.26305261052210444, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.802739243450424, + "kl": 0.0102386474609375, + "learning_rate": 9.29242152471881e-07, + "loss": -0.0155, + "num_tokens": 57280744.0, + "reward": 0.0, + "reward_std": 0.7873568534851074, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08457433300066113, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06646920376457546, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12412657816683505, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1315 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1463.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1125.0, + "completions/mean_terminated_length": 1125.0, + "completions/min_length": 851.0, + "completions/min_terminated_length": 851.0, + "epoch": 0.26325265053010605, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1827852925080307, + "kl": 0.0129547119140625, + "learning_rate": 9.290729139712959e-07, + "loss": -0.0093, + "num_tokens": 57323432.0, + "reward": 0.0, + "reward_std": 0.8627240657806396, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06887068293664389, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11403121883038933, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9791666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04013864859597431, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1316 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1221.0, + "completions/max_terminated_length": 1221.0, + "completions/mean_length": 1070.25, + "completions/mean_terminated_length": 1070.25, + "completions/min_length": 699.0, + "completions/min_terminated_length": 699.0, + "epoch": 0.2634526905381076, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3714665180273657, + "kl": 0.0139923095703125, + "learning_rate": 9.289034906330364e-07, + "loss": -0.0195, + "num_tokens": 57373140.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0646336078643799, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.31146007533169623, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07194896403548304, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.061913918736689035, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1317 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1394.0, + "completions/max_terminated_length": 1394.0, + "completions/mean_length": 1168.5, + "completions/mean_terminated_length": 1168.5, + "completions/min_length": 959.0, + "completions/min_terminated_length": 959.0, + "epoch": 0.2636527305461092, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3275356544329107, + "kl": 0.0151214599609375, + "learning_rate": 9.287338825397144e-07, + "loss": -0.0072, + "num_tokens": 57417276.0, + "reward": 0.0, + "reward_std": 0.7464814186096191, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1124195688748566, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15651700786041917, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10183501544346313, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1318 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1449.0, + "completions/mean_length": 1126.5625, + "completions/mean_terminated_length": 956.8182373046875, + "completions/min_length": 615.0, + "completions/min_terminated_length": 615.0, + "epoch": 0.2638527705541108, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.455476451851487, + "kl": 0.01080322265625, + "learning_rate": 9.285640897740315e-07, + "loss": 0.0007, + "num_tokens": 57465357.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9566620588302612, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06021595614040501, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10538013153345592, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09888264649460884, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1319 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1255.0, + "completions/max_terminated_length": 1255.0, + "completions/mean_length": 1146.625, + "completions/mean_terminated_length": 1146.625, + "completions/min_length": 988.0, + "completions/min_terminated_length": 988.0, + "epoch": 0.26405281056211244, + "frac_reward_zero_std": 0.0, + "grad_norm": 5.099441489543575, + "kl": 0.00904083251953125, + "learning_rate": 9.283941124187794e-07, + "loss": 0.0103, + "num_tokens": 57502887.0, + "reward": -3.3527612686157227e-08, + "reward_std": 1.0136570930480957, + "rewards/wordcountpos_reward_nokeypoint/mean": -3.3527612686157227e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.015257607458203336, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06982190485737806, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08073734277593311, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1320 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1438.0, + "completions/mean_length": 1207.5, + "completions/mean_terminated_length": 1188.0001220703125, + "completions/min_length": 959.0, + "completions/min_terminated_length": 959.0, + "epoch": 0.264252850570114, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0863667445109675, + "kl": 0.0127410888671875, + "learning_rate": 9.282239505568398e-07, + "loss": -0.0003, + "num_tokens": 57548703.0, + "reward": 1.3969838619232178e-08, + "reward_std": 0.8690091371536255, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.3969838619232178e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.02662389597118882, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1077419292256881, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10327955589886446, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1321 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1209.0, + "completions/max_terminated_length": 1209.0, + "completions/mean_length": 1003.4375, + "completions/mean_terminated_length": 1003.4375, + "completions/min_length": 847.0, + "completions/min_terminated_length": 847.0, + "epoch": 0.2644528905781156, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.482119478451297, + "kl": 0.0159149169921875, + "learning_rate": 9.280536042711843e-07, + "loss": -0.0421, + "num_tokens": 57597622.0, + "reward": 0.0, + "reward_std": 0.9012045860290527, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04530398262213222, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0913639044461977, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08766518798921943, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1322 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1384.0625, + "completions/mean_terminated_length": 1357.3077392578125, + "completions/min_length": 1109.0, + "completions/min_terminated_length": 1109.0, + "epoch": 0.2646529305861172, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.585135203987126, + "kl": 0.009796142578125, + "learning_rate": 9.278830736448749e-07, + "loss": -0.0046, + "num_tokens": 57637655.0, + "reward": -3.725290298461914e-08, + "reward_std": 1.0556907653808594, + "rewards/wordcountpos_reward_nokeypoint/mean": -3.725290298461914e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09790160434916169, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10757628682739327, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08388704928078611, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1323 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1100.0, + "completions/max_terminated_length": 1100.0, + "completions/mean_length": 963.0, + "completions/mean_terminated_length": 963.0, + "completions/min_length": 798.0, + "completions/min_terminated_length": 798.0, + "epoch": 0.26485297059411883, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.253804016830731, + "kl": 0.0137176513671875, + "learning_rate": 9.277123587610627e-07, + "loss": -0.0102, + "num_tokens": 57676087.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5693473815917969, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04341821987437618, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1652392288642577, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1324 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1228.0, + "completions/mean_length": 1051.9375, + "completions/mean_terminated_length": 987.9285888671875, + "completions/min_length": 819.0, + "completions/min_terminated_length": 819.0, + "epoch": 0.26505301060212044, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.222081273672153, + "kl": 0.01026153564453125, + "learning_rate": 9.275414597029892e-07, + "loss": 0.0236, + "num_tokens": 57715374.0, + "reward": 0.0, + "reward_std": 1.0172433853149414, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.15626949556323855, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17278070861586722, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11385500851066223, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1325 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1331.25, + "completions/mean_terminated_length": 1275.0, + "completions/min_length": 944.0, + "completions/min_terminated_length": 944.0, + "epoch": 0.265253050610122, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.923156708395259, + "kl": 0.0126190185546875, + "learning_rate": 9.273703765539856e-07, + "loss": 0.03, + "num_tokens": 57772018.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0454744100570679, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06904825199431239, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2669688626701764, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08681611046941139, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1326 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1456.0, + "completions/mean_length": 1319.625, + "completions/mean_terminated_length": 1307.60009765625, + "completions/min_length": 1111.0, + "completions/min_terminated_length": 1111.0, + "epoch": 0.2654530906181236, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.565097276053253, + "kl": 0.00861358642578125, + "learning_rate": 9.271991093974729e-07, + "loss": 0.002, + "num_tokens": 57817220.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.47491201758384705, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.032959552708163434, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14488030329608098, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07290277645477443, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1327 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1447.0, + "completions/mean_length": 1176.9375, + "completions/mean_terminated_length": 1155.4000244140625, + "completions/min_length": 781.0, + "completions/min_terminated_length": 781.0, + "epoch": 0.2656531306261252, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4547431673036213, + "kl": 0.013336181640625, + "learning_rate": 9.270276583169615e-07, + "loss": 0.0093, + "num_tokens": 57852131.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5218457579612732, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0036958526750917515, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07711660911699726, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12412657816683505, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1328 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1265.0, + "completions/mean_terminated_length": 1210.769287109375, + "completions/min_length": 957.0, + "completions/min_terminated_length": 957.0, + "epoch": 0.26585317063412683, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7438646681513106, + "kl": 0.0093536376953125, + "learning_rate": 9.26856023396052e-07, + "loss": 0.0072, + "num_tokens": 57896699.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8032482266426086, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.008521033578553234, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08142376892112975, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0620632890834175, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1329 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1288.0, + "completions/max_terminated_length": 1288.0, + "completions/mean_length": 1019.125, + "completions/mean_terminated_length": 1019.125, + "completions/min_length": 704.0, + "completions/min_terminated_length": 704.0, + "epoch": 0.26605321064212845, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9601260586288616, + "kl": 0.014892578125, + "learning_rate": 9.266842047184341e-07, + "loss": -0.0127, + "num_tokens": 57947605.0, + "reward": -3.725290298461914e-09, + "reward_std": 1.0623466968536377, + "rewards/wordcountpos_reward_nokeypoint/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.10015962780242348, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1189521445856747, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11287488977066928, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1330 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1457.0, + "completions/mean_length": 1163.5, + "completions/mean_terminated_length": 1115.4285888671875, + "completions/min_length": 872.0, + "completions/min_terminated_length": 872.0, + "epoch": 0.26625325065013, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0273616556544924, + "kl": 0.01324462890625, + "learning_rate": 9.265122023678876e-07, + "loss": -0.0027, + "num_tokens": 57999117.0, + "reward": 0.0, + "reward_std": 0.771207332611084, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03446360744723531, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03883977485894147, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09651328828101763, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1331 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1363.0, + "completions/max_terminated_length": 1363.0, + "completions/mean_length": 1104.0625, + "completions/mean_terminated_length": 1104.0625, + "completions/min_length": 884.0, + "completions/min_terminated_length": 884.0, + "epoch": 0.2664532906581316, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7342677799329773, + "kl": 0.0130462646484375, + "learning_rate": 9.263400164282813e-07, + "loss": -0.0228, + "num_tokens": 58040310.0, + "reward": 0.0, + "reward_std": 0.7747074365615845, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06355195089664804, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14958455686227298, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.03626037527129048, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1332 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1455.0, + "completions/mean_length": 1045.5625, + "completions/mean_terminated_length": 1015.2667236328125, + "completions/min_length": 637.0, + "completions/min_terminated_length": 637.0, + "epoch": 0.2666533306661332, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3868708500831852, + "kl": 0.0127716064453125, + "learning_rate": 9.261676469835742e-07, + "loss": -0.0594, + "num_tokens": 58083095.0, + "reward": 0.0, + "reward_std": 0.9089178442955017, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.12637210635453786, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11546794805880392, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12881223774390615, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1333 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1331.0, + "completions/max_terminated_length": 1331.0, + "completions/mean_length": 1001.875, + "completions/mean_terminated_length": 1001.875, + "completions/min_length": 623.0, + "completions/min_terminated_length": 623.0, + "epoch": 0.26685337067413484, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.710120956832136, + "kl": 0.017486572265625, + "learning_rate": 9.259950941178143e-07, + "loss": -0.0459, + "num_tokens": 58117797.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0474348068237305, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.007118891693995785, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06132425117303907, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13305526559931294, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1334 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1433.0, + "completions/mean_length": 1159.6875, + "completions/mean_terminated_length": 1137.0, + "completions/min_length": 886.0, + "completions/min_terminated_length": 886.0, + "epoch": 0.26705341068213645, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.518382240480854, + "kl": 0.0131988525390625, + "learning_rate": 9.258223579151391e-07, + "loss": -0.0622, + "num_tokens": 58151304.0, + "reward": 0.0, + "reward_std": 0.8454245328903198, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11976540121198148, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.02803813580173176, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10036968702787746, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1335 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1307.0, + "completions/max_terminated_length": 1307.0, + "completions/mean_length": 1098.25, + "completions/mean_terminated_length": 1098.25, + "completions/min_length": 946.0, + "completions/min_terminated_length": 946.0, + "epoch": 0.267253450690138, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2598579060818818, + "kl": 0.013153076171875, + "learning_rate": 9.256494384597757e-07, + "loss": -0.0219, + "num_tokens": 58191676.0, + "reward": 0.0, + "reward_std": 0.9158148765563965, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.060256582081089395, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1585778378056801, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08153617692869924, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1336 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1344.0, + "completions/max_terminated_length": 1344.0, + "completions/mean_length": 1059.8125, + "completions/mean_terminated_length": 1059.8125, + "completions/min_length": 771.0, + "completions/min_terminated_length": 771.0, + "epoch": 0.2674534906981396, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.827378839284969, + "kl": 0.015106201171875, + "learning_rate": 9.254763358360404e-07, + "loss": -0.0526, + "num_tokens": 58231769.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.8132504820823669, + "rewards/wordcountpos_reward_nokeypoint/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.44127332104118655, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.35958639735674586, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07391185942027817, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1337 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1201.0, + "completions/max_terminated_length": 1201.0, + "completions/mean_length": 959.0, + "completions/mean_terminated_length": 959.0, + "completions/min_length": 716.0, + "completions/min_terminated_length": 716.0, + "epoch": 0.26765353070614123, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1009752644945077, + "kl": 0.013885498046875, + "learning_rate": 9.253030501283385e-07, + "loss": -0.007, + "num_tokens": 58282049.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0310897827148438, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.15573068266465798, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06482745206090938, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07391185942027817, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1338 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1298.75, + "completions/mean_terminated_length": 1207.272705078125, + "completions/min_length": 985.0, + "completions/min_terminated_length": 985.0, + "epoch": 0.26785357071414284, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.031985585377431, + "kl": 0.00714874267578125, + "learning_rate": 9.251295814211653e-07, + "loss": 0.0161, + "num_tokens": 58322685.0, + "reward": -2.60770320892334e-08, + "reward_std": 1.0296118259429932, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.60770320892334e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.01204877083803789, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09890325488790559, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059472994182545036, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1339 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1308.0, + "completions/max_terminated_length": 1308.0, + "completions/mean_length": 1033.25, + "completions/mean_terminated_length": 1033.25, + "completions/min_length": 867.0, + "completions/min_terminated_length": 867.0, + "epoch": 0.26805361072214445, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6349252455387187, + "kl": 0.0131072998046875, + "learning_rate": 9.249559297991048e-07, + "loss": -0.0623, + "num_tokens": 58354417.0, + "reward": 0.0, + "reward_std": 0.9362287521362305, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0004696580526784098, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13519554330508365, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875000000000001, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06763190130459201, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1340 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1390.0, + "completions/max_terminated_length": 1390.0, + "completions/mean_length": 1140.0, + "completions/mean_terminated_length": 1140.0, + "completions/min_length": 796.0, + "completions/min_terminated_length": 796.0, + "epoch": 0.268253650730146, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4521701363516124, + "kl": 0.01513671875, + "learning_rate": 9.247820953468303e-07, + "loss": -0.0091, + "num_tokens": 58404105.0, + "reward": -4.0978193283081055e-08, + "reward_std": 0.9068785309791565, + "rewards/wordcountpos_reward_nokeypoint/mean": -4.0978193283081055e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06623112867898934, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07600908033514252, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07698003589195011, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1341 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1399.0, + "completions/mean_length": 1333.5, + "completions/mean_terminated_length": 1278.0, + "completions/min_length": 1142.0, + "completions/min_terminated_length": 1142.0, + "epoch": 0.2684536907381476, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7404863531260557, + "kl": 0.0123748779296875, + "learning_rate": 9.24608078149104e-07, + "loss": 0.0024, + "num_tokens": 58451137.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7468385100364685, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.14406824943579966, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09776715422651854, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08333333333333334, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1342 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1356.0, + "completions/max_terminated_length": 1356.0, + "completions/mean_length": 1167.4375, + "completions/mean_terminated_length": 1167.4375, + "completions/min_length": 842.0, + "completions/min_terminated_length": 842.0, + "epoch": 0.26865373074614923, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8637229332413128, + "kl": 0.01049041748046875, + "learning_rate": 9.244338782907779e-07, + "loss": -0.0246, + "num_tokens": 58490704.0, + "reward": 0.0, + "reward_std": 0.7832260131835938, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0157062407951258, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03714008705530517, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14580555290954889, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1343 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1473.0, + "completions/mean_length": 1201.1875, + "completions/mean_terminated_length": 1158.5, + "completions/min_length": 854.0, + "completions/min_terminated_length": 854.0, + "epoch": 0.26885377075415084, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2571229141930087, + "kl": 0.014617919921875, + "learning_rate": 9.242594958567927e-07, + "loss": -0.022, + "num_tokens": 58526731.0, + "reward": 0.0, + "reward_std": 0.8598706722259521, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07847972394852126, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09171674610868896, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08027729719194864, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1344 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1482.0, + "completions/mean_length": 1298.0625, + "completions/mean_terminated_length": 1284.60009765625, + "completions/min_length": 1062.0, + "completions/min_terminated_length": 1062.0, + "epoch": 0.26905381076215246, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9688880746971744, + "kl": 0.0163421630859375, + "learning_rate": 9.240849309321775e-07, + "loss": -0.0215, + "num_tokens": 58574212.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0553255081176758, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05888909823165348, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.31592458450009636, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10174405069512348, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1345 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1403.0, + "completions/max_terminated_length": 1403.0, + "completions/mean_length": 1165.4375, + "completions/mean_terminated_length": 1165.4375, + "completions/min_length": 907.0, + "completions/min_terminated_length": 907.0, + "epoch": 0.269253850770154, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9858211609865437, + "kl": 0.0122222900390625, + "learning_rate": 9.239101836020514e-07, + "loss": -0.0352, + "num_tokens": 58625675.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0014092922210693, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0673455286977264, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06486422082186356, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04367387557118565, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1346 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1170.0, + "completions/max_terminated_length": 1170.0, + "completions/mean_length": 915.125, + "completions/mean_terminated_length": 915.125, + "completions/min_length": 722.0, + "completions/min_terminated_length": 722.0, + "epoch": 0.2694538907781556, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2945021597738813, + "kl": 0.01273345947265625, + "learning_rate": 9.237352539516218e-07, + "loss": -0.0201, + "num_tokens": 58654701.0, + "reward": 0.0, + "reward_std": 0.9181333780288696, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.005991797370903202, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.027950911454332195, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06191391873668902, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1347 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1051.0, + "completions/max_terminated_length": 1051.0, + "completions/mean_length": 905.625, + "completions/mean_terminated_length": 905.625, + "completions/min_length": 811.0, + "completions/min_terminated_length": 811.0, + "epoch": 0.26965393078615724, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.757036359829974, + "kl": 0.00629425048828125, + "learning_rate": 9.235601420661854e-07, + "loss": 0.0038, + "num_tokens": 58698527.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7817403078079224, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.17965625294501697, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.19351978568918968, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07503085784948503, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1348 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1478.0, + "completions/mean_length": 1076.625, + "completions/mean_terminated_length": 1016.1428833007812, + "completions/min_length": 759.0, + "completions/min_terminated_length": 759.0, + "epoch": 0.26985397079415885, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6295060069376577, + "kl": 0.0144195556640625, + "learning_rate": 9.233848480311276e-07, + "loss": -0.0184, + "num_tokens": 58741193.0, + "reward": 2.60770320892334e-08, + "reward_std": 0.9308240413665771, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.60770320892334e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.02453269575471837, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.046040741675737865, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1172998689652263, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1349 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1268.875, + "completions/mean_terminated_length": 1235.857177734375, + "completions/min_length": 1060.0, + "completions/min_terminated_length": 1060.0, + "epoch": 0.2700540108021604, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.248291590345642, + "kl": 0.012969970703125, + "learning_rate": 9.232093719319222e-07, + "loss": -0.0133, + "num_tokens": 58790031.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.967057466506958, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.18148941135394064, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.18406520179127153, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.052880017930181294, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1350 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1301.0, + "completions/max_terminated_length": 1301.0, + "completions/mean_length": 1033.0, + "completions/mean_terminated_length": 1033.0, + "completions/min_length": 824.0, + "completions/min_terminated_length": 824.0, + "epoch": 0.270254050810162, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5668212697973787, + "kl": 0.00958251953125, + "learning_rate": 9.230337138541324e-07, + "loss": -0.0234, + "num_tokens": 58828351.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6078698635101318, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.007273285036826891, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10601226705010723, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11603000888978234, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1351 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1236.875, + "completions/mean_terminated_length": 1219.3333740234375, + "completions/min_length": 937.0, + "completions/min_terminated_length": 937.0, + "epoch": 0.27045409081816363, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0503933345334526, + "kl": 0.0110626220703125, + "learning_rate": 9.228578738834097e-07, + "loss": -0.0166, + "num_tokens": 58872821.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9215264320373535, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0741113479490368, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08688553092637584, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0957427107756338, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1352 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1246.6875, + "completions/mean_terminated_length": 1188.2308349609375, + "completions/min_length": 819.0, + "completions/min_terminated_length": 819.0, + "epoch": 0.27065413082616524, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.251559542296735, + "kl": 0.0138397216796875, + "learning_rate": 9.226818521054946e-07, + "loss": -0.0474, + "num_tokens": 58916848.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7954856157302856, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.15179383473134317, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07478555912486604, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875000000000001, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06763190130459201, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1353 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1430.0, + "completions/mean_length": 1203.1875, + "completions/mean_terminated_length": 1183.4000244140625, + "completions/min_length": 1014.0, + "completions/min_terminated_length": 1014.0, + "epoch": 0.27085417083416685, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.765779679457968, + "kl": 0.0115966796875, + "learning_rate": 9.225056486062162e-07, + "loss": 0.0273, + "num_tokens": 58964099.0, + "reward": 0.0, + "reward_std": 0.6230892539024353, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.12681370863358024, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10226611456467187, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10576003586036263, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1354 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1415.0, + "completions/mean_length": 1334.625, + "completions/mean_terminated_length": 1206.0, + "completions/min_length": 1001.0, + "completions/min_terminated_length": 1001.0, + "epoch": 0.2710542108421684, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1355032089720702, + "kl": 0.0135498046875, + "learning_rate": 9.22329263471492e-07, + "loss": 0.0182, + "num_tokens": 59005469.0, + "reward": 0.0, + "reward_std": 0.901725172996521, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.15288527836852975, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2648214219827817, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09727776191382573, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1355 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1337.4375, + "completions/mean_terminated_length": 1299.923095703125, + "completions/min_length": 1048.0, + "completions/min_terminated_length": 1048.0, + "epoch": 0.27125425085017, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4090660145608127, + "kl": 0.00872802734375, + "learning_rate": 9.221526967873282e-07, + "loss": -0.0116, + "num_tokens": 59055500.0, + "reward": 0.0, + "reward_std": 0.8519736528396606, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09674191734482634, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06351375786667013, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.03626037527129048, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1356 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1182.0, + "completions/max_terminated_length": 1182.0, + "completions/mean_length": 896.25, + "completions/mean_terminated_length": 896.25, + "completions/min_length": 660.0, + "completions/min_terminated_length": 660.0, + "epoch": 0.27145429085817163, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.009777294745327, + "kl": 0.017333984375, + "learning_rate": 9.219759486398195e-07, + "loss": -0.0083, + "num_tokens": 59096992.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9068814516067505, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03535747899760114, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04147282137580555, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11792967144619462, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1357 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1174.0, + "completions/max_terminated_length": 1174.0, + "completions/mean_length": 867.5, + "completions/mean_terminated_length": 867.5, + "completions/min_length": 481.0, + "completions/min_terminated_length": 481.0, + "epoch": 0.27165433086617324, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7105531908708627, + "kl": 0.00875091552734375, + "learning_rate": 9.217990191151491e-07, + "loss": 0.0082, + "num_tokens": 59133920.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0172581672668457, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.01617696285737996, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.055379229437180974, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11666666666666667, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1358 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1230.0, + "completions/max_terminated_length": 1230.0, + "completions/mean_length": 1053.0625, + "completions/mean_terminated_length": 1053.0625, + "completions/min_length": 761.0, + "completions/min_terminated_length": 761.0, + "epoch": 0.27185437087417486, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.715875616874234, + "kl": 0.014190673828125, + "learning_rate": 9.216219082995888e-07, + "loss": -0.0561, + "num_tokens": 59183561.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0286660194396973, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08980951042934547, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14721715340289335, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.046943622609505783, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1359 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1305.0, + "completions/mean_length": 1165.3125, + "completions/mean_terminated_length": 1053.75, + "completions/min_length": 855.0, + "completions/min_terminated_length": 855.0, + "epoch": 0.2720544108821764, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0606784582611373, + "kl": 0.015289306640625, + "learning_rate": 9.214446162794985e-07, + "loss": 0.0264, + "num_tokens": 59234158.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6638395190238953, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.18791501638706914, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.28014503251730133, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.046943622609505783, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1360 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1472.0, + "completions/max_terminated_length": 1472.0, + "completions/mean_length": 1146.0625, + "completions/mean_terminated_length": 1146.0625, + "completions/min_length": 945.0, + "completions/min_terminated_length": 945.0, + "epoch": 0.272254450890178, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0965783386666397, + "kl": 0.013763427734375, + "learning_rate": 9.212671431413266e-07, + "loss": -0.0049, + "num_tokens": 59283423.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9867372512817383, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03092755286132004, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1109709996929578, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.053748384988656986, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1361 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1377.0, + "completions/mean_length": 1223.5625, + "completions/mean_terminated_length": 1205.1334228515625, + "completions/min_length": 988.0, + "completions/min_terminated_length": 988.0, + "epoch": 0.27245449089817964, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8685952663199883, + "kl": 0.0133819580078125, + "learning_rate": 9.210894889716096e-07, + "loss": -0.0048, + "num_tokens": 59323448.0, + "reward": 0.0, + "reward_std": 0.664895236492157, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0045370732848177, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07942222086796574, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05821416398857659, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1362 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1166.0, + "completions/max_terminated_length": 1166.0, + "completions/mean_length": 928.125, + "completions/mean_terminated_length": 928.125, + "completions/min_length": 680.0, + "completions/min_terminated_length": 680.0, + "epoch": 0.27265453090618125, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5326306928036133, + "kl": 0.0121002197265625, + "learning_rate": 9.20911653856973e-07, + "loss": 0.0022, + "num_tokens": 59358898.0, + "reward": 0.0, + "reward_std": 0.78182053565979, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.040658058969235036, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07025272770693956, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08153617692869923, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1363 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1369.0, + "completions/mean_length": 1076.3125, + "completions/mean_terminated_length": 1048.0667724609375, + "completions/min_length": 807.0, + "completions/min_terminated_length": 807.0, + "epoch": 0.27285457091418286, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.369038969199683, + "kl": 0.0133819580078125, + "learning_rate": 9.207336378841296e-07, + "loss": -0.0688, + "num_tokens": 59392431.0, + "reward": 0.0, + "reward_std": 0.8225746750831604, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0899289251923553, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11427836643153746, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.045338235029118136, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1364 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1285.0, + "completions/max_terminated_length": 1285.0, + "completions/mean_length": 1004.9375, + "completions/mean_terminated_length": 1004.9375, + "completions/min_length": 725.0, + "completions/min_terminated_length": 725.0, + "epoch": 0.2730546109221844, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.508436679666149, + "kl": 0.0155181884765625, + "learning_rate": 9.205554411398809e-07, + "loss": -0.0508, + "num_tokens": 59429342.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7103227972984314, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.002652987310267573, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.23964783891787367, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06871842709362767, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1365 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1332.0, + "completions/mean_length": 1262.25, + "completions/mean_terminated_length": 1207.3846435546875, + "completions/min_length": 990.0, + "completions/min_terminated_length": 990.0, + "epoch": 0.273254650930186, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6843186533918977, + "kl": 0.01177978515625, + "learning_rate": 9.203770637111164e-07, + "loss": 0.032, + "num_tokens": 59481018.0, + "reward": 0.0, + "reward_std": 0.43309858441352844, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04400212679104942, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10426540851001151, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.046943622609505783, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1366 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1414.0, + "completions/max_terminated_length": 1414.0, + "completions/mean_length": 1211.625, + "completions/mean_terminated_length": 1211.625, + "completions/min_length": 996.0, + "completions/min_terminated_length": 996.0, + "epoch": 0.27345469093818764, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.974429889057829, + "kl": 0.011810302734375, + "learning_rate": 9.201985056848137e-07, + "loss": -0.0015, + "num_tokens": 59529460.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0319807529449463, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09167690327658962, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11139737278972743, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07685966046898339, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1367 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1168.0, + "completions/mean_length": 1178.4375, + "completions/mean_terminated_length": 928.3333129882812, + "completions/min_length": 710.0, + "completions/min_terminated_length": 710.0, + "epoch": 0.27365473094618925, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5106532146161404, + "kl": 0.024078369140625, + "learning_rate": 9.200197671480388e-07, + "loss": 0.0223, + "num_tokens": 59584579.0, + "reward": 0.0, + "reward_std": 0.8433984518051147, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.021111038423863502, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06585772938540797, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875000000000001, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07187952884282608, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1368 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1423.0, + "completions/mean_length": 1329.4375, + "completions/mean_terminated_length": 1158.875, + "completions/min_length": 941.0, + "completions/min_terminated_length": 941.0, + "epoch": 0.27385477095419086, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.800205685280084, + "kl": 0.01068115234375, + "learning_rate": 9.198408481879451e-07, + "loss": 0.0384, + "num_tokens": 59630162.0, + "reward": 0.0, + "reward_std": 0.480735719203949, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08418772593414463, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2926291907897678, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08850612031567838, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1369 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1274.0, + "completions/max_terminated_length": 1274.0, + "completions/mean_length": 925.375, + "completions/mean_terminated_length": 925.375, + "completions/min_length": 720.0, + "completions/min_terminated_length": 720.0, + "epoch": 0.2740548109621924, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7157957924122487, + "kl": 0.02044677734375, + "learning_rate": 9.196617488917744e-07, + "loss": -0.0756, + "num_tokens": 59680304.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8997427225112915, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.16076090852136352, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.20357648530372494, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09016445879408157, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1370 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1425.0, + "completions/max_terminated_length": 1425.0, + "completions/mean_length": 1167.9375, + "completions/mean_terminated_length": 1167.9375, + "completions/min_length": 933.0, + "completions/min_terminated_length": 933.0, + "epoch": 0.27425485097019403, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1243335475752683, + "kl": 0.0122528076171875, + "learning_rate": 9.194824693468565e-07, + "loss": 0.0156, + "num_tokens": 59726335.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9867253303527832, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.02856957480220266, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09801266734748854, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11417984514369005, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1371 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1449.0, + "completions/mean_length": 1290.75, + "completions/mean_terminated_length": 1260.857177734375, + "completions/min_length": 1046.0, + "completions/min_terminated_length": 1046.0, + "epoch": 0.27445489097819564, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.229046175977985, + "kl": 0.01275634765625, + "learning_rate": 9.193030096406086e-07, + "loss": 0.0204, + "num_tokens": 59775947.0, + "reward": 0.0, + "reward_std": 0.5547963976860046, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.028176462798117155, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07430689886264014, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12171612389003693, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1372 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1184.0, + "completions/max_terminated_length": 1184.0, + "completions/mean_length": 966.5, + "completions/mean_terminated_length": 966.5, + "completions/min_length": 781.0, + "completions/min_terminated_length": 781.0, + "epoch": 0.27465493098619725, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.618911071144693, + "kl": 0.01806640625, + "learning_rate": 9.191233698605362e-07, + "loss": -0.0203, + "num_tokens": 59827627.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9921776056289673, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.11381501914312395, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07015280840561076, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1179296714461946, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1373 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1432.0, + "completions/max_terminated_length": 1432.0, + "completions/mean_length": 1163.6875, + "completions/mean_terminated_length": 1163.6875, + "completions/min_length": 878.0, + "completions/min_terminated_length": 878.0, + "epoch": 0.27485497099419887, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.820172752900193, + "kl": 0.010589599609375, + "learning_rate": 9.189435500942325e-07, + "loss": -0.0052, + "num_tokens": 59867942.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.932351291179657, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0076675593968172605, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.18745392499057995, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05821416398857659, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1374 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1409.375, + "completions/mean_terminated_length": 1318.75, + "completions/min_length": 1159.0, + "completions/min_terminated_length": 1159.0, + "epoch": 0.2750550110022004, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.998270050537721, + "kl": 0.0151824951171875, + "learning_rate": 9.187635504293786e-07, + "loss": -0.0239, + "num_tokens": 59923748.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.5859823822975159, + "rewards/wordcountpos_reward_nokeypoint/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.056464564240524685, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.058742492086680045, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1375 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1315.0, + "completions/max_terminated_length": 1315.0, + "completions/mean_length": 1070.375, + "completions/mean_terminated_length": 1070.375, + "completions/min_length": 728.0, + "completions/min_terminated_length": 728.0, + "epoch": 0.27525505101020203, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.405650742913708, + "kl": 0.0142059326171875, + "learning_rate": 9.185833709537428e-07, + "loss": 0.0245, + "num_tokens": 59959146.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7741302251815796, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.14065489470633213, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.19610554576492603, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06978803887752091, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1376 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1449.0, + "completions/mean_length": 1265.125, + "completions/mean_terminated_length": 1186.8333740234375, + "completions/min_length": 908.0, + "completions/min_terminated_length": 908.0, + "epoch": 0.27545509101820365, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.602733651571319, + "kl": 0.0143890380859375, + "learning_rate": 9.184030117551817e-07, + "loss": -0.0191, + "num_tokens": 60003276.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.526024341583252, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.17895722677373854, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.24140057801339013, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11642832797715322, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1377 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1317.0, + "completions/max_terminated_length": 1317.0, + "completions/mean_length": 1164.5, + "completions/mean_terminated_length": 1164.5, + "completions/min_length": 1034.0, + "completions/min_terminated_length": 1034.0, + "epoch": 0.27565513102620526, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2518700747330054, + "kl": 0.0140838623046875, + "learning_rate": 9.182224729216392e-07, + "loss": 0.006, + "num_tokens": 60047180.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9533426761627197, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.15538100588650003, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10180109895750834, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.056927504255331086, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1378 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1290.0, + "completions/max_terminated_length": 1290.0, + "completions/mean_length": 1111.125, + "completions/mean_terminated_length": 1111.125, + "completions/min_length": 992.0, + "completions/min_terminated_length": 992.0, + "epoch": 0.2758551710342068, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6647859501329134, + "kl": 0.0123138427734375, + "learning_rate": 9.18041754541147e-07, + "loss": -0.0237, + "num_tokens": 60090670.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8391597270965576, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.13446630424433378, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09046235399976343, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05163977794943221, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1379 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1473.0, + "completions/mean_length": 1150.75, + "completions/mean_terminated_length": 1100.857177734375, + "completions/min_length": 651.0, + "completions/min_terminated_length": 651.0, + "epoch": 0.2760552110422084, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1705925249219646, + "kl": 0.0124053955078125, + "learning_rate": 9.178608567018239e-07, + "loss": -0.001, + "num_tokens": 60146042.0, + "reward": 0.0, + "reward_std": 0.6919154524803162, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05777302959687015, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11653809616898984, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11917929226045818, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1380 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1325.0, + "completions/max_terminated_length": 1325.0, + "completions/mean_length": 1167.9375, + "completions/mean_terminated_length": 1167.9375, + "completions/min_length": 981.0, + "completions/min_terminated_length": 981.0, + "epoch": 0.27625525105021004, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3648433462922718, + "kl": 0.007965087890625, + "learning_rate": 9.176797794918766e-07, + "loss": -0.0417, + "num_tokens": 60189905.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7943872809410095, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.026435069313590527, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07647212280908372, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9833333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.03849001794597504, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1381 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1372.0, + "completions/max_terminated_length": 1372.0, + "completions/mean_length": 1114.3125, + "completions/mean_terminated_length": 1114.3125, + "completions/min_length": 772.0, + "completions/min_terminated_length": 772.0, + "epoch": 0.27645529105821165, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0059765949686423, + "kl": 0.01214599609375, + "learning_rate": 9.174985229995993e-07, + "loss": -0.0401, + "num_tokens": 60231958.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.791483998298645, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07033278323098852, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09448972543720781, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1102186379345533, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1382 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1198.0, + "completions/max_terminated_length": 1198.0, + "completions/mean_length": 1075.5625, + "completions/mean_terminated_length": 1075.5625, + "completions/min_length": 902.0, + "completions/min_terminated_length": 902.0, + "epoch": 0.27665533106621326, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0459759195442113, + "kl": 0.012237548828125, + "learning_rate": 9.173170873133733e-07, + "loss": -0.0027, + "num_tokens": 60261575.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.062567949295044, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04587133589266537, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12118778234275224, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.057089922571845, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1383 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1223.0, + "completions/max_terminated_length": 1223.0, + "completions/mean_length": 1020.3125, + "completions/mean_terminated_length": 1020.3125, + "completions/min_length": 762.0, + "completions/min_terminated_length": 762.0, + "epoch": 0.2768553710742148, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.616035832963117, + "kl": 0.0156707763671875, + "learning_rate": 9.171354725216677e-07, + "loss": -0.0557, + "num_tokens": 60298780.0, + "reward": 2.7939677238464355e-08, + "reward_std": 1.068703293800354, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.7939677238464355e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.12944789129003478, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04852454605453814, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.055611083361076424, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1384 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1441.0, + "completions/mean_length": 1266.875, + "completions/mean_terminated_length": 1251.3333740234375, + "completions/min_length": 1085.0, + "completions/min_terminated_length": 1085.0, + "epoch": 0.27705541108221643, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.757520186901259, + "kl": 0.009857177734375, + "learning_rate": 9.169536787130384e-07, + "loss": -0.0157, + "num_tokens": 60334474.0, + "reward": 0.0, + "reward_std": 0.8073381781578064, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.3016119230302522, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13132765710958605, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04127594582445935, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1385 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1463.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1129.625, + "completions/mean_terminated_length": 1129.625, + "completions/min_length": 956.0, + "completions/min_terminated_length": 956.0, + "epoch": 0.27725545109021804, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.196126711265262, + "kl": 0.01629638671875, + "learning_rate": 9.167717059761291e-07, + "loss": -0.0225, + "num_tokens": 60377108.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.7455917596817017, + "rewards/wordcountpos_reward_nokeypoint/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1889598702932463, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16616001971054686, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.070841502796867, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1386 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1365.0, + "completions/max_terminated_length": 1365.0, + "completions/mean_length": 1154.875, + "completions/mean_terminated_length": 1154.875, + "completions/min_length": 974.0, + "completions/min_terminated_length": 974.0, + "epoch": 0.27745549109821965, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1183026380266234, + "kl": 0.00799560546875, + "learning_rate": 9.165895543996706e-07, + "loss": 0.0046, + "num_tokens": 60409818.0, + "reward": 0.0, + "reward_std": 1.0163074731826782, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03986188127033401, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09034477202069427, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.052880017930181294, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1387 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1195.0, + "completions/max_terminated_length": 1195.0, + "completions/mean_length": 1016.625, + "completions/mean_terminated_length": 1016.625, + "completions/min_length": 872.0, + "completions/min_terminated_length": 872.0, + "epoch": 0.27765553110622126, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.492233648268207, + "kl": 0.0131683349609375, + "learning_rate": 9.164072240724808e-07, + "loss": 0.0057, + "num_tokens": 60442732.0, + "reward": 0.0, + "reward_std": 1.0652186870574951, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.034916650364329414, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07247918157810924, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10610965676722954, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1388 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1192.1875, + "completions/mean_terminated_length": 1148.21435546875, + "completions/min_length": 799.0, + "completions/min_terminated_length": 799.0, + "epoch": 0.2778555711142228, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8495246537226953, + "kl": 0.0106353759765625, + "learning_rate": 9.162247150834647e-07, + "loss": -0.0599, + "num_tokens": 60494919.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0422221422195435, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.042970857971277865, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07407699619414726, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.03191423692521126, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327954292297363, + "step": 1389 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1471.0, + "completions/mean_length": 1285.8125, + "completions/mean_terminated_length": 1271.533447265625, + "completions/min_length": 1031.0, + "completions/min_terminated_length": 1031.0, + "epoch": 0.27805561112222443, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8716565337498823, + "kl": 0.015655517578125, + "learning_rate": 9.160420275216143e-07, + "loss": -0.0191, + "num_tokens": 60531484.0, + "reward": 0.0, + "reward_std": 0.9257342219352722, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04300747187383053, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05993793540699466, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0758897836290186, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1390 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1330.9375, + "completions/mean_terminated_length": 1291.923095703125, + "completions/min_length": 908.0, + "completions/min_terminated_length": 908.0, + "epoch": 0.27825565113022604, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.756201374558551, + "kl": 0.01206207275390625, + "learning_rate": 9.158591614760093e-07, + "loss": 0.0072, + "num_tokens": 60578307.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9141738414764404, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.18714051625739309, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.20574963986516798, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11979921473804349, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1391 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1268.0, + "completions/max_terminated_length": 1268.0, + "completions/mean_length": 1029.5625, + "completions/mean_terminated_length": 1029.5625, + "completions/min_length": 779.0, + "completions/min_terminated_length": 779.0, + "epoch": 0.27845569113822766, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6988691659756108, + "kl": 0.0161285400390625, + "learning_rate": 9.156761170358157e-07, + "loss": -0.0039, + "num_tokens": 60610468.0, + "reward": 0.0, + "reward_std": 0.728611171245575, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.02798611489614699, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07604556606853971, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.055611083361076424, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1392 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1232.75, + "completions/mean_terminated_length": 1214.933349609375, + "completions/min_length": 949.0, + "completions/min_terminated_length": 949.0, + "epoch": 0.27865573114622927, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.392384037808402, + "kl": 0.00872039794921875, + "learning_rate": 9.154928942902871e-07, + "loss": -0.0086, + "num_tokens": 60669112.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6669691801071167, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1874518985953172, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14626002154599616, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07252075054258096, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1393 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1259.0, + "completions/mean_length": 1149.25, + "completions/mean_terminated_length": 1125.86669921875, + "completions/min_length": 899.0, + "completions/min_terminated_length": 899.0, + "epoch": 0.2788557711542308, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2618469870133366, + "kl": 0.006473541259765625, + "learning_rate": 9.153094933287635e-07, + "loss": 0.0269, + "num_tokens": 60706388.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.7007532119750977, + "rewards/wordcountpos_reward_nokeypoint/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.032268421902578476, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08075838916607282, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06652763279965644, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1394 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1469.0, + "completions/mean_length": 1374.0, + "completions/mean_terminated_length": 1356.0, + "completions/min_length": 1222.0, + "completions/min_terminated_length": 1222.0, + "epoch": 0.27905581116223244, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.443983302629103, + "kl": 0.00899505615234375, + "learning_rate": 9.15125914240672e-07, + "loss": 0.0102, + "num_tokens": 60757636.0, + "reward": 0.0, + "reward_std": 0.9748229384422302, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0747939820921513, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1293762242805655, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10852547064066473, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1395 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1284.0, + "completions/max_terminated_length": 1284.0, + "completions/mean_length": 1057.5, + "completions/mean_terminated_length": 1057.5, + "completions/min_length": 890.0, + "completions/min_terminated_length": 890.0, + "epoch": 0.27925585117023405, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.21438008247218, + "kl": 0.01690673828125, + "learning_rate": 9.149421571155269e-07, + "loss": -0.0221, + "num_tokens": 60803756.0, + "reward": 0.0, + "reward_std": 0.9958001375198364, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04004132755329023, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.23509763278992513, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0683130051063973, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1396 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1349.0, + "completions/mean_length": 1287.875, + "completions/mean_terminated_length": 1122.888916015625, + "completions/min_length": 805.0, + "completions/min_terminated_length": 805.0, + "epoch": 0.27945589117823566, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1864978550317233, + "kl": 0.0267181396484375, + "learning_rate": 9.14758222042929e-07, + "loss": 0.0235, + "num_tokens": 60860490.0, + "reward": 0.0, + "reward_std": 0.9571911692619324, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.008065976319645486, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06454206681021799, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12524050936172845, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1397 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1413.0, + "completions/max_terminated_length": 1413.0, + "completions/mean_length": 1166.3125, + "completions/mean_terminated_length": 1166.3125, + "completions/min_length": 928.0, + "completions/min_terminated_length": 928.0, + "epoch": 0.27965593118623727, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.947155543069679, + "kl": 0.0107269287109375, + "learning_rate": 9.145741091125653e-07, + "loss": 0.0196, + "num_tokens": 60907783.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9452995657920837, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1042872630718532, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1916047024353444, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.046943622609505783, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1398 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1260.0, + "completions/mean_length": 1236.5, + "completions/mean_terminated_length": 973.0, + "completions/min_length": 807.0, + "completions/min_terminated_length": 807.0, + "epoch": 0.27985597119423883, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.9810921644988109, + "kl": 0.00769805908203125, + "learning_rate": 9.143898184142107e-07, + "loss": -0.0311, + "num_tokens": 60951887.0, + "reward": 0.0, + "reward_std": 0.8030901551246643, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03987609897386956, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11686336748430322, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10174405069512346, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1399 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1325.0, + "completions/max_terminated_length": 1325.0, + "completions/mean_length": 1072.125, + "completions/mean_terminated_length": 1072.125, + "completions/min_length": 968.0, + "completions/min_terminated_length": 968.0, + "epoch": 0.28005601120224044, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.557051912181972, + "kl": 0.0159149169921875, + "learning_rate": 9.142053500377258e-07, + "loss": -0.0084, + "num_tokens": 60995201.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0425703525543213, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.021513004109171938, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1620113079481982, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14599594109020572, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1400 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1407.0, + "completions/mean_length": 1237.5625, + "completions/mean_terminated_length": 1177.0, + "completions/min_length": 921.0, + "completions/min_terminated_length": 921.0, + "epoch": 0.28025605121024205, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.247294058738329, + "kl": 0.01776123046875, + "learning_rate": 9.140207040730587e-07, + "loss": -0.012, + "num_tokens": 61048066.0, + "reward": 0.0, + "reward_std": 1.009735107421875, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07324873540759795, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13719312750790297, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12382783747337808, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1401 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1402.0, + "completions/max_terminated_length": 1402.0, + "completions/mean_length": 1160.0, + "completions/mean_terminated_length": 1160.0, + "completions/min_length": 847.0, + "completions/min_terminated_length": 847.0, + "epoch": 0.28045609121824366, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7552474678474597, + "kl": 0.0119171142578125, + "learning_rate": 9.138358806102432e-07, + "loss": -0.0394, + "num_tokens": 61087034.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.034000039100647, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.01635571653943328, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.053353167008310276, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09583937179043478, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1402 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1483.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1121.8125, + "completions/mean_terminated_length": 1121.8125, + "completions/min_length": 911.0, + "completions/min_terminated_length": 911.0, + "epoch": 0.2806561312262453, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.238294214601668, + "kl": 0.0119476318359375, + "learning_rate": 9.136508797393999e-07, + "loss": 0.0265, + "num_tokens": 61132695.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.44950032234191895, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04774212677053202, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15767629529511482, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.047919685895217376, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1403 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1338.0, + "completions/max_terminated_length": 1338.0, + "completions/mean_length": 1144.3125, + "completions/mean_terminated_length": 1144.3125, + "completions/min_length": 962.0, + "completions/min_terminated_length": 962.0, + "epoch": 0.28085617123424683, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4410506428341336, + "kl": 0.0150146484375, + "learning_rate": 9.134657015507367e-07, + "loss": -0.0437, + "num_tokens": 61185108.0, + "reward": 0.0, + "reward_std": 0.7713695168495178, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11883389204419187, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16542892505111267, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09269623828717427, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1404 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1439.0, + "completions/max_terminated_length": 1439.0, + "completions/mean_length": 1200.25, + "completions/mean_terminated_length": 1200.25, + "completions/min_length": 933.0, + "completions/min_terminated_length": 933.0, + "epoch": 0.28105621124224844, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0081982695389455, + "kl": 0.0103912353515625, + "learning_rate": 9.132803461345468e-07, + "loss": 0.004, + "num_tokens": 61224600.0, + "reward": 0.0, + "reward_std": 0.95379638671875, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.18021881439949006, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14819588800487238, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09016445879408157, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1405 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1418.0, + "completions/max_terminated_length": 1418.0, + "completions/mean_length": 1150.6875, + "completions/mean_terminated_length": 1150.6875, + "completions/min_length": 962.0, + "completions/min_terminated_length": 962.0, + "epoch": 0.28125625125025006, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1301559719793333, + "kl": 0.01177978515625, + "learning_rate": 9.130948135812105e-07, + "loss": 0.011, + "num_tokens": 61270539.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8282616138458252, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.12420154562045528, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.21229219670111338, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09651328828101764, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1406 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1386.0, + "completions/mean_length": 1144.4375, + "completions/mean_terminated_length": 1120.7333984375, + "completions/min_length": 901.0, + "completions/min_terminated_length": 901.0, + "epoch": 0.28145629125825167, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.051273463992888, + "kl": 0.0115966796875, + "learning_rate": 9.129091039811944e-07, + "loss": -0.0595, + "num_tokens": 61308434.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9571661353111267, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.2522400061303402, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16110399588211555, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.03415650255319865, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1407 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1080.0, + "completions/max_terminated_length": 1080.0, + "completions/mean_length": 922.125, + "completions/mean_terminated_length": 922.125, + "completions/min_length": 649.0, + "completions/min_terminated_length": 649.0, + "epoch": 0.2816563312662532, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.533169385598401, + "kl": 0.0075836181640625, + "learning_rate": 9.127232174250511e-07, + "loss": -0.0126, + "num_tokens": 61342876.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0589864253997803, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06704312885606524, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07001128662899729, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05146016078626404, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1408 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1341.0, + "completions/max_terminated_length": 1341.0, + "completions/mean_length": 1089.25, + "completions/mean_terminated_length": 1089.25, + "completions/min_length": 758.0, + "completions/min_terminated_length": 758.0, + "epoch": 0.28185637127425484, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7894259048453387, + "kl": 0.0131072998046875, + "learning_rate": 9.125371540034202e-07, + "loss": -0.0269, + "num_tokens": 61379152.0, + "reward": 0.0, + "reward_std": 0.7619322538375854, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10904888962253201, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10983134860650051, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10000000000000002, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1409 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1297.0, + "completions/mean_length": 1137.125, + "completions/mean_terminated_length": 1112.933349609375, + "completions/min_length": 961.0, + "completions/min_terminated_length": 961.0, + "epoch": 0.28205641128225645, + "frac_reward_zero_std": 0.5, + "grad_norm": 1.6646365469905966, + "kl": 0.0121307373046875, + "learning_rate": 9.123509138070265e-07, + "loss": 0.0183, + "num_tokens": 61413514.0, + "reward": 1.1175870895385742e-08, + "reward_std": 0.750025749206543, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08822798597196094, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0824843192559386, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.053748384988656986, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1410 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1369.0, + "completions/max_terminated_length": 1369.0, + "completions/mean_length": 1141.3125, + "completions/mean_terminated_length": 1141.3125, + "completions/min_length": 975.0, + "completions/min_terminated_length": 975.0, + "epoch": 0.28225645129025806, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9740497978005322, + "kl": 0.0128173828125, + "learning_rate": 9.121644969266819e-07, + "loss": -0.0411, + "num_tokens": 61456119.0, + "reward": 0.0, + "reward_std": 0.8904587030410767, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.16183179575845422, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2188748982878454, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.047919685895217376, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1411 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1419.0, + "completions/mean_length": 1218.5, + "completions/mean_terminated_length": 1199.7333984375, + "completions/min_length": 836.0, + "completions/min_terminated_length": 836.0, + "epoch": 0.28245649129825967, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1715345459087683, + "kl": 0.014434814453125, + "learning_rate": 9.11977903453284e-07, + "loss": -0.0359, + "num_tokens": 61511623.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9668803215026855, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.14715008215809192, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08526025313784882, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07649739768026005, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1412 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1461.0, + "completions/max_terminated_length": 1461.0, + "completions/mean_length": 1231.125, + "completions/mean_terminated_length": 1231.125, + "completions/min_length": 992.0, + "completions/min_terminated_length": 992.0, + "epoch": 0.2826565313062612, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.486197129785917, + "kl": 0.0099945068359375, + "learning_rate": 9.117911334778167e-07, + "loss": -0.0265, + "num_tokens": 61565185.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.7964466214179993, + "rewards/wordcountpos_reward_nokeypoint/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10151329186874428, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2211188253134633, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10027739304327551, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1413 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1407.0, + "completions/max_terminated_length": 1407.0, + "completions/mean_length": 1179.625, + "completions/mean_terminated_length": 1179.625, + "completions/min_length": 846.0, + "completions/min_terminated_length": 846.0, + "epoch": 0.28285657131426284, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.359743845213838, + "kl": 0.014129638671875, + "learning_rate": 9.116041870913498e-07, + "loss": -0.0213, + "num_tokens": 61605123.0, + "reward": 0.0, + "reward_std": 0.8253852128982544, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.16513347324639405, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.3527732670094512, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07781745019952502, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1414 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1350.0, + "completions/max_terminated_length": 1350.0, + "completions/mean_length": 1160.0, + "completions/mean_terminated_length": 1160.0, + "completions/min_length": 742.0, + "completions/min_terminated_length": 742.0, + "epoch": 0.28305661132226445, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7564769207010964, + "kl": 0.01190185546875, + "learning_rate": 9.114170643850393e-07, + "loss": -0.0005, + "num_tokens": 61655475.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8698737025260925, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.10454913214112088, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11597050917743557, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09428090415820635, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1415 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1433.0, + "completions/mean_length": 1301.1875, + "completions/mean_terminated_length": 1255.3077392578125, + "completions/min_length": 1043.0, + "completions/min_terminated_length": 1043.0, + "epoch": 0.28325665133026606, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1262720989361723, + "kl": 0.013946533203125, + "learning_rate": 9.11229765450127e-07, + "loss": -0.0187, + "num_tokens": 61699678.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0591495037078857, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.3147418701104599, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1465179384257129, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09339283817414602, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1416 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1411.0, + "completions/max_terminated_length": 1411.0, + "completions/mean_length": 1175.0, + "completions/mean_terminated_length": 1175.0, + "completions/min_length": 933.0, + "completions/min_terminated_length": 933.0, + "epoch": 0.2834566913382677, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1899851612306294, + "kl": 0.0107574462890625, + "learning_rate": 9.110422903779408e-07, + "loss": 0.0019, + "num_tokens": 61742446.0, + "reward": 0.0, + "reward_std": 1.0262709856033325, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10402230829130411, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12573564020111702, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0925962962222252, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1417 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1489.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1101.5, + "completions/mean_terminated_length": 1101.5, + "completions/min_length": 940.0, + "completions/min_terminated_length": 940.0, + "epoch": 0.28365673134626923, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.988364525155783, + "kl": 0.010746002197265625, + "learning_rate": 9.108546392598945e-07, + "loss": -0.06, + "num_tokens": 61780262.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8838249444961548, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.026129236904589632, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05805181015057785, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07252075054258099, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1418 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1477.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1088.8125, + "completions/mean_terminated_length": 1088.8125, + "completions/min_length": 806.0, + "completions/min_terminated_length": 806.0, + "epoch": 0.28385677135427084, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.3667816363813967, + "kl": 0.00614166259765625, + "learning_rate": 9.106668121874873e-07, + "loss": -0.0122, + "num_tokens": 61815787.0, + "reward": 0.0, + "reward_std": 0.747808575630188, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.15607446411452186, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16809722089967183, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08766518798921945, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1419 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1469.0, + "completions/mean_length": 1353.375, + "completions/mean_terminated_length": 1109.0, + "completions/min_length": 984.0, + "completions/min_terminated_length": 984.0, + "epoch": 0.28405681136227245, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.42869178787471, + "kl": 0.0121307373046875, + "learning_rate": 9.104788092523048e-07, + "loss": 0.0493, + "num_tokens": 61872161.0, + "reward": 0.0, + "reward_std": 0.6753178834915161, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03296771126126924, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16428127081439511, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14782371884055634, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1420 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1371.4375, + "completions/mean_terminated_length": 1271.4444580078125, + "completions/min_length": 996.0, + "completions/min_terminated_length": 996.0, + "epoch": 0.28425685137027407, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.371416753627292, + "kl": 0.0099945068359375, + "learning_rate": 9.10290630546018e-07, + "loss": 0.0079, + "num_tokens": 61918680.0, + "reward": 0.0, + "reward_std": 0.7987505793571472, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06084023694622244, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10918853511204274, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12102953419784838, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1421 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1381.0, + "completions/max_terminated_length": 1381.0, + "completions/mean_length": 1034.9375, + "completions/mean_terminated_length": 1034.9375, + "completions/min_length": 704.0, + "completions/min_terminated_length": 704.0, + "epoch": 0.2844568913782757, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.50066560239764, + "kl": 0.01495361328125, + "learning_rate": 9.101022761603838e-07, + "loss": -0.0186, + "num_tokens": 61951151.0, + "reward": 0.0, + "reward_std": 1.0181548595428467, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.02003938596668129, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.039091821215670716, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0806225774829855, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1422 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1454.0, + "completions/max_terminated_length": 1454.0, + "completions/mean_length": 1211.0, + "completions/mean_terminated_length": 1211.0, + "completions/min_length": 1007.0, + "completions/min_terminated_length": 1007.0, + "epoch": 0.28465693138627723, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.0748730794035306, + "kl": 0.00875091552734375, + "learning_rate": 9.099137461872442e-07, + "loss": -0.0207, + "num_tokens": 61995063.0, + "reward": 3.725290298461914e-09, + "reward_std": 1.015357255935669, + "rewards/wordcountpos_reward_nokeypoint/mean": 3.725290298461914e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06181086304040728, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10614104960479835, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13817594795257457, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1423 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1399.0, + "completions/max_terminated_length": 1399.0, + "completions/mean_length": 1076.875, + "completions/mean_terminated_length": 1076.875, + "completions/min_length": 832.0, + "completions/min_terminated_length": 832.0, + "epoch": 0.28485697139427885, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7696400076908727, + "kl": 0.022613525390625, + "learning_rate": 9.09725040718528e-07, + "loss": 0.011, + "num_tokens": 62039549.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.009477972984314, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11145277045285248, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10772416964975069, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06382847385042252, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1424 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1266.0, + "completions/mean_length": 1243.5625, + "completions/mean_terminated_length": 1089.7000732421875, + "completions/min_length": 984.0, + "completions/min_terminated_length": 984.0, + "epoch": 0.28505701140228046, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7215465219383246, + "kl": 0.0089263916015625, + "learning_rate": 9.095361598462483e-07, + "loss": 0.0153, + "num_tokens": 62081214.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0574737787246704, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.10057011466799468, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08599994167390086, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09651328828101763, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1425 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1388.25, + "completions/mean_terminated_length": 1301.3333740234375, + "completions/min_length": 1003.0, + "completions/min_terminated_length": 1003.0, + "epoch": 0.28525705141028207, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.870512452206262, + "kl": 0.011077880859375, + "learning_rate": 9.093471036625046e-07, + "loss": 0.0071, + "num_tokens": 62134434.0, + "reward": -1.862645149230957e-08, + "reward_std": 1.0689113140106201, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.862645149230957e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.13191775893640179, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0600668878871811, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1426 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1308.0, + "completions/max_terminated_length": 1308.0, + "completions/mean_length": 1086.75, + "completions/mean_terminated_length": 1086.75, + "completions/min_length": 875.0, + "completions/min_terminated_length": 875.0, + "epoch": 0.2854570914182837, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9186459960893343, + "kl": 0.0136871337890625, + "learning_rate": 9.091578722594811e-07, + "loss": 0.0163, + "num_tokens": 62166134.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6704177856445312, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06676881738352387, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0883256110810812, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.03849001794597504, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1427 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1245.0, + "completions/max_terminated_length": 1245.0, + "completions/mean_length": 999.8125, + "completions/mean_terminated_length": 999.8125, + "completions/min_length": 787.0, + "completions/min_terminated_length": 787.0, + "epoch": 0.28565713142628524, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0244732662921034, + "kl": 0.011138916015625, + "learning_rate": 9.089684657294485e-07, + "loss": 0.0386, + "num_tokens": 62202843.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9074347019195557, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.11290112686195027, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16791689974542648, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13333333333333336, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1428 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1155.875, + "completions/mean_terminated_length": 1155.875, + "completions/min_length": 890.0, + "completions/min_terminated_length": 890.0, + "epoch": 0.28585717143428685, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3650552716387936, + "kl": 0.01470947265625, + "learning_rate": 9.087788841647619e-07, + "loss": -0.0417, + "num_tokens": 62243569.0, + "reward": 2.60770320892334e-08, + "reward_std": 1.0574560165405273, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.60770320892334e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.048429488818769656, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13816255467304117, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12524050936172845, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1429 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1352.0, + "completions/max_terminated_length": 1352.0, + "completions/mean_length": 1039.9375, + "completions/mean_terminated_length": 1039.9375, + "completions/min_length": 746.0, + "completions/min_terminated_length": 746.0, + "epoch": 0.28605721144228846, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.584557191961611, + "kl": 0.015380859375, + "learning_rate": 9.085891276578621e-07, + "loss": -0.0008, + "num_tokens": 62276168.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7393932342529297, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03548529603205486, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10391787791329421, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07252075054258099, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1430 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1284.0, + "completions/mean_length": 1086.9375, + "completions/mean_terminated_length": 1059.4000244140625, + "completions/min_length": 808.0, + "completions/min_terminated_length": 808.0, + "epoch": 0.2862572514502901, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2173295690349857, + "kl": 0.0102386474609375, + "learning_rate": 9.083991963012753e-07, + "loss": -0.0555, + "num_tokens": 62327047.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9480913877487183, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04938714178657769, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10183060069609942, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12345339501504503, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1431 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1430.0, + "completions/max_terminated_length": 1430.0, + "completions/mean_length": 1107.5625, + "completions/mean_terminated_length": 1107.5625, + "completions/min_length": 867.0, + "completions/min_terminated_length": 867.0, + "epoch": 0.2864572914582917, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.969396507955373, + "kl": 0.010894775390625, + "learning_rate": 9.082090901876131e-07, + "loss": -0.0155, + "num_tokens": 62368256.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.57248854637146, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.14270584830753852, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.21902709784630686, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.053748384988656986, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1432 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1374.0, + "completions/max_terminated_length": 1374.0, + "completions/mean_length": 1093.125, + "completions/mean_terminated_length": 1093.125, + "completions/min_length": 19.0, + "completions/min_terminated_length": 19.0, + "epoch": 0.28665733146629324, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.966789573573127, + "kl": 0.0133514404296875, + "learning_rate": 9.080188094095717e-07, + "loss": -0.1465, + "num_tokens": 62405794.0, + "reward": 0.0, + "reward_std": 0.7448587417602539, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.010725911205306145, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16607790791875207, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.15438048235879215, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1433 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1458.0, + "completions/mean_length": 1319.9375, + "completions/mean_terminated_length": 1294.21435546875, + "completions/min_length": 1048.0, + "completions/min_terminated_length": 1048.0, + "epoch": 0.28685737147429485, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1007067689778927, + "kl": 0.0149688720703125, + "learning_rate": 9.078283540599333e-07, + "loss": -0.0334, + "num_tokens": 62448537.0, + "reward": 0.0, + "reward_std": 1.004623532295227, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06761675652731114, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1486032876570904, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04849589520621153, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1434 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1399.0, + "completions/max_terminated_length": 1399.0, + "completions/mean_length": 1173.5625, + "completions/mean_terminated_length": 1173.5625, + "completions/min_length": 812.0, + "completions/min_terminated_length": 812.0, + "epoch": 0.28705741148229647, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.001733299573736, + "kl": 0.0135955810546875, + "learning_rate": 9.076377242315645e-07, + "loss": -0.0141, + "num_tokens": 62502594.0, + "reward": 0.0, + "reward_std": 0.9959633350372314, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.014526683687619696, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04990672513163277, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059628479399994404, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1435 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1224.0, + "completions/mean_length": 1092.1875, + "completions/mean_terminated_length": 998.0769653320312, + "completions/min_length": 790.0, + "completions/min_terminated_length": 790.0, + "epoch": 0.2872574514902981, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0508009700628747, + "kl": 0.0114898681640625, + "learning_rate": 9.074469200174174e-07, + "loss": 0.0808, + "num_tokens": 62554541.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8191786408424377, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.14534103070098459, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11194653016207683, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11547005383792518, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1436 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1409.0, + "completions/mean_length": 1064.375, + "completions/mean_terminated_length": 1035.3333740234375, + "completions/min_length": 779.0, + "completions/min_terminated_length": 779.0, + "epoch": 0.28745749149829963, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.324893106968829, + "kl": 0.012542724609375, + "learning_rate": 9.07255941510529e-07, + "loss": -0.0208, + "num_tokens": 62586091.0, + "reward": 0.0, + "reward_std": 0.6439462900161743, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0032305329902653614, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.01414079936950989, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04849589520621153, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1437 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1326.0, + "completions/max_terminated_length": 1326.0, + "completions/mean_length": 1073.125, + "completions/mean_terminated_length": 1073.125, + "completions/min_length": 826.0, + "completions/min_terminated_length": 826.0, + "epoch": 0.28765753150630125, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.663212036047132, + "kl": 0.016693115234375, + "learning_rate": 9.070647888040213e-07, + "loss": 0.0528, + "num_tokens": 62633445.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0157477855682373, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1437122305685487, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05574067579110159, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14807405554629052, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1438 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1342.0, + "completions/max_terminated_length": 1342.0, + "completions/mean_length": 1159.8125, + "completions/mean_terminated_length": 1159.8125, + "completions/min_length": 1038.0, + "completions/min_terminated_length": 1038.0, + "epoch": 0.28785757151430286, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.752617517710464, + "kl": 0.014434814453125, + "learning_rate": 9.068734619911009e-07, + "loss": 0.0274, + "num_tokens": 62680282.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6377702355384827, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04402072555422479, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0496684453823002, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06382847385042255, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1439 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1437.0, + "completions/max_terminated_length": 1437.0, + "completions/mean_length": 1163.375, + "completions/mean_terminated_length": 1163.375, + "completions/min_length": 883.0, + "completions/min_terminated_length": 883.0, + "epoch": 0.28805761152230447, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6431195521996647, + "kl": 0.0161895751953125, + "learning_rate": 9.066819611650603e-07, + "loss": 0.0046, + "num_tokens": 62726424.0, + "reward": 0.0, + "reward_std": 0.823772132396698, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.006668914177267948, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05271315148346884, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1067187372905475, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1440 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1390.0, + "completions/max_terminated_length": 1390.0, + "completions/mean_length": 1194.75, + "completions/mean_terminated_length": 1194.75, + "completions/min_length": 953.0, + "completions/min_terminated_length": 953.0, + "epoch": 0.2882576515303061, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6116560104982494, + "kl": 0.01446533203125, + "learning_rate": 9.064902864192755e-07, + "loss": -0.0131, + "num_tokens": 62765444.0, + "reward": 1.862645149230957e-09, + "reward_std": 0.9253782033920288, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.862645149230957e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.11146256705544244, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1621503231802928, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0620632890834175, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1441 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1377.0, + "completions/mean_length": 1217.5, + "completions/mean_terminated_length": 1152.3077392578125, + "completions/min_length": 1022.0, + "completions/min_terminated_length": 1022.0, + "epoch": 0.28845769153830764, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.503472413681846, + "kl": 0.0152740478515625, + "learning_rate": 9.062984378472082e-07, + "loss": -0.0149, + "num_tokens": 62808548.0, + "reward": 0.0, + "reward_std": 0.6229599714279175, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.3984581089511098, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.49422476060962095, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1002773930432755, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1442 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1357.0, + "completions/max_terminated_length": 1357.0, + "completions/mean_length": 1035.875, + "completions/mean_terminated_length": 1035.875, + "completions/min_length": 836.0, + "completions/min_terminated_length": 836.0, + "epoch": 0.28865773154630925, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4262811583916646, + "kl": 0.01220703125, + "learning_rate": 9.061064155424049e-07, + "loss": 0.015, + "num_tokens": 62848322.0, + "reward": -4.470348358154297e-08, + "reward_std": 1.0242369174957275, + "rewards/wordcountpos_reward_nokeypoint/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03972771448493, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05433772053077202, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059472994182545036, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1443 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1433.0, + "completions/mean_length": 1237.3125, + "completions/mean_terminated_length": 1176.6923828125, + "completions/min_length": 958.0, + "completions/min_terminated_length": 958.0, + "epoch": 0.28885777155431086, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3447971854944303, + "kl": 0.013214111328125, + "learning_rate": 9.059142195984962e-07, + "loss": -0.0255, + "num_tokens": 62896855.0, + "reward": 0.0, + "reward_std": 0.8576700687408447, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.025792943538292052, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12503608487953066, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06871842709362769, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1444 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1290.0, + "completions/max_terminated_length": 1290.0, + "completions/mean_length": 1132.5, + "completions/mean_terminated_length": 1132.5, + "completions/min_length": 988.0, + "completions/min_terminated_length": 988.0, + "epoch": 0.28905781156231247, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.54248264674619, + "kl": 0.00975799560546875, + "learning_rate": 9.057218501091981e-07, + "loss": -0.0143, + "num_tokens": 62937023.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6774441599845886, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.021522182034298357, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04203521866390233, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.055611083361076424, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1445 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1410.0, + "completions/mean_length": 1274.5625, + "completions/mean_terminated_length": 1242.357177734375, + "completions/min_length": 1078.0, + "completions/min_terminated_length": 1078.0, + "epoch": 0.2892578515703141, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3688332052809256, + "kl": 0.017913818359375, + "learning_rate": 9.055293071683104e-07, + "loss": 0.0093, + "num_tokens": 62986840.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9556785821914673, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1733796455378364, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08876982767129372, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0806225774829855, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1446 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1390.0, + "completions/max_terminated_length": 1390.0, + "completions/mean_length": 1043.25, + "completions/mean_terminated_length": 1043.25, + "completions/min_length": 590.0, + "completions/min_terminated_length": 590.0, + "epoch": 0.28945789157831564, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.7304195839038978, + "kl": 0.00516510009765625, + "learning_rate": 9.053365908697181e-07, + "loss": 0.0029, + "num_tokens": 63035420.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7661334276199341, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10445686745184211, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08757610864467412, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0925962962222252, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1447 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1372.0, + "completions/mean_length": 1177.625, + "completions/mean_terminated_length": 1156.1334228515625, + "completions/min_length": 943.0, + "completions/min_terminated_length": 943.0, + "epoch": 0.28965793158631725, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1968010832171654, + "kl": 0.0140228271484375, + "learning_rate": 9.051437013073904e-07, + "loss": -0.0752, + "num_tokens": 63085846.0, + "reward": 0.0, + "reward_std": 0.7767517566680908, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06838840928766243, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13576777170564866, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06382847385042255, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1448 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1196.0625, + "completions/mean_terminated_length": 1152.6429443359375, + "completions/min_length": 723.0, + "completions/min_terminated_length": 723.0, + "epoch": 0.28985797159431886, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.259752920617709, + "kl": 0.0142669677734375, + "learning_rate": 9.049506385753814e-07, + "loss": -0.0043, + "num_tokens": 63144927.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.84952712059021, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06131394142859103, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10173260810996779, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07084150279686703, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1449 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1321.0, + "completions/mean_length": 1108.3125, + "completions/mean_terminated_length": 1052.357177734375, + "completions/min_length": 785.0, + "completions/min_terminated_length": 785.0, + "epoch": 0.2900580116023205, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2265950817233313, + "kl": 0.01373291015625, + "learning_rate": 9.047574027678293e-07, + "loss": 0.0471, + "num_tokens": 63182452.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8898369073867798, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1055402880628196, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.21620710037936727, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.053748384988656986, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1450 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1193.875, + "completions/mean_terminated_length": 1173.4666748046875, + "completions/min_length": 978.0, + "completions/min_terminated_length": 978.0, + "epoch": 0.2902580516103221, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8850314238918053, + "kl": 0.017181396484375, + "learning_rate": 9.045639939789566e-07, + "loss": -0.0291, + "num_tokens": 63233714.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9372420310974121, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.11656496149537954, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1749562222997431, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10741060020797313, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1451 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1313.0, + "completions/mean_length": 1091.25, + "completions/mean_terminated_length": 1064.0, + "completions/min_length": 740.0, + "completions/min_terminated_length": 740.0, + "epoch": 0.29045809161832364, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.555212117077981, + "kl": 0.0140838623046875, + "learning_rate": 9.043704123030704e-07, + "loss": -0.0318, + "num_tokens": 63266542.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.49321120977401733, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.3301618264288724, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.39452470221263275, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059004080210452226, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1452 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1233.0, + "completions/mean_length": 1085.8125, + "completions/mean_terminated_length": 990.2308349609375, + "completions/min_length": 817.0, + "completions/min_terminated_length": 817.0, + "epoch": 0.29065813162632526, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1934974316136544, + "kl": 0.0138092041015625, + "learning_rate": 9.04176657834562e-07, + "loss": -0.0501, + "num_tokens": 63317211.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9316532611846924, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07244194316470898, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11491958043203122, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.047919685895217376, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1453 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1232.0, + "completions/max_terminated_length": 1232.0, + "completions/mean_length": 1075.0, + "completions/mean_terminated_length": 1075.0, + "completions/min_length": 862.0, + "completions/min_terminated_length": 862.0, + "epoch": 0.29085817163432687, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.943535837198437, + "kl": 0.0095062255859375, + "learning_rate": 9.03982730667907e-07, + "loss": 0.0292, + "num_tokens": 63352971.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.9767860174179077, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07798158689535901, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04489973275304509, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1080980350662545, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1454 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1480.0, + "completions/mean_length": 1406.9375, + "completions/mean_terminated_length": 1375.916748046875, + "completions/min_length": 1002.0, + "completions/min_terminated_length": 1002.0, + "epoch": 0.2910582116423285, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.141912193735392, + "kl": 0.00908660888671875, + "learning_rate": 9.037886308976651e-07, + "loss": -0.0382, + "num_tokens": 63403818.0, + "reward": 0.0, + "reward_std": 0.9899873733520508, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.16507636778233065, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1719499041764007, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10809803506625451, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1455 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1204.3125, + "completions/mean_terminated_length": 1162.071533203125, + "completions/min_length": 940.0, + "completions/min_terminated_length": 940.0, + "epoch": 0.2912582516503301, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4845952968361336, + "kl": 0.015380859375, + "learning_rate": 9.035943586184804e-07, + "loss": 0.0046, + "num_tokens": 63455143.0, + "reward": 0.0, + "reward_std": 0.7594713568687439, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.138756775500162, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16161396742588469, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09108400680852977, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1456 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1393.625, + "completions/mean_terminated_length": 1386.533447265625, + "completions/min_length": 1141.0, + "completions/min_terminated_length": 1141.0, + "epoch": 0.29145829165833165, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4273517207183803, + "kl": 0.01080322265625, + "learning_rate": 9.033999139250807e-07, + "loss": -0.0081, + "num_tokens": 63502801.0, + "reward": 0.0, + "reward_std": 0.8656008243560791, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.021200716847279023, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04449877882026275, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.047919685895217376, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1457 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1160.0, + "completions/max_terminated_length": 1160.0, + "completions/mean_length": 1024.5, + "completions/mean_terminated_length": 1024.5, + "completions/min_length": 897.0, + "completions/min_terminated_length": 897.0, + "epoch": 0.29165833166633326, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.113316107582229, + "kl": 0.013397216796875, + "learning_rate": 9.032052969122781e-07, + "loss": -0.0085, + "num_tokens": 63534481.0, + "reward": 0.0, + "reward_std": 0.6330538988113403, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.19673818084252065, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05490500154866254, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1031898645611484, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1458 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1446.0, + "completions/max_terminated_length": 1446.0, + "completions/mean_length": 1153.0625, + "completions/mean_terminated_length": 1153.0625, + "completions/min_length": 893.0, + "completions/min_terminated_length": 893.0, + "epoch": 0.29185837167433487, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2920858960264066, + "kl": 0.01611328125, + "learning_rate": 9.030105076749689e-07, + "loss": 0.0164, + "num_tokens": 63579874.0, + "reward": 0.0, + "reward_std": 0.30749011039733887, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.30864203085736447, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1837110627140668, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.16487930490266264, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1459 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1376.0, + "completions/max_terminated_length": 1376.0, + "completions/mean_length": 954.625, + "completions/mean_terminated_length": 954.625, + "completions/min_length": 626.0, + "completions/min_terminated_length": 626.0, + "epoch": 0.2920584116823365, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7738142961085592, + "kl": 0.0131988525390625, + "learning_rate": 9.02815546308133e-07, + "loss": 0.0044, + "num_tokens": 63617820.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.055699110031128, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07361828364339443, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1194078528312856, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11080513425729777, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1460 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1084.0, + "completions/mean_length": 1151.0625, + "completions/mean_terminated_length": 802.125, + "completions/min_length": 646.0, + "completions/min_terminated_length": 646.0, + "epoch": 0.2922584516903381, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6432335533796962, + "kl": 0.0108642578125, + "learning_rate": 9.02620412906835e-07, + "loss": 0.0518, + "num_tokens": 63664125.0, + "reward": 0.0, + "reward_std": 0.6230928897857666, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1432948659111888, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16174329136295432, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0910840068085298, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1461 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1451.0, + "completions/mean_length": 1067.6875, + "completions/mean_terminated_length": 1038.86669921875, + "completions/min_length": 678.0, + "completions/min_terminated_length": 678.0, + "epoch": 0.29245849169833965, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.221150727596584, + "kl": 0.013092041015625, + "learning_rate": 9.024251075662222e-07, + "loss": -0.0341, + "num_tokens": 63710496.0, + "reward": 0.0, + "reward_std": 0.6946728825569153, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.021793797382925195, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0604978129143002, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09269623828717428, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1462 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1450.0, + "completions/mean_length": 1320.1875, + "completions/mean_terminated_length": 1212.300048828125, + "completions/min_length": 918.0, + "completions/min_terminated_length": 918.0, + "epoch": 0.29265853170634126, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.856388210909843, + "kl": 0.009796142578125, + "learning_rate": 9.022296303815266e-07, + "loss": -0.0187, + "num_tokens": 63762299.0, + "reward": 0.0, + "reward_std": 0.9576707482337952, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06744152951722562, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12083063496152965, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08388704928078611, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1463 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1174.0, + "completions/max_terminated_length": 1174.0, + "completions/mean_length": 1023.625, + "completions/mean_terminated_length": 1023.625, + "completions/min_length": 854.0, + "completions/min_terminated_length": 854.0, + "epoch": 0.2928585717143429, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.216707920645848, + "kl": 0.0119781494140625, + "learning_rate": 9.02033981448064e-07, + "loss": -0.0108, + "num_tokens": 63798277.0, + "reward": 0.0, + "reward_std": 0.9886502027511597, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10058581928865096, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06391286489538102, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10027739304327549, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1464 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1226.0, + "completions/mean_length": 1047.3125, + "completions/mean_terminated_length": 1017.1333618164062, + "completions/min_length": 876.0, + "completions/min_terminated_length": 876.0, + "epoch": 0.2930586117223445, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.605091269181348, + "kl": 0.0120697021484375, + "learning_rate": 9.018381608612335e-07, + "loss": -0.0222, + "num_tokens": 63837762.0, + "reward": 0.0, + "reward_std": 0.6430993676185608, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.23230947211013522, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11900081822627306, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.048686449556014755, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1465 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1359.0, + "completions/mean_length": 1189.9375, + "completions/mean_terminated_length": 1169.2667236328125, + "completions/min_length": 992.0, + "completions/min_terminated_length": 992.0, + "epoch": 0.29325865173034604, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3845361009535004, + "kl": 0.0091400146484375, + "learning_rate": 9.016421687165179e-07, + "loss": -0.0222, + "num_tokens": 63873289.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9587173461914062, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07423628430342938, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.098313745899284, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1029203215725281, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1466 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1296.0, + "completions/mean_length": 1028.1875, + "completions/mean_terminated_length": 996.7333984375, + "completions/min_length": 668.0, + "completions/min_terminated_length": 668.0, + "epoch": 0.29345869173834765, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.237525453226578, + "kl": 0.0282440185546875, + "learning_rate": 9.014460051094841e-07, + "loss": -0.0276, + "num_tokens": 63914644.0, + "reward": 0.0, + "reward_std": 0.8310941457748413, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.2501046421728556, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.29763390740273993, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06871842709362766, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1467 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1445.0, + "completions/mean_length": 1078.6875, + "completions/mean_terminated_length": 1050.60009765625, + "completions/min_length": 628.0, + "completions/min_terminated_length": 628.0, + "epoch": 0.29365873174634927, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0740387007113092, + "kl": 0.0156097412109375, + "learning_rate": 9.012496701357824e-07, + "loss": -0.0528, + "num_tokens": 63960815.0, + "reward": -2.2351741790771484e-08, + "reward_std": 0.9935223460197449, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.025938894548628126, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0528960968986327, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0620632890834175, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1468 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1356.0, + "completions/max_terminated_length": 1356.0, + "completions/mean_length": 1092.4375, + "completions/mean_terminated_length": 1092.4375, + "completions/min_length": 862.0, + "completions/min_terminated_length": 862.0, + "epoch": 0.2938587717543509, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.136509072472882, + "kl": 0.0145416259765625, + "learning_rate": 9.010531638911465e-07, + "loss": -0.066, + "num_tokens": 63998846.0, + "reward": 0.0, + "reward_std": 0.8535526990890503, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03785504292518405, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.074647777561637, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.052880017930181294, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1469 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1234.0, + "completions/max_terminated_length": 1234.0, + "completions/mean_length": 1066.0, + "completions/mean_terminated_length": 1066.0, + "completions/min_length": 882.0, + "completions/min_terminated_length": 882.0, + "epoch": 0.2940588117623525, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.800744867598768, + "kl": 0.01161956787109375, + "learning_rate": 9.008564864713938e-07, + "loss": 0.0247, + "num_tokens": 64033686.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6364213228225708, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.15598154655481988, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2209596125517717, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06382847385042252, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1470 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1444.0, + "completions/max_terminated_length": 1444.0, + "completions/mean_length": 1141.625, + "completions/mean_terminated_length": 1141.625, + "completions/min_length": 800.0, + "completions/min_terminated_length": 800.0, + "epoch": 0.29425885177035405, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.639323954137469, + "kl": 0.0147857666015625, + "learning_rate": 9.006596379724251e-07, + "loss": -0.0015, + "num_tokens": 64076496.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9615904092788696, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.354292952568837, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2986660405151817, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06831300510639733, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1471 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1396.0, + "completions/mean_length": 1357.25, + "completions/mean_terminated_length": 1173.71435546875, + "completions/min_length": 948.0, + "completions/min_terminated_length": 948.0, + "epoch": 0.29445889177835566, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.970569845681191, + "kl": 0.012481689453125, + "learning_rate": 9.004626184902246e-07, + "loss": -0.0343, + "num_tokens": 64121196.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.49967795610427856, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.09077608659517157, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07979804813838778, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875000000000001, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0758897836290186, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1472 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1342.5625, + "completions/mean_terminated_length": 1271.0, + "completions/min_length": 1076.0, + "completions/min_terminated_length": 1076.0, + "epoch": 0.29465893178635727, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5039978896162984, + "kl": 0.01371002197265625, + "learning_rate": 9.002654281208598e-07, + "loss": -0.0179, + "num_tokens": 64156845.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6641901731491089, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07286432144569088, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11456894051311069, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9791666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.03191423692521126, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1473 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1386.0, + "completions/max_terminated_length": 1386.0, + "completions/mean_length": 1104.75, + "completions/mean_terminated_length": 1104.75, + "completions/min_length": 790.0, + "completions/min_terminated_length": 790.0, + "epoch": 0.2948589717943589, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0084651302423557, + "kl": 0.0137786865234375, + "learning_rate": 9.000680669604819e-07, + "loss": -0.0003, + "num_tokens": 64196169.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.025627851486206, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.038530950935636236, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0939846715382004, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.060705726131767695, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1474 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1352.0, + "completions/max_terminated_length": 1352.0, + "completions/mean_length": 1060.5625, + "completions/mean_terminated_length": 1060.5625, + "completions/min_length": 682.0, + "completions/min_terminated_length": 682.0, + "epoch": 0.2950590118023605, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.956226356414294, + "kl": 0.0137176513671875, + "learning_rate": 8.998705351053248e-07, + "loss": -0.005, + "num_tokens": 64236570.0, + "reward": 4.470348358154297e-08, + "reward_std": 1.0257737636566162, + "rewards/wordcountpos_reward_nokeypoint/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04463507115742593, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.045589398546474584, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12292725943057184, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1475 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1394.0, + "completions/mean_length": 1091.0, + "completions/mean_terminated_length": 1063.7333984375, + "completions/min_length": 729.0, + "completions/min_terminated_length": 729.0, + "epoch": 0.29525905181036205, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.431260185901243, + "kl": 0.017578125, + "learning_rate": 8.996728326517062e-07, + "loss": -0.0256, + "num_tokens": 64286754.0, + "reward": 0.0, + "reward_std": 0.9557709693908691, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0006668644245684647, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06798889566263612, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09574271077563382, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1476 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1161.6875, + "completions/mean_terminated_length": 1083.615478515625, + "completions/min_length": 780.0, + "completions/min_terminated_length": 780.0, + "epoch": 0.29545909181836366, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.403030712062461, + "kl": 0.00760650634765625, + "learning_rate": 8.994749596960264e-07, + "loss": 0.0259, + "num_tokens": 64339453.0, + "reward": 0.0, + "reward_std": 0.8572632074356079, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1064402892691035, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06217758825331078, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0758897836290186, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1477 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1342.0, + "completions/mean_length": 921.6875, + "completions/mean_terminated_length": 883.1333618164062, + "completions/min_length": 625.0, + "completions/min_terminated_length": 625.0, + "epoch": 0.2956591318263653, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5769315082694475, + "kl": 0.01324462890625, + "learning_rate": 8.992769163347695e-07, + "loss": -0.0272, + "num_tokens": 64377384.0, + "reward": 0.0, + "reward_std": 0.9252187013626099, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.00774871952136507, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06814940621422283, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11155467020454342, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1478 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1310.0, + "completions/mean_length": 1217.375, + "completions/mean_terminated_length": 1198.533447265625, + "completions/min_length": 889.0, + "completions/min_terminated_length": 889.0, + "epoch": 0.2958591718343669, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.778952031528686, + "kl": 0.0111083984375, + "learning_rate": 8.99078702664502e-07, + "loss": 0.0223, + "num_tokens": 64431070.0, + "reward": 0.0, + "reward_std": 0.6163296699523926, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.22924013571438784, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09746643278140915, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1102186379345533, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1479 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 1170.3125, + "completions/mean_terminated_length": 1148.3333740234375, + "completions/min_length": 994.0, + "completions/min_terminated_length": 994.0, + "epoch": 0.2960592118423685, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0771255576605028, + "kl": 0.0149993896484375, + "learning_rate": 8.98880318781874e-07, + "loss": -0.0274, + "num_tokens": 64471691.0, + "reward": -3.725290298461914e-09, + "reward_std": 1.0293776988983154, + "rewards/wordcountpos_reward_nokeypoint/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05633562055309017, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0523626044336232, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09428090415820634, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1480 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1446.0, + "completions/mean_length": 1296.125, + "completions/mean_terminated_length": 1203.45458984375, + "completions/min_length": 958.0, + "completions/min_terminated_length": 958.0, + "epoch": 0.29625925185037005, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4120439283662187, + "kl": 0.017578125, + "learning_rate": 8.986817647836183e-07, + "loss": 0.0273, + "num_tokens": 64519109.0, + "reward": 0.0, + "reward_std": 0.7146883606910706, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07919370521900305, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17624956506873782, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07649739768026004, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1481 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1288.0, + "completions/max_terminated_length": 1288.0, + "completions/mean_length": 1098.0625, + "completions/mean_terminated_length": 1098.0625, + "completions/min_length": 949.0, + "completions/min_terminated_length": 949.0, + "epoch": 0.29645929185837167, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6925064381386044, + "kl": 0.0124359130859375, + "learning_rate": 8.984830407665508e-07, + "loss": -0.0181, + "num_tokens": 64560630.0, + "reward": 7.450580596923828e-09, + "reward_std": 0.8725080490112305, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11897475599094996, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11242734043607938, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06382847385042252, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1482 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1268.625, + "completions/mean_terminated_length": 1215.2308349609375, + "completions/min_length": 952.0, + "completions/min_terminated_length": 952.0, + "epoch": 0.2966593318663733, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.13799397260347, + "kl": 0.0149993896484375, + "learning_rate": 8.982841468275702e-07, + "loss": -0.0258, + "num_tokens": 64608240.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6412814855575562, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.4064451263419023, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17246542729833855, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10610965676722954, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1483 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1275.0, + "completions/max_terminated_length": 1275.0, + "completions/mean_length": 1056.5625, + "completions/mean_terminated_length": 1056.5625, + "completions/min_length": 756.0, + "completions/min_terminated_length": 756.0, + "epoch": 0.2968593718743749, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1646433281461746, + "kl": 0.0148468017578125, + "learning_rate": 8.980850830636581e-07, + "loss": -0.0105, + "num_tokens": 64645913.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9614226222038269, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.17204861664444682, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12565115443842265, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04127594582445935, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1484 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1487.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1168.6875, + "completions/mean_terminated_length": 1168.6875, + "completions/min_length": 818.0, + "completions/min_terminated_length": 818.0, + "epoch": 0.2970594118823765, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.559117777748301, + "kl": 0.0110015869140625, + "learning_rate": 8.978858495718789e-07, + "loss": -0.0835, + "num_tokens": 64689100.0, + "reward": 3.725290298461914e-08, + "reward_std": 1.0027258396148682, + "rewards/wordcountpos_reward_nokeypoint/mean": 3.725290298461914e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.12410022018961032, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08997412396367707, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0806225774829855, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1485 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1123.625, + "completions/mean_terminated_length": 1069.857177734375, + "completions/min_length": 837.0, + "completions/min_terminated_length": 837.0, + "epoch": 0.29725945189037806, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.124756616914591, + "kl": 0.008270263671875, + "learning_rate": 8.976864464493796e-07, + "loss": 0.0604, + "num_tokens": 64734086.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9919420480728149, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10482089086309862, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09482068226933704, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08850612031567838, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1486 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1298.8125, + "completions/mean_terminated_length": 1207.3636474609375, + "completions/min_length": 877.0, + "completions/min_terminated_length": 877.0, + "epoch": 0.29745949189837967, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.881754721105014, + "kl": 0.0131378173828125, + "learning_rate": 8.974868737933903e-07, + "loss": -0.03, + "num_tokens": 64779891.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7134722471237183, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.3234027738037513, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08804527007281383, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.053748384988656986, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1487 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1268.0, + "completions/max_terminated_length": 1268.0, + "completions/mean_length": 1074.9375, + "completions/mean_terminated_length": 1074.9375, + "completions/min_length": 889.0, + "completions/min_terminated_length": 889.0, + "epoch": 0.2976595319063813, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.771710724235315, + "kl": 0.0128021240234375, + "learning_rate": 8.972871317012235e-07, + "loss": -0.0026, + "num_tokens": 64819658.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0359959602355957, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.12705859840706113, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08511717466077838, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.15563490039905004, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1488 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1427.0, + "completions/max_terminated_length": 1427.0, + "completions/mean_length": 1032.625, + "completions/mean_terminated_length": 1032.625, + "completions/min_length": 725.0, + "completions/min_terminated_length": 725.0, + "epoch": 0.2978595719143829, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0424023656064803, + "kl": 0.012847900390625, + "learning_rate": 8.970872202702742e-07, + "loss": 0.0142, + "num_tokens": 64853148.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9847686290740967, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07076789269101913, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.135001149818638, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09388724521901158, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1489 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1316.0, + "completions/max_terminated_length": 1316.0, + "completions/mean_length": 1206.375, + "completions/mean_terminated_length": 1206.375, + "completions/min_length": 994.0, + "completions/min_terminated_length": 994.0, + "epoch": 0.2980596119223845, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3051907026909917, + "kl": 0.01666259765625, + "learning_rate": 8.968871395980201e-07, + "loss": -0.0205, + "num_tokens": 64901002.0, + "reward": 0.0, + "reward_std": 0.9156355857849121, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1368870903106638, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09833598335791821, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0842175313850542, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1490 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1214.4375, + "completions/mean_terminated_length": 1173.6429443359375, + "completions/min_length": 847.0, + "completions/min_terminated_length": 847.0, + "epoch": 0.29825965193038606, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3466652612549015, + "kl": 0.020416259765625, + "learning_rate": 8.966868897820217e-07, + "loss": -0.0007, + "num_tokens": 64945657.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9633594751358032, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0008779924361256176, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11289041829393337, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10602235962635778, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1491 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1316.0, + "completions/max_terminated_length": 1316.0, + "completions/mean_length": 1014.1875, + "completions/mean_terminated_length": 1014.1875, + "completions/min_length": 668.0, + "completions/min_terminated_length": 668.0, + "epoch": 0.2984596919383877, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2551058250800744, + "kl": 0.01226806640625, + "learning_rate": 8.964864709199216e-07, + "loss": -0.0168, + "num_tokens": 64978852.0, + "reward": 0.0, + "reward_std": 1.0228623151779175, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06971016496288011, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0756492647145616, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.060705726131767695, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1492 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1152.0, + "completions/max_terminated_length": 1152.0, + "completions/mean_length": 923.625, + "completions/mean_terminated_length": 923.625, + "completions/min_length": 794.0, + "completions/min_terminated_length": 794.0, + "epoch": 0.2986597319463893, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.228022946126772, + "kl": 0.0103759765625, + "learning_rate": 8.962858831094447e-07, + "loss": -0.0489, + "num_tokens": 65006822.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.953399658203125, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.15553114273255117, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08412158802697378, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.038248698840130005, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1493 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1384.0, + "completions/max_terminated_length": 1384.0, + "completions/mean_length": 1122.375, + "completions/mean_terminated_length": 1122.375, + "completions/min_length": 950.0, + "completions/min_terminated_length": 950.0, + "epoch": 0.2988597719543909, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.506681907068181, + "kl": 0.015350341796875, + "learning_rate": 8.960851264483989e-07, + "loss": 0.0161, + "num_tokens": 65052380.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9969913363456726, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.18650316142011156, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0569976230139585, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06440611887195305, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1494 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1219.75, + "completions/mean_terminated_length": 1126.3333740234375, + "completions/min_length": 805.0, + "completions/min_terminated_length": 805.0, + "epoch": 0.29905981196239245, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9942664903202374, + "kl": 0.0130615234375, + "learning_rate": 8.95884201034674e-07, + "loss": 0.0171, + "num_tokens": 65096440.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.021674633026123, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1136037387472039, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13907427992729093, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06666666666666665, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1495 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1316.0, + "completions/max_terminated_length": 1316.0, + "completions/mean_length": 996.8125, + "completions/mean_terminated_length": 996.8125, + "completions/min_length": 687.0, + "completions/min_terminated_length": 687.0, + "epoch": 0.29925985197039406, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.654840350660976, + "kl": 0.0129547119140625, + "learning_rate": 8.956831069662418e-07, + "loss": -0.0362, + "num_tokens": 65145013.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9350187182426453, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04040900311488896, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2628408546522825, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09259629622222518, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1496 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1468.0, + "completions/max_terminated_length": 1468.0, + "completions/mean_length": 1157.3125, + "completions/mean_terminated_length": 1157.3125, + "completions/min_length": 879.0, + "completions/min_terminated_length": 879.0, + "epoch": 0.2994598919783957, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.270542580110495, + "kl": 0.0146331787109375, + "learning_rate": 8.954818443411573e-07, + "loss": -0.0443, + "num_tokens": 65181842.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5022234320640564, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10253414753385375, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.056544116542642894, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04216370213557838, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1497 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1468.0, + "completions/max_terminated_length": 1468.0, + "completions/mean_length": 1098.75, + "completions/mean_terminated_length": 1098.75, + "completions/min_length": 869.0, + "completions/min_terminated_length": 869.0, + "epoch": 0.2996599319863973, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.745260613068283, + "kl": 0.0139923095703125, + "learning_rate": 8.952804132575563e-07, + "loss": -0.0203, + "num_tokens": 65232614.0, + "reward": 0.0, + "reward_std": 0.6505610942840576, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.24523674177229154, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14083892225505676, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09878896324620104, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1498 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1407.0, + "completions/mean_length": 1271.625, + "completions/mean_terminated_length": 1134.5999755859375, + "completions/min_length": 953.0, + "completions/min_terminated_length": 953.0, + "epoch": 0.2998599719943989, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6150109401431245, + "kl": 0.014404296875, + "learning_rate": 8.950788138136581e-07, + "loss": -0.037, + "num_tokens": 65280176.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.490470290184021, + "rewards/wordcountpos_reward_nokeypoint/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.05781144308918191, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1554348742184697, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1837369294923023, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1499 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1294.0, + "completions/max_terminated_length": 1294.0, + "completions/mean_length": 1055.5625, + "completions/mean_terminated_length": 1055.5625, + "completions/min_length": 858.0, + "completions/min_terminated_length": 858.0, + "epoch": 0.30006001200240046, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.768787613600849, + "kl": 0.01031494140625, + "learning_rate": 8.948770461077634e-07, + "loss": -0.0023, + "num_tokens": 65318481.0, + "reward": 0.0, + "reward_std": 0.9595103859901428, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.11198583125518596, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04018469021931192, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.060705726131767695, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1500 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1403.0, + "completions/mean_length": 1205.5625, + "completions/mean_terminated_length": 1163.5, + "completions/min_length": 924.0, + "completions/min_terminated_length": 924.0, + "epoch": 0.30026005201040207, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3491696287669135, + "kl": 0.015899658203125, + "learning_rate": 8.946751102382548e-07, + "loss": -0.0148, + "num_tokens": 65366594.0, + "reward": 0.0, + "reward_std": 0.99042809009552, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.021085416469550484, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09752876394479107, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0957427107756338, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1501 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1468.0, + "completions/max_terminated_length": 1468.0, + "completions/mean_length": 1254.0, + "completions/mean_terminated_length": 1254.0, + "completions/min_length": 1054.0, + "completions/min_terminated_length": 1054.0, + "epoch": 0.3004600920184037, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.200960672743736, + "kl": 0.00760650634765625, + "learning_rate": 8.944730063035973e-07, + "loss": 0.0057, + "num_tokens": 65407018.0, + "reward": 0.0, + "reward_std": 0.9816587567329407, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.10838477214088388, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.051856927143104484, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04127594582445935, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1502 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1380.0, + "completions/mean_length": 1170.8125, + "completions/mean_terminated_length": 1148.86669921875, + "completions/min_length": 903.0, + "completions/min_terminated_length": 903.0, + "epoch": 0.3006601320264053, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.642798489694854, + "kl": 0.0191802978515625, + "learning_rate": 8.942707344023379e-07, + "loss": -0.0108, + "num_tokens": 65452207.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9819061756134033, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.13586025950202282, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07655252880636444, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08388704928078611, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1503 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1262.5, + "completions/mean_terminated_length": 1246.666748046875, + "completions/min_length": 989.0, + "completions/min_terminated_length": 989.0, + "epoch": 0.3008601720344069, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.317151629707701, + "kl": 0.0117034912109375, + "learning_rate": 8.940682946331049e-07, + "loss": -0.0291, + "num_tokens": 65496863.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9361814856529236, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08133425842513213, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14295678563243427, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06426219440409445, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1504 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1242.0, + "completions/max_terminated_length": 1242.0, + "completions/mean_length": 958.4375, + "completions/mean_terminated_length": 958.4375, + "completions/min_length": 754.0, + "completions/min_terminated_length": 754.0, + "epoch": 0.30106021204240846, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9739867863317815, + "kl": 0.013946533203125, + "learning_rate": 8.938656870946092e-07, + "loss": -0.0143, + "num_tokens": 65532350.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7431100010871887, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07187932649695158, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06810528362777109, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10000000000000002, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1505 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1286.0, + "completions/max_terminated_length": 1286.0, + "completions/mean_length": 1095.5625, + "completions/mean_terminated_length": 1095.5625, + "completions/min_length": 930.0, + "completions/min_terminated_length": 930.0, + "epoch": 0.30126025205041007, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2594925251225777, + "kl": 0.0152587890625, + "learning_rate": 8.936629118856429e-07, + "loss": -0.0362, + "num_tokens": 65564247.0, + "reward": 0.0, + "reward_std": 0.633538007736206, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.039761830565414505, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08652524035053083, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04194352464039305, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1506 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1327.0, + "completions/max_terminated_length": 1327.0, + "completions/mean_length": 1035.25, + "completions/mean_terminated_length": 1035.25, + "completions/min_length": 775.0, + "completions/min_terminated_length": 775.0, + "epoch": 0.3014602920584117, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.796131855149203, + "kl": 0.0181884765625, + "learning_rate": 8.934599691050802e-07, + "loss": -0.0184, + "num_tokens": 65611627.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7954545021057129, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.015767840777189242, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1023959513820248, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07588978362901862, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1507 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1468.0, + "completions/mean_length": 1242.5, + "completions/mean_terminated_length": 1205.71435546875, + "completions/min_length": 1032.0, + "completions/min_terminated_length": 1032.0, + "epoch": 0.3016603320664133, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.615371294771123, + "kl": 0.0165557861328125, + "learning_rate": 8.932568588518771e-07, + "loss": -0.0126, + "num_tokens": 65655083.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0439567565917969, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.10447999681162863, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08120217892889914, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08243965245133131, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1508 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1454.0, + "completions/mean_length": 1409.8125, + "completions/mean_terminated_length": 1355.7000732421875, + "completions/min_length": 1218.0, + "completions/min_terminated_length": 1218.0, + "epoch": 0.3018603720744149, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5731966400722177, + "kl": 0.011566162109375, + "learning_rate": 8.930535812250708e-07, + "loss": -0.0037, + "num_tokens": 65705032.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7939399480819702, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.036829665178987225, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13176620491928384, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09418264367902597, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1509 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1295.0, + "completions/max_terminated_length": 1295.0, + "completions/mean_length": 944.1875, + "completions/mean_terminated_length": 944.1875, + "completions/min_length": 577.0, + "completions/min_terminated_length": 577.0, + "epoch": 0.30206041208241646, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.618735192631545, + "kl": 0.0166015625, + "learning_rate": 8.928501363237807e-07, + "loss": -0.0612, + "num_tokens": 65743323.0, + "reward": 0.0, + "reward_std": 0.6639224886894226, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.15582244582925933, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2039605207257567, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09067647005823629, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1510 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1480.0, + "completions/max_terminated_length": 1480.0, + "completions/mean_length": 1123.8125, + "completions/mean_terminated_length": 1123.8125, + "completions/min_length": 807.0, + "completions/min_terminated_length": 807.0, + "epoch": 0.3022604520904181, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.624878780051645, + "kl": 0.0152587890625, + "learning_rate": 8.926465242472071e-07, + "loss": -0.0093, + "num_tokens": 65788880.0, + "reward": 0.0, + "reward_std": 1.019156575202942, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06585817098242952, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14030340364898145, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08681611046941139, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1511 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1450.0, + "completions/mean_length": 1279.9375, + "completions/mean_terminated_length": 1229.1539306640625, + "completions/min_length": 881.0, + "completions/min_terminated_length": 881.0, + "epoch": 0.3024604920984197, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.431986882487495, + "kl": 0.012451171875, + "learning_rate": 8.924427450946324e-07, + "loss": -0.0272, + "num_tokens": 65843399.0, + "reward": 0.0, + "reward_std": 0.618445634841919, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.2484176757848821, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1585710736093509, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12049281521534187, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1512 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1080.0, + "completions/max_terminated_length": 1080.0, + "completions/mean_length": 967.0625, + "completions/mean_terminated_length": 967.0625, + "completions/min_length": 815.0, + "completions/min_terminated_length": 815.0, + "epoch": 0.3026605321064213, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.549294182665523, + "kl": 0.016510009765625, + "learning_rate": 8.922387989654202e-07, + "loss": -0.0026, + "num_tokens": 65879424.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.65317702293396, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.13618670676040442, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13385001965619017, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06382847385042252, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1513 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1447.0, + "completions/mean_length": 1281.0625, + "completions/mean_terminated_length": 1208.0833740234375, + "completions/min_length": 1070.0, + "completions/min_terminated_length": 1070.0, + "epoch": 0.3028605721144229, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.108570786023503, + "kl": 0.01531982421875, + "learning_rate": 8.920346859590154e-07, + "loss": -0.0048, + "num_tokens": 65927145.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7916338443756104, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.11771214624775765, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16302800230351266, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0806225774829855, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1514 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1318.0, + "completions/max_terminated_length": 1318.0, + "completions/mean_length": 954.875, + "completions/mean_terminated_length": 954.875, + "completions/min_length": 752.0, + "completions/min_terminated_length": 752.0, + "epoch": 0.30306061212242447, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.498405709787319, + "kl": 0.0176239013671875, + "learning_rate": 8.918304061749449e-07, + "loss": 0.0417, + "num_tokens": 65963895.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8311885595321655, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1397846371130699, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.18119069868038354, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08509254221575907, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1515 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1263.1875, + "completions/mean_terminated_length": 1247.4000244140625, + "completions/min_length": 1034.0, + "completions/min_terminated_length": 1034.0, + "epoch": 0.3032606521304261, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.917705515150919, + "kl": 0.0125274658203125, + "learning_rate": 8.916259597128159e-07, + "loss": -0.0025, + "num_tokens": 66010618.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9304674863815308, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.007708346460429167, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05411082232439433, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06382847385042255, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1516 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1358.0, + "completions/max_terminated_length": 1358.0, + "completions/mean_length": 1098.625, + "completions/mean_terminated_length": 1098.625, + "completions/min_length": 899.0, + "completions/min_terminated_length": 899.0, + "epoch": 0.3034606921384277, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.095548805190001, + "kl": 0.012054443359375, + "learning_rate": 8.914213466723177e-07, + "loss": -0.0129, + "num_tokens": 66044244.0, + "reward": 0.0, + "reward_std": 0.5416679382324219, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06793913416221525, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11165486983106084, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05163977794943221, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1517 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1339.0, + "completions/max_terminated_length": 1339.0, + "completions/mean_length": 983.25, + "completions/mean_terminated_length": 983.25, + "completions/min_length": 693.0, + "completions/min_terminated_length": 693.0, + "epoch": 0.3036607321464293, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.403748079011543, + "kl": 0.0119781494140625, + "learning_rate": 8.912165671532204e-07, + "loss": -0.0278, + "num_tokens": 66082656.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.004054307937622, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03734762582933025, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.099160988272359, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11213417888437975, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1518 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1110.0, + "completions/mean_length": 1218.125, + "completions/mean_terminated_length": 936.25, + "completions/min_length": 742.0, + "completions/min_terminated_length": 742.0, + "epoch": 0.3038607721544309, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0581531868747214, + "kl": 0.0195465087890625, + "learning_rate": 8.910116212553758e-07, + "loss": 0.0316, + "num_tokens": 66127138.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9658659100532532, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03650161471748344, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08811384031356179, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875000000000001, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06763190130459201, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1519 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1298.625, + "completions/mean_terminated_length": 1231.5, + "completions/min_length": 884.0, + "completions/min_terminated_length": 884.0, + "epoch": 0.30406081216243247, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1569275478143783, + "kl": 0.016387939453125, + "learning_rate": 8.90806509078716e-07, + "loss": 0.0246, + "num_tokens": 66174348.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9360854625701904, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.005599815092046115, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.01239714318963069, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10576003586036263, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1520 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1464.0, + "completions/mean_length": 1318.75, + "completions/mean_terminated_length": 1210.0, + "completions/min_length": 901.0, + "completions/min_terminated_length": 901.0, + "epoch": 0.3042608521704341, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1346609444681133, + "kl": 0.014739990234375, + "learning_rate": 8.906012307232548e-07, + "loss": 0.0058, + "num_tokens": 66225432.0, + "reward": 0.0, + "reward_std": 0.6794004440307617, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0829027536728783, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.23118475652330264, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11474609652039006, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1521 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1429.0, + "completions/mean_length": 1130.0, + "completions/mean_terminated_length": 1105.3333740234375, + "completions/min_length": 884.0, + "completions/min_terminated_length": 884.0, + "epoch": 0.3044608921784357, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.633802238971679, + "kl": 0.01666259765625, + "learning_rate": 8.903957862890869e-07, + "loss": 0.0068, + "num_tokens": 66266584.0, + "reward": 0.0, + "reward_std": 0.8837441205978394, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07364807446995494, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03737661225217148, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10576003586036262, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1522 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1460.0, + "completions/mean_length": 1283.6875, + "completions/mean_terminated_length": 1153.9000244140625, + "completions/min_length": 671.0, + "completions/min_terminated_length": 671.0, + "epoch": 0.3046609321864373, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.803122281803348, + "kl": 0.00952911376953125, + "learning_rate": 8.901901758763879e-07, + "loss": -0.0431, + "num_tokens": 66323339.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9879183769226074, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.16181153421583389, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.31588122390424783, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11894598836509009, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1523 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1376.0, + "completions/max_terminated_length": 1376.0, + "completions/mean_length": 1140.5, + "completions/mean_terminated_length": 1140.5, + "completions/min_length": 878.0, + "completions/min_terminated_length": 878.0, + "epoch": 0.30486097219443886, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.502578689183381, + "kl": 0.0235595703125, + "learning_rate": 8.899843995854142e-07, + "loss": -0.0079, + "num_tokens": 66370091.0, + "reward": 0.0, + "reward_std": 0.6105871200561523, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08006050482324295, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13760736129469353, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10470416879457553, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1524 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1218.0, + "completions/max_terminated_length": 1218.0, + "completions/mean_length": 960.375, + "completions/mean_terminated_length": 960.375, + "completions/min_length": 713.0, + "completions/min_terminated_length": 713.0, + "epoch": 0.3050610122024405, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3009269404794637, + "kl": 0.01041412353515625, + "learning_rate": 8.897784575165037e-07, + "loss": 0.0205, + "num_tokens": 66413217.0, + "reward": 0.0, + "reward_std": 0.44044870138168335, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.21597470030653035, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1035851042485035, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13871099718746435, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1525 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1196.875, + "completions/mean_terminated_length": 1126.923095703125, + "completions/min_length": 786.0, + "completions/min_terminated_length": 786.0, + "epoch": 0.3052610522104421, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3367451271754547, + "kl": 0.0164794921875, + "learning_rate": 8.895723497700743e-07, + "loss": -0.0175, + "num_tokens": 66455135.0, + "reward": 0.0, + "reward_std": 0.6530417203903198, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.15566927884794357, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.18624190866049506, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08153617692869924, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1526 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1237.0, + "completions/max_terminated_length": 1237.0, + "completions/mean_length": 964.0, + "completions/mean_terminated_length": 964.0, + "completions/min_length": 794.0, + "completions/min_terminated_length": 794.0, + "epoch": 0.3054610922184437, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.997672288060783, + "kl": 0.0128173828125, + "learning_rate": 8.893660764466251e-07, + "loss": -0.0386, + "num_tokens": 66489415.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7700697183609009, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07151496771781653, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06234379907284544, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0683130051063973, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1527 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1420.0, + "completions/mean_length": 1171.1875, + "completions/mean_terminated_length": 1149.2667236328125, + "completions/min_length": 957.0, + "completions/min_terminated_length": 957.0, + "epoch": 0.3056611322264453, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.165916777062641, + "kl": 0.0150604248046875, + "learning_rate": 8.89159637646736e-07, + "loss": -0.0239, + "num_tokens": 66537866.0, + "reward": -2.2351741790771484e-08, + "reward_std": 0.9824047684669495, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.029245104699870905, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10497848939639318, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08606629658238704, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1528 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1388.0, + "completions/mean_length": 1167.75, + "completions/mean_terminated_length": 1120.2857666015625, + "completions/min_length": 841.0, + "completions/min_terminated_length": 841.0, + "epoch": 0.30586117223444687, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8660316007145012, + "kl": 0.013916015625, + "learning_rate": 8.889530334710676e-07, + "loss": 0.0027, + "num_tokens": 66582958.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9319130182266235, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.017266601624402036, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0716243675090353, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10000000000000002, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1529 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1359.0, + "completions/max_terminated_length": 1359.0, + "completions/mean_length": 1141.0, + "completions/mean_terminated_length": 1141.0, + "completions/min_length": 918.0, + "completions/min_terminated_length": 918.0, + "epoch": 0.3060612122424485, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7432420128466632, + "kl": 0.0110015869140625, + "learning_rate": 8.887462640203609e-07, + "loss": -0.003, + "num_tokens": 66632974.0, + "reward": 0.0, + "reward_std": 0.7820208072662354, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11257912591479627, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.22945100558429493, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.975, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.047919685895217376, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1530 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1284.0, + "completions/max_terminated_length": 1284.0, + "completions/mean_length": 1087.8125, + "completions/mean_terminated_length": 1087.8125, + "completions/min_length": 1009.0, + "completions/min_terminated_length": 1009.0, + "epoch": 0.3062612522504501, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.620512919839237, + "kl": 0.0104827880859375, + "learning_rate": 8.885393293954377e-07, + "loss": -0.017, + "num_tokens": 66665179.0, + "reward": 0.0, + "reward_std": 0.40479612350463867, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.05367981928433324, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12512563465257479, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04013864859597431, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1531 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1396.0, + "completions/mean_length": 1176.8125, + "completions/mean_terminated_length": 1155.2667236328125, + "completions/min_length": 676.0, + "completions/min_terminated_length": 676.0, + "epoch": 0.3064612922584517, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.9221067855564118, + "kl": 0.0065155029296875, + "learning_rate": 8.883322296972001e-07, + "loss": 0.0096, + "num_tokens": 66715840.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0368459224700928, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.013696752851053887, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06360896385023201, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0769800358919501, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1532 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1474.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1255.8125, + "completions/mean_terminated_length": 1255.8125, + "completions/min_length": 1115.0, + "completions/min_terminated_length": 1115.0, + "epoch": 0.3066613322664533, + "frac_reward_zero_std": 0.5, + "grad_norm": 2.1397027001834634, + "kl": 0.0155792236328125, + "learning_rate": 8.881249650266311e-07, + "loss": -0.0103, + "num_tokens": 66771229.0, + "reward": 0.0, + "reward_std": 0.34352079033851624, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.09300047587897282, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09732558912887689, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10390522747338701, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1533 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1420.0, + "completions/max_terminated_length": 1420.0, + "completions/mean_length": 1150.25, + "completions/mean_terminated_length": 1150.25, + "completions/min_length": 858.0, + "completions/min_terminated_length": 858.0, + "epoch": 0.30686137227445487, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.372863022240997, + "kl": 0.0148773193359375, + "learning_rate": 8.879175354847937e-07, + "loss": -0.0164, + "num_tokens": 66826233.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8424433469772339, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.017116816900314455, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06849198066827525, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1458055529095489, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1534 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1148.0, + "completions/max_terminated_length": 1148.0, + "completions/mean_length": 1003.125, + "completions/mean_terminated_length": 1003.125, + "completions/min_length": 853.0, + "completions/min_terminated_length": 853.0, + "epoch": 0.3070614122824565, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3109456410985634, + "kl": 0.0136566162109375, + "learning_rate": 8.877099411728314e-07, + "loss": -0.0043, + "num_tokens": 66867739.0, + "reward": 0.0, + "reward_std": 0.5713367462158203, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.16594488738992488, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1812521535073792, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.062063289083417524, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1535 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1437.0, + "completions/mean_length": 1193.75, + "completions/mean_terminated_length": 1150.0, + "completions/min_length": 837.0, + "completions/min_terminated_length": 837.0, + "epoch": 0.3072614522904581, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6843075752365007, + "kl": 0.0117034912109375, + "learning_rate": 8.875021821919684e-07, + "loss": -0.0116, + "num_tokens": 66905335.0, + "reward": -3.725290298461914e-08, + "reward_std": 1.0368152856826782, + "rewards/wordcountpos_reward_nokeypoint/mean": -3.725290298461914e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.030595138162730393, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07257845968053848, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.047919685895217376, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1536 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1456.0, + "completions/mean_length": 1241.5, + "completions/mean_terminated_length": 1181.84619140625, + "completions/min_length": 914.0, + "completions/min_terminated_length": 914.0, + "epoch": 0.3074614922984597, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1044174719138677, + "kl": 0.0128173828125, + "learning_rate": 8.872942586435088e-07, + "loss": -0.0458, + "num_tokens": 66952103.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6029800176620483, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.01893250888757651, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16621339695645293, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06382847385042252, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1537 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1453.0, + "completions/mean_length": 1210.4375, + "completions/mean_terminated_length": 1191.1334228515625, + "completions/min_length": 969.0, + "completions/min_terminated_length": 969.0, + "epoch": 0.3076615323064613, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.993269175790197, + "kl": 0.0145111083984375, + "learning_rate": 8.87086170628837e-07, + "loss": -0.0009, + "num_tokens": 66995582.0, + "reward": 0.0, + "reward_std": 0.8772310018539429, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06768405128306687, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04227961400132562, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04194352464039305, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1538 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1472.0, + "completions/mean_length": 1234.6875, + "completions/mean_terminated_length": 1196.7857666015625, + "completions/min_length": 946.0, + "completions/min_terminated_length": 946.0, + "epoch": 0.3078615723144629, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.064168852843537, + "kl": 0.017974853515625, + "learning_rate": 8.868779182494178e-07, + "loss": -0.0415, + "num_tokens": 67045553.0, + "reward": 0.0, + "reward_std": 0.6248084306716919, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.028222387969348225, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.26164114351834594, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0683130051063973, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1539 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1487.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1141.6875, + "completions/mean_terminated_length": 1141.6875, + "completions/min_length": 763.0, + "completions/min_terminated_length": 763.0, + "epoch": 0.3080616123224645, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6149508268696624, + "kl": 0.0134124755859375, + "learning_rate": 8.86669501606796e-07, + "loss": -0.0375, + "num_tokens": 67094844.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5268110632896423, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09201806631840828, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11471992267340937, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09953596037316065, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1540 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1371.0, + "completions/mean_length": 1220.1875, + "completions/mean_terminated_length": 1201.533447265625, + "completions/min_length": 1013.0, + "completions/min_terminated_length": 1013.0, + "epoch": 0.3082616523304661, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.010174112458887, + "kl": 0.01409912109375, + "learning_rate": 8.864609208025962e-07, + "loss": -0.0142, + "num_tokens": 67146271.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0138555765151978, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08943582770954224, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10862343694149272, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0769800358919501, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1541 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1200.0, + "completions/mean_length": 1041.0625, + "completions/mean_terminated_length": 975.5000610351562, + "completions/min_length": 830.0, + "completions/min_terminated_length": 830.0, + "epoch": 0.3084616923384677, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0652129466859486, + "kl": 0.0111083984375, + "learning_rate": 8.862521759385238e-07, + "loss": -0.0629, + "num_tokens": 67188896.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.513577938079834, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.019636106429333573, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.028770614761400356, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1567612007930345, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1542 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1168.0, + "completions/max_terminated_length": 1168.0, + "completions/mean_length": 991.625, + "completions/mean_terminated_length": 991.625, + "completions/min_length": 740.0, + "completions/min_terminated_length": 740.0, + "epoch": 0.3086617323464693, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2901308142851993, + "kl": 0.0128021240234375, + "learning_rate": 8.860432671163635e-07, + "loss": -0.0243, + "num_tokens": 67224386.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9325618743896484, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.029332266637773594, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.039245535994252616, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0749073501808141, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1543 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1336.0, + "completions/mean_length": 1150.25, + "completions/mean_terminated_length": 1126.933349609375, + "completions/min_length": 947.0, + "completions/min_terminated_length": 947.0, + "epoch": 0.3088617723544709, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.463974367225314, + "kl": 0.014312744140625, + "learning_rate": 8.858341944379801e-07, + "loss": -0.0303, + "num_tokens": 67276078.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.044062614440918, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0018586085237397671, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07785844368036289, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08027729719194865, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1544 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1455.0, + "completions/mean_length": 1207.3125, + "completions/mean_terminated_length": 1109.75, + "completions/min_length": 746.0, + "completions/min_terminated_length": 746.0, + "epoch": 0.3090618123624725, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8847180763369336, + "kl": 0.01568603515625, + "learning_rate": 8.856249580053186e-07, + "loss": -0.0341, + "num_tokens": 67320547.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9311764240264893, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.333603453537428, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08778251194398586, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08777074514725111, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1545 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1386.0, + "completions/mean_length": 1169.4375, + "completions/mean_terminated_length": 1147.4000244140625, + "completions/min_length": 962.0, + "completions/min_terminated_length": 962.0, + "epoch": 0.3092618523704741, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3929446489201998, + "kl": 0.0171356201171875, + "learning_rate": 8.854155579204036e-07, + "loss": -0.003, + "num_tokens": 67357290.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.057814121246338, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05736832395972089, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.057242950314389085, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07252075054258096, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1546 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1419.0, + "completions/max_terminated_length": 1419.0, + "completions/mean_length": 1079.375, + "completions/mean_terminated_length": 1079.375, + "completions/min_length": 829.0, + "completions/min_terminated_length": 829.0, + "epoch": 0.3094618923784757, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.990549374700591, + "kl": 0.0119171142578125, + "learning_rate": 8.852059942853393e-07, + "loss": -0.0087, + "num_tokens": 67408664.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0558922290802002, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0317410930794585, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0776846868476515, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08734775114237132, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1547 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1465.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 1205.9375, + "completions/mean_terminated_length": 1205.9375, + "completions/min_length": 977.0, + "completions/min_terminated_length": 977.0, + "epoch": 0.30966193238647727, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3311834068503545, + "kl": 0.0145111083984375, + "learning_rate": 8.8499626720231e-07, + "loss": -0.0377, + "num_tokens": 67461431.0, + "reward": 0.0, + "reward_std": 0.8043746948242188, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.038135810231763156, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12423111123486558, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09888264649460886, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1548 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1456.0, + "completions/max_terminated_length": 1456.0, + "completions/mean_length": 1104.25, + "completions/mean_terminated_length": 1104.25, + "completions/min_length": 810.0, + "completions/min_terminated_length": 810.0, + "epoch": 0.3098619723944789, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8735757825362658, + "kl": 0.011993408203125, + "learning_rate": 8.847863767735798e-07, + "loss": 0.0237, + "num_tokens": 67507699.0, + "reward": 0.0, + "reward_std": 0.6134079694747925, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.09925707505453257, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13300712941324463, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06309898162000302, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1549 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1359.0, + "completions/mean_length": 1138.4375, + "completions/mean_terminated_length": 1114.3333740234375, + "completions/min_length": 939.0, + "completions/min_terminated_length": 939.0, + "epoch": 0.3100620124024805, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.429411308111497, + "kl": 0.0148773193359375, + "learning_rate": 8.84576323101492e-07, + "loss": -0.0112, + "num_tokens": 67552754.0, + "reward": 0.0, + "reward_std": 0.6505298614501953, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04906154161910382, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17400385977157676, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06666666666666667, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1550 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1299.6875, + "completions/mean_terminated_length": 1143.888916015625, + "completions/min_length": 977.0, + "completions/min_terminated_length": 977.0, + "epoch": 0.3102620524104821, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.498309468581845, + "kl": 0.0159759521484375, + "learning_rate": 8.843661062884697e-07, + "loss": 0.0234, + "num_tokens": 67600437.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0174545049667358, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0450892917040239, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05637515811104906, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.053748384988656986, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1551 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1369.0, + "completions/mean_length": 1266.5, + "completions/mean_terminated_length": 1233.1429443359375, + "completions/min_length": 1026.0, + "completions/min_terminated_length": 1026.0, + "epoch": 0.3104620924184837, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5718639017538307, + "kl": 0.01080322265625, + "learning_rate": 8.841557264370157e-07, + "loss": -0.0143, + "num_tokens": 67641285.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9663876295089722, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.12795218700694996, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14446879804740387, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10749676997731401, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1552 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1426.0, + "completions/mean_length": 1363.375, + "completions/mean_terminated_length": 1226.75, + "completions/min_length": 1028.0, + "completions/min_terminated_length": 1028.0, + "epoch": 0.31066213242648527, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.256085574881518, + "kl": 0.01641845703125, + "learning_rate": 8.839451836497123e-07, + "loss": -0.0082, + "num_tokens": 67685531.0, + "reward": 0.0, + "reward_std": 0.6491678953170776, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.20507207402871996, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.20909742038497509, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06831300510639733, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1553 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1357.0, + "completions/mean_length": 1247.0625, + "completions/mean_terminated_length": 1230.2000732421875, + "completions/min_length": 1134.0, + "completions/min_terminated_length": 1134.0, + "epoch": 0.3108621724344869, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.405195094693673, + "kl": 0.0097808837890625, + "learning_rate": 8.837344780292207e-07, + "loss": 0.011, + "num_tokens": 67724212.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9231009483337402, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.027881383403605974, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17207995271353232, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04849589520621153, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1554 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1371.0, + "completions/mean_terminated_length": 1352.571533203125, + "completions/min_length": 968.0, + "completions/min_terminated_length": 968.0, + "epoch": 0.3110622124424885, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.972836588157598, + "kl": 0.015289306640625, + "learning_rate": 8.835236096782823e-07, + "loss": -0.0238, + "num_tokens": 67771316.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.5550954341888428, + "rewards/wordcountpos_reward_nokeypoint/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.02857398212344865, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12367797071828115, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07097208632298362, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1555 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1325.0, + "completions/max_terminated_length": 1325.0, + "completions/mean_length": 1120.1875, + "completions/mean_terminated_length": 1120.1875, + "completions/min_length": 866.0, + "completions/min_terminated_length": 866.0, + "epoch": 0.3112622524504901, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.851957644485927, + "kl": 0.0123443603515625, + "learning_rate": 8.833125786997172e-07, + "loss": 0.0168, + "num_tokens": 67820967.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0267953872680664, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03230386385232244, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.32275940328584446, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09388724521901158, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1556 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1461.0, + "completions/mean_length": 1252.4375, + "completions/mean_terminated_length": 1195.3077392578125, + "completions/min_length": 969.0, + "completions/min_terminated_length": 969.0, + "epoch": 0.3114622924584917, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1818181900393423, + "kl": 0.016754150390625, + "learning_rate": 8.831013851964253e-07, + "loss": 0.0081, + "num_tokens": 67863718.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9776814579963684, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.12382472730019461, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09074673319339863, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12524050936172845, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1557 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1037.3125, + "completions/mean_terminated_length": 1006.4667358398438, + "completions/min_length": 795.0, + "completions/min_terminated_length": 795.0, + "epoch": 0.3116623324664933, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4415119339013196, + "kl": 0.016571044921875, + "learning_rate": 8.828900292713852e-07, + "loss": 0.0148, + "num_tokens": 67911515.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0534327030181885, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0582770672843776, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06304682342911058, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08388704928078611, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1558 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1436.0, + "completions/max_terminated_length": 1436.0, + "completions/mean_length": 1160.75, + "completions/mean_terminated_length": 1160.75, + "completions/min_length": 972.0, + "completions/min_terminated_length": 972.0, + "epoch": 0.3118623724744949, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7565348189723875, + "kl": 0.01190185546875, + "learning_rate": 8.826785110276554e-07, + "loss": 0.0166, + "num_tokens": 67951463.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.836932897567749, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0039861842044254525, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15472951817433742, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.03415650255319865, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1559 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1409.0, + "completions/max_terminated_length": 1409.0, + "completions/mean_length": 1227.0, + "completions/mean_terminated_length": 1227.0, + "completions/min_length": 1014.0, + "completions/min_terminated_length": 1014.0, + "epoch": 0.3120624124824965, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3748036942887967, + "kl": 0.0153045654296875, + "learning_rate": 8.824668305683727e-07, + "loss": -0.014, + "num_tokens": 68001207.0, + "reward": 0.0, + "reward_std": 0.3932991325855255, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.22519030351698538, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16613151835597195, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11474609652039004, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1560 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1251.0, + "completions/max_terminated_length": 1251.0, + "completions/mean_length": 1006.0, + "completions/mean_terminated_length": 1006.0, + "completions/min_length": 813.0, + "completions/min_terminated_length": 813.0, + "epoch": 0.3122624524904981, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.819133271984688, + "kl": 0.022979736328125, + "learning_rate": 8.822549879967542e-07, + "loss": -0.0231, + "num_tokens": 68046047.0, + "reward": 0.0, + "reward_std": 0.8953697681427002, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.09204901933599412, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10894520846407058, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1270024788326182, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1561 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1218.0, + "completions/mean_terminated_length": 1048.800048828125, + "completions/min_length": 813.0, + "completions/min_terminated_length": 813.0, + "epoch": 0.3124624924984997, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.387027746871795, + "kl": 0.0162200927734375, + "learning_rate": 8.820429834160944e-07, + "loss": 0.0021, + "num_tokens": 68099159.0, + "reward": 0.0, + "reward_std": 0.3904249668121338, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0174456505694662, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1280175381768255, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13443985299781488, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1562 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1022.0, + "completions/max_terminated_length": 1022.0, + "completions/mean_length": 872.875, + "completions/mean_terminated_length": 872.875, + "completions/min_length": 696.0, + "completions/min_terminated_length": 696.0, + "epoch": 0.3126625325065013, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.202885711628843, + "kl": 0.013885498046875, + "learning_rate": 8.818308169297683e-07, + "loss": -0.006, + "num_tokens": 68139645.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6227456331253052, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.01393751494993977, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07002389978848976, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06652763279965644, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1563 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1403.0, + "completions/mean_length": 1235.3125, + "completions/mean_terminated_length": 1217.666748046875, + "completions/min_length": 886.0, + "completions/min_terminated_length": 886.0, + "epoch": 0.3128625725145029, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.747932933472774, + "kl": 0.009918212890625, + "learning_rate": 8.816184886412291e-07, + "loss": 0.0266, + "num_tokens": 68186402.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0554414987564087, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.032470716110369585, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08606098288083802, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05146016078626404, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1564 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1355.0, + "completions/max_terminated_length": 1355.0, + "completions/mean_length": 1110.1875, + "completions/mean_terminated_length": 1110.1875, + "completions/min_length": 841.0, + "completions/min_terminated_length": 841.0, + "epoch": 0.3130626125225045, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4637134559634597, + "kl": 0.0151214599609375, + "learning_rate": 8.814059986540087e-07, + "loss": 0.0213, + "num_tokens": 68233861.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9634569883346558, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.014412624585769928, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05987796257033845, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.045338235029118136, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1565 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1078.0, + "completions/max_terminated_length": 1078.0, + "completions/mean_length": 908.6875, + "completions/mean_terminated_length": 908.6875, + "completions/min_length": 722.0, + "completions/min_terminated_length": 722.0, + "epoch": 0.3132626525305061, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9042779901551183, + "kl": 0.026397705078125, + "learning_rate": 8.811933470717187e-07, + "loss": -0.0152, + "num_tokens": 68284336.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0499651432037354, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07770044444583708, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08241420068450797, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11538983843829065, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1566 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1482.0, + "completions/mean_length": 1203.0, + "completions/mean_terminated_length": 1160.571533203125, + "completions/min_length": 1018.0, + "completions/min_terminated_length": 1018.0, + "epoch": 0.3134626925385077, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2737021828291715, + "kl": 0.0144500732421875, + "learning_rate": 8.809805339980489e-07, + "loss": 0.0157, + "num_tokens": 68335552.0, + "reward": 0.0, + "reward_std": 0.8819326758384705, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.018706090337757164, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11533974246052729, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.046943622609505783, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1567 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1472.0, + "completions/mean_length": 1292.5625, + "completions/mean_terminated_length": 1262.9285888671875, + "completions/min_length": 1058.0, + "completions/min_terminated_length": 1058.0, + "epoch": 0.3136627325465093, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5946304108860647, + "kl": 0.008453369140625, + "learning_rate": 8.807675595367674e-07, + "loss": -0.0045, + "num_tokens": 68375153.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7779878377914429, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03930271280124304, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13016659259025307, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04849589520621153, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1568 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1489.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1265.9375, + "completions/mean_terminated_length": 1265.9375, + "completions/min_length": 1022.0, + "completions/min_terminated_length": 1022.0, + "epoch": 0.3138627725545109, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.316365793532417, + "kl": 0.010040283203125, + "learning_rate": 8.805544237917222e-07, + "loss": 0.0074, + "num_tokens": 68422824.0, + "reward": 0.0, + "reward_std": 0.7255394458770752, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -6.50927193236962e-05, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.117616349373657, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08933913745655643, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1569 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1362.0, + "completions/mean_length": 1203.3125, + "completions/mean_terminated_length": 1183.533447265625, + "completions/min_length": 888.0, + "completions/min_terminated_length": 888.0, + "epoch": 0.3140628125625125, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8407243360201586, + "kl": 0.0126953125, + "learning_rate": 8.803411268668387e-07, + "loss": 0.0014, + "num_tokens": 68472269.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.40839457511901855, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.02903002995849416, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08684412965160909, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10532137766186216, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1570 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1072.0, + "completions/max_terminated_length": 1072.0, + "completions/mean_length": 981.0, + "completions/mean_terminated_length": 981.0, + "completions/min_length": 849.0, + "completions/min_terminated_length": 849.0, + "epoch": 0.3142628525705141, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.543472113999837, + "kl": 0.006683349609375, + "learning_rate": 8.801276688661217e-07, + "loss": -0.006, + "num_tokens": 68511333.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8837566375732422, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.2404552355184828, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10210351258933284, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09388724521901161, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1571 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1420.0, + "completions/mean_length": 1218.875, + "completions/mean_terminated_length": 1178.71435546875, + "completions/min_length": 935.0, + "completions/min_terminated_length": 935.0, + "epoch": 0.31446289257851573, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.712699489050625, + "kl": 0.016845703125, + "learning_rate": 8.799140498936545e-07, + "loss": -0.0202, + "num_tokens": 68555635.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0533692836761475, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.034115091815113234, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10916260938863508, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0768596604689834, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1572 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1416.0, + "completions/mean_length": 1228.9375, + "completions/mean_terminated_length": 1210.86669921875, + "completions/min_length": 1046.0, + "completions/min_terminated_length": 1046.0, + "epoch": 0.3146629325865173, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.049937885900016, + "kl": 0.0159149169921875, + "learning_rate": 8.797002700535984e-07, + "loss": 0.0023, + "num_tokens": 68601674.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5282076597213745, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.12637421920929162, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.27309316042425996, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.055611083361076424, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1573 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1390.0, + "completions/max_terminated_length": 1390.0, + "completions/mean_length": 1052.375, + "completions/mean_terminated_length": 1052.375, + "completions/min_length": 775.0, + "completions/min_terminated_length": 775.0, + "epoch": 0.3148629725945189, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.520872205702687, + "kl": 0.01422119140625, + "learning_rate": 8.794863294501934e-07, + "loss": 0.01, + "num_tokens": 68647816.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.987443208694458, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04585016807101949, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08348301345940719, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.17841898254763516, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1574 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1216.0, + "completions/max_terminated_length": 1216.0, + "completions/mean_length": 1001.9375, + "completions/mean_terminated_length": 1001.9375, + "completions/min_length": 866.0, + "completions/min_terminated_length": 866.0, + "epoch": 0.3150630126025205, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.168035203364408, + "kl": 0.020721435546875, + "learning_rate": 8.792722281877581e-07, + "loss": 0.0231, + "num_tokens": 68692711.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0485684871673584, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06405527421320906, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05329413430128486, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07290277645477444, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1575 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1214.0, + "completions/max_terminated_length": 1214.0, + "completions/mean_length": 978.9375, + "completions/mean_terminated_length": 978.9375, + "completions/min_length": 840.0, + "completions/min_terminated_length": 840.0, + "epoch": 0.3152630526105221, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4372410427110593, + "kl": 0.0139007568359375, + "learning_rate": 8.790579663706891e-07, + "loss": 0.0232, + "num_tokens": 68726430.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9567955136299133, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0017572428584407924, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04910352605209023, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.057089922571845, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1576 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1188.0, + "completions/max_terminated_length": 1188.0, + "completions/mean_length": 981.375, + "completions/mean_terminated_length": 981.375, + "completions/min_length": 704.0, + "completions/min_terminated_length": 704.0, + "epoch": 0.3154630926185237, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.529536959034316, + "kl": 0.0167388916015625, + "learning_rate": 8.788435441034614e-07, + "loss": -0.0104, + "num_tokens": 68768156.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9057783484458923, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.19760043988063367, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1039852845679733, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08606629658238706, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1577 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1490.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1178.625, + "completions/mean_terminated_length": 1178.625, + "completions/min_length": 793.0, + "completions/min_terminated_length": 793.0, + "epoch": 0.3156631326265253, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2553965957084503, + "kl": 0.0149993896484375, + "learning_rate": 8.786289614906283e-07, + "loss": 0.0205, + "num_tokens": 68808094.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0193744897842407, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04816662785876333, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06815695472157372, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07302967433402213, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1578 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1382.0, + "completions/mean_length": 1170.125, + "completions/mean_terminated_length": 1148.1334228515625, + "completions/min_length": 870.0, + "completions/min_terminated_length": 870.0, + "epoch": 0.3158631726345269, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.831057413669822, + "kl": 0.0139617919921875, + "learning_rate": 8.784142186368214e-07, + "loss": -0.0189, + "num_tokens": 68845888.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0009454488754272, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1768378487925814, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.3740058803714264, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10292032157252809, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1579 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1438.0, + "completions/max_terminated_length": 1438.0, + "completions/mean_length": 1164.5, + "completions/mean_terminated_length": 1164.5, + "completions/min_length": 784.0, + "completions/min_terminated_length": 784.0, + "epoch": 0.3160632126425285, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8359774592080127, + "kl": 0.010833740234375, + "learning_rate": 8.781993156467503e-07, + "loss": -0.0045, + "num_tokens": 68893016.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.7408274412155151, + "rewards/wordcountpos_reward_nokeypoint/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1025514789673954, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1770333180637127, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06309898162000305, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1580 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1435.0, + "completions/mean_length": 1373.1875, + "completions/mean_terminated_length": 1315.5455322265625, + "completions/min_length": 1191.0, + "completions/min_terminated_length": 1191.0, + "epoch": 0.3162632526505301, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.863172656354749, + "kl": 0.0142974853515625, + "learning_rate": 8.779842526252024e-07, + "loss": -0.0241, + "num_tokens": 68936787.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9326673746109009, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07816588073754896, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03493885543472909, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04013864859597431, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1581 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1452.0, + "completions/max_terminated_length": 1452.0, + "completions/mean_length": 1190.6875, + "completions/mean_terminated_length": 1190.6875, + "completions/min_length": 1015.0, + "completions/min_terminated_length": 1015.0, + "epoch": 0.3164632926585317, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.012050585613113, + "kl": 0.01483154296875, + "learning_rate": 8.777690296770437e-07, + "loss": -0.031, + "num_tokens": 68979182.0, + "reward": 0.0, + "reward_std": 0.4231562614440918, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.14193267862415543, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16105493883351918, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.03849001794597504, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1582 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1156.4375, + "completions/mean_terminated_length": 1041.916748046875, + "completions/min_length": 667.0, + "completions/min_terminated_length": 667.0, + "epoch": 0.3166633326665333, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2310882023717236, + "kl": 0.01739501953125, + "learning_rate": 8.775536469072178e-07, + "loss": -0.0417, + "num_tokens": 69031333.0, + "reward": 0.0, + "reward_std": 0.9009206891059875, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.011640803476542637, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11237890611651088, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04554200340426487, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1583 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1138.125, + "completions/mean_terminated_length": 1114.0, + "completions/min_length": 930.0, + "completions/min_terminated_length": 930.0, + "epoch": 0.3168633726745349, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.706058364801564, + "kl": 0.0128631591796875, + "learning_rate": 8.77338104420746e-07, + "loss": -0.0219, + "num_tokens": 69075823.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9633883237838745, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04411158005625959, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0679992210324415, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09098229375970789, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1584 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1141.0, + "completions/max_terminated_length": 1141.0, + "completions/mean_length": 1008.0625, + "completions/mean_terminated_length": 1008.0625, + "completions/min_length": 876.0, + "completions/min_terminated_length": 876.0, + "epoch": 0.3170634126825365, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3349318939310533, + "kl": 0.013702392578125, + "learning_rate": 8.771224023227284e-07, + "loss": -0.0145, + "num_tokens": 69116264.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0135014057159424, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03031860242004235, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12011079325262361, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10470416879457553, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1585 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1136.0, + "completions/mean_length": 893.6875, + "completions/mean_terminated_length": 853.2667236328125, + "completions/min_length": 532.0, + "completions/min_terminated_length": 532.0, + "epoch": 0.31726345269053813, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2655702196955416, + "kl": 0.0128173828125, + "learning_rate": 8.769065407183418e-07, + "loss": -0.0725, + "num_tokens": 69152035.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6884259581565857, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.017793845856801882, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09540840634758814, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08062257748298547, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1586 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1390.1875, + "completions/mean_terminated_length": 1304.77783203125, + "completions/min_length": 1032.0, + "completions/min_terminated_length": 1032.0, + "epoch": 0.3174634926985397, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0190871231213925, + "kl": 0.017486572265625, + "learning_rate": 8.766905197128416e-07, + "loss": -0.0032, + "num_tokens": 69209198.0, + "reward": 0.0, + "reward_std": 0.9193522930145264, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.01005359472513982, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.20526985534185327, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07649739768026004, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1587 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1424.0, + "completions/max_terminated_length": 1424.0, + "completions/mean_length": 1007.75, + "completions/mean_terminated_length": 1007.75, + "completions/min_length": 737.0, + "completions/min_terminated_length": 737.0, + "epoch": 0.3176635327065413, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.376969040296407, + "kl": 0.0231475830078125, + "learning_rate": 8.764743394115604e-07, + "loss": 0.0217, + "num_tokens": 69259450.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.890683650970459, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03273988514088577, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.02591889522678367, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13743685418725538, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1588 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 917.0, + "completions/max_terminated_length": 917.0, + "completions/mean_length": 763.3125, + "completions/mean_terminated_length": 763.3125, + "completions/min_length": 601.0, + "completions/min_terminated_length": 601.0, + "epoch": 0.3178635727145429, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6127873309891316, + "kl": 0.0048675537109375, + "learning_rate": 8.762579999199089e-07, + "loss": 0.0278, + "num_tokens": 69290055.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9596253633499146, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1291461837175494, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0751567591495695, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08509254221575911, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1589 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1388.0, + "completions/mean_length": 1195.8125, + "completions/mean_terminated_length": 1152.357177734375, + "completions/min_length": 909.0, + "completions/min_terminated_length": 909.0, + "epoch": 0.3180636127225445, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1989007082156875, + "kl": 0.0178375244140625, + "learning_rate": 8.76041501343375e-07, + "loss": -0.0362, + "num_tokens": 69329988.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0539488792419434, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10626895849224742, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09206677831900432, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06763190130459201, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1590 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1316.0, + "completions/mean_length": 1072.8125, + "completions/mean_terminated_length": 1044.3333740234375, + "completions/min_length": 519.0, + "completions/min_terminated_length": 519.0, + "epoch": 0.31826365273054613, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.168236544106154, + "kl": 0.01300048828125, + "learning_rate": 8.758248437875246e-07, + "loss": 0.0151, + "num_tokens": 69376873.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.2845609784126282, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03843839609805451, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04365059945346153, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.19883922409081423, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1591 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1360.0, + "completions/mean_terminated_length": 1340.0, + "completions/min_length": 1109.0, + "completions/min_terminated_length": 1109.0, + "epoch": 0.3184636927385477, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4256767188723587, + "kl": 0.01418304443359375, + "learning_rate": 8.756080273580009e-07, + "loss": -0.0053, + "num_tokens": 69422481.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8461411595344543, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07154143338892291, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.3700170242145051, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.060705726131767695, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1592 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1402.0, + "completions/mean_length": 1149.0625, + "completions/mean_terminated_length": 1068.076904296875, + "completions/min_length": 778.0, + "completions/min_terminated_length": 778.0, + "epoch": 0.3186637327465493, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.766176412180995, + "kl": 0.016448974609375, + "learning_rate": 8.753910521605245e-07, + "loss": 0.0485, + "num_tokens": 69472858.0, + "reward": 0.0, + "reward_std": 0.8589536547660828, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.18568465168836937, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1005047635303826, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10470416879457553, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1593 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1498.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1289.4375, + "completions/mean_terminated_length": 1289.4375, + "completions/min_length": 1034.0, + "completions/min_terminated_length": 1034.0, + "epoch": 0.3188637727545509, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0036497418892485, + "kl": 0.018035888671875, + "learning_rate": 8.751739183008935e-07, + "loss": -0.0313, + "num_tokens": 69522073.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7860769033432007, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1434918637064869, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13496643115263282, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0906764700582363, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1594 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1372.0, + "completions/max_terminated_length": 1372.0, + "completions/mean_length": 1152.1875, + "completions/mean_terminated_length": 1152.1875, + "completions/min_length": 800.0, + "completions/min_terminated_length": 800.0, + "epoch": 0.3190638127625525, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3169390635965628, + "kl": 0.016998291015625, + "learning_rate": 8.749566258849833e-07, + "loss": -0.0221, + "num_tokens": 69566196.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9409724473953247, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.23593397648713293, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.20324771313028156, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07097208632298359, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1595 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1391.0, + "completions/mean_length": 1208.625, + "completions/mean_terminated_length": 1189.2000732421875, + "completions/min_length": 924.0, + "completions/min_terminated_length": 924.0, + "epoch": 0.31926385277055414, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.838832679295083, + "kl": 0.0127105712890625, + "learning_rate": 8.747391750187468e-07, + "loss": -0.027, + "num_tokens": 69617702.0, + "reward": 0.0, + "reward_std": 0.7193774580955505, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10640023227392012, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15452587749775343, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04367387557118565, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1596 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1458.0, + "completions/mean_length": 1303.75, + "completions/mean_terminated_length": 1258.4615478515625, + "completions/min_length": 1023.0, + "completions/min_terminated_length": 1023.0, + "epoch": 0.3194638927785557, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.238079299446238, + "kl": 0.0147552490234375, + "learning_rate": 8.745215658082138e-07, + "loss": -0.0115, + "num_tokens": 69669042.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7563956379890442, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03163137820936244, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.232191944344485, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06426219440409446, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1597 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1242.0, + "completions/mean_length": 1099.8125, + "completions/mean_terminated_length": 1073.1334228515625, + "completions/min_length": 904.0, + "completions/min_terminated_length": 904.0, + "epoch": 0.3196639327865573, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1769426471143074, + "kl": 0.005176544189453125, + "learning_rate": 8.743037983594917e-07, + "loss": 0.0129, + "num_tokens": 69711527.0, + "reward": 0.0, + "reward_std": 0.7059041261672974, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.061736941519157804, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10206645735012487, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09699179041242309, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1598 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1447.0, + "completions/max_terminated_length": 1447.0, + "completions/mean_length": 1158.8125, + "completions/mean_terminated_length": 1158.8125, + "completions/min_length": 874.0, + "completions/min_terminated_length": 874.0, + "epoch": 0.3198639727945589, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.436055404830759, + "kl": 0.01519775390625, + "learning_rate": 8.740858727787651e-07, + "loss": 0.0083, + "num_tokens": 69765788.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9875586032867432, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.011430848736754507, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03805284136424146, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875000000000001, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.052880017930181294, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1599 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1449.0, + "completions/mean_length": 1260.5, + "completions/mean_terminated_length": 1205.2308349609375, + "completions/min_length": 1001.0, + "completions/min_terminated_length": 1001.0, + "epoch": 0.3200640128025605, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9486499606207435, + "kl": 0.0144805908203125, + "learning_rate": 8.738677891722951e-07, + "loss": 0.0419, + "num_tokens": 69807636.0, + "reward": 0.0, + "reward_std": 0.6569361090660095, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.012312117164027785, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1859341569980262, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 1.0, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1600 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1368.0, + "completions/mean_length": 1158.625, + "completions/mean_terminated_length": 1135.86669921875, + "completions/min_length": 906.0, + "completions/min_terminated_length": 906.0, + "epoch": 0.32026405281056214, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.55837228893192, + "kl": 0.025115966796875, + "learning_rate": 8.736495476464205e-07, + "loss": -0.0006, + "num_tokens": 69859262.0, + "reward": 1.862645149230957e-09, + "reward_std": 1.0665721893310547, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.862645149230957e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07024022193316838, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0793698106313431, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10000000000000002, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1601 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1289.75, + "completions/mean_terminated_length": 1259.71435546875, + "completions/min_length": 1077.0, + "completions/min_terminated_length": 1077.0, + "epoch": 0.3204640928185637, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.090684105940411, + "kl": 0.0164794921875, + "learning_rate": 8.734311483075568e-07, + "loss": 0.0005, + "num_tokens": 69907170.0, + "reward": 0.0, + "reward_std": 0.9218685626983643, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.004858181631444906, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2923329544489037, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1128748897706693, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1602 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1305.0, + "completions/max_terminated_length": 1305.0, + "completions/mean_length": 1117.125, + "completions/mean_terminated_length": 1117.125, + "completions/min_length": 981.0, + "completions/min_terminated_length": 981.0, + "epoch": 0.3206641328265653, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1871910524568796, + "kl": 0.0136871337890625, + "learning_rate": 8.732125912621966e-07, + "loss": -0.0438, + "num_tokens": 69949796.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0417417287826538, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.13587357358056434, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06439346264272873, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1180081604209045, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1603 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1403.0, + "completions/max_terminated_length": 1403.0, + "completions/mean_length": 1119.5, + "completions/mean_terminated_length": 1119.5, + "completions/min_length": 851.0, + "completions/min_terminated_length": 851.0, + "epoch": 0.3208641728345669, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4874681210949556, + "kl": 0.01458740234375, + "learning_rate": 8.729938766169092e-07, + "loss": -0.0437, + "num_tokens": 69989828.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.712749719619751, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.021201384905764394, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09586446081064263, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08027729719194865, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1604 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1294.0, + "completions/max_terminated_length": 1294.0, + "completions/mean_length": 1021.875, + "completions/mean_terminated_length": 1021.875, + "completions/min_length": 707.0, + "completions/min_terminated_length": 707.0, + "epoch": 0.32106421284256853, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.972140976736618, + "kl": 0.0191650390625, + "learning_rate": 8.727750044783408e-07, + "loss": 0.0148, + "num_tokens": 70030618.0, + "reward": 0.0, + "reward_std": 1.0084891319274902, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09391227351148718, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07959649126985735, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05426273532033234, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1605 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1319.0, + "completions/mean_length": 1175.1875, + "completions/mean_terminated_length": 1128.7857666015625, + "completions/min_length": 876.0, + "completions/min_terminated_length": 876.0, + "epoch": 0.3212642528505701, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6504206315928798, + "kl": 0.016357421875, + "learning_rate": 8.725559749532145e-07, + "loss": -0.0181, + "num_tokens": 70084277.0, + "reward": 0.0, + "reward_std": 0.37805092334747314, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.25828148930831724, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.25872950152398033, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.107496769977314, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1606 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1329.0, + "completions/mean_length": 1283.625, + "completions/mean_terminated_length": 1067.25, + "completions/min_length": 446.0, + "completions/min_terminated_length": 446.0, + "epoch": 0.3214642928585717, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5193496491371596, + "kl": 0.010650634765625, + "learning_rate": 8.723367881483301e-07, + "loss": -0.1162, + "num_tokens": 70140015.0, + "reward": 0.0, + "reward_std": 0.8486621379852295, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05229387057894984, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11283809812085055, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08944271909999159, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1607 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1497.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1236.4375, + "completions/mean_terminated_length": 1236.4375, + "completions/min_length": 973.0, + "completions/min_terminated_length": 973.0, + "epoch": 0.3216643328665733, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3101395714302977, + "kl": 0.0152435302734375, + "learning_rate": 8.721174441705642e-07, + "loss": -0.0103, + "num_tokens": 70176174.0, + "reward": 0.0, + "reward_std": 0.585083544254303, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.2211297090363789, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.18309663096726664, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07084150279686702, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1608 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1341.0, + "completions/mean_length": 1172.375, + "completions/mean_terminated_length": 1150.533447265625, + "completions/min_length": 788.0, + "completions/min_terminated_length": 788.0, + "epoch": 0.3218643728745749, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4562560000312605, + "kl": 0.0090789794921875, + "learning_rate": 8.718979431268698e-07, + "loss": -0.03, + "num_tokens": 70225932.0, + "reward": -1.862645149230957e-09, + "reward_std": 1.0667715072631836, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.862645149230957e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03144847289064172, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08062365779862818, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059472994182545036, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1609 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1173.0, + "completions/max_terminated_length": 1173.0, + "completions/mean_length": 976.875, + "completions/mean_terminated_length": 976.875, + "completions/min_length": 658.0, + "completions/min_terminated_length": 658.0, + "epoch": 0.32206441288257653, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0946638383679574, + "kl": 0.0145721435546875, + "learning_rate": 8.716782851242766e-07, + "loss": -0.0064, + "num_tokens": 70261610.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0011439323425293, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.10594723993838624, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04985824585152225, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.067631901304592, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1610 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1285.875, + "completions/mean_terminated_length": 1255.2857666015625, + "completions/min_length": 959.0, + "completions/min_terminated_length": 959.0, + "epoch": 0.3222644528905781, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4052647817814425, + "kl": 0.0177459716796875, + "learning_rate": 8.714584702698908e-07, + "loss": -0.0118, + "num_tokens": 70307656.0, + "reward": 0.0, + "reward_std": 0.6776635646820068, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1902137058990272, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1623243617189691, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08073734277593311, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1611 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1285.0, + "completions/max_terminated_length": 1285.0, + "completions/mean_length": 1000.875, + "completions/mean_terminated_length": 1000.875, + "completions/min_length": 691.0, + "completions/min_terminated_length": 691.0, + "epoch": 0.3224644928985797, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6159184533700612, + "kl": 0.01385498046875, + "learning_rate": 8.712384986708953e-07, + "loss": -0.0067, + "num_tokens": 70356342.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9368137121200562, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07123655702396524, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08287781792536573, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.038248698840130005, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1612 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1068.0, + "completions/max_terminated_length": 1068.0, + "completions/mean_length": 937.625, + "completions/mean_terminated_length": 937.625, + "completions/min_length": 657.0, + "completions/min_terminated_length": 657.0, + "epoch": 0.3226645329065813, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3485940015742814, + "kl": 0.0124053955078125, + "learning_rate": 8.710183704345492e-07, + "loss": -0.0054, + "num_tokens": 70407216.0, + "reward": 0.0, + "reward_std": 0.9064692258834839, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09037283496172381, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0866870289026536, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.102469507659596, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1613 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1115.0, + "completions/mean_terminated_length": 1089.3333740234375, + "completions/min_length": 928.0, + "completions/min_terminated_length": 928.0, + "epoch": 0.3228645729145829, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2695990984053074, + "kl": 0.01904296875, + "learning_rate": 8.707980856681878e-07, + "loss": -0.0014, + "num_tokens": 70447656.0, + "reward": 0.0, + "reward_std": 0.5096175074577332, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.15345562194134613, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14075270013272095, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08681611046941136, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1614 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1172.0, + "completions/mean_length": 1017.5625, + "completions/mean_terminated_length": 985.4000244140625, + "completions/min_length": 772.0, + "completions/min_terminated_length": 772.0, + "epoch": 0.32306461292258454, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.015156629816648, + "kl": 0.0148773193359375, + "learning_rate": 8.705776444792232e-07, + "loss": -0.1044, + "num_tokens": 70481625.0, + "reward": 0.0, + "reward_std": 0.8758313655853271, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.025088008917042, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17801505215891622, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.048686449556014755, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1615 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1448.0, + "completions/mean_length": 1169.9375, + "completions/mean_terminated_length": 1059.916748046875, + "completions/min_length": 19.0, + "completions/min_terminated_length": 19.0, + "epoch": 0.3232646529305861, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.9746859869781739, + "kl": 0.0128326416015625, + "learning_rate": 8.703570469751433e-07, + "loss": -0.1559, + "num_tokens": 70531176.0, + "reward": 0.0, + "reward_std": 0.8245944380760193, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.12109940929414105, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08144316581567412, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.16233253479155635, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1616 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1184.0, + "completions/max_terminated_length": 1184.0, + "completions/mean_length": 993.5625, + "completions/mean_terminated_length": 993.5625, + "completions/min_length": 808.0, + "completions/min_terminated_length": 808.0, + "epoch": 0.3234646929385877, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1545169654369434, + "kl": 0.014434814453125, + "learning_rate": 8.701362932635128e-07, + "loss": -0.0107, + "num_tokens": 70569625.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9641684293746948, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.19119419454963493, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13635239531690796, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11147163731607214, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1617 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1274.0, + "completions/max_terminated_length": 1274.0, + "completions/mean_length": 903.4375, + "completions/mean_terminated_length": 903.4375, + "completions/min_length": 634.0, + "completions/min_terminated_length": 634.0, + "epoch": 0.3236647329465893, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5691054184269233, + "kl": 0.018951416015625, + "learning_rate": 8.699153834519718e-07, + "loss": -0.0545, + "num_tokens": 70612528.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0586519241333008, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07178792990165364, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05597779545397543, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08421753138505422, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1618 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1239.1875, + "completions/mean_terminated_length": 1201.9285888671875, + "completions/min_length": 792.0, + "completions/min_terminated_length": 792.0, + "epoch": 0.32386477295459093, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0886027795912354, + "kl": 0.0153350830078125, + "learning_rate": 8.696943176482372e-07, + "loss": -0.0792, + "num_tokens": 70659499.0, + "reward": 0.0, + "reward_std": 0.8765333294868469, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.12744982992052378, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08430801686725056, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09888264649460884, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1619 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1267.0, + "completions/max_terminated_length": 1267.0, + "completions/mean_length": 970.0625, + "completions/mean_terminated_length": 970.0625, + "completions/min_length": 813.0, + "completions/min_terminated_length": 813.0, + "epoch": 0.32406481296259254, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.89869672953959, + "kl": 0.0141143798828125, + "learning_rate": 8.694730959601017e-07, + "loss": 0.0007, + "num_tokens": 70695164.0, + "reward": 0.0, + "reward_std": 0.5643842220306396, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.01593812147810448, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11553340272673018, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04849589520621153, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1620 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1462.75, + "completions/mean_terminated_length": 1414.857177734375, + "completions/min_length": 1281.0, + "completions/min_terminated_length": 1281.0, + "epoch": 0.3242648529705941, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.17449310725944, + "kl": 0.00778961181640625, + "learning_rate": 8.692517184954339e-07, + "loss": 0.0098, + "num_tokens": 70743280.0, + "reward": 0.0, + "reward_std": 0.902093231678009, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.031710181255489286, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07774271894751453, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.047919685895217376, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1621 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1380.0, + "completions/max_terminated_length": 1380.0, + "completions/mean_length": 1090.0, + "completions/mean_terminated_length": 1090.0, + "completions/min_length": 918.0, + "completions/min_terminated_length": 918.0, + "epoch": 0.3244648929785957, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6902125865018984, + "kl": 0.019378662109375, + "learning_rate": 8.690301853621783e-07, + "loss": -0.014, + "num_tokens": 70780872.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0485789775848389, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04008167833327901, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1510273139735971, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.16903867626692443, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1622 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1191.0, + "completions/max_terminated_length": 1191.0, + "completions/mean_length": 964.625, + "completions/mean_terminated_length": 964.625, + "completions/min_length": 679.0, + "completions/min_terminated_length": 679.0, + "epoch": 0.3246649329865973, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4499669527549295, + "kl": 0.01177978515625, + "learning_rate": 8.688084966683557e-07, + "loss": 0.0259, + "num_tokens": 70817090.0, + "reward": 4.470348358154297e-08, + "reward_std": 0.91087806224823, + "rewards/wordcountpos_reward_nokeypoint/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0010759384955771924, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.049761529884909825, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06191391873668902, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1623 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1212.625, + "completions/mean_terminated_length": 1193.4666748046875, + "completions/min_length": 972.0, + "completions/min_terminated_length": 972.0, + "epoch": 0.32486497299459893, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.87761543169123, + "kl": 0.0112457275390625, + "learning_rate": 8.685866525220625e-07, + "loss": -0.0215, + "num_tokens": 70865956.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.931084930896759, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.02103784926899644, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12448664899548084, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13709958532503408, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1624 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1419.0, + "completions/max_terminated_length": 1419.0, + "completions/mean_length": 1112.9375, + "completions/mean_terminated_length": 1112.9375, + "completions/min_length": 859.0, + "completions/min_terminated_length": 859.0, + "epoch": 0.32506501300260054, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.766130962092588, + "kl": 0.01078033447265625, + "learning_rate": 8.683646530314709e-07, + "loss": -0.0425, + "num_tokens": 70900683.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5933337807655334, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0042771078697745775, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0711787500562511, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09496588081262933, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1625 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1377.6875, + "completions/mean_terminated_length": 1322.0909423828125, + "completions/min_length": 879.0, + "completions/min_terminated_length": 879.0, + "epoch": 0.3252650530106021, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4167839120221704, + "kl": 0.010833740234375, + "learning_rate": 8.681424983048288e-07, + "loss": -0.0512, + "num_tokens": 70952270.0, + "reward": 0.0, + "reward_std": 0.9527186155319214, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0006165174834262327, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.21170909801913682, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1626 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1482.0, + "completions/max_terminated_length": 1482.0, + "completions/mean_length": 1191.625, + "completions/mean_terminated_length": 1191.625, + "completions/min_length": 923.0, + "completions/min_terminated_length": 923.0, + "epoch": 0.3254650930186037, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.049589866510499, + "kl": 0.01043701171875, + "learning_rate": 8.679201884504598e-07, + "loss": 0.0422, + "num_tokens": 70996808.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6886202096939087, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.05497926641483148, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2230989000223447, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.048686449556014755, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1627 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1490.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1030.875, + "completions/mean_terminated_length": 1030.875, + "completions/min_length": 727.0, + "completions/min_terminated_length": 727.0, + "epoch": 0.3256651330266053, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.506632906289258, + "kl": 0.017303466796875, + "learning_rate": 8.676977235767632e-07, + "loss": 0.0517, + "num_tokens": 71032326.0, + "reward": 0.0, + "reward_std": 1.020640254020691, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.05042853216269897, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05995780836472868, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07391185942027817, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1628 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1444.0, + "completions/max_terminated_length": 1444.0, + "completions/mean_length": 1243.5625, + "completions/mean_terminated_length": 1243.5625, + "completions/min_length": 1000.0, + "completions/min_terminated_length": 1000.0, + "epoch": 0.32586517303460694, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1362702251822583, + "kl": 0.0090484619140625, + "learning_rate": 8.674751037922141e-07, + "loss": -0.0089, + "num_tokens": 71078375.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9269343018531799, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.3675068627550299, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09814538504280602, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059472994182545036, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1629 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1288.0, + "completions/max_terminated_length": 1288.0, + "completions/mean_length": 958.125, + "completions/mean_terminated_length": 958.125, + "completions/min_length": 738.0, + "completions/min_terminated_length": 738.0, + "epoch": 0.32606521304260855, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8158044852090645, + "kl": 0.017486572265625, + "learning_rate": 8.672523292053627e-07, + "loss": -0.0039, + "num_tokens": 71125713.0, + "reward": 0.0, + "reward_std": 0.7694025039672852, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08797531294090585, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12704221656972933, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1031898645611484, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1630 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1439.0, + "completions/mean_length": 1392.0625, + "completions/mean_terminated_length": 1253.2857666015625, + "completions/min_length": 1105.0, + "completions/min_terminated_length": 1105.0, + "epoch": 0.3262652530506101, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2125069102395956, + "kl": 0.01397705078125, + "learning_rate": 8.670293999248351e-07, + "loss": 0.0238, + "num_tokens": 71180234.0, + "reward": 0.0, + "reward_std": 0.7472739219665527, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.14147252605511126, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.19277519969447743, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11855612829185828, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1631 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1246.0, + "completions/mean_length": 1308.4375, + "completions/mean_terminated_length": 1116.875, + "completions/min_length": 905.0, + "completions/min_terminated_length": 905.0, + "epoch": 0.3264652930586117, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.930977619724116, + "kl": 0.005107879638671875, + "learning_rate": 8.668063160593323e-07, + "loss": -0.0179, + "num_tokens": 71220121.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0106077194213867, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.02259828817334155, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06755834510991432, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06382847385042252, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1632 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1357.0, + "completions/mean_length": 1243.8125, + "completions/mean_terminated_length": 1226.7333984375, + "completions/min_length": 996.0, + "completions/min_terminated_length": 996.0, + "epoch": 0.32666533306661333, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9298193426112915, + "kl": 0.012908935546875, + "learning_rate": 8.665830777176314e-07, + "loss": -0.0315, + "num_tokens": 71263030.0, + "reward": 1.862645149230957e-08, + "reward_std": 1.0160696506500244, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.862645149230957e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1477129746850217, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08842647265461515, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9791666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.03191423692521126, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1633 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1363.0, + "completions/mean_length": 1113.5625, + "completions/mean_terminated_length": 984.75, + "completions/min_length": 814.0, + "completions/min_terminated_length": 814.0, + "epoch": 0.32686537307461494, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4084707075252756, + "kl": 0.0112152099609375, + "learning_rate": 8.66359685008584e-07, + "loss": -0.0018, + "num_tokens": 71307407.0, + "reward": 0.0, + "reward_std": 1.0058501958847046, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05803150037824717, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09396967508051629, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0769800358919501, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1634 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1416.0, + "completions/max_terminated_length": 1416.0, + "completions/mean_length": 1043.0, + "completions/mean_terminated_length": 1043.0, + "completions/min_length": 778.0, + "completions/min_terminated_length": 778.0, + "epoch": 0.3270654130826165, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.184416460624516, + "kl": 0.013519287109375, + "learning_rate": 8.661361380411178e-07, + "loss": 0.0161, + "num_tokens": 71346799.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.036112904548645, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.006586182938105052, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.032048998118821434, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.03442651863295481, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1635 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1430.0, + "completions/max_terminated_length": 1430.0, + "completions/mean_length": 1178.5625, + "completions/mean_terminated_length": 1178.5625, + "completions/min_length": 781.0, + "completions/min_terminated_length": 781.0, + "epoch": 0.3272654530906181, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.857315011569989, + "kl": 0.01043701171875, + "learning_rate": 8.65912436924235e-07, + "loss": -0.0058, + "num_tokens": 71388632.0, + "reward": 0.0, + "reward_std": 0.713483452796936, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03795303510571689, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10354744176754953, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11122216672215288, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1636 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1372.0, + "completions/max_terminated_length": 1372.0, + "completions/mean_length": 1129.25, + "completions/mean_terminated_length": 1129.25, + "completions/min_length": 920.0, + "completions/min_terminated_length": 920.0, + "epoch": 0.3274654930986197, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5869867578526273, + "kl": 0.0162200927734375, + "learning_rate": 8.656885817670135e-07, + "loss": -0.0349, + "num_tokens": 71429900.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.899340033531189, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.058650301237126196, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07181921103467848, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.055611083361076424, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1637 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1223.0, + "completions/max_terminated_length": 1223.0, + "completions/mean_length": 995.3125, + "completions/mean_terminated_length": 995.3125, + "completions/min_length": 780.0, + "completions/min_terminated_length": 780.0, + "epoch": 0.32766553310662133, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5280665171720598, + "kl": 0.0179443359375, + "learning_rate": 8.654645726786061e-07, + "loss": -0.017, + "num_tokens": 71468161.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8548576831817627, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09629360383842683, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12301393637434206, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06309898162000305, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1638 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1180.0, + "completions/max_terminated_length": 1180.0, + "completions/mean_length": 1007.5, + "completions/mean_terminated_length": 1007.5, + "completions/min_length": 775.0, + "completions/min_terminated_length": 775.0, + "epoch": 0.32786557311462294, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.202553952628214, + "kl": 0.0132293701171875, + "learning_rate": 8.652404097682405e-07, + "loss": 0.0045, + "num_tokens": 71500305.0, + "reward": 0.0, + "reward_std": 0.8693498373031616, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.045227720787274724, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04423686436333181, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059472994182545036, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1639 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1195.0, + "completions/max_terminated_length": 1195.0, + "completions/mean_length": 989.0, + "completions/mean_terminated_length": 989.0, + "completions/min_length": 836.0, + "completions/min_terminated_length": 836.0, + "epoch": 0.3280656131226245, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2092515398947983, + "kl": 0.0089874267578125, + "learning_rate": 8.650160931452196e-07, + "loss": -0.0299, + "num_tokens": 71531049.0, + "reward": 0.0, + "reward_std": 0.8814165592193604, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06511065616433005, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09455049727264447, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08606629658238706, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1640 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1380.0, + "completions/mean_length": 1154.875, + "completions/mean_terminated_length": 1131.86669921875, + "completions/min_length": 785.0, + "completions/min_terminated_length": 785.0, + "epoch": 0.3282656531306261, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4097425622736823, + "kl": 0.01824951171875, + "learning_rate": 8.647916229189212e-07, + "loss": -0.0236, + "num_tokens": 71580639.0, + "reward": 0.0, + "reward_std": 1.0502331256866455, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.10666701232186857, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09085355941840502, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.055611083361076445, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1641 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1351.0, + "completions/mean_length": 1194.0, + "completions/mean_terminated_length": 1150.2857666015625, + "completions/min_length": 954.0, + "completions/min_terminated_length": 954.0, + "epoch": 0.3284656931386277, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.335092421622076, + "kl": 0.0184326171875, + "learning_rate": 8.645669991987981e-07, + "loss": -0.0523, + "num_tokens": 71626247.0, + "reward": -4.470348358154297e-08, + "reward_std": 0.9971462488174438, + "rewards/wordcountpos_reward_nokeypoint/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07676831374805604, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1059871112111923, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08766518798921943, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1642 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1230.0625, + "completions/mean_terminated_length": 1212.0667724609375, + "completions/min_length": 953.0, + "completions/min_terminated_length": 953.0, + "epoch": 0.32866573314662934, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4926598056073885, + "kl": 0.0155029296875, + "learning_rate": 8.643422220943778e-07, + "loss": -0.0023, + "num_tokens": 71675600.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8187199831008911, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06483038492801323, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.24011398558894195, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1275843947266976, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1643 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1207.1875, + "completions/mean_terminated_length": 1165.357177734375, + "completions/min_length": 693.0, + "completions/min_terminated_length": 693.0, + "epoch": 0.32886577315463095, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0643330602683103, + "kl": 0.018310546875, + "learning_rate": 8.641172917152626e-07, + "loss": 0.0455, + "num_tokens": 71728163.0, + "reward": 7.450580596923828e-09, + "reward_std": 0.9530442357063293, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10165154701103493, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17086879063178348, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11979921473804347, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1644 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1498.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1213.125, + "completions/mean_terminated_length": 1213.125, + "completions/min_length": 928.0, + "completions/min_terminated_length": 928.0, + "epoch": 0.3290658131626325, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.840252726619261, + "kl": 0.00989532470703125, + "learning_rate": 8.638922081711295e-07, + "loss": -0.0126, + "num_tokens": 71781829.0, + "reward": 7.450580596923828e-09, + "reward_std": 0.9979990720748901, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06285944049384556, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13706717175399757, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10390522747338701, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1645 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1416.0, + "completions/mean_length": 1239.5625, + "completions/mean_terminated_length": 1222.2000732421875, + "completions/min_length": 1018.0, + "completions/min_terminated_length": 1018.0, + "epoch": 0.3292658531706341, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.630297019052143, + "kl": 0.0132598876953125, + "learning_rate": 8.636669715717304e-07, + "loss": -0.0155, + "num_tokens": 71834198.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0682823657989502, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.11312741209515668, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14168224014410286, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.053748384988656986, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1646 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1088.0, + "completions/max_terminated_length": 1088.0, + "completions/mean_length": 849.6875, + "completions/mean_terminated_length": 849.6875, + "completions/min_length": 611.0, + "completions/min_terminated_length": 611.0, + "epoch": 0.3294658931786357, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.099455244569302, + "kl": 0.0173187255859375, + "learning_rate": 8.634415820268915e-07, + "loss": -0.0345, + "num_tokens": 71871865.0, + "reward": 4.470348358154297e-08, + "reward_std": 0.9950416684150696, + "rewards/wordcountpos_reward_nokeypoint/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0416768200583801, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10240138385889837, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.047919685895217376, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1647 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1445.0, + "completions/mean_length": 1341.3125, + "completions/mean_terminated_length": 1304.6923828125, + "completions/min_length": 1081.0, + "completions/min_terminated_length": 1081.0, + "epoch": 0.32966593318663734, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.964940593272461, + "kl": 0.0129547119140625, + "learning_rate": 8.63216039646514e-07, + "loss": 0.001, + "num_tokens": 71915830.0, + "reward": 0.0, + "reward_std": 0.5855861902236938, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.16513784583979985, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.24930190514603698, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10945995377982527, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1648 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1264.0625, + "completions/mean_terminated_length": 1230.357177734375, + "completions/min_length": 919.0, + "completions/min_terminated_length": 919.0, + "epoch": 0.32986597319463895, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.047854284275479, + "kl": 0.0140228271484375, + "learning_rate": 8.629903445405733e-07, + "loss": -0.0007, + "num_tokens": 71953335.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.054059386253357, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07697042098780188, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07518357102367304, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07649739768026002, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1649 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1350.0, + "completions/mean_length": 1210.8125, + "completions/mean_terminated_length": 1169.5, + "completions/min_length": 998.0, + "completions/min_terminated_length": 998.0, + "epoch": 0.3300660132026405, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.970290781988412, + "kl": 0.01519775390625, + "learning_rate": 8.627644968191195e-07, + "loss": -0.0036, + "num_tokens": 71994924.0, + "reward": 4.470348358154297e-08, + "reward_std": 0.8594913482666016, + "rewards/wordcountpos_reward_nokeypoint/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09238375973749642, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15450217630921806, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06382847385042252, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1650 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1283.25, + "completions/mean_terminated_length": 1268.800048828125, + "completions/min_length": 924.0, + "completions/min_terminated_length": 924.0, + "epoch": 0.3302660532106421, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.880886839812446, + "kl": 0.0137176513671875, + "learning_rate": 8.625384965922767e-07, + "loss": 0.009, + "num_tokens": 72047888.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0278414487838745, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.045051882173400025, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03982084691846716, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07588978362901858, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1651 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1407.0, + "completions/mean_length": 1098.875, + "completions/mean_terminated_length": 1072.1334228515625, + "completions/min_length": 839.0, + "completions/min_terminated_length": 839.0, + "epoch": 0.33046609321864373, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2691337208548004, + "kl": 0.01641845703125, + "learning_rate": 8.623123439702435e-07, + "loss": -0.0504, + "num_tokens": 72096334.0, + "reward": 0.0, + "reward_std": 0.8719402551651001, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.19669431813081012, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08327154328619243, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11021863793455329, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1652 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1442.0, + "completions/max_terminated_length": 1442.0, + "completions/mean_length": 1066.5, + "completions/mean_terminated_length": 1066.5, + "completions/min_length": 643.0, + "completions/min_terminated_length": 643.0, + "epoch": 0.33066613322664534, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.476350681688712, + "kl": 0.017425537109375, + "learning_rate": 8.620860390632935e-07, + "loss": -0.0831, + "num_tokens": 72136158.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.020950436592102, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.10713640189212613, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.3104367781504284, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.045338235029118136, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1653 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1063.0, + "completions/max_terminated_length": 1063.0, + "completions/mean_length": 961.375, + "completions/mean_terminated_length": 961.375, + "completions/min_length": 729.0, + "completions/min_terminated_length": 729.0, + "epoch": 0.33086617323464695, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.029758169778084, + "kl": 0.0236358642578125, + "learning_rate": 8.618595819817736e-07, + "loss": -0.0198, + "num_tokens": 72173764.0, + "reward": 0.0, + "reward_std": 0.9446979761123657, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.054940879220375875, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09538306569225236, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0620632890834175, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1654 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1430.0, + "completions/max_terminated_length": 1430.0, + "completions/mean_length": 1183.1875, + "completions/mean_terminated_length": 1183.1875, + "completions/min_length": 903.0, + "completions/min_terminated_length": 903.0, + "epoch": 0.3310662132426485, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.010316667818446, + "kl": 0.0136566162109375, + "learning_rate": 8.616329728361055e-07, + "loss": -0.0339, + "num_tokens": 72222191.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0490565299987793, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03987927934596143, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.060019160398560034, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04367387557118565, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1655 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 1394.0, + "completions/mean_terminated_length": 1345.8182373046875, + "completions/min_length": 1233.0, + "completions/min_terminated_length": 1233.0, + "epoch": 0.3312662532506501, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3290249273045407, + "kl": 0.0104827880859375, + "learning_rate": 8.614062117367846e-07, + "loss": -0.003, + "num_tokens": 72275479.0, + "reward": 1.862645149230957e-08, + "reward_std": 0.9648418426513672, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.862645149230957e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1258226306573411, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1282478501045061, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07649739768026001, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1656 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1436.0, + "completions/max_terminated_length": 1436.0, + "completions/mean_length": 1128.8125, + "completions/mean_terminated_length": 1128.8125, + "completions/min_length": 834.0, + "completions/min_terminated_length": 834.0, + "epoch": 0.33146629325865173, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8795734143947715, + "kl": 0.020843505859375, + "learning_rate": 8.611792987943808e-07, + "loss": 0.0043, + "num_tokens": 72327196.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9847660064697266, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0018546881822618525, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.24542711032299744, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.057089922571845, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1657 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1370.0, + "completions/max_terminated_length": 1370.0, + "completions/mean_length": 1068.8125, + "completions/mean_terminated_length": 1068.8125, + "completions/min_length": 774.0, + "completions/min_terminated_length": 774.0, + "epoch": 0.33166633326665335, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0147799081554516, + "kl": 0.011383056640625, + "learning_rate": 8.609522341195379e-07, + "loss": -0.0165, + "num_tokens": 72357441.0, + "reward": 0.0, + "reward_std": 0.9157370924949646, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.14163564978045806, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.25353443633285144, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04216370213557838, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1658 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1316.625, + "completions/mean_terminated_length": 1233.272705078125, + "completions/min_length": 949.0, + "completions/min_terminated_length": 949.0, + "epoch": 0.33186637327465496, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0969315339869268, + "kl": 0.018402099609375, + "learning_rate": 8.607250178229737e-07, + "loss": -0.021, + "num_tokens": 72404315.0, + "reward": 0.0, + "reward_std": 0.9376934766769409, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07545763639475084, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10978102063061886, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06191391873668902, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1659 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1349.0, + "completions/max_terminated_length": 1349.0, + "completions/mean_length": 1141.125, + "completions/mean_terminated_length": 1141.125, + "completions/min_length": 1023.0, + "completions/min_terminated_length": 1023.0, + "epoch": 0.3320664132826565, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.089329901310654, + "kl": 0.01495361328125, + "learning_rate": 8.604976500154799e-07, + "loss": 0.0325, + "num_tokens": 72447813.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0680248737335205, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.004557174736523338, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06074775564472341, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059628479399994376, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1660 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1366.0, + "completions/mean_length": 1250.3125, + "completions/mean_terminated_length": 1136.8182373046875, + "completions/min_length": 985.0, + "completions/min_terminated_length": 985.0, + "epoch": 0.3322664532906581, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2760123774527323, + "kl": 0.0146484375, + "learning_rate": 8.602701308079217e-07, + "loss": 0.0052, + "num_tokens": 72500282.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.000259518623352, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03423999872630951, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07103007285257353, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07698003589195011, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1661 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1209.625, + "completions/mean_terminated_length": 1190.2667236328125, + "completions/min_length": 991.0, + "completions/min_terminated_length": 991.0, + "epoch": 0.33246649329865974, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2149375147823407, + "kl": 0.016876220703125, + "learning_rate": 8.600424603112391e-07, + "loss": 0.0044, + "num_tokens": 72539388.0, + "reward": 0.0, + "reward_std": 0.9199150800704956, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08805871793143383, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08274141695986507, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07888106377466154, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327954292297363, + "step": 1662 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1319.0, + "completions/max_terminated_length": 1319.0, + "completions/mean_length": 1096.25, + "completions/mean_terminated_length": 1096.25, + "completions/min_length": 836.0, + "completions/min_terminated_length": 836.0, + "epoch": 0.33266653330666135, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0818165615511335, + "kl": 0.0139007568359375, + "learning_rate": 8.598146386364447e-07, + "loss": 0.0141, + "num_tokens": 72582144.0, + "reward": 0.0, + "reward_std": 0.6997292637825012, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.029955676177904643, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11152439337916324, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07888106377466154, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1663 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1151.1875, + "completions/mean_terminated_length": 1127.933349609375, + "completions/min_length": 892.0, + "completions/min_terminated_length": 892.0, + "epoch": 0.3328665733146629, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4095099691435764, + "kl": 0.0109405517578125, + "learning_rate": 8.59586665894626e-07, + "loss": 0.0108, + "num_tokens": 72614923.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.8804906606674194, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.13565290625090626, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07844506045044769, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07391185942027818, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1664 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1309.0, + "completions/mean_length": 1093.875, + "completions/mean_terminated_length": 1066.800048828125, + "completions/min_length": 670.0, + "completions/min_terminated_length": 670.0, + "epoch": 0.3330666133226645, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2064068580567313, + "kl": 0.0129547119140625, + "learning_rate": 8.59358542196943e-07, + "loss": -0.0704, + "num_tokens": 72650489.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8157972097396851, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0696579275160746, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10312332666955712, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10610965676722954, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1665 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1293.8125, + "completions/mean_terminated_length": 1200.0909423828125, + "completions/min_length": 1113.0, + "completions/min_terminated_length": 1113.0, + "epoch": 0.33326665333066613, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.248529010414643, + "kl": 0.013427734375, + "learning_rate": 8.591302676546302e-07, + "loss": 0.0054, + "num_tokens": 72703126.0, + "reward": -2.60770320892334e-08, + "reward_std": 1.0528184175491333, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.60770320892334e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09031436616166263, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07053258621439412, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09878896324620104, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1666 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1281.0, + "completions/max_terminated_length": 1281.0, + "completions/mean_length": 962.0625, + "completions/mean_terminated_length": 962.0625, + "completions/min_length": 832.0, + "completions/min_terminated_length": 832.0, + "epoch": 0.33346669333866774, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.877201898898888, + "kl": 0.009857177734375, + "learning_rate": 8.589018423789951e-07, + "loss": -0.0401, + "num_tokens": 72733495.0, + "reward": 0.0, + "reward_std": 0.8649842739105225, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04569112181490484, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03975942232617288, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04216370213557838, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1667 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1152.0, + "completions/max_terminated_length": 1152.0, + "completions/mean_length": 1026.625, + "completions/mean_terminated_length": 1026.625, + "completions/min_length": 887.0, + "completions/min_terminated_length": 887.0, + "epoch": 0.33366673334666935, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.275138242888335, + "kl": 0.017791748046875, + "learning_rate": 8.586732664814189e-07, + "loss": 0.0017, + "num_tokens": 72778049.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9191485643386841, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05898154551344343, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07858285827372351, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07252075054258098, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1668 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1169.0, + "completions/max_terminated_length": 1169.0, + "completions/mean_length": 979.875, + "completions/mean_terminated_length": 979.875, + "completions/min_length": 777.0, + "completions/min_terminated_length": 777.0, + "epoch": 0.3338667733546709, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.166455719164575, + "kl": 0.0201416015625, + "learning_rate": 8.584445400733564e-07, + "loss": -0.0446, + "num_tokens": 72826567.0, + "reward": 0.0, + "reward_std": 0.5744268894195557, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.2148077461720485, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.3076568226836724, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11013459778666117, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1669 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1431.0, + "completions/mean_length": 1412.3125, + "completions/mean_terminated_length": 1299.571533203125, + "completions/min_length": 1216.0, + "completions/min_terminated_length": 1216.0, + "epoch": 0.3340668133626725, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.752524507064111, + "kl": 0.0165557861328125, + "learning_rate": 8.582156632663356e-07, + "loss": 0.0026, + "num_tokens": 72878044.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.811604380607605, + "rewards/wordcountpos_reward_nokeypoint/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.05811267987721283, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0970264822941732, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06666666666666665, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1670 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1216.0, + "completions/max_terminated_length": 1216.0, + "completions/mean_length": 966.9375, + "completions/mean_terminated_length": 966.9375, + "completions/min_length": 741.0, + "completions/min_terminated_length": 741.0, + "epoch": 0.33426685337067413, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.692019956154717, + "kl": 0.0175018310546875, + "learning_rate": 8.579866361719575e-07, + "loss": 0.0295, + "num_tokens": 72912323.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.943489670753479, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08275520482133412, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10285704638764416, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08153617692869926, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1671 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1498.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1193.5625, + "completions/mean_terminated_length": 1193.5625, + "completions/min_length": 984.0, + "completions/min_terminated_length": 984.0, + "epoch": 0.33446689337867574, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.887987001668356, + "kl": 0.0137481689453125, + "learning_rate": 8.577574589018974e-07, + "loss": 0.0236, + "num_tokens": 72960828.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6888799667358398, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11942890577686148, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12563297907122195, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059628479399994376, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1672 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1249.0, + "completions/max_terminated_length": 1249.0, + "completions/mean_length": 1079.4375, + "completions/mean_terminated_length": 1079.4375, + "completions/min_length": 955.0, + "completions/min_terminated_length": 955.0, + "epoch": 0.33466693338667736, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.6261629007026615, + "kl": 0.00733184814453125, + "learning_rate": 8.575281315679027e-07, + "loss": -0.0077, + "num_tokens": 73009243.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7745330929756165, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.030062028819410426, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0989339215506742, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07490735018081413, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1673 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1143.0, + "completions/mean_length": 1220.8125, + "completions/mean_terminated_length": 941.625, + "completions/min_length": 773.0, + "completions/min_terminated_length": 773.0, + "epoch": 0.3348669733946789, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.030538083346689, + "kl": 0.0065765380859375, + "learning_rate": 8.572986542817948e-07, + "loss": 0.0147, + "num_tokens": 73050888.0, + "reward": 0.0, + "reward_std": 0.8290323615074158, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11031108333063103, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16576041703842725, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07097208632298359, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1674 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1482.0, + "completions/mean_length": 1207.0, + "completions/mean_terminated_length": 1109.3333740234375, + "completions/min_length": 834.0, + "completions/min_terminated_length": 834.0, + "epoch": 0.3350670134026805, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4978935418047574, + "kl": 0.012908935546875, + "learning_rate": 8.570690271554674e-07, + "loss": 0.0288, + "num_tokens": 73097472.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9533826112747192, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08794882785094482, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09896738765022883, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059628479399994376, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1675 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1192.0, + "completions/mean_length": 1008.125, + "completions/mean_terminated_length": 975.3333740234375, + "completions/min_length": 813.0, + "completions/min_terminated_length": 813.0, + "epoch": 0.33526705341068214, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.267542235767769, + "kl": 0.0122528076171875, + "learning_rate": 8.56839250300888e-07, + "loss": -0.0327, + "num_tokens": 73129106.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.040432095527649, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.013364369079691324, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0643580997241189, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875000000000001, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.067631901304592, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1676 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1198.0, + "completions/max_terminated_length": 1198.0, + "completions/mean_length": 858.8125, + "completions/mean_terminated_length": 858.8125, + "completions/min_length": 718.0, + "completions/min_terminated_length": 718.0, + "epoch": 0.33546709341868375, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6851659781122654, + "kl": 0.014434814453125, + "learning_rate": 8.566093238300968e-07, + "loss": -0.0684, + "num_tokens": 73179695.0, + "reward": 2.0489096641540527e-08, + "reward_std": 1.0562494993209839, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.0489096641540527e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.012093376701007325, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.058142922312653535, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.17379212785308693, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1677 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1429.0, + "completions/mean_length": 1246.3125, + "completions/mean_terminated_length": 1187.769287109375, + "completions/min_length": 1021.0, + "completions/min_terminated_length": 1021.0, + "epoch": 0.33566713342668536, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.581654171358655, + "kl": 0.01898193359375, + "learning_rate": 8.563792478552071e-07, + "loss": -0.0155, + "num_tokens": 73228484.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8695411682128906, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06545856660498217, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05813651272284397, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12041594578792297, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1678 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1299.5, + "completions/mean_terminated_length": 1270.857177734375, + "completions/min_length": 1101.0, + "completions/min_terminated_length": 1101.0, + "epoch": 0.3358671734346869, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0371895807867526, + "kl": 0.020233154296875, + "learning_rate": 8.561490224884049e-07, + "loss": -0.0188, + "num_tokens": 73278684.0, + "reward": -1.862645149230957e-08, + "reward_std": 1.0145847797393799, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.862645149230957e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.2793900647429946, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10971973088562784, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08027729719194864, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1679 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1258.0, + "completions/max_terminated_length": 1258.0, + "completions/mean_length": 888.25, + "completions/mean_terminated_length": 888.25, + "completions/min_length": 586.0, + "completions/min_terminated_length": 586.0, + "epoch": 0.33606721344268853, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.232783361885938, + "kl": 0.01416015625, + "learning_rate": 8.559186478419492e-07, + "loss": -0.1129, + "num_tokens": 73323752.0, + "reward": 0.0, + "reward_std": 0.3526800572872162, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1105673779038384, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14841124226374527, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13601470508735444, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1680 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1488.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1174.0, + "completions/mean_terminated_length": 1174.0, + "completions/min_length": 952.0, + "completions/min_terminated_length": 952.0, + "epoch": 0.33626725345069014, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.213479443832754, + "kl": 0.01544189453125, + "learning_rate": 8.556881240281715e-07, + "loss": 0.035, + "num_tokens": 73371792.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.8454281687736511, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.034083246404458006, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06065990562273917, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08734775114237131, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1681 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1144.5625, + "completions/mean_terminated_length": 1120.86669921875, + "completions/min_length": 918.0, + "completions/min_terminated_length": 918.0, + "epoch": 0.33646729345869175, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.847863037298385, + "kl": 0.0132293701171875, + "learning_rate": 8.554574511594766e-07, + "loss": 0.0005, + "num_tokens": 73419161.0, + "reward": 0.0, + "reward_std": 0.8801282644271851, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.05979269339764251, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.035519551521949015, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13871099718746435, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1682 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1409.0, + "completions/max_terminated_length": 1409.0, + "completions/mean_length": 1080.8125, + "completions/mean_terminated_length": 1080.8125, + "completions/min_length": 792.0, + "completions/min_terminated_length": 792.0, + "epoch": 0.33666733346669336, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5096654010994164, + "kl": 0.019195556640625, + "learning_rate": 8.552266293483415e-07, + "loss": -0.0096, + "num_tokens": 73467806.0, + "reward": 3.725290298461914e-09, + "reward_std": 1.0089560747146606, + "rewards/wordcountpos_reward_nokeypoint/mean": 3.725290298461914e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.16222790994531067, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08249767628876138, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.16307235385739852, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1683 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1288.0, + "completions/max_terminated_length": 1288.0, + "completions/mean_length": 1075.25, + "completions/mean_terminated_length": 1075.25, + "completions/min_length": 852.0, + "completions/min_terminated_length": 852.0, + "epoch": 0.3368673734746949, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.514604661545066, + "kl": 0.0189208984375, + "learning_rate": 8.549956587073157e-07, + "loss": -0.0214, + "num_tokens": 73498426.0, + "reward": 3.725290298461914e-09, + "reward_std": 0.9401815533638, + "rewards/wordcountpos_reward_nokeypoint/mean": 3.725290298461914e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05216666088058683, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06712686754908505, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07969850595746353, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1684 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1428.0, + "completions/max_terminated_length": 1428.0, + "completions/mean_length": 1279.6875, + "completions/mean_terminated_length": 1279.6875, + "completions/min_length": 1086.0, + "completions/min_terminated_length": 1086.0, + "epoch": 0.33706741348269653, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.645324948322763, + "kl": 0.0157318115234375, + "learning_rate": 8.547645393490218e-07, + "loss": 0.0079, + "num_tokens": 73545413.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0315126180648804, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1065230292108361, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0488257846110564, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06440611887195306, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1685 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1410.0, + "completions/max_terminated_length": 1410.0, + "completions/mean_length": 1089.4375, + "completions/mean_terminated_length": 1089.4375, + "completions/min_length": 594.0, + "completions/min_terminated_length": 594.0, + "epoch": 0.33726745349069814, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.363204752343262, + "kl": 0.0141448974609375, + "learning_rate": 8.545332713861547e-07, + "loss": 0.0066, + "num_tokens": 73592788.0, + "reward": 0.0, + "reward_std": 0.7117171287536621, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.16793670626773294, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12183544006247045, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09803627446568498, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1686 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1129.5, + "completions/mean_terminated_length": 907.2000122070312, + "completions/min_length": 718.0, + "completions/min_terminated_length": 718.0, + "epoch": 0.33746749349869976, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0947762899011537, + "kl": 0.0167236328125, + "learning_rate": 8.543018549314817e-07, + "loss": 0.0437, + "num_tokens": 73636204.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8831521272659302, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08174387658673762, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.053089640524946975, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09878896324620104, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1687 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1390.0, + "completions/max_terminated_length": 1390.0, + "completions/mean_length": 1020.625, + "completions/mean_terminated_length": 1020.625, + "completions/min_length": 782.0, + "completions/min_terminated_length": 782.0, + "epoch": 0.33766753350670137, + "frac_reward_zero_std": 0.5, + "grad_norm": 2.362377296036935, + "kl": 0.022918701171875, + "learning_rate": 8.540702900978424e-07, + "loss": 0.0022, + "num_tokens": 73670646.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.7503840327262878, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.042918407888536694, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06859081512740176, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12382783747337808, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1688 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1248.875, + "completions/mean_terminated_length": 1190.923095703125, + "completions/min_length": 985.0, + "completions/min_terminated_length": 985.0, + "epoch": 0.3378675735147029, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.595720243564305, + "kl": 0.01544189453125, + "learning_rate": 8.538385769981488e-07, + "loss": 0.0072, + "num_tokens": 73721788.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9550642371177673, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.018780393292331193, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10836607057038704, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07391185942027816, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1689 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1399.0, + "completions/max_terminated_length": 1399.0, + "completions/mean_length": 1076.75, + "completions/mean_terminated_length": 1076.75, + "completions/min_length": 821.0, + "completions/min_terminated_length": 821.0, + "epoch": 0.33806761352270454, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.11864700391174, + "kl": 0.0201416015625, + "learning_rate": 8.536067157453854e-07, + "loss": -0.007, + "num_tokens": 73761608.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9240362644195557, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.065943850767751, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11306810232100059, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11979921473804347, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1690 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1203.0, + "completions/max_terminated_length": 1203.0, + "completions/mean_length": 933.5, + "completions/mean_terminated_length": 933.5, + "completions/min_length": 655.0, + "completions/min_terminated_length": 655.0, + "epoch": 0.33826765353070615, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.209081241396713, + "kl": 0.016357421875, + "learning_rate": 8.533747064526087e-07, + "loss": -0.0485, + "num_tokens": 73791360.0, + "reward": 0.0, + "reward_std": 0.8113054633140564, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.2098984926892304, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17833946408877704, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08944271909999159, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1691 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1471.0, + "completions/max_terminated_length": 1471.0, + "completions/mean_length": 1199.3125, + "completions/mean_terminated_length": 1199.3125, + "completions/min_length": 871.0, + "completions/min_terminated_length": 871.0, + "epoch": 0.33846769353870776, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0585531737544636, + "kl": 0.01788330078125, + "learning_rate": 8.531425492329474e-07, + "loss": 0.0102, + "num_tokens": 73835557.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6605508327484131, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06338449936769555, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16166540038001168, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05443310539518172, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1692 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1321.0, + "completions/mean_length": 1210.0625, + "completions/mean_terminated_length": 1190.7333984375, + "completions/min_length": 1075.0, + "completions/min_terminated_length": 1075.0, + "epoch": 0.3386677335467093, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.930340944518528, + "kl": 0.0162811279296875, + "learning_rate": 8.529102441996028e-07, + "loss": -0.027, + "num_tokens": 73888126.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8439550399780273, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07644459923806635, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05805453539151196, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04849589520621153, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1693 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1231.0, + "completions/max_terminated_length": 1231.0, + "completions/mean_length": 1056.4375, + "completions/mean_terminated_length": 1056.4375, + "completions/min_length": 874.0, + "completions/min_terminated_length": 874.0, + "epoch": 0.3388677735547109, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2936019798323497, + "kl": 0.0155792236328125, + "learning_rate": 8.526777914658475e-07, + "loss": 0.0064, + "num_tokens": 73929317.0, + "reward": -1.1175870895385742e-08, + "reward_std": 1.0607792139053345, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0851158379750605, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07399631983287643, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.062063289083417524, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1694 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1456.0, + "completions/max_terminated_length": 1456.0, + "completions/mean_length": 1134.875, + "completions/mean_terminated_length": 1134.875, + "completions/min_length": 876.0, + "completions/min_terminated_length": 876.0, + "epoch": 0.33906781356271254, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.814656184771504, + "kl": 0.0135040283203125, + "learning_rate": 8.524451911450268e-07, + "loss": -0.0211, + "num_tokens": 73964435.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9941896796226501, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04688098863823735, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10563707180948512, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.048686449556014755, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1695 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1221.0, + "completions/max_terminated_length": 1221.0, + "completions/mean_length": 1027.125, + "completions/mean_terminated_length": 1027.125, + "completions/min_length": 641.0, + "completions/min_terminated_length": 641.0, + "epoch": 0.33926785357071415, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.225370347001509, + "kl": 0.01654052734375, + "learning_rate": 8.522124433505574e-07, + "loss": -0.0383, + "num_tokens": 73995421.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8189383149147034, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10967242030835385, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16997785890963893, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06191391873668904, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1696 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1499.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1144.875, + "completions/mean_terminated_length": 1144.875, + "completions/min_length": 978.0, + "completions/min_terminated_length": 978.0, + "epoch": 0.33946789357871576, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.347942756800522, + "kl": 0.01910400390625, + "learning_rate": 8.519795481959283e-07, + "loss": -0.023, + "num_tokens": 74039123.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.4552757740020752, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.021374925569402983, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1743453681792853, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10945995377982527, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1697 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1407.0, + "completions/max_terminated_length": 1407.0, + "completions/mean_length": 1123.875, + "completions/mean_terminated_length": 1123.875, + "completions/min_length": 890.0, + "completions/min_terminated_length": 890.0, + "epoch": 0.3396679335867173, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5434330103808707, + "kl": 0.015960693359375, + "learning_rate": 8.517465057947004e-07, + "loss": -0.0016, + "num_tokens": 74080025.0, + "reward": 0.0, + "reward_std": 0.8313549757003784, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04272637821213617, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10748892809997104, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04216370213557838, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1698 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1253.0, + "completions/max_terminated_length": 1253.0, + "completions/mean_length": 1023.9375, + "completions/mean_terminated_length": 1023.9375, + "completions/min_length": 857.0, + "completions/min_terminated_length": 857.0, + "epoch": 0.33986797359471893, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3590778862849713, + "kl": 0.020751953125, + "learning_rate": 8.51513316260506e-07, + "loss": -0.0098, + "num_tokens": 74117360.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8396466970443726, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.031771941882962444, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.3515372901976886, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875000000000001, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07187952884282608, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1699 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1379.0, + "completions/max_terminated_length": 1379.0, + "completions/mean_length": 1071.375, + "completions/mean_terminated_length": 1071.375, + "completions/min_length": 879.0, + "completions/min_terminated_length": 879.0, + "epoch": 0.34006801360272054, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.411230195559094, + "kl": 0.01483154296875, + "learning_rate": 8.512799797070492e-07, + "loss": 0.015, + "num_tokens": 74159966.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5698010325431824, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08684991015067263, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.20180684887604775, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07252075054258096, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1700 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1143.0, + "completions/max_terminated_length": 1143.0, + "completions/mean_length": 905.375, + "completions/mean_terminated_length": 905.375, + "completions/min_length": 739.0, + "completions/min_terminated_length": 739.0, + "epoch": 0.34026805361072215, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6836088810693313, + "kl": 0.016876220703125, + "learning_rate": 8.510464962481065e-07, + "loss": -0.0489, + "num_tokens": 74201572.0, + "reward": 0.0, + "reward_std": 0.8603500127792358, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.00273964123849753, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05636706575298981, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1270024788326182, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1701 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1453.0, + "completions/mean_length": 1202.125, + "completions/mean_terminated_length": 1159.571533203125, + "completions/min_length": 950.0, + "completions/min_terminated_length": 950.0, + "epoch": 0.34046809361872377, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3074458082508076, + "kl": 0.0169219970703125, + "learning_rate": 8.508128659975251e-07, + "loss": 0.0087, + "num_tokens": 74254086.0, + "reward": 0.0, + "reward_std": 0.8176254034042358, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0815793038596167, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1420994545402092, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06871842709362766, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1702 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1431.0, + "completions/mean_length": 1393.375, + "completions/mean_terminated_length": 1310.4444580078125, + "completions/min_length": 1205.0, + "completions/min_terminated_length": 1205.0, + "epoch": 0.3406681336267253, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4469488689471364, + "kl": 0.0110626220703125, + "learning_rate": 8.505790890692243e-07, + "loss": 0.0038, + "num_tokens": 74304756.0, + "reward": 0.0, + "reward_std": 0.8853570222854614, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06829859668661915, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09547179899099932, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08766518798921948, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1703 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1368.0, + "completions/max_terminated_length": 1368.0, + "completions/mean_length": 1197.6875, + "completions/mean_terminated_length": 1197.6875, + "completions/min_length": 1011.0, + "completions/min_terminated_length": 1011.0, + "epoch": 0.34086817363472693, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.650899426803281, + "kl": 0.012542724609375, + "learning_rate": 8.503451655771948e-07, + "loss": 0.0302, + "num_tokens": 74351023.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7032220363616943, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04542843906811596, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.3215454771204214, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04216370213557838, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1704 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1372.0, + "completions/max_terminated_length": 1372.0, + "completions/mean_length": 1080.4375, + "completions/mean_terminated_length": 1080.4375, + "completions/min_length": 746.0, + "completions/min_terminated_length": 746.0, + "epoch": 0.34106821364272855, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3170718275425095, + "kl": 0.0128631591796875, + "learning_rate": 8.501110956354988e-07, + "loss": -0.0033, + "num_tokens": 74387118.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0388188362121582, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.09103471314660766, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1721182580081381, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08421753138505424, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1705 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1436.0, + "completions/max_terminated_length": 1436.0, + "completions/mean_length": 1112.3125, + "completions/mean_terminated_length": 1112.3125, + "completions/min_length": 765.0, + "completions/min_terminated_length": 765.0, + "epoch": 0.34126825365073016, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.553880407468435, + "kl": 0.0186920166015625, + "learning_rate": 8.498768793582696e-07, + "loss": -0.013, + "num_tokens": 74429075.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9084832668304443, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07734737741351212, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06985033925899502, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.053748384988656986, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1706 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1275.6875, + "completions/mean_terminated_length": 1243.6429443359375, + "completions/min_length": 848.0, + "completions/min_terminated_length": 848.0, + "epoch": 0.34146829365873177, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8541362055516175, + "kl": 0.0151519775390625, + "learning_rate": 8.496425168597123e-07, + "loss": -0.0016, + "num_tokens": 74475798.0, + "reward": 0.0, + "reward_std": 0.9094574451446533, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.14424955847201645, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1904705933385993, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.048686449556014755, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1707 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1194.0, + "completions/max_terminated_length": 1194.0, + "completions/mean_length": 875.625, + "completions/mean_terminated_length": 875.625, + "completions/min_length": 489.0, + "completions/min_terminated_length": 489.0, + "epoch": 0.3416683336667333, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8095312635552174, + "kl": 0.0225830078125, + "learning_rate": 8.494080082541033e-07, + "loss": -0.0433, + "num_tokens": 74517232.0, + "reward": -3.725290298461914e-08, + "reward_std": 1.0189881324768066, + "rewards/wordcountpos_reward_nokeypoint/mean": -3.725290298461914e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08259975575161806, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0590323108707885, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07888106377466154, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1708 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1361.0, + "completions/max_terminated_length": 1361.0, + "completions/mean_length": 1223.625, + "completions/mean_terminated_length": 1223.625, + "completions/min_length": 1077.0, + "completions/min_terminated_length": 1077.0, + "epoch": 0.34186837367473494, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.012838993098324, + "kl": 0.0071563720703125, + "learning_rate": 8.491733536557897e-07, + "loss": 0.0083, + "num_tokens": 74556434.0, + "reward": 0.0, + "reward_std": 0.8439576625823975, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.027162981739560225, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12726123288486912, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04554200340426487, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1709 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1309.0, + "completions/mean_length": 1187.125, + "completions/mean_terminated_length": 1142.4285888671875, + "completions/min_length": 884.0, + "completions/min_terminated_length": 884.0, + "epoch": 0.34206841368273655, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.48846442185187, + "kl": 0.0160675048828125, + "learning_rate": 8.489385531791906e-07, + "loss": -0.0486, + "num_tokens": 74595436.0, + "reward": 0.0, + "reward_std": 0.5902952551841736, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.14780896819833778, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15801718764806086, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.15957118462605635, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1710 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1425.0, + "completions/mean_length": 1190.375, + "completions/mean_terminated_length": 1118.923095703125, + "completions/min_length": 810.0, + "completions/min_terminated_length": 810.0, + "epoch": 0.34226845369073816, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1525605915190997, + "kl": 0.0147247314453125, + "learning_rate": 8.487036069387953e-07, + "loss": -0.038, + "num_tokens": 74646674.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0464494228363037, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.044105079553478324, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09748606823387856, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.062063289083417524, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1711 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1329.0, + "completions/max_terminated_length": 1329.0, + "completions/mean_length": 968.125, + "completions/mean_terminated_length": 968.125, + "completions/min_length": 627.0, + "completions/min_terminated_length": 627.0, + "epoch": 0.3424684936987398, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6851479939517224, + "kl": 0.021728515625, + "learning_rate": 8.484685150491649e-07, + "loss": -0.0367, + "num_tokens": 74698148.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.923636794090271, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04547860662256135, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0814584602520416, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08027729719194864, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1712 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1314.8125, + "completions/mean_terminated_length": 1288.357177734375, + "completions/min_length": 967.0, + "completions/min_terminated_length": 967.0, + "epoch": 0.34266853370674133, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.373167388447532, + "kl": 0.0191802978515625, + "learning_rate": 8.482332776249314e-07, + "loss": -0.0224, + "num_tokens": 74748993.0, + "reward": 0.0, + "reward_std": 0.7625467777252197, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08927777487089245, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11724320209881248, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09999999999999999, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1713 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1377.0, + "completions/max_terminated_length": 1377.0, + "completions/mean_length": 1040.3125, + "completions/mean_terminated_length": 1040.3125, + "completions/min_length": 651.0, + "completions/min_terminated_length": 651.0, + "epoch": 0.34286857371474294, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.291919235721647, + "kl": 0.0171966552734375, + "learning_rate": 8.479978947807975e-07, + "loss": -0.0233, + "num_tokens": 74794566.0, + "reward": 0.0, + "reward_std": 0.6183443069458008, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11408806727957342, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.18126131755591934, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07888106377466154, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1714 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1273.0625, + "completions/mean_terminated_length": 1220.6923828125, + "completions/min_length": 1000.0, + "completions/min_terminated_length": 1000.0, + "epoch": 0.34306861372274455, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.621763583614096, + "kl": 0.0205078125, + "learning_rate": 8.477623666315367e-07, + "loss": 0.0168, + "num_tokens": 74847143.0, + "reward": 0.0, + "reward_std": 0.9846622943878174, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.082729723511082, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.22164340017981687, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0806225774829855, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1715 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1093.1875, + "completions/mean_terminated_length": 1035.071533203125, + "completions/min_length": 646.0, + "completions/min_terminated_length": 646.0, + "epoch": 0.34326865373074616, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2563778377171957, + "kl": 0.0175628662109375, + "learning_rate": 8.475266932919938e-07, + "loss": -0.0299, + "num_tokens": 74887098.0, + "reward": 0.0, + "reward_std": 0.7244515419006348, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.12605061613561244, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10232631167790489, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08933913745655642, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1716 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1289.0, + "completions/max_terminated_length": 1289.0, + "completions/mean_length": 1075.375, + "completions/mean_terminated_length": 1075.375, + "completions/min_length": 835.0, + "completions/min_terminated_length": 835.0, + "epoch": 0.3434686937387478, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0574200176171757, + "kl": 0.0117950439453125, + "learning_rate": 8.472908748770844e-07, + "loss": -0.0149, + "num_tokens": 74917528.0, + "reward": 0.0, + "reward_std": 0.7218612432479858, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07883142287387973, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12536655756096712, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08027729719194864, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1717 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1376.0, + "completions/max_terminated_length": 1376.0, + "completions/mean_length": 984.0625, + "completions/mean_terminated_length": 984.0625, + "completions/min_length": 704.0, + "completions/min_terminated_length": 704.0, + "epoch": 0.34366873374674933, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7900693486656234, + "kl": 0.020233154296875, + "learning_rate": 8.470549115017944e-07, + "loss": -0.0609, + "num_tokens": 74970073.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6840701103210449, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04990540213807068, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09121232308866654, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1287403358472941, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1718 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1449.0, + "completions/mean_length": 1225.3125, + "completions/mean_terminated_length": 1161.923095703125, + "completions/min_length": 942.0, + "completions/min_terminated_length": 942.0, + "epoch": 0.34386877375475094, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1720377256141234, + "kl": 0.015106201171875, + "learning_rate": 8.468188032811806e-07, + "loss": -0.032, + "num_tokens": 75013886.0, + "reward": 0.0, + "reward_std": 0.642206072807312, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.15276555919765988, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.18100596076432146, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.975, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.047919685895217376, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1719 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1366.0, + "completions/max_terminated_length": 1366.0, + "completions/mean_length": 1244.8125, + "completions/mean_terminated_length": 1244.8125, + "completions/min_length": 1043.0, + "completions/min_terminated_length": 1043.0, + "epoch": 0.34406881376275256, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4780438189718876, + "kl": 0.0103302001953125, + "learning_rate": 8.465825503303705e-07, + "loss": 0.0053, + "num_tokens": 75059235.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.039873480796814, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.009510160259113876, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2084351265728952, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05163977794943221, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1720 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1341.75, + "completions/mean_terminated_length": 1289.0, + "completions/min_length": 982.0, + "completions/min_terminated_length": 982.0, + "epoch": 0.34426885377075417, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6906506014979104, + "kl": 0.013763427734375, + "learning_rate": 8.463461527645621e-07, + "loss": 0.0334, + "num_tokens": 75102319.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7287445664405823, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07907436673973499, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07977856152241414, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04367387557118565, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1721 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1397.0, + "completions/max_terminated_length": 1397.0, + "completions/mean_length": 1096.625, + "completions/mean_terminated_length": 1096.625, + "completions/min_length": 829.0, + "completions/min_terminated_length": 829.0, + "epoch": 0.3444688937787557, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.443364949209605, + "kl": 0.0164337158203125, + "learning_rate": 8.461096106990241e-07, + "loss": -0.0281, + "num_tokens": 75139297.0, + "reward": 0.0, + "reward_std": 0.882436990737915, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10424824387737329, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.3272126967721131, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0906764700582363, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1722 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1348.0, + "completions/max_terminated_length": 1348.0, + "completions/mean_length": 1159.5, + "completions/mean_terminated_length": 1159.5, + "completions/min_length": 1018.0, + "completions/min_terminated_length": 1018.0, + "epoch": 0.34466893378675734, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3475788822613906, + "kl": 0.01003265380859375, + "learning_rate": 8.458729242490951e-07, + "loss": -0.0079, + "num_tokens": 75182769.0, + "reward": 0.0, + "reward_std": 0.648653507232666, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04369990755185375, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14117070540721222, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12141145226353543, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1723 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 989.625, + "completions/mean_terminated_length": 955.6000366210938, + "completions/min_length": 458.0, + "completions/min_terminated_length": 458.0, + "epoch": 0.34486897379475895, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.983410248028697, + "kl": 0.01885986328125, + "learning_rate": 8.456360935301849e-07, + "loss": -0.0325, + "num_tokens": 75235099.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9109336137771606, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.018512177833114825, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.027820586327220183, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10602235962635781, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1724 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1441.0, + "completions/mean_length": 1167.125, + "completions/mean_terminated_length": 1056.166748046875, + "completions/min_length": 748.0, + "completions/min_terminated_length": 748.0, + "epoch": 0.34506901380276056, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1607746763451674, + "kl": 0.0149993896484375, + "learning_rate": 8.453991186577727e-07, + "loss": -0.0887, + "num_tokens": 75287221.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9705591201782227, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.2747147873915589, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17537203067357457, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059004080210452226, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1725 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1453.0, + "completions/mean_length": 1216.6875, + "completions/mean_terminated_length": 1197.800048828125, + "completions/min_length": 971.0, + "completions/min_terminated_length": 971.0, + "epoch": 0.34526905381076217, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.204124468510766, + "kl": 0.019256591796875, + "learning_rate": 8.451619997474093e-07, + "loss": -0.0336, + "num_tokens": 75339672.0, + "reward": 0.0, + "reward_std": 0.989846408367157, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.13581368072093447, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1253899058464622, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12405196043952264, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327954292297363, + "step": 1726 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1415.0, + "completions/max_terminated_length": 1415.0, + "completions/mean_length": 1168.125, + "completions/mean_terminated_length": 1168.125, + "completions/min_length": 884.0, + "completions/min_terminated_length": 884.0, + "epoch": 0.34546909381876373, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4486116021151423, + "kl": 0.018707275390625, + "learning_rate": 8.449247369147143e-07, + "loss": -0.0409, + "num_tokens": 75393170.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8702799081802368, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0459646464504935, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12010458626756157, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10101338378503961, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1727 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1269.125, + "completions/mean_terminated_length": 1253.7333984375, + "completions/min_length": 1023.0, + "completions/min_terminated_length": 1023.0, + "epoch": 0.34566913382676534, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.035682361278998, + "kl": 0.016998291015625, + "learning_rate": 8.446873302753783e-07, + "loss": 0.0083, + "num_tokens": 75431940.0, + "reward": 0.0, + "reward_std": 1.0170331001281738, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08356652752793131, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12399762041948204, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05692750425533111, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1728 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1138.0, + "completions/mean_length": 1019.5, + "completions/mean_terminated_length": 950.857177734375, + "completions/min_length": 764.0, + "completions/min_terminated_length": 764.0, + "epoch": 0.34586917383476695, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.902200978861309, + "kl": 0.019744873046875, + "learning_rate": 8.44449779945162e-07, + "loss": -0.0604, + "num_tokens": 75482556.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5530896186828613, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11969087214093999, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.194726194993644, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09699179041242309, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1729 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1228.0, + "completions/mean_length": 1149.125, + "completions/mean_terminated_length": 1068.1539306640625, + "completions/min_length": 893.0, + "completions/min_terminated_length": 893.0, + "epoch": 0.34606921384276856, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8770622043150076, + "kl": 0.0138092041015625, + "learning_rate": 8.442120860398958e-07, + "loss": 0.007, + "num_tokens": 75526262.0, + "reward": 0.0, + "reward_std": 0.9107504487037659, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10744317019357791, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06169897247230535, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10000000000000002, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1730 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1323.0, + "completions/max_terminated_length": 1323.0, + "completions/mean_length": 1111.3125, + "completions/mean_terminated_length": 1111.3125, + "completions/min_length": 691.0, + "completions/min_terminated_length": 691.0, + "epoch": 0.3462692538507702, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.678135287630609, + "kl": 0.016845703125, + "learning_rate": 8.439742486754806e-07, + "loss": -0.0438, + "num_tokens": 75579291.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0589884519577026, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.054474779106330876, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.061159321642873556, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1731 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1137.375, + "completions/mean_terminated_length": 972.5454711914062, + "completions/min_length": 688.0, + "completions/min_terminated_length": 688.0, + "epoch": 0.34646929385877173, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.224684325339074, + "kl": 0.01934814453125, + "learning_rate": 8.437362679678868e-07, + "loss": -0.0002, + "num_tokens": 75619329.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0673942565917969, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.10376209370550615, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04103470322713088, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05163977794943221, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1732 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1188.0, + "completions/max_terminated_length": 1188.0, + "completions/mean_length": 1012.375, + "completions/mean_terminated_length": 1012.375, + "completions/min_length": 837.0, + "completions/min_terminated_length": 837.0, + "epoch": 0.34666933386677334, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7004072601507847, + "kl": 0.0128936767578125, + "learning_rate": 8.434981440331549e-07, + "loss": 0.0063, + "num_tokens": 75658615.0, + "reward": 0.0, + "reward_std": 0.7992897629737854, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.13103698942478995, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07858598868571091, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11147163731607214, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1733 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1445.0, + "completions/max_terminated_length": 1445.0, + "completions/mean_length": 1184.625, + "completions/mean_terminated_length": 1184.625, + "completions/min_length": 896.0, + "completions/min_terminated_length": 896.0, + "epoch": 0.34686937387477496, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.69018360413545, + "kl": 0.0098114013671875, + "learning_rate": 8.432598769873952e-07, + "loss": -0.0192, + "num_tokens": 75707889.0, + "reward": 0.0, + "reward_std": 0.40173858404159546, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.034513082746690404, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11339421688299878, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07876359377087681, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1734 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1379.0, + "completions/mean_length": 1126.75, + "completions/mean_terminated_length": 1101.86669921875, + "completions/min_length": 931.0, + "completions/min_terminated_length": 931.0, + "epoch": 0.34706941388277657, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.93467706714279, + "kl": 0.0156097412109375, + "learning_rate": 8.430214669467879e-07, + "loss": -0.0248, + "num_tokens": 75748373.0, + "reward": 0.0, + "reward_std": 0.45342621207237244, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0066447898558557005, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14272496585547761, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059004080210452246, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1735 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1461.0, + "completions/mean_length": 1197.25, + "completions/mean_terminated_length": 1154.0, + "completions/min_length": 919.0, + "completions/min_terminated_length": 919.0, + "epoch": 0.3472694538907782, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4408492388204506, + "kl": 0.017181396484375, + "learning_rate": 8.427829140275826e-07, + "loss": -0.004, + "num_tokens": 75795305.0, + "reward": 0.0, + "reward_std": 1.0008856058120728, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.12982248491327358, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14815690322516356, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10183501544346313, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1736 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1338.0, + "completions/max_terminated_length": 1338.0, + "completions/mean_length": 1039.25, + "completions/mean_terminated_length": 1039.25, + "completions/min_length": 848.0, + "completions/min_terminated_length": 848.0, + "epoch": 0.34746949389877974, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5297186594693777, + "kl": 0.01526641845703125, + "learning_rate": 8.42544218346099e-07, + "loss": -0.0243, + "num_tokens": 75836573.0, + "reward": 0.0, + "reward_std": 0.5993616580963135, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.21859183664214601, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2267906105394519, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06540472290116194, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1737 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1302.4375, + "completions/mean_terminated_length": 1289.2667236328125, + "completions/min_length": 863.0, + "completions/min_terminated_length": 863.0, + "epoch": 0.34766953390678135, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.31481676739411, + "kl": 0.0126495361328125, + "learning_rate": 8.423053800187261e-07, + "loss": -0.0398, + "num_tokens": 75883748.0, + "reward": 0.0, + "reward_std": 0.6781505346298218, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.31937663331861743, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1956627623902431, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08850612031567837, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1738 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1468.0, + "completions/mean_length": 1268.625, + "completions/mean_terminated_length": 1253.2000732421875, + "completions/min_length": 1077.0, + "completions/min_terminated_length": 1077.0, + "epoch": 0.34786957391478296, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.575624246097994, + "kl": 0.0279541015625, + "learning_rate": 8.420663991619226e-07, + "loss": -0.0269, + "num_tokens": 75936934.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.004399299621582, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.001072875424237979, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.061191750685660865, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059628479399994376, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1739 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1484.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1136.125, + "completions/mean_terminated_length": 1136.125, + "completions/min_length": 887.0, + "completions/min_terminated_length": 887.0, + "epoch": 0.34806961392278457, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0375467609413858, + "kl": 0.01556396484375, + "learning_rate": 8.418272758922167e-07, + "loss": -0.0392, + "num_tokens": 75984184.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9488637447357178, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09958500537345447, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09956048457343009, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07969850595746353, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1740 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1350.0, + "completions/mean_length": 1231.375, + "completions/mean_terminated_length": 1070.2000732421875, + "completions/min_length": 927.0, + "completions/min_terminated_length": 927.0, + "epoch": 0.3482696539307862, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.643203579876494, + "kl": 0.0101318359375, + "learning_rate": 8.415880103262059e-07, + "loss": -0.0138, + "num_tokens": 76038598.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9198373556137085, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.014375340946658943, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06702514533808862, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10183501544346313, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1741 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1405.0, + "completions/max_terminated_length": 1405.0, + "completions/mean_length": 1196.0, + "completions/mean_terminated_length": 1196.0, + "completions/min_length": 1028.0, + "completions/min_terminated_length": 1028.0, + "epoch": 0.34846969393878774, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.5490689516297464, + "kl": 0.005847930908203125, + "learning_rate": 8.41348602580557e-07, + "loss": -0.0177, + "num_tokens": 76075862.0, + "reward": 0.0, + "reward_std": 0.7016544342041016, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.11664388791225115, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07354274360455224, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04194352464039305, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1742 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1402.0, + "completions/mean_length": 1215.875, + "completions/mean_terminated_length": 1196.933349609375, + "completions/min_length": 1032.0, + "completions/min_terminated_length": 1032.0, + "epoch": 0.34866973394678935, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3293427685218484, + "kl": 0.01910400390625, + "learning_rate": 8.411090527720066e-07, + "loss": -0.0035, + "num_tokens": 76115172.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.049691081047058, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.025537266354531758, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11130759301116268, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.029502040105226113, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1743 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1311.0, + "completions/mean_terminated_length": 1298.4000244140625, + "completions/min_length": 1012.0, + "completions/min_terminated_length": 1012.0, + "epoch": 0.34886977395479096, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2005348263236395, + "kl": 0.01727294921875, + "learning_rate": 8.408693610173603e-07, + "loss": -0.0146, + "num_tokens": 76161012.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7509399652481079, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.18494452609718398, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1715777019443356, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07252075054258099, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1744 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1271.0, + "completions/max_terminated_length": 1271.0, + "completions/mean_length": 1017.25, + "completions/mean_terminated_length": 1017.25, + "completions/min_length": 638.0, + "completions/min_terminated_length": 638.0, + "epoch": 0.3490698139627926, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.67222984415414, + "kl": 0.016326904296875, + "learning_rate": 8.406295274334926e-07, + "loss": -0.0012, + "num_tokens": 76206032.0, + "reward": 3.725290298461914e-08, + "reward_std": 1.0302237272262573, + "rewards/wordcountpos_reward_nokeypoint/mean": 3.725290298461914e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.158097119188263, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06194470299268374, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0938872452190116, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1745 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1268.0, + "completions/max_terminated_length": 1268.0, + "completions/mean_length": 1090.25, + "completions/mean_terminated_length": 1090.25, + "completions/min_length": 895.0, + "completions/min_terminated_length": 895.0, + "epoch": 0.3492698539707942, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.628378566305117, + "kl": 0.01678466796875, + "learning_rate": 8.403895521373476e-07, + "loss": -0.0571, + "num_tokens": 76239452.0, + "reward": 0.0, + "reward_std": 0.9765105247497559, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.23480577476646178, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1326512076725584, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04194352464039305, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327954292297363, + "step": 1746 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1436.0, + "completions/mean_length": 1150.125, + "completions/mean_terminated_length": 1126.800048828125, + "completions/min_length": 823.0, + "completions/min_terminated_length": 823.0, + "epoch": 0.34946989397879574, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.150394201883453, + "kl": 0.014801025390625, + "learning_rate": 8.401494352459384e-07, + "loss": 0.024, + "num_tokens": 76282126.0, + "reward": 0.0, + "reward_std": 0.6560832858085632, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.11888875127953531, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12528721197444986, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.093392838174146, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1747 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1188.0, + "completions/max_terminated_length": 1188.0, + "completions/mean_length": 989.5, + "completions/mean_terminated_length": 989.5, + "completions/min_length": 727.0, + "completions/min_terminated_length": 727.0, + "epoch": 0.34966993398679735, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5295236552927443, + "kl": 0.0181121826171875, + "learning_rate": 8.39909176876347e-07, + "loss": -0.0255, + "num_tokens": 76330678.0, + "reward": 0.0, + "reward_std": 0.4178394079208374, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.19857350311145192, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15518406193258613, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.19355351446812574, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1748 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1203.75, + "completions/mean_terminated_length": 1184.0001220703125, + "completions/min_length": 898.0, + "completions/min_terminated_length": 898.0, + "epoch": 0.34986997399479897, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.056473458404467, + "kl": 0.0150146484375, + "learning_rate": 8.396687771457245e-07, + "loss": 0.0082, + "num_tokens": 76365810.0, + "reward": 0.0, + "reward_std": 0.8808000683784485, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.17325428210561883, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13739464227756717, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.053748384988656986, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1749 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1486.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1091.0625, + "completions/mean_terminated_length": 1091.0625, + "completions/min_length": 753.0, + "completions/min_terminated_length": 753.0, + "epoch": 0.3500700140028006, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.052376112162826, + "kl": 0.0131683349609375, + "learning_rate": 8.39428236171291e-07, + "loss": -0.0216, + "num_tokens": 76398659.0, + "reward": 0.0, + "reward_std": 1.0258970260620117, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04784798454303219, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.24169775115710304, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04367387557118565, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1750 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1424.0, + "completions/mean_length": 1201.875, + "completions/mean_terminated_length": 1066.3636474609375, + "completions/min_length": 715.0, + "completions/min_terminated_length": 715.0, + "epoch": 0.35027005401080213, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2082081422242337, + "kl": 0.0158233642578125, + "learning_rate": 8.391875540703353e-07, + "loss": 0.0341, + "num_tokens": 76448153.0, + "reward": 0.0, + "reward_std": 0.899806797504425, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.09448522608674008, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16124020533342343, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08073734277593311, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1751 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1402.0, + "completions/max_terminated_length": 1402.0, + "completions/mean_length": 899.375, + "completions/mean_terminated_length": 899.375, + "completions/min_length": 573.0, + "completions/min_terminated_length": 573.0, + "epoch": 0.35047009401880375, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.422777566303545, + "kl": 0.0142059326171875, + "learning_rate": 8.389467309602149e-07, + "loss": 0.0603, + "num_tokens": 76477791.0, + "reward": 3.725290298461914e-08, + "reward_std": 1.0351159572601318, + "rewards/wordcountpos_reward_nokeypoint/mean": 3.725290298461914e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.13982777679586206, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0747542244578916, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05947299418254507, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1752 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1165.0, + "completions/max_terminated_length": 1165.0, + "completions/mean_length": 945.4375, + "completions/mean_terminated_length": 945.4375, + "completions/min_length": 786.0, + "completions/min_terminated_length": 786.0, + "epoch": 0.35067013402680536, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2373279113966498, + "kl": 0.0153656005859375, + "learning_rate": 8.387057669583564e-07, + "loss": -0.0136, + "num_tokens": 76510710.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0010383129119873, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05564063408072664, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05782675283777668, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.048686449556014755, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1753 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1417.0, + "completions/mean_length": 1112.1875, + "completions/mean_terminated_length": 1086.3333740234375, + "completions/min_length": 830.0, + "completions/min_terminated_length": 830.0, + "epoch": 0.35087017403480697, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.556371402843741, + "kl": 0.018646240234375, + "learning_rate": 8.38464662182255e-07, + "loss": -0.0125, + "num_tokens": 76561409.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0581417083740234, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.002035676875650953, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03299545307251513, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13601470508735444, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1754 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1113.0, + "completions/mean_terminated_length": 1087.2000732421875, + "completions/min_length": 847.0, + "completions/min_terminated_length": 847.0, + "epoch": 0.3510702140428086, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9309974488284314, + "kl": 0.0177764892578125, + "learning_rate": 8.382234167494747e-07, + "loss": 0.0248, + "num_tokens": 76601913.0, + "reward": 0.0, + "reward_std": 0.7100398540496826, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.18866726465569147, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.23097905546345873, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04216370213557838, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1755 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1318.0, + "completions/mean_length": 1004.4375, + "completions/mean_terminated_length": 971.4000244140625, + "completions/min_length": 607.0, + "completions/min_terminated_length": 607.0, + "epoch": 0.35127025405081014, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3215225671035737, + "kl": 0.0172119140625, + "learning_rate": 8.379820307776472e-07, + "loss": -0.0445, + "num_tokens": 76644024.0, + "reward": 0.0, + "reward_std": 0.8708715438842773, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10702508676423013, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.127522591931522, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09016445879408155, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1756 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1134.0, + "completions/max_terminated_length": 1134.0, + "completions/mean_length": 910.5625, + "completions/mean_terminated_length": 910.5625, + "completions/min_length": 702.0, + "completions/min_terminated_length": 702.0, + "epoch": 0.35147029405881175, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.845019609019427, + "kl": 0.0175018310546875, + "learning_rate": 8.377405043844739e-07, + "loss": -0.0432, + "num_tokens": 76684041.0, + "reward": 0.0, + "reward_std": 0.572967529296875, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.09365141698623702, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17268321797629968, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07302967433402216, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1757 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1447.0, + "completions/mean_length": 1196.5, + "completions/mean_terminated_length": 1176.2667236328125, + "completions/min_length": 983.0, + "completions/min_terminated_length": 983.0, + "epoch": 0.35167033406681336, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.560042631836972, + "kl": 0.022186279296875, + "learning_rate": 8.374988376877241e-07, + "loss": 0.0517, + "num_tokens": 76725697.0, + "reward": 0.0, + "reward_std": 0.9137843251228333, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04250903984642392, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09782268518710945, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875000000000001, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0758897836290186, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1758 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1423.0, + "completions/max_terminated_length": 1423.0, + "completions/mean_length": 1053.625, + "completions/mean_terminated_length": 1053.625, + "completions/min_length": 795.0, + "completions/min_terminated_length": 795.0, + "epoch": 0.351870374074815, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.825159897119055, + "kl": 0.0148468017578125, + "learning_rate": 8.372570308052356e-07, + "loss": 0.0334, + "num_tokens": 76754739.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.3436211049556732, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06698548958272461, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07874771591454678, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.057089922571845, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1759 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1445.0, + "completions/mean_length": 1269.1875, + "completions/mean_terminated_length": 1215.923095703125, + "completions/min_length": 1012.0, + "completions/min_terminated_length": 1012.0, + "epoch": 0.3520704140828166, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1138371008127157, + "kl": 0.01837158203125, + "learning_rate": 8.370150838549143e-07, + "loss": 0.0069, + "num_tokens": 76806246.0, + "reward": 0.0, + "reward_std": 0.4570016860961914, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.01765775996326986, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.18390997205347576, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08944271909999159, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1760 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1458.0, + "completions/mean_length": 1141.375, + "completions/mean_terminated_length": 978.3636474609375, + "completions/min_length": 624.0, + "completions/min_terminated_length": 624.0, + "epoch": 0.35227045409081814, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.000213405609253, + "kl": 0.014984130859375, + "learning_rate": 8.36772996954735e-07, + "loss": 0.0197, + "num_tokens": 76849300.0, + "reward": 0.0, + "reward_std": 0.7042227983474731, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05517172135904519, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07868329567806999, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.046943622609505783, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1761 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1274.375, + "completions/mean_terminated_length": 1259.3333740234375, + "completions/min_length": 967.0, + "completions/min_terminated_length": 967.0, + "epoch": 0.35247049409881975, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.319236726762567, + "kl": 0.0174102783203125, + "learning_rate": 8.365307702227402e-07, + "loss": 0.0002, + "num_tokens": 76889714.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.7033008337020874, + "rewards/wordcountpos_reward_nokeypoint/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04122473813805544, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1058830870014691, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.057089922571845, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1762 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1233.0, + "completions/max_terminated_length": 1233.0, + "completions/mean_length": 948.9375, + "completions/mean_terminated_length": 948.9375, + "completions/min_length": 768.0, + "completions/min_terminated_length": 768.0, + "epoch": 0.35267053410682137, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.172577645100569, + "kl": 0.0134429931640625, + "learning_rate": 8.362884037770406e-07, + "loss": -0.0623, + "num_tokens": 76931713.0, + "reward": -1.1175870895385742e-08, + "reward_std": 1.0488773584365845, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09256817653806657, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10394414180449266, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1029203215725281, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1763 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1431.0, + "completions/mean_length": 1268.1875, + "completions/mean_terminated_length": 1214.6923828125, + "completions/min_length": 837.0, + "completions/min_terminated_length": 837.0, + "epoch": 0.352870574114823, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9768904507037908, + "kl": 0.0122833251953125, + "learning_rate": 8.360458977358153e-07, + "loss": 0.0027, + "num_tokens": 76980620.0, + "reward": 0.0, + "reward_std": 1.0033589601516724, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.19461057679407573, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09318397635538217, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04554200340426487, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1764 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1427.0, + "completions/mean_length": 1121.6875, + "completions/mean_terminated_length": 894.7000122070312, + "completions/min_length": 646.0, + "completions/min_terminated_length": 646.0, + "epoch": 0.3530706141228246, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8462201027460337, + "kl": 0.0133056640625, + "learning_rate": 8.358032522173114e-07, + "loss": -0.007, + "num_tokens": 77023911.0, + "reward": 0.0, + "reward_std": 0.7995069026947021, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08103344076195051, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08215878623972797, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07781745019952502, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1765 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1412.0, + "completions/mean_length": 1132.625, + "completions/mean_terminated_length": 1108.1334228515625, + "completions/min_length": 864.0, + "completions/min_terminated_length": 864.0, + "epoch": 0.35327065413082614, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5236000659456197, + "kl": 0.02093505859375, + "learning_rate": 8.35560467339844e-07, + "loss": -0.0157, + "num_tokens": 77076969.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8632822036743164, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0963545086476164, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09801631434299984, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14900907255500823, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1766 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1214.0, + "completions/max_terminated_length": 1214.0, + "completions/mean_length": 976.4375, + "completions/mean_terminated_length": 976.4375, + "completions/min_length": 817.0, + "completions/min_terminated_length": 817.0, + "epoch": 0.35347069413882776, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.444303472305, + "kl": 0.017730712890625, + "learning_rate": 8.353175432217959e-07, + "loss": -0.0188, + "num_tokens": 77117280.0, + "reward": -4.470348358154297e-08, + "reward_std": 0.9198818206787109, + "rewards/wordcountpos_reward_nokeypoint/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11477566364966771, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0563602358003003, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0683130051063973, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1767 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1284.0, + "completions/max_terminated_length": 1284.0, + "completions/mean_length": 1095.9375, + "completions/mean_terminated_length": 1095.9375, + "completions/min_length": 869.0, + "completions/min_terminated_length": 869.0, + "epoch": 0.35367073414682937, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.198704003917221, + "kl": 0.0179290771484375, + "learning_rate": 8.35074479981618e-07, + "loss": -0.0095, + "num_tokens": 77167887.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.39101526141166687, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.15345166108477312, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1400523913189857, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.03191423692521126, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1768 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1374.0, + "completions/mean_length": 1172.625, + "completions/mean_terminated_length": 1150.800048828125, + "completions/min_length": 851.0, + "completions/min_terminated_length": 851.0, + "epoch": 0.353870774154831, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2854922145142678, + "kl": 0.016357421875, + "learning_rate": 8.348312777378293e-07, + "loss": -0.0167, + "num_tokens": 77211897.0, + "reward": 0.0, + "reward_std": 0.562897801399231, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03657559909446236, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1604196876855549, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09269623828717427, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1769 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1269.0, + "completions/max_terminated_length": 1269.0, + "completions/mean_length": 1016.3125, + "completions/mean_terminated_length": 1016.3125, + "completions/min_length": 741.0, + "completions/min_terminated_length": 741.0, + "epoch": 0.3540708141628326, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.536552003190818, + "kl": 0.0170745849609375, + "learning_rate": 8.345879366090164e-07, + "loss": -0.0215, + "num_tokens": 77259414.0, + "reward": 0.0, + "reward_std": 0.8807018995285034, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.013175509834400374, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.26626846262726633, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06426219440409445, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1770 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1272.8125, + "completions/mean_terminated_length": 1169.5455322265625, + "completions/min_length": 837.0, + "completions/min_terminated_length": 837.0, + "epoch": 0.35427085417083415, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.092596720079364, + "kl": 0.0183563232421875, + "learning_rate": 8.343444567138331e-07, + "loss": -0.0062, + "num_tokens": 77313027.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9371596574783325, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0358003894872216, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2868700108601471, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10945995377982527, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1771 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1236.4375, + "completions/mean_terminated_length": 1116.6363525390625, + "completions/min_length": 959.0, + "completions/min_terminated_length": 959.0, + "epoch": 0.35447089417883576, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.125562216791654, + "kl": 0.01470947265625, + "learning_rate": 8.341008381710015e-07, + "loss": -0.0289, + "num_tokens": 77366290.0, + "reward": 4.470348358154297e-08, + "reward_std": 1.055213451385498, + "rewards/wordcountpos_reward_nokeypoint/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.17339255927139974, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08915407682889143, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0969917904124231, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1772 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1483.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1019.1875, + "completions/mean_terminated_length": 1019.1875, + "completions/min_length": 795.0, + "completions/min_terminated_length": 795.0, + "epoch": 0.35467093418683737, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.0960038513292822, + "kl": 0.0084075927734375, + "learning_rate": 8.338570810993111e-07, + "loss": -0.0491, + "num_tokens": 77404557.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0085277557373047, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.17520762786213065, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13609227095566784, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059004080210452226, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1773 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1321.0, + "completions/max_terminated_length": 1321.0, + "completions/mean_length": 1049.6875, + "completions/mean_terminated_length": 1049.6875, + "completions/min_length": 712.0, + "completions/min_terminated_length": 712.0, + "epoch": 0.354870974194839, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0043296573657634, + "kl": 0.012237548828125, + "learning_rate": 8.336131856176192e-07, + "loss": -0.0303, + "num_tokens": 77438080.0, + "reward": 0.0, + "reward_std": 0.8336870074272156, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.17353149162667517, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09690772265486865, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0769800358919501, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1774 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1383.0, + "completions/mean_length": 1248.5625, + "completions/mean_terminated_length": 1212.6429443359375, + "completions/min_length": 985.0, + "completions/min_terminated_length": 985.0, + "epoch": 0.3550710142028406, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.71271987105994, + "kl": 0.0142822265625, + "learning_rate": 8.3336915184485e-07, + "loss": -0.0046, + "num_tokens": 77486801.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.4086378216743469, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03572361837017765, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13164215370904722, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.16771890063326086, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1775 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1375.0, + "completions/max_terminated_length": 1375.0, + "completions/mean_length": 1151.8125, + "completions/mean_terminated_length": 1151.8125, + "completions/min_length": 730.0, + "completions/min_terminated_length": 730.0, + "epoch": 0.35527105421084215, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6550176739925107, + "kl": 0.0128326416015625, + "learning_rate": 8.331249798999957e-07, + "loss": -0.0168, + "num_tokens": 77538406.0, + "reward": 0.0, + "reward_std": 0.9021509289741516, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.11933458614661069, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13642220373326755, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12524050936172842, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1776 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1439.0, + "completions/mean_length": 1200.5625, + "completions/mean_terminated_length": 1020.9000244140625, + "completions/min_length": 796.0, + "completions/min_terminated_length": 796.0, + "epoch": 0.35547109421884376, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0338864743391505, + "kl": 0.01495361328125, + "learning_rate": 8.328806699021155e-07, + "loss": -0.0136, + "num_tokens": 77583239.0, + "reward": 0.0, + "reward_std": 0.8024120330810547, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.11925756808527449, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10268482115907478, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11385500851066221, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1777 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1256.0, + "completions/max_terminated_length": 1256.0, + "completions/mean_length": 1053.5625, + "completions/mean_terminated_length": 1053.5625, + "completions/min_length": 925.0, + "completions/min_terminated_length": 925.0, + "epoch": 0.3556711342268454, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5559502715184843, + "kl": 0.0164947509765625, + "learning_rate": 8.32636221970336e-07, + "loss": -0.0031, + "num_tokens": 77621440.0, + "reward": 0.0, + "reward_std": 0.3156833350658417, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.17211103058467594, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.29069532169283613, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06382847385042252, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1778 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1307.0, + "completions/max_terminated_length": 1307.0, + "completions/mean_length": 909.75, + "completions/mean_terminated_length": 909.75, + "completions/min_length": 587.0, + "completions/min_terminated_length": 587.0, + "epoch": 0.355871174234847, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0906702680724503, + "kl": 0.013824462890625, + "learning_rate": 8.323916362238514e-07, + "loss": -0.0513, + "num_tokens": 77659076.0, + "reward": 0.0, + "reward_std": 0.8734369277954102, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.09059118235301679, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08360136122826338, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0938872452190116, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1779 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1252.0, + "completions/max_terminated_length": 1252.0, + "completions/mean_length": 992.5625, + "completions/mean_terminated_length": 992.5625, + "completions/min_length": 604.0, + "completions/min_terminated_length": 604.0, + "epoch": 0.35607121424284854, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4872837372885455, + "kl": 0.0168609619140625, + "learning_rate": 8.321469127819227e-07, + "loss": 0.0186, + "num_tokens": 77705077.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.6407782435417175, + "rewards/wordcountpos_reward_nokeypoint/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.18983653554497143, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.18274022870156093, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08933913745655642, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1780 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1262.625, + "completions/mean_terminated_length": 1154.727294921875, + "completions/min_length": 856.0, + "completions/min_terminated_length": 856.0, + "epoch": 0.35627125425085016, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.127943548956721, + "kl": 0.0156097412109375, + "learning_rate": 8.31902051763878e-07, + "loss": -0.0349, + "num_tokens": 77749911.0, + "reward": 0.0, + "reward_std": 0.9480481147766113, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.010481826364990359, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06355489277127684, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.047919685895217376, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1781 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1299.0, + "completions/max_terminated_length": 1299.0, + "completions/mean_length": 1124.5, + "completions/mean_terminated_length": 1124.5, + "completions/min_length": 951.0, + "completions/min_terminated_length": 951.0, + "epoch": 0.35647129425885177, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2114178119008523, + "kl": 0.01971435546875, + "learning_rate": 8.316570532891128e-07, + "loss": -0.0236, + "num_tokens": 77788135.0, + "reward": 0.0, + "reward_std": 1.0541596412658691, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08680593680880527, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07927631555663836, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05146016078626404, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1782 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1291.0, + "completions/max_terminated_length": 1291.0, + "completions/mean_length": 1103.8125, + "completions/mean_terminated_length": 1103.8125, + "completions/min_length": 968.0, + "completions/min_terminated_length": 968.0, + "epoch": 0.3566713342668534, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.40283683259427, + "kl": 0.0269775390625, + "learning_rate": 8.314119174770893e-07, + "loss": -0.0113, + "num_tokens": 77830420.0, + "reward": 0.0, + "reward_std": 0.8436186909675598, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.012718876754064383, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15573784810414942, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06831300510639732, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1783 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1328.0, + "completions/max_terminated_length": 1328.0, + "completions/mean_length": 1057.25, + "completions/mean_terminated_length": 1057.25, + "completions/min_length": 857.0, + "completions/min_terminated_length": 857.0, + "epoch": 0.356871374274855, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9021804695572384, + "kl": 0.03106689453125, + "learning_rate": 8.311666444473372e-07, + "loss": 0.0142, + "num_tokens": 77873832.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7596322298049927, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10480702634928692, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11306538967710876, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09803627446568496, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1784 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1443.0, + "completions/max_terminated_length": 1443.0, + "completions/mean_length": 1001.5, + "completions/mean_terminated_length": 1001.5, + "completions/min_length": 654.0, + "completions/min_terminated_length": 654.0, + "epoch": 0.35707141428285655, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3513912001693456, + "kl": 0.017852783203125, + "learning_rate": 8.309212343194524e-07, + "loss": 0.0096, + "num_tokens": 77914224.0, + "reward": 0.0, + "reward_std": 0.5608179569244385, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.11794826924521791, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1990517599884319, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.057089922571845, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1785 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1299.0, + "completions/max_terminated_length": 1299.0, + "completions/mean_length": 1012.5, + "completions/mean_terminated_length": 1012.5, + "completions/min_length": 909.0, + "completions/min_terminated_length": 909.0, + "epoch": 0.35727145429085816, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.17081899690862, + "kl": 0.0142974853515625, + "learning_rate": 8.306756872130981e-07, + "loss": 0.0217, + "num_tokens": 77955416.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8454060554504395, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08474456539252244, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10724995077219708, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 1.0, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1786 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1267.0625, + "completions/mean_terminated_length": 1213.3077392578125, + "completions/min_length": 889.0, + "completions/min_terminated_length": 889.0, + "epoch": 0.35747149429885977, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.758589022411733, + "kl": 0.0136871337890625, + "learning_rate": 8.304300032480043e-07, + "loss": -0.0356, + "num_tokens": 78010065.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8817118406295776, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.19080564057706179, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2784014118563271, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0938872452190116, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1787 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1465.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 1280.4375, + "completions/mean_terminated_length": 1280.4375, + "completions/min_length": 1116.0, + "completions/min_terminated_length": 1116.0, + "epoch": 0.3576715343068614, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.024873596259936, + "kl": 0.015960693359375, + "learning_rate": 8.301841825439674e-07, + "loss": -0.0145, + "num_tokens": 78061216.0, + "reward": 0.0, + "reward_std": 0.9522070288658142, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.054660285111937164, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.18102352616977724, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06426219440409445, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1788 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1436.0, + "completions/mean_length": 1122.9375, + "completions/mean_terminated_length": 1097.800048828125, + "completions/min_length": 819.0, + "completions/min_terminated_length": 819.0, + "epoch": 0.357871574314863, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5996800713296007, + "kl": 0.017120361328125, + "learning_rate": 8.299382252208508e-07, + "loss": 0.0298, + "num_tokens": 78113191.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9888581037521362, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.010194271896354754, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.052806969591037575, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1287403358472941, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1789 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1100.0, + "completions/mean_length": 1023.1875, + "completions/mean_terminated_length": 991.4000244140625, + "completions/min_length": 874.0, + "completions/min_terminated_length": 874.0, + "epoch": 0.35807161432286455, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.110775063670312, + "kl": 0.0159149169921875, + "learning_rate": 8.296921313985845e-07, + "loss": 0.0335, + "num_tokens": 78150002.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0220086574554443, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06080540857402285, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0455265669691387, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07302967433402216, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1790 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1422.0, + "completions/mean_length": 1134.1875, + "completions/mean_terminated_length": 1109.800048828125, + "completions/min_length": 897.0, + "completions/min_terminated_length": 897.0, + "epoch": 0.35827165433086616, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2078677520403764, + "kl": 0.017425537109375, + "learning_rate": 8.294459011971648e-07, + "loss": -0.0737, + "num_tokens": 78194341.0, + "reward": 0.0, + "reward_std": 0.981174111366272, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.15618502927077219, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15370247120543473, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06666666666666667, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1791 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1182.0, + "completions/max_terminated_length": 1182.0, + "completions/mean_length": 1030.125, + "completions/mean_terminated_length": 1030.125, + "completions/min_length": 805.0, + "completions/min_terminated_length": 805.0, + "epoch": 0.3584716943388678, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.974854318453336, + "kl": 0.016357421875, + "learning_rate": 8.291995347366549e-07, + "loss": -0.0159, + "num_tokens": 78240303.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7962504625320435, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.01090876831187452, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.043528108601590586, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8291666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11409872268574493, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1792 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1433.0, + "completions/mean_length": 1128.25, + "completions/mean_terminated_length": 1103.4666748046875, + "completions/min_length": 910.0, + "completions/min_terminated_length": 910.0, + "epoch": 0.3586717343468694, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3939063795916056, + "kl": 0.021759033203125, + "learning_rate": 8.289530321371838e-07, + "loss": 0.0226, + "num_tokens": 78292331.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0326144695281982, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.032266435231943294, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09680293112494531, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09418264367902597, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1793 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1368.0, + "completions/mean_length": 1118.3125, + "completions/mean_terminated_length": 1092.86669921875, + "completions/min_length": 947.0, + "completions/min_terminated_length": 947.0, + "epoch": 0.358871774354871, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3446659974411, + "kl": 0.01849365234375, + "learning_rate": 8.287063935189477e-07, + "loss": -0.0569, + "num_tokens": 78343208.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9938691854476929, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.02815596667571507, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.055642813530270244, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13333333333333336, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1794 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1249.125, + "completions/mean_terminated_length": 1191.2308349609375, + "completions/min_length": 905.0, + "completions/min_terminated_length": 905.0, + "epoch": 0.35907181436287255, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.117266100258213, + "kl": 0.01458740234375, + "learning_rate": 8.284596190022084e-07, + "loss": -0.0075, + "num_tokens": 78386426.0, + "reward": 0.0, + "reward_std": 0.8423290848731995, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.12218934630100665, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.21596453183295336, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1308094458023239, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1795 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1451.0, + "completions/max_terminated_length": 1451.0, + "completions/mean_length": 1148.0, + "completions/mean_terminated_length": 1148.0, + "completions/min_length": 913.0, + "completions/min_terminated_length": 913.0, + "epoch": 0.35927185437087417, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.690762939786055, + "kl": 0.01239013671875, + "learning_rate": 8.282127087072945e-07, + "loss": -0.0074, + "num_tokens": 78433946.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.996867299079895, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.004538856322865068, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05025256379557388, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10174405069512346, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1796 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1471.0, + "completions/max_terminated_length": 1471.0, + "completions/mean_length": 1115.375, + "completions/mean_terminated_length": 1115.375, + "completions/min_length": 959.0, + "completions/min_terminated_length": 959.0, + "epoch": 0.3594718943788758, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8018353136271688, + "kl": 0.0192413330078125, + "learning_rate": 8.279656627546006e-07, + "loss": -0.0478, + "num_tokens": 78479896.0, + "reward": -3.725290298461914e-09, + "reward_std": 1.0344955921173096, + "rewards/wordcountpos_reward_nokeypoint/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.13722847550510547, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1346661058848984, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04127594582445935, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1797 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1123.0, + "completions/max_terminated_length": 1123.0, + "completions/mean_length": 919.25, + "completions/mean_terminated_length": 919.25, + "completions/min_length": 694.0, + "completions/min_terminated_length": 694.0, + "epoch": 0.3596719343868774, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.404607093445253, + "kl": 0.01324462890625, + "learning_rate": 8.277184812645872e-07, + "loss": -0.0059, + "num_tokens": 78507780.0, + "reward": 0.0, + "reward_std": 0.763640284538269, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05966585139189346, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10464555413195063, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11080513425729775, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1798 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1324.0, + "completions/max_terminated_length": 1324.0, + "completions/mean_length": 1102.875, + "completions/mean_terminated_length": 1102.875, + "completions/min_length": 920.0, + "completions/min_terminated_length": 920.0, + "epoch": 0.359871974394879, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.5892113908797711, + "kl": 0.005950927734375, + "learning_rate": 8.274711643577812e-07, + "loss": -0.0303, + "num_tokens": 78542810.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.79658043384552, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.010936294163880176, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06102417096642818, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11666666666666668, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1799 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1384.0, + "completions/max_terminated_length": 1384.0, + "completions/mean_length": 1003.75, + "completions/mean_terminated_length": 1003.75, + "completions/min_length": 777.0, + "completions/min_terminated_length": 777.0, + "epoch": 0.36007201440288056, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.8521693419452194, + "kl": 0.00722503662109375, + "learning_rate": 8.272237121547755e-07, + "loss": -0.0034, + "num_tokens": 78575910.0, + "reward": 0.0, + "reward_std": 0.7797620296478271, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03938445022798302, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08914690206904811, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11147163731607215, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1800 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1421.0, + "completions/mean_length": 1154.3125, + "completions/mean_terminated_length": 1104.9285888671875, + "completions/min_length": 848.0, + "completions/min_terminated_length": 848.0, + "epoch": 0.36027205441088217, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4923586664028514, + "kl": 0.017578125, + "learning_rate": 8.26976124776229e-07, + "loss": 0.0286, + "num_tokens": 78613843.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0579009056091309, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.011422407712564354, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05270682876797974, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07302967433402215, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1801 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1328.0, + "completions/max_terminated_length": 1328.0, + "completions/mean_length": 1008.4375, + "completions/mean_terminated_length": 1008.4375, + "completions/min_length": 671.0, + "completions/min_terminated_length": 671.0, + "epoch": 0.3604720944188838, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.429424529501224, + "kl": 0.0157623291015625, + "learning_rate": 8.267284023428663e-07, + "loss": -0.0053, + "num_tokens": 78657210.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9683893918991089, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.02690508981580942, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04790708957025376, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0938872452190116, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1802 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1492.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1108.0625, + "completions/mean_terminated_length": 1108.0625, + "completions/min_length": 949.0, + "completions/min_terminated_length": 949.0, + "epoch": 0.3606721344268854, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5312365531294656, + "kl": 0.0125732421875, + "learning_rate": 8.264805449754781e-07, + "loss": 0.0296, + "num_tokens": 78708195.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9031449556350708, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.15094206110881328, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08275489785718906, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08027729719194864, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1803 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1306.0, + "completions/max_terminated_length": 1306.0, + "completions/mean_length": 1166.8125, + "completions/mean_terminated_length": 1166.8125, + "completions/min_length": 1009.0, + "completions/min_terminated_length": 1009.0, + "epoch": 0.36087217443488695, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.700993230555191, + "kl": 0.0112762451171875, + "learning_rate": 8.262325527949206e-07, + "loss": -0.0009, + "num_tokens": 78752520.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0516976118087769, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0005988693722179068, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10181451542492005, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09878896324620104, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1804 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1413.0, + "completions/mean_length": 1139.6875, + "completions/mean_terminated_length": 1115.666748046875, + "completions/min_length": 873.0, + "completions/min_terminated_length": 873.0, + "epoch": 0.36107221444288856, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.240035353646294, + "kl": 0.02178955078125, + "learning_rate": 8.25984425922116e-07, + "loss": -0.0141, + "num_tokens": 78794355.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0379116535186768, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06954750455624231, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.070972393084604, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0758897836290186, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1805 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1388.0, + "completions/mean_length": 1307.6875, + "completions/mean_terminated_length": 1220.272705078125, + "completions/min_length": 973.0, + "completions/min_terminated_length": 973.0, + "epoch": 0.3612722544508902, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0631610376180407, + "kl": 0.016265869140625, + "learning_rate": 8.257361644780519e-07, + "loss": -0.0067, + "num_tokens": 78837486.0, + "reward": 0.0, + "reward_std": 0.7543654441833496, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.23927098423370527, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.27482632784281663, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06831300510639733, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1806 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1317.0625, + "completions/mean_terminated_length": 1290.9285888671875, + "completions/min_length": 1104.0, + "completions/min_terminated_length": 1104.0, + "epoch": 0.3614722944588918, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.77984621122164, + "kl": 0.0159149169921875, + "learning_rate": 8.254877685837824e-07, + "loss": -0.0074, + "num_tokens": 78884567.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0283704996109009, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0013033831842723179, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06252068994696425, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11155467020454342, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1807 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1408.0, + "completions/mean_length": 1086.0, + "completions/mean_terminated_length": 1058.4000244140625, + "completions/min_length": 795.0, + "completions/min_terminated_length": 795.0, + "epoch": 0.3616723344668934, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.100028687974234, + "kl": 0.018341064453125, + "learning_rate": 8.252392383604255e-07, + "loss": -0.0791, + "num_tokens": 78925303.0, + "reward": 1.6763806343078613e-08, + "reward_std": 1.0140161514282227, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.6763806343078613e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03196881998262416, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06407197968583073, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09418264367902597, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1808 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1276.0, + "completions/mean_length": 1238.75, + "completions/mean_terminated_length": 1035.5555419921875, + "completions/min_length": 870.0, + "completions/min_terminated_length": 870.0, + "epoch": 0.36187237447489495, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6673429290949273, + "kl": 0.016204833984375, + "learning_rate": 8.249905739291665e-07, + "loss": -0.0469, + "num_tokens": 78983563.0, + "reward": 0.0, + "reward_std": 0.6179324984550476, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.15619389965828478, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.15435205104316532, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10318986456114838, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1809 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1203.0, + "completions/max_terminated_length": 1203.0, + "completions/mean_length": 1076.625, + "completions/mean_terminated_length": 1076.625, + "completions/min_length": 867.0, + "completions/min_terminated_length": 867.0, + "epoch": 0.36207241448289657, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3810714149317773, + "kl": 0.0170745849609375, + "learning_rate": 8.247417754112548e-07, + "loss": -0.0031, + "num_tokens": 79025677.0, + "reward": 0.0, + "reward_std": 0.7930078506469727, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10560108030743834, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.22410150775525495, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08944271909999159, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1810 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1323.0, + "completions/max_terminated_length": 1323.0, + "completions/mean_length": 1093.3125, + "completions/mean_terminated_length": 1093.3125, + "completions/min_length": 818.0, + "completions/min_terminated_length": 818.0, + "epoch": 0.3622724544908982, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7559173925284606, + "kl": 0.0132293701171875, + "learning_rate": 8.244928429280058e-07, + "loss": -0.0158, + "num_tokens": 79076986.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.718095064163208, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.22646677222651865, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10529471845043613, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09428090415820635, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1811 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1274.6875, + "completions/mean_terminated_length": 1242.5, + "completions/min_length": 976.0, + "completions/min_terminated_length": 976.0, + "epoch": 0.3624724944988998, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.206829028606904, + "kl": 0.02020263671875, + "learning_rate": 8.242437766008001e-07, + "loss": -0.0543, + "num_tokens": 79126269.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0138870477676392, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.2666249675845564, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.40727417774878794, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1772213514433501, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1812 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1306.1875, + "completions/mean_terminated_length": 1189.9000244140625, + "completions/min_length": 879.0, + "completions/min_terminated_length": 879.0, + "epoch": 0.3626725345069014, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.941533700414517, + "kl": 0.0179901123046875, + "learning_rate": 8.239945765510837e-07, + "loss": 0.0082, + "num_tokens": 79181512.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9870463013648987, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1192823326636457, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03870121541159185, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04999999999999999, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1813 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1473.0, + "completions/mean_length": 1016.0625, + "completions/mean_terminated_length": 983.800048828125, + "completions/min_length": 713.0, + "completions/min_terminated_length": 713.0, + "epoch": 0.36287257451490296, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8922785098389316, + "kl": 0.021148681640625, + "learning_rate": 8.237452429003676e-07, + "loss": 0.053, + "num_tokens": 79220673.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7784633636474609, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.009231836060772564, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1578624219615674, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06382847385042255, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1814 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1331.0, + "completions/max_terminated_length": 1331.0, + "completions/mean_length": 1031.8125, + "completions/mean_terminated_length": 1031.8125, + "completions/min_length": 837.0, + "completions/min_terminated_length": 837.0, + "epoch": 0.36307261452290457, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.534699193665763, + "kl": 0.0179595947265625, + "learning_rate": 8.23495775770228e-07, + "loss": -0.0318, + "num_tokens": 79253790.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6813488006591797, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.10815441423331898, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.173254441552916, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.056927504255331086, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1815 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1247.0, + "completions/max_terminated_length": 1247.0, + "completions/mean_length": 1034.0625, + "completions/mean_terminated_length": 1034.0625, + "completions/min_length": 875.0, + "completions/min_terminated_length": 875.0, + "epoch": 0.3632726545309062, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.869295538968803, + "kl": 0.0140838623046875, + "learning_rate": 8.232461752823062e-07, + "loss": -0.0095, + "num_tokens": 79309367.0, + "reward": 1.1175870895385742e-08, + "reward_std": 1.0382907390594482, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.00019501525990189492, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03850074171366923, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06440611887195308, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1816 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1438.875, + "completions/mean_terminated_length": 1360.2857666015625, + "completions/min_length": 1218.0, + "completions/min_terminated_length": 1218.0, + "epoch": 0.3634726945389078, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0028726771281926, + "kl": 0.017333984375, + "learning_rate": 8.229964415583086e-07, + "loss": 0.0084, + "num_tokens": 79353573.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.2845398485660553, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1304084044863049, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16829772789979253, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.047919685895217376, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1817 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1462.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1178.875, + "completions/mean_terminated_length": 1178.875, + "completions/min_length": 908.0, + "completions/min_terminated_length": 908.0, + "epoch": 0.3636727345469094, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.598878400921426, + "kl": 0.02313232421875, + "learning_rate": 8.227465747200064e-07, + "loss": -0.0167, + "num_tokens": 79403571.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9862051010131836, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07769996313905157, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07915888964837867, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10610965676722954, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1818 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1391.0, + "completions/mean_length": 1308.1875, + "completions/mean_terminated_length": 1193.0999755859375, + "completions/min_length": 888.0, + "completions/min_terminated_length": 888.0, + "epoch": 0.36387277455491096, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9326398858408225, + "kl": 0.0159912109375, + "learning_rate": 8.224965748892358e-07, + "loss": 0.0062, + "num_tokens": 79442118.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.5520796179771423, + "rewards/wordcountpos_reward_nokeypoint/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.23225498800964578, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.26331197201577566, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.029502040105226113, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1819 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1360.0, + "completions/mean_length": 1272.0, + "completions/mean_terminated_length": 1135.2000732421875, + "completions/min_length": 997.0, + "completions/min_terminated_length": 997.0, + "epoch": 0.3640728145629126, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.306739522751308, + "kl": 0.01849365234375, + "learning_rate": 8.222464421878981e-07, + "loss": 0.0314, + "num_tokens": 79489534.0, + "reward": 0.0, + "reward_std": 0.9550045728683472, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03974987023550698, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.044315543472818485, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06666666666666665, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1820 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1138.0, + "completions/mean_length": 1242.4375, + "completions/mean_terminated_length": 984.875, + "completions/min_length": 872.0, + "completions/min_terminated_length": 872.0, + "epoch": 0.3642728545709142, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4181179088559346, + "kl": 0.01898193359375, + "learning_rate": 8.219961767379586e-07, + "loss": 0.0073, + "num_tokens": 79534869.0, + "reward": 0.0, + "reward_std": 0.9260210394859314, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.024474602399982885, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12264552790317484, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07649739768026002, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1821 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1326.0, + "completions/max_terminated_length": 1326.0, + "completions/mean_length": 1084.6875, + "completions/mean_terminated_length": 1084.6875, + "completions/min_length": 777.0, + "completions/min_terminated_length": 777.0, + "epoch": 0.3644728945789158, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0835646544812345, + "kl": 0.024169921875, + "learning_rate": 8.217457786614486e-07, + "loss": -0.047, + "num_tokens": 79573984.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0632988214492798, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.12069891098264368, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0380685436615935, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0969917904124231, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1822 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1347.0, + "completions/max_terminated_length": 1347.0, + "completions/mean_length": 1147.9375, + "completions/mean_terminated_length": 1147.9375, + "completions/min_length": 926.0, + "completions/min_terminated_length": 926.0, + "epoch": 0.3646729345869174, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2455109770931276, + "kl": 0.022918701171875, + "learning_rate": 8.214952480804626e-07, + "loss": -0.0215, + "num_tokens": 79616151.0, + "reward": 0.0, + "reward_std": 0.6779146194458008, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06776028888460561, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.3437289375282916, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0838870492807861, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1823 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1324.0, + "completions/mean_length": 1269.875, + "completions/mean_terminated_length": 1039.75, + "completions/min_length": 898.0, + "completions/min_terminated_length": 898.0, + "epoch": 0.36487297459491896, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.025320097344958, + "kl": 0.01824951171875, + "learning_rate": 8.212445851171611e-07, + "loss": -0.0248, + "num_tokens": 79671685.0, + "reward": 0.0, + "reward_std": 0.6574082374572754, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04594418352243225, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16777072760249984, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11385500851066223, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1824 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1486.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1154.0, + "completions/mean_terminated_length": 1154.0, + "completions/min_length": 879.0, + "completions/min_terminated_length": 879.0, + "epoch": 0.3650730146029206, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9880406631401906, + "kl": 0.0167999267578125, + "learning_rate": 8.20993789893768e-07, + "loss": 0.0283, + "num_tokens": 79710413.0, + "reward": 0.0, + "reward_std": 0.5268779397010803, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.11663111891449834, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.18457775200466212, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0806225774829855, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1825 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1253.0, + "completions/mean_length": 1035.3125, + "completions/mean_terminated_length": 1004.3333740234375, + "completions/min_length": 697.0, + "completions/min_terminated_length": 697.0, + "epoch": 0.3652730546109222, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0221940598415347, + "kl": 0.01641845703125, + "learning_rate": 8.207428625325724e-07, + "loss": 0.0135, + "num_tokens": 79740850.0, + "reward": 0.0, + "reward_std": 0.6927785873413086, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.016890268267355492, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06159918689920948, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.055611083361076424, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1826 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1356.0, + "completions/max_terminated_length": 1356.0, + "completions/mean_length": 1012.5, + "completions/mean_terminated_length": 1012.5, + "completions/min_length": 731.0, + "completions/min_terminated_length": 731.0, + "epoch": 0.3654730946189238, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.077329006240537, + "kl": 0.01898193359375, + "learning_rate": 8.204918031559278e-07, + "loss": -0.0072, + "num_tokens": 79774826.0, + "reward": 0.0, + "reward_std": 1.0456843376159668, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0672742409482315, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11500337220723676, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10945995377982527, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1827 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 996.75, + "completions/mean_terminated_length": 963.2000732421875, + "completions/min_length": 571.0, + "completions/min_terminated_length": 571.0, + "epoch": 0.3656731346269254, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8681876735817817, + "kl": 0.0137481689453125, + "learning_rate": 8.202406118862515e-07, + "loss": -0.0554, + "num_tokens": 79803726.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7719250321388245, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.021256042817987365, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.02408145188451065, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0938872452190116, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1828 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1422.0, + "completions/mean_length": 1195.4375, + "completions/mean_terminated_length": 1175.1334228515625, + "completions/min_length": 1016.0, + "completions/min_terminated_length": 1016.0, + "epoch": 0.36587317463492697, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.988212214224675, + "kl": 0.016571044921875, + "learning_rate": 8.199892888460257e-07, + "loss": -0.0165, + "num_tokens": 79847765.0, + "reward": 0.0, + "reward_std": 0.7774133682250977, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0011176591157297001, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2641187864247528, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05163977794943221, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1829 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1447.0, + "completions/mean_length": 1203.0625, + "completions/mean_terminated_length": 1104.0833740234375, + "completions/min_length": 805.0, + "completions/min_terminated_length": 805.0, + "epoch": 0.3660732146429286, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4360084354466287, + "kl": 0.0179443359375, + "learning_rate": 8.197378341577969e-07, + "loss": -0.0167, + "num_tokens": 79893886.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7117949724197388, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.19139237228360734, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.25567985578575253, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0806225774829855, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1830 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1349.0, + "completions/max_terminated_length": 1349.0, + "completions/mean_length": 991.4375, + "completions/mean_terminated_length": 991.4375, + "completions/min_length": 821.0, + "completions/min_terminated_length": 821.0, + "epoch": 0.3662732546509302, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1835975294068013, + "kl": 0.020050048828125, + "learning_rate": 8.194862479441751e-07, + "loss": 0.0036, + "num_tokens": 79932077.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5051649212837219, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.12577314256949446, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09285409252878969, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09496588081262933, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1831 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1482.0, + "completions/mean_length": 1036.6875, + "completions/mean_terminated_length": 1005.800048828125, + "completions/min_length": 701.0, + "completions/min_terminated_length": 701.0, + "epoch": 0.3664732946589318, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.208471537129054, + "kl": 0.01593017578125, + "learning_rate": 8.192345303278351e-07, + "loss": 0.0082, + "num_tokens": 79983440.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.8649222254753113, + "rewards/wordcountpos_reward_nokeypoint/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07220369642234921, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16264253827674194, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13601470508735444, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1832 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1224.0, + "completions/max_terminated_length": 1224.0, + "completions/mean_length": 1026.6875, + "completions/mean_terminated_length": 1026.6875, + "completions/min_length": 798.0, + "completions/min_terminated_length": 798.0, + "epoch": 0.36667333466693336, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6713018204726686, + "kl": 0.00823974609375, + "learning_rate": 8.189826814315157e-07, + "loss": -0.0114, + "num_tokens": 80030891.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8926501870155334, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03229947930661615, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04966855938471376, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10610965676722954, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1833 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1395.5625, + "completions/mean_terminated_length": 1261.2857666015625, + "completions/min_length": 1060.0, + "completions/min_terminated_length": 1060.0, + "epoch": 0.36687337467493497, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0188205788779934, + "kl": 0.02301025390625, + "learning_rate": 8.187307013780192e-07, + "loss": -0.0308, + "num_tokens": 80086076.0, + "reward": 0.0, + "reward_std": 0.6629332304000854, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.26996595257247286, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17003173246847975, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09803627446568494, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327954292297363, + "step": 1834 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1294.0, + "completions/max_terminated_length": 1294.0, + "completions/mean_length": 1142.5625, + "completions/mean_terminated_length": 1142.5625, + "completions/min_length": 888.0, + "completions/min_terminated_length": 888.0, + "epoch": 0.3670734146829366, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.482147434487718, + "kl": 0.020965576171875, + "learning_rate": 8.184785902902125e-07, + "loss": 0.0011, + "num_tokens": 80128341.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0285563468933105, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.045970723449708054, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.058084177279064265, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07252075054258098, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1835 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1465.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 1209.0, + "completions/mean_terminated_length": 1209.0, + "completions/min_length": 950.0, + "completions/min_terminated_length": 950.0, + "epoch": 0.3672734546909382, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.536971231893089, + "kl": 0.018798828125, + "learning_rate": 8.182263482910263e-07, + "loss": 0.0057, + "num_tokens": 80180725.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7101560235023499, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.16338763695632444, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1343410746444934, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09339283817414601, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1836 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1374.0, + "completions/max_terminated_length": 1374.0, + "completions/mean_length": 1158.0625, + "completions/mean_terminated_length": 1158.0625, + "completions/min_length": 928.0, + "completions/min_terminated_length": 928.0, + "epoch": 0.3674734946989398, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1742409496583366, + "kl": 0.0191650390625, + "learning_rate": 8.179739755034543e-07, + "loss": 0.0063, + "num_tokens": 80217478.0, + "reward": 0.0, + "reward_std": 1.0344743728637695, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06294396707336712, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05651075680975509, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059004080210452226, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1837 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1465.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 1094.1875, + "completions/mean_terminated_length": 1094.1875, + "completions/min_length": 998.0, + "completions/min_terminated_length": 998.0, + "epoch": 0.36767353470694136, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.703618107910002, + "kl": 0.018798828125, + "learning_rate": 8.17721472050555e-07, + "loss": 0.0255, + "num_tokens": 80257513.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.00979745388031, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1770418876982063, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06835298551021456, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.047919685895217376, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1838 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1426.0, + "completions/mean_length": 1376.375, + "completions/mean_terminated_length": 1302.2000732421875, + "completions/min_length": 1154.0, + "completions/min_terminated_length": 1154.0, + "epoch": 0.367873574714943, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.914685731242003, + "kl": 0.0146942138671875, + "learning_rate": 8.174688380554505e-07, + "loss": -0.0053, + "num_tokens": 80311383.0, + "reward": 0.0, + "reward_std": 0.4557536244392395, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03241444573999281, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.218148380531137, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08850612031567837, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1839 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1425.0, + "completions/mean_length": 1143.625, + "completions/mean_terminated_length": 1119.86669921875, + "completions/min_length": 925.0, + "completions/min_terminated_length": 925.0, + "epoch": 0.3680736147229446, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4413818085622534, + "kl": 0.0151519775390625, + "learning_rate": 8.17216073641326e-07, + "loss": 0.0094, + "num_tokens": 80356465.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0399717092514038, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07699843548151168, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08072407687503812, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08388704928078611, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1840 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1413.0, + "completions/max_terminated_length": 1413.0, + "completions/mean_length": 1083.9375, + "completions/mean_terminated_length": 1083.9375, + "completions/min_length": 806.0, + "completions/min_terminated_length": 806.0, + "epoch": 0.3682736547309462, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9039510003840503, + "kl": 0.0135650634765625, + "learning_rate": 8.169631789314304e-07, + "loss": -0.0015, + "num_tokens": 80390408.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8389977216720581, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.01374793015399429, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04603433453782232, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07252075054258096, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1841 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1302.5625, + "completions/mean_terminated_length": 1302.5625, + "completions/min_length": 1069.0, + "completions/min_terminated_length": 1069.0, + "epoch": 0.3684736947389478, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.748784601285117, + "kl": 0.0195159912109375, + "learning_rate": 8.167101540490765e-07, + "loss": -0.0068, + "num_tokens": 80447177.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7953476905822754, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.24014531423252544, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.21328650933081023, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059004080210452226, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1842 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1206.0, + "completions/mean_length": 1073.5625, + "completions/mean_terminated_length": 1045.1334228515625, + "completions/min_length": 953.0, + "completions/min_terminated_length": 953.0, + "epoch": 0.36867373474694937, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0493206052327353, + "kl": 0.010772705078125, + "learning_rate": 8.164569991176405e-07, + "loss": -0.0097, + "num_tokens": 80489538.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.8971644639968872, + "rewards/wordcountpos_reward_nokeypoint/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.12445100837424142, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14882809298905517, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07302967433402213, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1843 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1285.0, + "completions/max_terminated_length": 1285.0, + "completions/mean_length": 984.4375, + "completions/mean_terminated_length": 984.4375, + "completions/min_length": 762.0, + "completions/min_terminated_length": 762.0, + "epoch": 0.368873774754951, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4129863848423185, + "kl": 0.019805908203125, + "learning_rate": 8.162037142605618e-07, + "loss": -0.0229, + "num_tokens": 80534641.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0280120372772217, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07115385017743975, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05794122623152799, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0787635937708768, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1844 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1274.0, + "completions/max_terminated_length": 1274.0, + "completions/mean_length": 1064.5, + "completions/mean_terminated_length": 1064.5, + "completions/min_length": 868.0, + "completions/min_terminated_length": 868.0, + "epoch": 0.3690738147629526, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.834202549737674, + "kl": 0.023162841796875, + "learning_rate": 8.159502996013432e-07, + "loss": -0.0342, + "num_tokens": 80575217.0, + "reward": 0.0, + "reward_std": 0.8664044141769409, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08714900483566781, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07772426019452991, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07649739768026002, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1845 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1468.0, + "completions/mean_length": 1351.125, + "completions/mean_terminated_length": 1283.45458984375, + "completions/min_length": 1000.0, + "completions/min_terminated_length": 1000.0, + "epoch": 0.3692738547709542, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8655213818321448, + "kl": 0.0132293701171875, + "learning_rate": 8.156967552635507e-07, + "loss": 0.0287, + "num_tokens": 80611115.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.906762421131134, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.20627948020478806, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.054291801464291016, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06885303726590962, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1846 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1481.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1102.5625, + "completions/mean_terminated_length": 1102.5625, + "completions/min_length": 765.0, + "completions/min_terminated_length": 765.0, + "epoch": 0.3694738947789558, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9749076740886036, + "kl": 0.0117645263671875, + "learning_rate": 8.154430813708139e-07, + "loss": -0.0406, + "num_tokens": 80651588.0, + "reward": 0.0, + "reward_std": 1.027543067932129, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03247241367524857, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0550825089377002, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06831300510639733, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1847 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1238.0, + "completions/max_terminated_length": 1238.0, + "completions/mean_length": 1001.5, + "completions/mean_terminated_length": 1001.5, + "completions/min_length": 697.0, + "completions/min_terminated_length": 697.0, + "epoch": 0.36967393478695737, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3482962708573605, + "kl": 0.0168304443359375, + "learning_rate": 8.151892780468255e-07, + "loss": -0.0245, + "num_tokens": 80701524.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0549482107162476, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08174472760888996, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11207832213233404, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1029203215725281, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1848 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1461.0, + "completions/max_terminated_length": 1461.0, + "completions/mean_length": 1115.8125, + "completions/mean_terminated_length": 1115.8125, + "completions/min_length": 941.0, + "completions/min_terminated_length": 941.0, + "epoch": 0.369873974794959, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.364401922682704, + "kl": 0.01971435546875, + "learning_rate": 8.149353454153407e-07, + "loss": -0.0058, + "num_tokens": 80746041.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0626840591430664, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1024669805531521, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06807660134846161, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09878896324620103, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1849 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1483.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1146.5, + "completions/mean_terminated_length": 1146.5, + "completions/min_length": 776.0, + "completions/min_terminated_length": 776.0, + "epoch": 0.3700740148029606, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1451781141121966, + "kl": 0.0286102294921875, + "learning_rate": 8.146812836001785e-07, + "loss": -0.0368, + "num_tokens": 80783593.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7972575426101685, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06109610509966603, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14583882606353996, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059004080210452226, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1850 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1353.0, + "completions/max_terminated_length": 1353.0, + "completions/mean_length": 968.75, + "completions/mean_terminated_length": 968.75, + "completions/min_length": 710.0, + "completions/min_terminated_length": 710.0, + "epoch": 0.3702740548109622, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3718152525058636, + "kl": 0.0151519775390625, + "learning_rate": 8.144270927252204e-07, + "loss": -0.0933, + "num_tokens": 80825773.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8783084154129028, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.016425681671782048, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1360136320487443, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.053748384988656986, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1851 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1318.0, + "completions/max_terminated_length": 1318.0, + "completions/mean_length": 977.75, + "completions/mean_terminated_length": 977.75, + "completions/min_length": 687.0, + "completions/min_terminated_length": 687.0, + "epoch": 0.3704740948189638, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.17304282468475, + "kl": 0.01971435546875, + "learning_rate": 8.141727729144112e-07, + "loss": 0.0823, + "num_tokens": 80856905.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.741578221321106, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.3013302122039553, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1736922130190315, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07888106377466156, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1852 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1399.0, + "completions/mean_length": 1246.125, + "completions/mean_terminated_length": 1130.727294921875, + "completions/min_length": 987.0, + "completions/min_terminated_length": 987.0, + "epoch": 0.3706741348269654, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5328545348569467, + "kl": 0.0109100341796875, + "learning_rate": 8.139183242917584e-07, + "loss": -0.0365, + "num_tokens": 80902059.0, + "reward": 0.0, + "reward_std": 0.8199870586395264, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03547521983469466, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.18047685653321147, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08073734277593311, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1853 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1295.0, + "completions/max_terminated_length": 1295.0, + "completions/mean_length": 1040.125, + "completions/mean_terminated_length": 1040.125, + "completions/min_length": 895.0, + "completions/min_terminated_length": 895.0, + "epoch": 0.370874174834967, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7278123902067217, + "kl": 0.018951416015625, + "learning_rate": 8.136637469813322e-07, + "loss": 0.0131, + "num_tokens": 80949269.0, + "reward": 0.0, + "reward_std": 1.0293447971343994, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.007091613457825295, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0678269124757705, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059628479399994376, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1854 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1198.375, + "completions/mean_terminated_length": 1178.2667236328125, + "completions/min_length": 862.0, + "completions/min_terminated_length": 862.0, + "epoch": 0.3710742148429686, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.383759461398951, + "kl": 0.018341064453125, + "learning_rate": 8.134090411072658e-07, + "loss": 0.0019, + "num_tokens": 80992227.0, + "reward": 0.0, + "reward_std": 0.8227076530456543, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.019379231055794678, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03332022754271787, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.13333333333333336, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1855 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1427.0, + "completions/max_terminated_length": 1427.0, + "completions/mean_length": 1113.625, + "completions/mean_terminated_length": 1113.625, + "completions/min_length": 928.0, + "completions/min_terminated_length": 928.0, + "epoch": 0.3712742548509702, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5030307583665876, + "kl": 0.0168914794921875, + "learning_rate": 8.131542067937548e-07, + "loss": 0.0454, + "num_tokens": 81028813.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.963647723197937, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.10351357049464145, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08424569933893901, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.026874192494328493, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1856 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1392.5625, + "completions/mean_terminated_length": 1285.125, + "completions/min_length": 1109.0, + "completions/min_terminated_length": 1109.0, + "epoch": 0.3714742948589718, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.171676539475257, + "kl": 0.01806640625, + "learning_rate": 8.128992441650576e-07, + "loss": 0.0084, + "num_tokens": 81085838.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9541500806808472, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0836333997143508, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11223289857739521, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08073734277593311, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1857 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1447.0, + "completions/mean_length": 1305.4375, + "completions/mean_terminated_length": 1277.6429443359375, + "completions/min_length": 1012.0, + "completions/min_terminated_length": 1012.0, + "epoch": 0.3716743348669734, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3140596499138675, + "kl": 0.018310546875, + "learning_rate": 8.12644153345495e-07, + "loss": -0.0428, + "num_tokens": 81136989.0, + "reward": 0.0, + "reward_std": 0.7865398526191711, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.24032713667711517, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14140455661798446, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09737289911202955, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1858 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1457.0, + "completions/mean_length": 1273.8125, + "completions/mean_terminated_length": 1258.7333984375, + "completions/min_length": 1086.0, + "completions/min_terminated_length": 1086.0, + "epoch": 0.371874374874975, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.522860434983106, + "kl": 0.0138092041015625, + "learning_rate": 8.123889344594509e-07, + "loss": 0.0117, + "num_tokens": 81181250.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7838484644889832, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.16436538782686513, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09088000349213667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04194352464039305, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1859 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1402.0, + "completions/max_terminated_length": 1402.0, + "completions/mean_length": 999.6875, + "completions/mean_terminated_length": 999.6875, + "completions/min_length": 789.0, + "completions/min_terminated_length": 789.0, + "epoch": 0.3720744148829766, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0211121925149436, + "kl": 0.01126861572265625, + "learning_rate": 8.121335876313706e-07, + "loss": 0.0305, + "num_tokens": 81220093.0, + "reward": 0.0, + "reward_std": 0.889500617980957, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.058917456487658504, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05773575765272489, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.053748384988656986, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1860 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1470.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1022.4375, + "completions/mean_terminated_length": 1022.4375, + "completions/min_length": 792.0, + "completions/min_terminated_length": 792.0, + "epoch": 0.3722744548909782, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8691210576741897, + "kl": 0.0113372802734375, + "learning_rate": 8.118781129857628e-07, + "loss": -0.0363, + "num_tokens": 81263396.0, + "reward": 0.0, + "reward_std": 0.9515474438667297, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.062229117346624854, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0629368480231236, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04849589520621153, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1861 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1355.0, + "completions/max_terminated_length": 1355.0, + "completions/mean_length": 1202.1875, + "completions/mean_terminated_length": 1202.1875, + "completions/min_length": 1032.0, + "completions/min_terminated_length": 1032.0, + "epoch": 0.37247449489897977, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4934780571434447, + "kl": 0.00952911376953125, + "learning_rate": 8.116225106471978e-07, + "loss": 0.0123, + "num_tokens": 81300223.0, + "reward": 0.0, + "reward_std": 0.8663266897201538, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.10741049988891452, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09871550313169179, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.029502040105226113, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1862 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1452.0, + "completions/max_terminated_length": 1452.0, + "completions/mean_length": 998.125, + "completions/mean_terminated_length": 998.125, + "completions/min_length": 713.0, + "completions/min_terminated_length": 713.0, + "epoch": 0.3726745349069814, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4369360276343914, + "kl": 0.02337646484375, + "learning_rate": 8.113667807403089e-07, + "loss": -0.0551, + "num_tokens": 81336745.0, + "reward": 0.0, + "reward_std": 0.5372064113616943, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.017687296623187257, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07186088021773503, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1002773930432755, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1863 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1244.5625, + "completions/mean_terminated_length": 1227.533447265625, + "completions/min_length": 825.0, + "completions/min_terminated_length": 825.0, + "epoch": 0.372874574914983, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4902483454560183, + "kl": 0.00933074951171875, + "learning_rate": 8.111109233897906e-07, + "loss": 0.0433, + "num_tokens": 81376034.0, + "reward": 0.0, + "reward_std": 0.8170404434204102, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03174682001512344, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16824473488147082, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9958333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.016666666666666663, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1864 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1493.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1126.6875, + "completions/mean_terminated_length": 1126.6875, + "completions/min_length": 689.0, + "completions/min_terminated_length": 689.0, + "epoch": 0.3730746149229846, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9001162684656774, + "kl": 0.0138397216796875, + "learning_rate": 8.108549387204003e-07, + "loss": 0.025, + "num_tokens": 81421757.0, + "reward": 0.0, + "reward_std": 0.3847854733467102, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.032503292402393844, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12474435041789964, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.052880017930181294, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1865 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1476.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1186.6875, + "completions/mean_terminated_length": 1186.6875, + "completions/min_length": 940.0, + "completions/min_terminated_length": 940.0, + "epoch": 0.3732746549309862, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9791068205624036, + "kl": 0.0157318115234375, + "learning_rate": 8.105988268569574e-07, + "loss": -0.037, + "num_tokens": 81466872.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0385462045669556, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0655503807547565, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17237637345449924, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06652763279965644, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327954292297363, + "step": 1866 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1382.0, + "completions/mean_length": 1322.1875, + "completions/mean_terminated_length": 1183.888916015625, + "completions/min_length": 953.0, + "completions/min_terminated_length": 953.0, + "epoch": 0.3734746949389878, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7552642700181145, + "kl": 0.0136871337890625, + "learning_rate": 8.103425879243434e-07, + "loss": 0.0257, + "num_tokens": 81519259.0, + "reward": 0.0, + "reward_std": 0.8975571990013123, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.021235809571828204, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14582585303171125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09888264649460884, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1867 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1482.0, + "completions/mean_length": 1243.5, + "completions/mean_terminated_length": 1226.4000244140625, + "completions/min_length": 1009.0, + "completions/min_terminated_length": 1009.0, + "epoch": 0.3736747349469894, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.804205516952818, + "kl": 0.015045166015625, + "learning_rate": 8.100862220475012e-07, + "loss": -0.025, + "num_tokens": 81554955.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0531667470932007, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1785325968883525, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07673075734025185, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05146016078626404, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1868 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1310.0, + "completions/max_terminated_length": 1310.0, + "completions/mean_length": 894.5, + "completions/mean_terminated_length": 894.5, + "completions/min_length": 613.0, + "completions/min_terminated_length": 613.0, + "epoch": 0.373874774954991, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.359521113709755, + "kl": 0.02081298828125, + "learning_rate": 8.098297293514361e-07, + "loss": -0.0984, + "num_tokens": 81598315.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9925675988197327, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.12654873031821037, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07518196996136353, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.2063797291222968, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1869 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1245.0, + "completions/mean_length": 1086.9375, + "completions/mean_terminated_length": 1027.9285888671875, + "completions/min_length": 712.0, + "completions/min_terminated_length": 712.0, + "epoch": 0.3740748149629926, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7000400922645285, + "kl": 0.025238037109375, + "learning_rate": 8.095731099612152e-07, + "loss": 0.0018, + "num_tokens": 81647282.0, + "reward": 0.0, + "reward_std": 0.8678827285766602, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03716751969856694, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.145059180433727, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10852547064066471, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1870 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1304.0, + "completions/mean_length": 1005.9375, + "completions/mean_terminated_length": 973.0000610351562, + "completions/min_length": 694.0, + "completions/min_terminated_length": 694.0, + "epoch": 0.3742748549709942, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.440351056672888, + "kl": 0.015625, + "learning_rate": 8.093163640019671e-07, + "loss": 0.0182, + "num_tokens": 81684705.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.8915789127349854, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03604221566786084, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06876553900021061, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1354690069789096, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1871 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1332.0, + "completions/max_terminated_length": 1332.0, + "completions/mean_length": 1181.1875, + "completions/mean_terminated_length": 1181.1875, + "completions/min_length": 1040.0, + "completions/min_terminated_length": 1040.0, + "epoch": 0.3744748949789958, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.484783321742923, + "kl": 0.013275146484375, + "learning_rate": 8.090594915988823e-07, + "loss": -0.0319, + "num_tokens": 81729100.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.8963333964347839, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.012411571887269195, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04943766817815478, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07649739768026004, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1872 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1438.0, + "completions/max_terminated_length": 1438.0, + "completions/mean_length": 1118.0625, + "completions/mean_terminated_length": 1118.0625, + "completions/min_length": 903.0, + "completions/min_terminated_length": 903.0, + "epoch": 0.3746749349869974, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9324891881815613, + "kl": 0.018585205078125, + "learning_rate": 8.088024928772133e-07, + "loss": -0.0562, + "num_tokens": 81779101.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7336465716362, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05950464840055975, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10722291558650891, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08243965245133131, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1873 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1401.0, + "completions/mean_length": 1201.75, + "completions/mean_terminated_length": 1181.86669921875, + "completions/min_length": 914.0, + "completions/min_terminated_length": 914.0, + "epoch": 0.374874974994999, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1986231710521214, + "kl": 0.0198974609375, + "learning_rate": 8.085453679622733e-07, + "loss": 0.0097, + "num_tokens": 81823705.0, + "reward": 0.0, + "reward_std": 0.8839429616928101, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0061313339004764236, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11410622483415252, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07969850595746354, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1874 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1316.375, + "completions/mean_terminated_length": 1290.1429443359375, + "completions/min_length": 1095.0, + "completions/min_terminated_length": 1095.0, + "epoch": 0.3750750150030006, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9871804305621983, + "kl": 0.0247344970703125, + "learning_rate": 8.08288116979438e-07, + "loss": -0.0159, + "num_tokens": 81877143.0, + "reward": 0.0, + "reward_std": 0.7465613484382629, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06788001229631213, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2584603910087497, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06382847385042258, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1875 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1415.0, + "completions/max_terminated_length": 1415.0, + "completions/mean_length": 1133.875, + "completions/mean_terminated_length": 1133.875, + "completions/min_length": 870.0, + "completions/min_terminated_length": 870.0, + "epoch": 0.3752750550110022, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.545296150026501, + "kl": 0.0153656005859375, + "learning_rate": 8.080307400541438e-07, + "loss": -0.0161, + "num_tokens": 81927349.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.7714577317237854, + "rewards/wordcountpos_reward_nokeypoint/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04621140142609032, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11148800983239857, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05443310539518172, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1876 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1202.25, + "completions/mean_terminated_length": 1133.5384521484375, + "completions/min_length": 767.0, + "completions/min_terminated_length": 767.0, + "epoch": 0.3754750950190038, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1967980418194677, + "kl": 0.009002685546875, + "learning_rate": 8.077732373118892e-07, + "loss": -0.0032, + "num_tokens": 81979841.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7085874080657959, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.02453101495769142, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.18060277698123256, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875000000000001, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06763190130459201, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1877 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1108.625, + "completions/mean_terminated_length": 1082.533447265625, + "completions/min_length": 814.0, + "completions/min_terminated_length": 814.0, + "epoch": 0.3756751350270054, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8162229987055185, + "kl": 0.02496337890625, + "learning_rate": 8.075156088782336e-07, + "loss": 0.0121, + "num_tokens": 82021827.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9967281818389893, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04639679878157336, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06725001266189853, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08606629658238703, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1878 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1471.0, + "completions/mean_length": 1277.3125, + "completions/mean_terminated_length": 1245.5, + "completions/min_length": 1001.0, + "completions/min_terminated_length": 1001.0, + "epoch": 0.375875175035007, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.429624987000773, + "kl": 0.020172119140625, + "learning_rate": 8.072578548787977e-07, + "loss": -0.0102, + "num_tokens": 82071072.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6082307696342468, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0815722107919745, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0813443171606714, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14089659985908765, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1879 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1472.0, + "completions/mean_length": 1390.9375, + "completions/mean_terminated_length": 1281.875, + "completions/min_length": 826.0, + "completions/min_terminated_length": 826.0, + "epoch": 0.3760752150430086, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.014187116386771, + "kl": 0.0155029296875, + "learning_rate": 8.069999754392635e-07, + "loss": 0.0104, + "num_tokens": 82122887.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7793769836425781, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.2501053298324603, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.30237772800253054, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1172998689652263, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1880 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1407.0, + "completions/mean_length": 1244.5, + "completions/mean_terminated_length": 1185.5384521484375, + "completions/min_length": 1035.0, + "completions/min_terminated_length": 1035.0, + "epoch": 0.3762752550510102, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3111715087884237, + "kl": 0.01947021484375, + "learning_rate": 8.067419706853744e-07, + "loss": 0.021, + "num_tokens": 82168087.0, + "reward": -1.1175870895385742e-08, + "reward_std": 1.003624677658081, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.05739091434644501, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1023565406339543, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06382847385042255, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327954292297363, + "step": 1881 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1363.0, + "completions/max_terminated_length": 1363.0, + "completions/mean_length": 1009.875, + "completions/mean_terminated_length": 1009.875, + "completions/min_length": 817.0, + "completions/min_terminated_length": 817.0, + "epoch": 0.3764752950590118, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7208770858736124, + "kl": 0.0163116455078125, + "learning_rate": 8.064838407429346e-07, + "loss": -0.0609, + "num_tokens": 82202973.0, + "reward": 0.0, + "reward_std": 0.4970226287841797, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07075854479120738, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11750061204459726, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14168300559373406, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1882 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1480.0, + "completions/mean_length": 1330.125, + "completions/mean_terminated_length": 1305.857177734375, + "completions/min_length": 999.0, + "completions/min_terminated_length": 999.0, + "epoch": 0.3766753350670134, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.669923103100575, + "kl": 0.01531982421875, + "learning_rate": 8.062255857378093e-07, + "loss": -0.03, + "num_tokens": 82252687.0, + "reward": 0.0, + "reward_std": 0.8170748353004456, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.047687939099782056, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.24084637619010266, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04999999999999999, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1883 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1207.4375, + "completions/mean_terminated_length": 1109.916748046875, + "completions/min_length": 854.0, + "completions/min_terminated_length": 854.0, + "epoch": 0.376875375075015, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.941382531744833, + "kl": 0.01715087890625, + "learning_rate": 8.059672057959249e-07, + "loss": -0.0219, + "num_tokens": 82306190.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0230287313461304, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.2802238142957693, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.21748761789440926, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.067631901304592, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1884 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1392.0, + "completions/mean_length": 1231.625, + "completions/mean_terminated_length": 1169.6923828125, + "completions/min_length": 1012.0, + "completions/min_terminated_length": 1012.0, + "epoch": 0.3770754150830166, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.364031731357084, + "kl": 0.022430419921875, + "learning_rate": 8.057087010432686e-07, + "loss": -0.0604, + "num_tokens": 82358680.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6713124513626099, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07574376627031358, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.24991357055368302, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10470416879457553, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1885 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1197.0, + "completions/max_terminated_length": 1197.0, + "completions/mean_length": 1074.5, + "completions/mean_terminated_length": 1074.5, + "completions/min_length": 909.0, + "completions/min_terminated_length": 909.0, + "epoch": 0.37727545509101823, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7214300921144603, + "kl": 0.023406982421875, + "learning_rate": 8.054500716058886e-07, + "loss": 0.0015, + "num_tokens": 82410456.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8924227952957153, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07518351997275877, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1984295028044171, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0958393717904348, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1886 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1385.0, + "completions/mean_length": 1145.25, + "completions/mean_terminated_length": 1121.60009765625, + "completions/min_length": 822.0, + "completions/min_terminated_length": 822.0, + "epoch": 0.3774754950990198, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5569033592626518, + "kl": 0.0133819580078125, + "learning_rate": 8.051913176098937e-07, + "loss": -0.051, + "num_tokens": 82452652.0, + "reward": 0.0, + "reward_std": 0.7995318174362183, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.008539789707364034, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11195364254454361, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0620632890834175, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1887 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1192.0, + "completions/max_terminated_length": 1192.0, + "completions/mean_length": 1059.5, + "completions/mean_terminated_length": 1059.5, + "completions/min_length": 772.0, + "completions/min_terminated_length": 772.0, + "epoch": 0.3776755351070214, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5053623377959133, + "kl": 0.02044677734375, + "learning_rate": 8.049324391814534e-07, + "loss": 0.0168, + "num_tokens": 82494820.0, + "reward": -1.862645149230957e-09, + "reward_std": 1.0402052402496338, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.862645149230957e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.3590751376995062, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.170356672504536, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08255189164891871, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1888 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1433.0, + "completions/max_terminated_length": 1433.0, + "completions/mean_length": 1102.0, + "completions/mean_terminated_length": 1102.0, + "completions/min_length": 819.0, + "completions/min_terminated_length": 819.0, + "epoch": 0.377875575115023, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.292457463459446, + "kl": 0.01953125, + "learning_rate": 8.046734364467983e-07, + "loss": -0.0408, + "num_tokens": 82547364.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9165066480636597, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.01121486997649434, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11015964031860338, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.03849001794597504, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1889 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1233.0, + "completions/max_terminated_length": 1233.0, + "completions/mean_length": 896.625, + "completions/mean_terminated_length": 896.625, + "completions/min_length": 566.0, + "completions/min_terminated_length": 566.0, + "epoch": 0.3780756151230246, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6658369158847597, + "kl": 0.01885986328125, + "learning_rate": 8.044143095322191e-07, + "loss": -0.0581, + "num_tokens": 82587078.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.566309928894043, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.14657367714766045, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1419472367850558, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04216370213557838, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1890 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1418.0, + "completions/mean_length": 1355.5, + "completions/mean_terminated_length": 1211.0, + "completions/min_length": 942.0, + "completions/min_terminated_length": 942.0, + "epoch": 0.3782756551310262, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9609925228638962, + "kl": 0.0172882080078125, + "learning_rate": 8.041550585640672e-07, + "loss": -0.0188, + "num_tokens": 82635542.0, + "reward": 0.0, + "reward_std": 0.6462267637252808, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.20523246174004373, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.21767681496107322, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07490735018081411, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1891 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1407.0, + "completions/mean_length": 1258.75, + "completions/mean_terminated_length": 1224.2857666015625, + "completions/min_length": 1037.0, + "completions/min_terminated_length": 1037.0, + "epoch": 0.3784756951390278, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7798227455125137, + "kl": 0.01507568359375, + "learning_rate": 8.038956836687548e-07, + "loss": 0.0135, + "num_tokens": 82679538.0, + "reward": 0.0, + "reward_std": 0.9188128709793091, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.14195555648283684, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05343892516134494, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07290277645477444, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1892 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1217.3125, + "completions/mean_terminated_length": 1176.9285888671875, + "completions/min_length": 935.0, + "completions/min_terminated_length": 935.0, + "epoch": 0.3786757351470294, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7437534179760124, + "kl": 0.01094818115234375, + "learning_rate": 8.03636184972754e-07, + "loss": -0.0004, + "num_tokens": 82723583.0, + "reward": 0.0, + "reward_std": 0.630416989326477, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07604671120797964, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11511150943729753, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0697880388775209, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1893 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1482.0, + "completions/mean_length": 1259.875, + "completions/mean_terminated_length": 1115.800048828125, + "completions/min_length": 832.0, + "completions/min_terminated_length": 832.0, + "epoch": 0.378875775155031, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.016339880574447, + "kl": 0.0068511962890625, + "learning_rate": 8.033765626025977e-07, + "loss": -0.0089, + "num_tokens": 82772645.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5911318063735962, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.12637528783480004, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09904624412532292, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08595864638818417, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1894 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1281.0, + "completions/max_terminated_length": 1281.0, + "completions/mean_length": 1140.1875, + "completions/mean_terminated_length": 1140.1875, + "completions/min_length": 970.0, + "completions/min_terminated_length": 970.0, + "epoch": 0.3790758151630326, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9197034671795103, + "kl": 0.01202392578125, + "learning_rate": 8.03116816684879e-07, + "loss": -0.0017, + "num_tokens": 82814768.0, + "reward": 3.725290298461914e-09, + "reward_std": 1.0542267560958862, + "rewards/wordcountpos_reward_nokeypoint/mean": 3.725290298461914e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0947801710707809, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05984014268464688, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.038248698840130005, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1895 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1356.0, + "completions/max_terminated_length": 1356.0, + "completions/mean_length": 1071.125, + "completions/mean_terminated_length": 1071.125, + "completions/min_length": 809.0, + "completions/min_terminated_length": 809.0, + "epoch": 0.3792758551710342, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.592668056650169, + "kl": 0.01898193359375, + "learning_rate": 8.028569473462509e-07, + "loss": -0.0042, + "num_tokens": 82847282.0, + "reward": 0.0, + "reward_std": 0.6577737331390381, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06155282913799306, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03526889087423377, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08595864638818418, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1896 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1282.0, + "completions/max_terminated_length": 1282.0, + "completions/mean_length": 982.5, + "completions/mean_terminated_length": 982.5, + "completions/min_length": 706.0, + "completions/min_terminated_length": 706.0, + "epoch": 0.3794758951790358, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.675851719403572, + "kl": 0.020538330078125, + "learning_rate": 8.025969547134273e-07, + "loss": -0.0237, + "num_tokens": 82884794.0, + "reward": 0.0, + "reward_std": 0.9520699977874756, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10498025976632032, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09315579468291248, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8875000000000001, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08681611046941136, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1897 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1482.0, + "completions/mean_length": 1158.1875, + "completions/mean_terminated_length": 1135.4000244140625, + "completions/min_length": 863.0, + "completions/min_terminated_length": 863.0, + "epoch": 0.3796759351870374, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.286801668958257, + "kl": 0.0145416259765625, + "learning_rate": 8.023368389131815e-07, + "loss": -0.0338, + "num_tokens": 82929701.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5607889890670776, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07692319761887469, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.3277164630896928, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05426273532033234, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1898 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1349.0, + "completions/max_terminated_length": 1349.0, + "completions/mean_length": 1085.25, + "completions/mean_terminated_length": 1085.25, + "completions/min_length": 821.0, + "completions/min_terminated_length": 821.0, + "epoch": 0.379875975195039, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1630994733501963, + "kl": 0.0147247314453125, + "learning_rate": 8.020766000723471e-07, + "loss": -0.0335, + "num_tokens": 82982233.0, + "reward": 0.0, + "reward_std": 0.6159155368804932, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.180388584061391, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.29080428035748923, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08606629658238704, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1899 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1306.0, + "completions/max_terminated_length": 1306.0, + "completions/mean_length": 1138.6875, + "completions/mean_terminated_length": 1138.6875, + "completions/min_length": 903.0, + "completions/min_terminated_length": 903.0, + "epoch": 0.38007601520304063, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.566752863972198, + "kl": 0.0185546875, + "learning_rate": 8.01816238317818e-07, + "loss": 0.0238, + "num_tokens": 83025252.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0008866786956787, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.2145686187091543, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13121647774170594, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08734775114237131, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1900 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1478.0, + "completions/mean_length": 1345.375, + "completions/mean_terminated_length": 1309.6923828125, + "completions/min_length": 1164.0, + "completions/min_terminated_length": 1164.0, + "epoch": 0.3802760552110422, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9554691498005994, + "kl": 0.01605224609375, + "learning_rate": 8.015557537765475e-07, + "loss": 0.0079, + "num_tokens": 83079314.0, + "reward": 0.0, + "reward_std": 0.9121376872062683, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.051541989070234204, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.164201931002081, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05146016078626404, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1901 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1450.0, + "completions/mean_length": 1281.1875, + "completions/mean_terminated_length": 1266.60009765625, + "completions/min_length": 975.0, + "completions/min_terminated_length": 975.0, + "epoch": 0.3804760952190438, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2262391361814773, + "kl": 0.0175628662109375, + "learning_rate": 8.012951465755493e-07, + "loss": -0.0457, + "num_tokens": 83133885.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.9634988903999329, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04697197135586863, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05652007131131894, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06763190130459201, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1902 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1415.0, + "completions/max_terminated_length": 1415.0, + "completions/mean_length": 991.25, + "completions/mean_terminated_length": 991.25, + "completions/min_length": 607.0, + "completions/min_terminated_length": 607.0, + "epoch": 0.3806761352270454, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.747954568218607, + "kl": 0.017425537109375, + "learning_rate": 8.010344168418965e-07, + "loss": 0.0052, + "num_tokens": 83176489.0, + "reward": 1.862645149230957e-08, + "reward_std": 1.0459274053573608, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.862645149230957e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1040008049754691, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08181579361420918, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8250000000000001, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11385500851066221, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1903 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1490.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1223.6875, + "completions/mean_terminated_length": 1223.6875, + "completions/min_length": 1002.0, + "completions/min_terminated_length": 1002.0, + "epoch": 0.380876175235047, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.888124403778613, + "kl": 0.014678955078125, + "learning_rate": 8.00773564702722e-07, + "loss": 0.0203, + "num_tokens": 83218636.0, + "reward": 0.0, + "reward_std": 0.707563042640686, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.043042498677166634, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07082093582886276, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11792967144619461, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1904 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1090.0, + "completions/max_terminated_length": 1090.0, + "completions/mean_length": 907.125, + "completions/mean_terminated_length": 907.125, + "completions/min_length": 829.0, + "completions/min_terminated_length": 829.0, + "epoch": 0.38107621524304863, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8894934674823394, + "kl": 0.0111083984375, + "learning_rate": 8.005125902852187e-07, + "loss": 0.0081, + "num_tokens": 83258262.0, + "reward": -2.2351741790771484e-08, + "reward_std": 0.971579909324646, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04406800574826799, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07234175709645074, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07888106377466154, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1905 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1379.0, + "completions/mean_length": 1099.9375, + "completions/mean_terminated_length": 1073.2667236328125, + "completions/min_length": 871.0, + "completions/min_terminated_length": 871.0, + "epoch": 0.3812762552510502, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.578789461275344, + "kl": 0.021087646484375, + "learning_rate": 8.002514937166387e-07, + "loss": 0.0254, + "num_tokens": 83309525.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6968470811843872, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.17501563607128168, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.28358040056612865, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11013459778666117, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1906 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1308.0, + "completions/max_terminated_length": 1308.0, + "completions/mean_length": 1145.875, + "completions/mean_terminated_length": 1145.875, + "completions/min_length": 984.0, + "completions/min_terminated_length": 984.0, + "epoch": 0.3814762952590518, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5889243678084455, + "kl": 0.0166778564453125, + "learning_rate": 7.999902751242942e-07, + "loss": -0.0092, + "num_tokens": 83347715.0, + "reward": 0.0, + "reward_std": 0.7974227070808411, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.017643187143851012, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13118314938511855, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.053748384988656986, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1907 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1456.0, + "completions/mean_length": 1218.75, + "completions/mean_terminated_length": 1153.84619140625, + "completions/min_length": 957.0, + "completions/min_terminated_length": 957.0, + "epoch": 0.3816763352670534, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6205296459862994, + "kl": 0.0134429931640625, + "learning_rate": 7.997289346355562e-07, + "loss": -0.0117, + "num_tokens": 83400303.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.740831732749939, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.24151536178644936, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.35525244516380416, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06382847385042255, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1908 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1180.0, + "completions/max_terminated_length": 1180.0, + "completions/mean_length": 1066.625, + "completions/mean_terminated_length": 1066.625, + "completions/min_length": 914.0, + "completions/min_terminated_length": 914.0, + "epoch": 0.381876375275055, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3065036582408567, + "kl": 0.0167236328125, + "learning_rate": 7.994674723778559e-07, + "loss": -0.0027, + "num_tokens": 83440457.0, + "reward": 2.2351741790771484e-08, + "reward_std": 0.9636315703392029, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08332528912434932, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1811554573363724, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10610965676722954, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1909 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1172.0, + "completions/max_terminated_length": 1172.0, + "completions/mean_length": 983.4375, + "completions/mean_terminated_length": 983.4375, + "completions/min_length": 861.0, + "completions/min_terminated_length": 861.0, + "epoch": 0.38207641528305664, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2072557938142134, + "kl": 0.018096923828125, + "learning_rate": 7.99205888478683e-07, + "loss": 0.0024, + "num_tokens": 83492696.0, + "reward": 0.0, + "reward_std": 1.0008792877197266, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1249023263462459, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.041688312530251834, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06652763279965644, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1910 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1470.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1181.5, + "completions/mean_terminated_length": 1181.5, + "completions/min_length": 859.0, + "completions/min_terminated_length": 859.0, + "epoch": 0.3822764552910582, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.174144837483836, + "kl": 0.019073486328125, + "learning_rate": 7.989441830655873e-07, + "loss": -0.0267, + "num_tokens": 83535304.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0463106632232666, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.01679211249184552, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05178837541351555, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.056927504255331086, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1911 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1458.0, + "completions/mean_length": 1203.875, + "completions/mean_terminated_length": 1135.5384521484375, + "completions/min_length": 581.0, + "completions/min_terminated_length": 581.0, + "epoch": 0.3824764952990598, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.922027874299306, + "kl": 0.0089874267578125, + "learning_rate": 7.986823562661776e-07, + "loss": -0.0352, + "num_tokens": 83591094.0, + "reward": 0.0, + "reward_std": 0.8507438898086548, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.12438879244921211, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0715302611072199, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.15563490039905004, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1912 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1299.0, + "completions/mean_length": 1202.6875, + "completions/mean_terminated_length": 1182.86669921875, + "completions/min_length": 1061.0, + "completions/min_terminated_length": 1061.0, + "epoch": 0.3826765353070614, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.150683280321772, + "kl": 0.0157623291015625, + "learning_rate": 7.984204082081217e-07, + "loss": -0.0154, + "num_tokens": 83643401.0, + "reward": 0.0, + "reward_std": 0.2549275755882263, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.29496586980498307, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.46516886449545264, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9166666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08255189164891871, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1913 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1380.0, + "completions/mean_length": 1149.375, + "completions/mean_terminated_length": 1099.2857666015625, + "completions/min_length": 678.0, + "completions/min_terminated_length": 678.0, + "epoch": 0.38287657531506303, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5488296252126936, + "kl": 0.0206298828125, + "learning_rate": 7.981583390191468e-07, + "loss": -0.035, + "num_tokens": 83696663.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9657423496246338, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06759838431891052, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1495082383265221, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11279282877125756, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1914 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1382.0, + "completions/mean_length": 1216.0, + "completions/mean_terminated_length": 1121.3333740234375, + "completions/min_length": 337.0, + "completions/min_terminated_length": 337.0, + "epoch": 0.38307661532306464, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.593803579849798, + "kl": 0.0161895751953125, + "learning_rate": 7.978961488270389e-07, + "loss": -0.1485, + "num_tokens": 83747247.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.979979395866394, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.068575756586764, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1645157524457859, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.054262735320332364, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1915 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1106.0, + "completions/mean_terminated_length": 1079.7333984375, + "completions/min_length": 819.0, + "completions/min_terminated_length": 819.0, + "epoch": 0.3832766553310662, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4169550067495664, + "kl": 0.0203704833984375, + "learning_rate": 7.97633837759643e-07, + "loss": 0.0071, + "num_tokens": 83784399.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.9542236924171448, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08049509363357947, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.18810410459945578, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07888106377466156, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1916 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1450.0, + "completions/max_terminated_length": 1450.0, + "completions/mean_length": 1177.5, + "completions/mean_terminated_length": 1177.5, + "completions/min_length": 930.0, + "completions/min_terminated_length": 930.0, + "epoch": 0.3834766953390678, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3012629622946967, + "kl": 0.017059326171875, + "learning_rate": 7.973714059448634e-07, + "loss": -0.0077, + "num_tokens": 83835647.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0284836292266846, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.08124681567315642, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13791129731982893, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06382847385042252, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1917 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1359.0, + "completions/max_terminated_length": 1359.0, + "completions/mean_length": 1210.4375, + "completions/mean_terminated_length": 1210.4375, + "completions/min_length": 970.0, + "completions/min_terminated_length": 970.0, + "epoch": 0.3836767353470694, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.0541972979861414, + "kl": 0.0076904296875, + "learning_rate": 7.97108853510663e-07, + "loss": -0.0003, + "num_tokens": 83876502.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9927845597267151, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1044147234642221, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08426146397647352, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9833333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05163977794943221, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1918 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1487.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1230.8125, + "completions/mean_terminated_length": 1230.8125, + "completions/min_length": 952.0, + "completions/min_terminated_length": 952.0, + "epoch": 0.38387677535507103, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.997620065286383, + "kl": 0.015045166015625, + "learning_rate": 7.968461805850635e-07, + "loss": 0.0422, + "num_tokens": 83919907.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9170197248458862, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.14614663875821976, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1274921213331637, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06440611887195305, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1919 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1312.0, + "completions/max_terminated_length": 1312.0, + "completions/mean_length": 1108.625, + "completions/mean_terminated_length": 1108.625, + "completions/min_length": 908.0, + "completions/min_terminated_length": 908.0, + "epoch": 0.3840768153630726, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.316260164538154, + "kl": 0.01666259765625, + "learning_rate": 7.965833872961455e-07, + "loss": -0.0147, + "num_tokens": 83961509.0, + "reward": 0.0, + "reward_std": 0.5539090633392334, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.305084834356324, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11574861215248765, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.052880017930181294, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1920 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1499.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1224.875, + "completions/mean_terminated_length": 1224.875, + "completions/min_length": 947.0, + "completions/min_terminated_length": 947.0, + "epoch": 0.3842768553710742, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8118777803772597, + "kl": 0.0116424560546875, + "learning_rate": 7.963204737720481e-07, + "loss": 0.0035, + "num_tokens": 83996147.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9159374237060547, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04021148559678761, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09553808249841222, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.052880017930181294, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1921 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1478.0, + "completions/max_terminated_length": 1478.0, + "completions/mean_length": 1130.0625, + "completions/mean_terminated_length": 1130.0625, + "completions/min_length": 912.0, + "completions/min_terminated_length": 912.0, + "epoch": 0.3844768953790758, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6617800576841426, + "kl": 0.0135345458984375, + "learning_rate": 7.960574401409693e-07, + "loss": 0.0102, + "num_tokens": 84040004.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7883386611938477, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.13465163345842301, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2713873862986053, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04849589520621153, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1922 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1492.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1095.0625, + "completions/mean_terminated_length": 1095.0625, + "completions/min_length": 859.0, + "completions/min_terminated_length": 859.0, + "epoch": 0.3846769353870774, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0941236985562557, + "kl": 0.0193634033203125, + "learning_rate": 7.957942865311652e-07, + "loss": -0.0206, + "num_tokens": 84082797.0, + "reward": 0.0, + "reward_std": 0.8744737505912781, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.15959679800341087, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1154901961986672, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12141145226353545, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1923 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1324.0, + "completions/max_terminated_length": 1324.0, + "completions/mean_length": 990.125, + "completions/mean_terminated_length": 990.125, + "completions/min_length": 685.0, + "completions/min_terminated_length": 685.0, + "epoch": 0.38487697539507904, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.396309956392981, + "kl": 0.02496337890625, + "learning_rate": 7.95531013070951e-07, + "loss": -0.0015, + "num_tokens": 84131511.0, + "reward": 0.0, + "reward_std": 0.7623987197875977, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09373143573375194, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09580553231593145, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11385500851066223, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1924 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1250.875, + "completions/mean_terminated_length": 1215.2857666015625, + "completions/min_length": 865.0, + "completions/min_terminated_length": 865.0, + "epoch": 0.3850770154030806, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2576077507798415, + "kl": 0.01763916015625, + "learning_rate": 7.952676198886997e-07, + "loss": -0.0023, + "num_tokens": 84181709.0, + "reward": 0.0, + "reward_std": 0.9603719711303711, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.13338302157852933, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07223748045840374, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05146016078626404, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1925 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1321.25, + "completions/mean_terminated_length": 1280.0, + "completions/min_length": 1129.0, + "completions/min_terminated_length": 1129.0, + "epoch": 0.3852770554110822, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1752663140787485, + "kl": 0.01922607421875, + "learning_rate": 7.950041071128433e-07, + "loss": -0.007, + "num_tokens": 84236625.0, + "reward": -4.470348358154297e-08, + "reward_std": 0.9234722852706909, + "rewards/wordcountpos_reward_nokeypoint/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06777928356754079, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03518289305008413, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08388704928078612, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1926 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1343.0, + "completions/max_terminated_length": 1343.0, + "completions/mean_length": 1071.8125, + "completions/mean_terminated_length": 1071.8125, + "completions/min_length": 754.0, + "completions/min_terminated_length": 754.0, + "epoch": 0.3854770954190838, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.617636597537801, + "kl": 0.01727294921875, + "learning_rate": 7.947404748718717e-07, + "loss": 0.0194, + "num_tokens": 84271318.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.41907215118408203, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03547352161605124, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.18237241418407119, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06885303726590962, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1927 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1147.0, + "completions/max_terminated_length": 1147.0, + "completions/mean_length": 1043.4375, + "completions/mean_terminated_length": 1043.4375, + "completions/min_length": 908.0, + "completions/min_terminated_length": 908.0, + "epoch": 0.3856771354270854, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8598733145196973, + "kl": 0.014007568359375, + "learning_rate": 7.944767232943333e-07, + "loss": -0.005, + "num_tokens": 84320325.0, + "reward": 0.0, + "reward_std": 0.6065726280212402, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.004386566275931543, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.049347260889804616, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10470416879457554, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1928 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1392.0, + "completions/max_terminated_length": 1392.0, + "completions/mean_length": 1169.5, + "completions/mean_terminated_length": 1169.5, + "completions/min_length": 1013.0, + "completions/min_terminated_length": 1013.0, + "epoch": 0.38587717543508704, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2791590152022447, + "kl": 0.019195556640625, + "learning_rate": 7.942128525088344e-07, + "loss": 0.0054, + "num_tokens": 84364341.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.6959525346755981, + "rewards/wordcountpos_reward_nokeypoint/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.02378749647923519, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1023804122310945, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11603000888978232, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1929 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1327.0, + "completions/mean_length": 1153.75, + "completions/mean_terminated_length": 1038.3333740234375, + "completions/min_length": 737.0, + "completions/min_terminated_length": 737.0, + "epoch": 0.3860772154430886, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.516455112934076, + "kl": 0.019073486328125, + "learning_rate": 7.939488626440398e-07, + "loss": 0.0191, + "num_tokens": 84406537.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9695510864257812, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.23071629361766643, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.20061985008955494, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11474609652039004, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1930 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1352.0, + "completions/max_terminated_length": 1352.0, + "completions/mean_length": 1134.0625, + "completions/mean_terminated_length": 1134.0625, + "completions/min_length": 951.0, + "completions/min_terminated_length": 951.0, + "epoch": 0.3862772554510902, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8690948106576415, + "kl": 0.0121612548828125, + "learning_rate": 7.936847538286718e-07, + "loss": -0.0289, + "num_tokens": 84454042.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8654049038887024, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07185983649147065, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13519335240224242, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09496588081262936, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1931 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1208.0, + "completions/max_terminated_length": 1208.0, + "completions/mean_length": 1022.75, + "completions/mean_terminated_length": 1022.75, + "completions/min_length": 876.0, + "completions/min_terminated_length": 876.0, + "epoch": 0.3864772954590918, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.205406597722537, + "kl": 0.0207366943359375, + "learning_rate": 7.934205261915114e-07, + "loss": -0.0015, + "num_tokens": 84492654.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0474894046783447, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03369783172138452, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09054343283588458, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059628479399994376, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1932 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1138.9375, + "completions/mean_terminated_length": 1087.357177734375, + "completions/min_length": 785.0, + "completions/min_terminated_length": 785.0, + "epoch": 0.38667733546709343, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4723037571073694, + "kl": 0.013641357421875, + "learning_rate": 7.931561798613972e-07, + "loss": 0.0245, + "num_tokens": 84543709.0, + "reward": 0.0, + "reward_std": 0.7834723591804504, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.08913063493173777, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.22253370780289541, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.975, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.053748384988656986, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1933 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1465.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 1178.375, + "completions/mean_terminated_length": 1178.375, + "completions/min_length": 928.0, + "completions/min_terminated_length": 928.0, + "epoch": 0.38687737547509504, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4405976724106964, + "kl": 0.02081298828125, + "learning_rate": 7.928917149672254e-07, + "loss": -0.0276, + "num_tokens": 84594867.0, + "reward": 0.0, + "reward_std": 0.9930076599121094, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06694006930824761, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08325043703989976, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0769800358919501, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1934 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1466.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1089.625, + "completions/mean_terminated_length": 1089.625, + "completions/min_length": 915.0, + "completions/min_terminated_length": 915.0, + "epoch": 0.3870774154830966, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.494521497826186, + "kl": 0.020233154296875, + "learning_rate": 7.926271316379505e-07, + "loss": 0.0273, + "num_tokens": 84644181.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9230048656463623, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07069907154296641, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14835914726021052, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0683130051063973, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1935 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1473.0, + "completions/mean_length": 1215.875, + "completions/mean_terminated_length": 1196.933349609375, + "completions/min_length": 936.0, + "completions/min_terminated_length": 936.0, + "epoch": 0.3872774554910982, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.092149975066181, + "kl": 0.019012451171875, + "learning_rate": 7.923624300025844e-07, + "loss": -0.0008, + "num_tokens": 84682107.0, + "reward": 0.0, + "reward_std": 0.7227044105529785, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.022676885001877978, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03487394476145424, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9625, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04194352464039305, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1936 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1428.0, + "completions/max_terminated_length": 1428.0, + "completions/mean_length": 1237.875, + "completions/mean_terminated_length": 1237.875, + "completions/min_length": 958.0, + "completions/min_terminated_length": 958.0, + "epoch": 0.3874774954990998, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.14640113167289, + "kl": 0.0142364501953125, + "learning_rate": 7.920976101901968e-07, + "loss": -0.0126, + "num_tokens": 84732009.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9398221969604492, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.01094635614849621, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.26713124276931993, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04216370213557838, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1937 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1451.0, + "completions/mean_length": 1268.5, + "completions/mean_terminated_length": 1253.0667724609375, + "completions/min_length": 836.0, + "completions/min_terminated_length": 836.0, + "epoch": 0.38767753550710143, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7700004612067914, + "kl": 0.01340484619140625, + "learning_rate": 7.918326723299154e-07, + "loss": -0.0147, + "num_tokens": 84781905.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8438171148300171, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.11363909892732882, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11658030803380749, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059004080210452226, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327954292297363, + "step": 1938 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1221.0, + "completions/max_terminated_length": 1221.0, + "completions/mean_length": 1083.4375, + "completions/mean_terminated_length": 1083.4375, + "completions/min_length": 929.0, + "completions/min_terminated_length": 929.0, + "epoch": 0.38787757551510305, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.90958689559038, + "kl": 0.015625, + "learning_rate": 7.915676165509248e-07, + "loss": -0.0203, + "num_tokens": 84820832.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.4406263828277588, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.16340631826761043, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1944554059075845, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09737289911202955, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1939 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1380.0, + "completions/mean_length": 1235.8125, + "completions/mean_terminated_length": 1218.2000732421875, + "completions/min_length": 1060.0, + "completions/min_terminated_length": 1060.0, + "epoch": 0.3880776155231046, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3060443464142812, + "kl": 0.0182037353515625, + "learning_rate": 7.913024429824672e-07, + "loss": -0.0014, + "num_tokens": 84868453.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.853193461894989, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.062304629395073294, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05121362846791609, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08509254221575908, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1940 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1384.0, + "completions/mean_length": 1137.6875, + "completions/mean_terminated_length": 1085.9285888671875, + "completions/min_length": 805.0, + "completions/min_terminated_length": 805.0, + "epoch": 0.3882776555311062, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5348058135628166, + "kl": 0.019744873046875, + "learning_rate": 7.910371517538428e-07, + "loss": -0.0836, + "num_tokens": 84911544.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8924041390419006, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07039905162222536, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06867825063232126, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09888264649460884, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1941 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1223.0, + "completions/max_terminated_length": 1223.0, + "completions/mean_length": 1014.0, + "completions/mean_terminated_length": 1014.0, + "completions/min_length": 829.0, + "completions/min_terminated_length": 829.0, + "epoch": 0.3884776955391078, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.8375491969090858, + "kl": 0.0074291229248046875, + "learning_rate": 7.907717429944086e-07, + "loss": -0.0383, + "num_tokens": 84953320.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0060901641845703, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.03702214853254569, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08731644364720742, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1942 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1412.0, + "completions/max_terminated_length": 1412.0, + "completions/mean_length": 1067.5625, + "completions/mean_terminated_length": 1067.5625, + "completions/min_length": 812.0, + "completions/min_terminated_length": 812.0, + "epoch": 0.38867773554710944, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.801256970340836, + "kl": 0.021453857421875, + "learning_rate": 7.905062168335794e-07, + "loss": -0.0218, + "num_tokens": 84985897.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.4042457938194275, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.1524386206629153, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1578949125255138, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0843274042711568, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1943 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1374.0, + "completions/max_terminated_length": 1374.0, + "completions/mean_length": 1017.875, + "completions/mean_terminated_length": 1017.875, + "completions/min_length": 814.0, + "completions/min_terminated_length": 814.0, + "epoch": 0.38887777555511105, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0739853699451944, + "kl": 0.018341064453125, + "learning_rate": 7.902405734008267e-07, + "loss": -0.0165, + "num_tokens": 85036199.0, + "reward": -1.862645149230957e-08, + "reward_std": 1.0503785610198975, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.862645149230957e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.12948394642110164, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1846017605417109, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08333333333333333, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1944 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1411.0, + "completions/mean_length": 1300.0625, + "completions/mean_terminated_length": 1209.181884765625, + "completions/min_length": 990.0, + "completions/min_terminated_length": 990.0, + "epoch": 0.3890778155631126, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5964033782436884, + "kl": 0.017730712890625, + "learning_rate": 7.899748128256793e-07, + "loss": -0.0517, + "num_tokens": 85090496.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.915033221244812, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.022338653361671366, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06323041027419735, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07302967433402216, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1945 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1437.0, + "completions/max_terminated_length": 1437.0, + "completions/mean_length": 1264.3125, + "completions/mean_terminated_length": 1264.3125, + "completions/min_length": 1002.0, + "completions/min_terminated_length": 1002.0, + "epoch": 0.3892778555711142, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.896218394068189, + "kl": 0.012664794921875, + "learning_rate": 7.897089352377237e-07, + "loss": -0.0099, + "num_tokens": 85135669.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.051049828529358, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0398777840660603, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06116967378727508, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.070841502796867, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1946 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1435.0, + "completions/max_terminated_length": 1435.0, + "completions/mean_length": 1066.125, + "completions/mean_terminated_length": 1066.125, + "completions/min_length": 854.0, + "completions/min_terminated_length": 854.0, + "epoch": 0.38947789557911583, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.167743245858248, + "kl": 0.0145263671875, + "learning_rate": 7.894429407666024e-07, + "loss": 0.0508, + "num_tokens": 85177135.0, + "reward": 0.0, + "reward_std": 1.0545061826705933, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04345945231935629, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07545679643731681, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08595864638818419, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1947 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1414.0, + "completions/mean_length": 1275.0, + "completions/mean_terminated_length": 1260.0001220703125, + "completions/min_length": 1045.0, + "completions/min_terminated_length": 1045.0, + "epoch": 0.38967793558711744, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.305667919754736, + "kl": 0.009124755859375, + "learning_rate": 7.891768295420164e-07, + "loss": -0.0282, + "num_tokens": 85210343.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9451032876968384, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1757158571049244, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13796389977653284, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04367387557118565, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1948 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1447.0, + "completions/mean_length": 1263.75, + "completions/mean_terminated_length": 1209.2308349609375, + "completions/min_length": 905.0, + "completions/min_terminated_length": 905.0, + "epoch": 0.389877975595119, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1790706736604064, + "kl": 0.017791748046875, + "learning_rate": 7.889106016937219e-07, + "loss": -0.006, + "num_tokens": 85245515.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8175638914108276, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.00776609743969373, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05582737873257102, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06871842709362767, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1949 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1130.0, + "completions/max_terminated_length": 1130.0, + "completions/mean_length": 959.4375, + "completions/mean_terminated_length": 959.4375, + "completions/min_length": 754.0, + "completions/min_terminated_length": 754.0, + "epoch": 0.3900780156031206, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.475369513229021, + "kl": 0.015716552734375, + "learning_rate": 7.886442573515333e-07, + "loss": -0.0388, + "num_tokens": 85288306.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8007621765136719, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.032213251497721106, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14765864189867275, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12531441937663723, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1950 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1213.0, + "completions/mean_length": 1027.75, + "completions/mean_terminated_length": 996.2667236328125, + "completions/min_length": 866.0, + "completions/min_terminated_length": 866.0, + "epoch": 0.3902780556111222, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.680857648308946, + "kl": 0.0158843994140625, + "learning_rate": 7.88377796645321e-07, + "loss": 0.0348, + "num_tokens": 85328038.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0440020561218262, + "rewards/wordcountpos_reward_nokeypoint/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.044438932421097324, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07973373052157151, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.11122216672215288, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1951 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1292.0, + "completions/max_terminated_length": 1292.0, + "completions/mean_length": 1070.125, + "completions/mean_terminated_length": 1070.125, + "completions/min_length": 920.0, + "completions/min_terminated_length": 920.0, + "epoch": 0.39047809561912383, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8507507333404227, + "kl": 0.020721435546875, + "learning_rate": 7.881112197050128e-07, + "loss": 0.021, + "num_tokens": 85371856.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5691534280776978, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.016537937904413165, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13346823156936255, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.102469507659596, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1952 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1355.0, + "completions/mean_length": 1181.6875, + "completions/mean_terminated_length": 1136.21435546875, + "completions/min_length": 943.0, + "completions/min_terminated_length": 943.0, + "epoch": 0.39067813562712544, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.548444628322796, + "kl": 0.0128173828125, + "learning_rate": 7.878445266605926e-07, + "loss": -0.0852, + "num_tokens": 85417427.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7507799863815308, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.062352501142698, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10688424437826474, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10576003586036263, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1953 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1255.8125, + "completions/mean_terminated_length": 1220.9285888671875, + "completions/min_length": 1051.0, + "completions/min_terminated_length": 1051.0, + "epoch": 0.390878175635127, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.120548779916013, + "kl": 0.0134429931640625, + "learning_rate": 7.87577717642101e-07, + "loss": -0.0024, + "num_tokens": 85458952.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.8272860646247864, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10218180091673479, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2995015604050858, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059628479399994376, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1954 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1139.875, + "completions/mean_terminated_length": 1115.86669921875, + "completions/min_length": 841.0, + "completions/min_terminated_length": 841.0, + "epoch": 0.3910782156431286, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.561964930774243, + "kl": 0.0179443359375, + "learning_rate": 7.873107927796356e-07, + "loss": -0.0203, + "num_tokens": 85506238.0, + "reward": 0.0, + "reward_std": 0.6097987294197083, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.16804334976461927, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.13007584485199708, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9583333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07649739768026002, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1955 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1287.5, + "completions/mean_terminated_length": 1257.1429443359375, + "completions/min_length": 985.0, + "completions/min_terminated_length": 985.0, + "epoch": 0.3912782556511302, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8134575209239676, + "kl": 0.0149688720703125, + "learning_rate": 7.8704375220335e-07, + "loss": 0.002, + "num_tokens": 85560750.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.7667152881622314, + "rewards/wordcountpos_reward_nokeypoint/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1424520110820342, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09002431107279084, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05146016078626404, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1956 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1049.0, + "completions/max_terminated_length": 1049.0, + "completions/mean_length": 821.75, + "completions/mean_terminated_length": 821.75, + "completions/min_length": 716.0, + "completions/min_terminated_length": 716.0, + "epoch": 0.39147829565913184, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8591981955823313, + "kl": 0.019989013671875, + "learning_rate": 7.867765960434543e-07, + "loss": -0.0044, + "num_tokens": 85600338.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7569316625595093, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.015966329386419162, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.17059664727424492, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07084150279686702, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1957 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1452.0, + "completions/mean_length": 1162.8125, + "completions/mean_terminated_length": 1085.0, + "completions/min_length": 593.0, + "completions/min_terminated_length": 593.0, + "epoch": 0.39167833566713345, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.215459293690643, + "kl": 0.0174560546875, + "learning_rate": 7.865093244302153e-07, + "loss": -0.1127, + "num_tokens": 85637303.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8049405813217163, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0654806354186979, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.19040218079612128, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07302967433402213, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1958 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1353.0, + "completions/max_terminated_length": 1353.0, + "completions/mean_length": 1095.5625, + "completions/mean_terminated_length": 1095.5625, + "completions/min_length": 906.0, + "completions/min_terminated_length": 906.0, + "epoch": 0.391878375675135, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.334719049493812, + "kl": 0.01751708984375, + "learning_rate": 7.862419374939559e-07, + "loss": 0.0425, + "num_tokens": 85667568.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6739866137504578, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.04665978662389716, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08178657125666018, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.046943622609505783, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1959 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1343.6875, + "completions/mean_terminated_length": 1222.111083984375, + "completions/min_length": 951.0, + "completions/min_terminated_length": 951.0, + "epoch": 0.3920784156831366, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2353654644220464, + "kl": 0.020660400390625, + "learning_rate": 7.859744353650548e-07, + "loss": 0.0073, + "num_tokens": 85722747.0, + "reward": -3.725290298461914e-09, + "reward_std": 1.0610501766204834, + "rewards/wordcountpos_reward_nokeypoint/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.10392148031645611, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08333978033607449, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08432740427115679, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1960 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1177.0, + "completions/max_terminated_length": 1177.0, + "completions/mean_length": 1043.3125, + "completions/mean_terminated_length": 1043.3125, + "completions/min_length": 932.0, + "completions/min_terminated_length": 932.0, + "epoch": 0.39227845569113823, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9634399711154362, + "kl": 0.0155487060546875, + "learning_rate": 7.857068181739476e-07, + "loss": -0.0017, + "num_tokens": 85766944.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0036838054656982, + "rewards/wordcountpos_reward_nokeypoint/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.06143502933097074, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1203818416722349, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09388724521901161, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1961 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1330.0, + "completions/max_terminated_length": 1330.0, + "completions/mean_length": 1089.25, + "completions/mean_terminated_length": 1089.25, + "completions/min_length": 955.0, + "completions/min_terminated_length": 955.0, + "epoch": 0.39247849569913984, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9101568163314413, + "kl": 0.01385498046875, + "learning_rate": 7.854390860511255e-07, + "loss": -0.0189, + "num_tokens": 85807076.0, + "reward": 0.0, + "reward_std": 0.8783515691757202, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.029097264678266194, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10684996338264463, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04194352464039305, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1962 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1070.0625, + "completions/mean_terminated_length": 970.84619140625, + "completions/min_length": 750.0, + "completions/min_terminated_length": 750.0, + "epoch": 0.39267853570714145, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.479343026878027, + "kl": 0.020050048828125, + "learning_rate": 7.851712391271359e-07, + "loss": -0.0144, + "num_tokens": 85857741.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0254409313201904, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04016221180810599, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03959424326915343, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.055611083361076424, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1963 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1292.0, + "completions/max_terminated_length": 1292.0, + "completions/mean_length": 1087.25, + "completions/mean_terminated_length": 1087.25, + "completions/min_length": 815.0, + "completions/min_terminated_length": 815.0, + "epoch": 0.392878575715143, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.985163872802555, + "kl": 0.01641845703125, + "learning_rate": 7.849032775325824e-07, + "loss": 0.005, + "num_tokens": 85901881.0, + "reward": -3.725290298461914e-09, + "reward_std": 1.0459303855895996, + "rewards/wordcountpos_reward_nokeypoint/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.12117025964793632, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05890932902938483, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04194352464039305, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1964 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1366.0, + "completions/max_terminated_length": 1366.0, + "completions/mean_length": 912.0625, + "completions/mean_terminated_length": 912.0625, + "completions/min_length": 613.0, + "completions/min_terminated_length": 613.0, + "epoch": 0.3930786157231446, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6112389349470666, + "kl": 0.0187835693359375, + "learning_rate": 7.846352013981239e-07, + "loss": 0.0204, + "num_tokens": 85927394.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.995107889175415, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.056645318852818954, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07917193515825628, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.024343224778007377, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1965 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1213.0, + "completions/max_terminated_length": 1213.0, + "completions/mean_length": 1042.8125, + "completions/mean_terminated_length": 1042.8125, + "completions/min_length": 682.0, + "completions/min_terminated_length": 682.0, + "epoch": 0.39327865573114623, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3284539605997496, + "kl": 0.03326416015625, + "learning_rate": 7.843670108544756e-07, + "loss": -0.0121, + "num_tokens": 85971111.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9813012480735779, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.16485727959528354, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06310546028713465, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04367387557118565, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1966 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1173.0, + "completions/max_terminated_length": 1173.0, + "completions/mean_length": 991.5625, + "completions/mean_terminated_length": 991.5625, + "completions/min_length": 770.0, + "completions/min_terminated_length": 770.0, + "epoch": 0.39347869573914784, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4768851172810966, + "kl": 0.0179595947265625, + "learning_rate": 7.840987060324089e-07, + "loss": -0.0472, + "num_tokens": 86002344.0, + "reward": 3.725290298461914e-09, + "reward_std": 1.0651013851165771, + "rewards/wordcountpos_reward_nokeypoint/mean": 3.725290298461914e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.029150373030054313, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.06762272574774515, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.048686449556014755, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1967 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1363.0, + "completions/mean_length": 1207.3125, + "completions/mean_terminated_length": 1187.800048828125, + "completions/min_length": 902.0, + "completions/min_terminated_length": 902.0, + "epoch": 0.39367873574714946, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4501822604183587, + "kl": 0.0179443359375, + "learning_rate": 7.8383028706275e-07, + "loss": -0.0171, + "num_tokens": 86054205.0, + "reward": 0.0, + "reward_std": 0.63642418384552, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.021146125410531715, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1339390050613186, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09259629622222519, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1968 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1340.0, + "completions/max_terminated_length": 1340.0, + "completions/mean_length": 992.875, + "completions/mean_terminated_length": 992.875, + "completions/min_length": 628.0, + "completions/min_terminated_length": 628.0, + "epoch": 0.393878775755151, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.708174526403805, + "kl": 0.016265869140625, + "learning_rate": 7.835617540763813e-07, + "loss": -0.0581, + "num_tokens": 86097483.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0672171115875244, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.004636896430435966, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03543692286839096, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0758897836290186, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1969 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1409.0, + "completions/mean_length": 1223.75, + "completions/mean_terminated_length": 1131.666748046875, + "completions/min_length": 802.0, + "completions/min_terminated_length": 802.0, + "epoch": 0.3940788157631526, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.278283507112732, + "kl": 0.0141754150390625, + "learning_rate": 7.832931072042408e-07, + "loss": 0.0041, + "num_tokens": 86144415.0, + "reward": 0.0, + "reward_std": 0.7354701161384583, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12171612389003693, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1970 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1412.0, + "completions/mean_length": 1138.5, + "completions/mean_terminated_length": 1018.0, + "completions/min_length": 804.0, + "completions/min_terminated_length": 804.0, + "epoch": 0.39427885577115424, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.09171656394686, + "kl": 0.015106201171875, + "learning_rate": 7.830243465773218e-07, + "loss": -0.045, + "num_tokens": 86189831.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0476982593536377, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07012690820253041, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09836380755057875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.14807405554629052, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1971 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1269.0, + "completions/max_terminated_length": 1269.0, + "completions/mean_length": 982.9375, + "completions/mean_terminated_length": 982.9375, + "completions/min_length": 653.0, + "completions/min_terminated_length": 653.0, + "epoch": 0.39447889577915585, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.665417600385431, + "kl": 0.018463134765625, + "learning_rate": 7.827554723266733e-07, + "loss": -0.022, + "num_tokens": 86229254.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9941109418869019, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.17752437842188618, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.30637016662745853, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06885303726590963, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1972 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1373.0, + "completions/max_terminated_length": 1373.0, + "completions/mean_length": 1137.0625, + "completions/mean_terminated_length": 1137.0625, + "completions/min_length": 977.0, + "completions/min_terminated_length": 977.0, + "epoch": 0.39467893578715746, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.002320067113991, + "kl": 0.015472412109375, + "learning_rate": 7.824864845833995e-07, + "loss": 0.0331, + "num_tokens": 86272271.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0459372997283936, + "rewards/wordcountpos_reward_nokeypoint/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.030586191817684377, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2030775517221361, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.067631901304592, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1973 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1442.0, + "completions/mean_length": 1013.3125, + "completions/mean_terminated_length": 980.86669921875, + "completions/min_length": 681.0, + "completions/min_terminated_length": 681.0, + "epoch": 0.394878975795159, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2096860940481764, + "kl": 0.01348876953125, + "learning_rate": 7.822173834786602e-07, + "loss": -0.0502, + "num_tokens": 86312004.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5632442831993103, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.20383581761911257, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.24811893653389971, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05163977794943221, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1974 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1358.0, + "completions/max_terminated_length": 1358.0, + "completions/mean_length": 1198.25, + "completions/mean_terminated_length": 1198.25, + "completions/min_length": 811.0, + "completions/min_terminated_length": 811.0, + "epoch": 0.3950790158031606, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.625381322277234, + "kl": 0.016998291015625, + "learning_rate": 7.819481691436702e-07, + "loss": -0.0166, + "num_tokens": 86360760.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0613597631454468, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.038466659055763325, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05313592419907323, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9666666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.048686449556014755, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1975 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1205.0, + "completions/mean_length": 1114.875, + "completions/mean_terminated_length": 1089.2000732421875, + "completions/min_length": 947.0, + "completions/min_terminated_length": 947.0, + "epoch": 0.39527905581116224, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2588651348097812, + "kl": 0.0147552490234375, + "learning_rate": 7.816788417096997e-07, + "loss": 0.0192, + "num_tokens": 86401046.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.724941611289978, + "rewards/wordcountpos_reward_nokeypoint/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.14969135476681852, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09836451454766633, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059472994182545036, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1976 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1451.0, + "completions/max_terminated_length": 1451.0, + "completions/mean_length": 1015.625, + "completions/mean_terminated_length": 1015.625, + "completions/min_length": 730.0, + "completions/min_terminated_length": 730.0, + "epoch": 0.39547909581916385, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.226887330816371, + "kl": 0.010992050170898438, + "learning_rate": 7.814094013080739e-07, + "loss": -0.0345, + "num_tokens": 86445584.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9373365640640259, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.0621867509951806, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05015350645616905, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.08027729719194864, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1977 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1096.6875, + "completions/mean_terminated_length": 1069.800048828125, + "completions/min_length": 797.0, + "completions/min_terminated_length": 797.0, + "epoch": 0.3956791358271654, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1438750094119885, + "kl": 0.0139007568359375, + "learning_rate": 7.811398480701733e-07, + "loss": 0.0559, + "num_tokens": 86484899.0, + "reward": 0.0, + "reward_std": 0.9214102625846863, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.07614838931270626, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.04988549089764539, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06206328908341753, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1978 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1262.4375, + "completions/mean_terminated_length": 1207.615478515625, + "completions/min_length": 1090.0, + "completions/min_terminated_length": 1090.0, + "epoch": 0.395879175835167, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0660750669626124, + "kl": 0.0200958251953125, + "learning_rate": 7.80870182127433e-07, + "loss": -0.006, + "num_tokens": 86538522.0, + "reward": 3.725290298461914e-09, + "reward_std": 1.053501009941101, + "rewards/wordcountpos_reward_nokeypoint/mean": 3.725290298461914e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.09004077778923204, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.03704684420241432, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059628479399994376, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1979 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1421.0, + "completions/max_terminated_length": 1421.0, + "completions/mean_length": 1027.4375, + "completions/mean_terminated_length": 1027.4375, + "completions/min_length": 807.0, + "completions/min_terminated_length": 807.0, + "epoch": 0.39607921584316863, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0859992086627543, + "kl": 0.01568603515625, + "learning_rate": 7.806004036113436e-07, + "loss": -0.0337, + "num_tokens": 86578273.0, + "reward": 0.0, + "reward_std": 0.8661291599273682, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.14923885160253889, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.315157038150847, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06978803887752091, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1980 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1407.0, + "completions/mean_length": 1100.5, + "completions/mean_terminated_length": 1073.86669921875, + "completions/min_length": 862.0, + "completions/min_terminated_length": 862.0, + "epoch": 0.39627925585117024, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3823737874869417, + "kl": 0.018829345703125, + "learning_rate": 7.803305126534505e-07, + "loss": 0.0183, + "num_tokens": 86611321.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6285615563392639, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.16074922005439446, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16452257975896567, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0697880388775209, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1981 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1476.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1093.75, + "completions/mean_terminated_length": 1093.75, + "completions/min_length": 827.0, + "completions/min_terminated_length": 827.0, + "epoch": 0.39647929585917185, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3373631774317203, + "kl": 0.01947021484375, + "learning_rate": 7.800605093853533e-07, + "loss": -0.0041, + "num_tokens": 86662093.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0392009019851685, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03074175523845742, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1317586939673774, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9708333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04194352464039305, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1982 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1363.0, + "completions/mean_length": 1062.3125, + "completions/mean_terminated_length": 1033.1334228515625, + "completions/min_length": 806.0, + "completions/min_terminated_length": 806.0, + "epoch": 0.3966793358671734, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.617219224246853, + "kl": 0.0125274658203125, + "learning_rate": 7.797903939387071e-07, + "loss": -0.0091, + "num_tokens": 86704898.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0601592063903809, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.13010044489410838, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.16624220910038667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.1060223596263578, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1983 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1362.0, + "completions/mean_length": 1159.25, + "completions/mean_terminated_length": 1110.571533203125, + "completions/min_length": 790.0, + "completions/min_terminated_length": 790.0, + "epoch": 0.396879375875175, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.217231741385212, + "kl": 0.022216796875, + "learning_rate": 7.795201664452215e-07, + "loss": 0.0154, + "num_tokens": 86748566.0, + "reward": 0.0, + "reward_std": 0.7041600942611694, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.19232628504938576, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.12474652887836953, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07391185942027817, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1984 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1347.0, + "completions/mean_terminated_length": 1311.6923828125, + "completions/min_length": 1004.0, + "completions/min_terminated_length": 1004.0, + "epoch": 0.39707941588317663, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1879666238946385, + "kl": 0.017333984375, + "learning_rate": 7.792498270366603e-07, + "loss": 0.0233, + "num_tokens": 86806070.0, + "reward": 0.0, + "reward_std": 0.978766918182373, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0014619006994915605, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.08010756765668701, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10602235962635778, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1985 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1392.0, + "completions/mean_length": 1232.6875, + "completions/mean_terminated_length": 1214.86669921875, + "completions/min_length": 1051.0, + "completions/min_terminated_length": 1051.0, + "epoch": 0.39727945589117825, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0416778565510927, + "kl": 0.0168609619140625, + "learning_rate": 7.789793758448425e-07, + "loss": 0.0171, + "num_tokens": 86851825.0, + "reward": 0.0, + "reward_std": 1.026902437210083, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.009462536826410826, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.05583471008705364, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06440611887195305, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1986 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1432.0, + "completions/mean_length": 1173.75, + "completions/mean_terminated_length": 1127.1429443359375, + "completions/min_length": 927.0, + "completions/min_terminated_length": 927.0, + "epoch": 0.39747949589917986, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4185131243110747, + "kl": 0.019195556640625, + "learning_rate": 7.787088130016413e-07, + "loss": -0.0135, + "num_tokens": 86903269.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8373116254806519, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.3412122050028244, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07224360775904885, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06382847385042252, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1987 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1237.875, + "completions/mean_terminated_length": 1220.4000244140625, + "completions/min_length": 962.0, + "completions/min_terminated_length": 962.0, + "epoch": 0.3976795359071814, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7717638825840267, + "kl": 0.01593017578125, + "learning_rate": 7.784381386389842e-07, + "loss": -0.028, + "num_tokens": 86946331.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6155614852905273, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.1094464924136012, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09472863290463919, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.0683130051063973, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1988 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1373.0, + "completions/max_terminated_length": 1373.0, + "completions/mean_length": 1151.1875, + "completions/mean_terminated_length": 1151.1875, + "completions/min_length": 958.0, + "completions/min_terminated_length": 958.0, + "epoch": 0.397879575915183, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7508045995276316, + "kl": 0.00946807861328125, + "learning_rate": 7.781673528888536e-07, + "loss": -0.016, + "num_tokens": 86991366.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0638691186904907, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.07111322438332106, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.24381056872879533, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.06885303726590962, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1989 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1371.0, + "completions/max_terminated_length": 1371.0, + "completions/mean_length": 1189.4375, + "completions/mean_terminated_length": 1189.4375, + "completions/min_length": 994.0, + "completions/min_terminated_length": 994.0, + "epoch": 0.39807961592318464, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0743415029649928, + "kl": 0.0194549560546875, + "learning_rate": 7.778964558832855e-07, + "loss": -0.0153, + "num_tokens": 87044637.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9518977403640747, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.04274519051780247, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.14460760813036666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.059004080210452226, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1990 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1410.0, + "completions/max_terminated_length": 1410.0, + "completions/mean_length": 961.5625, + "completions/mean_terminated_length": 961.5625, + "completions/min_length": 790.0, + "completions/min_terminated_length": 790.0, + "epoch": 0.39827965593118625, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.516640001938211, + "kl": 0.023712158203125, + "learning_rate": 7.776254477543706e-07, + "loss": -0.0076, + "num_tokens": 87085958.0, + "reward": 0.0, + "reward_std": 0.7875781059265137, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.19870457548841217, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.2610518068920006, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.055611083361076424, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1991 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1405.0, + "completions/mean_length": 1098.5625, + "completions/mean_terminated_length": 1041.21435546875, + "completions/min_length": 825.0, + "completions/min_terminated_length": 825.0, + "epoch": 0.39847969593918786, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4811694693247195, + "kl": 0.017120361328125, + "learning_rate": 7.77354328634254e-07, + "loss": -0.04, + "num_tokens": 87118143.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0013395547866821, + "rewards/wordcountpos_reward_nokeypoint/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.003552444613454165, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.01420977845381666, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12881223774390613, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1992 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1434.0, + "completions/mean_length": 1210.125, + "completions/mean_terminated_length": 1168.71435546875, + "completions/min_length": 957.0, + "completions/min_terminated_length": 957.0, + "epoch": 0.3986797359471894, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.334072850896518, + "kl": 0.0158843994140625, + "learning_rate": 7.770830986551341e-07, + "loss": -0.0075, + "num_tokens": 87161009.0, + "reward": 0.0, + "reward_std": 0.4358697533607483, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.12748416158903444, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07889655894579065, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.12041594578792297, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1993 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1435.0, + "completions/max_terminated_length": 1435.0, + "completions/mean_length": 1150.9375, + "completions/mean_terminated_length": 1150.9375, + "completions/min_length": 851.0, + "completions/min_terminated_length": 851.0, + "epoch": 0.39887977595519103, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9483989587078554, + "kl": 0.0147857666015625, + "learning_rate": 7.768117579492643e-07, + "loss": -0.0413, + "num_tokens": 87216160.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0577335357666016, + "rewards/wordcountpos_reward_nokeypoint/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03445251324012806, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.10519621482491882, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9541666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.046943622609505783, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1994 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1268.0, + "completions/max_terminated_length": 1268.0, + "completions/mean_length": 1093.875, + "completions/mean_terminated_length": 1093.875, + "completions/min_length": 790.0, + "completions/min_terminated_length": 790.0, + "epoch": 0.39907981596319264, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8542904175604753, + "kl": 0.0201416015625, + "learning_rate": 7.765403066489513e-07, + "loss": -0.0164, + "num_tokens": 87262782.0, + "reward": 0.0, + "reward_std": 0.7994167804718018, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.14040694805116938, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1405124814240768, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.09953596037316068, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1995 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1396.0, + "completions/mean_length": 1248.375, + "completions/mean_terminated_length": 1231.60009765625, + "completions/min_length": 978.0, + "completions/min_terminated_length": 978.0, + "epoch": 0.39927985597119425, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7656759721499307, + "kl": 0.0133209228515625, + "learning_rate": 7.762687448865561e-07, + "loss": 0.0249, + "num_tokens": 87303508.0, + "reward": 0.0, + "reward_std": 0.9770182371139526, + "rewards/wordcountpos_reward_nokeypoint/mean": 0.0, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.03213864711913934, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07155116540281939, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.05443310539518172, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1996 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1488.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1164.875, + "completions/mean_terminated_length": 1164.875, + "completions/min_length": 922.0, + "completions/min_terminated_length": 922.0, + "epoch": 0.39947989597919586, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2057001869415056, + "kl": 0.0245361328125, + "learning_rate": 7.759970727944935e-07, + "loss": 0.0465, + "num_tokens": 87349762.0, + "reward": -4.470348358154297e-08, + "reward_std": 0.9502090811729431, + "rewards/wordcountpos_reward_nokeypoint/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.0508424512967331, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.07905158159754003, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9875, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.04999999999999999, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1997 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1151.0, + "completions/mean_length": 1059.625, + "completions/mean_terminated_length": 1030.2667236328125, + "completions/min_length": 919.0, + "completions/min_terminated_length": 919.0, + "epoch": 0.3996799359871974, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.217154481871798, + "kl": 0.01934814453125, + "learning_rate": 7.757252905052318e-07, + "loss": -0.0203, + "num_tokens": 87390292.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7615423798561096, + "rewards/wordcountpos_reward_nokeypoint/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.024933348797664175, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.11435790946803243, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.10576003586036263, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327956676483154, + "step": 1998 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1446.0, + "completions/max_terminated_length": 1446.0, + "completions/mean_length": 1146.5, + "completions/mean_terminated_length": 1146.5, + "completions/min_length": 935.0, + "completions/min_terminated_length": 935.0, + "epoch": 0.39987997599519903, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.892336750570762, + "kl": 0.0133209228515625, + "learning_rate": 7.754533981512936e-07, + "loss": -0.0047, + "num_tokens": 87435636.0, + "reward": -9.313225746154785e-09, + "reward_std": 1.042407512664795, + "rewards/wordcountpos_reward_nokeypoint/mean": -9.313225746154785e-09, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": 0.06039448661107055, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.09415244031252842, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.07302967433402213, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 1999 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1414.0, + "completions/mean_length": 1240.3125, + "completions/mean_terminated_length": 1223.0001220703125, + "completions/min_length": 917.0, + "completions/min_terminated_length": 917.0, + "epoch": 0.40008001600320064, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0587678686209747, + "kl": 0.0169525146484375, + "learning_rate": 7.751813958652548e-07, + "loss": -0.0354, + "num_tokens": 87478481.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.6497548818588257, + "rewards/wordcountpos_reward_nokeypoint/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/mean": -0.007473391371599013, + "rewards/wordcountpos_reward_nokeypoint/raw_geo/std": 0.1020766751686233, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_nokeypoint/raw_rule/std": 0.03849001794597504, + "rewards/wordcountpos_reward_nokeypoint/std": 1.0327955484390259, + "step": 2000 + } + ], + "logging_steps": 1, + "max_steps": 4999, + "num_input_tokens_seen": 87478481, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}