| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "episode": 69632, | |
| "epoch": 0.29828649760109666, | |
| "eval_steps": 500, | |
| "global_step": 136, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "episode": 512, | |
| "epoch": 0.0021932830705962986, | |
| "eps": 6, | |
| "loss/policy_avg": 0.15234875679016113, | |
| "lr": 3e-06, | |
| "objective/entropy": -45.407432556152344, | |
| "objective/kl": 0.3935524821281433, | |
| "objective/non_score_reward": -0.001967762364074588, | |
| "objective/rlhf_reward": 4.677086353302002, | |
| "objective/scores": 4.679054260253906, | |
| "policy/approxkl_avg": 0.11690396815538406, | |
| "policy/clipfrac_avg": 0.34375, | |
| "policy/entropy_avg": 0.8878304958343506, | |
| "step": 0, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2729, | |
| "val/ratio": 1.000101089477539, | |
| "val/ratio_var": 2.341874278499745e-06 | |
| }, | |
| { | |
| "episode": 1024, | |
| "epoch": 0.004386566141192597, | |
| "eps": 6, | |
| "loss/policy_avg": 0.10656304657459259, | |
| "lr": 2.9882812500000002e-06, | |
| "objective/entropy": -49.59076690673828, | |
| "objective/kl": 0.6760815978050232, | |
| "objective/non_score_reward": -0.0033804080449044704, | |
| "objective/rlhf_reward": 4.551654815673828, | |
| "objective/scores": 4.55503511428833, | |
| "policy/approxkl_avg": 0.12034796178340912, | |
| "policy/clipfrac_avg": 0.32421875, | |
| "policy/entropy_avg": 0.8024331331253052, | |
| "step": 1, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 3517, | |
| "val/ratio": 0.9998391270637512, | |
| "val/ratio_var": 2.0723566649394343e-06 | |
| }, | |
| { | |
| "episode": 1536, | |
| "epoch": 0.006579849211788897, | |
| "eps": 6, | |
| "loss/policy_avg": 0.17722828686237335, | |
| "lr": 2.9765625e-06, | |
| "objective/entropy": -51.09684371948242, | |
| "objective/kl": 1.214200735092163, | |
| "objective/non_score_reward": -0.006071004085242748, | |
| "objective/rlhf_reward": 4.974597930908203, | |
| "objective/scores": 4.980669021606445, | |
| "policy/approxkl_avg": 0.12037836015224457, | |
| "policy/clipfrac_avg": 0.318359375, | |
| "policy/entropy_avg": 0.8043380379676819, | |
| "step": 2, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 3252, | |
| "val/ratio": 1.0001596212387085, | |
| "val/ratio_var": 2.1233340703474823e-06 | |
| }, | |
| { | |
| "episode": 2048, | |
| "epoch": 0.008773132282385195, | |
| "eps": 6, | |
| "loss/policy_avg": 0.11922580748796463, | |
| "lr": 2.96484375e-06, | |
| "objective/entropy": -44.18528366088867, | |
| "objective/kl": 1.4338339567184448, | |
| "objective/non_score_reward": -0.007169169839471579, | |
| "objective/rlhf_reward": 4.915553569793701, | |
| "objective/scores": 4.922722339630127, | |
| "policy/approxkl_avg": 0.16550302505493164, | |
| "policy/clipfrac_avg": 0.375, | |
| "policy/entropy_avg": 0.7921671271324158, | |
| "step": 3, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2435, | |
| "val/ratio": 0.9996773600578308, | |
| "val/ratio_var": 2.4303119516844163e-06 | |
| }, | |
| { | |
| "episode": 2560, | |
| "epoch": 0.010966415352981495, | |
| "eps": 6, | |
| "loss/policy_avg": 0.15014484524726868, | |
| "lr": 2.953125e-06, | |
| "objective/entropy": -42.067405700683594, | |
| "objective/kl": 2.0778450965881348, | |
| "objective/non_score_reward": -0.01038922369480133, | |
| "objective/rlhf_reward": 5.2342209815979, | |
| "objective/scores": 5.244609832763672, | |
| "policy/approxkl_avg": 0.1276472508907318, | |
| "policy/clipfrac_avg": 0.31640625, | |
| "policy/entropy_avg": 0.7496817111968994, | |
| "step": 4, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1877, | |
| "val/ratio": 1.0003182888031006, | |
| "val/ratio_var": 2.2540482405020157e-06 | |
| }, | |
| { | |
| "episode": 3072, | |
| "epoch": 0.013159698423577794, | |
| "eps": 6, | |
| "loss/policy_avg": 0.1916506588459015, | |
| "lr": 2.94140625e-06, | |
| "objective/entropy": -39.82306671142578, | |
| "objective/kl": 3.079103708267212, | |
| "objective/non_score_reward": -0.015395518392324448, | |
| "objective/rlhf_reward": 5.611381530761719, | |
| "objective/scores": 5.626776695251465, | |
| "policy/approxkl_avg": 0.13991527259349823, | |
| "policy/clipfrac_avg": 0.33203125, | |
| "policy/entropy_avg": 0.7127301692962646, | |
| "step": 5, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1565, | |
| "val/ratio": 0.9999192357063293, | |
| "val/ratio_var": 1.5017312762211077e-06 | |
| }, | |
| { | |
| "episode": 3584, | |
| "epoch": 0.015352981494174092, | |
| "eps": 6, | |
| "loss/policy_avg": 0.1968970149755478, | |
| "lr": 2.9296875e-06, | |
| "objective/entropy": -32.000404357910156, | |
| "objective/kl": 4.284768581390381, | |
| "objective/non_score_reward": -0.021423842757940292, | |
| "objective/rlhf_reward": 5.7136030197143555, | |
| "objective/scores": 5.7350263595581055, | |
| "policy/approxkl_avg": 0.1647772341966629, | |
| "policy/clipfrac_avg": 0.3515625, | |
| "policy/entropy_avg": 0.6934345364570618, | |
| "step": 6, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1364, | |
| "val/ratio": 0.9998640418052673, | |
| "val/ratio_var": 2.735638872763957e-06 | |
| }, | |
| { | |
| "episode": 4096, | |
| "epoch": 0.01754626456477039, | |
| "eps": 6, | |
| "loss/policy_avg": 0.20194123685359955, | |
| "lr": 2.91796875e-06, | |
| "objective/entropy": -35.15373992919922, | |
| "objective/kl": 4.795171737670898, | |
| "objective/non_score_reward": -0.023975860327482224, | |
| "objective/rlhf_reward": 6.039769649505615, | |
| "objective/scores": 6.063745498657227, | |
| "policy/approxkl_avg": 0.19040407240390778, | |
| "policy/clipfrac_avg": 0.365234375, | |
| "policy/entropy_avg": 0.6659716367721558, | |
| "step": 7, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1595, | |
| "val/ratio": 0.9995955228805542, | |
| "val/ratio_var": 2.328864411538234e-06 | |
| }, | |
| { | |
| "episode": 4608, | |
| "epoch": 0.01973954763536669, | |
| "eps": 6, | |
| "loss/policy_avg": 0.2287895679473877, | |
| "lr": 2.90625e-06, | |
| "objective/entropy": -33.49115753173828, | |
| "objective/kl": 6.028232097625732, | |
| "objective/non_score_reward": -0.030141159892082214, | |
| "objective/rlhf_reward": 6.286189556121826, | |
| "objective/scores": 6.316330909729004, | |
| "policy/approxkl_avg": 0.16508515179157257, | |
| "policy/clipfrac_avg": 0.35546875, | |
| "policy/entropy_avg": 0.6881623268127441, | |
| "step": 8, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1875, | |
| "val/ratio": 0.9999189376831055, | |
| "val/ratio_var": 2.560210077717784e-06 | |
| }, | |
| { | |
| "episode": 5120, | |
| "epoch": 0.02193283070596299, | |
| "eps": 6, | |
| "loss/policy_avg": 0.23305176198482513, | |
| "lr": 2.89453125e-06, | |
| "objective/entropy": -31.895954132080078, | |
| "objective/kl": 7.27902889251709, | |
| "objective/non_score_reward": -0.03639514371752739, | |
| "objective/rlhf_reward": 6.584256172180176, | |
| "objective/scores": 6.6206512451171875, | |
| "policy/approxkl_avg": 0.15825235843658447, | |
| "policy/clipfrac_avg": 0.333984375, | |
| "policy/entropy_avg": 0.6986380219459534, | |
| "step": 9, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2233, | |
| "val/ratio": 1.000351905822754, | |
| "val/ratio_var": 2.54610608863004e-06 | |
| }, | |
| { | |
| "episode": 5632, | |
| "epoch": 0.024126113776559287, | |
| "eps": 6, | |
| "loss/policy_avg": 0.2743307948112488, | |
| "lr": 2.8828125e-06, | |
| "objective/entropy": -30.13780975341797, | |
| "objective/kl": 8.618773460388184, | |
| "objective/non_score_reward": -0.04309386759996414, | |
| "objective/rlhf_reward": 6.671762466430664, | |
| "objective/scores": 6.714856147766113, | |
| "policy/approxkl_avg": 0.2310631275177002, | |
| "policy/clipfrac_avg": 0.376953125, | |
| "policy/entropy_avg": 0.6997334957122803, | |
| "step": 10, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2288, | |
| "val/ratio": 0.9999397397041321, | |
| "val/ratio_var": 2.7119385777041316e-06 | |
| }, | |
| { | |
| "episode": 6144, | |
| "epoch": 0.026319396847155587, | |
| "eps": 6, | |
| "loss/policy_avg": 0.47530221939086914, | |
| "lr": 2.87109375e-06, | |
| "objective/entropy": -25.868932723999023, | |
| "objective/kl": 10.715995788574219, | |
| "objective/non_score_reward": -0.05357997864484787, | |
| "objective/rlhf_reward": 6.660526275634766, | |
| "objective/scores": 6.714106559753418, | |
| "policy/approxkl_avg": 0.23832854628562927, | |
| "policy/clipfrac_avg": 0.33984375, | |
| "policy/entropy_avg": 0.6960214376449585, | |
| "step": 11, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2465, | |
| "val/ratio": 1.0001269578933716, | |
| "val/ratio_var": 4.06193794333376e-06 | |
| }, | |
| { | |
| "episode": 6656, | |
| "epoch": 0.028512679917751884, | |
| "eps": 6, | |
| "loss/policy_avg": 0.2478591650724411, | |
| "lr": 2.859375e-06, | |
| "objective/entropy": -28.481605529785156, | |
| "objective/kl": 11.356575012207031, | |
| "objective/non_score_reward": -0.05678287148475647, | |
| "objective/rlhf_reward": 7.32498025894165, | |
| "objective/scores": 7.381762981414795, | |
| "policy/approxkl_avg": 0.23808607459068298, | |
| "policy/clipfrac_avg": 0.345703125, | |
| "policy/entropy_avg": 0.6703023910522461, | |
| "step": 12, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2564, | |
| "val/ratio": 0.999792754650116, | |
| "val/ratio_var": 2.613600599943311e-06 | |
| }, | |
| { | |
| "episode": 7168, | |
| "epoch": 0.030705962988348184, | |
| "eps": 6, | |
| "loss/policy_avg": 0.18863295018672943, | |
| "lr": 2.84765625e-06, | |
| "objective/entropy": -31.87090301513672, | |
| "objective/kl": 12.704984664916992, | |
| "objective/non_score_reward": -0.06352491676807404, | |
| "objective/rlhf_reward": 7.6809868812561035, | |
| "objective/scores": 7.744511604309082, | |
| "policy/approxkl_avg": 0.20181894302368164, | |
| "policy/clipfrac_avg": 0.3828125, | |
| "policy/entropy_avg": 0.6681352853775024, | |
| "step": 13, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2607, | |
| "val/ratio": 1.0001722574234009, | |
| "val/ratio_var": 3.1368097097583814e-06 | |
| }, | |
| { | |
| "episode": 7680, | |
| "epoch": 0.03289924605894448, | |
| "eps": 6, | |
| "loss/policy_avg": 0.23149724304676056, | |
| "lr": 2.8359375e-06, | |
| "objective/entropy": -31.151302337646484, | |
| "objective/kl": 14.230264663696289, | |
| "objective/non_score_reward": -0.07115132361650467, | |
| "objective/rlhf_reward": 7.844120025634766, | |
| "objective/scores": 7.915271282196045, | |
| "policy/approxkl_avg": 0.23936033248901367, | |
| "policy/clipfrac_avg": 0.40625, | |
| "policy/entropy_avg": 0.6505059003829956, | |
| "step": 14, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2523, | |
| "val/ratio": 0.999779224395752, | |
| "val/ratio_var": 3.855011527775787e-06 | |
| }, | |
| { | |
| "episode": 8192, | |
| "epoch": 0.03509252912954078, | |
| "eps": 6, | |
| "loss/policy_avg": 0.25306057929992676, | |
| "lr": 2.82421875e-06, | |
| "objective/entropy": -33.95362091064453, | |
| "objective/kl": 15.211322784423828, | |
| "objective/non_score_reward": -0.07605661451816559, | |
| "objective/rlhf_reward": 8.07334041595459, | |
| "objective/scores": 8.149396896362305, | |
| "policy/approxkl_avg": 0.2219444364309311, | |
| "policy/clipfrac_avg": 0.376953125, | |
| "policy/entropy_avg": 0.6534073352813721, | |
| "step": 15, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2392, | |
| "val/ratio": 1.000091791152954, | |
| "val/ratio_var": 3.7099030123499688e-06 | |
| }, | |
| { | |
| "episode": 8704, | |
| "epoch": 0.03728581220013708, | |
| "eps": 6, | |
| "loss/policy_avg": 0.3671952486038208, | |
| "lr": 2.8125e-06, | |
| "objective/entropy": -36.56987762451172, | |
| "objective/kl": 15.896276473999023, | |
| "objective/non_score_reward": -0.07948137074708939, | |
| "objective/rlhf_reward": 8.115487098693848, | |
| "objective/scores": 8.194968223571777, | |
| "policy/approxkl_avg": 0.2001374065876007, | |
| "policy/clipfrac_avg": 0.376953125, | |
| "policy/entropy_avg": 0.5611248016357422, | |
| "step": 16, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2099, | |
| "val/ratio": 1.0001428127288818, | |
| "val/ratio_var": 2.9763129987259163e-06 | |
| }, | |
| { | |
| "episode": 9216, | |
| "epoch": 0.03947909527073338, | |
| "eps": 6, | |
| "loss/policy_avg": 0.2377510964870453, | |
| "lr": 2.80078125e-06, | |
| "objective/entropy": -38.7183837890625, | |
| "objective/kl": 16.948450088500977, | |
| "objective/non_score_reward": -0.08474224805831909, | |
| "objective/rlhf_reward": 8.287287712097168, | |
| "objective/scores": 8.372029304504395, | |
| "policy/approxkl_avg": 0.19774162769317627, | |
| "policy/clipfrac_avg": 0.33984375, | |
| "policy/entropy_avg": 0.5588769912719727, | |
| "step": 17, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2173, | |
| "val/ratio": 0.9999226331710815, | |
| "val/ratio_var": 2.539955858082976e-06 | |
| }, | |
| { | |
| "episode": 9728, | |
| "epoch": 0.041672378341329676, | |
| "eps": 6, | |
| "loss/policy_avg": 0.30208033323287964, | |
| "lr": 2.7890625e-06, | |
| "objective/entropy": -37.54444122314453, | |
| "objective/kl": 17.52410316467285, | |
| "objective/non_score_reward": -0.08762051165103912, | |
| "objective/rlhf_reward": 8.367655754089355, | |
| "objective/scores": 8.455276489257812, | |
| "policy/approxkl_avg": 0.20374038815498352, | |
| "policy/clipfrac_avg": 0.33984375, | |
| "policy/entropy_avg": 0.5387827157974243, | |
| "step": 18, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2168, | |
| "val/ratio": 0.99991774559021, | |
| "val/ratio_var": 3.0660403353977017e-06 | |
| }, | |
| { | |
| "episode": 10240, | |
| "epoch": 0.04386566141192598, | |
| "eps": 6, | |
| "loss/policy_avg": 0.22399874031543732, | |
| "lr": 2.77734375e-06, | |
| "objective/entropy": -39.96161651611328, | |
| "objective/kl": 17.749210357666016, | |
| "objective/non_score_reward": -0.08874605596065521, | |
| "objective/rlhf_reward": 8.257219314575195, | |
| "objective/scores": 8.345966339111328, | |
| "policy/approxkl_avg": 0.2131994068622589, | |
| "policy/clipfrac_avg": 0.359375, | |
| "policy/entropy_avg": 0.5233398079872131, | |
| "step": 19, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2201, | |
| "val/ratio": 1.0001959800720215, | |
| "val/ratio_var": 3.099843979725847e-06 | |
| }, | |
| { | |
| "episode": 10752, | |
| "epoch": 0.04605894448252228, | |
| "eps": 6, | |
| "loss/policy_avg": 0.16000297665596008, | |
| "lr": 2.765625e-06, | |
| "objective/entropy": -40.21980285644531, | |
| "objective/kl": 18.693737030029297, | |
| "objective/non_score_reward": -0.09346868097782135, | |
| "objective/rlhf_reward": 8.49026107788086, | |
| "objective/scores": 8.58372974395752, | |
| "policy/approxkl_avg": 0.2675846219062805, | |
| "policy/clipfrac_avg": 0.38671875, | |
| "policy/entropy_avg": 0.524081826210022, | |
| "step": 20, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2342, | |
| "val/ratio": 0.9995419979095459, | |
| "val/ratio_var": 3.2623988772684243e-06 | |
| }, | |
| { | |
| "episode": 11264, | |
| "epoch": 0.048252227553118573, | |
| "eps": 6, | |
| "loss/policy_avg": 0.23491168022155762, | |
| "lr": 2.75390625e-06, | |
| "objective/entropy": -38.962947845458984, | |
| "objective/kl": 19.98727035522461, | |
| "objective/non_score_reward": -0.099936343729496, | |
| "objective/rlhf_reward": 8.68085765838623, | |
| "objective/scores": 8.780793190002441, | |
| "policy/approxkl_avg": 0.19925493001937866, | |
| "policy/clipfrac_avg": 0.3359375, | |
| "policy/entropy_avg": 0.5088595151901245, | |
| "step": 21, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2385, | |
| "val/ratio": 0.9999128580093384, | |
| "val/ratio_var": 3.195402769051725e-06 | |
| }, | |
| { | |
| "episode": 11776, | |
| "epoch": 0.05044551062371487, | |
| "eps": 6, | |
| "loss/policy_avg": 0.20272764563560486, | |
| "lr": 2.7421875e-06, | |
| "objective/entropy": -39.06739807128906, | |
| "objective/kl": 20.73778533935547, | |
| "objective/non_score_reward": -0.10368892550468445, | |
| "objective/rlhf_reward": 8.66939640045166, | |
| "objective/scores": 8.77308464050293, | |
| "policy/approxkl_avg": 0.24808868765830994, | |
| "policy/clipfrac_avg": 0.3671875, | |
| "policy/entropy_avg": 0.5206277370452881, | |
| "step": 22, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2328, | |
| "val/ratio": 0.9997999668121338, | |
| "val/ratio_var": 2.6692378014558926e-06 | |
| }, | |
| { | |
| "episode": 12288, | |
| "epoch": 0.052638793694311174, | |
| "eps": 6, | |
| "loss/policy_avg": 0.1657373309135437, | |
| "lr": 2.73046875e-06, | |
| "objective/entropy": -41.76697540283203, | |
| "objective/kl": 21.265926361083984, | |
| "objective/non_score_reward": -0.10632962733507156, | |
| "objective/rlhf_reward": 8.899429321289062, | |
| "objective/scores": 9.005760192871094, | |
| "policy/approxkl_avg": 0.21000100672245026, | |
| "policy/clipfrac_avg": 0.330078125, | |
| "policy/entropy_avg": 0.4891219437122345, | |
| "step": 23, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2460, | |
| "val/ratio": 1.000035285949707, | |
| "val/ratio_var": 3.269958597229561e-06 | |
| }, | |
| { | |
| "episode": 12800, | |
| "epoch": 0.05483207676490747, | |
| "eps": 6, | |
| "loss/policy_avg": 0.17451098561286926, | |
| "lr": 2.71875e-06, | |
| "objective/entropy": -40.41400146484375, | |
| "objective/kl": 22.69107437133789, | |
| "objective/non_score_reward": -0.11345535516738892, | |
| "objective/rlhf_reward": 8.97903060913086, | |
| "objective/scores": 9.092485427856445, | |
| "policy/approxkl_avg": 0.2612704336643219, | |
| "policy/clipfrac_avg": 0.37109375, | |
| "policy/entropy_avg": 0.4792313873767853, | |
| "step": 24, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2361, | |
| "val/ratio": 0.9997962713241577, | |
| "val/ratio_var": 4.033691766380798e-06 | |
| }, | |
| { | |
| "episode": 13312, | |
| "epoch": 0.05702535983550377, | |
| "eps": 6, | |
| "loss/policy_avg": 0.3060297966003418, | |
| "lr": 2.70703125e-06, | |
| "objective/entropy": -39.72850036621094, | |
| "objective/kl": 24.020402908325195, | |
| "objective/non_score_reward": -0.12010201811790466, | |
| "objective/rlhf_reward": 8.757214546203613, | |
| "objective/scores": 8.877317428588867, | |
| "policy/approxkl_avg": 0.20275026559829712, | |
| "policy/clipfrac_avg": 0.333984375, | |
| "policy/entropy_avg": 0.47031739354133606, | |
| "step": 25, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2365, | |
| "val/ratio": 1.0004143714904785, | |
| "val/ratio_var": 4.571053977997508e-06 | |
| }, | |
| { | |
| "episode": 13824, | |
| "epoch": 0.05921864290610007, | |
| "eps": 6, | |
| "loss/policy_avg": 0.2459714114665985, | |
| "lr": 2.6953125e-06, | |
| "objective/entropy": -42.67210388183594, | |
| "objective/kl": 24.816062927246094, | |
| "objective/non_score_reward": -0.12408032268285751, | |
| "objective/rlhf_reward": 8.767488479614258, | |
| "objective/scores": 8.891569137573242, | |
| "policy/approxkl_avg": 0.16891685128211975, | |
| "policy/clipfrac_avg": 0.314453125, | |
| "policy/entropy_avg": 0.4268378019332886, | |
| "step": 26, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2338, | |
| "val/ratio": 1.0001232624053955, | |
| "val/ratio_var": 2.055706318060402e-06 | |
| }, | |
| { | |
| "episode": 14336, | |
| "epoch": 0.06141192597669637, | |
| "eps": 6, | |
| "loss/policy_avg": 0.2477143555879593, | |
| "lr": 2.68359375e-06, | |
| "objective/entropy": -40.98384094238281, | |
| "objective/kl": 26.158355712890625, | |
| "objective/non_score_reward": -0.1307917833328247, | |
| "objective/rlhf_reward": 8.869270324707031, | |
| "objective/scores": 9.000061988830566, | |
| "policy/approxkl_avg": 0.21648374199867249, | |
| "policy/clipfrac_avg": 0.35546875, | |
| "policy/entropy_avg": 0.43942803144454956, | |
| "step": 27, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2325, | |
| "val/ratio": 1.00002920627594, | |
| "val/ratio_var": 3.6563492358254734e-06 | |
| }, | |
| { | |
| "episode": 14848, | |
| "epoch": 0.06360520904729267, | |
| "eps": 6, | |
| "loss/policy_avg": 0.18666958808898926, | |
| "lr": 2.671875e-06, | |
| "objective/entropy": -39.86817169189453, | |
| "objective/kl": 26.283226013183594, | |
| "objective/non_score_reward": -0.13141612708568573, | |
| "objective/rlhf_reward": 9.001875877380371, | |
| "objective/scores": 9.133292198181152, | |
| "policy/approxkl_avg": 0.22604086995124817, | |
| "policy/clipfrac_avg": 0.37109375, | |
| "policy/entropy_avg": 0.4275825023651123, | |
| "step": 28, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2388, | |
| "val/ratio": 0.999720573425293, | |
| "val/ratio_var": 2.5127403660007985e-06 | |
| }, | |
| { | |
| "episode": 15360, | |
| "epoch": 0.06579849211788896, | |
| "eps": 6, | |
| "loss/policy_avg": 0.21501849591732025, | |
| "lr": 2.66015625e-06, | |
| "objective/entropy": -34.56945037841797, | |
| "objective/kl": 28.54375457763672, | |
| "objective/non_score_reward": -0.14271876215934753, | |
| "objective/rlhf_reward": 8.915486335754395, | |
| "objective/scores": 9.058204650878906, | |
| "policy/approxkl_avg": 0.23621705174446106, | |
| "policy/clipfrac_avg": 0.361328125, | |
| "policy/entropy_avg": 0.42039746046066284, | |
| "step": 29, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2389, | |
| "val/ratio": 0.9995044469833374, | |
| "val/ratio_var": 4.271460511517944e-06 | |
| }, | |
| { | |
| "episode": 15872, | |
| "epoch": 0.06799177518848526, | |
| "eps": 6, | |
| "loss/policy_avg": 0.343304842710495, | |
| "lr": 2.6484375e-06, | |
| "objective/entropy": -35.42715072631836, | |
| "objective/kl": 28.722532272338867, | |
| "objective/non_score_reward": -0.14361265301704407, | |
| "objective/rlhf_reward": 9.159289360046387, | |
| "objective/scores": 9.302902221679688, | |
| "policy/approxkl_avg": 0.221934974193573, | |
| "policy/clipfrac_avg": 0.3046875, | |
| "policy/entropy_avg": 0.3945683240890503, | |
| "step": 30, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2236, | |
| "val/ratio": 1.0000040531158447, | |
| "val/ratio_var": 3.5196928820369067e-06 | |
| }, | |
| { | |
| "episode": 16384, | |
| "epoch": 0.07018505825908156, | |
| "eps": 6, | |
| "loss/policy_avg": 0.4800252318382263, | |
| "lr": 2.63671875e-06, | |
| "objective/entropy": -33.33995056152344, | |
| "objective/kl": 29.83435821533203, | |
| "objective/non_score_reward": -0.14917179942131042, | |
| "objective/rlhf_reward": 9.096776962280273, | |
| "objective/scores": 9.245948791503906, | |
| "policy/approxkl_avg": 0.267780601978302, | |
| "policy/clipfrac_avg": 0.337890625, | |
| "policy/entropy_avg": 0.401678204536438, | |
| "step": 31, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2179, | |
| "val/ratio": 1.0002527236938477, | |
| "val/ratio_var": 3.2502473459317116e-06 | |
| }, | |
| { | |
| "episode": 16896, | |
| "epoch": 0.07237834132967787, | |
| "eps": 6, | |
| "loss/policy_avg": 0.2809556722640991, | |
| "lr": 2.6250000000000003e-06, | |
| "objective/entropy": -34.06169509887695, | |
| "objective/kl": 31.27130889892578, | |
| "objective/non_score_reward": -0.156356543302536, | |
| "objective/rlhf_reward": 9.190299034118652, | |
| "objective/scores": 9.346654891967773, | |
| "policy/approxkl_avg": 0.2590065896511078, | |
| "policy/clipfrac_avg": 0.333984375, | |
| "policy/entropy_avg": 0.39545533061027527, | |
| "step": 32, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2267, | |
| "val/ratio": 1.0000057220458984, | |
| "val/ratio_var": 3.7364695799624315e-06 | |
| }, | |
| { | |
| "episode": 17408, | |
| "epoch": 0.07457162440027416, | |
| "eps": 6, | |
| "loss/policy_avg": 0.22726668417453766, | |
| "lr": 2.61328125e-06, | |
| "objective/entropy": -29.238605499267578, | |
| "objective/kl": 32.63374710083008, | |
| "objective/non_score_reward": -0.16316872835159302, | |
| "objective/rlhf_reward": 9.312760353088379, | |
| "objective/scores": 9.475929260253906, | |
| "policy/approxkl_avg": 0.25571638345718384, | |
| "policy/clipfrac_avg": 0.328125, | |
| "policy/entropy_avg": 0.38992634415626526, | |
| "step": 33, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2153, | |
| "val/ratio": 0.9997001886367798, | |
| "val/ratio_var": 2.696072442631703e-06 | |
| }, | |
| { | |
| "episode": 17920, | |
| "epoch": 0.07676490747087046, | |
| "eps": 6, | |
| "loss/policy_avg": 0.31760329008102417, | |
| "lr": 2.6015625e-06, | |
| "objective/entropy": -30.36035919189453, | |
| "objective/kl": 33.105316162109375, | |
| "objective/non_score_reward": -0.16552656888961792, | |
| "objective/rlhf_reward": 9.344242095947266, | |
| "objective/scores": 9.509769439697266, | |
| "policy/approxkl_avg": 0.26468268036842346, | |
| "policy/clipfrac_avg": 0.31640625, | |
| "policy/entropy_avg": 0.3962373435497284, | |
| "step": 34, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2063, | |
| "val/ratio": 0.9997377395629883, | |
| "val/ratio_var": 3.3684978006931487e-06 | |
| }, | |
| { | |
| "episode": 18432, | |
| "epoch": 0.07895819054146676, | |
| "eps": 6, | |
| "loss/policy_avg": 0.29205620288848877, | |
| "lr": 2.5898437500000003e-06, | |
| "objective/entropy": -31.297311782836914, | |
| "objective/kl": 34.06657409667969, | |
| "objective/non_score_reward": -0.1703328639268875, | |
| "objective/rlhf_reward": 9.36227035522461, | |
| "objective/scores": 9.532604217529297, | |
| "policy/approxkl_avg": 0.23790021240711212, | |
| "policy/clipfrac_avg": 0.359375, | |
| "policy/entropy_avg": 0.39074409008026123, | |
| "step": 35, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2030, | |
| "val/ratio": 1.0001909732818604, | |
| "val/ratio_var": 4.307842573325615e-06 | |
| }, | |
| { | |
| "episode": 18944, | |
| "epoch": 0.08115147361206305, | |
| "eps": 6, | |
| "loss/policy_avg": 0.27079224586486816, | |
| "lr": 2.578125e-06, | |
| "objective/entropy": -31.059741973876953, | |
| "objective/kl": 34.23755645751953, | |
| "objective/non_score_reward": -0.17118775844573975, | |
| "objective/rlhf_reward": 9.575616836547852, | |
| "objective/scores": 9.746803283691406, | |
| "policy/approxkl_avg": 0.2780650556087494, | |
| "policy/clipfrac_avg": 0.359375, | |
| "policy/entropy_avg": 0.36144694685935974, | |
| "step": 36, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1946, | |
| "val/ratio": 1.0000677108764648, | |
| "val/ratio_var": 5.890160991839366e-06 | |
| }, | |
| { | |
| "episode": 19456, | |
| "epoch": 0.08334475668265935, | |
| "eps": 6, | |
| "loss/policy_avg": 0.35481345653533936, | |
| "lr": 2.56640625e-06, | |
| "objective/entropy": -29.504077911376953, | |
| "objective/kl": 35.572723388671875, | |
| "objective/non_score_reward": -0.17786362767219543, | |
| "objective/rlhf_reward": 9.650212287902832, | |
| "objective/scores": 9.828075408935547, | |
| "policy/approxkl_avg": 0.27989301085472107, | |
| "policy/clipfrac_avg": 0.341796875, | |
| "policy/entropy_avg": 0.36728498339653015, | |
| "step": 37, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1913, | |
| "val/ratio": 1.000013828277588, | |
| "val/ratio_var": 3.834660674328916e-06 | |
| }, | |
| { | |
| "episode": 19968, | |
| "epoch": 0.08553803975325565, | |
| "eps": 6, | |
| "loss/policy_avg": 0.32023417949676514, | |
| "lr": 2.5546875000000003e-06, | |
| "objective/entropy": -32.94467544555664, | |
| "objective/kl": 35.05192565917969, | |
| "objective/non_score_reward": -0.17525961995124817, | |
| "objective/rlhf_reward": 9.715032577514648, | |
| "objective/scores": 9.89029312133789, | |
| "policy/approxkl_avg": 0.28202196955680847, | |
| "policy/clipfrac_avg": 0.32421875, | |
| "policy/entropy_avg": 0.3595014214515686, | |
| "step": 38, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1895, | |
| "val/ratio": 1.0004128217697144, | |
| "val/ratio_var": 3.976154857809888e-06 | |
| }, | |
| { | |
| "episode": 20480, | |
| "epoch": 0.08773132282385196, | |
| "eps": 6, | |
| "loss/policy_avg": 0.3284844160079956, | |
| "lr": 2.54296875e-06, | |
| "objective/entropy": -33.950965881347656, | |
| "objective/kl": 35.06300354003906, | |
| "objective/non_score_reward": -0.1753150224685669, | |
| "objective/rlhf_reward": 9.614059448242188, | |
| "objective/scores": 9.789375305175781, | |
| "policy/approxkl_avg": 0.2808968126773834, | |
| "policy/clipfrac_avg": 0.3359375, | |
| "policy/entropy_avg": 0.3299378454685211, | |
| "step": 39, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1822, | |
| "val/ratio": 1.0001529455184937, | |
| "val/ratio_var": 3.939061116398079e-06 | |
| }, | |
| { | |
| "episode": 20992, | |
| "epoch": 0.08992460589444826, | |
| "eps": 6, | |
| "loss/policy_avg": 0.25570446252822876, | |
| "lr": 2.53125e-06, | |
| "objective/entropy": -33.10383605957031, | |
| "objective/kl": 37.40079116821289, | |
| "objective/non_score_reward": -0.1870039403438568, | |
| "objective/rlhf_reward": 9.649069786071777, | |
| "objective/scores": 9.836073875427246, | |
| "policy/approxkl_avg": 0.28119906783103943, | |
| "policy/clipfrac_avg": 0.349609375, | |
| "policy/entropy_avg": 0.33145490288734436, | |
| "step": 40, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1757, | |
| "val/ratio": 0.999976396560669, | |
| "val/ratio_var": 3.935067525162594e-06 | |
| }, | |
| { | |
| "episode": 21504, | |
| "epoch": 0.09211788896504455, | |
| "eps": 6, | |
| "loss/policy_avg": 0.3803982436656952, | |
| "lr": 2.5195312500000003e-06, | |
| "objective/entropy": -35.517391204833984, | |
| "objective/kl": 35.55995178222656, | |
| "objective/non_score_reward": -0.1777997612953186, | |
| "objective/rlhf_reward": 9.623468399047852, | |
| "objective/scores": 9.801267623901367, | |
| "policy/approxkl_avg": 0.28167036175727844, | |
| "policy/clipfrac_avg": 0.3828125, | |
| "policy/entropy_avg": 0.33138006925582886, | |
| "step": 41, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1765, | |
| "val/ratio": 1.000349760055542, | |
| "val/ratio_var": 4.55302324553486e-06 | |
| }, | |
| { | |
| "episode": 22016, | |
| "epoch": 0.09431117203564085, | |
| "eps": 6, | |
| "loss/policy_avg": 0.2066161185503006, | |
| "lr": 2.5078125e-06, | |
| "objective/entropy": -34.937923431396484, | |
| "objective/kl": 34.828224182128906, | |
| "objective/non_score_reward": -0.17414110898971558, | |
| "objective/rlhf_reward": 10.007875442504883, | |
| "objective/scores": 10.18201732635498, | |
| "policy/approxkl_avg": 0.25226256251335144, | |
| "policy/clipfrac_avg": 0.349609375, | |
| "policy/entropy_avg": 0.31548449397087097, | |
| "step": 42, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1692, | |
| "val/ratio": 1.0003182888031006, | |
| "val/ratio_var": 3.0859027901897207e-06 | |
| }, | |
| { | |
| "episode": 22528, | |
| "epoch": 0.09650445510623715, | |
| "eps": 6, | |
| "loss/policy_avg": 0.21718214452266693, | |
| "lr": 2.49609375e-06, | |
| "objective/entropy": -36.30521774291992, | |
| "objective/kl": 35.64817810058594, | |
| "objective/non_score_reward": -0.17824086546897888, | |
| "objective/rlhf_reward": 9.524388313293457, | |
| "objective/scores": 9.702629089355469, | |
| "policy/approxkl_avg": 0.3034236431121826, | |
| "policy/clipfrac_avg": 0.359375, | |
| "policy/entropy_avg": 0.333159863948822, | |
| "step": 43, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1736, | |
| "val/ratio": 0.9996454119682312, | |
| "val/ratio_var": 3.5439479688648134e-06 | |
| }, | |
| { | |
| "episode": 23040, | |
| "epoch": 0.09869773817683344, | |
| "eps": 6, | |
| "loss/policy_avg": 0.20844724774360657, | |
| "lr": 2.4843750000000002e-06, | |
| "objective/entropy": -37.294647216796875, | |
| "objective/kl": 34.449188232421875, | |
| "objective/non_score_reward": -0.17224593460559845, | |
| "objective/rlhf_reward": 9.562028884887695, | |
| "objective/scores": 9.734274864196777, | |
| "policy/approxkl_avg": 0.2691035270690918, | |
| "policy/clipfrac_avg": 0.369140625, | |
| "policy/entropy_avg": 0.3254753351211548, | |
| "step": 44, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1712, | |
| "val/ratio": 0.9999078512191772, | |
| "val/ratio_var": 3.873953573929612e-06 | |
| }, | |
| { | |
| "episode": 23552, | |
| "epoch": 0.10089102124742974, | |
| "eps": 6, | |
| "loss/policy_avg": 0.26617997884750366, | |
| "lr": 2.47265625e-06, | |
| "objective/entropy": -38.45973205566406, | |
| "objective/kl": 33.36463165283203, | |
| "objective/non_score_reward": -0.166823148727417, | |
| "objective/rlhf_reward": 9.529053688049316, | |
| "objective/scores": 9.695877075195312, | |
| "policy/approxkl_avg": 0.2848384976387024, | |
| "policy/clipfrac_avg": 0.32421875, | |
| "policy/entropy_avg": 0.33362650871276855, | |
| "step": 45, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1776, | |
| "val/ratio": 0.9998953938484192, | |
| "val/ratio_var": 4.734669346362352e-06 | |
| }, | |
| { | |
| "episode": 24064, | |
| "epoch": 0.10308430431802605, | |
| "eps": 6, | |
| "loss/policy_avg": 0.14936941862106323, | |
| "lr": 2.4609375e-06, | |
| "objective/entropy": -41.61138153076172, | |
| "objective/kl": 32.80962371826172, | |
| "objective/non_score_reward": -0.16404810547828674, | |
| "objective/rlhf_reward": 9.842905044555664, | |
| "objective/scores": 10.006953239440918, | |
| "policy/approxkl_avg": 0.237547367811203, | |
| "policy/clipfrac_avg": 0.376953125, | |
| "policy/entropy_avg": 0.311695396900177, | |
| "step": 46, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1758, | |
| "val/ratio": 1.0000953674316406, | |
| "val/ratio_var": 2.808139925036812e-06 | |
| }, | |
| { | |
| "episode": 24576, | |
| "epoch": 0.10527758738862235, | |
| "eps": 6, | |
| "loss/policy_avg": 0.30603936314582825, | |
| "lr": 2.4492187500000002e-06, | |
| "objective/entropy": -41.17787551879883, | |
| "objective/kl": 32.624759674072266, | |
| "objective/non_score_reward": -0.16312378644943237, | |
| "objective/rlhf_reward": 9.724346160888672, | |
| "objective/scores": 9.887470245361328, | |
| "policy/approxkl_avg": 0.29364651441574097, | |
| "policy/clipfrac_avg": 0.353515625, | |
| "policy/entropy_avg": 0.3342776894569397, | |
| "step": 47, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1791, | |
| "val/ratio": 1.000326156616211, | |
| "val/ratio_var": 4.5322440200834535e-06 | |
| }, | |
| { | |
| "episode": 25088, | |
| "epoch": 0.10747087045921865, | |
| "eps": 6, | |
| "loss/policy_avg": 0.2642097771167755, | |
| "lr": 2.4375e-06, | |
| "objective/entropy": -39.200443267822266, | |
| "objective/kl": 32.45161437988281, | |
| "objective/non_score_reward": -0.1622580885887146, | |
| "objective/rlhf_reward": 9.750046730041504, | |
| "objective/scores": 9.912304878234863, | |
| "policy/approxkl_avg": 0.30972546339035034, | |
| "policy/clipfrac_avg": 0.345703125, | |
| "policy/entropy_avg": 0.33516496419906616, | |
| "step": 48, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1872, | |
| "val/ratio": 1.0000823736190796, | |
| "val/ratio_var": 3.669098759928602e-06 | |
| }, | |
| { | |
| "episode": 25600, | |
| "epoch": 0.10966415352981494, | |
| "eps": 6, | |
| "loss/policy_avg": 0.2650497555732727, | |
| "lr": 2.42578125e-06, | |
| "objective/entropy": -42.08147430419922, | |
| "objective/kl": 32.27870178222656, | |
| "objective/non_score_reward": -0.16139349341392517, | |
| "objective/rlhf_reward": 9.627498626708984, | |
| "objective/scores": 9.788891792297363, | |
| "policy/approxkl_avg": 0.24272559583187103, | |
| "policy/clipfrac_avg": 0.34375, | |
| "policy/entropy_avg": 0.3419151306152344, | |
| "step": 49, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1885, | |
| "val/ratio": 1.0000895261764526, | |
| "val/ratio_var": 3.1425495308212703e-06 | |
| }, | |
| { | |
| "episode": 26112, | |
| "epoch": 0.11185743660041124, | |
| "eps": 6, | |
| "loss/policy_avg": 0.19408683478832245, | |
| "lr": 2.4140625000000002e-06, | |
| "objective/entropy": -40.94256591796875, | |
| "objective/kl": 31.59925651550293, | |
| "objective/non_score_reward": -0.1579962968826294, | |
| "objective/rlhf_reward": 9.357413291931152, | |
| "objective/scores": 9.515409469604492, | |
| "policy/approxkl_avg": 0.2539810538291931, | |
| "policy/clipfrac_avg": 0.345703125, | |
| "policy/entropy_avg": 0.33615610003471375, | |
| "step": 50, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1861, | |
| "val/ratio": 1.0000097751617432, | |
| "val/ratio_var": 4.071615421707975e-06 | |
| }, | |
| { | |
| "episode": 26624, | |
| "epoch": 0.11405071967100754, | |
| "eps": 6, | |
| "loss/policy_avg": 0.12506979703903198, | |
| "lr": 2.40234375e-06, | |
| "objective/entropy": -43.869224548339844, | |
| "objective/kl": 30.23246955871582, | |
| "objective/non_score_reward": -0.15116234123706818, | |
| "objective/rlhf_reward": 9.635049819946289, | |
| "objective/scores": 9.786212921142578, | |
| "policy/approxkl_avg": 0.22058835625648499, | |
| "policy/clipfrac_avg": 0.3828125, | |
| "policy/entropy_avg": 0.35689815878868103, | |
| "step": 51, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1916, | |
| "val/ratio": 1.0003091096878052, | |
| "val/ratio_var": 3.508681174935191e-06 | |
| }, | |
| { | |
| "episode": 27136, | |
| "epoch": 0.11624400274160383, | |
| "eps": 6, | |
| "loss/policy_avg": 0.2060900628566742, | |
| "lr": 2.390625e-06, | |
| "objective/entropy": -45.10155487060547, | |
| "objective/kl": 30.40532684326172, | |
| "objective/non_score_reward": -0.15202662348747253, | |
| "objective/rlhf_reward": 9.444595336914062, | |
| "objective/scores": 9.596622467041016, | |
| "policy/approxkl_avg": 0.2469383329153061, | |
| "policy/clipfrac_avg": 0.337890625, | |
| "policy/entropy_avg": 0.33809155225753784, | |
| "step": 52, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1910, | |
| "val/ratio": 0.9997259974479675, | |
| "val/ratio_var": 3.441498392930953e-06 | |
| }, | |
| { | |
| "episode": 27648, | |
| "epoch": 0.11843728581220014, | |
| "eps": 6, | |
| "loss/policy_avg": 0.19970384240150452, | |
| "lr": 2.3789062500000002e-06, | |
| "objective/entropy": -43.50267028808594, | |
| "objective/kl": 29.95575714111328, | |
| "objective/non_score_reward": -0.14977876842021942, | |
| "objective/rlhf_reward": 9.758223533630371, | |
| "objective/scores": 9.908000946044922, | |
| "policy/approxkl_avg": 0.23129594326019287, | |
| "policy/clipfrac_avg": 0.34765625, | |
| "policy/entropy_avg": 0.3461906909942627, | |
| "step": 53, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1897, | |
| "val/ratio": 1.00007164478302, | |
| "val/ratio_var": 4.8727483772381674e-06 | |
| }, | |
| { | |
| "episode": 28160, | |
| "epoch": 0.12063056888279644, | |
| "eps": 6, | |
| "loss/policy_avg": 0.17737600207328796, | |
| "lr": 2.3671875e-06, | |
| "objective/entropy": -43.82114791870117, | |
| "objective/kl": 30.448320388793945, | |
| "objective/non_score_reward": -0.15224160254001617, | |
| "objective/rlhf_reward": 9.586687088012695, | |
| "objective/scores": 9.738929748535156, | |
| "policy/approxkl_avg": 0.2304307222366333, | |
| "policy/clipfrac_avg": 0.38671875, | |
| "policy/entropy_avg": 0.35419148206710815, | |
| "step": 54, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1930, | |
| "val/ratio": 1.000131368637085, | |
| "val/ratio_var": 2.835533905454213e-06 | |
| }, | |
| { | |
| "episode": 28672, | |
| "epoch": 0.12282385195339274, | |
| "eps": 6, | |
| "loss/policy_avg": 0.20781438052654266, | |
| "lr": 2.35546875e-06, | |
| "objective/entropy": -42.808937072753906, | |
| "objective/kl": 30.716829299926758, | |
| "objective/non_score_reward": -0.15358413755893707, | |
| "objective/rlhf_reward": 9.48070240020752, | |
| "objective/scores": 9.634286880493164, | |
| "policy/approxkl_avg": 0.2257956713438034, | |
| "policy/clipfrac_avg": 0.34375, | |
| "policy/entropy_avg": 0.35503190755844116, | |
| "step": 55, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1936, | |
| "val/ratio": 0.9999227523803711, | |
| "val/ratio_var": 3.047591235372238e-06 | |
| }, | |
| { | |
| "episode": 29184, | |
| "epoch": 0.12501713502398903, | |
| "eps": 6, | |
| "loss/policy_avg": 0.17236250638961792, | |
| "lr": 2.3437500000000002e-06, | |
| "objective/entropy": -42.00170135498047, | |
| "objective/kl": 30.85394859313965, | |
| "objective/non_score_reward": -0.154269739985466, | |
| "objective/rlhf_reward": 9.769851684570312, | |
| "objective/scores": 9.924121856689453, | |
| "policy/approxkl_avg": 0.25827232003211975, | |
| "policy/clipfrac_avg": 0.365234375, | |
| "policy/entropy_avg": 0.35941898822784424, | |
| "step": 56, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1899, | |
| "val/ratio": 0.9998503923416138, | |
| "val/ratio_var": 3.5402435969444923e-06 | |
| }, | |
| { | |
| "episode": 29696, | |
| "epoch": 0.12721041809458533, | |
| "eps": 6, | |
| "loss/policy_avg": 0.1874779909849167, | |
| "lr": 2.33203125e-06, | |
| "objective/entropy": -41.37017822265625, | |
| "objective/kl": 30.405405044555664, | |
| "objective/non_score_reward": -0.15202701091766357, | |
| "objective/rlhf_reward": 9.552864074707031, | |
| "objective/scores": 9.704890251159668, | |
| "policy/approxkl_avg": 0.2579526901245117, | |
| "policy/clipfrac_avg": 0.3671875, | |
| "policy/entropy_avg": 0.36883309483528137, | |
| "step": 57, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1996, | |
| "val/ratio": 1.0000630617141724, | |
| "val/ratio_var": 3.2543882753088837e-06 | |
| }, | |
| { | |
| "episode": 30208, | |
| "epoch": 0.12940370116518163, | |
| "eps": 6, | |
| "loss/policy_avg": 0.24861405789852142, | |
| "lr": 2.3203125e-06, | |
| "objective/entropy": -39.66204833984375, | |
| "objective/kl": 32.23133850097656, | |
| "objective/non_score_reward": -0.16115668416023254, | |
| "objective/rlhf_reward": 9.291080474853516, | |
| "objective/scores": 9.452238082885742, | |
| "policy/approxkl_avg": 0.24864479899406433, | |
| "policy/clipfrac_avg": 0.34765625, | |
| "policy/entropy_avg": 0.39884617924690247, | |
| "step": 58, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1956, | |
| "val/ratio": 0.9995733499526978, | |
| "val/ratio_var": 2.4565586045355303e-06 | |
| }, | |
| { | |
| "episode": 30720, | |
| "epoch": 0.13159698423577793, | |
| "eps": 6, | |
| "loss/policy_avg": 0.15614402294158936, | |
| "lr": 2.30859375e-06, | |
| "objective/entropy": -39.843353271484375, | |
| "objective/kl": 31.747220993041992, | |
| "objective/non_score_reward": -0.15873610973358154, | |
| "objective/rlhf_reward": 9.32120418548584, | |
| "objective/scores": 9.479940414428711, | |
| "policy/approxkl_avg": 0.2175564169883728, | |
| "policy/clipfrac_avg": 0.375, | |
| "policy/entropy_avg": 0.40173786878585815, | |
| "step": 59, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1870, | |
| "val/ratio": 1.0000216960906982, | |
| "val/ratio_var": 2.7907508410862647e-06 | |
| }, | |
| { | |
| "episode": 31232, | |
| "epoch": 0.13379026730637422, | |
| "eps": 6, | |
| "loss/policy_avg": 0.1781693994998932, | |
| "lr": 2.296875e-06, | |
| "objective/entropy": -39.122676849365234, | |
| "objective/kl": 32.07661437988281, | |
| "objective/non_score_reward": -0.16038307547569275, | |
| "objective/rlhf_reward": 9.458475112915039, | |
| "objective/scores": 9.618858337402344, | |
| "policy/approxkl_avg": 0.2337334156036377, | |
| "policy/clipfrac_avg": 0.3828125, | |
| "policy/entropy_avg": 0.40583640336990356, | |
| "step": 60, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1988, | |
| "val/ratio": 1.000226378440857, | |
| "val/ratio_var": 2.6703253297455376e-06 | |
| }, | |
| { | |
| "episode": 31744, | |
| "epoch": 0.13598355037697052, | |
| "eps": 6, | |
| "loss/policy_avg": 0.22470299899578094, | |
| "lr": 2.28515625e-06, | |
| "objective/entropy": -35.38658905029297, | |
| "objective/kl": 33.76303482055664, | |
| "objective/non_score_reward": -0.16881518065929413, | |
| "objective/rlhf_reward": 9.489850044250488, | |
| "objective/scores": 9.658665657043457, | |
| "policy/approxkl_avg": 0.2829976975917816, | |
| "policy/clipfrac_avg": 0.359375, | |
| "policy/entropy_avg": 0.40766507387161255, | |
| "step": 61, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1899, | |
| "val/ratio": 1.0002920627593994, | |
| "val/ratio_var": 4.702184469351778e-06 | |
| }, | |
| { | |
| "episode": 32256, | |
| "epoch": 0.13817683344756682, | |
| "eps": 6, | |
| "loss/policy_avg": 0.18476197123527527, | |
| "lr": 2.2734375e-06, | |
| "objective/entropy": -35.610679626464844, | |
| "objective/kl": 32.4565544128418, | |
| "objective/non_score_reward": -0.1622827649116516, | |
| "objective/rlhf_reward": 9.31367015838623, | |
| "objective/scores": 9.475953102111816, | |
| "policy/approxkl_avg": 0.2603687644004822, | |
| "policy/clipfrac_avg": 0.375, | |
| "policy/entropy_avg": 0.4185265302658081, | |
| "step": 62, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1909, | |
| "val/ratio": 0.9998178482055664, | |
| "val/ratio_var": 3.281563522250508e-06 | |
| }, | |
| { | |
| "episode": 32768, | |
| "epoch": 0.1403701165181631, | |
| "eps": 6, | |
| "loss/policy_avg": 0.26672354340553284, | |
| "lr": 2.26171875e-06, | |
| "objective/entropy": -34.83911895751953, | |
| "objective/kl": 33.32848358154297, | |
| "objective/non_score_reward": -0.166642427444458, | |
| "objective/rlhf_reward": 9.365612030029297, | |
| "objective/scores": 9.532255172729492, | |
| "policy/approxkl_avg": 0.21640878915786743, | |
| "policy/clipfrac_avg": 0.35546875, | |
| "policy/entropy_avg": 0.4103761911392212, | |
| "step": 63, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1924, | |
| "val/ratio": 1.000182867050171, | |
| "val/ratio_var": 2.308673174411524e-06 | |
| }, | |
| { | |
| "episode": 33280, | |
| "epoch": 0.14256339958875944, | |
| "eps": 6, | |
| "loss/policy_avg": 0.22516316175460815, | |
| "lr": 2.25e-06, | |
| "objective/entropy": -35.21662902832031, | |
| "objective/kl": 33.290374755859375, | |
| "objective/non_score_reward": -0.16645187139511108, | |
| "objective/rlhf_reward": 9.381125450134277, | |
| "objective/scores": 9.547576904296875, | |
| "policy/approxkl_avg": 0.3286188542842865, | |
| "policy/clipfrac_avg": 0.357421875, | |
| "policy/entropy_avg": 0.41928941011428833, | |
| "step": 64, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1964, | |
| "val/ratio": 1.000014066696167, | |
| "val/ratio_var": 3.4851202599384123e-06 | |
| }, | |
| { | |
| "episode": 33792, | |
| "epoch": 0.14475668265935573, | |
| "eps": 6, | |
| "loss/policy_avg": 0.18076206743717194, | |
| "lr": 2.23828125e-06, | |
| "objective/entropy": -37.27294921875, | |
| "objective/kl": 32.67399978637695, | |
| "objective/non_score_reward": -0.16336998343467712, | |
| "objective/rlhf_reward": 9.350773811340332, | |
| "objective/scores": 9.514143943786621, | |
| "policy/approxkl_avg": 0.23823928833007812, | |
| "policy/clipfrac_avg": 0.388671875, | |
| "policy/entropy_avg": 0.4244874119758606, | |
| "step": 65, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1935, | |
| "val/ratio": 0.9997513294219971, | |
| "val/ratio_var": 3.028096216439735e-06 | |
| }, | |
| { | |
| "episode": 34304, | |
| "epoch": 0.14694996572995203, | |
| "eps": 6, | |
| "loss/policy_avg": 0.17251071333885193, | |
| "lr": 2.2265625e-06, | |
| "objective/entropy": -33.87648010253906, | |
| "objective/kl": 33.30876159667969, | |
| "objective/non_score_reward": -0.16654381155967712, | |
| "objective/rlhf_reward": 9.384592056274414, | |
| "objective/scores": 9.551136016845703, | |
| "policy/approxkl_avg": 0.25553739070892334, | |
| "policy/clipfrac_avg": 0.369140625, | |
| "policy/entropy_avg": 0.450764536857605, | |
| "step": 66, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1977, | |
| "val/ratio": 0.9999786019325256, | |
| "val/ratio_var": 3.189375547663076e-06 | |
| }, | |
| { | |
| "episode": 34816, | |
| "epoch": 0.14914324880054833, | |
| "eps": 6, | |
| "loss/policy_avg": 0.2089308649301529, | |
| "lr": 2.21484375e-06, | |
| "objective/entropy": -34.71873092651367, | |
| "objective/kl": 32.921295166015625, | |
| "objective/non_score_reward": -0.1646064817905426, | |
| "objective/rlhf_reward": 9.405241966247559, | |
| "objective/scores": 9.56984806060791, | |
| "policy/approxkl_avg": 0.24852168560028076, | |
| "policy/clipfrac_avg": 0.369140625, | |
| "policy/entropy_avg": 0.4264836013317108, | |
| "step": 67, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2023, | |
| "val/ratio": 0.9999498724937439, | |
| "val/ratio_var": 3.718509105965495e-06 | |
| }, | |
| { | |
| "episode": 35328, | |
| "epoch": 0.15133653187114463, | |
| "eps": 6, | |
| "loss/policy_avg": 0.30707770586013794, | |
| "lr": 2.203125e-06, | |
| "objective/entropy": -33.18562698364258, | |
| "objective/kl": 34.398094177246094, | |
| "objective/non_score_reward": -0.17199048399925232, | |
| "objective/rlhf_reward": 9.318955421447754, | |
| "objective/scores": 9.490945816040039, | |
| "policy/approxkl_avg": 0.2854606509208679, | |
| "policy/clipfrac_avg": 0.35546875, | |
| "policy/entropy_avg": 0.43861570954322815, | |
| "step": 68, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2134, | |
| "val/ratio": 1.0002244710922241, | |
| "val/ratio_var": 3.3594094475120073e-06 | |
| }, | |
| { | |
| "episode": 35840, | |
| "epoch": 0.15352981494174092, | |
| "eps": 6, | |
| "loss/policy_avg": 0.21776500344276428, | |
| "lr": 2.19140625e-06, | |
| "objective/entropy": -33.90901565551758, | |
| "objective/kl": 33.42001724243164, | |
| "objective/non_score_reward": -0.16710007190704346, | |
| "objective/rlhf_reward": 9.314750671386719, | |
| "objective/scores": 9.481850624084473, | |
| "policy/approxkl_avg": 0.27862101793289185, | |
| "policy/clipfrac_avg": 0.318359375, | |
| "policy/entropy_avg": 0.4583716094493866, | |
| "step": 69, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2177, | |
| "val/ratio": 1.0001673698425293, | |
| "val/ratio_var": 3.758811544685159e-06 | |
| }, | |
| { | |
| "episode": 36352, | |
| "epoch": 0.15572309801233722, | |
| "eps": 6, | |
| "loss/policy_avg": 0.2049793154001236, | |
| "lr": 2.1796875e-06, | |
| "objective/entropy": -33.58552169799805, | |
| "objective/kl": 33.38475799560547, | |
| "objective/non_score_reward": -0.16692380607128143, | |
| "objective/rlhf_reward": 9.471214294433594, | |
| "objective/scores": 9.638137817382812, | |
| "policy/approxkl_avg": 0.23455402255058289, | |
| "policy/clipfrac_avg": 0.353515625, | |
| "policy/entropy_avg": 0.4269852638244629, | |
| "step": 70, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2168, | |
| "val/ratio": 1.0000548362731934, | |
| "val/ratio_var": 3.26197096001124e-06 | |
| }, | |
| { | |
| "episode": 36864, | |
| "epoch": 0.15791638108293352, | |
| "eps": 6, | |
| "loss/policy_avg": 0.2173652946949005, | |
| "lr": 2.16796875e-06, | |
| "objective/entropy": -32.59899139404297, | |
| "objective/kl": 34.50146484375, | |
| "objective/non_score_reward": -0.17250731587409973, | |
| "objective/rlhf_reward": 9.358166694641113, | |
| "objective/scores": 9.53067398071289, | |
| "policy/approxkl_avg": 0.26884737610816956, | |
| "policy/clipfrac_avg": 0.3828125, | |
| "policy/entropy_avg": 0.4478238523006439, | |
| "step": 71, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2152, | |
| "val/ratio": 0.9999924302101135, | |
| "val/ratio_var": 3.7096308460604632e-06 | |
| }, | |
| { | |
| "episode": 37376, | |
| "epoch": 0.1601096641535298, | |
| "eps": 6, | |
| "loss/policy_avg": 0.1738312691450119, | |
| "lr": 2.15625e-06, | |
| "objective/entropy": -33.73279571533203, | |
| "objective/kl": 34.007720947265625, | |
| "objective/non_score_reward": -0.1700385957956314, | |
| "objective/rlhf_reward": 9.712380409240723, | |
| "objective/scores": 9.88241958618164, | |
| "policy/approxkl_avg": 0.27387821674346924, | |
| "policy/clipfrac_avg": 0.365234375, | |
| "policy/entropy_avg": 0.44403284788131714, | |
| "step": 72, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2140, | |
| "val/ratio": 0.9999028444290161, | |
| "val/ratio_var": 4.58646536571905e-06 | |
| }, | |
| { | |
| "episode": 37888, | |
| "epoch": 0.1623029472241261, | |
| "eps": 6, | |
| "loss/policy_avg": 0.16620802879333496, | |
| "lr": 2.14453125e-06, | |
| "objective/entropy": -35.81139373779297, | |
| "objective/kl": 32.9052734375, | |
| "objective/non_score_reward": -0.1645263433456421, | |
| "objective/rlhf_reward": 9.479964256286621, | |
| "objective/scores": 9.644491195678711, | |
| "policy/approxkl_avg": 0.25569581985473633, | |
| "policy/clipfrac_avg": 0.3984375, | |
| "policy/entropy_avg": 0.42350655794143677, | |
| "step": 73, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2139, | |
| "val/ratio": 0.9998481273651123, | |
| "val/ratio_var": 3.5795840176433558e-06 | |
| }, | |
| { | |
| "episode": 38400, | |
| "epoch": 0.1644962302947224, | |
| "eps": 6, | |
| "loss/policy_avg": 0.22138690948486328, | |
| "lr": 2.1328125e-06, | |
| "objective/entropy": -34.88341522216797, | |
| "objective/kl": 33.08055877685547, | |
| "objective/non_score_reward": -0.16540278494358063, | |
| "objective/rlhf_reward": 9.436692237854004, | |
| "objective/scores": 9.602094650268555, | |
| "policy/approxkl_avg": 0.21768277883529663, | |
| "policy/clipfrac_avg": 0.33203125, | |
| "policy/entropy_avg": 0.43899866938591003, | |
| "step": 74, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2290, | |
| "val/ratio": 1.000012993812561, | |
| "val/ratio_var": 2.8327724521659547e-06 | |
| }, | |
| { | |
| "episode": 38912, | |
| "epoch": 0.1666895133653187, | |
| "eps": 6, | |
| "loss/policy_avg": 0.17552754282951355, | |
| "lr": 2.12109375e-06, | |
| "objective/entropy": -32.78399658203125, | |
| "objective/kl": 33.330780029296875, | |
| "objective/non_score_reward": -0.16665390133857727, | |
| "objective/rlhf_reward": 9.520395278930664, | |
| "objective/scores": 9.687049865722656, | |
| "policy/approxkl_avg": 0.24098150432109833, | |
| "policy/clipfrac_avg": 0.396484375, | |
| "policy/entropy_avg": 0.43451428413391113, | |
| "step": 75, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2224, | |
| "val/ratio": 1.0003948211669922, | |
| "val/ratio_var": 3.863194706354989e-06 | |
| }, | |
| { | |
| "episode": 39424, | |
| "epoch": 0.168882796435915, | |
| "eps": 6, | |
| "loss/policy_avg": 0.16361400485038757, | |
| "lr": 2.109375e-06, | |
| "objective/entropy": -36.86976623535156, | |
| "objective/kl": 32.36768341064453, | |
| "objective/non_score_reward": -0.16183842718601227, | |
| "objective/rlhf_reward": 9.400718688964844, | |
| "objective/scores": 9.562556266784668, | |
| "policy/approxkl_avg": 0.2723867893218994, | |
| "policy/clipfrac_avg": 0.353515625, | |
| "policy/entropy_avg": 0.4423583149909973, | |
| "step": 76, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2366, | |
| "val/ratio": 0.9998538494110107, | |
| "val/ratio_var": 3.881733846355928e-06 | |
| }, | |
| { | |
| "episode": 39936, | |
| "epoch": 0.1710760795065113, | |
| "eps": 6, | |
| "loss/policy_avg": 0.20113158226013184, | |
| "lr": 2.09765625e-06, | |
| "objective/entropy": -36.3262939453125, | |
| "objective/kl": 31.993881225585938, | |
| "objective/non_score_reward": -0.15996940433979034, | |
| "objective/rlhf_reward": 9.591435432434082, | |
| "objective/scores": 9.751405715942383, | |
| "policy/approxkl_avg": 0.21415743231773376, | |
| "policy/clipfrac_avg": 0.330078125, | |
| "policy/entropy_avg": 0.417125403881073, | |
| "step": 77, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2304, | |
| "val/ratio": 1.0003247261047363, | |
| "val/ratio_var": 3.1106928872759454e-06 | |
| }, | |
| { | |
| "episode": 40448, | |
| "epoch": 0.17326936257710762, | |
| "eps": 6, | |
| "loss/policy_avg": 0.19219069182872772, | |
| "lr": 2.0859375e-06, | |
| "objective/entropy": -35.67451858520508, | |
| "objective/kl": 32.77976989746094, | |
| "objective/non_score_reward": -0.16389885544776917, | |
| "objective/rlhf_reward": 9.648540496826172, | |
| "objective/scores": 9.81243896484375, | |
| "policy/approxkl_avg": 0.24051856994628906, | |
| "policy/clipfrac_avg": 0.357421875, | |
| "policy/entropy_avg": 0.39866721630096436, | |
| "step": 78, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2242, | |
| "val/ratio": 0.9996334910392761, | |
| "val/ratio_var": 2.991866267620935e-06 | |
| }, | |
| { | |
| "episode": 40960, | |
| "epoch": 0.17546264564770392, | |
| "eps": 6, | |
| "loss/policy_avg": 0.22769129276275635, | |
| "lr": 2.07421875e-06, | |
| "objective/entropy": -33.94448471069336, | |
| "objective/kl": 33.3216552734375, | |
| "objective/non_score_reward": -0.1666082888841629, | |
| "objective/rlhf_reward": 9.335718154907227, | |
| "objective/scores": 9.502326965332031, | |
| "policy/approxkl_avg": 0.2864202558994293, | |
| "policy/clipfrac_avg": 0.3515625, | |
| "policy/entropy_avg": 0.4178268313407898, | |
| "step": 79, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2216, | |
| "val/ratio": 1.0000176429748535, | |
| "val/ratio_var": 2.602319455036195e-06 | |
| }, | |
| { | |
| "episode": 41472, | |
| "epoch": 0.17765592871830022, | |
| "eps": 6, | |
| "loss/policy_avg": 0.1986619234085083, | |
| "lr": 2.0625e-06, | |
| "objective/entropy": -36.479896545410156, | |
| "objective/kl": 33.37843322753906, | |
| "objective/non_score_reward": -0.1668921709060669, | |
| "objective/rlhf_reward": 9.689064025878906, | |
| "objective/scores": 9.855956077575684, | |
| "policy/approxkl_avg": 0.2828975319862366, | |
| "policy/clipfrac_avg": 0.369140625, | |
| "policy/entropy_avg": 0.39649999141693115, | |
| "step": 80, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2166, | |
| "val/ratio": 1.0000145435333252, | |
| "val/ratio_var": 3.323544660815969e-06 | |
| }, | |
| { | |
| "episode": 41984, | |
| "epoch": 0.1798492117888965, | |
| "eps": 6, | |
| "loss/policy_avg": 0.36803221702575684, | |
| "lr": 2.05078125e-06, | |
| "objective/entropy": -34.6856803894043, | |
| "objective/kl": 32.34107971191406, | |
| "objective/non_score_reward": -0.1617053896188736, | |
| "objective/rlhf_reward": 9.39497184753418, | |
| "objective/scores": 9.556676864624023, | |
| "policy/approxkl_avg": 0.30969473719596863, | |
| "policy/clipfrac_avg": 0.337890625, | |
| "policy/entropy_avg": 0.40510374307632446, | |
| "step": 81, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2162, | |
| "val/ratio": 0.9998123049736023, | |
| "val/ratio_var": 5.09162737216684e-06 | |
| }, | |
| { | |
| "episode": 42496, | |
| "epoch": 0.1820424948594928, | |
| "eps": 6, | |
| "loss/policy_avg": 0.16523130238056183, | |
| "lr": 2.0390625e-06, | |
| "objective/entropy": -36.272003173828125, | |
| "objective/kl": 34.003143310546875, | |
| "objective/non_score_reward": -0.17001570761203766, | |
| "objective/rlhf_reward": 9.380304336547852, | |
| "objective/scores": 9.55031967163086, | |
| "policy/approxkl_avg": 0.24730241298675537, | |
| "policy/clipfrac_avg": 0.345703125, | |
| "policy/entropy_avg": 0.40937918424606323, | |
| "step": 82, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2186, | |
| "val/ratio": 0.999603807926178, | |
| "val/ratio_var": 2.4847686290740967e-06 | |
| }, | |
| { | |
| "episode": 43008, | |
| "epoch": 0.1842357779300891, | |
| "eps": 6, | |
| "loss/policy_avg": 0.2093135118484497, | |
| "lr": 2.02734375e-06, | |
| "objective/entropy": -35.804931640625, | |
| "objective/kl": 33.306732177734375, | |
| "objective/non_score_reward": -0.1665336787700653, | |
| "objective/rlhf_reward": 9.440333366394043, | |
| "objective/scores": 9.606866836547852, | |
| "policy/approxkl_avg": 0.22019661962985992, | |
| "policy/clipfrac_avg": 0.3359375, | |
| "policy/entropy_avg": 0.404262512922287, | |
| "step": 83, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2208, | |
| "val/ratio": 1.000349521636963, | |
| "val/ratio_var": 2.718377345445333e-06 | |
| }, | |
| { | |
| "episode": 43520, | |
| "epoch": 0.1864290610006854, | |
| "eps": 6, | |
| "loss/policy_avg": 0.24589769542217255, | |
| "lr": 2.015625e-06, | |
| "objective/entropy": -35.195167541503906, | |
| "objective/kl": 32.886680603027344, | |
| "objective/non_score_reward": -0.16443338990211487, | |
| "objective/rlhf_reward": 9.737478256225586, | |
| "objective/scores": 9.901910781860352, | |
| "policy/approxkl_avg": 0.224727600812912, | |
| "policy/clipfrac_avg": 0.357421875, | |
| "policy/entropy_avg": 0.3911252021789551, | |
| "step": 84, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2169, | |
| "val/ratio": 1.0001827478408813, | |
| "val/ratio_var": 2.8814699817303335e-06 | |
| }, | |
| { | |
| "episode": 44032, | |
| "epoch": 0.1886223440712817, | |
| "eps": 6, | |
| "loss/policy_avg": 0.2011430561542511, | |
| "lr": 2.00390625e-06, | |
| "objective/entropy": -36.270198822021484, | |
| "objective/kl": 32.86201095581055, | |
| "objective/non_score_reward": -0.1643100529909134, | |
| "objective/rlhf_reward": 9.618223190307617, | |
| "objective/scores": 9.782532691955566, | |
| "policy/approxkl_avg": 0.29179248213768005, | |
| "policy/clipfrac_avg": 0.33203125, | |
| "policy/entropy_avg": 0.3870548605918884, | |
| "step": 85, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2167, | |
| "val/ratio": 1.0001106262207031, | |
| "val/ratio_var": 3.691868414534838e-06 | |
| }, | |
| { | |
| "episode": 44544, | |
| "epoch": 0.190815627141878, | |
| "eps": 6, | |
| "loss/policy_avg": 0.1226603090763092, | |
| "lr": 1.9921875e-06, | |
| "objective/entropy": -35.539337158203125, | |
| "objective/kl": 33.36035919189453, | |
| "objective/non_score_reward": -0.1668018102645874, | |
| "objective/rlhf_reward": 9.765243530273438, | |
| "objective/scores": 9.932045936584473, | |
| "policy/approxkl_avg": 0.23382121324539185, | |
| "policy/clipfrac_avg": 0.365234375, | |
| "policy/entropy_avg": 0.39480116963386536, | |
| "step": 86, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2204, | |
| "val/ratio": 0.9996500015258789, | |
| "val/ratio_var": 3.449556515988661e-06 | |
| }, | |
| { | |
| "episode": 45056, | |
| "epoch": 0.1930089102124743, | |
| "eps": 6, | |
| "loss/policy_avg": 0.22705024480819702, | |
| "lr": 1.98046875e-06, | |
| "objective/entropy": -34.78813171386719, | |
| "objective/kl": 33.85824203491211, | |
| "objective/non_score_reward": -0.1692911982536316, | |
| "objective/rlhf_reward": 9.508092880249023, | |
| "objective/scores": 9.677383422851562, | |
| "policy/approxkl_avg": 0.2564248740673065, | |
| "policy/clipfrac_avg": 0.341796875, | |
| "policy/entropy_avg": 0.39244288206100464, | |
| "step": 87, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2230, | |
| "val/ratio": 0.9998552799224854, | |
| "val/ratio_var": 2.9433140298351645e-06 | |
| }, | |
| { | |
| "episode": 45568, | |
| "epoch": 0.1952021932830706, | |
| "eps": 6, | |
| "loss/policy_avg": 0.25029683113098145, | |
| "lr": 1.96875e-06, | |
| "objective/entropy": -38.32817077636719, | |
| "objective/kl": 32.53593826293945, | |
| "objective/non_score_reward": -0.16267967224121094, | |
| "objective/rlhf_reward": 9.676530838012695, | |
| "objective/scores": 9.839210510253906, | |
| "policy/approxkl_avg": 0.24715110659599304, | |
| "policy/clipfrac_avg": 0.345703125, | |
| "policy/entropy_avg": 0.36855989694595337, | |
| "step": 88, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2186, | |
| "val/ratio": 1.0001134872436523, | |
| "val/ratio_var": 3.477802692941623e-06 | |
| }, | |
| { | |
| "episode": 46080, | |
| "epoch": 0.1973954763536669, | |
| "eps": 6, | |
| "loss/policy_avg": 0.2676100730895996, | |
| "lr": 1.95703125e-06, | |
| "objective/entropy": -34.89455795288086, | |
| "objective/kl": 34.006202697753906, | |
| "objective/non_score_reward": -0.17003101110458374, | |
| "objective/rlhf_reward": 9.682475090026855, | |
| "objective/scores": 9.852506637573242, | |
| "policy/approxkl_avg": 0.2565373182296753, | |
| "policy/clipfrac_avg": 0.337890625, | |
| "policy/entropy_avg": 0.38856256008148193, | |
| "step": 89, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2148, | |
| "val/ratio": 0.9999434947967529, | |
| "val/ratio_var": 3.6854912650596816e-06 | |
| }, | |
| { | |
| "episode": 46592, | |
| "epoch": 0.19958875942426318, | |
| "eps": 6, | |
| "loss/policy_avg": 0.17851054668426514, | |
| "lr": 1.9453125e-06, | |
| "objective/entropy": -37.629817962646484, | |
| "objective/kl": 33.07708740234375, | |
| "objective/non_score_reward": -0.1653854250907898, | |
| "objective/rlhf_reward": 9.640422821044922, | |
| "objective/scores": 9.805809020996094, | |
| "policy/approxkl_avg": 0.2017795443534851, | |
| "policy/clipfrac_avg": 0.34765625, | |
| "policy/entropy_avg": 0.37295690178871155, | |
| "step": 90, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2112, | |
| "val/ratio": 1.0001370906829834, | |
| "val/ratio_var": 2.355475317017408e-06 | |
| }, | |
| { | |
| "episode": 47104, | |
| "epoch": 0.20178204249485948, | |
| "eps": 6, | |
| "loss/policy_avg": 0.1686045229434967, | |
| "lr": 1.93359375e-06, | |
| "objective/entropy": -38.061744689941406, | |
| "objective/kl": 32.129512786865234, | |
| "objective/non_score_reward": -0.16064755618572235, | |
| "objective/rlhf_reward": 9.790443420410156, | |
| "objective/scores": 9.951091766357422, | |
| "policy/approxkl_avg": 0.21093934774398804, | |
| "policy/clipfrac_avg": 0.36328125, | |
| "policy/entropy_avg": 0.3660760521888733, | |
| "step": 91, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2086, | |
| "val/ratio": 0.9999697208404541, | |
| "val/ratio_var": 3.8051555293350248e-06 | |
| }, | |
| { | |
| "episode": 47616, | |
| "epoch": 0.20397532556545578, | |
| "eps": 6, | |
| "loss/policy_avg": 0.20496009290218353, | |
| "lr": 1.921875e-06, | |
| "objective/entropy": -38.42815399169922, | |
| "objective/kl": 33.15034484863281, | |
| "objective/non_score_reward": -0.16575172543525696, | |
| "objective/rlhf_reward": 9.645820617675781, | |
| "objective/scores": 9.811572074890137, | |
| "policy/approxkl_avg": 0.2742815613746643, | |
| "policy/clipfrac_avg": 0.353515625, | |
| "policy/entropy_avg": 0.35775893926620483, | |
| "step": 92, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2077, | |
| "val/ratio": 1.0003482103347778, | |
| "val/ratio_var": 3.3726596484484617e-06 | |
| }, | |
| { | |
| "episode": 48128, | |
| "epoch": 0.2061686086360521, | |
| "eps": 6, | |
| "loss/policy_avg": 0.1944173276424408, | |
| "lr": 1.91015625e-06, | |
| "objective/entropy": -37.453712463378906, | |
| "objective/kl": 33.70136260986328, | |
| "objective/non_score_reward": -0.16850680112838745, | |
| "objective/rlhf_reward": 9.822844505310059, | |
| "objective/scores": 9.991350173950195, | |
| "policy/approxkl_avg": 0.264553964138031, | |
| "policy/clipfrac_avg": 0.337890625, | |
| "policy/entropy_avg": 0.3743836581707001, | |
| "step": 93, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2082, | |
| "val/ratio": 0.9996594786643982, | |
| "val/ratio_var": 2.970580226246966e-06 | |
| }, | |
| { | |
| "episode": 48640, | |
| "epoch": 0.2083618917066484, | |
| "eps": 6, | |
| "loss/policy_avg": 0.20501361787319183, | |
| "lr": 1.8984375e-06, | |
| "objective/entropy": -36.34868621826172, | |
| "objective/kl": 33.67938995361328, | |
| "objective/non_score_reward": -0.1683969646692276, | |
| "objective/rlhf_reward": 9.771129608154297, | |
| "objective/scores": 9.939525604248047, | |
| "policy/approxkl_avg": 0.29675331711769104, | |
| "policy/clipfrac_avg": 0.31640625, | |
| "policy/entropy_avg": 0.35725563764572144, | |
| "step": 94, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2083, | |
| "val/ratio": 1.0001224279403687, | |
| "val/ratio_var": 3.695943178172456e-06 | |
| }, | |
| { | |
| "episode": 49152, | |
| "epoch": 0.2105551747772447, | |
| "eps": 6, | |
| "loss/policy_avg": 0.19600501656532288, | |
| "lr": 1.8867187500000001e-06, | |
| "objective/entropy": -34.53193664550781, | |
| "objective/kl": 34.90612030029297, | |
| "objective/non_score_reward": -0.17453059554100037, | |
| "objective/rlhf_reward": 9.953956604003906, | |
| "objective/scores": 10.128486633300781, | |
| "policy/approxkl_avg": 0.21787747740745544, | |
| "policy/clipfrac_avg": 0.318359375, | |
| "policy/entropy_avg": 0.339312881231308, | |
| "step": 95, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1995, | |
| "val/ratio": 0.999893844127655, | |
| "val/ratio_var": 2.4975431642815238e-06 | |
| }, | |
| { | |
| "episode": 49664, | |
| "epoch": 0.212748457847841, | |
| "eps": 6, | |
| "loss/policy_avg": 0.22088220715522766, | |
| "lr": 1.875e-06, | |
| "objective/entropy": -34.256370544433594, | |
| "objective/kl": 35.238216400146484, | |
| "objective/non_score_reward": -0.1761910766363144, | |
| "objective/rlhf_reward": 9.564779281616211, | |
| "objective/scores": 9.74096965789795, | |
| "policy/approxkl_avg": 0.245886892080307, | |
| "policy/clipfrac_avg": 0.35546875, | |
| "policy/entropy_avg": 0.35175642371177673, | |
| "step": 96, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1972, | |
| "val/ratio": 0.9998222589492798, | |
| "val/ratio_var": 3.3520850593049545e-06 | |
| }, | |
| { | |
| "episode": 50176, | |
| "epoch": 0.2149417409184373, | |
| "eps": 6, | |
| "loss/policy_avg": 0.1739550083875656, | |
| "lr": 1.86328125e-06, | |
| "objective/entropy": -36.02185821533203, | |
| "objective/kl": 35.261474609375, | |
| "objective/non_score_reward": -0.17630735039710999, | |
| "objective/rlhf_reward": 9.913810729980469, | |
| "objective/scores": 10.090118408203125, | |
| "policy/approxkl_avg": 0.19460904598236084, | |
| "policy/clipfrac_avg": 0.337890625, | |
| "policy/entropy_avg": 0.3500151038169861, | |
| "step": 97, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2014, | |
| "val/ratio": 1.0000877380371094, | |
| "val/ratio_var": 2.4969581318146084e-06 | |
| }, | |
| { | |
| "episode": 50688, | |
| "epoch": 0.2171350239890336, | |
| "eps": 6, | |
| "loss/policy_avg": 0.27473074197769165, | |
| "lr": 1.8515625000000001e-06, | |
| "objective/entropy": -35.280155181884766, | |
| "objective/kl": 36.09398651123047, | |
| "objective/non_score_reward": -0.18046993017196655, | |
| "objective/rlhf_reward": 9.731412887573242, | |
| "objective/scores": 9.911883354187012, | |
| "policy/approxkl_avg": 0.23806479573249817, | |
| "policy/clipfrac_avg": 0.34765625, | |
| "policy/entropy_avg": 0.36351966857910156, | |
| "step": 98, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 2009, | |
| "val/ratio": 1.0001752376556396, | |
| "val/ratio_var": 4.085913587914547e-06 | |
| }, | |
| { | |
| "episode": 51200, | |
| "epoch": 0.21932830705962988, | |
| "eps": 6, | |
| "loss/policy_avg": 0.28736796975135803, | |
| "lr": 1.83984375e-06, | |
| "objective/entropy": -35.07633972167969, | |
| "objective/kl": 35.50556182861328, | |
| "objective/non_score_reward": -0.1775278002023697, | |
| "objective/rlhf_reward": 9.971885681152344, | |
| "objective/scores": 10.1494140625, | |
| "policy/approxkl_avg": 0.22533981502056122, | |
| "policy/clipfrac_avg": 0.345703125, | |
| "policy/entropy_avg": 0.33290359377861023, | |
| "step": 99, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1977, | |
| "val/ratio": 0.9998661279678345, | |
| "val/ratio_var": 5.175881597097032e-06 | |
| }, | |
| { | |
| "episode": 51712, | |
| "epoch": 0.22152159013022618, | |
| "eps": 6, | |
| "loss/policy_avg": 0.24332170188426971, | |
| "lr": 1.828125e-06, | |
| "objective/entropy": -31.962793350219727, | |
| "objective/kl": 36.79883575439453, | |
| "objective/non_score_reward": -0.18399417400360107, | |
| "objective/rlhf_reward": 9.824310302734375, | |
| "objective/scores": 10.008304595947266, | |
| "policy/approxkl_avg": 0.2577008008956909, | |
| "policy/clipfrac_avg": 0.40234375, | |
| "policy/entropy_avg": 0.3309575021266937, | |
| "step": 100, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1903, | |
| "val/ratio": 1.000428318977356, | |
| "val/ratio_var": 2.6899542717728764e-06 | |
| }, | |
| { | |
| "episode": 52224, | |
| "epoch": 0.22371487320082248, | |
| "eps": 6, | |
| "loss/policy_avg": 0.17923694849014282, | |
| "lr": 1.81640625e-06, | |
| "objective/entropy": -34.664024353027344, | |
| "objective/kl": 35.354034423828125, | |
| "objective/non_score_reward": -0.1767701804637909, | |
| "objective/rlhf_reward": 10.139245986938477, | |
| "objective/scores": 10.316017150878906, | |
| "policy/approxkl_avg": 0.19910496473312378, | |
| "policy/clipfrac_avg": 0.31640625, | |
| "policy/entropy_avg": 0.3107799291610718, | |
| "step": 101, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1891, | |
| "val/ratio": 0.9996460676193237, | |
| "val/ratio_var": 3.313231900392566e-06 | |
| }, | |
| { | |
| "episode": 52736, | |
| "epoch": 0.22590815627141878, | |
| "eps": 6, | |
| "loss/policy_avg": 0.17553430795669556, | |
| "lr": 1.8046875e-06, | |
| "objective/entropy": -34.35253143310547, | |
| "objective/kl": 35.79362869262695, | |
| "objective/non_score_reward": -0.1789681315422058, | |
| "objective/rlhf_reward": 10.026729583740234, | |
| "objective/scores": 10.205698013305664, | |
| "policy/approxkl_avg": 0.21303945779800415, | |
| "policy/clipfrac_avg": 0.328125, | |
| "policy/entropy_avg": 0.3087007999420166, | |
| "step": 102, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1905, | |
| "val/ratio": 0.9998683929443359, | |
| "val/ratio_var": 3.267524562033941e-06 | |
| }, | |
| { | |
| "episode": 53248, | |
| "epoch": 0.22810143934201507, | |
| "eps": 6, | |
| "loss/policy_avg": 0.20496255159378052, | |
| "lr": 1.79296875e-06, | |
| "objective/entropy": -34.044639587402344, | |
| "objective/kl": 35.621826171875, | |
| "objective/non_score_reward": -0.17810912430286407, | |
| "objective/rlhf_reward": 10.048910140991211, | |
| "objective/scores": 10.227019309997559, | |
| "policy/approxkl_avg": 0.2181515246629715, | |
| "policy/clipfrac_avg": 0.357421875, | |
| "policy/entropy_avg": 0.3069969415664673, | |
| "step": 103, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1940, | |
| "val/ratio": 0.9999847412109375, | |
| "val/ratio_var": 3.6619542242988246e-06 | |
| }, | |
| { | |
| "episode": 53760, | |
| "epoch": 0.23029472241261137, | |
| "eps": 6, | |
| "loss/policy_avg": 0.22224432229995728, | |
| "lr": 1.78125e-06, | |
| "objective/entropy": -32.43903732299805, | |
| "objective/kl": 36.86625289916992, | |
| "objective/non_score_reward": -0.1843312531709671, | |
| "objective/rlhf_reward": 9.84052562713623, | |
| "objective/scores": 10.024856567382812, | |
| "policy/approxkl_avg": 0.1902885138988495, | |
| "policy/clipfrac_avg": 0.328125, | |
| "policy/entropy_avg": 0.30074501037597656, | |
| "step": 104, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1854, | |
| "val/ratio": 1.0004634857177734, | |
| "val/ratio_var": 2.30164823733503e-06 | |
| }, | |
| { | |
| "episode": 54272, | |
| "epoch": 0.23248800548320767, | |
| "eps": 6, | |
| "loss/policy_avg": 0.13901078701019287, | |
| "lr": 1.76953125e-06, | |
| "objective/entropy": -33.599708557128906, | |
| "objective/kl": 35.801998138427734, | |
| "objective/non_score_reward": -0.17900997400283813, | |
| "objective/rlhf_reward": 9.889688491821289, | |
| "objective/scores": 10.06869888305664, | |
| "policy/approxkl_avg": 0.16544947028160095, | |
| "policy/clipfrac_avg": 0.337890625, | |
| "policy/entropy_avg": 0.30036184191703796, | |
| "step": 105, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1859, | |
| "val/ratio": 1.0002105236053467, | |
| "val/ratio_var": 2.6306556719646323e-06 | |
| }, | |
| { | |
| "episode": 54784, | |
| "epoch": 0.23468128855380396, | |
| "eps": 6, | |
| "loss/policy_avg": 0.14493289589881897, | |
| "lr": 1.7578125e-06, | |
| "objective/entropy": -33.55315017700195, | |
| "objective/kl": 35.825531005859375, | |
| "objective/non_score_reward": -0.17912766337394714, | |
| "objective/rlhf_reward": 9.871797561645508, | |
| "objective/scores": 10.050926208496094, | |
| "policy/approxkl_avg": 0.1833547204732895, | |
| "policy/clipfrac_avg": 0.328125, | |
| "policy/entropy_avg": 0.2901462912559509, | |
| "step": 106, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1835, | |
| "val/ratio": 0.9998902678489685, | |
| "val/ratio_var": 2.489499593139044e-06 | |
| }, | |
| { | |
| "episode": 55296, | |
| "epoch": 0.2368745716244003, | |
| "eps": 6, | |
| "loss/policy_avg": 0.17336603999137878, | |
| "lr": 1.74609375e-06, | |
| "objective/entropy": -34.462337493896484, | |
| "objective/kl": 37.12619400024414, | |
| "objective/non_score_reward": -0.18563096225261688, | |
| "objective/rlhf_reward": 10.155291557312012, | |
| "objective/scores": 10.340921401977539, | |
| "policy/approxkl_avg": 0.18350008130073547, | |
| "policy/clipfrac_avg": 0.3359375, | |
| "policy/entropy_avg": 0.29074928164482117, | |
| "step": 107, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1954, | |
| "val/ratio": 1.0000966787338257, | |
| "val/ratio_var": 2.0360935195640195e-06 | |
| }, | |
| { | |
| "episode": 55808, | |
| "epoch": 0.23906785469499658, | |
| "eps": 6, | |
| "loss/policy_avg": 0.20065046846866608, | |
| "lr": 1.734375e-06, | |
| "objective/entropy": -35.58911895751953, | |
| "objective/kl": 35.46983337402344, | |
| "objective/non_score_reward": -0.17734915018081665, | |
| "objective/rlhf_reward": 9.807424545288086, | |
| "objective/scores": 9.984773635864258, | |
| "policy/approxkl_avg": 0.21259769797325134, | |
| "policy/clipfrac_avg": 0.3359375, | |
| "policy/entropy_avg": 0.2804480195045471, | |
| "step": 108, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1868, | |
| "val/ratio": 0.9998682737350464, | |
| "val/ratio_var": 4.149359028815525e-06 | |
| }, | |
| { | |
| "episode": 56320, | |
| "epoch": 0.24126113776559288, | |
| "eps": 6, | |
| "loss/policy_avg": 0.31327635049819946, | |
| "lr": 1.72265625e-06, | |
| "objective/entropy": -35.60870361328125, | |
| "objective/kl": 34.600460052490234, | |
| "objective/non_score_reward": -0.17300228774547577, | |
| "objective/rlhf_reward": 9.828847885131836, | |
| "objective/scores": 10.001850128173828, | |
| "policy/approxkl_avg": 0.2347819209098816, | |
| "policy/clipfrac_avg": 0.326171875, | |
| "policy/entropy_avg": 0.2796494960784912, | |
| "step": 109, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1903, | |
| "val/ratio": 1.000025749206543, | |
| "val/ratio_var": 3.987162926932797e-06 | |
| }, | |
| { | |
| "episode": 56832, | |
| "epoch": 0.24345442083618918, | |
| "eps": 6, | |
| "loss/policy_avg": 0.19177474081516266, | |
| "lr": 1.7109375e-06, | |
| "objective/entropy": -35.206573486328125, | |
| "objective/kl": 35.755706787109375, | |
| "objective/non_score_reward": -0.1787785291671753, | |
| "objective/rlhf_reward": 10.03399658203125, | |
| "objective/scores": 10.212775230407715, | |
| "policy/approxkl_avg": 0.2251434177160263, | |
| "policy/clipfrac_avg": 0.388671875, | |
| "policy/entropy_avg": 0.2768189311027527, | |
| "step": 110, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1854, | |
| "val/ratio": 0.9997500777244568, | |
| "val/ratio_var": 2.821056341417716e-06 | |
| }, | |
| { | |
| "episode": 57344, | |
| "epoch": 0.24564770390678548, | |
| "eps": 6, | |
| "loss/policy_avg": 0.17819970846176147, | |
| "lr": 1.69921875e-06, | |
| "objective/entropy": -35.49714279174805, | |
| "objective/kl": 35.21353530883789, | |
| "objective/non_score_reward": -0.17606768012046814, | |
| "objective/rlhf_reward": 9.934215545654297, | |
| "objective/scores": 10.110282897949219, | |
| "policy/approxkl_avg": 0.2090197205543518, | |
| "policy/clipfrac_avg": 0.30859375, | |
| "policy/entropy_avg": 0.2791983485221863, | |
| "step": 111, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1825, | |
| "val/ratio": 1.0002068281173706, | |
| "val/ratio_var": 3.191226596754859e-06 | |
| }, | |
| { | |
| "episode": 57856, | |
| "epoch": 0.24784098697738177, | |
| "eps": 6, | |
| "loss/policy_avg": 0.13477346301078796, | |
| "lr": 1.6875e-06, | |
| "objective/entropy": -35.38134002685547, | |
| "objective/kl": 35.48783874511719, | |
| "objective/non_score_reward": -0.17743918299674988, | |
| "objective/rlhf_reward": 10.00422191619873, | |
| "objective/scores": 10.181660652160645, | |
| "policy/approxkl_avg": 0.21763411164283752, | |
| "policy/clipfrac_avg": 0.37109375, | |
| "policy/entropy_avg": 0.27607789635658264, | |
| "step": 112, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1811, | |
| "val/ratio": 1.0001035928726196, | |
| "val/ratio_var": 3.5984410260425648e-06 | |
| }, | |
| { | |
| "episode": 58368, | |
| "epoch": 0.25003427004797807, | |
| "eps": 6, | |
| "loss/policy_avg": 0.16193845868110657, | |
| "lr": 1.67578125e-06, | |
| "objective/entropy": -35.136634826660156, | |
| "objective/kl": 35.53947830200195, | |
| "objective/non_score_reward": -0.17769737541675568, | |
| "objective/rlhf_reward": 10.041696548461914, | |
| "objective/scores": 10.219392776489258, | |
| "policy/approxkl_avg": 0.18851657211780548, | |
| "policy/clipfrac_avg": 0.322265625, | |
| "policy/entropy_avg": 0.2754863500595093, | |
| "step": 113, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1822, | |
| "val/ratio": 0.9998536109924316, | |
| "val/ratio_var": 2.3457650968339294e-06 | |
| }, | |
| { | |
| "episode": 58880, | |
| "epoch": 0.25222755311857437, | |
| "eps": 6, | |
| "loss/policy_avg": 0.2662353217601776, | |
| "lr": 1.6640625e-06, | |
| "objective/entropy": -36.19813537597656, | |
| "objective/kl": 35.548973083496094, | |
| "objective/non_score_reward": -0.17774485051631927, | |
| "objective/rlhf_reward": 9.807806015014648, | |
| "objective/scores": 9.985549926757812, | |
| "policy/approxkl_avg": 0.22413387894630432, | |
| "policy/clipfrac_avg": 0.318359375, | |
| "policy/entropy_avg": 0.28221410512924194, | |
| "step": 114, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1945, | |
| "val/ratio": 1.0000545978546143, | |
| "val/ratio_var": 3.614477236624225e-06 | |
| }, | |
| { | |
| "episode": 59392, | |
| "epoch": 0.25442083618917066, | |
| "eps": 6, | |
| "loss/policy_avg": 0.1269383728504181, | |
| "lr": 1.6523437500000001e-06, | |
| "objective/entropy": -37.98196792602539, | |
| "objective/kl": 34.821964263916016, | |
| "objective/non_score_reward": -0.1741098165512085, | |
| "objective/rlhf_reward": 9.85723876953125, | |
| "objective/scores": 10.031347274780273, | |
| "policy/approxkl_avg": 0.21144835650920868, | |
| "policy/clipfrac_avg": 0.31640625, | |
| "policy/entropy_avg": 0.27860069274902344, | |
| "step": 115, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1918, | |
| "val/ratio": 1.0000715255737305, | |
| "val/ratio_var": 4.044930847157957e-06 | |
| }, | |
| { | |
| "episode": 59904, | |
| "epoch": 0.25661411925976696, | |
| "eps": 6, | |
| "loss/policy_avg": 0.18478526175022125, | |
| "lr": 1.640625e-06, | |
| "objective/entropy": -39.03168869018555, | |
| "objective/kl": 34.09989929199219, | |
| "objective/non_score_reward": -0.1704995036125183, | |
| "objective/rlhf_reward": 10.072548866271973, | |
| "objective/scores": 10.243047714233398, | |
| "policy/approxkl_avg": 0.18763652443885803, | |
| "policy/clipfrac_avg": 0.32421875, | |
| "policy/entropy_avg": 0.276102751493454, | |
| "step": 116, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1964, | |
| "val/ratio": 1.0003482103347778, | |
| "val/ratio_var": 3.092050519626355e-06 | |
| }, | |
| { | |
| "episode": 60416, | |
| "epoch": 0.25880740233036326, | |
| "eps": 6, | |
| "loss/policy_avg": 0.2445678561925888, | |
| "lr": 1.62890625e-06, | |
| "objective/entropy": -37.85968017578125, | |
| "objective/kl": 34.59711456298828, | |
| "objective/non_score_reward": -0.17298556864261627, | |
| "objective/rlhf_reward": 9.838810920715332, | |
| "objective/scores": 10.011796951293945, | |
| "policy/approxkl_avg": 0.2142867147922516, | |
| "policy/clipfrac_avg": 0.287109375, | |
| "policy/entropy_avg": 0.2769862413406372, | |
| "step": 117, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1913, | |
| "val/ratio": 1.0000524520874023, | |
| "val/ratio_var": 2.518341943869018e-06 | |
| }, | |
| { | |
| "episode": 60928, | |
| "epoch": 0.26100068540095955, | |
| "eps": 6, | |
| "loss/policy_avg": 0.14503635466098785, | |
| "lr": 1.6171875000000001e-06, | |
| "objective/entropy": -42.66570281982422, | |
| "objective/kl": 33.559688568115234, | |
| "objective/non_score_reward": -0.1677984595298767, | |
| "objective/rlhf_reward": 10.2135591506958, | |
| "objective/scores": 10.381357192993164, | |
| "policy/approxkl_avg": 0.17557722330093384, | |
| "policy/clipfrac_avg": 0.267578125, | |
| "policy/entropy_avg": 0.27216678857803345, | |
| "step": 118, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1978, | |
| "val/ratio": 0.9997793436050415, | |
| "val/ratio_var": 2.1884602574573364e-06 | |
| }, | |
| { | |
| "episode": 61440, | |
| "epoch": 0.26319396847155585, | |
| "eps": 6, | |
| "loss/policy_avg": 0.12860137224197388, | |
| "lr": 1.60546875e-06, | |
| "objective/entropy": -40.882850646972656, | |
| "objective/kl": 35.01697540283203, | |
| "objective/non_score_reward": -0.17508485913276672, | |
| "objective/rlhf_reward": 9.812532424926758, | |
| "objective/scores": 9.987617492675781, | |
| "policy/approxkl_avg": 0.21601755917072296, | |
| "policy/clipfrac_avg": 0.333984375, | |
| "policy/entropy_avg": 0.2853110432624817, | |
| "step": 119, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1991, | |
| "val/ratio": 1.0000758171081543, | |
| "val/ratio_var": 3.1630045214114944e-06 | |
| }, | |
| { | |
| "episode": 61952, | |
| "epoch": 0.26538725154215215, | |
| "eps": 6, | |
| "loss/policy_avg": 0.13918112218379974, | |
| "lr": 1.59375e-06, | |
| "objective/entropy": -42.673091888427734, | |
| "objective/kl": 33.7802734375, | |
| "objective/non_score_reward": -0.16890135407447815, | |
| "objective/rlhf_reward": 9.991598129272461, | |
| "objective/scores": 10.160500526428223, | |
| "policy/approxkl_avg": 0.2096889615058899, | |
| "policy/clipfrac_avg": 0.3125, | |
| "policy/entropy_avg": 0.2698075771331787, | |
| "step": 120, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1962, | |
| "val/ratio": 0.9996597766876221, | |
| "val/ratio_var": 2.5640338208177127e-06 | |
| }, | |
| { | |
| "episode": 62464, | |
| "epoch": 0.26758053461274844, | |
| "eps": 6, | |
| "loss/policy_avg": 0.14262011647224426, | |
| "lr": 1.5820312500000001e-06, | |
| "objective/entropy": -40.981964111328125, | |
| "objective/kl": 33.93827819824219, | |
| "objective/non_score_reward": -0.16969136893749237, | |
| "objective/rlhf_reward": 9.967924118041992, | |
| "objective/scores": 10.137615203857422, | |
| "policy/approxkl_avg": 0.17477768659591675, | |
| "policy/clipfrac_avg": 0.326171875, | |
| "policy/entropy_avg": 0.27177703380584717, | |
| "step": 121, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1948, | |
| "val/ratio": 1.0003581047058105, | |
| "val/ratio_var": 2.333908696527942e-06 | |
| }, | |
| { | |
| "episode": 62976, | |
| "epoch": 0.26977381768334474, | |
| "eps": 6, | |
| "loss/policy_avg": 0.1601879894733429, | |
| "lr": 1.5703125e-06, | |
| "objective/entropy": -39.27589797973633, | |
| "objective/kl": 34.60676574707031, | |
| "objective/non_score_reward": -0.17303383350372314, | |
| "objective/rlhf_reward": 9.912843704223633, | |
| "objective/scores": 10.085877418518066, | |
| "policy/approxkl_avg": 0.20070144534111023, | |
| "policy/clipfrac_avg": 0.337890625, | |
| "policy/entropy_avg": 0.2859034836292267, | |
| "step": 122, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1959, | |
| "val/ratio": 0.9996767044067383, | |
| "val/ratio_var": 2.508757233954384e-06 | |
| }, | |
| { | |
| "episode": 63488, | |
| "epoch": 0.27196710075394104, | |
| "eps": 6, | |
| "loss/policy_avg": 0.15391629934310913, | |
| "lr": 1.55859375e-06, | |
| "objective/entropy": -38.832088470458984, | |
| "objective/kl": 35.0777473449707, | |
| "objective/non_score_reward": -0.17538872361183167, | |
| "objective/rlhf_reward": 9.996062278747559, | |
| "objective/scores": 10.1714506149292, | |
| "policy/approxkl_avg": 0.22946523129940033, | |
| "policy/clipfrac_avg": 0.326171875, | |
| "policy/entropy_avg": 0.27930349111557007, | |
| "step": 123, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1959, | |
| "val/ratio": 0.9997979402542114, | |
| "val/ratio_var": 3.5869422845280496e-06 | |
| }, | |
| { | |
| "episode": 64000, | |
| "epoch": 0.27416038382453733, | |
| "eps": 6, | |
| "loss/policy_avg": 0.16380949318408966, | |
| "lr": 1.5468750000000001e-06, | |
| "objective/entropy": -39.75455856323242, | |
| "objective/kl": 35.5320930480957, | |
| "objective/non_score_reward": -0.17766046524047852, | |
| "objective/rlhf_reward": 9.862564086914062, | |
| "objective/scores": 10.040224075317383, | |
| "policy/approxkl_avg": 0.1650054007768631, | |
| "policy/clipfrac_avg": 0.318359375, | |
| "policy/entropy_avg": 0.28795722126960754, | |
| "step": 124, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1916, | |
| "val/ratio": 0.9999822378158569, | |
| "val/ratio_var": 2.5005276711453917e-06 | |
| }, | |
| { | |
| "episode": 64512, | |
| "epoch": 0.27635366689513363, | |
| "eps": 6, | |
| "loss/policy_avg": 0.14548206329345703, | |
| "lr": 1.53515625e-06, | |
| "objective/entropy": -40.37278747558594, | |
| "objective/kl": 34.826271057128906, | |
| "objective/non_score_reward": -0.1741313487291336, | |
| "objective/rlhf_reward": 10.072320938110352, | |
| "objective/scores": 10.246453285217285, | |
| "policy/approxkl_avg": 0.1895635575056076, | |
| "policy/clipfrac_avg": 0.328125, | |
| "policy/entropy_avg": 0.28454479575157166, | |
| "step": 125, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1992, | |
| "val/ratio": 0.999925971031189, | |
| "val/ratio_var": 2.291703822265845e-06 | |
| }, | |
| { | |
| "episode": 65024, | |
| "epoch": 0.27854694996572993, | |
| "eps": 6, | |
| "loss/policy_avg": 0.17899231612682343, | |
| "lr": 1.5234375e-06, | |
| "objective/entropy": -39.21260070800781, | |
| "objective/kl": 36.2283935546875, | |
| "objective/non_score_reward": -0.18114197254180908, | |
| "objective/rlhf_reward": 9.876224517822266, | |
| "objective/scores": 10.057367324829102, | |
| "policy/approxkl_avg": 0.17429962754249573, | |
| "policy/clipfrac_avg": 0.328125, | |
| "policy/entropy_avg": 0.27740728855133057, | |
| "step": 126, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1933, | |
| "val/ratio": 1.000385046005249, | |
| "val/ratio_var": 2.276981604154571e-06 | |
| }, | |
| { | |
| "episode": 65536, | |
| "epoch": 0.2807402330363262, | |
| "eps": 6, | |
| "loss/policy_avg": 0.2138238549232483, | |
| "lr": 1.5117187500000001e-06, | |
| "objective/entropy": -40.66987991333008, | |
| "objective/kl": 34.04345703125, | |
| "objective/non_score_reward": -0.17021729052066803, | |
| "objective/rlhf_reward": 10.10722541809082, | |
| "objective/scores": 10.277442932128906, | |
| "policy/approxkl_avg": 0.184827983379364, | |
| "policy/clipfrac_avg": 0.306640625, | |
| "policy/entropy_avg": 0.2644522190093994, | |
| "step": 127, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1960, | |
| "val/ratio": 0.9999203681945801, | |
| "val/ratio_var": 2.535048452045885e-06 | |
| }, | |
| { | |
| "episode": 66048, | |
| "epoch": 0.2829335161069225, | |
| "eps": 6, | |
| "loss/policy_avg": 0.17386293411254883, | |
| "lr": 1.5e-06, | |
| "objective/entropy": -38.950401306152344, | |
| "objective/kl": 36.11156463623047, | |
| "objective/non_score_reward": -0.18055780231952667, | |
| "objective/rlhf_reward": 9.913749694824219, | |
| "objective/scores": 10.094307899475098, | |
| "policy/approxkl_avg": 0.2340121865272522, | |
| "policy/clipfrac_avg": 0.283203125, | |
| "policy/entropy_avg": 0.26781517267227173, | |
| "step": 128, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1912, | |
| "val/ratio": 0.9998964667320251, | |
| "val/ratio_var": 2.7165133360540494e-06 | |
| }, | |
| { | |
| "episode": 66560, | |
| "epoch": 0.2851267991775189, | |
| "eps": 6, | |
| "loss/policy_avg": 0.11376126110553741, | |
| "lr": 1.48828125e-06, | |
| "objective/entropy": -37.08405303955078, | |
| "objective/kl": 36.694183349609375, | |
| "objective/non_score_reward": -0.1834709197282791, | |
| "objective/rlhf_reward": 9.986257553100586, | |
| "objective/scores": 10.169729232788086, | |
| "policy/approxkl_avg": 0.19061046838760376, | |
| "policy/clipfrac_avg": 0.296875, | |
| "policy/entropy_avg": 0.2567726969718933, | |
| "step": 129, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1826, | |
| "val/ratio": 0.9997936487197876, | |
| "val/ratio_var": 3.173572395098745e-06 | |
| }, | |
| { | |
| "episode": 67072, | |
| "epoch": 0.28732008224811517, | |
| "eps": 6, | |
| "loss/policy_avg": 0.16814622282981873, | |
| "lr": 1.4765625e-06, | |
| "objective/entropy": -37.90453338623047, | |
| "objective/kl": 36.63753128051758, | |
| "objective/non_score_reward": -0.18318764865398407, | |
| "objective/rlhf_reward": 9.85715103149414, | |
| "objective/scores": 10.040338516235352, | |
| "policy/approxkl_avg": 0.2051369547843933, | |
| "policy/clipfrac_avg": 0.345703125, | |
| "policy/entropy_avg": 0.2719249129295349, | |
| "step": 130, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1896, | |
| "val/ratio": 1.0000419616699219, | |
| "val/ratio_var": 2.5849769826891134e-06 | |
| }, | |
| { | |
| "episode": 67584, | |
| "epoch": 0.28951336531871147, | |
| "eps": 6, | |
| "loss/policy_avg": 0.11204706132411957, | |
| "lr": 1.46484375e-06, | |
| "objective/entropy": -38.804443359375, | |
| "objective/kl": 36.267181396484375, | |
| "objective/non_score_reward": -0.181335911154747, | |
| "objective/rlhf_reward": 10.217302322387695, | |
| "objective/scores": 10.398637771606445, | |
| "policy/approxkl_avg": 0.24441036581993103, | |
| "policy/clipfrac_avg": 0.3125, | |
| "policy/entropy_avg": 0.25722193717956543, | |
| "step": 131, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1899, | |
| "val/ratio": 0.9999180436134338, | |
| "val/ratio_var": 3.2145369459612994e-06 | |
| }, | |
| { | |
| "episode": 68096, | |
| "epoch": 0.29170664838930777, | |
| "eps": 6, | |
| "loss/policy_avg": 0.1794815957546234, | |
| "lr": 1.453125e-06, | |
| "objective/entropy": -35.273468017578125, | |
| "objective/kl": 37.08363342285156, | |
| "objective/non_score_reward": -0.18541815876960754, | |
| "objective/rlhf_reward": 9.986465454101562, | |
| "objective/scores": 10.171884536743164, | |
| "policy/approxkl_avg": 0.24921618402004242, | |
| "policy/clipfrac_avg": 0.30859375, | |
| "policy/entropy_avg": 0.2578710913658142, | |
| "step": 132, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1833, | |
| "val/ratio": 0.9998206496238708, | |
| "val/ratio_var": 3.307860424683895e-06 | |
| }, | |
| { | |
| "episode": 68608, | |
| "epoch": 0.29389993145990406, | |
| "eps": 6, | |
| "loss/policy_avg": 0.16161808371543884, | |
| "lr": 1.44140625e-06, | |
| "objective/entropy": -35.44409942626953, | |
| "objective/kl": 37.412166595458984, | |
| "objective/non_score_reward": -0.18706083297729492, | |
| "objective/rlhf_reward": 9.986528396606445, | |
| "objective/scores": 10.173589706420898, | |
| "policy/approxkl_avg": 0.22423642873764038, | |
| "policy/clipfrac_avg": 0.32421875, | |
| "policy/entropy_avg": 0.2606107294559479, | |
| "step": 133, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1926, | |
| "val/ratio": 1.0000818967819214, | |
| "val/ratio_var": 3.4354557101323735e-06 | |
| }, | |
| { | |
| "episode": 69120, | |
| "epoch": 0.29609321453050036, | |
| "eps": 6, | |
| "loss/policy_avg": 0.19688278436660767, | |
| "lr": 1.4296875e-06, | |
| "objective/entropy": -35.44132614135742, | |
| "objective/kl": 38.76708984375, | |
| "objective/non_score_reward": -0.19383545219898224, | |
| "objective/rlhf_reward": 10.073609352111816, | |
| "objective/scores": 10.26744556427002, | |
| "policy/approxkl_avg": 0.22920569777488708, | |
| "policy/clipfrac_avg": 0.333984375, | |
| "policy/entropy_avg": 0.2607892155647278, | |
| "step": 134, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1839, | |
| "val/ratio": 1.0001198053359985, | |
| "val/ratio_var": 3.989252945757471e-06 | |
| }, | |
| { | |
| "episode": 69632, | |
| "epoch": 0.29828649760109666, | |
| "eps": 6, | |
| "loss/policy_avg": 0.15440833568572998, | |
| "lr": 1.41796875e-06, | |
| "objective/entropy": -34.1768798828125, | |
| "objective/kl": 37.674827575683594, | |
| "objective/non_score_reward": -0.1883741319179535, | |
| "objective/rlhf_reward": 10.175031661987305, | |
| "objective/scores": 10.363405227661133, | |
| "policy/approxkl_avg": 0.21583065390586853, | |
| "policy/clipfrac_avg": 0.32421875, | |
| "policy/entropy_avg": 0.2518673539161682, | |
| "step": 135, | |
| "val/clipfrac_avg": 0.0, | |
| "val/num_eos_tokens": 1858, | |
| "val/ratio": 0.9998531937599182, | |
| "val/ratio_var": 1.8523942344472744e-06 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 128, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1.122960932145305, | |
| "save_steps": 26, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": true, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0, | |
| "train_batch_size": null, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |