| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "episode": 26624, | |
| "epoch": 0.22810143934201507, | |
| "eval_steps": 500, | |
| "global_step": 52, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "episode": 512, | |
| "epoch": 0.004386566141192597, | |
| "eps": 5, | |
| "loss/policy_avg": -0.0010775011032819748, | |
| "loss/value_avg": 2.152796983718872, | |
| "lr": 3e-06, | |
| "objective/entropy": -46.88261795043945, | |
| "objective/kl": -2.2351741790771484e-07, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 4.597599983215332, | |
| "objective/scores": 4.597599983215332, | |
| "policy/approxkl_avg": 0.0034045230131596327, | |
| "policy/clipfrac_avg": 0.05483449250459671, | |
| "policy/entropy_avg": 0.840014636516571, | |
| "step": 1, | |
| "val/clipfrac_avg": 0.0006964995991438627, | |
| "val/num_eos_tokens": 511, | |
| "val/ratio": 0.9999260902404785, | |
| "val/ratio_var": 6.746347025909927e-06 | |
| }, | |
| { | |
| "episode": 1024, | |
| "epoch": 0.008773132282385195, | |
| "eps": 6, | |
| "loss/policy_avg": 0.002331731840968132, | |
| "loss/value_avg": 1.406424641609192, | |
| "lr": 2.9882812500000002e-06, | |
| "objective/entropy": 5.527715682983398, | |
| "objective/kl": 0.9096249341964722, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 5.0396623611450195, | |
| "objective/scores": 5.0396623611450195, | |
| "policy/approxkl_avg": 0.0057543180882930756, | |
| "policy/clipfrac_avg": 0.04763935133814812, | |
| "policy/entropy_avg": 0.7740105390548706, | |
| "step": 2, | |
| "val/clipfrac_avg": 0.0013810684904456139, | |
| "val/num_eos_tokens": 512, | |
| "val/ratio": 1.0008430480957031, | |
| "val/ratio_var": 2.5710842237458564e-05 | |
| }, | |
| { | |
| "episode": 1536, | |
| "epoch": 0.013159698423577794, | |
| "eps": 6, | |
| "loss/policy_avg": 0.004994675051420927, | |
| "loss/value_avg": 1.4159741401672363, | |
| "lr": 2.9765625e-06, | |
| "objective/entropy": -27.37961196899414, | |
| "objective/kl": 1.9729511737823486, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 4.9908952713012695, | |
| "objective/scores": 4.9908952713012695, | |
| "policy/approxkl_avg": 0.0038739778101444244, | |
| "policy/clipfrac_avg": 0.0459367111325264, | |
| "policy/entropy_avg": 0.6048773527145386, | |
| "step": 3, | |
| "val/clipfrac_avg": 0.0031044897623360157, | |
| "val/num_eos_tokens": 512, | |
| "val/ratio": 0.9995440244674683, | |
| "val/ratio_var": 1.1859823644044809e-05 | |
| }, | |
| { | |
| "episode": 2048, | |
| "epoch": 0.01754626456477039, | |
| "eps": 6, | |
| "loss/policy_avg": 0.008640453219413757, | |
| "loss/value_avg": 1.9971219301223755, | |
| "lr": 2.96484375e-06, | |
| "objective/entropy": -14.0813627243042, | |
| "objective/kl": 2.724120616912842, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 5.457592487335205, | |
| "objective/scores": 5.457592487335205, | |
| "policy/approxkl_avg": 0.00431688129901886, | |
| "policy/clipfrac_avg": 0.039867501705884933, | |
| "policy/entropy_avg": 0.5995867252349854, | |
| "step": 4, | |
| "val/clipfrac_avg": 0.003971057012677193, | |
| "val/num_eos_tokens": 512, | |
| "val/ratio": 1.00046968460083, | |
| "val/ratio_var": 1.5058150893310085e-05 | |
| }, | |
| { | |
| "episode": 2560, | |
| "epoch": 0.02193283070596299, | |
| "eps": 6, | |
| "loss/policy_avg": 0.008194871246814728, | |
| "loss/value_avg": 2.207172393798828, | |
| "lr": 2.953125e-06, | |
| "objective/entropy": 0.22097766399383545, | |
| "objective/kl": 3.6732287406921387, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 5.538914680480957, | |
| "objective/scores": 5.538914680480957, | |
| "policy/approxkl_avg": 0.005579844582825899, | |
| "policy/clipfrac_avg": 0.04047102481126785, | |
| "policy/entropy_avg": 0.554892361164093, | |
| "step": 5, | |
| "val/clipfrac_avg": 0.003971456084400415, | |
| "val/num_eos_tokens": 512, | |
| "val/ratio": 0.9994141459465027, | |
| "val/ratio_var": 2.683511411305517e-05 | |
| }, | |
| { | |
| "episode": 3072, | |
| "epoch": 0.026319396847155587, | |
| "eps": 6, | |
| "loss/policy_avg": 0.008127043955028057, | |
| "loss/value_avg": 1.9443745613098145, | |
| "lr": 2.94140625e-06, | |
| "objective/entropy": -4.540916442871094, | |
| "objective/kl": 4.489357948303223, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 5.669499397277832, | |
| "objective/scores": 5.669499397277832, | |
| "policy/approxkl_avg": 0.00525766983628273, | |
| "policy/clipfrac_avg": 0.040956489741802216, | |
| "policy/entropy_avg": 0.5248321890830994, | |
| "step": 6, | |
| "val/clipfrac_avg": 0.0037213233299553394, | |
| "val/num_eos_tokens": 512, | |
| "val/ratio": 0.9998031854629517, | |
| "val/ratio_var": 1.9533537852112204e-05 | |
| }, | |
| { | |
| "episode": 3584, | |
| "epoch": 0.030705962988348184, | |
| "eps": 6, | |
| "loss/policy_avg": 0.0053918734192848206, | |
| "loss/value_avg": 1.4462230205535889, | |
| "lr": 2.9296875e-06, | |
| "objective/entropy": -18.436466217041016, | |
| "objective/kl": 6.404374122619629, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 6.3423004150390625, | |
| "objective/scores": 6.3423004150390625, | |
| "policy/approxkl_avg": 0.004276195541024208, | |
| "policy/clipfrac_avg": 0.041515737771987915, | |
| "policy/entropy_avg": 0.5162253379821777, | |
| "step": 7, | |
| "val/clipfrac_avg": 0.0022959401831030846, | |
| "val/num_eos_tokens": 512, | |
| "val/ratio": 0.9992671608924866, | |
| "val/ratio_var": 1.4565547644451726e-05 | |
| }, | |
| { | |
| "episode": 4096, | |
| "epoch": 0.03509252912954078, | |
| "eps": 6, | |
| "loss/policy_avg": 0.009467152878642082, | |
| "loss/value_avg": 1.0647578239440918, | |
| "lr": 2.91796875e-06, | |
| "objective/entropy": -52.239105224609375, | |
| "objective/kl": 9.6763334274292, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 6.394425392150879, | |
| "objective/scores": 6.394425392150879, | |
| "policy/approxkl_avg": 0.003496184479445219, | |
| "policy/clipfrac_avg": 0.042081158608198166, | |
| "policy/entropy_avg": 0.6195869445800781, | |
| "step": 8, | |
| "val/clipfrac_avg": 0.0018653525039553642, | |
| "val/num_eos_tokens": 506, | |
| "val/ratio": 0.9998865127563477, | |
| "val/ratio_var": 9.700558621261735e-06 | |
| }, | |
| { | |
| "episode": 4608, | |
| "epoch": 0.03947909527073338, | |
| "eps": 6, | |
| "loss/policy_avg": -0.003379741683602333, | |
| "loss/value_avg": 0.8534075617790222, | |
| "lr": 2.90625e-06, | |
| "objective/entropy": -37.56623077392578, | |
| "objective/kl": 17.91680145263672, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 6.905078887939453, | |
| "objective/scores": 6.905078887939453, | |
| "policy/approxkl_avg": 0.00449257530272007, | |
| "policy/clipfrac_avg": 0.04568759351968765, | |
| "policy/entropy_avg": 0.8169364929199219, | |
| "step": 9, | |
| "val/clipfrac_avg": 0.0014340076595544815, | |
| "val/num_eos_tokens": 499, | |
| "val/ratio": 0.9995401501655579, | |
| "val/ratio_var": 5.685657924914267e-06 | |
| }, | |
| { | |
| "episode": 5120, | |
| "epoch": 0.04386566141192598, | |
| "eps": 5, | |
| "loss/policy_avg": -0.005874279886484146, | |
| "loss/value_avg": 0.8848444819450378, | |
| "lr": 2.89453125e-06, | |
| "objective/entropy": -13.25861930847168, | |
| "objective/kl": 33.02040100097656, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 6.507898807525635, | |
| "objective/scores": 6.507898807525635, | |
| "policy/approxkl_avg": 0.005827038548886776, | |
| "policy/clipfrac_avg": 0.04495222494006157, | |
| "policy/entropy_avg": 0.957590639591217, | |
| "step": 10, | |
| "val/clipfrac_avg": 0.0012696427293121815, | |
| "val/num_eos_tokens": 450, | |
| "val/ratio": 0.9994459748268127, | |
| "val/ratio_var": 1.387734209856717e-05 | |
| }, | |
| { | |
| "episode": 5632, | |
| "epoch": 0.048252227553118573, | |
| "eps": 5, | |
| "loss/policy_avg": -0.014378623105585575, | |
| "loss/value_avg": 0.9888654947280884, | |
| "lr": 2.8828125e-06, | |
| "objective/entropy": 1.6588702201843262, | |
| "objective/kl": 43.53641891479492, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 6.313203811645508, | |
| "objective/scores": 6.313203811645508, | |
| "policy/approxkl_avg": 0.007957661524415016, | |
| "policy/clipfrac_avg": 0.04707237333059311, | |
| "policy/entropy_avg": 1.0334728956222534, | |
| "step": 11, | |
| "val/clipfrac_avg": 0.0014614008832722902, | |
| "val/num_eos_tokens": 388, | |
| "val/ratio": 1.0005269050598145, | |
| "val/ratio_var": 1.659244480833877e-05 | |
| }, | |
| { | |
| "episode": 6144, | |
| "epoch": 0.052638793694311174, | |
| "eps": 5, | |
| "loss/policy_avg": -0.022208962589502335, | |
| "loss/value_avg": 0.9836709499359131, | |
| "lr": 2.87109375e-06, | |
| "objective/entropy": -5.948256492614746, | |
| "objective/kl": 43.305381774902344, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 6.90786600112915, | |
| "objective/scores": 6.90786600112915, | |
| "policy/approxkl_avg": 0.006931103765964508, | |
| "policy/clipfrac_avg": 0.04469376057386398, | |
| "policy/entropy_avg": 1.1865200996398926, | |
| "step": 12, | |
| "val/clipfrac_avg": 0.000864459783770144, | |
| "val/num_eos_tokens": 400, | |
| "val/ratio": 0.9998042583465576, | |
| "val/ratio_var": 1.6377791325794533e-05 | |
| }, | |
| { | |
| "episode": 6656, | |
| "epoch": 0.05702535983550377, | |
| "eps": 5, | |
| "loss/policy_avg": -0.02128465473651886, | |
| "loss/value_avg": 0.9731428623199463, | |
| "lr": 2.859375e-06, | |
| "objective/entropy": -8.351584434509277, | |
| "objective/kl": 41.11138916015625, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 6.677196502685547, | |
| "objective/scores": 6.677196502685547, | |
| "policy/approxkl_avg": 0.006365190725773573, | |
| "policy/clipfrac_avg": 0.04388276860117912, | |
| "policy/entropy_avg": 1.2417210340499878, | |
| "step": 13, | |
| "val/clipfrac_avg": 0.0005712928250432014, | |
| "val/num_eos_tokens": 383, | |
| "val/ratio": 1.0003130435943604, | |
| "val/ratio_var": 1.1928386811632663e-05 | |
| }, | |
| { | |
| "episode": 7168, | |
| "epoch": 0.06141192597669637, | |
| "eps": 5, | |
| "loss/policy_avg": -0.02828969433903694, | |
| "loss/value_avg": 0.9882746934890747, | |
| "lr": 2.84765625e-06, | |
| "objective/entropy": -17.76400375366211, | |
| "objective/kl": 37.17643356323242, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 6.9648332595825195, | |
| "objective/scores": 6.9648332595825195, | |
| "policy/approxkl_avg": 0.005724512506276369, | |
| "policy/clipfrac_avg": 0.04208240285515785, | |
| "policy/entropy_avg": 1.2095826864242554, | |
| "step": 14, | |
| "val/clipfrac_avg": 0.0012288970174267888, | |
| "val/num_eos_tokens": 400, | |
| "val/ratio": 0.9997619390487671, | |
| "val/ratio_var": 1.3792546269542072e-05 | |
| }, | |
| { | |
| "episode": 7680, | |
| "epoch": 0.06579849211788896, | |
| "eps": 5, | |
| "loss/policy_avg": -0.038550965487957, | |
| "loss/value_avg": 0.9785877466201782, | |
| "lr": 2.8359375e-06, | |
| "objective/entropy": -28.61048126220703, | |
| "objective/kl": 32.39018630981445, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 7.317845344543457, | |
| "objective/scores": 7.317845344543457, | |
| "policy/approxkl_avg": 0.005465254653245211, | |
| "policy/clipfrac_avg": 0.03845732659101486, | |
| "policy/entropy_avg": 1.1608960628509521, | |
| "step": 15, | |
| "val/clipfrac_avg": 0.00025615841150283813, | |
| "val/num_eos_tokens": 419, | |
| "val/ratio": 1.0000056028366089, | |
| "val/ratio_var": 1.2135635188315064e-05 | |
| }, | |
| { | |
| "episode": 8192, | |
| "epoch": 0.07018505825908156, | |
| "eps": 5, | |
| "loss/policy_avg": -0.042696814984083176, | |
| "loss/value_avg": 0.8722034692764282, | |
| "lr": 2.82421875e-06, | |
| "objective/entropy": -39.38507843017578, | |
| "objective/kl": 29.655807495117188, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 7.833863735198975, | |
| "objective/scores": 7.833863735198975, | |
| "policy/approxkl_avg": 0.004913633689284325, | |
| "policy/clipfrac_avg": 0.04184533655643463, | |
| "policy/entropy_avg": 1.0446637868881226, | |
| "step": 16, | |
| "val/clipfrac_avg": 0.00022731353237759322, | |
| "val/num_eos_tokens": 456, | |
| "val/ratio": 0.9998592138290405, | |
| "val/ratio_var": 8.872881153365597e-06 | |
| }, | |
| { | |
| "episode": 8704, | |
| "epoch": 0.07457162440027416, | |
| "eps": 5, | |
| "loss/policy_avg": -0.030250171199440956, | |
| "loss/value_avg": 0.73753422498703, | |
| "lr": 2.8125e-06, | |
| "objective/entropy": -48.727821350097656, | |
| "objective/kl": 25.98404312133789, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 7.985202312469482, | |
| "objective/scores": 7.985202312469482, | |
| "policy/approxkl_avg": 0.004400444217026234, | |
| "policy/clipfrac_avg": 0.04398694634437561, | |
| "policy/entropy_avg": 1.066787600517273, | |
| "step": 17, | |
| "val/clipfrac_avg": 0.000578696490265429, | |
| "val/num_eos_tokens": 479, | |
| "val/ratio": 0.9993942975997925, | |
| "val/ratio_var": 9.115313332586084e-06 | |
| }, | |
| { | |
| "episode": 9216, | |
| "epoch": 0.07895819054146676, | |
| "eps": 5, | |
| "loss/policy_avg": -0.013007078319787979, | |
| "loss/value_avg": 0.6708067059516907, | |
| "lr": 2.80078125e-06, | |
| "objective/entropy": -53.65514373779297, | |
| "objective/kl": 26.27431869506836, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 8.508003234863281, | |
| "objective/scores": 8.508003234863281, | |
| "policy/approxkl_avg": 0.004381729289889336, | |
| "policy/clipfrac_avg": 0.042573459446430206, | |
| "policy/entropy_avg": 0.9618682265281677, | |
| "step": 18, | |
| "val/clipfrac_avg": 0.0002770860446617007, | |
| "val/num_eos_tokens": 496, | |
| "val/ratio": 0.9999381899833679, | |
| "val/ratio_var": 5.949283149675466e-06 | |
| }, | |
| { | |
| "episode": 9728, | |
| "epoch": 0.08334475668265935, | |
| "eps": 5, | |
| "loss/policy_avg": -0.0033736806362867355, | |
| "loss/value_avg": 0.536386251449585, | |
| "lr": 2.7890625e-06, | |
| "objective/entropy": -56.456016540527344, | |
| "objective/kl": 25.067127227783203, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 8.319454193115234, | |
| "objective/scores": 8.319454193115234, | |
| "policy/approxkl_avg": 0.004075043834745884, | |
| "policy/clipfrac_avg": 0.04519989341497421, | |
| "policy/entropy_avg": 0.9269654154777527, | |
| "step": 19, | |
| "val/clipfrac_avg": 0.0003078244626522064, | |
| "val/num_eos_tokens": 501, | |
| "val/ratio": 0.9994814395904541, | |
| "val/ratio_var": 9.405189302924555e-06 | |
| }, | |
| { | |
| "episode": 10240, | |
| "epoch": 0.08773132282385196, | |
| "eps": 5, | |
| "loss/policy_avg": 0.004913418088108301, | |
| "loss/value_avg": 0.5358260869979858, | |
| "lr": 2.77734375e-06, | |
| "objective/entropy": -59.48634338378906, | |
| "objective/kl": 24.29659652709961, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 8.499679565429688, | |
| "objective/scores": 8.499679565429688, | |
| "policy/approxkl_avg": 0.0036819700617343187, | |
| "policy/clipfrac_avg": 0.043816424906253815, | |
| "policy/entropy_avg": 0.9329521059989929, | |
| "step": 20, | |
| "val/clipfrac_avg": 0.00022827752400189638, | |
| "val/num_eos_tokens": 508, | |
| "val/ratio": 1.0002903938293457, | |
| "val/ratio_var": 7.933602319099009e-06 | |
| }, | |
| { | |
| "episode": 10752, | |
| "epoch": 0.09211788896504455, | |
| "eps": 5, | |
| "loss/policy_avg": 0.0012558624148368835, | |
| "loss/value_avg": 0.5767730474472046, | |
| "lr": 2.765625e-06, | |
| "objective/entropy": -61.19065856933594, | |
| "objective/kl": 25.785268783569336, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 8.370229721069336, | |
| "objective/scores": 8.370229721069336, | |
| "policy/approxkl_avg": 0.0040285466238856316, | |
| "policy/clipfrac_avg": 0.04525526985526085, | |
| "policy/entropy_avg": 0.949712336063385, | |
| "step": 21, | |
| "val/clipfrac_avg": 0.00048373237950727344, | |
| "val/num_eos_tokens": 507, | |
| "val/ratio": 0.9995108246803284, | |
| "val/ratio_var": 7.613601610501064e-06 | |
| }, | |
| { | |
| "episode": 11264, | |
| "epoch": 0.09650445510623715, | |
| "eps": 5, | |
| "loss/policy_avg": 0.007160295266658068, | |
| "loss/value_avg": 0.6456326842308044, | |
| "lr": 2.75390625e-06, | |
| "objective/entropy": -61.760406494140625, | |
| "objective/kl": 25.645444869995117, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 8.45798110961914, | |
| "objective/scores": 8.45798110961914, | |
| "policy/approxkl_avg": 0.003935192245990038, | |
| "policy/clipfrac_avg": 0.04455619305372238, | |
| "policy/entropy_avg": 0.9833183884620667, | |
| "step": 22, | |
| "val/clipfrac_avg": 0.0008266369113698602, | |
| "val/num_eos_tokens": 508, | |
| "val/ratio": 0.9999653697013855, | |
| "val/ratio_var": 6.215088887984166e-06 | |
| }, | |
| { | |
| "episode": 11776, | |
| "epoch": 0.10089102124742974, | |
| "eps": 5, | |
| "loss/policy_avg": 0.014776970259845257, | |
| "loss/value_avg": 0.7030456066131592, | |
| "lr": 2.7421875e-06, | |
| "objective/entropy": -60.44242858886719, | |
| "objective/kl": 26.446081161499023, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 8.580012321472168, | |
| "objective/scores": 8.580012321472168, | |
| "policy/approxkl_avg": 0.003829989116638899, | |
| "policy/clipfrac_avg": 0.04393254220485687, | |
| "policy/entropy_avg": 1.067291259765625, | |
| "step": 23, | |
| "val/clipfrac_avg": 0.0012957865837961435, | |
| "val/num_eos_tokens": 510, | |
| "val/ratio": 1.0001041889190674, | |
| "val/ratio_var": 7.751174962322693e-06 | |
| }, | |
| { | |
| "episode": 12288, | |
| "epoch": 0.10527758738862235, | |
| "eps": 5, | |
| "loss/policy_avg": 0.006113366223871708, | |
| "loss/value_avg": 0.6351609230041504, | |
| "lr": 2.73046875e-06, | |
| "objective/entropy": -56.61647415161133, | |
| "objective/kl": 30.12872886657715, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 8.864429473876953, | |
| "objective/scores": 8.864429473876953, | |
| "policy/approxkl_avg": 0.004433917347341776, | |
| "policy/clipfrac_avg": 0.04813031852245331, | |
| "policy/entropy_avg": 1.052201747894287, | |
| "step": 24, | |
| "val/clipfrac_avg": 0.0007775876438245177, | |
| "val/num_eos_tokens": 510, | |
| "val/ratio": 0.9999730587005615, | |
| "val/ratio_var": 2.6995268854079768e-05 | |
| }, | |
| { | |
| "episode": 12800, | |
| "epoch": 0.10966415352981494, | |
| "eps": 5, | |
| "loss/policy_avg": 0.010391147807240486, | |
| "loss/value_avg": 0.6229045391082764, | |
| "lr": 2.71875e-06, | |
| "objective/entropy": -55.23644256591797, | |
| "objective/kl": 32.96910095214844, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 8.8991117477417, | |
| "objective/scores": 8.8991117477417, | |
| "policy/approxkl_avg": 0.004299290012568235, | |
| "policy/clipfrac_avg": 0.04626629129052162, | |
| "policy/entropy_avg": 1.0567516088485718, | |
| "step": 25, | |
| "val/clipfrac_avg": 0.0009714451734907925, | |
| "val/num_eos_tokens": 510, | |
| "val/ratio": 1.0003576278686523, | |
| "val/ratio_var": 2.934046096925158e-05 | |
| }, | |
| { | |
| "episode": 13312, | |
| "epoch": 0.11405071967100754, | |
| "eps": 5, | |
| "loss/policy_avg": 0.005770000629127026, | |
| "loss/value_avg": 0.6085966229438782, | |
| "lr": 2.70703125e-06, | |
| "objective/entropy": -47.69157409667969, | |
| "objective/kl": 39.49090576171875, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 8.737468719482422, | |
| "objective/scores": 8.737468719482422, | |
| "policy/approxkl_avg": 0.0053903451189398766, | |
| "policy/clipfrac_avg": 0.05019207298755646, | |
| "policy/entropy_avg": 1.0364032983779907, | |
| "step": 26, | |
| "val/clipfrac_avg": 0.001478280988521874, | |
| "val/num_eos_tokens": 504, | |
| "val/ratio": 0.9998453259468079, | |
| "val/ratio_var": 9.86140457825968e-06 | |
| }, | |
| { | |
| "episode": 13824, | |
| "epoch": 0.11843728581220014, | |
| "eps": 5, | |
| "loss/policy_avg": -0.007628859020769596, | |
| "loss/value_avg": 0.6527573466300964, | |
| "lr": 2.6953125e-06, | |
| "objective/entropy": -39.07084655761719, | |
| "objective/kl": 47.1235237121582, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 8.737848281860352, | |
| "objective/scores": 8.737848281860352, | |
| "policy/approxkl_avg": 0.005542443133890629, | |
| "policy/clipfrac_avg": 0.046551190316677094, | |
| "policy/entropy_avg": 0.9338756203651428, | |
| "step": 27, | |
| "val/clipfrac_avg": 0.0006895526312291622, | |
| "val/num_eos_tokens": 481, | |
| "val/ratio": 0.9999536275863647, | |
| "val/ratio_var": 1.1857218851218931e-05 | |
| }, | |
| { | |
| "episode": 14336, | |
| "epoch": 0.12282385195339274, | |
| "eps": 5, | |
| "loss/policy_avg": -0.013978306204080582, | |
| "loss/value_avg": 0.7715175151824951, | |
| "lr": 2.68359375e-06, | |
| "objective/entropy": -29.249040603637695, | |
| "objective/kl": 58.577266693115234, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 8.21330738067627, | |
| "objective/scores": 8.21330738067627, | |
| "policy/approxkl_avg": 0.006117562763392925, | |
| "policy/clipfrac_avg": 0.0474059171974659, | |
| "policy/entropy_avg": 0.8380322456359863, | |
| "step": 28, | |
| "val/clipfrac_avg": 0.0009613880538381636, | |
| "val/num_eos_tokens": 456, | |
| "val/ratio": 1.0004549026489258, | |
| "val/ratio_var": 1.6783484170446172e-05 | |
| }, | |
| { | |
| "episode": 14848, | |
| "epoch": 0.12721041809458533, | |
| "eps": 5, | |
| "loss/policy_avg": -0.00955754891037941, | |
| "loss/value_avg": 0.7996537089347839, | |
| "lr": 2.671875e-06, | |
| "objective/entropy": -16.704195022583008, | |
| "objective/kl": 67.1770248413086, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 7.992954254150391, | |
| "objective/scores": 7.992954254150391, | |
| "policy/approxkl_avg": 0.006946917623281479, | |
| "policy/clipfrac_avg": 0.04584059491753578, | |
| "policy/entropy_avg": 0.7783851623535156, | |
| "step": 29, | |
| "val/clipfrac_avg": 0.0007248380570672452, | |
| "val/num_eos_tokens": 428, | |
| "val/ratio": 1.0003103017807007, | |
| "val/ratio_var": 1.1794147212640382e-05 | |
| }, | |
| { | |
| "episode": 15360, | |
| "epoch": 0.13159698423577793, | |
| "eps": 5, | |
| "loss/policy_avg": -0.01961817592382431, | |
| "loss/value_avg": 0.7010769844055176, | |
| "lr": 2.66015625e-06, | |
| "objective/entropy": -17.91378402709961, | |
| "objective/kl": 65.80282592773438, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 8.037736892700195, | |
| "objective/scores": 8.037736892700195, | |
| "policy/approxkl_avg": 0.006708525121212006, | |
| "policy/clipfrac_avg": 0.04874643683433533, | |
| "policy/entropy_avg": 0.7650750279426575, | |
| "step": 30, | |
| "val/clipfrac_avg": 0.0005629804218187928, | |
| "val/num_eos_tokens": 437, | |
| "val/ratio": 0.9999438524246216, | |
| "val/ratio_var": 1.5312107279896736e-05 | |
| }, | |
| { | |
| "episode": 15872, | |
| "epoch": 0.13598355037697052, | |
| "eps": 5, | |
| "loss/policy_avg": -0.008987879380583763, | |
| "loss/value_avg": 0.5842263698577881, | |
| "lr": 2.6484375e-06, | |
| "objective/entropy": -24.265296936035156, | |
| "objective/kl": 60.957061767578125, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 8.903877258300781, | |
| "objective/scores": 8.903877258300781, | |
| "policy/approxkl_avg": 0.00701293908059597, | |
| "policy/clipfrac_avg": 0.05128619819879532, | |
| "policy/entropy_avg": 0.8259672522544861, | |
| "step": 31, | |
| "val/clipfrac_avg": 0.0004929137649014592, | |
| "val/num_eos_tokens": 477, | |
| "val/ratio": 0.9996868371963501, | |
| "val/ratio_var": 1.6662374036968686e-05 | |
| }, | |
| { | |
| "episode": 16384, | |
| "epoch": 0.1403701165181631, | |
| "eps": 5, | |
| "loss/policy_avg": -0.004771184176206589, | |
| "loss/value_avg": 0.558784008026123, | |
| "lr": 2.63671875e-06, | |
| "objective/entropy": -26.573974609375, | |
| "objective/kl": 58.13153839111328, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 8.980989456176758, | |
| "objective/scores": 8.980989456176758, | |
| "policy/approxkl_avg": 0.006675435695797205, | |
| "policy/clipfrac_avg": 0.05361879989504814, | |
| "policy/entropy_avg": 0.8607890009880066, | |
| "step": 32, | |
| "val/clipfrac_avg": 0.00043037798604927957, | |
| "val/num_eos_tokens": 491, | |
| "val/ratio": 0.9996814727783203, | |
| "val/ratio_var": 1.2863994015788194e-05 | |
| }, | |
| { | |
| "episode": 16896, | |
| "epoch": 0.14475668265935573, | |
| "eps": 5, | |
| "loss/policy_avg": 0.000527138588950038, | |
| "loss/value_avg": 0.5865890979766846, | |
| "lr": 2.6250000000000003e-06, | |
| "objective/entropy": -30.55011749267578, | |
| "objective/kl": 54.622886657714844, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 9.211483001708984, | |
| "objective/scores": 9.211483001708984, | |
| "policy/approxkl_avg": 0.006334663834422827, | |
| "policy/clipfrac_avg": 0.054518453776836395, | |
| "policy/entropy_avg": 0.9017385244369507, | |
| "step": 33, | |
| "val/clipfrac_avg": 0.0004077432386111468, | |
| "val/num_eos_tokens": 504, | |
| "val/ratio": 1.000108003616333, | |
| "val/ratio_var": 1.4822944649495184e-05 | |
| }, | |
| { | |
| "episode": 17408, | |
| "epoch": 0.14914324880054833, | |
| "eps": 5, | |
| "loss/policy_avg": -0.0033687106333673, | |
| "loss/value_avg": 0.6050545573234558, | |
| "lr": 2.61328125e-06, | |
| "objective/entropy": -33.91044616699219, | |
| "objective/kl": 52.96797180175781, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 9.475627899169922, | |
| "objective/scores": 9.475627899169922, | |
| "policy/approxkl_avg": 0.00646405853331089, | |
| "policy/clipfrac_avg": 0.05471920967102051, | |
| "policy/entropy_avg": 0.9273003339767456, | |
| "step": 34, | |
| "val/clipfrac_avg": 0.0001560871460242197, | |
| "val/num_eos_tokens": 504, | |
| "val/ratio": 1.000005841255188, | |
| "val/ratio_var": 1.5097434697963763e-05 | |
| }, | |
| { | |
| "episode": 17920, | |
| "epoch": 0.15352981494174092, | |
| "eps": 5, | |
| "loss/policy_avg": 0.006220666225999594, | |
| "loss/value_avg": 0.5275569558143616, | |
| "lr": 2.6015625e-06, | |
| "objective/entropy": -36.52558898925781, | |
| "objective/kl": 49.886573791503906, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 9.341217041015625, | |
| "objective/scores": 9.341217041015625, | |
| "policy/approxkl_avg": 0.0063529182225465775, | |
| "policy/clipfrac_avg": 0.0546303354203701, | |
| "policy/entropy_avg": 0.977063775062561, | |
| "step": 35, | |
| "val/clipfrac_avg": 0.00036523916060104966, | |
| "val/num_eos_tokens": 511, | |
| "val/ratio": 1.000399112701416, | |
| "val/ratio_var": 1.2374664038361516e-05 | |
| }, | |
| { | |
| "episode": 18432, | |
| "epoch": 0.15791638108293352, | |
| "eps": 5, | |
| "loss/policy_avg": 0.006454125978052616, | |
| "loss/value_avg": 0.500439465045929, | |
| "lr": 2.5898437500000003e-06, | |
| "objective/entropy": -40.24166488647461, | |
| "objective/kl": 49.15092468261719, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 9.569242477416992, | |
| "objective/scores": 9.569242477416992, | |
| "policy/approxkl_avg": 0.005947995465248823, | |
| "policy/clipfrac_avg": 0.05309908464550972, | |
| "policy/entropy_avg": 1.0032403469085693, | |
| "step": 36, | |
| "val/clipfrac_avg": 0.00021327620197553188, | |
| "val/num_eos_tokens": 509, | |
| "val/ratio": 1.0006883144378662, | |
| "val/ratio_var": 1.43506422318751e-05 | |
| }, | |
| { | |
| "episode": 18944, | |
| "epoch": 0.1623029472241261, | |
| "eps": 5, | |
| "loss/policy_avg": 0.007721163332462311, | |
| "loss/value_avg": 0.4169340431690216, | |
| "lr": 2.578125e-06, | |
| "objective/entropy": -43.73438262939453, | |
| "objective/kl": 47.38279724121094, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 9.466113090515137, | |
| "objective/scores": 9.466113090515137, | |
| "policy/approxkl_avg": 0.0057074325159192085, | |
| "policy/clipfrac_avg": 0.05311349779367447, | |
| "policy/entropy_avg": 1.0183720588684082, | |
| "step": 37, | |
| "val/clipfrac_avg": 0.0005003074184060097, | |
| "val/num_eos_tokens": 510, | |
| "val/ratio": 0.9993772506713867, | |
| "val/ratio_var": 1.3632562513521407e-05 | |
| }, | |
| { | |
| "episode": 19456, | |
| "epoch": 0.1666895133653187, | |
| "eps": 5, | |
| "loss/policy_avg": 0.005469637922942638, | |
| "loss/value_avg": 0.35471123456954956, | |
| "lr": 2.56640625e-06, | |
| "objective/entropy": -46.80841827392578, | |
| "objective/kl": 47.71710205078125, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 9.39622688293457, | |
| "objective/scores": 9.39622688293457, | |
| "policy/approxkl_avg": 0.005291135516017675, | |
| "policy/clipfrac_avg": 0.053695324808359146, | |
| "policy/entropy_avg": 1.0006449222564697, | |
| "step": 38, | |
| "val/clipfrac_avg": 0.00037036644062027335, | |
| "val/num_eos_tokens": 510, | |
| "val/ratio": 1.0000135898590088, | |
| "val/ratio_var": 9.616092029318679e-06 | |
| }, | |
| { | |
| "episode": 19968, | |
| "epoch": 0.1710760795065113, | |
| "eps": 5, | |
| "loss/policy_avg": 0.007845397107303143, | |
| "loss/value_avg": 0.3138006925582886, | |
| "lr": 2.5546875000000003e-06, | |
| "objective/entropy": -49.33262634277344, | |
| "objective/kl": 45.213348388671875, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 9.463075637817383, | |
| "objective/scores": 9.463075637817383, | |
| "policy/approxkl_avg": 0.0048331525176763535, | |
| "policy/clipfrac_avg": 0.05174440145492554, | |
| "policy/entropy_avg": 0.9827993512153625, | |
| "step": 39, | |
| "val/clipfrac_avg": 0.0006140347104519606, | |
| "val/num_eos_tokens": 511, | |
| "val/ratio": 0.9993776082992554, | |
| "val/ratio_var": 1.478405738453148e-05 | |
| }, | |
| { | |
| "episode": 20480, | |
| "epoch": 0.17546264564770392, | |
| "eps": 5, | |
| "loss/policy_avg": 0.009613022208213806, | |
| "loss/value_avg": 0.2865651249885559, | |
| "lr": 2.54296875e-06, | |
| "objective/entropy": -21.233320236206055, | |
| "objective/kl": 45.59048843383789, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 9.34876537322998, | |
| "objective/scores": 9.34876537322998, | |
| "policy/approxkl_avg": 0.00592905143275857, | |
| "policy/clipfrac_avg": 0.048290204256772995, | |
| "policy/entropy_avg": 0.8851035833358765, | |
| "step": 40, | |
| "val/clipfrac_avg": 0.0003385603195056319, | |
| "val/num_eos_tokens": 512, | |
| "val/ratio": 1.0003713369369507, | |
| "val/ratio_var": 2.0009263607789762e-05 | |
| }, | |
| { | |
| "episode": 20992, | |
| "epoch": 0.1798492117888965, | |
| "eps": 5, | |
| "loss/policy_avg": 0.013228874653577805, | |
| "loss/value_avg": 0.3579314947128296, | |
| "lr": 2.53125e-06, | |
| "objective/entropy": -12.196764945983887, | |
| "objective/kl": 43.65990447998047, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 9.419660568237305, | |
| "objective/scores": 9.419660568237305, | |
| "policy/approxkl_avg": 0.006829770281910896, | |
| "policy/clipfrac_avg": 0.049680888652801514, | |
| "policy/entropy_avg": 0.8308127522468567, | |
| "step": 41, | |
| "val/clipfrac_avg": 0.00030408138991333544, | |
| "val/num_eos_tokens": 512, | |
| "val/ratio": 0.9990015029907227, | |
| "val/ratio_var": 1.62386004376458e-05 | |
| }, | |
| { | |
| "episode": 21504, | |
| "epoch": 0.1842357779300891, | |
| "eps": 5, | |
| "loss/policy_avg": 0.011590891517698765, | |
| "loss/value_avg": 0.41505149006843567, | |
| "lr": 2.5195312500000003e-06, | |
| "objective/entropy": -50.10555648803711, | |
| "objective/kl": 43.93341064453125, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 9.235706329345703, | |
| "objective/scores": 9.235706329345703, | |
| "policy/approxkl_avg": 0.005015837028622627, | |
| "policy/clipfrac_avg": 0.05047174170613289, | |
| "policy/entropy_avg": 0.9143767356872559, | |
| "step": 42, | |
| "val/clipfrac_avg": 0.0005529467016458511, | |
| "val/num_eos_tokens": 510, | |
| "val/ratio": 1.0000828504562378, | |
| "val/ratio_var": 1.0489389751455747e-05 | |
| }, | |
| { | |
| "episode": 22016, | |
| "epoch": 0.1886223440712817, | |
| "eps": 5, | |
| "loss/policy_avg": 0.012541696429252625, | |
| "loss/value_avg": 0.42272669076919556, | |
| "lr": 2.5078125e-06, | |
| "objective/entropy": -28.646146774291992, | |
| "objective/kl": 41.60773468017578, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 9.567056655883789, | |
| "objective/scores": 9.567056655883789, | |
| "policy/approxkl_avg": 0.005777922924607992, | |
| "policy/clipfrac_avg": 0.04737301170825958, | |
| "policy/entropy_avg": 0.8459863662719727, | |
| "step": 43, | |
| "val/clipfrac_avg": 0.0008770625572651625, | |
| "val/num_eos_tokens": 512, | |
| "val/ratio": 1.0003621578216553, | |
| "val/ratio_var": 1.7744689102983102e-05 | |
| }, | |
| { | |
| "episode": 22528, | |
| "epoch": 0.1930089102124743, | |
| "eps": 5, | |
| "loss/policy_avg": 0.010437501594424248, | |
| "loss/value_avg": 0.44466620683670044, | |
| "lr": 2.49609375e-06, | |
| "objective/entropy": -47.16653823852539, | |
| "objective/kl": 43.96792984008789, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 9.598703384399414, | |
| "objective/scores": 9.598703384399414, | |
| "policy/approxkl_avg": 0.004931645467877388, | |
| "policy/clipfrac_avg": 0.05061531811952591, | |
| "policy/entropy_avg": 0.8815011978149414, | |
| "step": 44, | |
| "val/clipfrac_avg": 0.0006886773044243455, | |
| "val/num_eos_tokens": 510, | |
| "val/ratio": 1.0000885725021362, | |
| "val/ratio_var": 1.353884090349311e-05 | |
| }, | |
| { | |
| "episode": 23040, | |
| "epoch": 0.1973954763536669, | |
| "eps": 5, | |
| "loss/policy_avg": 0.008954339660704136, | |
| "loss/value_avg": 0.3882572650909424, | |
| "lr": 2.4843750000000002e-06, | |
| "objective/entropy": -45.80089569091797, | |
| "objective/kl": 43.658973693847656, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 9.753758430480957, | |
| "objective/scores": 9.753758430480957, | |
| "policy/approxkl_avg": 0.0052681779488921165, | |
| "policy/clipfrac_avg": 0.04858952760696411, | |
| "policy/entropy_avg": 0.8804545998573303, | |
| "step": 45, | |
| "val/clipfrac_avg": 0.00039701920468360186, | |
| "val/num_eos_tokens": 510, | |
| "val/ratio": 1.0002995729446411, | |
| "val/ratio_var": 1.4173986528476235e-05 | |
| }, | |
| { | |
| "episode": 23552, | |
| "epoch": 0.20178204249485948, | |
| "eps": 5, | |
| "loss/policy_avg": 0.008618440479040146, | |
| "loss/value_avg": 0.38125473260879517, | |
| "lr": 2.47265625e-06, | |
| "objective/entropy": -43.78852844238281, | |
| "objective/kl": 45.42605209350586, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 9.775838851928711, | |
| "objective/scores": 9.775838851928711, | |
| "policy/approxkl_avg": 0.004843084141612053, | |
| "policy/clipfrac_avg": 0.04662889242172241, | |
| "policy/entropy_avg": 0.8556495904922485, | |
| "step": 46, | |
| "val/clipfrac_avg": 0.0007612881599925458, | |
| "val/num_eos_tokens": 511, | |
| "val/ratio": 0.9999899864196777, | |
| "val/ratio_var": 1.0652936907717958e-05 | |
| }, | |
| { | |
| "episode": 24064, | |
| "epoch": 0.2061686086360521, | |
| "eps": 5, | |
| "loss/policy_avg": 0.00817467924207449, | |
| "loss/value_avg": 0.31694096326828003, | |
| "lr": 2.4609375e-06, | |
| "objective/entropy": -40.105125427246094, | |
| "objective/kl": 47.7975959777832, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 9.824735641479492, | |
| "objective/scores": 9.824735641479492, | |
| "policy/approxkl_avg": 0.0051739090122282505, | |
| "policy/clipfrac_avg": 0.0448913611471653, | |
| "policy/entropy_avg": 0.8633439540863037, | |
| "step": 47, | |
| "val/clipfrac_avg": 0.0006875419057905674, | |
| "val/num_eos_tokens": 511, | |
| "val/ratio": 0.9996302723884583, | |
| "val/ratio_var": 9.456825864617713e-06 | |
| }, | |
| { | |
| "episode": 24576, | |
| "epoch": 0.2105551747772447, | |
| "eps": 5, | |
| "loss/policy_avg": 0.005447334609925747, | |
| "loss/value_avg": 0.29413723945617676, | |
| "lr": 2.4492187500000002e-06, | |
| "objective/entropy": -35.29613494873047, | |
| "objective/kl": 51.42682647705078, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 10.033798217773438, | |
| "objective/scores": 10.033798217773438, | |
| "policy/approxkl_avg": 0.005674813874065876, | |
| "policy/clipfrac_avg": 0.0471038892865181, | |
| "policy/entropy_avg": 0.8564262390136719, | |
| "step": 48, | |
| "val/clipfrac_avg": 0.0006171433487907052, | |
| "val/num_eos_tokens": 511, | |
| "val/ratio": 1.000201940536499, | |
| "val/ratio_var": 1.2321422218519729e-05 | |
| }, | |
| { | |
| "episode": 25088, | |
| "epoch": 0.2149417409184373, | |
| "eps": 5, | |
| "loss/policy_avg": 0.004657389596104622, | |
| "loss/value_avg": 0.2838543653488159, | |
| "lr": 2.4375e-06, | |
| "objective/entropy": -30.200895309448242, | |
| "objective/kl": 56.05735778808594, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 9.83310317993164, | |
| "objective/scores": 9.83310317993164, | |
| "policy/approxkl_avg": 0.005895932205021381, | |
| "policy/clipfrac_avg": 0.045708563178777695, | |
| "policy/entropy_avg": 0.8755910396575928, | |
| "step": 49, | |
| "val/clipfrac_avg": 0.00047186214942485094, | |
| "val/num_eos_tokens": 509, | |
| "val/ratio": 1.0001733303070068, | |
| "val/ratio_var": 1.263204376300564e-05 | |
| }, | |
| { | |
| "episode": 25600, | |
| "epoch": 0.21932830705962988, | |
| "eps": 5, | |
| "loss/policy_avg": 0.006238073576241732, | |
| "loss/value_avg": 0.282005250453949, | |
| "lr": 2.42578125e-06, | |
| "objective/entropy": -25.376914978027344, | |
| "objective/kl": 60.16917419433594, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 9.97581672668457, | |
| "objective/scores": 9.97581672668457, | |
| "policy/approxkl_avg": 0.006535097025334835, | |
| "policy/clipfrac_avg": 0.04995926469564438, | |
| "policy/entropy_avg": 0.8645215034484863, | |
| "step": 50, | |
| "val/clipfrac_avg": 0.000910063972696662, | |
| "val/num_eos_tokens": 508, | |
| "val/ratio": 1.0001347064971924, | |
| "val/ratio_var": 1.2472723028622568e-05 | |
| }, | |
| { | |
| "episode": 26112, | |
| "epoch": 0.22371487320082248, | |
| "eps": 5, | |
| "loss/policy_avg": -0.00042364001274108887, | |
| "loss/value_avg": 0.2829969525337219, | |
| "lr": 2.4140625000000002e-06, | |
| "objective/entropy": -19.165199279785156, | |
| "objective/kl": 66.85098266601562, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 10.05324935913086, | |
| "objective/scores": 10.05324935913086, | |
| "policy/approxkl_avg": 0.006855464540421963, | |
| "policy/clipfrac_avg": 0.04632896929979324, | |
| "policy/entropy_avg": 0.8457518815994263, | |
| "step": 51, | |
| "val/clipfrac_avg": 0.000306067755445838, | |
| "val/num_eos_tokens": 504, | |
| "val/ratio": 0.9993095397949219, | |
| "val/ratio_var": 1.3966228834760841e-05 | |
| }, | |
| { | |
| "episode": 26624, | |
| "epoch": 0.22810143934201507, | |
| "eps": 5, | |
| "loss/policy_avg": 0.000534743070602417, | |
| "loss/value_avg": 0.30065596103668213, | |
| "lr": 2.40234375e-06, | |
| "objective/entropy": -15.858919143676758, | |
| "objective/kl": 69.84648132324219, | |
| "objective/non_score_reward": 0.0, | |
| "objective/rlhf_reward": 9.9315185546875, | |
| "objective/scores": 9.9315185546875, | |
| "policy/approxkl_avg": 0.007337586954236031, | |
| "policy/clipfrac_avg": 0.04750536382198334, | |
| "policy/entropy_avg": 0.823326587677002, | |
| "step": 52, | |
| "val/clipfrac_avg": 0.00029861342045478523, | |
| "val/num_eos_tokens": 495, | |
| "val/ratio": 0.999434232711792, | |
| "val/ratio_var": 1.7595832105143927e-05 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 256, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1.122960932145305, | |
| "save_steps": 52, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0, | |
| "train_batch_size": null, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |