| { | |
| "best_metric": 0.6424652338027954, | |
| "best_model_checkpoint": "./output/checkpoints/2024-05-27_09-03-47/checkpoint-1100", | |
| "epoch": 0.8654602675059009, | |
| "eval_steps": 100, | |
| "global_step": 1100, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.003933910306845004, | |
| "grad_norm": 3.3316688537597656, | |
| "learning_rate": 1.5625e-06, | |
| "logits/chosen": -0.23308876156806946, | |
| "logits/rejected": -0.7131475806236267, | |
| "logps/chosen": -206.99105834960938, | |
| "logps/rejected": -177.7274169921875, | |
| "loss": 0.6932, | |
| "rewards/accuracies": 0.2874999940395355, | |
| "rewards/chosen": -0.0002649927628226578, | |
| "rewards/margins": -0.00024207351088989526, | |
| "rewards/rejected": -2.2919160983292386e-05, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.007867820613690008, | |
| "grad_norm": 3.3357934951782227, | |
| "learning_rate": 3.125e-06, | |
| "logits/chosen": -0.3985016942024231, | |
| "logits/rejected": -0.7369264960289001, | |
| "logps/chosen": -200.9151153564453, | |
| "logps/rejected": -176.99038696289062, | |
| "loss": 0.6925, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.005903811659663916, | |
| "rewards/margins": 0.0016752362716943026, | |
| "rewards/rejected": 0.004228575620800257, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.011801730920535013, | |
| "grad_norm": 3.923295259475708, | |
| "learning_rate": 4.6875000000000004e-06, | |
| "logits/chosen": -0.35743942856788635, | |
| "logits/rejected": -0.6597133278846741, | |
| "logps/chosen": -217.01358032226562, | |
| "logps/rejected": -193.7523651123047, | |
| "loss": 0.6918, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.024984199553728104, | |
| "rewards/margins": 0.0036054111551493406, | |
| "rewards/rejected": 0.021378787234425545, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.015735641227380016, | |
| "grad_norm": 3.3765008449554443, | |
| "learning_rate": 6.25e-06, | |
| "logits/chosen": -0.39036694169044495, | |
| "logits/rejected": -0.7354592680931091, | |
| "logps/chosen": -208.59579467773438, | |
| "logps/rejected": -179.28121948242188, | |
| "loss": 0.6882, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.045353055000305176, | |
| "rewards/margins": 0.012958641164004803, | |
| "rewards/rejected": 0.0323944166302681, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.01966955153422502, | |
| "grad_norm": 3.515080451965332, | |
| "learning_rate": 7.8125e-06, | |
| "logits/chosen": -0.24885046482086182, | |
| "logits/rejected": -0.7073865532875061, | |
| "logps/chosen": -195.0355682373047, | |
| "logps/rejected": -177.77667236328125, | |
| "loss": 0.6863, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": 0.08099620789289474, | |
| "rewards/margins": 0.01894540525972843, | |
| "rewards/rejected": 0.06205080822110176, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.023603461841070025, | |
| "grad_norm": 3.865326404571533, | |
| "learning_rate": 9.375000000000001e-06, | |
| "logits/chosen": -0.3395718038082123, | |
| "logits/rejected": -0.6314858794212341, | |
| "logps/chosen": -206.6709442138672, | |
| "logps/rejected": -182.71951293945312, | |
| "loss": 0.6878, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.1167207807302475, | |
| "rewards/margins": 0.017358621582388878, | |
| "rewards/rejected": 0.09936217218637466, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.02753737214791503, | |
| "grad_norm": 2.9975130558013916, | |
| "learning_rate": 1.0937500000000002e-05, | |
| "logits/chosen": -0.4961719512939453, | |
| "logits/rejected": -0.7695944309234619, | |
| "logps/chosen": -219.01626586914062, | |
| "logps/rejected": -197.20523071289062, | |
| "loss": 0.6792, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.19775943458080292, | |
| "rewards/margins": 0.04304610937833786, | |
| "rewards/rejected": 0.15471334755420685, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.03147128245476003, | |
| "grad_norm": 3.01800537109375, | |
| "learning_rate": 1.25e-05, | |
| "logits/chosen": -0.372029185295105, | |
| "logits/rejected": -0.7374556064605713, | |
| "logps/chosen": -192.9886932373047, | |
| "logps/rejected": -176.5814666748047, | |
| "loss": 0.6884, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.22313375771045685, | |
| "rewards/margins": 0.022503957152366638, | |
| "rewards/rejected": 0.2006298005580902, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.03540519276160504, | |
| "grad_norm": 3.402693033218384, | |
| "learning_rate": 1.4062500000000001e-05, | |
| "logits/chosen": -0.7087343335151672, | |
| "logits/rejected": -1.0628368854522705, | |
| "logps/chosen": -192.5178680419922, | |
| "logps/rejected": -162.42861938476562, | |
| "loss": 0.6825, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": 0.2631172239780426, | |
| "rewards/margins": 0.049251049757003784, | |
| "rewards/rejected": 0.2138661891222, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.03933910306845004, | |
| "grad_norm": 3.4222493171691895, | |
| "learning_rate": 1.5625e-05, | |
| "logits/chosen": -0.2554173171520233, | |
| "logits/rejected": -0.364858478307724, | |
| "logps/chosen": -204.78167724609375, | |
| "logps/rejected": -201.2439727783203, | |
| "loss": 0.6679, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.3668735921382904, | |
| "rewards/margins": 0.08200156688690186, | |
| "rewards/rejected": 0.28487205505371094, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.043273013375295044, | |
| "grad_norm": 4.184055805206299, | |
| "learning_rate": 1.71875e-05, | |
| "logits/chosen": -0.620415449142456, | |
| "logits/rejected": -0.823716938495636, | |
| "logps/chosen": -187.72640991210938, | |
| "logps/rejected": -173.8060760498047, | |
| "loss": 0.6819, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.4213651716709137, | |
| "rewards/margins": 0.06363700330257416, | |
| "rewards/rejected": 0.35772818326950073, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.04720692368214005, | |
| "grad_norm": 3.231966257095337, | |
| "learning_rate": 1.84375e-05, | |
| "logits/chosen": -0.3609635829925537, | |
| "logits/rejected": -0.8268268704414368, | |
| "logps/chosen": -207.54598999023438, | |
| "logps/rejected": -162.4337158203125, | |
| "loss": 0.654, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.45634379982948303, | |
| "rewards/margins": 0.14015206694602966, | |
| "rewards/rejected": 0.31619176268577576, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.05114083398898505, | |
| "grad_norm": 3.524355173110962, | |
| "learning_rate": 2e-05, | |
| "logits/chosen": -0.4630275368690491, | |
| "logits/rejected": -0.768640398979187, | |
| "logps/chosen": -203.02206420898438, | |
| "logps/rejected": -192.63986206054688, | |
| "loss": 0.6403, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.5891874432563782, | |
| "rewards/margins": 0.21450451016426086, | |
| "rewards/rejected": 0.3746829628944397, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.05507474429583006, | |
| "grad_norm": 2.4383580684661865, | |
| "learning_rate": 2.1562500000000002e-05, | |
| "logits/chosen": -0.4744432866573334, | |
| "logits/rejected": -0.8423410654067993, | |
| "logps/chosen": -203.81185913085938, | |
| "logps/rejected": -167.61216735839844, | |
| "loss": 0.6629, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.5474480390548706, | |
| "rewards/margins": 0.14927390217781067, | |
| "rewards/rejected": 0.39817413687705994, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.059008654602675056, | |
| "grad_norm": 3.5547280311584473, | |
| "learning_rate": 2.3125000000000003e-05, | |
| "logits/chosen": -0.14800386130809784, | |
| "logits/rejected": -0.5766772031784058, | |
| "logps/chosen": -197.69107055664062, | |
| "logps/rejected": -178.98941040039062, | |
| "loss": 0.6811, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.5598623752593994, | |
| "rewards/margins": 0.10427387058734894, | |
| "rewards/rejected": 0.4555884897708893, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.06294256490952006, | |
| "grad_norm": 4.430344581604004, | |
| "learning_rate": 2.46875e-05, | |
| "logits/chosen": -0.26265189051628113, | |
| "logits/rejected": -0.48529067635536194, | |
| "logps/chosen": -201.1565399169922, | |
| "logps/rejected": -191.77737426757812, | |
| "loss": 0.687, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.5075116157531738, | |
| "rewards/margins": 0.11926877498626709, | |
| "rewards/rejected": 0.3882428705692291, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.06687647521636507, | |
| "grad_norm": 3.615553855895996, | |
| "learning_rate": 2.625e-05, | |
| "logits/chosen": -0.25758761167526245, | |
| "logits/rejected": -0.7858015298843384, | |
| "logps/chosen": -199.0157928466797, | |
| "logps/rejected": -172.79676818847656, | |
| "loss": 0.6775, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.4158558249473572, | |
| "rewards/margins": 0.12296488136053085, | |
| "rewards/rejected": 0.2928909659385681, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.07081038552321008, | |
| "grad_norm": 3.164381504058838, | |
| "learning_rate": 2.7812500000000002e-05, | |
| "logits/chosen": -0.3559994101524353, | |
| "logits/rejected": -0.5853601694107056, | |
| "logps/chosen": -204.28671264648438, | |
| "logps/rejected": -182.05642700195312, | |
| "loss": 0.6827, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.242137148976326, | |
| "rewards/margins": 0.10781852900981903, | |
| "rewards/rejected": 0.13431859016418457, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.07474429583005507, | |
| "grad_norm": 2.60341739654541, | |
| "learning_rate": 2.9375000000000003e-05, | |
| "logits/chosen": -0.23700566589832306, | |
| "logits/rejected": -0.6567382216453552, | |
| "logps/chosen": -198.1564178466797, | |
| "logps/rejected": -181.05433654785156, | |
| "loss": 0.6825, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.2288060188293457, | |
| "rewards/margins": 0.06799931824207306, | |
| "rewards/rejected": 0.16080673038959503, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.07867820613690008, | |
| "grad_norm": 3.4135797023773193, | |
| "learning_rate": 3.09375e-05, | |
| "logits/chosen": -0.4318512976169586, | |
| "logits/rejected": -0.7775865793228149, | |
| "logps/chosen": -203.27996826171875, | |
| "logps/rejected": -176.29310607910156, | |
| "loss": 0.6604, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.29336217045783997, | |
| "rewards/margins": 0.14028367400169373, | |
| "rewards/rejected": 0.15307846665382385, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.07867820613690008, | |
| "eval_logits/chosen": 1.2891764640808105, | |
| "eval_logits/rejected": 1.0409352779388428, | |
| "eval_logps/chosen": -200.52639770507812, | |
| "eval_logps/rejected": -176.06118774414062, | |
| "eval_loss": 0.6588146686553955, | |
| "eval_rewards/accuracies": 0.6328125, | |
| "eval_rewards/chosen": 0.3721943497657776, | |
| "eval_rewards/margins": 0.156110480427742, | |
| "eval_rewards/rejected": 0.2160838395357132, | |
| "eval_runtime": 269.7198, | |
| "eval_samples_per_second": 2.373, | |
| "eval_steps_per_second": 0.148, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.08261211644374508, | |
| "grad_norm": 3.112549304962158, | |
| "learning_rate": 3.2500000000000004e-05, | |
| "logits/chosen": -0.19725528359413147, | |
| "logits/rejected": -0.48951825499534607, | |
| "logps/chosen": -195.0865478515625, | |
| "logps/rejected": -172.87655639648438, | |
| "loss": 0.6538, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.3994670808315277, | |
| "rewards/margins": 0.14713595807552338, | |
| "rewards/rejected": 0.2523311674594879, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.08654602675059009, | |
| "grad_norm": 2.9215586185455322, | |
| "learning_rate": 3.40625e-05, | |
| "logits/chosen": -0.48983463644981384, | |
| "logits/rejected": -0.9664360880851746, | |
| "logps/chosen": -188.05780029296875, | |
| "logps/rejected": -164.89785766601562, | |
| "loss": 0.6544, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.4765814244747162, | |
| "rewards/margins": 0.20020703971385956, | |
| "rewards/rejected": 0.2763743996620178, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.0904799370574351, | |
| "grad_norm": 3.5994484424591064, | |
| "learning_rate": 3.5625000000000005e-05, | |
| "logits/chosen": -0.6095945239067078, | |
| "logits/rejected": -0.8989366292953491, | |
| "logps/chosen": -189.4413299560547, | |
| "logps/rejected": -168.590576171875, | |
| "loss": 0.6849, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.5448002815246582, | |
| "rewards/margins": 0.17536191642284393, | |
| "rewards/rejected": 0.36943838000297546, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.0944138473642801, | |
| "grad_norm": 3.423135757446289, | |
| "learning_rate": 3.71875e-05, | |
| "logits/chosen": -0.5748457908630371, | |
| "logits/rejected": -0.8502093553543091, | |
| "logps/chosen": -184.19601440429688, | |
| "logps/rejected": -171.8808135986328, | |
| "loss": 0.6885, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.5411900281906128, | |
| "rewards/margins": 0.10678620636463165, | |
| "rewards/rejected": 0.43440380692481995, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.0983477576711251, | |
| "grad_norm": 3.1769092082977295, | |
| "learning_rate": 3.875e-05, | |
| "logits/chosen": -0.45406079292297363, | |
| "logits/rejected": -0.6608825325965881, | |
| "logps/chosen": -179.5055389404297, | |
| "logps/rejected": -170.6797332763672, | |
| "loss": 0.6673, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.42708373069763184, | |
| "rewards/margins": 0.13880428671836853, | |
| "rewards/rejected": 0.2882794141769409, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.1022816679779701, | |
| "grad_norm": 4.020690441131592, | |
| "learning_rate": 3.999992445477636e-05, | |
| "logits/chosen": -0.3575161099433899, | |
| "logits/rejected": -0.771265983581543, | |
| "logps/chosen": -198.4453887939453, | |
| "logps/rejected": -175.13534545898438, | |
| "loss": 0.6391, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.15054018795490265, | |
| "rewards/margins": 0.24290549755096436, | |
| "rewards/rejected": -0.09236530214548111, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.10621557828481511, | |
| "grad_norm": 3.482036590576172, | |
| "learning_rate": 3.999728043187288e-05, | |
| "logits/chosen": -0.42866629362106323, | |
| "logits/rejected": -0.8494951128959656, | |
| "logps/chosen": -204.20066833496094, | |
| "logps/rejected": -170.35543823242188, | |
| "loss": 0.6757, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.10458537191152573, | |
| "rewards/margins": 0.13884462416172028, | |
| "rewards/rejected": -0.2434299886226654, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.11014948859166011, | |
| "grad_norm": 3.43479061126709, | |
| "learning_rate": 3.9990859718476166e-05, | |
| "logits/chosen": -0.27079272270202637, | |
| "logits/rejected": -0.6653593182563782, | |
| "logps/chosen": -195.72396850585938, | |
| "logps/rejected": -176.048828125, | |
| "loss": 0.6438, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.24364686012268066, | |
| "rewards/margins": 0.21982736885547638, | |
| "rewards/rejected": 0.023819489404559135, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.11408339889850512, | |
| "grad_norm": 3.0492546558380127, | |
| "learning_rate": 3.998066352720348e-05, | |
| "logits/chosen": -0.23022302985191345, | |
| "logits/rejected": -0.6385291218757629, | |
| "logps/chosen": -198.3330535888672, | |
| "logps/rejected": -178.70986938476562, | |
| "loss": 0.603, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.36241570115089417, | |
| "rewards/margins": 0.3550655245780945, | |
| "rewards/rejected": 0.007350207772105932, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.11801730920535011, | |
| "grad_norm": 5.166781902313232, | |
| "learning_rate": 3.9966693783709596e-05, | |
| "logits/chosen": -0.4016013741493225, | |
| "logits/rejected": -0.5505572557449341, | |
| "logps/chosen": -189.4708709716797, | |
| "logps/rejected": -180.41171264648438, | |
| "loss": 0.6901, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.23043100535869598, | |
| "rewards/margins": 0.23260650038719177, | |
| "rewards/rejected": -0.0021755113266408443, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.12195121951219512, | |
| "grad_norm": 3.765799045562744, | |
| "learning_rate": 3.9948953126323144e-05, | |
| "logits/chosen": -0.5663745403289795, | |
| "logits/rejected": -0.7190238237380981, | |
| "logps/chosen": -195.40989685058594, | |
| "logps/rejected": -183.31375122070312, | |
| "loss": 0.6779, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.17181581258773804, | |
| "rewards/margins": 0.2907728850841522, | |
| "rewards/rejected": -0.11895710229873657, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.12588512981904013, | |
| "grad_norm": 4.032809734344482, | |
| "learning_rate": 3.992744490554832e-05, | |
| "logits/chosen": -0.39315614104270935, | |
| "logits/rejected": -0.8620352745056152, | |
| "logps/chosen": -190.1297149658203, | |
| "logps/rejected": -167.43600463867188, | |
| "loss": 0.6692, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.1846497356891632, | |
| "rewards/margins": 0.24123652279376984, | |
| "rewards/rejected": -0.05658679082989693, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.12981904012588513, | |
| "grad_norm": 3.013624429702759, | |
| "learning_rate": 3.990217318343214e-05, | |
| "logits/chosen": -0.21116772294044495, | |
| "logits/rejected": -0.6241986155509949, | |
| "logps/chosen": -206.351806640625, | |
| "logps/rejected": -182.8619384765625, | |
| "loss": 0.6244, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.4654034674167633, | |
| "rewards/margins": 0.3156747817993164, | |
| "rewards/rejected": 0.14972873032093048, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.13375295043273014, | |
| "grad_norm": 3.62973690032959, | |
| "learning_rate": 3.987314273279721e-05, | |
| "logits/chosen": -0.32551443576812744, | |
| "logits/rejected": -0.759263813495636, | |
| "logps/chosen": -197.4362335205078, | |
| "logps/rejected": -169.08892822265625, | |
| "loss": 0.6441, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.521133303642273, | |
| "rewards/margins": 0.24507877230644226, | |
| "rewards/rejected": 0.2760545313358307, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.13768686073957515, | |
| "grad_norm": 4.152413845062256, | |
| "learning_rate": 3.9840359036340424e-05, | |
| "logits/chosen": -0.33808866143226624, | |
| "logits/rejected": -0.6686294078826904, | |
| "logps/chosen": -210.7691192626953, | |
| "logps/rejected": -182.05105590820312, | |
| "loss": 0.6531, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.6028085947036743, | |
| "rewards/margins": 0.2191469967365265, | |
| "rewards/rejected": 0.3836616277694702, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.14162077104642015, | |
| "grad_norm": 2.187406301498413, | |
| "learning_rate": 3.980382828559743e-05, | |
| "logits/chosen": -0.12574271857738495, | |
| "logits/rejected": -0.46328145265579224, | |
| "logps/chosen": -221.7871856689453, | |
| "logps/rejected": -199.050048828125, | |
| "loss": 0.6714, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.48261910676956177, | |
| "rewards/margins": 0.22359433770179749, | |
| "rewards/rejected": 0.2590247690677643, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.14555468135326516, | |
| "grad_norm": 3.3965179920196533, | |
| "learning_rate": 3.9763557379773316e-05, | |
| "logits/chosen": -0.21986201405525208, | |
| "logits/rejected": -0.7373820543289185, | |
| "logps/chosen": -199.28152465820312, | |
| "logps/rejected": -170.54849243164062, | |
| "loss": 0.6352, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.22457706928253174, | |
| "rewards/margins": 0.29483360052108765, | |
| "rewards/rejected": -0.0702565535902977, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.14948859166011014, | |
| "grad_norm": 2.321876049041748, | |
| "learning_rate": 3.971955392443965e-05, | |
| "logits/chosen": -0.12594549357891083, | |
| "logits/rejected": -0.5465633869171143, | |
| "logps/chosen": -201.79563903808594, | |
| "logps/rejected": -183.66622924804688, | |
| "loss": 0.6454, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.2642104923725128, | |
| "rewards/margins": 0.27526894211769104, | |
| "rewards/rejected": -0.011058444157242775, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.15342250196695514, | |
| "grad_norm": 2.6755359172821045, | |
| "learning_rate": 3.9671826230098045e-05, | |
| "logits/chosen": -0.22815421223640442, | |
| "logits/rejected": -0.5611529350280762, | |
| "logps/chosen": -193.92630004882812, | |
| "logps/rejected": -169.29806518554688, | |
| "loss": 0.6391, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.10009317100048065, | |
| "rewards/margins": 0.32271507382392883, | |
| "rewards/rejected": -0.22262191772460938, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.15735641227380015, | |
| "grad_norm": 2.5886878967285156, | |
| "learning_rate": 3.962038331061065e-05, | |
| "logits/chosen": -0.13410992920398712, | |
| "logits/rejected": -0.5009399652481079, | |
| "logps/chosen": -202.4084014892578, | |
| "logps/rejected": -182.69384765625, | |
| "loss": 0.6441, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.09047303348779678, | |
| "rewards/margins": 0.3143870532512665, | |
| "rewards/rejected": -0.2239139974117279, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.15735641227380015, | |
| "eval_logits/chosen": 1.1994984149932861, | |
| "eval_logits/rejected": 0.9288139343261719, | |
| "eval_logps/chosen": -204.9707794189453, | |
| "eval_logps/rejected": -183.18075561523438, | |
| "eval_loss": 0.6612199544906616, | |
| "eval_rewards/accuracies": 0.621874988079071, | |
| "eval_rewards/chosen": 0.1499754935503006, | |
| "eval_rewards/margins": 0.2898713946342468, | |
| "eval_rewards/rejected": -0.13989591598510742, | |
| "eval_runtime": 272.3181, | |
| "eval_samples_per_second": 2.35, | |
| "eval_steps_per_second": 0.147, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.16129032258064516, | |
| "grad_norm": 1.9733163118362427, | |
| "learning_rate": 3.9565234881497835e-05, | |
| "logits/chosen": -0.11721654236316681, | |
| "logits/rejected": -0.4624119699001312, | |
| "logps/chosen": -196.6512451171875, | |
| "logps/rejected": -182.5417938232422, | |
| "loss": 0.6908, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.0795515924692154, | |
| "rewards/margins": 0.23799221217632294, | |
| "rewards/rejected": -0.15844061970710754, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.16522423288749016, | |
| "grad_norm": 4.967929840087891, | |
| "learning_rate": 3.950639135810326e-05, | |
| "logits/chosen": -0.22830729186534882, | |
| "logits/rejected": -0.462864488363266, | |
| "logps/chosen": -210.42221069335938, | |
| "logps/rejected": -187.0999298095703, | |
| "loss": 0.706, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": 0.3507956564426422, | |
| "rewards/margins": 0.12327106297016144, | |
| "rewards/rejected": 0.2275245636701584, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.16915814319433517, | |
| "grad_norm": 2.4658010005950928, | |
| "learning_rate": 3.944386385362683e-05, | |
| "logits/chosen": -0.011775115504860878, | |
| "logits/rejected": -0.4366488456726074, | |
| "logps/chosen": -204.55856323242188, | |
| "logps/rejected": -178.90711975097656, | |
| "loss": 0.6459, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.272403746843338, | |
| "rewards/margins": 0.3043806552886963, | |
| "rewards/rejected": -0.031976908445358276, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.17309205350118018, | |
| "grad_norm": 3.224493980407715, | |
| "learning_rate": 3.937766417702591e-05, | |
| "logits/chosen": -0.1112290471792221, | |
| "logits/rejected": -0.3926088213920593, | |
| "logps/chosen": -201.01698303222656, | |
| "logps/rejected": -173.99655151367188, | |
| "loss": 0.6401, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.4972043037414551, | |
| "rewards/margins": 0.30367809534072876, | |
| "rewards/rejected": 0.19352616369724274, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.17702596380802518, | |
| "grad_norm": 3.155855178833008, | |
| "learning_rate": 3.9307804830785033e-05, | |
| "logits/chosen": -0.19071224331855774, | |
| "logits/rejected": -0.5681020021438599, | |
| "logps/chosen": -208.1164093017578, | |
| "logps/rejected": -180.89883422851562, | |
| "loss": 0.6387, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.5927615761756897, | |
| "rewards/margins": 0.30187565088272095, | |
| "rewards/rejected": 0.29088592529296875, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.1809598741148702, | |
| "grad_norm": 2.853398561477661, | |
| "learning_rate": 3.923429900855468e-05, | |
| "logits/chosen": -0.32427653670310974, | |
| "logits/rejected": -0.7451451420783997, | |
| "logps/chosen": -195.99334716796875, | |
| "logps/rejected": -176.32376098632812, | |
| "loss": 0.6046, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.6133124828338623, | |
| "rewards/margins": 0.471308171749115, | |
| "rewards/rejected": 0.1420043259859085, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.1848937844217152, | |
| "grad_norm": 3.0965123176574707, | |
| "learning_rate": 3.915716059265956e-05, | |
| "logits/chosen": -0.279052197933197, | |
| "logits/rejected": -0.7491546869277954, | |
| "logps/chosen": -194.71755981445312, | |
| "logps/rejected": -172.21261596679688, | |
| "loss": 0.6186, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.28268158435821533, | |
| "rewards/margins": 0.38793596625328064, | |
| "rewards/rejected": -0.10525435209274292, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.1888276947285602, | |
| "grad_norm": 3.3253092765808105, | |
| "learning_rate": 3.907640415147675e-05, | |
| "logits/chosen": -0.019503340125083923, | |
| "logits/rejected": -0.36629518866539, | |
| "logps/chosen": -194.757568359375, | |
| "logps/rejected": -172.39112854003906, | |
| "loss": 0.6227, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.22514204680919647, | |
| "rewards/margins": 0.3900582194328308, | |
| "rewards/rejected": -0.16491621732711792, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.19276160503540518, | |
| "grad_norm": 3.1997830867767334, | |
| "learning_rate": 3.8992044936684326e-05, | |
| "logits/chosen": -0.2767964005470276, | |
| "logits/rejected": -0.5810253024101257, | |
| "logps/chosen": -209.00927734375, | |
| "logps/rejected": -203.82435607910156, | |
| "loss": 0.6911, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.02888309396803379, | |
| "rewards/margins": 0.26339852809906006, | |
| "rewards/rejected": -0.23451539874076843, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.1966955153422502, | |
| "grad_norm": 2.5685486793518066, | |
| "learning_rate": 3.8904098880380946e-05, | |
| "logits/chosen": -0.07118010520935059, | |
| "logits/rejected": -0.26244014501571655, | |
| "logps/chosen": -200.22207641601562, | |
| "logps/rejected": -189.04476928710938, | |
| "loss": 0.6803, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.25201526284217834, | |
| "rewards/margins": 0.21217398345470428, | |
| "rewards/rejected": 0.03984128683805466, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.2006294256490952, | |
| "grad_norm": 2.657667875289917, | |
| "learning_rate": 3.881258259207688e-05, | |
| "logits/chosen": -0.10402607917785645, | |
| "logits/rejected": -0.3553788661956787, | |
| "logps/chosen": -209.177978515625, | |
| "logps/rejected": -189.50613403320312, | |
| "loss": 0.622, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.5768004655838013, | |
| "rewards/margins": 0.3895408511161804, | |
| "rewards/rejected": 0.18725955486297607, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.2045633359559402, | |
| "grad_norm": 3.1508238315582275, | |
| "learning_rate": 3.8717513355557156e-05, | |
| "logits/chosen": -0.3484202027320862, | |
| "logits/rejected": -0.5296080708503723, | |
| "logps/chosen": -189.06045532226562, | |
| "logps/rejected": -176.0762481689453, | |
| "loss": 0.6827, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.420137494802475, | |
| "rewards/margins": 0.1734926998615265, | |
| "rewards/rejected": 0.2466447800397873, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.2084972462627852, | |
| "grad_norm": 2.7286055088043213, | |
| "learning_rate": 3.863891188678869e-05, | |
| "logits/chosen": -0.4520476758480072, | |
| "logits/rejected": -0.7075552940368652, | |
| "logps/chosen": -189.4468994140625, | |
| "logps/rejected": -163.7788543701172, | |
| "loss": 0.6341, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.6261769533157349, | |
| "rewards/margins": 0.373447060585022, | |
| "rewards/rejected": 0.2527299225330353, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.21243115656963021, | |
| "grad_norm": 3.117647647857666, | |
| "learning_rate": 3.855805751006794e-05, | |
| "logits/chosen": -0.17646411061286926, | |
| "logits/rejected": -0.5497474074363708, | |
| "logps/chosen": -206.43603515625, | |
| "logps/rejected": -169.03512573242188, | |
| "loss": 0.6173, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": 0.6339025497436523, | |
| "rewards/margins": 0.41420817375183105, | |
| "rewards/rejected": 0.2196943461894989, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.21636506687647522, | |
| "grad_norm": 2.892782688140869, | |
| "learning_rate": 3.8453836301696134e-05, | |
| "logits/chosen": -0.19655899703502655, | |
| "logits/rejected": -0.5140531063079834, | |
| "logps/chosen": -183.30697631835938, | |
| "logps/rejected": -176.54672241210938, | |
| "loss": 0.6484, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.5197827219963074, | |
| "rewards/margins": 0.2724143862724304, | |
| "rewards/rejected": 0.24736833572387695, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.22029897718332023, | |
| "grad_norm": 2.6433639526367188, | |
| "learning_rate": 3.8346129898006815e-05, | |
| "logits/chosen": -0.002289932919666171, | |
| "logits/rejected": -0.22839269042015076, | |
| "logps/chosen": -190.36752319335938, | |
| "logps/rejected": -177.63619995117188, | |
| "loss": 0.6335, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.6721555590629578, | |
| "rewards/margins": 0.42399945855140686, | |
| "rewards/rejected": 0.24815604090690613, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.22423288749016523, | |
| "grad_norm": 3.024284601211548, | |
| "learning_rate": 3.8234958640453525e-05, | |
| "logits/chosen": -0.2748643755912781, | |
| "logits/rejected": -0.4274403154850006, | |
| "logps/chosen": -188.20431518554688, | |
| "logps/rejected": -175.0402069091797, | |
| "loss": 0.687, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.6293169856071472, | |
| "rewards/margins": 0.20790882408618927, | |
| "rewards/rejected": 0.4214082360267639, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.22816679779701024, | |
| "grad_norm": 2.4070165157318115, | |
| "learning_rate": 3.812034352486282e-05, | |
| "logits/chosen": -0.2434273660182953, | |
| "logits/rejected": -0.42474550008773804, | |
| "logps/chosen": -182.94415283203125, | |
| "logps/rejected": -180.78018188476562, | |
| "loss": 0.6534, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.6648632287979126, | |
| "rewards/margins": 0.2526378333568573, | |
| "rewards/rejected": 0.4122254252433777, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.23210070810385522, | |
| "grad_norm": 2.867053270339966, | |
| "learning_rate": 3.8002306197468984e-05, | |
| "logits/chosen": -0.4638332426548004, | |
| "logits/rejected": -0.7584436535835266, | |
| "logps/chosen": -182.22518920898438, | |
| "logps/rejected": -159.70726013183594, | |
| "loss": 0.6708, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.7026508450508118, | |
| "rewards/margins": 0.22519198060035706, | |
| "rewards/rejected": 0.4774588644504547, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.23603461841070023, | |
| "grad_norm": 4.559377193450928, | |
| "learning_rate": 3.788086895082594e-05, | |
| "logits/chosen": -0.23287639021873474, | |
| "logits/rejected": -0.6191486120223999, | |
| "logps/chosen": -187.29556274414062, | |
| "logps/rejected": -169.7303466796875, | |
| "loss": 0.6531, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.7334493398666382, | |
| "rewards/margins": 0.30577802658081055, | |
| "rewards/rejected": 0.42767134308815, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.23603461841070023, | |
| "eval_logits/chosen": 1.4386663436889648, | |
| "eval_logits/rejected": 1.2204749584197998, | |
| "eval_logps/chosen": -193.3839569091797, | |
| "eval_logps/rejected": -170.81265258789062, | |
| "eval_loss": 0.6754804849624634, | |
| "eval_rewards/accuracies": 0.604687511920929, | |
| "eval_rewards/chosen": 0.7293164134025574, | |
| "eval_rewards/margins": 0.250806599855423, | |
| "eval_rewards/rejected": 0.4785098135471344, | |
| "eval_runtime": 269.9288, | |
| "eval_samples_per_second": 2.371, | |
| "eval_steps_per_second": 0.148, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.23996852871754523, | |
| "grad_norm": 3.1589412689208984, | |
| "learning_rate": 3.7756054719597054e-05, | |
| "logits/chosen": -0.5340144038200378, | |
| "logits/rejected": -0.8338343501091003, | |
| "logps/chosen": -186.1717529296875, | |
| "logps/rejected": -158.400634765625, | |
| "loss": 0.646, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.6378523707389832, | |
| "rewards/margins": 0.24317416548728943, | |
| "rewards/rejected": 0.3946782052516937, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.24390243902439024, | |
| "grad_norm": 2.2399826049804688, | |
| "learning_rate": 3.762788707622369e-05, | |
| "logits/chosen": -0.270986407995224, | |
| "logits/rejected": -0.5672872066497803, | |
| "logps/chosen": -196.96316528320312, | |
| "logps/rejected": -184.83358764648438, | |
| "loss": 0.6971, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.6476441621780396, | |
| "rewards/margins": 0.14692308008670807, | |
| "rewards/rejected": 0.5007210373878479, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.24783634933123525, | |
| "grad_norm": 2.3587327003479004, | |
| "learning_rate": 3.749639022647332e-05, | |
| "logits/chosen": -0.15218928456306458, | |
| "logits/rejected": -0.4672822952270508, | |
| "logps/chosen": -200.14068603515625, | |
| "logps/rejected": -178.75242614746094, | |
| "loss": 0.6694, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.6684466600418091, | |
| "rewards/margins": 0.1981629729270935, | |
| "rewards/rejected": 0.4702836871147156, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.25177025963808025, | |
| "grad_norm": 2.7182111740112305, | |
| "learning_rate": 3.7361589004868035e-05, | |
| "logits/chosen": -0.049394361674785614, | |
| "logits/rejected": -0.4820259213447571, | |
| "logps/chosen": -206.01303100585938, | |
| "logps/rejected": -174.92691040039062, | |
| "loss": 0.6542, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.5747781991958618, | |
| "rewards/margins": 0.2632519006729126, | |
| "rewards/rejected": 0.3115262985229492, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.25570416994492523, | |
| "grad_norm": 2.9146876335144043, | |
| "learning_rate": 3.722350886999425e-05, | |
| "logits/chosen": -0.3709413707256317, | |
| "logits/rejected": -0.9792379140853882, | |
| "logps/chosen": -205.5619659423828, | |
| "logps/rejected": -165.84860229492188, | |
| "loss": 0.6523, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.43693408370018005, | |
| "rewards/margins": 0.28134581446647644, | |
| "rewards/rejected": 0.1555882692337036, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.25963808025177026, | |
| "grad_norm": 3.1756091117858887, | |
| "learning_rate": 3.708217589969461e-05, | |
| "logits/chosen": -0.51994788646698, | |
| "logits/rejected": -0.7154557108879089, | |
| "logps/chosen": -205.0553436279297, | |
| "logps/rejected": -192.25355529785156, | |
| "loss": 0.6436, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": 0.45878300070762634, | |
| "rewards/margins": 0.30509161949157715, | |
| "rewards/rejected": 0.153691366314888, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.26357199055861524, | |
| "grad_norm": 2.1937553882598877, | |
| "learning_rate": 3.693761678614296e-05, | |
| "logits/chosen": -0.12544885277748108, | |
| "logits/rejected": -0.5810226202011108, | |
| "logps/chosen": -216.4395294189453, | |
| "logps/rejected": -177.5430908203125, | |
| "loss": 0.6281, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.42521587014198303, | |
| "rewards/margins": 0.363994836807251, | |
| "rewards/rejected": 0.06122100353240967, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.2675059008654603, | |
| "grad_norm": 2.9555442333221436, | |
| "learning_rate": 3.678985883080319e-05, | |
| "logits/chosen": -0.3867124319076538, | |
| "logits/rejected": -0.522415041923523, | |
| "logps/chosen": -193.5556640625, | |
| "logps/rejected": -190.2833251953125, | |
| "loss": 0.6844, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.23950310051441193, | |
| "rewards/margins": 0.2469015121459961, | |
| "rewards/rejected": -0.007398429326713085, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.27143981117230526, | |
| "grad_norm": 3.0025393962860107, | |
| "learning_rate": 3.6638929939273126e-05, | |
| "logits/chosen": -0.14068982005119324, | |
| "logits/rejected": -0.5062462091445923, | |
| "logps/chosen": -228.0189971923828, | |
| "logps/rejected": -192.20274353027344, | |
| "loss": 0.6403, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.36681967973709106, | |
| "rewards/margins": 0.3117842972278595, | |
| "rewards/rejected": 0.055035412311553955, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.2753737214791503, | |
| "grad_norm": 2.586651086807251, | |
| "learning_rate": 3.6484858616014244e-05, | |
| "logits/chosen": -0.33626532554626465, | |
| "logits/rejected": -0.636184811592102, | |
| "logps/chosen": -188.93069458007812, | |
| "logps/rejected": -176.8048858642578, | |
| "loss": 0.6398, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.2167416363954544, | |
| "rewards/margins": 0.3219740688800812, | |
| "rewards/rejected": -0.10523247718811035, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.27930763178599527, | |
| "grad_norm": 3.2158050537109375, | |
| "learning_rate": 3.632767395896833e-05, | |
| "logits/chosen": -0.2993752956390381, | |
| "logits/rejected": -0.7341363430023193, | |
| "logps/chosen": -201.6867218017578, | |
| "logps/rejected": -178.9425048828125, | |
| "loss": 0.6371, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.12748529016971588, | |
| "rewards/margins": 0.39171117544174194, | |
| "rewards/rejected": -0.26422587037086487, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.2832415420928403, | |
| "grad_norm": 2.831960439682007, | |
| "learning_rate": 3.6167405654062026e-05, | |
| "logits/chosen": -0.2205226868391037, | |
| "logits/rejected": -0.7331427931785583, | |
| "logps/chosen": -206.3934326171875, | |
| "logps/rejected": -173.00375366210938, | |
| "loss": 0.6077, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": 0.30700379610061646, | |
| "rewards/margins": 0.5170584321022034, | |
| "rewards/rejected": -0.21005460619926453, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.2871754523996853, | |
| "grad_norm": 2.9702041149139404, | |
| "learning_rate": 3.6004083969600346e-05, | |
| "logits/chosen": -0.004690873436629772, | |
| "logits/rejected": -0.3766002058982849, | |
| "logps/chosen": -209.6323699951172, | |
| "logps/rejected": -193.38568115234375, | |
| "loss": 0.6808, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.138104647397995, | |
| "rewards/margins": 0.3060592710971832, | |
| "rewards/rejected": -0.16795465350151062, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.2911093627065303, | |
| "grad_norm": 2.8821372985839844, | |
| "learning_rate": 3.5837739750550186e-05, | |
| "logits/chosen": -0.4767018258571625, | |
| "logits/rejected": -0.545331597328186, | |
| "logps/chosen": -186.98947143554688, | |
| "logps/rejected": -182.7305908203125, | |
| "loss": 0.7061, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.23797090351581573, | |
| "rewards/margins": 0.13995619118213654, | |
| "rewards/rejected": -0.3779270648956299, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.2950432730133753, | |
| "grad_norm": 3.13258695602417, | |
| "learning_rate": 3.566840441271496e-05, | |
| "logits/chosen": -0.2899293005466461, | |
| "logits/rejected": -0.5522339344024658, | |
| "logps/chosen": -199.23922729492188, | |
| "logps/rejected": -181.1404266357422, | |
| "loss": 0.6617, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.10681506246328354, | |
| "rewards/margins": 0.29632216691970825, | |
| "rewards/rejected": -0.18950709700584412, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.2989771833202203, | |
| "grad_norm": 2.4494378566741943, | |
| "learning_rate": 3.549610993680137e-05, | |
| "logits/chosen": -0.33420437574386597, | |
| "logits/rejected": -0.6601684093475342, | |
| "logps/chosen": -201.4299774169922, | |
| "logps/rejected": -178.04800415039062, | |
| "loss": 0.663, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.2849748432636261, | |
| "rewards/margins": 0.3158971071243286, | |
| "rewards/rejected": -0.03092227876186371, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.3029110936270653, | |
| "grad_norm": 2.70623517036438, | |
| "learning_rate": 3.532088886237956e-05, | |
| "logits/chosen": -0.3244260847568512, | |
| "logits/rejected": -0.6104148626327515, | |
| "logps/chosen": -197.0400848388672, | |
| "logps/rejected": -186.9191131591797, | |
| "loss": 0.6289, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.5122109651565552, | |
| "rewards/margins": 0.3946057856082916, | |
| "rewards/rejected": 0.11760516464710236, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.3068450039339103, | |
| "grad_norm": 2.2485783100128174, | |
| "learning_rate": 3.514277428173768e-05, | |
| "logits/chosen": -0.42509007453918457, | |
| "logits/rejected": -0.5977455973625183, | |
| "logps/chosen": -191.63674926757812, | |
| "logps/rejected": -179.55569458007812, | |
| "loss": 0.6614, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.6912093758583069, | |
| "rewards/margins": 0.2689628005027771, | |
| "rewards/rejected": 0.4222465455532074, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.3107789142407553, | |
| "grad_norm": 2.2680320739746094, | |
| "learning_rate": 3.4961799833632024e-05, | |
| "logits/chosen": -0.44027456641197205, | |
| "logits/rejected": -0.754062831401825, | |
| "logps/chosen": -190.41526794433594, | |
| "logps/rejected": -180.15870666503906, | |
| "loss": 0.7046, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": 0.3976368308067322, | |
| "rewards/margins": 0.1356535255908966, | |
| "rewards/rejected": 0.26198333501815796, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.3147128245476003, | |
| "grad_norm": 2.215034246444702, | |
| "learning_rate": 3.4777999696934076e-05, | |
| "logits/chosen": -0.5266469120979309, | |
| "logits/rejected": -0.8849590420722961, | |
| "logps/chosen": -189.89480590820312, | |
| "logps/rejected": -174.1988983154297, | |
| "loss": 0.687, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.3705733120441437, | |
| "rewards/margins": 0.14841142296791077, | |
| "rewards/rejected": 0.2221618890762329, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.3147128245476003, | |
| "eval_logits/chosen": 1.5457346439361572, | |
| "eval_logits/rejected": 1.3295015096664429, | |
| "eval_logps/chosen": -200.59536743164062, | |
| "eval_logps/rejected": -177.43679809570312, | |
| "eval_loss": 0.6690323948860168, | |
| "eval_rewards/accuracies": 0.6171875, | |
| "eval_rewards/chosen": 0.36874598264694214, | |
| "eval_rewards/margins": 0.221443772315979, | |
| "eval_rewards/rejected": 0.14730218052864075, | |
| "eval_runtime": 270.0147, | |
| "eval_samples_per_second": 2.37, | |
| "eval_steps_per_second": 0.148, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.31864673485444533, | |
| "grad_norm": 2.7971677780151367, | |
| "learning_rate": 3.459140858417543e-05, | |
| "logits/chosen": -0.4693244397640228, | |
| "logits/rejected": -0.7125430703163147, | |
| "logps/chosen": -185.1755828857422, | |
| "logps/rejected": -170.6441192626953, | |
| "loss": 0.6856, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.26629552245140076, | |
| "rewards/margins": 0.13958953320980072, | |
| "rewards/rejected": 0.12670595943927765, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.3225806451612903, | |
| "grad_norm": 2.304231643676758, | |
| "learning_rate": 3.440206173499201e-05, | |
| "logits/chosen": -0.44367557764053345, | |
| "logits/rejected": -0.7319197058677673, | |
| "logps/chosen": -203.03834533691406, | |
| "logps/rejected": -175.77809143066406, | |
| "loss": 0.6702, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.1391323059797287, | |
| "rewards/margins": 0.23978285491466522, | |
| "rewards/rejected": -0.10065053403377533, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.32651455546813535, | |
| "grad_norm": 2.381197214126587, | |
| "learning_rate": 3.420999490946868e-05, | |
| "logits/chosen": -0.4150654375553131, | |
| "logits/rejected": -0.6321390867233276, | |
| "logps/chosen": -192.13885498046875, | |
| "logps/rejected": -186.078125, | |
| "loss": 0.6627, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.16537415981292725, | |
| "rewards/margins": 0.19638404250144958, | |
| "rewards/rejected": -0.03100987896323204, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.3304484657749803, | |
| "grad_norm": 5.048974514007568, | |
| "learning_rate": 3.401524438138556e-05, | |
| "logits/chosen": -0.3787829279899597, | |
| "logits/rejected": -0.7062126398086548, | |
| "logps/chosen": -207.6349639892578, | |
| "logps/rejected": -181.1364288330078, | |
| "loss": 0.6424, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.13254733383655548, | |
| "rewards/margins": 0.2832036316394806, | |
| "rewards/rejected": -0.15065628290176392, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.33438237608182536, | |
| "grad_norm": 2.7124593257904053, | |
| "learning_rate": 3.3817846931367457e-05, | |
| "logits/chosen": -0.2681703269481659, | |
| "logits/rejected": -0.5482879281044006, | |
| "logps/chosen": -210.68704223632812, | |
| "logps/rejected": -195.8542938232422, | |
| "loss": 0.6459, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.17737464606761932, | |
| "rewards/margins": 0.23065805435180664, | |
| "rewards/rejected": -0.053283415734767914, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.33831628638867034, | |
| "grad_norm": 2.651090145111084, | |
| "learning_rate": 3.361783983993734e-05, | |
| "logits/chosen": -0.3691546618938446, | |
| "logits/rejected": -0.7862486243247986, | |
| "logps/chosen": -187.84292602539062, | |
| "logps/rejected": -162.55624389648438, | |
| "loss": 0.6133, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.24909105896949768, | |
| "rewards/margins": 0.33308321237564087, | |
| "rewards/rejected": -0.08399216830730438, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.3422501966955153, | |
| "grad_norm": 2.3644063472747803, | |
| "learning_rate": 3.3415260880475626e-05, | |
| "logits/chosen": -0.4831025004386902, | |
| "logits/rejected": -0.732842206954956, | |
| "logps/chosen": -190.97463989257812, | |
| "logps/rejected": -176.51910400390625, | |
| "loss": 0.6519, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.28320473432540894, | |
| "rewards/margins": 0.24385762214660645, | |
| "rewards/rejected": 0.03934710472822189, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.34618410700236035, | |
| "grad_norm": 2.3520524501800537, | |
| "learning_rate": 3.321014831208622e-05, | |
| "logits/chosen": -0.1195015162229538, | |
| "logits/rejected": -0.5789833664894104, | |
| "logps/chosen": -202.43490600585938, | |
| "logps/rejected": -170.22422790527344, | |
| "loss": 0.6539, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.17942115664482117, | |
| "rewards/margins": 0.2874607741832733, | |
| "rewards/rejected": -0.10803960263729095, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.35011801730920533, | |
| "grad_norm": 2.6864142417907715, | |
| "learning_rate": 3.300254087237097e-05, | |
| "logits/chosen": -0.5105729103088379, | |
| "logits/rejected": -0.8194944262504578, | |
| "logps/chosen": -185.3482666015625, | |
| "logps/rejected": -173.4835205078125, | |
| "loss": 0.6784, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.04661710932850838, | |
| "rewards/margins": 0.19141128659248352, | |
| "rewards/rejected": -0.14479416608810425, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.35405192761605037, | |
| "grad_norm": 2.69472599029541, | |
| "learning_rate": 3.2792477770113626e-05, | |
| "logits/chosen": -0.5864490866661072, | |
| "logits/rejected": -0.945260226726532, | |
| "logps/chosen": -187.8654327392578, | |
| "logps/rejected": -176.0469207763672, | |
| "loss": 0.6799, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.10426144301891327, | |
| "rewards/margins": 0.20091381669044495, | |
| "rewards/rejected": -0.09665237367153168, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.35798583792289534, | |
| "grad_norm": 2.4134440422058105, | |
| "learning_rate": 3.2579998677874855e-05, | |
| "logits/chosen": -0.5262443423271179, | |
| "logits/rejected": -0.8252617120742798, | |
| "logps/chosen": -218.12850952148438, | |
| "logps/rejected": -184.57481384277344, | |
| "loss": 0.6837, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.2644971013069153, | |
| "rewards/margins": 0.19908662140369415, | |
| "rewards/rejected": 0.06541050970554352, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.3619197482297404, | |
| "grad_norm": 2.596764087677002, | |
| "learning_rate": 3.236514372449969e-05, | |
| "logits/chosen": -0.6542834043502808, | |
| "logits/rejected": -0.9779418706893921, | |
| "logps/chosen": -197.54644775390625, | |
| "logps/rejected": -173.59286499023438, | |
| "loss": 0.646, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.1335887908935547, | |
| "rewards/margins": 0.24607214331626892, | |
| "rewards/rejected": -0.11248335987329483, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.36585365853658536, | |
| "grad_norm": 2.247732400894165, | |
| "learning_rate": 3.21479534875388e-05, | |
| "logits/chosen": -0.3415656089782715, | |
| "logits/rejected": -0.5303326845169067, | |
| "logps/chosen": -185.9266815185547, | |
| "logps/rejected": -186.3561553955078, | |
| "loss": 0.67, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.1211063489317894, | |
| "rewards/margins": 0.2140074223279953, | |
| "rewards/rejected": -0.0929010808467865, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.3697875688434304, | |
| "grad_norm": 2.8643524646759033, | |
| "learning_rate": 3.192846898558498e-05, | |
| "logits/chosen": -0.2663280963897705, | |
| "logits/rejected": -0.5768864750862122, | |
| "logps/chosen": -202.178466796875, | |
| "logps/rejected": -183.9998779296875, | |
| "loss": 0.7073, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.25511378049850464, | |
| "rewards/margins": 0.07217863947153091, | |
| "rewards/rejected": 0.18293514847755432, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.37372147915027537, | |
| "grad_norm": 2.451780080795288, | |
| "learning_rate": 3.1706731670526396e-05, | |
| "logits/chosen": -0.30710285902023315, | |
| "logits/rejected": -0.6132678985595703, | |
| "logps/chosen": -217.419189453125, | |
| "logps/rejected": -196.37863159179688, | |
| "loss": 0.6562, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.33428412675857544, | |
| "rewards/margins": 0.18632589280605316, | |
| "rewards/rejected": 0.1479582041501999, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.3776553894571204, | |
| "grad_norm": 2.6921944618225098, | |
| "learning_rate": 3.1482783419717954e-05, | |
| "logits/chosen": -0.3893197178840637, | |
| "logits/rejected": -0.5811697840690613, | |
| "logps/chosen": -201.89862060546875, | |
| "logps/rejected": -189.1037139892578, | |
| "loss": 0.6386, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.273657888174057, | |
| "rewards/margins": 0.27013736963272095, | |
| "rewards/rejected": 0.003520522266626358, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.3815892997639654, | |
| "grad_norm": 2.6391797065734863, | |
| "learning_rate": 3.125666652807233e-05, | |
| "logits/chosen": -0.25475451350212097, | |
| "logits/rejected": -0.4850390553474426, | |
| "logps/chosen": -209.5026397705078, | |
| "logps/rejected": -193.64634704589844, | |
| "loss": 0.6968, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": 0.5278674960136414, | |
| "rewards/margins": 0.1647617667913437, | |
| "rewards/rejected": 0.3631057143211365, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.38552321007081036, | |
| "grad_norm": 2.6771578788757324, | |
| "learning_rate": 3.1028423700072174e-05, | |
| "logits/chosen": -0.6072514653205872, | |
| "logits/rejected": -1.0645798444747925, | |
| "logps/chosen": -190.10302734375, | |
| "logps/rejected": -158.712890625, | |
| "loss": 0.6248, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.5022831559181213, | |
| "rewards/margins": 0.34136638045310974, | |
| "rewards/rejected": 0.16091671586036682, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.3894571203776554, | |
| "grad_norm": 2.391671657562256, | |
| "learning_rate": 3.0798098041704894e-05, | |
| "logits/chosen": -0.02403702773153782, | |
| "logits/rejected": -0.4747490882873535, | |
| "logps/chosen": -200.26260375976562, | |
| "logps/rejected": -169.43063354492188, | |
| "loss": 0.6397, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.6997808218002319, | |
| "rewards/margins": 0.28766918182373047, | |
| "rewards/rejected": 0.412111759185791, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.3933910306845004, | |
| "grad_norm": 1.8531993627548218, | |
| "learning_rate": 3.0565733052321674e-05, | |
| "logits/chosen": -0.024867746978998184, | |
| "logits/rejected": -0.4925617277622223, | |
| "logps/chosen": -196.09439086914062, | |
| "logps/rejected": -173.60400390625, | |
| "loss": 0.6031, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.7350005507469177, | |
| "rewards/margins": 0.4232231676578522, | |
| "rewards/rejected": 0.31177738308906555, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.3933910306845004, | |
| "eval_logits/chosen": 1.4818682670593262, | |
| "eval_logits/rejected": 1.2831951379776, | |
| "eval_logps/chosen": -196.52505493164062, | |
| "eval_logps/rejected": -174.25390625, | |
| "eval_loss": 0.666137158870697, | |
| "eval_rewards/accuracies": 0.6171875, | |
| "eval_rewards/chosen": 0.5722616314888, | |
| "eval_rewards/margins": 0.2658155560493469, | |
| "eval_rewards/rejected": 0.3064460754394531, | |
| "eval_runtime": 280.1463, | |
| "eval_samples_per_second": 2.285, | |
| "eval_steps_per_second": 0.143, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.3973249409913454, | |
| "grad_norm": 2.125474214553833, | |
| "learning_rate": 3.0331372616422192e-05, | |
| "logits/chosen": 0.048880137503147125, | |
| "logits/rejected": -0.3218826651573181, | |
| "logps/chosen": -198.61683654785156, | |
| "logps/rejected": -181.21099853515625, | |
| "loss": 0.6293, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.47136014699935913, | |
| "rewards/margins": 0.4129703640937805, | |
| "rewards/rejected": 0.05838974192738533, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.4012588512981904, | |
| "grad_norm": 3.3055050373077393, | |
| "learning_rate": 3.0095060995366532e-05, | |
| "logits/chosen": 0.05968475341796875, | |
| "logits/rejected": -0.5222965478897095, | |
| "logps/chosen": -206.580322265625, | |
| "logps/rejected": -170.30113220214844, | |
| "loss": 0.6095, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": 0.33514371514320374, | |
| "rewards/margins": 0.4338752329349518, | |
| "rewards/rejected": -0.09873148053884506, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.4051927616050354, | |
| "grad_norm": 1.8534085750579834, | |
| "learning_rate": 2.9856842819016035e-05, | |
| "logits/chosen": -0.2114788293838501, | |
| "logits/rejected": -0.30956801772117615, | |
| "logps/chosen": -200.4823760986328, | |
| "logps/rejected": -184.68728637695312, | |
| "loss": 0.6356, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.10386836528778076, | |
| "rewards/margins": 0.3247467577457428, | |
| "rewards/rejected": -0.22087836265563965, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.4091266719118804, | |
| "grad_norm": 3.4791042804718018, | |
| "learning_rate": 2.961676307730446e-05, | |
| "logits/chosen": 0.03065328672528267, | |
| "logits/rejected": -0.21640470623970032, | |
| "logps/chosen": -191.57827758789062, | |
| "logps/rejected": -176.0836181640625, | |
| "loss": 0.69, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.05484437197446823, | |
| "rewards/margins": 0.3477749228477478, | |
| "rewards/rejected": -0.292930543422699, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.41306058221872544, | |
| "grad_norm": 2.4210503101348877, | |
| "learning_rate": 2.9374867111741178e-05, | |
| "logits/chosen": 0.07030024379491806, | |
| "logits/rejected": -0.24621661007404327, | |
| "logps/chosen": -216.01968383789062, | |
| "logps/rejected": -192.189453125, | |
| "loss": 0.7027, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.33896225690841675, | |
| "rewards/margins": 0.13148066401481628, | |
| "rewards/rejected": -0.47044292092323303, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.4169944925255704, | |
| "grad_norm": 2.2114298343658447, | |
| "learning_rate": 2.9131200606847963e-05, | |
| "logits/chosen": -0.1181550845503807, | |
| "logits/rejected": -0.4900067448616028, | |
| "logps/chosen": -203.14413452148438, | |
| "logps/rejected": -174.24661254882812, | |
| "loss": 0.6213, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.09447288513183594, | |
| "rewards/margins": 0.383715957403183, | |
| "rewards/rejected": -0.28924307227134705, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.4209284028324154, | |
| "grad_norm": 1.7830487489700317, | |
| "learning_rate": 2.888580958153103e-05, | |
| "logits/chosen": 0.10650482028722763, | |
| "logits/rejected": -0.2542504072189331, | |
| "logps/chosen": -204.3175048828125, | |
| "logps/rejected": -186.55433654785156, | |
| "loss": 0.6465, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.14108797907829285, | |
| "rewards/margins": 0.3486561179161072, | |
| "rewards/rejected": -0.20756816864013672, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.42486231313926043, | |
| "grad_norm": 1.8033552169799805, | |
| "learning_rate": 2.8638740380389866e-05, | |
| "logits/chosen": -0.02104266732931137, | |
| "logits/rejected": -0.36019057035446167, | |
| "logps/chosen": -196.21939086914062, | |
| "logps/rejected": -174.48251342773438, | |
| "loss": 0.615, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.40048947930336, | |
| "rewards/margins": 0.566994309425354, | |
| "rewards/rejected": -0.1665048748254776, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.4287962234461054, | |
| "grad_norm": 2.6771342754364014, | |
| "learning_rate": 2.839003966496458e-05, | |
| "logits/chosen": 0.14330948889255524, | |
| "logits/rejected": -0.22386810183525085, | |
| "logps/chosen": -201.0225372314453, | |
| "logps/rejected": -180.50277709960938, | |
| "loss": 0.6037, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": 0.3381316661834717, | |
| "rewards/margins": 0.43739938735961914, | |
| "rewards/rejected": -0.09926770627498627, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.43273013375295044, | |
| "grad_norm": 2.9870705604553223, | |
| "learning_rate": 2.8139754404923425e-05, | |
| "logits/chosen": 0.015574288554489613, | |
| "logits/rejected": -0.344787061214447, | |
| "logps/chosen": -212.2222137451172, | |
| "logps/rejected": -192.35641479492188, | |
| "loss": 0.7468, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.21686002612113953, | |
| "rewards/margins": 0.08241648972034454, | |
| "rewards/rejected": 0.13444355130195618, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.4366640440597954, | |
| "grad_norm": 2.0503382682800293, | |
| "learning_rate": 2.7887931869192053e-05, | |
| "logits/chosen": -0.4219673275947571, | |
| "logits/rejected": -0.701318085193634, | |
| "logps/chosen": -208.24050903320312, | |
| "logps/rejected": -186.3321075439453, | |
| "loss": 0.618, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.27399730682373047, | |
| "rewards/margins": 0.38380298018455505, | |
| "rewards/rejected": -0.10980571806430817, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.44059795436664045, | |
| "grad_norm": 3.30218505859375, | |
| "learning_rate": 2.7634619617026338e-05, | |
| "logits/chosen": -0.11477865278720856, | |
| "logits/rejected": -0.41164344549179077, | |
| "logps/chosen": -206.08749389648438, | |
| "logps/rejected": -195.32931518554688, | |
| "loss": 0.6696, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.3716500401496887, | |
| "rewards/margins": 0.2603529393672943, | |
| "rewards/rejected": 0.1112971305847168, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.44453186467348543, | |
| "grad_norm": 2.4470412731170654, | |
| "learning_rate": 2.7379865489030294e-05, | |
| "logits/chosen": -0.052120137959718704, | |
| "logits/rejected": -0.3156708776950836, | |
| "logps/chosen": -207.0930938720703, | |
| "logps/rejected": -195.6240997314453, | |
| "loss": 0.7201, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.3207889795303345, | |
| "rewards/margins": 0.10586751997470856, | |
| "rewards/rejected": 0.21492144465446472, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.44846577498033047, | |
| "grad_norm": 2.422102689743042, | |
| "learning_rate": 2.7123717598120897e-05, | |
| "logits/chosen": -0.14508476853370667, | |
| "logits/rejected": -0.5477874875068665, | |
| "logps/chosen": -203.1314697265625, | |
| "logps/rejected": -183.87571716308594, | |
| "loss": 0.6262, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.36026519536972046, | |
| "rewards/margins": 0.36862918734550476, | |
| "rewards/rejected": -0.00836402177810669, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.45239968528717545, | |
| "grad_norm": 2.728649377822876, | |
| "learning_rate": 2.6866224320441494e-05, | |
| "logits/chosen": 0.006149314343929291, | |
| "logits/rejected": -0.32141727209091187, | |
| "logps/chosen": -218.5233917236328, | |
| "logps/rejected": -202.95529174804688, | |
| "loss": 0.6625, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.23595723509788513, | |
| "rewards/margins": 0.24294991791248322, | |
| "rewards/rejected": -0.006992635317146778, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.4563335955940205, | |
| "grad_norm": 2.0676088333129883, | |
| "learning_rate": 2.660743428622543e-05, | |
| "logits/chosen": -0.16871149837970734, | |
| "logits/rejected": -0.5247394442558289, | |
| "logps/chosen": -202.14022827148438, | |
| "logps/rejected": -180.7646484375, | |
| "loss": 0.6624, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.1394488662481308, | |
| "rewards/margins": 0.2882606089115143, | |
| "rewards/rejected": -0.14881177246570587, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.46026750590086546, | |
| "grad_norm": 2.3843834400177, | |
| "learning_rate": 2.6347396370611775e-05, | |
| "logits/chosen": -0.17608961462974548, | |
| "logits/rejected": -0.6268019676208496, | |
| "logps/chosen": -211.22805786132812, | |
| "logps/rejected": -186.0729217529297, | |
| "loss": 0.6532, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.2502868175506592, | |
| "rewards/margins": 0.2955116629600525, | |
| "rewards/rejected": -0.045224837958812714, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.46420141620771044, | |
| "grad_norm": 3.202221155166626, | |
| "learning_rate": 2.6086159684414726e-05, | |
| "logits/chosen": -0.15955504775047302, | |
| "logits/rejected": -0.34359580278396606, | |
| "logps/chosen": -192.7601776123047, | |
| "logps/rejected": -189.27383422851562, | |
| "loss": 0.6688, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.008236098103225231, | |
| "rewards/margins": 0.19347552955150604, | |
| "rewards/rejected": -0.18523943424224854, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.46813532651455547, | |
| "grad_norm": 2.1680684089660645, | |
| "learning_rate": 2.5823773564848537e-05, | |
| "logits/chosen": -0.2290937900543213, | |
| "logits/rejected": -0.7466328740119934, | |
| "logps/chosen": -201.345703125, | |
| "logps/rejected": -170.40640258789062, | |
| "loss": 0.6081, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.21915683150291443, | |
| "rewards/margins": 0.515847384929657, | |
| "rewards/rejected": -0.29669052362442017, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.47206923682140045, | |
| "grad_norm": 2.583576202392578, | |
| "learning_rate": 2.556028756620969e-05, | |
| "logits/chosen": -0.23650631308555603, | |
| "logits/rejected": -0.495327889919281, | |
| "logps/chosen": -196.6111297607422, | |
| "logps/rejected": -177.023681640625, | |
| "loss": 0.6193, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.21434268355369568, | |
| "rewards/margins": 0.3948139548301697, | |
| "rewards/rejected": -0.180471271276474, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.47206923682140045, | |
| "eval_logits/chosen": 1.6984869241714478, | |
| "eval_logits/rejected": 1.5151393413543701, | |
| "eval_logps/chosen": -204.12509155273438, | |
| "eval_logps/rejected": -182.4573974609375, | |
| "eval_loss": 0.6511049866676331, | |
| "eval_rewards/accuracies": 0.6484375, | |
| "eval_rewards/chosen": 0.19226065278053284, | |
| "eval_rewards/margins": 0.29598867893218994, | |
| "eval_rewards/rejected": -0.10372801125049591, | |
| "eval_runtime": 296.1142, | |
| "eval_samples_per_second": 2.161, | |
| "eval_steps_per_second": 0.135, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.4760031471282455, | |
| "grad_norm": 1.8097811937332153, | |
| "learning_rate": 2.5295751450518053e-05, | |
| "logits/chosen": -0.08073046803474426, | |
| "logits/rejected": -0.5139580965042114, | |
| "logps/chosen": -194.52102661132812, | |
| "logps/rejected": -165.5224609375, | |
| "loss": 0.6238, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.10419633239507675, | |
| "rewards/margins": 0.34015730023384094, | |
| "rewards/rejected": -0.2359609305858612, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.47993705743509046, | |
| "grad_norm": 2.6317665576934814, | |
| "learning_rate": 2.5030215178118825e-05, | |
| "logits/chosen": -0.034722644835710526, | |
| "logits/rejected": -0.3910979926586151, | |
| "logps/chosen": -223.3106689453125, | |
| "logps/rejected": -194.74200439453125, | |
| "loss": 0.6597, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.23497729003429413, | |
| "rewards/margins": 0.2651119828224182, | |
| "rewards/rejected": -0.030134687200188637, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.4838709677419355, | |
| "grad_norm": 2.597273588180542, | |
| "learning_rate": 2.4763728898246987e-05, | |
| "logits/chosen": -0.09993930906057358, | |
| "logits/rejected": -0.34456345438957214, | |
| "logps/chosen": -198.48321533203125, | |
| "logps/rejected": -197.89112854003906, | |
| "loss": 0.6804, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.1525276005268097, | |
| "rewards/margins": 0.21232759952545166, | |
| "rewards/rejected": -0.05979999899864197, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.4878048780487805, | |
| "grad_norm": 2.3702244758605957, | |
| "learning_rate": 2.4496342939556134e-05, | |
| "logits/chosen": -0.3617291748523712, | |
| "logits/rejected": -0.6139717102050781, | |
| "logps/chosen": -192.67788696289062, | |
| "logps/rejected": -174.16844177246094, | |
| "loss": 0.6493, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.18741658329963684, | |
| "rewards/margins": 0.32338622212409973, | |
| "rewards/rejected": -0.1359696388244629, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.4917387883556255, | |
| "grad_norm": 1.963498592376709, | |
| "learning_rate": 2.4228107800613402e-05, | |
| "logits/chosen": -0.095928855240345, | |
| "logits/rejected": -0.5435774326324463, | |
| "logps/chosen": -197.4298553466797, | |
| "logps/rejected": -202.0504913330078, | |
| "loss": 0.6076, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.08684808760881424, | |
| "rewards/margins": 0.40268635749816895, | |
| "rewards/rejected": -0.3158382773399353, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.4956726986624705, | |
| "grad_norm": 3.043241262435913, | |
| "learning_rate": 2.3959074140362275e-05, | |
| "logits/chosen": 0.050285615026950836, | |
| "logits/rejected": -0.4371914267539978, | |
| "logps/chosen": -209.23727416992188, | |
| "logps/rejected": -185.2758331298828, | |
| "loss": 0.6736, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.18328359723091125, | |
| "rewards/margins": 0.32128214836120605, | |
| "rewards/rejected": -0.1379985362291336, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.4996066089693155, | |
| "grad_norm": 2.762970209121704, | |
| "learning_rate": 2.368929276855512e-05, | |
| "logits/chosen": -0.26551324129104614, | |
| "logits/rejected": -0.512377142906189, | |
| "logps/chosen": -185.96188354492188, | |
| "logps/rejected": -175.6711883544922, | |
| "loss": 0.7274, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.11835911124944687, | |
| "rewards/margins": 0.10997643321752548, | |
| "rewards/rejected": -0.22833557426929474, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.5035405192761605, | |
| "grad_norm": 2.516867160797119, | |
| "learning_rate": 2.3418814636157287e-05, | |
| "logits/chosen": 0.10214798152446747, | |
| "logits/rejected": -0.3175424635410309, | |
| "logps/chosen": -207.58535766601562, | |
| "logps/rejected": -178.94619750976562, | |
| "loss": 0.6342, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.1707356721162796, | |
| "rewards/margins": 0.4251019358634949, | |
| "rewards/rejected": -0.2543662488460541, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.5074744295830055, | |
| "grad_norm": 1.9342600107192993, | |
| "learning_rate": 2.314769082572446e-05, | |
| "logits/chosen": 0.10380957275629044, | |
| "logits/rejected": -0.1748799979686737, | |
| "logps/chosen": -203.0720672607422, | |
| "logps/rejected": -190.49082946777344, | |
| "loss": 0.623, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.04285300523042679, | |
| "rewards/margins": 0.3142266571521759, | |
| "rewards/rejected": -0.2713736593723297, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.5114083398898505, | |
| "grad_norm": 2.091183662414551, | |
| "learning_rate": 2.287597254175521e-05, | |
| "logits/chosen": 0.041324738413095474, | |
| "logits/rejected": -0.44386720657348633, | |
| "logps/chosen": -217.5801544189453, | |
| "logps/rejected": -206.60678100585938, | |
| "loss": 0.6049, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": 0.1052078977227211, | |
| "rewards/margins": 0.4272983968257904, | |
| "rewards/rejected": -0.3220905065536499, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.5153422501966956, | |
| "grad_norm": 2.1388349533081055, | |
| "learning_rate": 2.2603711101020505e-05, | |
| "logits/chosen": -0.29771262407302856, | |
| "logits/rejected": -0.4897081255912781, | |
| "logps/chosen": -194.26113891601562, | |
| "logps/rejected": -191.60336303710938, | |
| "loss": 0.6039, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.061424724757671356, | |
| "rewards/margins": 0.5192595720291138, | |
| "rewards/rejected": -0.4578348994255066, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.5192761605035405, | |
| "grad_norm": 2.286508083343506, | |
| "learning_rate": 2.2330957922872018e-05, | |
| "logits/chosen": -0.013643920421600342, | |
| "logits/rejected": -0.4556857645511627, | |
| "logps/chosen": -216.83694458007812, | |
| "logps/rejected": -181.0327911376953, | |
| "loss": 0.6168, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.22640368342399597, | |
| "rewards/margins": 0.40599650144577026, | |
| "rewards/rejected": -0.17959284782409668, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.5232100708103855, | |
| "grad_norm": 2.5265846252441406, | |
| "learning_rate": 2.205776451953104e-05, | |
| "logits/chosen": -0.13337358832359314, | |
| "logits/rejected": -0.3839452862739563, | |
| "logps/chosen": -200.55133056640625, | |
| "logps/rejected": -189.0050048828125, | |
| "loss": 0.6718, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.024026717990636826, | |
| "rewards/margins": 0.2730267345905304, | |
| "rewards/rejected": -0.29705342650413513, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.5271439811172305, | |
| "grad_norm": 2.4897048473358154, | |
| "learning_rate": 2.1784182486359882e-05, | |
| "logits/chosen": -0.04155014082789421, | |
| "logits/rejected": -0.4188694357872009, | |
| "logps/chosen": -206.1847686767578, | |
| "logps/rejected": -186.2632293701172, | |
| "loss": 0.6215, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.10479116439819336, | |
| "rewards/margins": 0.46938610076904297, | |
| "rewards/rejected": -0.36459487676620483, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.5310778914240756, | |
| "grad_norm": 1.8308933973312378, | |
| "learning_rate": 2.151026349211758e-05, | |
| "logits/chosen": 0.00799607764929533, | |
| "logits/rejected": -0.3815770745277405, | |
| "logps/chosen": -203.25927734375, | |
| "logps/rejected": -194.2572021484375, | |
| "loss": 0.5795, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.33486539125442505, | |
| "rewards/margins": 0.5302278399467468, | |
| "rewards/rejected": -0.19536247849464417, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.5350118017309206, | |
| "grad_norm": 2.761611223220825, | |
| "learning_rate": 2.1236059269201686e-05, | |
| "logits/chosen": 0.037347499281167984, | |
| "logits/rejected": -0.23829114437103271, | |
| "logps/chosen": -198.4077911376953, | |
| "logps/rejected": -188.66552734375, | |
| "loss": 0.666, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.23368187248706818, | |
| "rewards/margins": 0.28406110405921936, | |
| "rewards/rejected": -0.05037923902273178, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.5389457120377655, | |
| "grad_norm": 2.5448572635650635, | |
| "learning_rate": 2.0961621603878113e-05, | |
| "logits/chosen": 0.07817180454730988, | |
| "logits/rejected": -0.394483745098114, | |
| "logps/chosen": -212.9225311279297, | |
| "logps/rejected": -186.3399200439453, | |
| "loss": 0.6374, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.20021621882915497, | |
| "rewards/margins": 0.3734167516231537, | |
| "rewards/rejected": -0.1732005476951599, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.5428796223446105, | |
| "grad_norm": 2.08206844329834, | |
| "learning_rate": 2.0687002326500748e-05, | |
| "logits/chosen": -0.10365025699138641, | |
| "logits/rejected": -0.35274845361709595, | |
| "logps/chosen": -188.2677459716797, | |
| "logps/rejected": -176.98977661132812, | |
| "loss": 0.6271, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.22998547554016113, | |
| "rewards/margins": 0.37053442001342773, | |
| "rewards/rejected": -0.1405489593744278, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.5468135326514555, | |
| "grad_norm": 2.88472056388855, | |
| "learning_rate": 2.0412253301722774e-05, | |
| "logits/chosen": -0.04653149098157883, | |
| "logits/rejected": -0.5510588884353638, | |
| "logps/chosen": -202.66006469726562, | |
| "logps/rejected": -174.1446075439453, | |
| "loss": 0.6219, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.22523736953735352, | |
| "rewards/margins": 0.4242261052131653, | |
| "rewards/rejected": -0.19898872077465057, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.5507474429583006, | |
| "grad_norm": 2.5013091564178467, | |
| "learning_rate": 2.013742641870149e-05, | |
| "logits/chosen": -0.2037276029586792, | |
| "logits/rejected": -0.3935338854789734, | |
| "logps/chosen": -197.0646209716797, | |
| "logps/rejected": -188.81358337402344, | |
| "loss": 0.6663, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.1926575005054474, | |
| "rewards/margins": 0.29346001148223877, | |
| "rewards/rejected": -0.10080249607563019, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.5507474429583006, | |
| "eval_logits/chosen": 1.448291540145874, | |
| "eval_logits/rejected": 1.247455358505249, | |
| "eval_logps/chosen": -205.36190795898438, | |
| "eval_logps/rejected": -184.75527954101562, | |
| "eval_loss": 0.6528521776199341, | |
| "eval_rewards/accuracies": 0.635937511920929, | |
| "eval_rewards/chosen": 0.13041910529136658, | |
| "eval_rewards/margins": 0.3490408658981323, | |
| "eval_rewards/rejected": -0.21862177550792694, | |
| "eval_runtime": 310.4545, | |
| "eval_samples_per_second": 2.061, | |
| "eval_steps_per_second": 0.129, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.5546813532651456, | |
| "grad_norm": 2.5953361988067627, | |
| "learning_rate": 1.986257358129852e-05, | |
| "logits/chosen": 0.22972527146339417, | |
| "logits/rejected": -0.06759750843048096, | |
| "logps/chosen": -196.53292846679688, | |
| "logps/rejected": -178.35305786132812, | |
| "loss": 0.6857, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.06380387395620346, | |
| "rewards/margins": 0.2038646936416626, | |
| "rewards/rejected": -0.26766857504844666, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.5586152635719905, | |
| "grad_norm": 2.167508125305176, | |
| "learning_rate": 1.9587746698277236e-05, | |
| "logits/chosen": 0.07684750109910965, | |
| "logits/rejected": -0.32069578766822815, | |
| "logps/chosen": -211.81216430664062, | |
| "logps/rejected": -185.2478485107422, | |
| "loss": 0.622, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.17962387204170227, | |
| "rewards/margins": 0.4280010163784027, | |
| "rewards/rejected": -0.24837720394134521, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.5625491738788355, | |
| "grad_norm": 2.32083797454834, | |
| "learning_rate": 1.9312997673499262e-05, | |
| "logits/chosen": 0.12921884655952454, | |
| "logits/rejected": -0.3880526125431061, | |
| "logps/chosen": -209.3653106689453, | |
| "logps/rejected": -180.76580810546875, | |
| "loss": 0.6045, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": 0.11903480440378189, | |
| "rewards/margins": 0.4623745083808899, | |
| "rewards/rejected": -0.34333962202072144, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.5664830841856806, | |
| "grad_norm": 2.7552459239959717, | |
| "learning_rate": 1.9038378396121897e-05, | |
| "logits/chosen": 0.13564559817314148, | |
| "logits/rejected": -0.10839700698852539, | |
| "logps/chosen": -216.60537719726562, | |
| "logps/rejected": -193.63003540039062, | |
| "loss": 0.6654, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.14677533507347107, | |
| "rewards/margins": 0.2059493511915207, | |
| "rewards/rejected": -0.05917401239275932, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.5704169944925256, | |
| "grad_norm": 2.3545451164245605, | |
| "learning_rate": 1.8763940730798324e-05, | |
| "logits/chosen": 0.04216325283050537, | |
| "logits/rejected": -0.34147733449935913, | |
| "logps/chosen": -196.35096740722656, | |
| "logps/rejected": -171.8896942138672, | |
| "loss": 0.615, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.22952958941459656, | |
| "rewards/margins": 0.47519558668136597, | |
| "rewards/rejected": -0.24566598236560822, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.5743509047993706, | |
| "grad_norm": 5.674262523651123, | |
| "learning_rate": 1.8489736507882428e-05, | |
| "logits/chosen": 0.26032572984695435, | |
| "logits/rejected": -0.004704379942268133, | |
| "logps/chosen": -205.56625366210938, | |
| "logps/rejected": -192.25689697265625, | |
| "loss": 0.6522, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.23235079646110535, | |
| "rewards/margins": 0.3532930314540863, | |
| "rewards/rejected": -0.12094223499298096, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.5782848151062155, | |
| "grad_norm": 2.7927699089050293, | |
| "learning_rate": 1.8215817513640124e-05, | |
| "logits/chosen": -0.13913585245609283, | |
| "logits/rejected": -0.46495765447616577, | |
| "logps/chosen": -202.8736572265625, | |
| "logps/rejected": -175.65243530273438, | |
| "loss": 0.621, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.16645391285419464, | |
| "rewards/margins": 0.389155775308609, | |
| "rewards/rejected": -0.22270183265209198, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.5822187254130606, | |
| "grad_norm": 2.1000728607177734, | |
| "learning_rate": 1.7942235480468965e-05, | |
| "logits/chosen": 0.10451068729162216, | |
| "logits/rejected": -0.19033582508563995, | |
| "logps/chosen": -222.5543212890625, | |
| "logps/rejected": -205.25717163085938, | |
| "loss": 0.6119, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.2822187840938568, | |
| "rewards/margins": 0.4702509939670563, | |
| "rewards/rejected": -0.18803219497203827, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.5861526357199056, | |
| "grad_norm": 2.417529821395874, | |
| "learning_rate": 1.7669042077127986e-05, | |
| "logits/chosen": 0.1573796570301056, | |
| "logits/rejected": -0.09679815173149109, | |
| "logps/chosen": -206.7544403076172, | |
| "logps/rejected": -186.4736328125, | |
| "loss": 0.6651, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.14593636989593506, | |
| "rewards/margins": 0.30300992727279663, | |
| "rewards/rejected": -0.15707358717918396, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.5900865460267506, | |
| "grad_norm": 2.300747871398926, | |
| "learning_rate": 1.7396288898979498e-05, | |
| "logits/chosen": 0.2694866359233856, | |
| "logits/rejected": -0.14985588192939758, | |
| "logps/chosen": -213.40347290039062, | |
| "logps/rejected": -190.8516845703125, | |
| "loss": 0.6213, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": 0.2800352871417999, | |
| "rewards/margins": 0.4598168432712555, | |
| "rewards/rejected": -0.17978155612945557, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.5940204563335956, | |
| "grad_norm": 2.4545648097991943, | |
| "learning_rate": 1.7124027458244795e-05, | |
| "logits/chosen": 0.13348327577114105, | |
| "logits/rejected": -0.2745976746082306, | |
| "logps/chosen": -205.34384155273438, | |
| "logps/rejected": -189.4317626953125, | |
| "loss": 0.617, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.29095253348350525, | |
| "rewards/margins": 0.49482065439224243, | |
| "rewards/rejected": -0.203868106007576, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.5979543666404405, | |
| "grad_norm": 2.2178571224212646, | |
| "learning_rate": 1.6852309174275544e-05, | |
| "logits/chosen": 0.03373800590634346, | |
| "logits/rejected": -0.38450998067855835, | |
| "logps/chosen": -201.083251953125, | |
| "logps/rejected": -173.52415466308594, | |
| "loss": 0.6477, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.12074605375528336, | |
| "rewards/margins": 0.31737667322158813, | |
| "rewards/rejected": -0.19663064181804657, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.6018882769472856, | |
| "grad_norm": 2.853874444961548, | |
| "learning_rate": 1.658118536384272e-05, | |
| "logits/chosen": -0.027989035472273827, | |
| "logits/rejected": -0.5765674114227295, | |
| "logps/chosen": -207.234619140625, | |
| "logps/rejected": -168.69090270996094, | |
| "loss": 0.6058, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": 0.3612646162509918, | |
| "rewards/margins": 0.5696166157722473, | |
| "rewards/rejected": -0.20835192501544952, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.6058221872541306, | |
| "grad_norm": 1.6519763469696045, | |
| "learning_rate": 1.6310707231444884e-05, | |
| "logits/chosen": 0.18471361696720123, | |
| "logits/rejected": -0.12388062477111816, | |
| "logps/chosen": -208.86746215820312, | |
| "logps/rejected": -190.293212890625, | |
| "loss": 0.6259, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.3237128257751465, | |
| "rewards/margins": 0.36399954557418823, | |
| "rewards/rejected": -0.04028671234846115, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.6097560975609756, | |
| "grad_norm": 2.2588582038879395, | |
| "learning_rate": 1.6040925859637732e-05, | |
| "logits/chosen": 0.3290981650352478, | |
| "logits/rejected": -0.12312372773885727, | |
| "logps/chosen": -212.65933227539062, | |
| "logps/rejected": -181.22055053710938, | |
| "loss": 0.6199, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.11960609257221222, | |
| "rewards/margins": 0.43050146102905273, | |
| "rewards/rejected": -0.3108953833580017, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.6136900078678206, | |
| "grad_norm": 1.8849164247512817, | |
| "learning_rate": 1.57718921993866e-05, | |
| "logits/chosen": 0.23942828178405762, | |
| "logits/rejected": -0.044407326728105545, | |
| "logps/chosen": -194.55221557617188, | |
| "logps/rejected": -193.2657928466797, | |
| "loss": 0.6054, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": 0.09872087091207504, | |
| "rewards/margins": 0.48265451192855835, | |
| "rewards/rejected": -0.3839336335659027, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.6176239181746657, | |
| "grad_norm": 1.8967937231063843, | |
| "learning_rate": 1.550365706044387e-05, | |
| "logits/chosen": -0.1253087818622589, | |
| "logits/rejected": -0.49744290113449097, | |
| "logps/chosen": -188.6408233642578, | |
| "logps/rejected": -168.89077758789062, | |
| "loss": 0.647, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.15920981764793396, | |
| "rewards/margins": 0.4097214341163635, | |
| "rewards/rejected": -0.25051164627075195, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.6215578284815106, | |
| "grad_norm": 2.6422152519226074, | |
| "learning_rate": 1.523627110175302e-05, | |
| "logits/chosen": 0.15482959151268005, | |
| "logits/rejected": -0.10624520480632782, | |
| "logps/chosen": -199.2796173095703, | |
| "logps/rejected": -187.12889099121094, | |
| "loss": 0.6552, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.12308132648468018, | |
| "rewards/margins": 0.3106541633605957, | |
| "rewards/rejected": -0.18757285177707672, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.6254917387883556, | |
| "grad_norm": 2.2159316539764404, | |
| "learning_rate": 1.496978482188118e-05, | |
| "logits/chosen": -0.10331370681524277, | |
| "logits/rejected": -0.3492235243320465, | |
| "logps/chosen": -197.7713623046875, | |
| "logps/rejected": -183.83486938476562, | |
| "loss": 0.6775, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.15521839261054993, | |
| "rewards/margins": 0.3734431564807892, | |
| "rewards/rejected": -0.21822479367256165, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.6294256490952006, | |
| "grad_norm": 2.4503073692321777, | |
| "learning_rate": 1.4704248549481947e-05, | |
| "logits/chosen": 0.18897327780723572, | |
| "logits/rejected": -0.08603793382644653, | |
| "logps/chosen": -194.5688018798828, | |
| "logps/rejected": -177.0404052734375, | |
| "loss": 0.6341, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.1881575882434845, | |
| "rewards/margins": 0.3320578336715698, | |
| "rewards/rejected": -0.14390026032924652, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.6294256490952006, | |
| "eval_logits/chosen": 1.4848413467407227, | |
| "eval_logits/rejected": 1.2969497442245483, | |
| "eval_logps/chosen": -205.3857879638672, | |
| "eval_logps/rejected": -184.94149780273438, | |
| "eval_loss": 0.6510594487190247, | |
| "eval_rewards/accuracies": 0.635937511920929, | |
| "eval_rewards/chosen": 0.12922506034374237, | |
| "eval_rewards/margins": 0.35715794563293457, | |
| "eval_rewards/rejected": -0.2279329001903534, | |
| "eval_runtime": 307.786, | |
| "eval_samples_per_second": 2.079, | |
| "eval_steps_per_second": 0.13, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.6333595594020456, | |
| "grad_norm": 3.450169801712036, | |
| "learning_rate": 1.4439712433790312e-05, | |
| "logits/chosen": 0.2176450937986374, | |
| "logits/rejected": -0.1273191273212433, | |
| "logps/chosen": -222.93380737304688, | |
| "logps/rejected": -194.19715881347656, | |
| "loss": 0.6942, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.06242651492357254, | |
| "rewards/margins": 0.21511706709861755, | |
| "rewards/rejected": -0.1526905596256256, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.6372934697088907, | |
| "grad_norm": 2.21840500831604, | |
| "learning_rate": 1.4176226435151463e-05, | |
| "logits/chosen": 0.19291488826274872, | |
| "logits/rejected": -0.10205423831939697, | |
| "logps/chosen": -198.19786071777344, | |
| "logps/rejected": -196.7605438232422, | |
| "loss": 0.6455, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.10709993541240692, | |
| "rewards/margins": 0.2662377953529358, | |
| "rewards/rejected": -0.1591378152370453, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.6412273800157356, | |
| "grad_norm": 3.1514971256256104, | |
| "learning_rate": 1.3913840315585279e-05, | |
| "logits/chosen": 0.26872119307518005, | |
| "logits/rejected": -0.0016453296411782503, | |
| "logps/chosen": -229.64315795898438, | |
| "logps/rejected": -203.69235229492188, | |
| "loss": 0.6781, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.07703550159931183, | |
| "rewards/margins": 0.23492412269115448, | |
| "rewards/rejected": -0.15788865089416504, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.6451612903225806, | |
| "grad_norm": 2.354876756668091, | |
| "learning_rate": 1.3652603629388226e-05, | |
| "logits/chosen": 0.05659332871437073, | |
| "logits/rejected": -0.3407860994338989, | |
| "logps/chosen": -197.16531372070312, | |
| "logps/rejected": -185.6110076904297, | |
| "loss": 0.6145, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.014247948303818703, | |
| "rewards/margins": 0.44806042313575745, | |
| "rewards/rejected": -0.4338124692440033, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.6490952006294256, | |
| "grad_norm": 2.8119733333587646, | |
| "learning_rate": 1.3392565713774577e-05, | |
| "logits/chosen": 0.3637182116508484, | |
| "logits/rejected": -0.10527028143405914, | |
| "logps/chosen": -191.6344757080078, | |
| "logps/rejected": -171.97213745117188, | |
| "loss": 0.6333, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.01124632079154253, | |
| "rewards/margins": 0.42724609375, | |
| "rewards/rejected": -0.41599979996681213, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.6530291109362707, | |
| "grad_norm": 2.2308883666992188, | |
| "learning_rate": 1.3133775679558514e-05, | |
| "logits/chosen": 0.03254476562142372, | |
| "logits/rejected": -0.3083351254463196, | |
| "logps/chosen": -201.2935791015625, | |
| "logps/rejected": -182.278564453125, | |
| "loss": 0.6212, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.04961629584431648, | |
| "rewards/margins": 0.3992883563041687, | |
| "rewards/rejected": -0.4489046633243561, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.6569630212431157, | |
| "grad_norm": 2.1679656505584717, | |
| "learning_rate": 1.2876282401879108e-05, | |
| "logits/chosen": 0.43080347776412964, | |
| "logits/rejected": -0.06827671080827713, | |
| "logps/chosen": -217.26284790039062, | |
| "logps/rejected": -191.2751007080078, | |
| "loss": 0.6378, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.02513251081109047, | |
| "rewards/margins": 0.37300729751586914, | |
| "rewards/rejected": -0.3981397747993469, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.6608969315499607, | |
| "grad_norm": 2.3326401710510254, | |
| "learning_rate": 1.262013451096972e-05, | |
| "logits/chosen": 0.056190334260463715, | |
| "logits/rejected": -0.25090888142585754, | |
| "logps/chosen": -195.7120361328125, | |
| "logps/rejected": -189.41294860839844, | |
| "loss": 0.6046, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -0.009902209043502808, | |
| "rewards/margins": 0.43464556336402893, | |
| "rewards/rejected": -0.4445478022098541, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.6648308418568056, | |
| "grad_norm": 2.85198712348938, | |
| "learning_rate": 1.236538038297367e-05, | |
| "logits/chosen": 0.04097587615251541, | |
| "logits/rejected": -0.2062218189239502, | |
| "logps/chosen": -208.66928100585938, | |
| "logps/rejected": -197.4303741455078, | |
| "loss": 0.6461, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.05257856100797653, | |
| "rewards/margins": 0.336517870426178, | |
| "rewards/rejected": -0.28393927216529846, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.6687647521636507, | |
| "grad_norm": 2.54502272605896, | |
| "learning_rate": 1.211206813080795e-05, | |
| "logits/chosen": 0.2434895783662796, | |
| "logits/rejected": -0.11432347446680069, | |
| "logps/chosen": -216.93624877929688, | |
| "logps/rejected": -193.50289916992188, | |
| "loss": 0.6424, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.16703644394874573, | |
| "rewards/margins": 0.3197667896747589, | |
| "rewards/rejected": -0.48680323362350464, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.6726986624704957, | |
| "grad_norm": 2.6188857555389404, | |
| "learning_rate": 1.1860245595076584e-05, | |
| "logits/chosen": 0.11275175958871841, | |
| "logits/rejected": -0.07601265609264374, | |
| "logps/chosen": -197.09104919433594, | |
| "logps/rejected": -198.6499786376953, | |
| "loss": 0.667, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.18728098273277283, | |
| "rewards/margins": 0.33224430680274963, | |
| "rewards/rejected": -0.5195252895355225, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.6766325727773407, | |
| "grad_norm": 2.1628315448760986, | |
| "learning_rate": 1.1609960335035423e-05, | |
| "logits/chosen": 0.048135705292224884, | |
| "logits/rejected": -0.2367614507675171, | |
| "logps/chosen": -205.294921875, | |
| "logps/rejected": -193.1056365966797, | |
| "loss": 0.6234, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.26431113481521606, | |
| "rewards/margins": 0.36919263005256653, | |
| "rewards/rejected": -0.6335037350654602, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.6805664830841857, | |
| "grad_norm": 2.3180811405181885, | |
| "learning_rate": 1.1361259619610139e-05, | |
| "logits/chosen": -0.1178649291396141, | |
| "logits/rejected": -0.44798412919044495, | |
| "logps/chosen": -199.19705200195312, | |
| "logps/rejected": -184.29293823242188, | |
| "loss": 0.6865, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.20577684044837952, | |
| "rewards/margins": 0.31853482127189636, | |
| "rewards/rejected": -0.5243116617202759, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.6845003933910306, | |
| "grad_norm": 2.848353147506714, | |
| "learning_rate": 1.1114190418468973e-05, | |
| "logits/chosen": 0.23848970234394073, | |
| "logits/rejected": -0.08090641349554062, | |
| "logps/chosen": -223.01504516601562, | |
| "logps/rejected": -194.83154296875, | |
| "loss": 0.6556, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.0016554594039916992, | |
| "rewards/margins": 0.26366448402404785, | |
| "rewards/rejected": -0.26200905442237854, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.6884343036978757, | |
| "grad_norm": 2.7061009407043457, | |
| "learning_rate": 1.0868799393152037e-05, | |
| "logits/chosen": -0.10446061939001083, | |
| "logits/rejected": -0.597304105758667, | |
| "logps/chosen": -205.68099975585938, | |
| "logps/rejected": -174.41299438476562, | |
| "loss": 0.6323, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.15544500946998596, | |
| "rewards/margins": 0.38598471879959106, | |
| "rewards/rejected": -0.5414296984672546, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.6923682140047207, | |
| "grad_norm": 2.5832557678222656, | |
| "learning_rate": 1.0625132888258834e-05, | |
| "logits/chosen": -0.02000749669969082, | |
| "logits/rejected": -0.41175442934036255, | |
| "logps/chosen": -215.56741333007812, | |
| "logps/rejected": -180.83627319335938, | |
| "loss": 0.6433, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.02432412840425968, | |
| "rewards/margins": 0.36454257369041443, | |
| "rewards/rejected": -0.38886672258377075, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.6963021243115657, | |
| "grad_norm": 2.688718318939209, | |
| "learning_rate": 1.0383236922695545e-05, | |
| "logits/chosen": 0.3951621651649475, | |
| "logits/rejected": -0.16989345848560333, | |
| "logps/chosen": -220.72592163085938, | |
| "logps/rejected": -185.58865356445312, | |
| "loss": 0.6479, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.09778473526239395, | |
| "rewards/margins": 0.33538150787353516, | |
| "rewards/rejected": -0.4331662058830261, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.7002360346184107, | |
| "grad_norm": 2.309346914291382, | |
| "learning_rate": 1.0143157180983967e-05, | |
| "logits/chosen": -0.15827368199825287, | |
| "logits/rejected": -0.44364356994628906, | |
| "logps/chosen": -205.26345825195312, | |
| "logps/rejected": -179.46121215820312, | |
| "loss": 0.6507, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.19296525418758392, | |
| "rewards/margins": 0.3191848695278168, | |
| "rewards/rejected": -0.5121501684188843, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.7041699449252558, | |
| "grad_norm": 1.9287734031677246, | |
| "learning_rate": 9.90493900463347e-06, | |
| "logits/chosen": 0.06571565568447113, | |
| "logits/rejected": -0.4809319078922272, | |
| "logps/chosen": -201.1472930908203, | |
| "logps/rejected": -177.11959838867188, | |
| "loss": 0.5998, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": 0.03680630773305893, | |
| "rewards/margins": 0.4870053827762604, | |
| "rewards/rejected": -0.45019906759262085, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.7081038552321007, | |
| "grad_norm": 3.4344065189361572, | |
| "learning_rate": 9.668627383577813e-06, | |
| "logits/chosen": -0.2456584870815277, | |
| "logits/rejected": -0.40960049629211426, | |
| "logps/chosen": -206.5409393310547, | |
| "logps/rejected": -192.875, | |
| "loss": 0.6547, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": 0.03170660138130188, | |
| "rewards/margins": 0.30798882246017456, | |
| "rewards/rejected": -0.2762822210788727, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.7081038552321007, | |
| "eval_logits/chosen": 1.5410864353179932, | |
| "eval_logits/rejected": 1.3544774055480957, | |
| "eval_logps/chosen": -207.73684692382812, | |
| "eval_logps/rejected": -187.0003662109375, | |
| "eval_loss": 0.6462491750717163, | |
| "eval_rewards/accuracies": 0.65625, | |
| "eval_rewards/chosen": 0.011672723107039928, | |
| "eval_rewards/margins": 0.3425491750240326, | |
| "eval_rewards/rejected": -0.3308764696121216, | |
| "eval_runtime": 278.2768, | |
| "eval_samples_per_second": 2.3, | |
| "eval_steps_per_second": 0.144, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.7120377655389457, | |
| "grad_norm": 2.0168943405151367, | |
| "learning_rate": 9.434266947678326e-06, | |
| "logits/chosen": 0.12568683922290802, | |
| "logits/rejected": -0.2064806967973709, | |
| "logps/chosen": -185.58462524414062, | |
| "logps/rejected": -178.7863006591797, | |
| "loss": 0.6173, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.0271898303180933, | |
| "rewards/margins": 0.4776444435119629, | |
| "rewards/rejected": -0.45045462250709534, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.7159716758457907, | |
| "grad_norm": 3.7507474422454834, | |
| "learning_rate": 9.201901958295116e-06, | |
| "logits/chosen": 0.2663114666938782, | |
| "logits/rejected": -0.21068088710308075, | |
| "logps/chosen": -222.8455047607422, | |
| "logps/rejected": -197.8881072998047, | |
| "loss": 0.6613, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.19876088201999664, | |
| "rewards/margins": 0.3615323007106781, | |
| "rewards/rejected": -0.16277141869068146, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.7199055861526357, | |
| "grad_norm": 2.49344801902771, | |
| "learning_rate": 8.971576299927833e-06, | |
| "logits/chosen": 0.24711892008781433, | |
| "logits/rejected": -0.30316418409347534, | |
| "logps/chosen": -205.91763305664062, | |
| "logps/rejected": -174.68478393554688, | |
| "loss": 0.6148, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.0421910285949707, | |
| "rewards/margins": 0.4187415540218353, | |
| "rewards/rejected": -0.3765505254268646, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.7238394964594808, | |
| "grad_norm": 1.589440107345581, | |
| "learning_rate": 8.743333471927673e-06, | |
| "logits/chosen": 0.05540518835186958, | |
| "logits/rejected": -0.2582542300224304, | |
| "logps/chosen": -185.76052856445312, | |
| "logps/rejected": -178.44631958007812, | |
| "loss": 0.6323, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.050637077540159225, | |
| "rewards/margins": 0.37040454149246216, | |
| "rewards/rejected": -0.31976741552352905, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.7277734067663257, | |
| "grad_norm": 2.0918121337890625, | |
| "learning_rate": 8.517216580282048e-06, | |
| "logits/chosen": -0.25447550415992737, | |
| "logits/rejected": -0.5505613088607788, | |
| "logps/chosen": -194.76841735839844, | |
| "logps/rejected": -182.89212036132812, | |
| "loss": 0.6478, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.048875343054533005, | |
| "rewards/margins": 0.3347932696342468, | |
| "rewards/rejected": -0.38366860151290894, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.7317073170731707, | |
| "grad_norm": 1.9831795692443848, | |
| "learning_rate": 8.293268329473602e-06, | |
| "logits/chosen": -0.02813979983329773, | |
| "logits/rejected": -0.3501664698123932, | |
| "logps/chosen": -211.9995880126953, | |
| "logps/rejected": -202.5315399169922, | |
| "loss": 0.6627, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.021062618121504784, | |
| "rewards/margins": 0.3136800229549408, | |
| "rewards/rejected": -0.29261741042137146, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.7356412273800157, | |
| "grad_norm": 2.2177882194519043, | |
| "learning_rate": 8.071531014415019e-06, | |
| "logits/chosen": -0.2367478907108307, | |
| "logits/rejected": -0.5733985900878906, | |
| "logps/chosen": -180.80612182617188, | |
| "logps/rejected": -169.3168182373047, | |
| "loss": 0.6652, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.19370688498020172, | |
| "rewards/margins": 0.3258829414844513, | |
| "rewards/rejected": -0.5195897817611694, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.7395751376868608, | |
| "grad_norm": 2.075753927230835, | |
| "learning_rate": 7.852046512461202e-06, | |
| "logits/chosen": 0.1959037333726883, | |
| "logits/rejected": -0.2151847630739212, | |
| "logps/chosen": -221.7503204345703, | |
| "logps/rejected": -195.74659729003906, | |
| "loss": 0.6153, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.06760503351688385, | |
| "rewards/margins": 0.404818058013916, | |
| "rewards/rejected": -0.47242307662963867, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.7435090479937058, | |
| "grad_norm": 2.3592565059661865, | |
| "learning_rate": 7.634856275500316e-06, | |
| "logits/chosen": -0.037016820162534714, | |
| "logits/rejected": -0.31524938344955444, | |
| "logps/chosen": -204.08740234375, | |
| "logps/rejected": -185.71417236328125, | |
| "loss": 0.675, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.09079615771770477, | |
| "rewards/margins": 0.37884068489074707, | |
| "rewards/rejected": -0.2880445122718811, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.7474429583005507, | |
| "grad_norm": 2.19828724861145, | |
| "learning_rate": 7.420001322125156e-06, | |
| "logits/chosen": 0.015210944227874279, | |
| "logits/rejected": -0.3576655983924866, | |
| "logps/chosen": -211.49569702148438, | |
| "logps/rejected": -179.06173706054688, | |
| "loss": 0.642, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.15431848168373108, | |
| "rewards/margins": 0.31605640053749084, | |
| "rewards/rejected": -0.4703748822212219, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.7513768686073957, | |
| "grad_norm": 2.330610990524292, | |
| "learning_rate": 7.2075222298863786e-06, | |
| "logits/chosen": 0.07341478019952774, | |
| "logits/rejected": -0.38820725679397583, | |
| "logps/chosen": -224.9347381591797, | |
| "logps/rejected": -192.65350341796875, | |
| "loss": 0.6317, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.08989708870649338, | |
| "rewards/margins": 0.483518123626709, | |
| "rewards/rejected": -0.393621027469635, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.7553107789142408, | |
| "grad_norm": 2.543734550476074, | |
| "learning_rate": 6.9974591276290295e-06, | |
| "logits/chosen": 0.10347223281860352, | |
| "logits/rejected": -0.19919316470623016, | |
| "logps/chosen": -215.1173553466797, | |
| "logps/rejected": -195.3154296875, | |
| "loss": 0.6501, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.10300693660974503, | |
| "rewards/margins": 0.32648521661758423, | |
| "rewards/rejected": -0.42949214577674866, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.7592446892210858, | |
| "grad_norm": 2.2518229484558105, | |
| "learning_rate": 6.789851687913784e-06, | |
| "logits/chosen": 0.04955869913101196, | |
| "logits/rejected": -0.3323279023170471, | |
| "logps/chosen": -207.77767944335938, | |
| "logps/rejected": -178.6419219970703, | |
| "loss": 0.6297, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.07483512163162231, | |
| "rewards/margins": 0.34836816787719727, | |
| "rewards/rejected": -0.4232032895088196, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.7631785995279308, | |
| "grad_norm": 2.465470552444458, | |
| "learning_rate": 6.584739119524384e-06, | |
| "logits/chosen": 0.16735856235027313, | |
| "logits/rejected": -0.14222998917102814, | |
| "logps/chosen": -188.8686981201172, | |
| "logps/rejected": -171.84774780273438, | |
| "loss": 0.6693, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.03049059771001339, | |
| "rewards/margins": 0.3306547701358795, | |
| "rewards/rejected": -0.36114537715911865, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.7671125098347757, | |
| "grad_norm": 2.564055919647217, | |
| "learning_rate": 6.382160160062663e-06, | |
| "logits/chosen": -0.1304897665977478, | |
| "logits/rejected": -0.3616050183773041, | |
| "logps/chosen": -193.47817993164062, | |
| "logps/rejected": -182.4466552734375, | |
| "loss": 0.6282, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": 0.010683831758797169, | |
| "rewards/margins": 0.4327141344547272, | |
| "rewards/rejected": -0.4220302700996399, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.7710464201416207, | |
| "grad_norm": 2.4666337966918945, | |
| "learning_rate": 6.182153068632546e-06, | |
| "logits/chosen": 0.2652967572212219, | |
| "logits/rejected": -0.010537607595324516, | |
| "logps/chosen": -213.3832550048828, | |
| "logps/rejected": -197.07296752929688, | |
| "loss": 0.6335, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": 0.13390854001045227, | |
| "rewards/margins": 0.30471378564834595, | |
| "rewards/rejected": -0.17080524563789368, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.7749803304484658, | |
| "grad_norm": 2.0473670959472656, | |
| "learning_rate": 5.984755618614444e-06, | |
| "logits/chosen": -0.12875613570213318, | |
| "logits/rejected": -0.6200467944145203, | |
| "logps/chosen": -192.61300659179688, | |
| "logps/rejected": -172.0157928466797, | |
| "loss": 0.605, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.051413632929325104, | |
| "rewards/margins": 0.3970768451690674, | |
| "rewards/rejected": -0.3456632196903229, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.7789142407553108, | |
| "grad_norm": 2.04880690574646, | |
| "learning_rate": 5.7900050905313345e-06, | |
| "logits/chosen": -0.21358175575733185, | |
| "logits/rejected": -0.5731441974639893, | |
| "logps/chosen": -193.90147399902344, | |
| "logps/rejected": -178.61111450195312, | |
| "loss": 0.6353, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -0.1605134904384613, | |
| "rewards/margins": 0.31240013241767883, | |
| "rewards/rejected": -0.47291359305381775, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.7828481510621558, | |
| "grad_norm": 1.985092043876648, | |
| "learning_rate": 5.597938265007994e-06, | |
| "logits/chosen": 0.510907769203186, | |
| "logits/rejected": -0.0802483856678009, | |
| "logps/chosen": -206.6349639892578, | |
| "logps/rejected": -182.4473876953125, | |
| "loss": 0.5881, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": 0.031928837299346924, | |
| "rewards/margins": 0.4933392405509949, | |
| "rewards/rejected": -0.4614104628562927, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.7867820613690008, | |
| "grad_norm": 2.3329551219940186, | |
| "learning_rate": 5.408591415824571e-06, | |
| "logits/chosen": 0.1821351796388626, | |
| "logits/rejected": -0.3620257079601288, | |
| "logps/chosen": -224.0210723876953, | |
| "logps/rejected": -185.69363403320312, | |
| "loss": 0.6062, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.1057119145989418, | |
| "rewards/margins": 0.44287848472595215, | |
| "rewards/rejected": -0.33716651797294617, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.7867820613690008, | |
| "eval_logits/chosen": 1.5291143655776978, | |
| "eval_logits/rejected": 1.3392162322998047, | |
| "eval_logps/chosen": -207.0912628173828, | |
| "eval_logps/rejected": -186.51864624023438, | |
| "eval_loss": 0.6429952383041382, | |
| "eval_rewards/accuracies": 0.645312488079071, | |
| "eval_rewards/chosen": 0.04395235329866409, | |
| "eval_rewards/margins": 0.3507426679134369, | |
| "eval_rewards/rejected": -0.3067903220653534, | |
| "eval_runtime": 280.2507, | |
| "eval_samples_per_second": 2.284, | |
| "eval_steps_per_second": 0.143, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.7907159716758458, | |
| "grad_norm": 2.6991782188415527, | |
| "learning_rate": 5.222000303065928e-06, | |
| "logits/chosen": 0.1721642166376114, | |
| "logits/rejected": -0.04354752227663994, | |
| "logps/chosen": -213.06289672851562, | |
| "logps/rejected": -202.95700073242188, | |
| "loss": 0.6189, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": 0.031207973137497902, | |
| "rewards/margins": 0.4261496663093567, | |
| "rewards/rejected": -0.39494162797927856, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.7946498819826908, | |
| "grad_norm": 2.08817982673645, | |
| "learning_rate": 5.038200166367979e-06, | |
| "logits/chosen": -0.025506436824798584, | |
| "logits/rejected": -0.3635373115539551, | |
| "logps/chosen": -209.90451049804688, | |
| "logps/rejected": -186.5208740234375, | |
| "loss": 0.6475, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.017850453034043312, | |
| "rewards/margins": 0.28239428997039795, | |
| "rewards/rejected": -0.3002447485923767, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.7985837922895358, | |
| "grad_norm": 1.7719428539276123, | |
| "learning_rate": 4.8572257182623305e-06, | |
| "logits/chosen": -0.042554982006549835, | |
| "logits/rejected": -0.5459092259407043, | |
| "logps/chosen": -198.80331420898438, | |
| "logps/rejected": -168.67318725585938, | |
| "loss": 0.6296, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.11431936174631119, | |
| "rewards/margins": 0.39055347442626953, | |
| "rewards/rejected": -0.5048728585243225, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.8025177025963808, | |
| "grad_norm": 1.5831013917922974, | |
| "learning_rate": 4.679111137620442e-06, | |
| "logits/chosen": -0.04387597367167473, | |
| "logits/rejected": -0.5404433608055115, | |
| "logps/chosen": -208.5739288330078, | |
| "logps/rejected": -182.9171600341797, | |
| "loss": 0.5629, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": 0.12051211297512054, | |
| "rewards/margins": 0.5967931151390076, | |
| "rewards/rejected": -0.47628098726272583, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.8064516129032258, | |
| "grad_norm": 1.9797825813293457, | |
| "learning_rate": 4.503890063198637e-06, | |
| "logits/chosen": 0.13021646440029144, | |
| "logits/rejected": -0.29057878255844116, | |
| "logps/chosen": -196.6991424560547, | |
| "logps/rejected": -173.8551025390625, | |
| "loss": 0.6085, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -0.05341099575161934, | |
| "rewards/margins": 0.42261919379234314, | |
| "rewards/rejected": -0.47603026032447815, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.8103855232100708, | |
| "grad_norm": 2.1103105545043945, | |
| "learning_rate": 4.3315955872850464e-06, | |
| "logits/chosen": 0.27238404750823975, | |
| "logits/rejected": -0.1460699737071991, | |
| "logps/chosen": -212.850341796875, | |
| "logps/rejected": -185.5171356201172, | |
| "loss": 0.6392, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.08649823069572449, | |
| "rewards/margins": 0.4158404469490051, | |
| "rewards/rejected": -0.5023386478424072, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.8143194335169158, | |
| "grad_norm": 2.785482168197632, | |
| "learning_rate": 4.162260249449817e-06, | |
| "logits/chosen": 0.14135488867759705, | |
| "logits/rejected": -0.18456022441387177, | |
| "logps/chosen": -200.50779724121094, | |
| "logps/rejected": -185.84165954589844, | |
| "loss": 0.6551, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.0783742293715477, | |
| "rewards/margins": 0.3307613432407379, | |
| "rewards/rejected": -0.252387136220932, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.8182533438237608, | |
| "grad_norm": 2.464423656463623, | |
| "learning_rate": 3.995916030399658e-06, | |
| "logits/chosen": -0.29520073533058167, | |
| "logits/rejected": -0.5148777961730957, | |
| "logps/chosen": -182.30633544921875, | |
| "logps/rejected": -176.0092010498047, | |
| "loss": 0.6413, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.0033335983753204346, | |
| "rewards/margins": 0.4054691195487976, | |
| "rewards/rejected": -0.4021355211734772, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.8221872541306058, | |
| "grad_norm": 3.012695550918579, | |
| "learning_rate": 3.832594345937974e-06, | |
| "logits/chosen": -0.07361414283514023, | |
| "logits/rejected": -0.4096745550632477, | |
| "logps/chosen": -212.64395141601562, | |
| "logps/rejected": -192.7116241455078, | |
| "loss": 0.7325, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.25282061100006104, | |
| "rewards/margins": 0.20071497559547424, | |
| "rewards/rejected": -0.4535354971885681, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.8261211644374509, | |
| "grad_norm": 1.8916617631912231, | |
| "learning_rate": 3.6723260410316777e-06, | |
| "logits/chosen": 0.12402760982513428, | |
| "logits/rejected": -0.28017571568489075, | |
| "logps/chosen": -207.8403778076172, | |
| "logps/rejected": -183.1613311767578, | |
| "loss": 0.6603, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.131439670920372, | |
| "rewards/margins": 0.3610256314277649, | |
| "rewards/rejected": -0.4924653172492981, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.8300550747442959, | |
| "grad_norm": 2.8689301013946533, | |
| "learning_rate": 3.515141383985763e-06, | |
| "logits/chosen": -0.19540981948375702, | |
| "logits/rejected": -0.22350387275218964, | |
| "logps/chosen": -200.00564575195312, | |
| "logps/rejected": -193.63632202148438, | |
| "loss": 0.6975, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.004730015993118286, | |
| "rewards/margins": 0.2367696762084961, | |
| "rewards/rejected": -0.24149969220161438, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.8339889850511408, | |
| "grad_norm": 2.823575973510742, | |
| "learning_rate": 3.361070060726881e-06, | |
| "logits/chosen": 0.19453616440296173, | |
| "logits/rejected": -0.047756943851709366, | |
| "logps/chosen": -213.205078125, | |
| "logps/rejected": -198.85020446777344, | |
| "loss": 0.6682, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.024269048124551773, | |
| "rewards/margins": 0.2689816951751709, | |
| "rewards/rejected": -0.29325076937675476, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.8379228953579858, | |
| "grad_norm": 3.1164157390594482, | |
| "learning_rate": 3.210141169196812e-06, | |
| "logits/chosen": 0.055872343480587006, | |
| "logits/rejected": -0.2360413372516632, | |
| "logps/chosen": -199.78369140625, | |
| "logps/rejected": -194.6487274169922, | |
| "loss": 0.6654, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.17452116310596466, | |
| "rewards/margins": 0.2973538339138031, | |
| "rewards/rejected": -0.47187501192092896, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.8418568056648308, | |
| "grad_norm": 2.419414758682251, | |
| "learning_rate": 3.0623832138570475e-06, | |
| "logits/chosen": -0.018596932291984558, | |
| "logits/rejected": -0.17724603414535522, | |
| "logps/chosen": -200.5649871826172, | |
| "logps/rejected": -196.24949645996094, | |
| "loss": 0.6371, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.20276539027690887, | |
| "rewards/margins": 0.30727338790893555, | |
| "rewards/rejected": -0.5100387334823608, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.8457907159716759, | |
| "grad_norm": 2.426133871078491, | |
| "learning_rate": 2.917824100305393e-06, | |
| "logits/chosen": -0.244663804769516, | |
| "logits/rejected": -0.4893515706062317, | |
| "logps/chosen": -197.4519500732422, | |
| "logps/rejected": -183.88381958007812, | |
| "loss": 0.6558, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.14478914439678192, | |
| "rewards/margins": 0.25045084953308105, | |
| "rewards/rejected": -0.39524000883102417, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.8497246262785209, | |
| "grad_norm": 2.0746254920959473, | |
| "learning_rate": 2.7764911300057584e-06, | |
| "logits/chosen": -0.0608975775539875, | |
| "logits/rejected": -0.24979321658611298, | |
| "logps/chosen": -202.14073181152344, | |
| "logps/rejected": -192.38632202148438, | |
| "loss": 0.6443, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.13447943329811096, | |
| "rewards/margins": 0.33891427516937256, | |
| "rewards/rejected": -0.4733937382698059, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.8536585365853658, | |
| "grad_norm": 3.9822261333465576, | |
| "learning_rate": 2.638410995131966e-06, | |
| "logits/chosen": -0.02557748556137085, | |
| "logits/rejected": -0.278294175863266, | |
| "logps/chosen": -194.00274658203125, | |
| "logps/rejected": -185.4058074951172, | |
| "loss": 0.6824, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.2879054546356201, | |
| "rewards/margins": 0.20160725712776184, | |
| "rewards/rejected": -0.48951274156570435, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.8575924468922108, | |
| "grad_norm": 2.6279852390289307, | |
| "learning_rate": 2.5036097735266807e-06, | |
| "logits/chosen": 0.027390051633119583, | |
| "logits/rejected": -0.41016143560409546, | |
| "logps/chosen": -192.32449340820312, | |
| "logps/rejected": -174.3748016357422, | |
| "loss": 0.6204, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.1535167396068573, | |
| "rewards/margins": 0.40089765191078186, | |
| "rewards/rejected": -0.5544143915176392, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.8615263571990559, | |
| "grad_norm": 2.3923299312591553, | |
| "learning_rate": 2.372112923776315e-06, | |
| "logits/chosen": 0.02239897847175598, | |
| "logits/rejected": -0.4596307873725891, | |
| "logps/chosen": -180.89537048339844, | |
| "logps/rejected": -151.65426635742188, | |
| "loss": 0.648, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.24943062663078308, | |
| "rewards/margins": 0.25950321555137634, | |
| "rewards/rejected": -0.5089339017868042, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.8654602675059009, | |
| "grad_norm": 1.7112442255020142, | |
| "learning_rate": 2.243945280402953e-06, | |
| "logits/chosen": 0.018297865986824036, | |
| "logits/rejected": -0.2831381857395172, | |
| "logps/chosen": -195.10971069335938, | |
| "logps/rejected": -180.3495635986328, | |
| "loss": 0.6608, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.09053869545459747, | |
| "rewards/margins": 0.3507447838783264, | |
| "rewards/rejected": -0.4412834644317627, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.8654602675059009, | |
| "eval_logits/chosen": 1.4525092840194702, | |
| "eval_logits/rejected": 1.2631471157073975, | |
| "eval_logps/chosen": -209.303955078125, | |
| "eval_logps/rejected": -188.3060302734375, | |
| "eval_loss": 0.6424652338027954, | |
| "eval_rewards/accuracies": 0.645312488079071, | |
| "eval_rewards/chosen": -0.06668321788311005, | |
| "eval_rewards/margins": 0.32947659492492676, | |
| "eval_rewards/rejected": -0.3961597979068756, | |
| "eval_runtime": 289.5939, | |
| "eval_samples_per_second": 2.21, | |
| "eval_steps_per_second": 0.138, | |
| "step": 1100 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 1271, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 10, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |