Instructions to use xinlai/Qwen2-7B-SFT-Step-DPO with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use xinlai/Qwen2-7B-SFT-Step-DPO with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="xinlai/Qwen2-7B-SFT-Step-DPO")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```

```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("xinlai/Qwen2-7B-SFT-Step-DPO")
model = AutoModelForCausalLM.from_pretrained("xinlai/Qwen2-7B-SFT-Step-DPO")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use xinlai/Qwen2-7B-SFT-Step-DPO with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "xinlai/Qwen2-7B-SFT-Step-DPO"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "xinlai/Qwen2-7B-SFT-Step-DPO",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
Use Docker
docker model run hf.co/xinlai/Qwen2-7B-SFT-Step-DPO
- SGLang
How to use xinlai/Qwen2-7B-SFT-Step-DPO with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "xinlai/Qwen2-7B-SFT-Step-DPO" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "xinlai/Qwen2-7B-SFT-Step-DPO",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
Use Docker images
```bash
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "xinlai/Qwen2-7B-SFT-Step-DPO" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "xinlai/Qwen2-7B-SFT-Step-DPO",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
- Docker Model Runner
How to use xinlai/Qwen2-7B-SFT-Step-DPO with Docker Model Runner:
docker model run hf.co/xinlai/Qwen2-7B-SFT-Step-DPO
| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 8.0, | |
| "eval_steps": 1, | |
| "global_step": 472, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01694915254237288, | |
| "grad_norm": 74.35959798227883, | |
| "learning_rate": 1.0416666666666666e-08, | |
| "logits/chosen": -1.0022015571594238, | |
| "logits/rejected": -1.0571039915084839, | |
| "logps/chosen": -26.953861236572266, | |
| "logps/rejected": -41.69861602783203, | |
| "loss": 0.6931, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.03389830508474576, | |
| "grad_norm": 71.81906743432833, | |
| "learning_rate": 2.083333333333333e-08, | |
| "logits/chosen": -0.9866722822189331, | |
| "logits/rejected": -1.1117209196090698, | |
| "logps/chosen": -33.70802307128906, | |
| "logps/rejected": -37.13496398925781, | |
| "loss": 0.6931, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.05084745762711865, | |
| "grad_norm": 75.40773138219605, | |
| "learning_rate": 3.125e-08, | |
| "logits/chosen": -1.2932835817337036, | |
| "logits/rejected": -1.2812855243682861, | |
| "logps/chosen": -30.66956329345703, | |
| "logps/rejected": -49.71609878540039, | |
| "loss": 0.7141, | |
| "rewards/accuracies": 0.3125, | |
| "rewards/chosen": -0.015645474195480347, | |
| "rewards/margins": -0.0929969847202301, | |
| "rewards/rejected": 0.07735151052474976, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.06779661016949153, | |
| "grad_norm": 74.47397381287325, | |
| "learning_rate": 4.166666666666666e-08, | |
| "logits/chosen": -1.0415635108947754, | |
| "logits/rejected": -1.0745270252227783, | |
| "logps/chosen": -26.642629623413086, | |
| "logps/rejected": -38.44277572631836, | |
| "loss": 0.7026, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.048020511865615845, | |
| "rewards/margins": -0.018185943365097046, | |
| "rewards/rejected": 0.06620645523071289, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.0847457627118644, | |
| "grad_norm": 76.30048141954929, | |
| "learning_rate": 5.208333333333333e-08, | |
| "logits/chosen": -1.2889574766159058, | |
| "logits/rejected": -1.219961404800415, | |
| "logps/chosen": -31.46270751953125, | |
| "logps/rejected": -28.84333610534668, | |
| "loss": 0.6924, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.04683685302734375, | |
| "rewards/margins": -0.016847282648086548, | |
| "rewards/rejected": -0.029989570379257202, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.1016949152542373, | |
| "grad_norm": 71.45564125608341, | |
| "learning_rate": 6.25e-08, | |
| "logits/chosen": -1.2035868167877197, | |
| "logits/rejected": -1.1329045295715332, | |
| "logps/chosen": -35.237884521484375, | |
| "logps/rejected": -38.39533996582031, | |
| "loss": 0.6914, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.014047741889953613, | |
| "rewards/margins": 0.0001532137393951416, | |
| "rewards/rejected": -0.014200955629348755, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.11864406779661017, | |
| "grad_norm": 74.36407794239695, | |
| "learning_rate": 7.291666666666667e-08, | |
| "logits/chosen": -1.2061525583267212, | |
| "logits/rejected": -1.1756294965744019, | |
| "logps/chosen": -29.565950393676758, | |
| "logps/rejected": -33.2646484375, | |
| "loss": 0.7034, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.09937351942062378, | |
| "rewards/margins": 0.04770404100418091, | |
| "rewards/rejected": 0.05166947841644287, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.13559322033898305, | |
| "grad_norm": 72.94427557648902, | |
| "learning_rate": 8.333333333333333e-08, | |
| "logits/chosen": -1.2344228029251099, | |
| "logits/rejected": -1.255335807800293, | |
| "logps/chosen": -26.22496223449707, | |
| "logps/rejected": -39.68927764892578, | |
| "loss": 0.7084, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": 0.005427069962024689, | |
| "rewards/margins": -0.014611326158046722, | |
| "rewards/rejected": 0.02003839612007141, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.15254237288135594, | |
| "grad_norm": 69.72083135093712, | |
| "learning_rate": 9.375e-08, | |
| "logits/chosen": -1.1401718854904175, | |
| "logits/rejected": -1.1010041236877441, | |
| "logps/chosen": -33.88338851928711, | |
| "logps/rejected": -28.835594177246094, | |
| "loss": 0.7, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.037932395935058594, | |
| "rewards/margins": -0.005142271518707275, | |
| "rewards/rejected": 0.04307466745376587, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.1694915254237288, | |
| "grad_norm": 71.74909213913158, | |
| "learning_rate": 1.0416666666666667e-07, | |
| "logits/chosen": -1.180943250656128, | |
| "logits/rejected": -1.090497612953186, | |
| "logps/chosen": -37.923099517822266, | |
| "logps/rejected": -36.79877471923828, | |
| "loss": 0.6975, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.031415216624736786, | |
| "rewards/margins": -0.0013954713940620422, | |
| "rewards/rejected": 0.03281068801879883, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.1864406779661017, | |
| "grad_norm": 75.11980776081754, | |
| "learning_rate": 1.1458333333333332e-07, | |
| "logits/chosen": -0.9823209047317505, | |
| "logits/rejected": -0.9876938462257385, | |
| "logps/chosen": -27.41145896911621, | |
| "logps/rejected": -45.299991607666016, | |
| "loss": 0.7077, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.0018342137336730957, | |
| "rewards/margins": 0.0696558952331543, | |
| "rewards/rejected": -0.07149010896682739, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.2033898305084746, | |
| "grad_norm": 71.48536897103016, | |
| "learning_rate": 1.25e-07, | |
| "logits/chosen": -0.9993177652359009, | |
| "logits/rejected": -0.9222314953804016, | |
| "logps/chosen": -23.32973289489746, | |
| "logps/rejected": -32.4486083984375, | |
| "loss": 0.6838, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.01836332678794861, | |
| "rewards/margins": 0.00046828389167785645, | |
| "rewards/rejected": -0.018831610679626465, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.22033898305084745, | |
| "grad_norm": 69.73428591175114, | |
| "learning_rate": 1.3541666666666666e-07, | |
| "logits/chosen": -1.4160847663879395, | |
| "logits/rejected": -1.2769191265106201, | |
| "logps/chosen": -26.515804290771484, | |
| "logps/rejected": -36.50257110595703, | |
| "loss": 0.7069, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.09053070843219757, | |
| "rewards/margins": 0.059635356068611145, | |
| "rewards/rejected": 0.030895352363586426, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.23728813559322035, | |
| "grad_norm": 74.26548135080421, | |
| "learning_rate": 1.4583333333333335e-07, | |
| "logits/chosen": -1.2310669422149658, | |
| "logits/rejected": -1.1061973571777344, | |
| "logps/chosen": -38.83403396606445, | |
| "logps/rejected": -57.466835021972656, | |
| "loss": 0.7022, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.014897465705871582, | |
| "rewards/margins": 0.015999972820281982, | |
| "rewards/rejected": -0.030897438526153564, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.2542372881355932, | |
| "grad_norm": 70.61107518338639, | |
| "learning_rate": 1.5624999999999999e-07, | |
| "logits/chosen": -1.245528221130371, | |
| "logits/rejected": -1.2389111518859863, | |
| "logps/chosen": -24.09255027770996, | |
| "logps/rejected": -35.16242218017578, | |
| "loss": 0.7081, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.009011238813400269, | |
| "rewards/margins": 0.01104736328125, | |
| "rewards/rejected": -0.0020361244678497314, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.2711864406779661, | |
| "grad_norm": 69.06624100974847, | |
| "learning_rate": 1.6666666666666665e-07, | |
| "logits/chosen": -1.1713544130325317, | |
| "logits/rejected": -1.2104028463363647, | |
| "logps/chosen": -27.774211883544922, | |
| "logps/rejected": -32.56517791748047, | |
| "loss": 0.6959, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.06307366490364075, | |
| "rewards/margins": -0.055293723940849304, | |
| "rewards/rejected": -0.007779940962791443, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.288135593220339, | |
| "grad_norm": 74.45322195801226, | |
| "learning_rate": 1.7708333333333334e-07, | |
| "logits/chosen": -1.2374873161315918, | |
| "logits/rejected": -1.2554068565368652, | |
| "logps/chosen": -24.268505096435547, | |
| "logps/rejected": -35.4179573059082, | |
| "loss": 0.6963, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.0001607835292816162, | |
| "rewards/margins": -0.01779848337173462, | |
| "rewards/rejected": 0.017637699842453003, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.3050847457627119, | |
| "grad_norm": 71.43793235652528, | |
| "learning_rate": 1.875e-07, | |
| "logits/chosen": -1.1041090488433838, | |
| "logits/rejected": -1.0679348707199097, | |
| "logps/chosen": -23.000707626342773, | |
| "logps/rejected": -29.853412628173828, | |
| "loss": 0.705, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": -0.055961236357688904, | |
| "rewards/margins": -0.05573050677776337, | |
| "rewards/rejected": -0.0002307295799255371, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.3220338983050847, | |
| "grad_norm": 71.50823519836709, | |
| "learning_rate": 1.9791666666666664e-07, | |
| "logits/chosen": -1.0583523511886597, | |
| "logits/rejected": -1.0313684940338135, | |
| "logps/chosen": -21.926410675048828, | |
| "logps/rejected": -35.9990348815918, | |
| "loss": 0.7084, | |
| "rewards/accuracies": 0.3125, | |
| "rewards/chosen": -0.018382668495178223, | |
| "rewards/margins": 0.02424493432044983, | |
| "rewards/rejected": -0.04262760281562805, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.3389830508474576, | |
| "grad_norm": 65.57787832871358, | |
| "learning_rate": 2.0833333333333333e-07, | |
| "logits/chosen": -1.2984358072280884, | |
| "logits/rejected": -1.2342230081558228, | |
| "logps/chosen": -30.60858154296875, | |
| "logps/rejected": -38.88047790527344, | |
| "loss": 0.7012, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.01342683658003807, | |
| "rewards/margins": 0.014368299394845963, | |
| "rewards/rejected": -0.027795135974884033, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.3559322033898305, | |
| "grad_norm": 68.54840408145243, | |
| "learning_rate": 2.1875e-07, | |
| "logits/chosen": -1.3012466430664062, | |
| "logits/rejected": -1.3128973245620728, | |
| "logps/chosen": -26.805089950561523, | |
| "logps/rejected": -41.52635192871094, | |
| "loss": 0.6853, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.01039344072341919, | |
| "rewards/margins": 0.011234819889068604, | |
| "rewards/rejected": -0.0008413791656494141, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.3728813559322034, | |
| "grad_norm": 75.35032250165732, | |
| "learning_rate": 2.2916666666666663e-07, | |
| "logits/chosen": -1.1717727184295654, | |
| "logits/rejected": -1.1936640739440918, | |
| "logps/chosen": -21.9468994140625, | |
| "logps/rejected": -27.52823257446289, | |
| "loss": 0.6756, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.030159294605255127, | |
| "rewards/margins": 0.03967534005641937, | |
| "rewards/rejected": -0.0698346346616745, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.3898305084745763, | |
| "grad_norm": 69.76965900033692, | |
| "learning_rate": 2.3958333333333335e-07, | |
| "logits/chosen": -1.227396845817566, | |
| "logits/rejected": -1.2514605522155762, | |
| "logps/chosen": -30.95319938659668, | |
| "logps/rejected": -33.107421875, | |
| "loss": 0.6483, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.009671658277511597, | |
| "rewards/margins": 0.06378874182701111, | |
| "rewards/rejected": -0.05411708354949951, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.4067796610169492, | |
| "grad_norm": 71.41404237507972, | |
| "learning_rate": 2.5e-07, | |
| "logits/chosen": -1.1064453125, | |
| "logits/rejected": -1.0061161518096924, | |
| "logps/chosen": -32.68113327026367, | |
| "logps/rejected": -38.1193962097168, | |
| "loss": 0.6682, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.034807443618774414, | |
| "rewards/margins": 0.0188644677400589, | |
| "rewards/rejected": -0.05367191135883331, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.423728813559322, | |
| "grad_norm": 70.49202249008462, | |
| "learning_rate": 2.604166666666667e-07, | |
| "logits/chosen": -1.1899397373199463, | |
| "logits/rejected": -1.1957398653030396, | |
| "logps/chosen": -37.75098419189453, | |
| "logps/rejected": -34.21184539794922, | |
| "loss": 0.6635, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.003971487283706665, | |
| "rewards/margins": 0.10494436323642731, | |
| "rewards/rejected": -0.10097287595272064, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.4406779661016949, | |
| "grad_norm": 67.14613715257252, | |
| "learning_rate": 2.708333333333333e-07, | |
| "logits/chosen": -1.3827494382858276, | |
| "logits/rejected": -1.3526558876037598, | |
| "logps/chosen": -27.053857803344727, | |
| "logps/rejected": -33.783485412597656, | |
| "loss": 0.6528, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.015252411365509033, | |
| "rewards/margins": 0.1568407416343689, | |
| "rewards/rejected": -0.14158833026885986, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.4576271186440678, | |
| "grad_norm": 68.86332311369058, | |
| "learning_rate": 2.8125e-07, | |
| "logits/chosen": -1.0096489191055298, | |
| "logits/rejected": -1.0554242134094238, | |
| "logps/chosen": -33.25010681152344, | |
| "logps/rejected": -35.958675384521484, | |
| "loss": 0.6573, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.04330983757972717, | |
| "rewards/margins": 0.12464988231658936, | |
| "rewards/rejected": -0.16795971989631653, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.4745762711864407, | |
| "grad_norm": 66.94051666468431, | |
| "learning_rate": 2.916666666666667e-07, | |
| "logits/chosen": -1.3319611549377441, | |
| "logits/rejected": -1.2341638803482056, | |
| "logps/chosen": -23.90947914123535, | |
| "logps/rejected": -31.997909545898438, | |
| "loss": 0.6585, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.028478246182203293, | |
| "rewards/margins": 0.07672013342380524, | |
| "rewards/rejected": -0.10519838333129883, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.4915254237288136, | |
| "grad_norm": 62.13361032578897, | |
| "learning_rate": 3.020833333333333e-07, | |
| "logits/chosen": -1.058496356010437, | |
| "logits/rejected": -1.061716914176941, | |
| "logps/chosen": -27.815153121948242, | |
| "logps/rejected": -35.63865661621094, | |
| "loss": 0.6417, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.002513296902179718, | |
| "rewards/margins": 0.08875668793916702, | |
| "rewards/rejected": -0.0862433910369873, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.5084745762711864, | |
| "grad_norm": 69.40663934648452, | |
| "learning_rate": 3.1249999999999997e-07, | |
| "logits/chosen": -1.148958683013916, | |
| "logits/rejected": -1.0955352783203125, | |
| "logps/chosen": -29.258386611938477, | |
| "logps/rejected": -37.004207611083984, | |
| "loss": 0.6333, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.018777184188365936, | |
| "rewards/margins": 0.270157128572464, | |
| "rewards/rejected": -0.28893429040908813, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.5254237288135594, | |
| "grad_norm": 63.75111913966312, | |
| "learning_rate": 3.2291666666666666e-07, | |
| "logits/chosen": -1.1732263565063477, | |
| "logits/rejected": -1.0661126375198364, | |
| "logps/chosen": -31.076984405517578, | |
| "logps/rejected": -34.91364288330078, | |
| "loss": 0.6028, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.04974186420440674, | |
| "rewards/margins": 0.24639210104942322, | |
| "rewards/rejected": -0.19665023684501648, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.5423728813559322, | |
| "grad_norm": 63.46121996218027, | |
| "learning_rate": 3.333333333333333e-07, | |
| "logits/chosen": -1.2128429412841797, | |
| "logits/rejected": -1.2360285520553589, | |
| "logps/chosen": -36.98728942871094, | |
| "logps/rejected": -38.032920837402344, | |
| "loss": 0.6111, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.015447601675987244, | |
| "rewards/margins": 0.3491629958152771, | |
| "rewards/rejected": -0.33371537923812866, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.559322033898305, | |
| "grad_norm": 64.01628740909891, | |
| "learning_rate": 3.4375e-07, | |
| "logits/chosen": -1.2774078845977783, | |
| "logits/rejected": -1.252654790878296, | |
| "logps/chosen": -30.110694885253906, | |
| "logps/rejected": -39.82551956176758, | |
| "loss": 0.5955, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.12499848008155823, | |
| "rewards/margins": 0.44781434535980225, | |
| "rewards/rejected": -0.5728127956390381, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.576271186440678, | |
| "grad_norm": 61.04313764198192, | |
| "learning_rate": 3.541666666666667e-07, | |
| "logits/chosen": -1.2864689826965332, | |
| "logits/rejected": -1.1125664710998535, | |
| "logps/chosen": -29.14852523803711, | |
| "logps/rejected": -35.5062255859375, | |
| "loss": 0.5925, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.07132556289434433, | |
| "rewards/margins": 0.29910576343536377, | |
| "rewards/rejected": -0.3704313337802887, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.5932203389830508, | |
| "grad_norm": 61.863987476747845, | |
| "learning_rate": 3.645833333333333e-07, | |
| "logits/chosen": -1.1560570001602173, | |
| "logits/rejected": -1.1620285511016846, | |
| "logps/chosen": -21.810134887695312, | |
| "logps/rejected": -45.566375732421875, | |
| "loss": 0.5656, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.03650933504104614, | |
| "rewards/margins": 0.7581607699394226, | |
| "rewards/rejected": -0.7946701049804688, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.6101694915254238, | |
| "grad_norm": 59.689844232963395, | |
| "learning_rate": 3.75e-07, | |
| "logits/chosen": -1.1466398239135742, | |
| "logits/rejected": -1.1647982597351074, | |
| "logps/chosen": -24.348777770996094, | |
| "logps/rejected": -29.572818756103516, | |
| "loss": 0.5827, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.0986182689666748, | |
| "rewards/margins": 0.30504968762397766, | |
| "rewards/rejected": -0.40366795659065247, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.6271186440677966, | |
| "grad_norm": 66.8530097795874, | |
| "learning_rate": 3.8541666666666665e-07, | |
| "logits/chosen": -1.1411319971084595, | |
| "logits/rejected": -1.1138074398040771, | |
| "logps/chosen": -29.88798713684082, | |
| "logps/rejected": -30.512065887451172, | |
| "loss": 0.5748, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.1340489685535431, | |
| "rewards/margins": 0.17574873566627502, | |
| "rewards/rejected": -0.3097977042198181, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.6440677966101694, | |
| "grad_norm": 59.54927270244813, | |
| "learning_rate": 3.958333333333333e-07, | |
| "logits/chosen": -1.2669272422790527, | |
| "logits/rejected": -1.2261359691619873, | |
| "logps/chosen": -33.28898239135742, | |
| "logps/rejected": -52.16841506958008, | |
| "loss": 0.5415, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.1103217601776123, | |
| "rewards/margins": 0.4893791675567627, | |
| "rewards/rejected": -0.599700927734375, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.6610169491525424, | |
| "grad_norm": 61.22261904686975, | |
| "learning_rate": 4.0625e-07, | |
| "logits/chosen": -1.2993597984313965, | |
| "logits/rejected": -1.2113492488861084, | |
| "logps/chosen": -38.285438537597656, | |
| "logps/rejected": -51.55641555786133, | |
| "loss": 0.5969, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.14819863438606262, | |
| "rewards/margins": 0.8797410726547241, | |
| "rewards/rejected": -1.027939796447754, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.6779661016949152, | |
| "grad_norm": 60.12706158104359, | |
| "learning_rate": 4.1666666666666667e-07, | |
| "logits/chosen": -1.1191147565841675, | |
| "logits/rejected": -0.9404773712158203, | |
| "logps/chosen": -27.851015090942383, | |
| "logps/rejected": -40.25725173950195, | |
| "loss": 0.5564, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.17417365312576294, | |
| "rewards/margins": 0.5528932809829712, | |
| "rewards/rejected": -0.7270669341087341, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.6949152542372882, | |
| "grad_norm": 57.92726362837684, | |
| "learning_rate": 4.270833333333333e-07, | |
| "logits/chosen": -1.0899916887283325, | |
| "logits/rejected": -0.9453008770942688, | |
| "logps/chosen": -26.283432006835938, | |
| "logps/rejected": -45.57765579223633, | |
| "loss": 0.5461, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.13252140581607819, | |
| "rewards/margins": 0.8450538516044617, | |
| "rewards/rejected": -0.9775752425193787, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.711864406779661, | |
| "grad_norm": 58.169474320406394, | |
| "learning_rate": 4.375e-07, | |
| "logits/chosen": -1.1068731546401978, | |
| "logits/rejected": -1.0945335626602173, | |
| "logps/chosen": -24.04115104675293, | |
| "logps/rejected": -36.62716293334961, | |
| "loss": 0.5406, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.10331939160823822, | |
| "rewards/margins": 0.5659677386283875, | |
| "rewards/rejected": -0.6692871451377869, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.7288135593220338, | |
| "grad_norm": 66.66372536069926, | |
| "learning_rate": 4.479166666666667e-07, | |
| "logits/chosen": -1.1794297695159912, | |
| "logits/rejected": -1.193390965461731, | |
| "logps/chosen": -46.91223907470703, | |
| "logps/rejected": -41.650333404541016, | |
| "loss": 0.5573, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.3094208538532257, | |
| "rewards/margins": 0.14515355229377747, | |
| "rewards/rejected": -0.4545744061470032, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.7457627118644068, | |
| "grad_norm": 56.78835490397593, | |
| "learning_rate": 4.5833333333333327e-07, | |
| "logits/chosen": -1.0120888948440552, | |
| "logits/rejected": -0.8681447505950928, | |
| "logps/chosen": -33.3481330871582, | |
| "logps/rejected": -48.61557388305664, | |
| "loss": 0.5516, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.093438059091568, | |
| "rewards/margins": 0.624420166015625, | |
| "rewards/rejected": -0.7178582549095154, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.7627118644067796, | |
| "grad_norm": 59.752166781716426, | |
| "learning_rate": 4.6874999999999996e-07, | |
| "logits/chosen": -1.2927764654159546, | |
| "logits/rejected": -1.212005615234375, | |
| "logps/chosen": -27.116588592529297, | |
| "logps/rejected": -48.95513153076172, | |
| "loss": 0.5622, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -0.08109956979751587, | |
| "rewards/margins": 1.1761130094528198, | |
| "rewards/rejected": -1.2572126388549805, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.7796610169491526, | |
| "grad_norm": 56.0712139430381, | |
| "learning_rate": 4.791666666666667e-07, | |
| "logits/chosen": -1.310462474822998, | |
| "logits/rejected": -1.1343291997909546, | |
| "logps/chosen": -32.363792419433594, | |
| "logps/rejected": -41.77903747558594, | |
| "loss": 0.4924, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.1404968947172165, | |
| "rewards/margins": 0.6709720492362976, | |
| "rewards/rejected": -0.8114689588546753, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.7966101694915254, | |
| "grad_norm": 53.687876472368266, | |
| "learning_rate": 4.895833333333333e-07, | |
| "logits/chosen": -1.0838322639465332, | |
| "logits/rejected": -1.1518497467041016, | |
| "logps/chosen": -34.07114791870117, | |
| "logps/rejected": -38.58074188232422, | |
| "loss": 0.475, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.14443516731262207, | |
| "rewards/margins": 0.33194631338119507, | |
| "rewards/rejected": -0.47638148069381714, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.8135593220338984, | |
| "grad_norm": 57.81488552818391, | |
| "learning_rate": 5e-07, | |
| "logits/chosen": -1.3274493217468262, | |
| "logits/rejected": -1.099511981010437, | |
| "logps/chosen": -33.7588005065918, | |
| "logps/rejected": -51.24869918823242, | |
| "loss": 0.46, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.2817200720310211, | |
| "rewards/margins": 1.4236443042755127, | |
| "rewards/rejected": -1.705364465713501, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.8305084745762712, | |
| "grad_norm": 51.9232811376309, | |
| "learning_rate": 4.999931375995349e-07, | |
| "logits/chosen": -1.2194585800170898, | |
| "logits/rejected": -1.091806173324585, | |
| "logps/chosen": -28.378108978271484, | |
| "logps/rejected": -36.51158905029297, | |
| "loss": 0.4926, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.31702089309692383, | |
| "rewards/margins": 0.6066832542419434, | |
| "rewards/rejected": -0.9237041473388672, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.847457627118644, | |
| "grad_norm": 58.79932831253633, | |
| "learning_rate": 4.999725507748798e-07, | |
| "logits/chosen": -1.4276092052459717, | |
| "logits/rejected": -1.2703173160552979, | |
| "logps/chosen": -27.375112533569336, | |
| "logps/rejected": -45.86325454711914, | |
| "loss": 0.5289, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.21876651048660278, | |
| "rewards/margins": 1.0810855627059937, | |
| "rewards/rejected": -1.2998521327972412, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.864406779661017, | |
| "grad_norm": 56.405768925690126, | |
| "learning_rate": 4.99938240656235e-07, | |
| "logits/chosen": -1.1859700679779053, | |
| "logits/rejected": -1.110828161239624, | |
| "logps/chosen": -30.097301483154297, | |
| "logps/rejected": -56.47822189331055, | |
| "loss": 0.4842, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.14388218522071838, | |
| "rewards/margins": 0.8802270889282227, | |
| "rewards/rejected": -1.0241093635559082, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.8813559322033898, | |
| "grad_norm": 54.84466981430871, | |
| "learning_rate": 4.998902091271985e-07, | |
| "logits/chosen": -1.2907037734985352, | |
| "logits/rejected": -1.198894739151001, | |
| "logps/chosen": -23.458566665649414, | |
| "logps/rejected": -36.68768310546875, | |
| "loss": 0.4399, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.09736257791519165, | |
| "rewards/margins": 0.8091447353363037, | |
| "rewards/rejected": -0.9065073132514954, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.8983050847457628, | |
| "grad_norm": 53.59884977562605, | |
| "learning_rate": 4.998284588246634e-07, | |
| "logits/chosen": -1.2666661739349365, | |
| "logits/rejected": -1.1450353860855103, | |
| "logps/chosen": -32.4919319152832, | |
| "logps/rejected": -36.02308654785156, | |
| "loss": 0.4643, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.2568899691104889, | |
| "rewards/margins": 0.8810305595397949, | |
| "rewards/rejected": -1.137920618057251, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.9152542372881356, | |
| "grad_norm": 56.86352488023535, | |
| "learning_rate": 4.997529931386719e-07, | |
| "logits/chosen": -1.2422281503677368, | |
| "logits/rejected": -1.2324570417404175, | |
| "logps/chosen": -33.5006217956543, | |
| "logps/rejected": -30.282136917114258, | |
| "loss": 0.4844, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.3842281401157379, | |
| "rewards/margins": 0.29243284463882446, | |
| "rewards/rejected": -0.6766610145568848, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.9322033898305084, | |
| "grad_norm": 53.15925076232375, | |
| "learning_rate": 4.996638162122302e-07, | |
| "logits/chosen": -1.2868903875350952, | |
| "logits/rejected": -1.1907857656478882, | |
| "logps/chosen": -32.84450149536133, | |
| "logps/rejected": -39.07723617553711, | |
| "loss": 0.4865, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -0.19609573483467102, | |
| "rewards/margins": 1.1344835758209229, | |
| "rewards/rejected": -1.3305792808532715, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.9491525423728814, | |
| "grad_norm": 50.6107831993107, | |
| "learning_rate": 4.995609329410804e-07, | |
| "logits/chosen": -1.1869336366653442, | |
| "logits/rejected": -1.0896047353744507, | |
| "logps/chosen": -25.43665885925293, | |
| "logps/rejected": -37.78445816040039, | |
| "loss": 0.4331, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -0.2436077892780304, | |
| "rewards/margins": 1.5263582468032837, | |
| "rewards/rejected": -1.7699658870697021, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.9661016949152542, | |
| "grad_norm": 53.83410279226871, | |
| "learning_rate": 4.994443489734322e-07, | |
| "logits/chosen": -1.140820860862732, | |
| "logits/rejected": -1.0942250490188599, | |
| "logps/chosen": -28.2945499420166, | |
| "logps/rejected": -44.74162292480469, | |
| "loss": 0.4513, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -0.28495079278945923, | |
| "rewards/margins": 1.6222182512283325, | |
| "rewards/rejected": -1.9071691036224365, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.9830508474576272, | |
| "grad_norm": 57.20949918087681, | |
| "learning_rate": 4.993140707096525e-07, | |
| "logits/chosen": -1.2853411436080933, | |
| "logits/rejected": -1.1984596252441406, | |
| "logps/chosen": -35.58999252319336, | |
| "logps/rejected": -42.4873161315918, | |
| "loss": 0.4307, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.26272836327552795, | |
| "rewards/margins": 1.1384968757629395, | |
| "rewards/rejected": -1.4012253284454346, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 49.633369993703724, | |
| "learning_rate": 4.991701053019145e-07, | |
| "logits/chosen": -1.2317020893096924, | |
| "logits/rejected": -1.2267297506332397, | |
| "logps/chosen": -28.742332458496094, | |
| "logps/rejected": -50.28435516357422, | |
| "loss": 0.4259, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -0.22372712194919586, | |
| "rewards/margins": 1.6622364521026611, | |
| "rewards/rejected": -1.8859635591506958, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 1.0169491525423728, | |
| "grad_norm": 43.19271219759473, | |
| "learning_rate": 4.990124606538042e-07, | |
| "logits/chosen": -1.27030348777771, | |
| "logits/rejected": -1.2970830202102661, | |
| "logps/chosen": -22.824378967285156, | |
| "logps/rejected": -43.53241729736328, | |
| "loss": 0.3445, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.11317068338394165, | |
| "rewards/margins": 1.718346118927002, | |
| "rewards/rejected": -1.8315167427062988, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 1.0338983050847457, | |
| "grad_norm": 46.56532536625075, | |
| "learning_rate": 4.988411454198874e-07, | |
| "logits/chosen": -1.2344048023223877, | |
| "logits/rejected": -1.376710295677185, | |
| "logps/chosen": -30.1800537109375, | |
| "logps/rejected": -34.68271255493164, | |
| "loss": 0.4151, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.3319806754589081, | |
| "rewards/margins": 0.3052244484424591, | |
| "rewards/rejected": -0.637205183506012, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 1.0508474576271187, | |
| "grad_norm": 45.932270396743505, | |
| "learning_rate": 4.98656169005234e-07, | |
| "logits/chosen": -1.300619125366211, | |
| "logits/rejected": -1.1745063066482544, | |
| "logps/chosen": -32.63179016113281, | |
| "logps/rejected": -39.57646942138672, | |
| "loss": 0.3826, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -0.021081820130348206, | |
| "rewards/margins": 1.4243545532226562, | |
| "rewards/rejected": -1.4454363584518433, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 1.0677966101694916, | |
| "grad_norm": 41.51255029654015, | |
| "learning_rate": 4.984575415649018e-07, | |
| "logits/chosen": -1.272727370262146, | |
| "logits/rejected": -1.1292237043380737, | |
| "logps/chosen": -26.08396339416504, | |
| "logps/rejected": -47.56403350830078, | |
| "loss": 0.3506, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.11639979481697083, | |
| "rewards/margins": 2.3725996017456055, | |
| "rewards/rejected": -2.488999366760254, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 1.0847457627118644, | |
| "grad_norm": 38.246979074883626, | |
| "learning_rate": 4.982452740033792e-07, | |
| "logits/chosen": -1.22151517868042, | |
| "logits/rejected": -1.082349181175232, | |
| "logps/chosen": -27.676637649536133, | |
| "logps/rejected": -33.685211181640625, | |
| "loss": 0.3068, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.00457368791103363, | |
| "rewards/margins": 1.3627952337265015, | |
| "rewards/rejected": -1.358221411705017, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 1.1016949152542372, | |
| "grad_norm": 40.134511453074204, | |
| "learning_rate": 4.980193779739863e-07, | |
| "logits/chosen": -1.1573750972747803, | |
| "logits/rejected": -1.0462363958358765, | |
| "logps/chosen": -29.544757843017578, | |
| "logps/rejected": -45.139732360839844, | |
| "loss": 0.3303, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.12963010370731354, | |
| "rewards/margins": 1.9951367378234863, | |
| "rewards/rejected": -2.1247665882110596, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 1.11864406779661, | |
| "grad_norm": 42.89070654695283, | |
| "learning_rate": 4.977798658782351e-07, | |
| "logits/chosen": -1.3436741828918457, | |
| "logits/rejected": -1.2738423347473145, | |
| "logps/chosen": -29.10391616821289, | |
| "logps/rejected": -42.66758346557617, | |
| "loss": 0.349, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.23598036170005798, | |
| "rewards/margins": 1.5764625072479248, | |
| "rewards/rejected": -1.8124428987503052, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 1.1355932203389831, | |
| "grad_norm": 41.22280324142371, | |
| "learning_rate": 4.975267508651491e-07, | |
| "logits/chosen": -1.202622890472412, | |
| "logits/rejected": -1.047250509262085, | |
| "logps/chosen": -27.090843200683594, | |
| "logps/rejected": -30.239742279052734, | |
| "loss": 0.3463, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.1256638914346695, | |
| "rewards/margins": 1.4247705936431885, | |
| "rewards/rejected": -1.5504344701766968, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 1.152542372881356, | |
| "grad_norm": 41.21696455893867, | |
| "learning_rate": 4.97260046830541e-07, | |
| "logits/chosen": -1.166504144668579, | |
| "logits/rejected": -0.8773643970489502, | |
| "logps/chosen": -22.584096908569336, | |
| "logps/rejected": -42.063133239746094, | |
| "loss": 0.3428, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.07130265235900879, | |
| "rewards/margins": 1.9287859201431274, | |
| "rewards/rejected": -2.000088691711426, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 1.1694915254237288, | |
| "grad_norm": 43.16486538306169, | |
| "learning_rate": 4.969797684162497e-07, | |
| "logits/chosen": -1.4693577289581299, | |
| "logits/rejected": -1.3569204807281494, | |
| "logps/chosen": -25.79123878479004, | |
| "logps/rejected": -37.476409912109375, | |
| "loss": 0.3706, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.10858143866062164, | |
| "rewards/margins": 1.385354995727539, | |
| "rewards/rejected": -1.4939366579055786, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 1.1864406779661016, | |
| "grad_norm": 42.7942644360943, | |
| "learning_rate": 4.966859310093372e-07, | |
| "logits/chosen": -1.168703317642212, | |
| "logits/rejected": -1.0707366466522217, | |
| "logps/chosen": -30.724082946777344, | |
| "logps/rejected": -41.230712890625, | |
| "loss": 0.354, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -0.13678063452243805, | |
| "rewards/margins": 1.567689061164856, | |
| "rewards/rejected": -1.7044697999954224, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 1.2033898305084745, | |
| "grad_norm": 39.25048654439285, | |
| "learning_rate": 4.96378550741243e-07, | |
| "logits/chosen": -1.3091105222702026, | |
| "logits/rejected": -1.2656583786010742, | |
| "logps/chosen": -30.91085433959961, | |
| "logps/rejected": -41.49336624145508, | |
| "loss": 0.307, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.22276180982589722, | |
| "rewards/margins": 1.757472276687622, | |
| "rewards/rejected": -1.980234146118164, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 1.2203389830508475, | |
| "grad_norm": 40.23010655586107, | |
| "learning_rate": 4.960576444868992e-07, | |
| "logits/chosen": -1.4617582559585571, | |
| "logits/rejected": -1.4617631435394287, | |
| "logps/chosen": -28.715673446655273, | |
| "logps/rejected": -48.552024841308594, | |
| "loss": 0.317, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.21487601101398468, | |
| "rewards/margins": 2.4703986644744873, | |
| "rewards/rejected": -2.685274362564087, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 1.2372881355932204, | |
| "grad_norm": 45.05758163705817, | |
| "learning_rate": 4.957232298638035e-07, | |
| "logits/chosen": -1.3103129863739014, | |
| "logits/rejected": -1.297128677368164, | |
| "logps/chosen": -29.75772476196289, | |
| "logps/rejected": -42.82856750488281, | |
| "loss": 0.3574, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.16225725412368774, | |
| "rewards/margins": 1.7887846231460571, | |
| "rewards/rejected": -1.9510418176651, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 1.2542372881355932, | |
| "grad_norm": 40.337686604932905, | |
| "learning_rate": 4.953753252310525e-07, | |
| "logits/chosen": -1.351475477218628, | |
| "logits/rejected": -1.2956455945968628, | |
| "logps/chosen": -30.769058227539062, | |
| "logps/rejected": -37.68085479736328, | |
| "loss": 0.3211, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -0.22842487692832947, | |
| "rewards/margins": 1.4851303100585938, | |
| "rewards/rejected": -1.713555097579956, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 1.271186440677966, | |
| "grad_norm": 41.87339924313547, | |
| "learning_rate": 4.950139496883334e-07, | |
| "logits/chosen": -1.1046594381332397, | |
| "logits/rejected": -1.083458662033081, | |
| "logps/chosen": -23.34552764892578, | |
| "logps/rejected": -33.603294372558594, | |
| "loss": 0.2996, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.22441284358501434, | |
| "rewards/margins": 1.64926016330719, | |
| "rewards/rejected": -1.8736729621887207, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 1.288135593220339, | |
| "grad_norm": 40.59281009638228, | |
| "learning_rate": 4.94639123074876e-07, | |
| "logits/chosen": -1.5633933544158936, | |
| "logits/rejected": -1.3900222778320312, | |
| "logps/chosen": -28.575279235839844, | |
| "logps/rejected": -40.71543502807617, | |
| "loss": 0.3253, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.23141731321811676, | |
| "rewards/margins": 1.784515619277954, | |
| "rewards/rejected": -2.015933036804199, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 1.305084745762712, | |
| "grad_norm": 40.512541982486866, | |
| "learning_rate": 4.942508659683626e-07, | |
| "logits/chosen": -1.2337239980697632, | |
| "logits/rejected": -1.1204659938812256, | |
| "logps/chosen": -37.800514221191406, | |
| "logps/rejected": -57.468048095703125, | |
| "loss": 0.3173, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.0504547655582428, | |
| "rewards/margins": 2.774332046508789, | |
| "rewards/rejected": -2.824786901473999, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 1.3220338983050848, | |
| "grad_norm": 44.23393681805476, | |
| "learning_rate": 4.938491996837994e-07, | |
| "logits/chosen": -1.1162344217300415, | |
| "logits/rejected": -1.1677778959274292, | |
| "logps/chosen": -21.936918258666992, | |
| "logps/rejected": -36.89491653442383, | |
| "loss": 0.3389, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.04354112595319748, | |
| "rewards/margins": 1.7271283864974976, | |
| "rewards/rejected": -1.7706694602966309, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 1.3389830508474576, | |
| "grad_norm": 41.690947911283914, | |
| "learning_rate": 4.934341462723454e-07, | |
| "logits/chosen": -1.3248709440231323, | |
| "logits/rejected": -1.2905504703521729, | |
| "logps/chosen": -22.56363868713379, | |
| "logps/rejected": -36.43506622314453, | |
| "loss": 0.3275, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.11233043670654297, | |
| "rewards/margins": 1.878725528717041, | |
| "rewards/rejected": -1.991055965423584, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 1.3559322033898304, | |
| "grad_norm": 40.04810360539708, | |
| "learning_rate": 4.930057285201027e-07, | |
| "logits/chosen": -1.1112794876098633, | |
| "logits/rejected": -1.0777575969696045, | |
| "logps/chosen": -24.657052993774414, | |
| "logps/rejected": -39.67708969116211, | |
| "loss": 0.3229, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -0.13391758501529694, | |
| "rewards/margins": 2.0548856258392334, | |
| "rewards/rejected": -2.18880295753479, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.3728813559322033, | |
| "grad_norm": 34.996138065577895, | |
| "learning_rate": 4.925639699468645e-07, | |
| "logits/chosen": -1.2954164743423462, | |
| "logits/rejected": -1.2493962049484253, | |
| "logps/chosen": -22.976837158203125, | |
| "logps/rejected": -30.558788299560547, | |
| "loss": 0.2742, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.00974225252866745, | |
| "rewards/margins": 1.3260502815246582, | |
| "rewards/rejected": -1.3357923030853271, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 1.3898305084745763, | |
| "grad_norm": 37.19153270519795, | |
| "learning_rate": 4.921088948048246e-07, | |
| "logits/chosen": -1.0799801349639893, | |
| "logits/rejected": -1.029328465461731, | |
| "logps/chosen": -21.34770965576172, | |
| "logps/rejected": -27.523284912109375, | |
| "loss": 0.3197, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.0038819462060928345, | |
| "rewards/margins": 1.294053316116333, | |
| "rewards/rejected": -1.2979353666305542, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 1.4067796610169492, | |
| "grad_norm": 38.782824257992516, | |
| "learning_rate": 4.916405280772462e-07, | |
| "logits/chosen": -1.2136616706848145, | |
| "logits/rejected": -1.1575380563735962, | |
| "logps/chosen": -34.565452575683594, | |
| "logps/rejected": -37.70560836791992, | |
| "loss": 0.2773, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -0.11810773611068726, | |
| "rewards/margins": 1.5449273586273193, | |
| "rewards/rejected": -1.6630350351333618, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 1.423728813559322, | |
| "grad_norm": 46.057268803202, | |
| "learning_rate": 4.911588954770896e-07, | |
| "logits/chosen": -1.323722004890442, | |
| "logits/rejected": -1.2598729133605957, | |
| "logps/chosen": -28.280380249023438, | |
| "logps/rejected": -35.962127685546875, | |
| "loss": 0.3575, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -0.18131475150585175, | |
| "rewards/margins": 1.3694193363189697, | |
| "rewards/rejected": -1.5507341623306274, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 1.4406779661016949, | |
| "grad_norm": 39.078212479916175, | |
| "learning_rate": 4.906640234456011e-07, | |
| "logits/chosen": -1.254352331161499, | |
| "logits/rejected": -1.220250129699707, | |
| "logps/chosen": -22.146629333496094, | |
| "logps/rejected": -34.24107360839844, | |
| "loss": 0.3198, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -0.16238263249397278, | |
| "rewards/margins": 2.182830810546875, | |
| "rewards/rejected": -2.3452136516571045, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 1.457627118644068, | |
| "grad_norm": 40.017911222526, | |
| "learning_rate": 4.90155939150861e-07, | |
| "logits/chosen": -1.2662581205368042, | |
| "logits/rejected": -1.1520745754241943, | |
| "logps/chosen": -28.800233840942383, | |
| "logps/rejected": -45.41657638549805, | |
| "loss": 0.2885, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.10078342258930206, | |
| "rewards/margins": 2.74984073638916, | |
| "rewards/rejected": -2.8506245613098145, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 1.4745762711864407, | |
| "grad_norm": 38.422387432652926, | |
| "learning_rate": 4.896346704862927e-07, | |
| "logits/chosen": -1.1133819818496704, | |
| "logits/rejected": -1.1244077682495117, | |
| "logps/chosen": -26.18191909790039, | |
| "logps/rejected": -38.19125747680664, | |
| "loss": 0.2784, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.2561756372451782, | |
| "rewards/margins": 2.0794429779052734, | |
| "rewards/rejected": -2.335618734359741, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 1.4915254237288136, | |
| "grad_norm": 43.19306665638686, | |
| "learning_rate": 4.891002460691305e-07, | |
| "logits/chosen": -1.1532127857208252, | |
| "logits/rejected": -1.0514928102493286, | |
| "logps/chosen": -33.20591735839844, | |
| "logps/rejected": -44.78691101074219, | |
| "loss": 0.2942, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.31594839692115784, | |
| "rewards/margins": 2.774695634841919, | |
| "rewards/rejected": -3.090643882751465, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 1.5084745762711864, | |
| "grad_norm": 37.55592044811218, | |
| "learning_rate": 4.885526952388497e-07, | |
| "logits/chosen": -1.5443884134292603, | |
| "logits/rejected": -1.491202473640442, | |
| "logps/chosen": -29.413482666015625, | |
| "logps/rejected": -44.91068649291992, | |
| "loss": 0.2738, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.08295662701129913, | |
| "rewards/margins": 2.9935202598571777, | |
| "rewards/rejected": -3.076476812362671, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 1.5254237288135593, | |
| "grad_norm": 39.41480347028368, | |
| "learning_rate": 4.879920480555549e-07, | |
| "logits/chosen": -1.2505245208740234, | |
| "logits/rejected": -1.1800575256347656, | |
| "logps/chosen": -34.517356872558594, | |
| "logps/rejected": -54.461875915527344, | |
| "loss": 0.3016, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.2518163323402405, | |
| "rewards/margins": 2.4639229774475098, | |
| "rewards/rejected": -2.7157392501831055, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 1.542372881355932, | |
| "grad_norm": 41.233244462870424, | |
| "learning_rate": 4.874183352983297e-07, | |
| "logits/chosen": -1.2128483057022095, | |
| "logits/rejected": -1.1637994050979614, | |
| "logps/chosen": -26.66428565979004, | |
| "logps/rejected": -30.274539947509766, | |
| "loss": 0.3237, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": 0.016297563910484314, | |
| "rewards/margins": 1.5082069635391235, | |
| "rewards/rejected": -1.4919092655181885, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 1.559322033898305, | |
| "grad_norm": 33.074028930414165, | |
| "learning_rate": 4.868315884635478e-07, | |
| "logits/chosen": -1.3729506731033325, | |
| "logits/rejected": -1.363387107849121, | |
| "logps/chosen": -32.76537322998047, | |
| "logps/rejected": -40.88609313964844, | |
| "loss": 0.2209, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.49252671003341675, | |
| "rewards/margins": 1.6107347011566162, | |
| "rewards/rejected": -2.1032614707946777, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 1.576271186440678, | |
| "grad_norm": 44.45767411612402, | |
| "learning_rate": 4.862318397631433e-07, | |
| "logits/chosen": -1.290561318397522, | |
| "logits/rejected": -1.2587978839874268, | |
| "logps/chosen": -27.68136215209961, | |
| "logps/rejected": -37.53421401977539, | |
| "loss": 0.3135, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.045494288206100464, | |
| "rewards/margins": 2.3608152866363525, | |
| "rewards/rejected": -2.3153209686279297, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 1.5932203389830508, | |
| "grad_norm": 38.75459636795706, | |
| "learning_rate": 4.856191221228422e-07, | |
| "logits/chosen": -1.4851233959197998, | |
| "logits/rejected": -1.3868330717086792, | |
| "logps/chosen": -25.41338348388672, | |
| "logps/rejected": -50.90999984741211, | |
| "loss": 0.3055, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.057565659284591675, | |
| "rewards/margins": 3.016381025314331, | |
| "rewards/rejected": -2.958815336227417, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 1.6101694915254239, | |
| "grad_norm": 39.42511408680881, | |
| "learning_rate": 4.84993469180355e-07, | |
| "logits/chosen": -1.5986624956130981, | |
| "logits/rejected": -1.5039199590682983, | |
| "logps/chosen": -23.876976013183594, | |
| "logps/rejected": -37.30643081665039, | |
| "loss": 0.2918, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.1558499038219452, | |
| "rewards/margins": 2.534642219543457, | |
| "rewards/rejected": -2.3787922859191895, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 1.6271186440677967, | |
| "grad_norm": 40.62251102213997, | |
| "learning_rate": 4.843549152835302e-07, | |
| "logits/chosen": -1.4016410112380981, | |
| "logits/rejected": -1.330747127532959, | |
| "logps/chosen": -35.39458465576172, | |
| "logps/rejected": -41.057533264160156, | |
| "loss": 0.2683, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.30797258019447327, | |
| "rewards/margins": 1.5779187679290771, | |
| "rewards/rejected": -1.8858911991119385, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 1.6440677966101696, | |
| "grad_norm": 37.47292375573573, | |
| "learning_rate": 4.837034954884681e-07, | |
| "logits/chosen": -1.2742365598678589, | |
| "logits/rejected": -1.16330885887146, | |
| "logps/chosen": -19.86534881591797, | |
| "logps/rejected": -37.20149230957031, | |
| "loss": 0.3136, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.16833439469337463, | |
| "rewards/margins": 2.6455376148223877, | |
| "rewards/rejected": -2.8138718605041504, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 1.6610169491525424, | |
| "grad_norm": 34.360879838757626, | |
| "learning_rate": 4.83039245557597e-07, | |
| "logits/chosen": -1.4751869440078735, | |
| "logits/rejected": -1.3903852701187134, | |
| "logps/chosen": -27.246294021606445, | |
| "logps/rejected": -37.46033477783203, | |
| "loss": 0.2373, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.015960171818733215, | |
| "rewards/margins": 1.7550795078277588, | |
| "rewards/rejected": -1.739119291305542, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 1.6779661016949152, | |
| "grad_norm": 36.086860100703234, | |
| "learning_rate": 4.823622019577088e-07, | |
| "logits/chosen": -1.5154139995574951, | |
| "logits/rejected": -1.435178279876709, | |
| "logps/chosen": -29.518024444580078, | |
| "logps/rejected": -34.60513687133789, | |
| "loss": 0.2638, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.023779883980751038, | |
| "rewards/margins": 1.6297731399536133, | |
| "rewards/rejected": -1.6059932708740234, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 1.694915254237288, | |
| "grad_norm": 44.37484563053148, | |
| "learning_rate": 4.816724018579583e-07, | |
| "logits/chosen": -1.3004710674285889, | |
| "logits/rejected": -1.1905782222747803, | |
| "logps/chosen": -44.38223648071289, | |
| "logps/rejected": -44.088531494140625, | |
| "loss": 0.3148, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -0.6569697856903076, | |
| "rewards/margins": 1.929584264755249, | |
| "rewards/rejected": -2.5865538120269775, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.711864406779661, | |
| "grad_norm": 37.67689426318597, | |
| "learning_rate": 4.809698831278217e-07, | |
| "logits/chosen": -1.1910172700881958, | |
| "logits/rejected": -1.085993766784668, | |
| "logps/chosen": -25.45205307006836, | |
| "logps/rejected": -38.646697998046875, | |
| "loss": 0.2924, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -0.10635387897491455, | |
| "rewards/margins": 2.1210451126098633, | |
| "rewards/rejected": -2.2273988723754883, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 1.7288135593220337, | |
| "grad_norm": 44.53554229157193, | |
| "learning_rate": 4.802546843350177e-07, | |
| "logits/chosen": -1.3502918481826782, | |
| "logits/rejected": -1.3793635368347168, | |
| "logps/chosen": -31.245201110839844, | |
| "logps/rejected": -38.174922943115234, | |
| "loss": 0.308, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -0.04055324196815491, | |
| "rewards/margins": 2.123584032058716, | |
| "rewards/rejected": -2.164137363433838, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 1.7457627118644068, | |
| "grad_norm": 33.6410465461232, | |
| "learning_rate": 4.795268447433906e-07, | |
| "logits/chosen": -1.5270867347717285, | |
| "logits/rejected": -1.5600392818450928, | |
| "logps/chosen": -26.281536102294922, | |
| "logps/rejected": -40.25182342529297, | |
| "loss": 0.237, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.38495194911956787, | |
| "rewards/margins": 2.3371963500976562, | |
| "rewards/rejected": -2.7221484184265137, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 1.7627118644067796, | |
| "grad_norm": 37.86791531616575, | |
| "learning_rate": 4.787864043107546e-07, | |
| "logits/chosen": -1.1703252792358398, | |
| "logits/rejected": -1.2337673902511597, | |
| "logps/chosen": -28.018447875976562, | |
| "logps/rejected": -26.48525619506836, | |
| "loss": 0.2904, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -0.1365218162536621, | |
| "rewards/margins": 1.0608235597610474, | |
| "rewards/rejected": -1.197345495223999, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 1.7796610169491527, | |
| "grad_norm": 39.89897092814635, | |
| "learning_rate": 4.780334036866996e-07, | |
| "logits/chosen": -1.4013991355895996, | |
| "logits/rejected": -1.3012598752975464, | |
| "logps/chosen": -30.404062271118164, | |
| "logps/rejected": -50.464717864990234, | |
| "loss": 0.2444, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.36456581950187683, | |
| "rewards/margins": 2.707120895385742, | |
| "rewards/rejected": -3.0716869831085205, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 1.7966101694915255, | |
| "grad_norm": 38.70079485674391, | |
| "learning_rate": 4.772678842103605e-07, | |
| "logits/chosen": -1.396234393119812, | |
| "logits/rejected": -1.2844393253326416, | |
| "logps/chosen": -30.791566848754883, | |
| "logps/rejected": -41.80755615234375, | |
| "loss": 0.2245, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.2901880741119385, | |
| "rewards/margins": 2.5994958877563477, | |
| "rewards/rejected": -2.8896842002868652, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 1.8135593220338984, | |
| "grad_norm": 31.615929528424363, | |
| "learning_rate": 4.764898879081467e-07, | |
| "logits/chosen": -1.3441611528396606, | |
| "logits/rejected": -1.3465229272842407, | |
| "logps/chosen": -25.936952590942383, | |
| "logps/rejected": -44.00107192993164, | |
| "loss": 0.2261, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.0497933030128479, | |
| "rewards/margins": 1.7863792181015015, | |
| "rewards/rejected": -1.7365858554840088, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 1.8305084745762712, | |
| "grad_norm": 36.20936022186147, | |
| "learning_rate": 4.7569945749143586e-07, | |
| "logits/chosen": -1.3387551307678223, | |
| "logits/rejected": -1.329017996788025, | |
| "logps/chosen": -26.50401496887207, | |
| "logps/rejected": -48.64241027832031, | |
| "loss": 0.2448, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.1088000237941742, | |
| "rewards/margins": 2.8309779167175293, | |
| "rewards/rejected": -2.9397776126861572, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 1.847457627118644, | |
| "grad_norm": 36.766498225229036, | |
| "learning_rate": 4.748966363542285e-07, | |
| "logits/chosen": -1.2870004177093506, | |
| "logits/rejected": -1.2100244760513306, | |
| "logps/chosen": -24.399860382080078, | |
| "logps/rejected": -37.19389343261719, | |
| "loss": 0.2463, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.04130461812019348, | |
| "rewards/margins": 2.3760554790496826, | |
| "rewards/rejected": -2.3347508907318115, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 1.8644067796610169, | |
| "grad_norm": 37.505379590427104, | |
| "learning_rate": 4.7408146857076563e-07, | |
| "logits/chosen": -1.2373127937316895, | |
| "logits/rejected": -1.1650217771530151, | |
| "logps/chosen": -39.90864562988281, | |
| "logps/rejected": -40.89439392089844, | |
| "loss": 0.2646, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.0051874518394470215, | |
| "rewards/margins": 1.6469688415527344, | |
| "rewards/rejected": -1.6417814493179321, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 1.8813559322033897, | |
| "grad_norm": 35.644427389294876, | |
| "learning_rate": 4.732539988931096e-07, | |
| "logits/chosen": -1.3618909120559692, | |
| "logits/rejected": -1.4531258344650269, | |
| "logps/chosen": -26.969541549682617, | |
| "logps/rejected": -42.32440185546875, | |
| "loss": 0.2208, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.25816747546195984, | |
| "rewards/margins": 2.6419811248779297, | |
| "rewards/rejected": -2.900148391723633, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 1.8983050847457628, | |
| "grad_norm": 39.403893355046144, | |
| "learning_rate": 4.7241427274868683e-07, | |
| "logits/chosen": -1.392714262008667, | |
| "logits/rejected": -1.2956047058105469, | |
| "logps/chosen": -25.744760513305664, | |
| "logps/rejected": -43.44940185546875, | |
| "loss": 0.2692, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.19541674852371216, | |
| "rewards/margins": 2.4149329662323, | |
| "rewards/rejected": -2.610349655151367, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 1.9152542372881356, | |
| "grad_norm": 42.63537438222289, | |
| "learning_rate": 4.7156233623779383e-07, | |
| "logits/chosen": -1.3376697301864624, | |
| "logits/rejected": -1.3315945863723755, | |
| "logps/chosen": -31.033275604248047, | |
| "logps/rejected": -34.59294128417969, | |
| "loss": 0.262, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.01679442822933197, | |
| "rewards/margins": 1.9989566802978516, | |
| "rewards/rejected": -2.015751361846924, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 1.9322033898305084, | |
| "grad_norm": 37.518445889823454, | |
| "learning_rate": 4.7069823613106687e-07, | |
| "logits/chosen": -1.3494609594345093, | |
| "logits/rejected": -1.4168843030929565, | |
| "logps/chosen": -37.80765914916992, | |
| "logps/rejected": -50.918888092041016, | |
| "loss": 0.2293, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.5224637985229492, | |
| "rewards/margins": 3.094416618347168, | |
| "rewards/rejected": -3.6168806552886963, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 1.9491525423728815, | |
| "grad_norm": 42.51900093030587, | |
| "learning_rate": 4.698220198669136e-07, | |
| "logits/chosen": -1.646604299545288, | |
| "logits/rejected": -1.517140507698059, | |
| "logps/chosen": -27.87681770324707, | |
| "logps/rejected": -42.672096252441406, | |
| "loss": 0.2965, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.16765162348747253, | |
| "rewards/margins": 2.965381145477295, | |
| "rewards/rejected": -3.133033275604248, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 1.9661016949152543, | |
| "grad_norm": 33.55277119465526, | |
| "learning_rate": 4.6893373554890917e-07, | |
| "logits/chosen": -1.5241327285766602, | |
| "logits/rejected": -1.2908215522766113, | |
| "logps/chosen": -33.22343444824219, | |
| "logps/rejected": -44.93342208862305, | |
| "loss": 0.226, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -0.36639106273651123, | |
| "rewards/margins": 2.9403634071350098, | |
| "rewards/rejected": -3.3067543506622314, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 1.9830508474576272, | |
| "grad_norm": 36.89279198796042, | |
| "learning_rate": 4.6803343194315546e-07, | |
| "logits/chosen": -1.3500347137451172, | |
| "logits/rejected": -1.3750879764556885, | |
| "logps/chosen": -33.52631759643555, | |
| "logps/rejected": -46.71184539794922, | |
| "loss": 0.2323, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.17364656925201416, | |
| "rewards/margins": 2.6467905044555664, | |
| "rewards/rejected": -2.820436954498291, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 29.673763942339104, | |
| "learning_rate": 4.6712115847560353e-07, | |
| "logits/chosen": -1.1543025970458984, | |
| "logits/rejected": -1.1016186475753784, | |
| "logps/chosen": -27.275094985961914, | |
| "logps/rejected": -35.34591293334961, | |
| "loss": 0.2172, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.010025471448898315, | |
| "rewards/margins": 2.681260824203491, | |
| "rewards/rejected": -2.6712355613708496, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 2.016949152542373, | |
| "grad_norm": 18.754455801142957, | |
| "learning_rate": 4.661969652293402e-07, | |
| "logits/chosen": -1.3498600721359253, | |
| "logits/rejected": -1.2991336584091187, | |
| "logps/chosen": -24.873241424560547, | |
| "logps/rejected": -45.2943000793457, | |
| "loss": 0.1226, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.13535727560520172, | |
| "rewards/margins": 3.1097800731658936, | |
| "rewards/rejected": -3.2451369762420654, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 2.0338983050847457, | |
| "grad_norm": 22.75173167375844, | |
| "learning_rate": 4.652609029418388e-07, | |
| "logits/chosen": -1.283535122871399, | |
| "logits/rejected": -1.2337732315063477, | |
| "logps/chosen": -26.472030639648438, | |
| "logps/rejected": -41.27444076538086, | |
| "loss": 0.1627, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.14066343009471893, | |
| "rewards/margins": 2.8250813484191895, | |
| "rewards/rejected": -2.9657444953918457, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 2.0508474576271185, | |
| "grad_norm": 20.99893381194414, | |
| "learning_rate": 4.6431302300217366e-07, | |
| "logits/chosen": -1.4252784252166748, | |
| "logits/rejected": -1.421356439590454, | |
| "logps/chosen": -31.787378311157227, | |
| "logps/rejected": -33.3957405090332, | |
| "loss": 0.1657, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.40507441759109497, | |
| "rewards/margins": 1.8697203397750854, | |
| "rewards/rejected": -1.4646459817886353, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 2.0677966101694913, | |
| "grad_norm": 18.786273208958082, | |
| "learning_rate": 4.633533774481987e-07, | |
| "logits/chosen": -1.2753486633300781, | |
| "logits/rejected": -1.1164805889129639, | |
| "logps/chosen": -31.65105438232422, | |
| "logps/rejected": -46.21112823486328, | |
| "loss": 0.1353, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.07824045419692993, | |
| "rewards/margins": 3.2129859924316406, | |
| "rewards/rejected": -3.291226387023926, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 2.084745762711864, | |
| "grad_norm": 20.917039008362774, | |
| "learning_rate": 4.623820189636905e-07, | |
| "logits/chosen": -1.3903650045394897, | |
| "logits/rejected": -1.3099316358566284, | |
| "logps/chosen": -29.19454002380371, | |
| "logps/rejected": -46.168941497802734, | |
| "loss": 0.1536, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.4621618092060089, | |
| "rewards/margins": 2.845735788345337, | |
| "rewards/rejected": -2.3835740089416504, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 2.1016949152542375, | |
| "grad_norm": 20.45027445287867, | |
| "learning_rate": 4.613990008754565e-07, | |
| "logits/chosen": -1.3704748153686523, | |
| "logits/rejected": -1.2672369480133057, | |
| "logps/chosen": -34.432945251464844, | |
| "logps/rejected": -39.66848373413086, | |
| "loss": 0.1588, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.1847638189792633, | |
| "rewards/margins": 2.653686761856079, | |
| "rewards/rejected": -2.4689230918884277, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 2.1186440677966103, | |
| "grad_norm": 20.088146504282733, | |
| "learning_rate": 4.60404377150407e-07, | |
| "logits/chosen": -1.2935415506362915, | |
| "logits/rejected": -1.2802854776382446, | |
| "logps/chosen": -26.330522537231445, | |
| "logps/rejected": -41.638004302978516, | |
| "loss": 0.1543, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.15763472020626068, | |
| "rewards/margins": 2.74660587310791, | |
| "rewards/rejected": -2.904240131378174, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 2.135593220338983, | |
| "grad_norm": 23.38109010648461, | |
| "learning_rate": 4.593982023925925e-07, | |
| "logits/chosen": -1.1731081008911133, | |
| "logits/rejected": -1.0896246433258057, | |
| "logps/chosen": -33.23085021972656, | |
| "logps/rejected": -40.83133316040039, | |
| "loss": 0.1689, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.19674523174762726, | |
| "rewards/margins": 2.6324305534362793, | |
| "rewards/rejected": -2.8291759490966797, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 2.152542372881356, | |
| "grad_norm": 19.53451419738541, | |
| "learning_rate": 4.58380531840206e-07, | |
| "logits/chosen": -1.3832257986068726, | |
| "logits/rejected": -1.1368017196655273, | |
| "logps/chosen": -31.488880157470703, | |
| "logps/rejected": -37.851097106933594, | |
| "loss": 0.1448, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.38223376870155334, | |
| "rewards/margins": 3.1422624588012695, | |
| "rewards/rejected": -2.76002836227417, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 2.169491525423729, | |
| "grad_norm": 19.597219846897513, | |
| "learning_rate": 4.5735142136255045e-07, | |
| "logits/chosen": -1.3937269449234009, | |
| "logits/rejected": -1.3436622619628906, | |
| "logps/chosen": -28.44049644470215, | |
| "logps/rejected": -49.33514404296875, | |
| "loss": 0.1489, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.09995569288730621, | |
| "rewards/margins": 3.583566665649414, | |
| "rewards/rejected": -3.4836111068725586, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 2.1864406779661016, | |
| "grad_norm": 18.394803732689535, | |
| "learning_rate": 4.5631092745697164e-07, | |
| "logits/chosen": -1.1625999212265015, | |
| "logits/rejected": -1.1585655212402344, | |
| "logps/chosen": -29.666309356689453, | |
| "logps/rejected": -41.36751174926758, | |
| "loss": 0.119, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2516571879386902, | |
| "rewards/margins": 2.918815851211548, | |
| "rewards/rejected": -2.667158603668213, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 2.2033898305084745, | |
| "grad_norm": 19.452927999271022, | |
| "learning_rate": 4.5525910724575645e-07, | |
| "logits/chosen": -1.298140048980713, | |
| "logits/rejected": -1.238019585609436, | |
| "logps/chosen": -30.736804962158203, | |
| "logps/rejected": -49.28110885620117, | |
| "loss": 0.1411, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.11459851264953613, | |
| "rewards/margins": 4.15794563293457, | |
| "rewards/rejected": -4.043347358703613, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 2.2203389830508473, | |
| "grad_norm": 16.937231864925646, | |
| "learning_rate": 4.54196018472997e-07, | |
| "logits/chosen": -1.2767530679702759, | |
| "logits/rejected": -1.1214213371276855, | |
| "logps/chosen": -27.268341064453125, | |
| "logps/rejected": -52.620338439941406, | |
| "loss": 0.105, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.010926365852355957, | |
| "rewards/margins": 4.242362976074219, | |
| "rewards/rejected": -4.231436729431152, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 2.23728813559322, | |
| "grad_norm": 19.91080195782553, | |
| "learning_rate": 4.5312171950142033e-07, | |
| "logits/chosen": -1.443523645401001, | |
| "logits/rejected": -1.3692708015441895, | |
| "logps/chosen": -23.356483459472656, | |
| "logps/rejected": -39.884552001953125, | |
| "loss": 0.137, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.09224121272563934, | |
| "rewards/margins": 3.541024923324585, | |
| "rewards/rejected": -3.4487838745117188, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 2.2542372881355934, | |
| "grad_norm": 20.224862204629513, | |
| "learning_rate": 4.520362693091845e-07, | |
| "logits/chosen": -1.3375797271728516, | |
| "logits/rejected": -1.2925443649291992, | |
| "logps/chosen": -24.31032371520996, | |
| "logps/rejected": -31.916282653808594, | |
| "loss": 0.1541, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.05450062453746796, | |
| "rewards/margins": 1.724971055984497, | |
| "rewards/rejected": -1.6704705953598022, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 2.2711864406779663, | |
| "grad_norm": 19.256180369703056, | |
| "learning_rate": 4.5093972748664087e-07, | |
| "logits/chosen": -1.3231240510940552, | |
| "logits/rejected": -1.2512582540512085, | |
| "logps/chosen": -35.62217330932617, | |
| "logps/rejected": -48.332237243652344, | |
| "loss": 0.1134, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.26731669902801514, | |
| "rewards/margins": 3.2187671661376953, | |
| "rewards/rejected": -3.486083745956421, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 2.288135593220339, | |
| "grad_norm": 17.92618734739177, | |
| "learning_rate": 4.498321542330622e-07, | |
| "logits/chosen": -1.4986979961395264, | |
| "logits/rejected": -1.4225276708602905, | |
| "logps/chosen": -26.092084884643555, | |
| "logps/rejected": -52.08736038208008, | |
| "loss": 0.1094, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.04193298518657684, | |
| "rewards/margins": 3.300752639770508, | |
| "rewards/rejected": -3.3426852226257324, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 2.305084745762712, | |
| "grad_norm": 19.524384947131523, | |
| "learning_rate": 4.4871361035333833e-07, | |
| "logits/chosen": -1.302308201789856, | |
| "logits/rejected": -1.306333065032959, | |
| "logps/chosen": -25.217451095581055, | |
| "logps/rejected": -43.95090103149414, | |
| "loss": 0.1352, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.24911952018737793, | |
| "rewards/margins": 3.123009204864502, | |
| "rewards/rejected": -2.873889684677124, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 2.3220338983050848, | |
| "grad_norm": 21.654262830345974, | |
| "learning_rate": 4.475841572546374e-07, | |
| "logits/chosen": -1.333143711090088, | |
| "logits/rejected": -1.1934046745300293, | |
| "logps/chosen": -29.572952270507812, | |
| "logps/rejected": -42.41865539550781, | |
| "loss": 0.1608, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.2631007432937622, | |
| "rewards/margins": 2.9843344688415527, | |
| "rewards/rejected": -3.2474350929260254, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 2.3389830508474576, | |
| "grad_norm": 18.2902071706345, | |
| "learning_rate": 4.464438569430353e-07, | |
| "logits/chosen": -1.4010558128356934, | |
| "logits/rejected": -1.3596662282943726, | |
| "logps/chosen": -27.541452407836914, | |
| "logps/rejected": -37.1332893371582, | |
| "loss": 0.1122, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.17605237662792206, | |
| "rewards/margins": 2.4087536334991455, | |
| "rewards/rejected": -2.232701301574707, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 2.3559322033898304, | |
| "grad_norm": 20.193211619680806, | |
| "learning_rate": 4.452927720201112e-07, | |
| "logits/chosen": -1.1419758796691895, | |
| "logits/rejected": -1.2467293739318848, | |
| "logps/chosen": -24.366790771484375, | |
| "logps/rejected": -44.22784423828125, | |
| "loss": 0.1357, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2860787510871887, | |
| "rewards/margins": 3.2284557819366455, | |
| "rewards/rejected": -2.9423768520355225, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 2.3728813559322033, | |
| "grad_norm": 16.846442500740498, | |
| "learning_rate": 4.441309656795106e-07, | |
| "logits/chosen": -1.3143264055252075, | |
| "logits/rejected": -1.1588001251220703, | |
| "logps/chosen": -25.230070114135742, | |
| "logps/rejected": -50.344722747802734, | |
| "loss": 0.1133, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2081325650215149, | |
| "rewards/margins": 3.1986870765686035, | |
| "rewards/rejected": -2.990554094314575, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 2.389830508474576, | |
| "grad_norm": 19.91817673644514, | |
| "learning_rate": 4.429585017034766e-07, | |
| "logits/chosen": -1.355256199836731, | |
| "logits/rejected": -1.3759305477142334, | |
| "logps/chosen": -30.238567352294922, | |
| "logps/rejected": -43.92167663574219, | |
| "loss": 0.1377, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.17302727699279785, | |
| "rewards/margins": 3.385795831680298, | |
| "rewards/rejected": -3.558823347091675, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 2.406779661016949, | |
| "grad_norm": 17.91719154232722, | |
| "learning_rate": 4.417754444593478e-07, | |
| "logits/chosen": -1.452804684638977, | |
| "logits/rejected": -1.394730567932129, | |
| "logps/chosen": -29.204275131225586, | |
| "logps/rejected": -45.112300872802734, | |
| "loss": 0.1072, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.046540290117263794, | |
| "rewards/margins": 4.2701945304870605, | |
| "rewards/rejected": -4.223654270172119, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 2.423728813559322, | |
| "grad_norm": 17.305416636387555, | |
| "learning_rate": 4.4058185889602497e-07, | |
| "logits/chosen": -1.375995397567749, | |
| "logits/rejected": -1.3006415367126465, | |
| "logps/chosen": -17.108240127563477, | |
| "logps/rejected": -35.922874450683594, | |
| "loss": 0.1445, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.28150078654289246, | |
| "rewards/margins": 3.295111656188965, | |
| "rewards/rejected": -3.01361083984375, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 2.440677966101695, | |
| "grad_norm": 23.303228169178016, | |
| "learning_rate": 4.39377810540405e-07, | |
| "logits/chosen": -1.4052019119262695, | |
| "logits/rejected": -1.4526053667068481, | |
| "logps/chosen": -41.58496856689453, | |
| "logps/rejected": -40.82295227050781, | |
| "loss": 0.1742, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.30576008558273315, | |
| "rewards/margins": 2.3252716064453125, | |
| "rewards/rejected": -2.6310315132141113, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 2.457627118644068, | |
| "grad_norm": 19.121378802553227, | |
| "learning_rate": 4.38163365493784e-07, | |
| "logits/chosen": -1.5245730876922607, | |
| "logits/rejected": -1.401250958442688, | |
| "logps/chosen": -34.83244705200195, | |
| "logps/rejected": -64.91217041015625, | |
| "loss": 0.1261, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20400622487068176, | |
| "rewards/margins": 4.261959552764893, | |
| "rewards/rejected": -4.057952880859375, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 2.4745762711864407, | |
| "grad_norm": 19.034390396216985, | |
| "learning_rate": 4.3693859042822774e-07, | |
| "logits/chosen": -1.2971210479736328, | |
| "logits/rejected": -1.2576966285705566, | |
| "logps/chosen": -33.01571273803711, | |
| "logps/rejected": -45.87381362915039, | |
| "loss": 0.1277, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.19635039567947388, | |
| "rewards/margins": 3.7301583290100098, | |
| "rewards/rejected": -3.5338077545166016, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 2.4915254237288136, | |
| "grad_norm": 18.357502635593228, | |
| "learning_rate": 4.3570355258291223e-07, | |
| "logits/chosen": -1.2477927207946777, | |
| "logits/rejected": -1.2029328346252441, | |
| "logps/chosen": -28.15603256225586, | |
| "logps/rejected": -39.89695358276367, | |
| "loss": 0.1305, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.18680232763290405, | |
| "rewards/margins": 2.8544976711273193, | |
| "rewards/rejected": -2.6676955223083496, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 2.5084745762711864, | |
| "grad_norm": 13.757078257538927, | |
| "learning_rate": 4.344583197604318e-07, | |
| "logits/chosen": -1.280619740486145, | |
| "logits/rejected": -1.2003180980682373, | |
| "logps/chosen": -24.585142135620117, | |
| "logps/rejected": -49.09882354736328, | |
| "loss": 0.0795, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.009844973683357239, | |
| "rewards/margins": 3.587639331817627, | |
| "rewards/rejected": -3.5974843502044678, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 2.5254237288135593, | |
| "grad_norm": 20.493002641349737, | |
| "learning_rate": 4.332029603230767e-07, | |
| "logits/chosen": -1.2804765701293945, | |
| "logits/rejected": -1.2299047708511353, | |
| "logps/chosen": -38.74854278564453, | |
| "logps/rejected": -40.60469055175781, | |
| "loss": 0.1209, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.31216520071029663, | |
| "rewards/margins": 3.550785541534424, | |
| "rewards/rejected": -3.2386200428009033, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 2.542372881355932, | |
| "grad_norm": 18.873521616647515, | |
| "learning_rate": 4.319375431890806e-07, | |
| "logits/chosen": -1.5655155181884766, | |
| "logits/rejected": -1.5413960218429565, | |
| "logps/chosen": -29.58742904663086, | |
| "logps/rejected": -38.81654739379883, | |
| "loss": 0.1254, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.10849936306476593, | |
| "rewards/margins": 4.284536361694336, | |
| "rewards/rejected": -4.393035888671875, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 2.559322033898305, | |
| "grad_norm": 19.108581899079446, | |
| "learning_rate": 4.306621378288364e-07, | |
| "logits/chosen": -1.227691650390625, | |
| "logits/rejected": -1.206729769706726, | |
| "logps/chosen": -29.225234985351562, | |
| "logps/rejected": -47.78348159790039, | |
| "loss": 0.1189, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.18846926093101501, | |
| "rewards/margins": 3.5759382247924805, | |
| "rewards/rejected": -3.3874690532684326, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 2.576271186440678, | |
| "grad_norm": 17.54679094674662, | |
| "learning_rate": 4.2937681426108275e-07, | |
| "logits/chosen": -1.2980151176452637, | |
| "logits/rejected": -1.2792203426361084, | |
| "logps/chosen": -29.963348388671875, | |
| "logps/rejected": -36.31569290161133, | |
| "loss": 0.119, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.15102972090244293, | |
| "rewards/margins": 2.2603495121002197, | |
| "rewards/rejected": -2.411379098892212, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 2.593220338983051, | |
| "grad_norm": 20.293681732169816, | |
| "learning_rate": 4.280816430490602e-07, | |
| "logits/chosen": -1.6751010417938232, | |
| "logits/rejected": -1.579331874847412, | |
| "logps/chosen": -27.965721130371094, | |
| "logps/rejected": -41.831912994384766, | |
| "loss": 0.1398, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.03810068964958191, | |
| "rewards/margins": 3.356261968612671, | |
| "rewards/rejected": -3.3181610107421875, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 2.610169491525424, | |
| "grad_norm": 18.30214205628312, | |
| "learning_rate": 4.2677669529663686e-07, | |
| "logits/chosen": -1.2230945825576782, | |
| "logits/rejected": -1.3257890939712524, | |
| "logps/chosen": -22.84085464477539, | |
| "logps/rejected": -30.21630859375, | |
| "loss": 0.1296, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.4078897535800934, | |
| "rewards/margins": 2.349480152130127, | |
| "rewards/rejected": -1.9415905475616455, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 2.6271186440677967, | |
| "grad_norm": 16.933166318116392, | |
| "learning_rate": 4.254620426444053e-07, | |
| "logits/chosen": -1.2983089685440063, | |
| "logits/rejected": -1.2278701066970825, | |
| "logps/chosen": -28.5270938873291, | |
| "logps/rejected": -47.79298400878906, | |
| "loss": 0.1053, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.39613279700279236, | |
| "rewards/margins": 4.555056571960449, | |
| "rewards/rejected": -4.158923625946045, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 2.6440677966101696, | |
| "grad_norm": 16.954026895333676, | |
| "learning_rate": 4.2413775726574923e-07, | |
| "logits/chosen": -1.4136017560958862, | |
| "logits/rejected": -1.308146357536316, | |
| "logps/chosen": -26.033409118652344, | |
| "logps/rejected": -45.159584045410156, | |
| "loss": 0.1098, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.3105984926223755, | |
| "rewards/margins": 3.2515172958374023, | |
| "rewards/rejected": -3.5621156692504883, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 2.6610169491525424, | |
| "grad_norm": 23.018543543838756, | |
| "learning_rate": 4.228039118628815e-07, | |
| "logits/chosen": -1.3158849477767944, | |
| "logits/rejected": -1.1991815567016602, | |
| "logps/chosen": -25.21765899658203, | |
| "logps/rejected": -40.39363479614258, | |
| "loss": 0.1629, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.04916132241487503, | |
| "rewards/margins": 2.8159801959991455, | |
| "rewards/rejected": -2.7668187618255615, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 2.6779661016949152, | |
| "grad_norm": 18.04681645919104, | |
| "learning_rate": 4.214605796628526e-07, | |
| "logits/chosen": -1.3597509860992432, | |
| "logits/rejected": -1.375614881515503, | |
| "logps/chosen": -26.287364959716797, | |
| "logps/rejected": -41.234405517578125, | |
| "loss": 0.1155, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.18970844149589539, | |
| "rewards/margins": 3.569676399230957, | |
| "rewards/rejected": -3.3799679279327393, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 2.694915254237288, | |
| "grad_norm": 14.068268306629102, | |
| "learning_rate": 4.201078344135306e-07, | |
| "logits/chosen": -1.497442364692688, | |
| "logits/rejected": -1.4510717391967773, | |
| "logps/chosen": -27.79704475402832, | |
| "logps/rejected": -43.55573272705078, | |
| "loss": 0.101, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.037288397550582886, | |
| "rewards/margins": 3.6583452224731445, | |
| "rewards/rejected": -3.6210567951202393, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 2.711864406779661, | |
| "grad_norm": 20.29404199604278, | |
| "learning_rate": 4.187457503795526e-07, | |
| "logits/chosen": -1.6163471937179565, | |
| "logits/rejected": -1.5419741868972778, | |
| "logps/chosen": -26.61087417602539, | |
| "logps/rejected": -30.924564361572266, | |
| "loss": 0.1336, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.18512818217277527, | |
| "rewards/margins": 2.91485857963562, | |
| "rewards/rejected": -2.7297308444976807, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 2.7288135593220337, | |
| "grad_norm": 14.433423874260647, | |
| "learning_rate": 4.173744023382474e-07, | |
| "logits/chosen": -1.6092435121536255, | |
| "logits/rejected": -1.4594898223876953, | |
| "logps/chosen": -24.235836029052734, | |
| "logps/rejected": -42.99752426147461, | |
| "loss": 0.0838, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.11106225848197937, | |
| "rewards/margins": 3.523545742034912, | |
| "rewards/rejected": -3.6346077919006348, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 2.7457627118644066, | |
| "grad_norm": 18.073004370156372, | |
| "learning_rate": 4.159938655755306e-07, | |
| "logits/chosen": -1.2371647357940674, | |
| "logits/rejected": -1.249834656715393, | |
| "logps/chosen": -26.12664031982422, | |
| "logps/rejected": -41.84228515625, | |
| "loss": 0.117, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.07593405246734619, | |
| "rewards/margins": 3.008237361907959, | |
| "rewards/rejected": -2.9323031902313232, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 2.7627118644067794, | |
| "grad_norm": 17.580682721769048, | |
| "learning_rate": 4.1460421588177094e-07, | |
| "logits/chosen": -1.3883872032165527, | |
| "logits/rejected": -1.284624695777893, | |
| "logps/chosen": -24.988061904907227, | |
| "logps/rejected": -44.79413986206055, | |
| "loss": 0.1081, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.04829689860343933, | |
| "rewards/margins": 4.283046722412109, | |
| "rewards/rejected": -4.234749794006348, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 2.7796610169491527, | |
| "grad_norm": 14.683936946298637, | |
| "learning_rate": 4.1320552954763037e-07, | |
| "logits/chosen": -1.3138792514801025, | |
| "logits/rejected": -1.3704159259796143, | |
| "logps/chosen": -34.95574188232422, | |
| "logps/rejected": -41.10405731201172, | |
| "loss": 0.0829, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.08980777859687805, | |
| "rewards/margins": 3.34732723236084, | |
| "rewards/rejected": -3.257519483566284, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 2.7966101694915255, | |
| "grad_norm": 19.53577404998919, | |
| "learning_rate": 4.117978833598747e-07, | |
| "logits/chosen": -1.4069619178771973, | |
| "logits/rejected": -1.3315298557281494, | |
| "logps/chosen": -38.807586669921875, | |
| "logps/rejected": -41.68088912963867, | |
| "loss": 0.1398, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.32595694065093994, | |
| "rewards/margins": 2.6362242698669434, | |
| "rewards/rejected": -2.962181329727173, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 2.8135593220338984, | |
| "grad_norm": 15.630747955128056, | |
| "learning_rate": 4.1038135459715885e-07, | |
| "logits/chosen": -1.3965390920639038, | |
| "logits/rejected": -1.3702296018600464, | |
| "logps/chosen": -17.21418571472168, | |
| "logps/rejected": -33.933189392089844, | |
| "loss": 0.0941, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.29006946086883545, | |
| "rewards/margins": 3.5608468055725098, | |
| "rewards/rejected": -3.270777463912964, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 2.830508474576271, | |
| "grad_norm": 15.319247967225127, | |
| "learning_rate": 4.0895602102578373e-07, | |
| "logits/chosen": -1.2587236166000366, | |
| "logits/rejected": -1.2784702777862549, | |
| "logps/chosen": -33.745662689208984, | |
| "logps/rejected": -53.36175537109375, | |
| "loss": 0.0943, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.5272909998893738, | |
| "rewards/margins": 3.7912838459014893, | |
| "rewards/rejected": -4.318574905395508, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 2.847457627118644, | |
| "grad_norm": 17.91197797858757, | |
| "learning_rate": 4.075219608954278e-07, | |
| "logits/chosen": -1.220982313156128, | |
| "logits/rejected": -1.119415044784546, | |
| "logps/chosen": -25.41081428527832, | |
| "logps/rejected": -43.74126434326172, | |
| "loss": 0.1181, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.16412924230098724, | |
| "rewards/margins": 3.79860258102417, | |
| "rewards/rejected": -3.6344728469848633, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 2.864406779661017, | |
| "grad_norm": 19.216931577152895, | |
| "learning_rate": 4.0607925293484997e-07, | |
| "logits/chosen": -1.3364936113357544, | |
| "logits/rejected": -1.3348599672317505, | |
| "logps/chosen": -28.252498626708984, | |
| "logps/rejected": -36.41025161743164, | |
| "loss": 0.1405, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.07597078382968903, | |
| "rewards/margins": 2.509685516357422, | |
| "rewards/rejected": -2.5856566429138184, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 2.8813559322033897, | |
| "grad_norm": 18.004667351375737, | |
| "learning_rate": 4.046279763475687e-07, | |
| "logits/chosen": -1.4610190391540527, | |
| "logits/rejected": -1.467834234237671, | |
| "logps/chosen": -23.43694305419922, | |
| "logps/rejected": -40.96953582763672, | |
| "loss": 0.12, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.00839678943157196, | |
| "rewards/margins": 3.222750186920166, | |
| "rewards/rejected": -3.214353561401367, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 2.898305084745763, | |
| "grad_norm": 16.724266176013543, | |
| "learning_rate": 4.031682108075128e-07, | |
| "logits/chosen": -1.3930025100708008, | |
| "logits/rejected": -1.3053603172302246, | |
| "logps/chosen": -26.837438583374023, | |
| "logps/rejected": -46.042606353759766, | |
| "loss": 0.115, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.3490511476993561, | |
| "rewards/margins": 3.341874122619629, | |
| "rewards/rejected": -3.6909255981445312, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 2.915254237288136, | |
| "grad_norm": 17.501626125315028, | |
| "learning_rate": 4.0170003645464835e-07, | |
| "logits/chosen": -1.4981375932693481, | |
| "logits/rejected": -1.4686487913131714, | |
| "logps/chosen": -31.091188430786133, | |
| "logps/rejected": -41.67449188232422, | |
| "loss": 0.1159, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.08511877059936523, | |
| "rewards/margins": 3.4050216674804688, | |
| "rewards/rejected": -3.490140438079834, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 2.9322033898305087, | |
| "grad_norm": 17.505812920779697, | |
| "learning_rate": 4.0022353389057793e-07, | |
| "logits/chosen": -1.5352020263671875, | |
| "logits/rejected": -1.4121986627578735, | |
| "logps/chosen": -29.729156494140625, | |
| "logps/rejected": -48.48210525512695, | |
| "loss": 0.1104, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.050699926912784576, | |
| "rewards/margins": 3.282029390335083, | |
| "rewards/rejected": -3.2313296794891357, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 2.9491525423728815, | |
| "grad_norm": 15.718044631960725, | |
| "learning_rate": 3.9873878417411685e-07, | |
| "logits/chosen": -1.3283764123916626, | |
| "logits/rejected": -1.2386645078659058, | |
| "logps/chosen": -31.367496490478516, | |
| "logps/rejected": -48.3538818359375, | |
| "loss": 0.0849, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.02908661961555481, | |
| "rewards/margins": 4.318341255187988, | |
| "rewards/rejected": -4.3474273681640625, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 2.9661016949152543, | |
| "grad_norm": 19.307114282390735, | |
| "learning_rate": 3.97245868816842e-07, | |
| "logits/chosen": -1.61316978931427, | |
| "logits/rejected": -1.4183673858642578, | |
| "logps/chosen": -25.636213302612305, | |
| "logps/rejected": -31.219690322875977, | |
| "loss": 0.1342, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.19882389903068542, | |
| "rewards/margins": 2.8216686248779297, | |
| "rewards/rejected": -2.622844696044922, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 2.983050847457627, | |
| "grad_norm": 19.852640661420647, | |
| "learning_rate": 3.95744869778618e-07, | |
| "logits/chosen": -1.3724339008331299, | |
| "logits/rejected": -1.2494310140609741, | |
| "logps/chosen": -37.323822021484375, | |
| "logps/rejected": -48.69821548461914, | |
| "loss": 0.1322, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.2126758098602295, | |
| "rewards/margins": 3.236673355102539, | |
| "rewards/rejected": -3.4493494033813477, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 17.961834616955002, | |
| "learning_rate": 3.942358694630967e-07, | |
| "logits/chosen": -1.4469400644302368, | |
| "logits/rejected": -1.4965747594833374, | |
| "logps/chosen": -27.073068618774414, | |
| "logps/rejected": -47.31336212158203, | |
| "loss": 0.1379, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.2840898931026459, | |
| "rewards/margins": 3.4035041332244873, | |
| "rewards/rejected": -3.687593936920166, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 3.016949152542373, | |
| "grad_norm": 11.404351341704286, | |
| "learning_rate": 3.927189507131938e-07, | |
| "logits/chosen": -1.340846300125122, | |
| "logits/rejected": -1.3331762552261353, | |
| "logps/chosen": -29.038299560546875, | |
| "logps/rejected": -44.76627731323242, | |
| "loss": 0.0734, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.44353172183036804, | |
| "rewards/margins": 3.546970844268799, | |
| "rewards/rejected": -3.99050235748291, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 3.0338983050847457, | |
| "grad_norm": 13.04527280968449, | |
| "learning_rate": 3.9119419680654083e-07, | |
| "logits/chosen": -1.3644622564315796, | |
| "logits/rejected": -1.2482867240905762, | |
| "logps/chosen": -29.963117599487305, | |
| "logps/rejected": -44.21002960205078, | |
| "loss": 0.0936, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.05338460952043533, | |
| "rewards/margins": 3.796813488006592, | |
| "rewards/rejected": -3.7434287071228027, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 3.0508474576271185, | |
| "grad_norm": 10.551592699584061, | |
| "learning_rate": 3.896616914509131e-07, | |
| "logits/chosen": -1.1854358911514282, | |
| "logits/rejected": -1.2167768478393555, | |
| "logps/chosen": -28.745718002319336, | |
| "logps/rejected": -39.30556869506836, | |
| "loss": 0.0617, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.07138124108314514, | |
| "rewards/margins": 3.6734557151794434, | |
| "rewards/rejected": -3.60207462310791, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 3.0677966101694913, | |
| "grad_norm": 12.613695441723, | |
| "learning_rate": 3.881215187796344e-07, | |
| "logits/chosen": -1.5086756944656372, | |
| "logits/rejected": -1.4750981330871582, | |
| "logps/chosen": -24.299089431762695, | |
| "logps/rejected": -48.68684005737305, | |
| "loss": 0.088, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.2250959873199463, | |
| "rewards/margins": 5.23423433303833, | |
| "rewards/rejected": -5.0091376304626465, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 3.084745762711864, | |
| "grad_norm": 12.720295333105653, | |
| "learning_rate": 3.865737633469579e-07, | |
| "logits/chosen": -1.456554889678955, | |
| "logits/rejected": -1.4655078649520874, | |
| "logps/chosen": -34.70986557006836, | |
| "logps/rejected": -45.1373405456543, | |
| "loss": 0.0913, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.5282381176948547, | |
| "rewards/margins": 3.892458438873291, | |
| "rewards/rejected": -4.420697212219238, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 3.1016949152542375, | |
| "grad_norm": 11.42456365348978, | |
| "learning_rate": 3.8501851012342444e-07, | |
| "logits/chosen": -1.429445505142212, | |
| "logits/rejected": -1.2520623207092285, | |
| "logps/chosen": -33.7725830078125, | |
| "logps/rejected": -48.29920196533203, | |
| "loss": 0.0673, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.20377352833747864, | |
| "rewards/margins": 3.905850887298584, | |
| "rewards/rejected": -4.10962438583374, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 3.1186440677966103, | |
| "grad_norm": 11.707964288174598, | |
| "learning_rate": 3.834558444911977e-07, | |
| "logits/chosen": -1.442047119140625, | |
| "logits/rejected": -1.2768933773040771, | |
| "logps/chosen": -31.306930541992188, | |
| "logps/rejected": -55.08317184448242, | |
| "loss": 0.0851, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.1050567626953125, | |
| "rewards/margins": 4.629131317138672, | |
| "rewards/rejected": -4.524074554443359, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 3.135593220338983, | |
| "grad_norm": 12.341349720055733, | |
| "learning_rate": 3.818858522393763e-07, | |
| "logits/chosen": -1.4654240608215332, | |
| "logits/rejected": -1.2980097532272339, | |
| "logps/chosen": -25.214473724365234, | |
| "logps/rejected": -48.52466583251953, | |
| "loss": 0.084, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.06900361180305481, | |
| "rewards/margins": 3.743170738220215, | |
| "rewards/rejected": -3.8121743202209473, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 3.152542372881356, | |
| "grad_norm": 11.013651552230316, | |
| "learning_rate": 3.8030861955928496e-07, | |
| "logits/chosen": -1.4930305480957031, | |
| "logits/rejected": -1.49806809425354, | |
| "logps/chosen": -33.961334228515625, | |
| "logps/rejected": -60.0030403137207, | |
| "loss": 0.0587, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.1520581841468811, | |
| "rewards/margins": 4.225584983825684, | |
| "rewards/rejected": -4.377642631530762, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 3.169491525423729, | |
| "grad_norm": 11.565802520156074, | |
| "learning_rate": 3.787242330397418e-07, | |
| "logits/chosen": -1.151625633239746, | |
| "logits/rejected": -1.229320764541626, | |
| "logps/chosen": -27.614501953125, | |
| "logps/rejected": -46.39374542236328, | |
| "loss": 0.0744, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.1534918248653412, | |
| "rewards/margins": 3.757617235183716, | |
| "rewards/rejected": -3.6041250228881836, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 3.1864406779661016, | |
| "grad_norm": 11.91938723600508, | |
| "learning_rate": 3.7713277966230513e-07, | |
| "logits/chosen": -1.3814018964767456, | |
| "logits/rejected": -1.3967022895812988, | |
| "logps/chosen": -39.28313446044922, | |
| "logps/rejected": -51.139671325683594, | |
| "loss": 0.0781, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.14791953563690186, | |
| "rewards/margins": 3.787137508392334, | |
| "rewards/rejected": -3.639218330383301, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 3.2033898305084745, | |
| "grad_norm": 11.7798233218555, | |
| "learning_rate": 3.755343467964981e-07, | |
| "logits/chosen": -1.4196237325668335, | |
| "logits/rejected": -1.281465768814087, | |
| "logps/chosen": -30.973888397216797, | |
| "logps/rejected": -60.48612976074219, | |
| "loss": 0.0728, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.04596884548664093, | |
| "rewards/margins": 5.157370567321777, | |
| "rewards/rejected": -5.203339099884033, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 3.2203389830508473, | |
| "grad_norm": 9.848668366337494, | |
| "learning_rate": 3.739290221950123e-07, | |
| "logits/chosen": -1.4920192956924438, | |
| "logits/rejected": -1.3025527000427246, | |
| "logps/chosen": -20.232595443725586, | |
| "logps/rejected": -40.525020599365234, | |
| "loss": 0.0603, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.6132769584655762, | |
| "rewards/margins": 4.133052349090576, | |
| "rewards/rejected": -3.519775390625, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 3.23728813559322, | |
| "grad_norm": 11.384419516356248, | |
| "learning_rate": 3.723168939888901e-07, | |
| "logits/chosen": -1.4284577369689941, | |
| "logits/rejected": -1.3038253784179688, | |
| "logps/chosen": -36.025821685791016, | |
| "logps/rejected": -47.912906646728516, | |
| "loss": 0.0729, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.4135657548904419, | |
| "rewards/margins": 4.515513896942139, | |
| "rewards/rejected": -4.101947784423828, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 3.2542372881355934, | |
| "grad_norm": 12.741788906869417, | |
| "learning_rate": 3.7069805068268624e-07, | |
| "logits/chosen": -1.190822958946228, | |
| "logits/rejected": -1.2386127710342407, | |
| "logps/chosen": -25.53938865661621, | |
| "logps/rejected": -41.572757720947266, | |
| "loss": 0.0968, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.21493886411190033, | |
| "rewards/margins": 3.464285373687744, | |
| "rewards/rejected": -3.6792244911193848, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 3.2711864406779663, | |
| "grad_norm": 11.543121161706487, | |
| "learning_rate": 3.6907258114960915e-07, | |
| "logits/chosen": -1.44771146774292, | |
| "logits/rejected": -1.3371340036392212, | |
| "logps/chosen": -22.689008712768555, | |
| "logps/rejected": -33.55266571044922, | |
| "loss": 0.0759, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20511241257190704, | |
| "rewards/margins": 3.822577953338623, | |
| "rewards/rejected": -3.617465019226074, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 3.288135593220339, | |
| "grad_norm": 12.699281503282757, | |
| "learning_rate": 3.6744057462664194e-07, | |
| "logits/chosen": -1.2974333763122559, | |
| "logits/rejected": -1.2633615732192993, | |
| "logps/chosen": -35.57712936401367, | |
| "logps/rejected": -41.26565170288086, | |
| "loss": 0.079, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.1042805016040802, | |
| "rewards/margins": 3.9843153953552246, | |
| "rewards/rejected": -4.088595867156982, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 3.305084745762712, | |
| "grad_norm": 11.346679553734594, | |
| "learning_rate": 3.658021207096432e-07, | |
| "logits/chosen": -1.250510811805725, | |
| "logits/rejected": -1.2325608730316162, | |
| "logps/chosen": -27.381729125976562, | |
| "logps/rejected": -39.44231414794922, | |
| "loss": 0.071, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.14005836844444275, | |
| "rewards/margins": 3.225729465484619, | |
| "rewards/rejected": -3.0856711864471436, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 3.3220338983050848, | |
| "grad_norm": 11.675211775416328, | |
| "learning_rate": 3.6415730934842825e-07, | |
| "logits/chosen": -1.4330198764801025, | |
| "logits/rejected": -1.2990684509277344, | |
| "logps/chosen": -26.091453552246094, | |
| "logps/rejected": -37.71536636352539, | |
| "loss": 0.0855, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.5305294990539551, | |
| "rewards/margins": 3.7357966899871826, | |
| "rewards/rejected": -3.2052674293518066, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 3.3389830508474576, | |
| "grad_norm": 10.595079308232455, | |
| "learning_rate": 3.625062308418311e-07, | |
| "logits/chosen": -1.4256861209869385, | |
| "logits/rejected": -1.3300725221633911, | |
| "logps/chosen": -44.14484786987305, | |
| "logps/rejected": -49.18852233886719, | |
| "loss": 0.0731, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.23929768800735474, | |
| "rewards/margins": 3.831993818283081, | |
| "rewards/rejected": -4.071290969848633, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 3.3559322033898304, | |
| "grad_norm": 10.589818972158676, | |
| "learning_rate": 3.6084897583274715e-07, | |
| "logits/chosen": -1.5123655796051025, | |
| "logits/rejected": -1.4504189491271973, | |
| "logps/chosen": -21.742530822753906, | |
| "logps/rejected": -43.59285354614258, | |
| "loss": 0.0587, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.08142367005348206, | |
| "rewards/margins": 4.206247329711914, | |
| "rewards/rejected": -4.124823570251465, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 3.3728813559322033, | |
| "grad_norm": 10.064171955563651, | |
| "learning_rate": 3.591856353031566e-07, | |
| "logits/chosen": -1.436945915222168, | |
| "logits/rejected": -1.475320816040039, | |
| "logps/chosen": -22.611244201660156, | |
| "logps/rejected": -43.448341369628906, | |
| "loss": 0.0701, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2055773138999939, | |
| "rewards/margins": 4.247138500213623, | |
| "rewards/rejected": -4.041561126708984, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 3.389830508474576, | |
| "grad_norm": 9.40202931235145, | |
| "learning_rate": 3.5751630056913013e-07, | |
| "logits/chosen": -1.5867615938186646, | |
| "logits/rejected": -1.559362769126892, | |
| "logps/chosen": -27.019208908081055, | |
| "logps/rejected": -38.67339324951172, | |
| "loss": 0.059, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21613261103630066, | |
| "rewards/margins": 3.4724230766296387, | |
| "rewards/rejected": -3.2562904357910156, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 3.406779661016949, | |
| "grad_norm": 10.1184235462117, | |
| "learning_rate": 3.558410632758153e-07, | |
| "logits/chosen": -1.4771666526794434, | |
| "logits/rejected": -1.4069215059280396, | |
| "logps/chosen": -23.256723403930664, | |
| "logps/rejected": -42.76215744018555, | |
| "loss": 0.0723, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2688101828098297, | |
| "rewards/margins": 4.244396686553955, | |
| "rewards/rejected": -3.9755868911743164, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 3.423728813559322, | |
| "grad_norm": 13.152928025513987, | |
| "learning_rate": 3.5416001539240574e-07, | |
| "logits/chosen": -1.5284346342086792, | |
| "logits/rejected": -1.4866607189178467, | |
| "logps/chosen": -24.796672821044922, | |
| "logps/rejected": -51.007835388183594, | |
| "loss": 0.098, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.19576720893383026, | |
| "rewards/margins": 4.06268835067749, | |
| "rewards/rejected": -4.258455753326416, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 3.440677966101695, | |
| "grad_norm": 11.395001548472651, | |
| "learning_rate": 3.5247324920709147e-07, | |
| "logits/chosen": -1.3313159942626953, | |
| "logits/rejected": -1.2498857975006104, | |
| "logps/chosen": -30.9143009185791, | |
| "logps/rejected": -41.243194580078125, | |
| "loss": 0.07, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.04568520188331604, | |
| "rewards/margins": 3.3133530616760254, | |
| "rewards/rejected": -3.267667770385742, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 3.457627118644068, | |
| "grad_norm": 9.512575018928173, | |
| "learning_rate": 3.5078085732199307e-07, | |
| "logits/chosen": -1.567063331604004, | |
| "logits/rejected": -1.4618712663650513, | |
| "logps/chosen": -25.394840240478516, | |
| "logps/rejected": -49.12550354003906, | |
| "loss": 0.0601, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.02158541977405548, | |
| "rewards/margins": 4.889016628265381, | |
| "rewards/rejected": -4.867431163787842, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 3.4745762711864407, | |
| "grad_norm": 11.586078932679674, | |
| "learning_rate": 3.490829326480773e-07, | |
| "logits/chosen": -1.4906607866287231, | |
| "logits/rejected": -1.3863712549209595, | |
| "logps/chosen": -31.574989318847656, | |
| "logps/rejected": -48.25616455078125, | |
| "loss": 0.0787, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.011842876672744751, | |
| "rewards/margins": 4.549272537231445, | |
| "rewards/rejected": -4.561115741729736, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 3.4915254237288136, | |
| "grad_norm": 12.547434003386876, | |
| "learning_rate": 3.4737956840005684e-07, | |
| "logits/chosen": -1.3435784578323364, | |
| "logits/rejected": -1.3757704496383667, | |
| "logps/chosen": -25.303531646728516, | |
| "logps/rejected": -39.15208053588867, | |
| "loss": 0.086, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.02988174557685852, | |
| "rewards/margins": 3.5366082191467285, | |
| "rewards/rejected": -3.5067262649536133, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 3.5084745762711864, | |
| "grad_norm": 9.788457273992568, | |
| "learning_rate": 3.4567085809127245e-07, | |
| "logits/chosen": -1.5033990144729614, | |
| "logits/rejected": -1.4766664505004883, | |
| "logps/chosen": -26.12259864807129, | |
| "logps/rejected": -52.18391418457031, | |
| "loss": 0.0617, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.08881500363349915, | |
| "rewards/margins": 4.650575160980225, | |
| "rewards/rejected": -4.561759948730469, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 3.5254237288135593, | |
| "grad_norm": 11.249455307964162, | |
| "learning_rate": 3.439568955285595e-07, | |
| "logits/chosen": -1.600437045097351, | |
| "logits/rejected": -1.5185781717300415, | |
| "logps/chosen": -20.018888473510742, | |
| "logps/rejected": -41.524349212646484, | |
| "loss": 0.0704, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.08864006400108337, | |
| "rewards/margins": 3.782201051712036, | |
| "rewards/rejected": -3.8708412647247314, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 3.542372881355932, | |
| "grad_norm": 9.990126506747924, | |
| "learning_rate": 3.4223777480709804e-07, | |
| "logits/chosen": -1.3552483320236206, | |
| "logits/rejected": -1.2629144191741943, | |
| "logps/chosen": -21.040348052978516, | |
| "logps/rejected": -41.19926071166992, | |
| "loss": 0.0595, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.106040358543396, | |
| "rewards/margins": 4.690126419067383, | |
| "rewards/rejected": -4.796167373657227, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 3.559322033898305, | |
| "grad_norm": 9.838092383663449, | |
| "learning_rate": 3.405135903052465e-07, | |
| "logits/chosen": -1.4207416772842407, | |
| "logits/rejected": -1.2667282819747925, | |
| "logps/chosen": -30.103904724121094, | |
| "logps/rejected": -46.88092803955078, | |
| "loss": 0.0606, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.3432961106300354, | |
| "rewards/margins": 4.319201946258545, | |
| "rewards/rejected": -4.6624979972839355, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 3.576271186440678, | |
| "grad_norm": 9.67907653448832, | |
| "learning_rate": 3.3878443667936136e-07, | |
| "logits/chosen": -1.2791118621826172, | |
| "logits/rejected": -1.2004616260528564, | |
| "logps/chosen": -40.10679244995117, | |
| "logps/rejected": -60.650394439697266, | |
| "loss": 0.0526, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.6684077978134155, | |
| "rewards/margins": 4.151851654052734, | |
| "rewards/rejected": -4.820259094238281, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 3.593220338983051, | |
| "grad_norm": 9.853143082693608, | |
| "learning_rate": 3.3705040885859967e-07, | |
| "logits/chosen": -1.5361111164093018, | |
| "logits/rejected": -1.4038530588150024, | |
| "logps/chosen": -37.90250015258789, | |
| "logps/rejected": -45.059024810791016, | |
| "loss": 0.0492, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.0030466020107269287, | |
| "rewards/margins": 3.964195728302002, | |
| "rewards/rejected": -3.9672420024871826, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 3.610169491525424, | |
| "grad_norm": 9.44047901416316, | |
| "learning_rate": 3.3531160203970805e-07, | |
| "logits/chosen": -1.4581642150878906, | |
| "logits/rejected": -1.4039422273635864, | |
| "logps/chosen": -30.59682846069336, | |
| "logps/rejected": -44.48968505859375, | |
| "loss": 0.0694, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.1339409351348877, | |
| "rewards/margins": 4.1027374267578125, | |
| "rewards/rejected": -4.236677646636963, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 3.6271186440677967, | |
| "grad_norm": 11.435287785470077, | |
| "learning_rate": 3.3356811168179627e-07, | |
| "logits/chosen": -1.2856285572052002, | |
| "logits/rejected": -1.177187204360962, | |
| "logps/chosen": -28.70745086669922, | |
| "logps/rejected": -39.22813415527344, | |
| "loss": 0.0779, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.319486528635025, | |
| "rewards/margins": 4.963109016418457, | |
| "rewards/rejected": -4.643622398376465, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 3.6440677966101696, | |
| "grad_norm": 10.041339419341044, | |
| "learning_rate": 3.318200335010967e-07, | |
| "logits/chosen": -1.720375418663025, | |
| "logits/rejected": -1.5207815170288086, | |
| "logps/chosen": -26.1734676361084, | |
| "logps/rejected": -41.08108901977539, | |
| "loss": 0.0651, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.5092735290527344, | |
| "rewards/margins": 4.717857360839844, | |
| "rewards/rejected": -4.208584308624268, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 3.6610169491525424, | |
| "grad_norm": 10.722521061649259, | |
| "learning_rate": 3.3006746346570935e-07, | |
| "logits/chosen": -1.5194891691207886, | |
| "logits/rejected": -1.5225661993026733, | |
| "logps/chosen": -21.88874626159668, | |
| "logps/rejected": -31.313539505004883, | |
| "loss": 0.0652, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.42684128880500793, | |
| "rewards/margins": 3.6699740886688232, | |
| "rewards/rejected": -3.2431328296661377, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 3.6779661016949152, | |
| "grad_norm": 11.239607248752012, | |
| "learning_rate": 3.2831049779033395e-07, | |
| "logits/chosen": -1.585048794746399, | |
| "logits/rejected": -1.447473406791687, | |
| "logps/chosen": -44.55853271484375, | |
| "logps/rejected": -62.56214141845703, | |
| "loss": 0.0718, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.5582241415977478, | |
| "rewards/margins": 5.07534122467041, | |
| "rewards/rejected": -5.633565425872803, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 3.694915254237288, | |
| "grad_norm": 11.121200361780803, | |
| "learning_rate": 3.2654923293098666e-07, | |
| "logits/chosen": -1.4354138374328613, | |
| "logits/rejected": -1.431335210800171, | |
| "logps/chosen": -29.88983917236328, | |
| "logps/rejected": -41.21036911010742, | |
| "loss": 0.0722, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.12309768795967102, | |
| "rewards/margins": 4.238857746124268, | |
| "rewards/rejected": -4.361955642700195, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 3.711864406779661, | |
| "grad_norm": 8.46695075496944, | |
| "learning_rate": 3.247837655797061e-07, | |
| "logits/chosen": -1.4146300554275513, | |
| "logits/rejected": -1.3639909029006958, | |
| "logps/chosen": -23.79798698425293, | |
| "logps/rejected": -42.03700256347656, | |
| "loss": 0.0509, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.14847984910011292, | |
| "rewards/margins": 4.2562971115112305, | |
| "rewards/rejected": -4.107817649841309, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 3.7288135593220337, | |
| "grad_norm": 10.875773708222669, | |
| "learning_rate": 3.2301419265924393e-07, | |
| "logits/chosen": -1.3614307641983032, | |
| "logits/rejected": -1.263061285018921, | |
| "logps/chosen": -26.913053512573242, | |
| "logps/rejected": -38.41120910644531, | |
| "loss": 0.0775, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.12246422469615936, | |
| "rewards/margins": 3.324322462081909, | |
| "rewards/rejected": -3.2018585205078125, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 3.7457627118644066, | |
| "grad_norm": 10.149586335689621, | |
| "learning_rate": 3.2124061131774443e-07, | |
| "logits/chosen": -1.3240911960601807, | |
| "logits/rejected": -1.3163329362869263, | |
| "logps/chosen": -25.73955535888672, | |
| "logps/rejected": -49.833221435546875, | |
| "loss": 0.0624, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2507280111312866, | |
| "rewards/margins": 4.252786636352539, | |
| "rewards/rejected": -4.002058982849121, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 3.7627118644067794, | |
| "grad_norm": 9.140932864263574, | |
| "learning_rate": 3.194631189234109e-07, | |
| "logits/chosen": -1.6033962965011597, | |
| "logits/rejected": -1.4346346855163574, | |
| "logps/chosen": -35.82508087158203, | |
| "logps/rejected": -44.355918884277344, | |
| "loss": 0.0458, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.09858936071395874, | |
| "rewards/margins": 4.44521951675415, | |
| "rewards/rejected": -4.543808937072754, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 3.7796610169491527, | |
| "grad_norm": 9.775231597153738, | |
| "learning_rate": 3.1768181305916063e-07, | |
| "logits/chosen": -1.4034669399261475, | |
| "logits/rejected": -1.2983992099761963, | |
| "logps/chosen": -36.48939514160156, | |
| "logps/rejected": -50.544036865234375, | |
| "loss": 0.0503, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.08162027597427368, | |
| "rewards/margins": 4.144961357116699, | |
| "rewards/rejected": -4.06334114074707, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 3.7966101694915255, | |
| "grad_norm": 11.927845816995063, | |
| "learning_rate": 3.158967915172669e-07, | |
| "logits/chosen": -1.4804027080535889, | |
| "logits/rejected": -1.349886178970337, | |
| "logps/chosen": -27.929018020629883, | |
| "logps/rejected": -37.72224426269531, | |
| "loss": 0.0819, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.21573437750339508, | |
| "rewards/margins": 3.54447078704834, | |
| "rewards/rejected": -3.760205030441284, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 3.8135593220338984, | |
| "grad_norm": 11.052999347550179, | |
| "learning_rate": 3.141081522939911e-07, | |
| "logits/chosen": -1.4779460430145264, | |
| "logits/rejected": -1.349549412727356, | |
| "logps/chosen": -37.28151321411133, | |
| "logps/rejected": -47.440765380859375, | |
| "loss": 0.0651, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.3217180073261261, | |
| "rewards/margins": 4.7478485107421875, | |
| "rewards/rejected": -5.06956672668457, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 3.830508474576271, | |
| "grad_norm": 10.970513938481727, | |
| "learning_rate": 3.1231599358420233e-07, | |
| "logits/chosen": -1.3360522985458374, | |
| "logits/rejected": -1.2304054498672485, | |
| "logps/chosen": -25.628524780273438, | |
| "logps/rejected": -38.20055389404297, | |
| "loss": 0.0619, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2233581840991974, | |
| "rewards/margins": 4.349393844604492, | |
| "rewards/rejected": -4.126035690307617, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 3.847457627118644, | |
| "grad_norm": 9.433851363193908, | |
| "learning_rate": 3.105204137759867e-07, | |
| "logits/chosen": -1.2719545364379883, | |
| "logits/rejected": -1.330519676208496, | |
| "logps/chosen": -34.64011001586914, | |
| "logps/rejected": -52.32704162597656, | |
| "loss": 0.0631, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.2245815396308899, | |
| "rewards/margins": 4.7216315269470215, | |
| "rewards/rejected": -4.9462127685546875, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 3.864406779661017, | |
| "grad_norm": 10.439526673557062, | |
| "learning_rate": 3.0872151144524594e-07, | |
| "logits/chosen": -1.6759734153747559, | |
| "logits/rejected": -1.579871654510498, | |
| "logps/chosen": -27.196908950805664, | |
| "logps/rejected": -50.7559700012207, | |
| "loss": 0.069, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.22928810119628906, | |
| "rewards/margins": 4.84774923324585, | |
| "rewards/rejected": -5.077037811279297, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 3.8813559322033897, | |
| "grad_norm": 11.392292474810198, | |
| "learning_rate": 3.069193853502855e-07, | |
| "logits/chosen": -1.5869011878967285, | |
| "logits/rejected": -1.5820071697235107, | |
| "logps/chosen": -25.63404655456543, | |
| "logps/rejected": -38.69296646118164, | |
| "loss": 0.0774, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.3039775788784027, | |
| "rewards/margins": 3.5710806846618652, | |
| "rewards/rejected": -3.875058174133301, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 3.898305084745763, | |
| "grad_norm": 10.225877059440036, | |
| "learning_rate": 3.0511413442639297e-07, | |
| "logits/chosen": -1.434234619140625, | |
| "logits/rejected": -1.3351249694824219, | |
| "logps/chosen": -24.643152236938477, | |
| "logps/rejected": -60.13444519042969, | |
| "loss": 0.0577, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.10867035388946533, | |
| "rewards/margins": 6.075805187225342, | |
| "rewards/rejected": -6.184475898742676, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 3.915254237288136, | |
| "grad_norm": 9.295415437658166, | |
| "learning_rate": 3.0330585778040675e-07, | |
| "logits/chosen": -1.3108859062194824, | |
| "logits/rejected": -1.2643885612487793, | |
| "logps/chosen": -24.4619140625, | |
| "logps/rejected": -34.88125228881836, | |
| "loss": 0.0527, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.13806024193763733, | |
| "rewards/margins": 3.5883846282958984, | |
| "rewards/rejected": -3.726444721221924, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 3.9322033898305087, | |
| "grad_norm": 9.365373350685292, | |
| "learning_rate": 3.0149465468527457e-07, | |
| "logits/chosen": -1.4852244853973389, | |
| "logits/rejected": -1.5285433530807495, | |
| "logps/chosen": -25.74740982055664, | |
| "logps/rejected": -39.37394332885742, | |
| "loss": 0.0535, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.005698531866073608, | |
| "rewards/margins": 4.059448719024658, | |
| "rewards/rejected": -4.0651469230651855, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 3.9491525423728815, | |
| "grad_norm": 8.974250718981809, | |
| "learning_rate": 2.9968062457460437e-07, | |
| "logits/chosen": -1.5756818056106567, | |
| "logits/rejected": -1.4879854917526245, | |
| "logps/chosen": -24.878393173217773, | |
| "logps/rejected": -43.14397048950195, | |
| "loss": 0.0495, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.1284530758857727, | |
| "rewards/margins": 4.223859786987305, | |
| "rewards/rejected": -4.3523125648498535, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 3.9661016949152543, | |
| "grad_norm": 11.054495120717768, | |
| "learning_rate": 2.978638670372047e-07, | |
| "logits/chosen": -1.4321879148483276, | |
| "logits/rejected": -1.3549392223358154, | |
| "logps/chosen": -35.11801528930664, | |
| "logps/rejected": -50.857975006103516, | |
| "loss": 0.0675, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.5975885391235352, | |
| "rewards/margins": 4.9589338302612305, | |
| "rewards/rejected": -5.556522846221924, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 3.983050847457627, | |
| "grad_norm": 10.55567875619197, | |
| "learning_rate": 2.9604448181161755e-07, | |
| "logits/chosen": -1.4181180000305176, | |
| "logits/rejected": -1.247128963470459, | |
| "logps/chosen": -24.004146575927734, | |
| "logps/rejected": -40.258331298828125, | |
| "loss": 0.0771, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.09078972041606903, | |
| "rewards/margins": 3.5850982666015625, | |
| "rewards/rejected": -3.6758880615234375, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 8.789282244689215, | |
| "learning_rate": 2.9422256878064324e-07, | |
| "logits/chosen": -1.3495515584945679, | |
| "logits/rejected": -1.3198853731155396, | |
| "logps/chosen": -35.33921432495117, | |
| "logps/rejected": -48.3183708190918, | |
| "loss": 0.0458, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.6972587704658508, | |
| "rewards/margins": 4.445403575897217, | |
| "rewards/rejected": -5.142662525177002, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 4.016949152542373, | |
| "grad_norm": 6.75184146677303, | |
| "learning_rate": 2.923982279658564e-07, | |
| "logits/chosen": -1.633570909500122, | |
| "logits/rejected": -1.5905429124832153, | |
| "logps/chosen": -38.822261810302734, | |
| "logps/rejected": -48.57697296142578, | |
| "loss": 0.0414, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.40955615043640137, | |
| "rewards/margins": 4.950255393981934, | |
| "rewards/rejected": -5.359812259674072, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 4.033898305084746, | |
| "grad_norm": 9.349730549756508, | |
| "learning_rate": 2.90571559522115e-07, | |
| "logits/chosen": -1.0709235668182373, | |
| "logits/rejected": -1.1218361854553223, | |
| "logps/chosen": -29.26075553894043, | |
| "logps/rejected": -34.48569869995117, | |
| "loss": 0.061, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.08125007152557373, | |
| "rewards/margins": 3.5675737857818604, | |
| "rewards/rejected": -3.486323833465576, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 4.0508474576271185, | |
| "grad_norm": 8.738621249147451, | |
| "learning_rate": 2.8874266373206215e-07, | |
| "logits/chosen": -1.5969672203063965, | |
| "logits/rejected": -1.5486068725585938, | |
| "logps/chosen": -32.66074752807617, | |
| "logps/rejected": -43.64617919921875, | |
| "loss": 0.0515, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.13383600115776062, | |
| "rewards/margins": 4.7373738288879395, | |
| "rewards/rejected": -4.603538513183594, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 4.067796610169491, | |
| "grad_norm": 8.414244576812601, | |
| "learning_rate": 2.8691164100062034e-07, | |
| "logits/chosen": -1.4231804609298706, | |
| "logits/rejected": -1.2792776823043823, | |
| "logps/chosen": -31.69137954711914, | |
| "logps/rejected": -54.63775634765625, | |
| "loss": 0.0499, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.08693331480026245, | |
| "rewards/margins": 5.980134963989258, | |
| "rewards/rejected": -5.89320182800293, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 4.084745762711864, | |
| "grad_norm": 8.9348895906443, | |
| "learning_rate": 2.8507859184947953e-07, | |
| "logits/chosen": -1.4366453886032104, | |
| "logits/rejected": -1.3272255659103394, | |
| "logps/chosen": -26.352733612060547, | |
| "logps/rejected": -46.89934158325195, | |
| "loss": 0.0619, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.2997795641422272, | |
| "rewards/margins": 3.753899574279785, | |
| "rewards/rejected": -4.053679466247559, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 4.101694915254237, | |
| "grad_norm": 8.250352811668128, | |
| "learning_rate": 2.8324361691157853e-07, | |
| "logits/chosen": -1.374919056892395, | |
| "logits/rejected": -1.421012043952942, | |
| "logps/chosen": -31.536663055419922, | |
| "logps/rejected": -55.928749084472656, | |
| "loss": 0.0425, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.2735545039176941, | |
| "rewards/margins": 4.6181135177612305, | |
| "rewards/rejected": -4.891667366027832, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 4.11864406779661, | |
| "grad_norm": 9.40465609028747, | |
| "learning_rate": 2.8140681692558034e-07, | |
| "logits/chosen": -1.7210100889205933, | |
| "logits/rejected": -1.6137436628341675, | |
| "logps/chosen": -29.081104278564453, | |
| "logps/rejected": -42.040245056152344, | |
| "loss": 0.0608, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.13167661428451538, | |
| "rewards/margins": 4.6320929527282715, | |
| "rewards/rejected": -4.500416278839111, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 4.135593220338983, | |
| "grad_norm": 7.575529614099465, | |
| "learning_rate": 2.7956829273034146e-07, | |
| "logits/chosen": -1.3278542757034302, | |
| "logits/rejected": -1.2801333665847778, | |
| "logps/chosen": -28.36415672302246, | |
| "logps/rejected": -45.0773811340332, | |
| "loss": 0.0521, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.0763159990310669, | |
| "rewards/margins": 4.670356750488281, | |
| "rewards/rejected": -4.594040393829346, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 4.1525423728813555, | |
| "grad_norm": 7.841549437235225, | |
| "learning_rate": 2.7772814525937634e-07, | |
| "logits/chosen": -1.521530032157898, | |
| "logits/rejected": -1.3261444568634033, | |
| "logps/chosen": -25.693857192993164, | |
| "logps/rejected": -44.95489501953125, | |
| "loss": 0.0461, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.31940776109695435, | |
| "rewards/margins": 4.865126132965088, | |
| "rewards/rejected": -4.545718193054199, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 4.169491525423728, | |
| "grad_norm": 7.437246530691066, | |
| "learning_rate": 2.7588647553531576e-07, | |
| "logits/chosen": -1.3834595680236816, | |
| "logits/rejected": -1.366625428199768, | |
| "logps/chosen": -27.27410125732422, | |
| "logps/rejected": -47.83835220336914, | |
| "loss": 0.0479, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.1564972996711731, | |
| "rewards/margins": 4.690642833709717, | |
| "rewards/rejected": -4.534145355224609, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 4.186440677966102, | |
| "grad_norm": 8.374701794746429, | |
| "learning_rate": 2.7404338466436116e-07, | |
| "logits/chosen": -1.4731414318084717, | |
| "logits/rejected": -1.4213758707046509, | |
| "logps/chosen": -32.91023635864258, | |
| "logps/rejected": -48.37405014038086, | |
| "loss": 0.0475, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.12676534056663513, | |
| "rewards/margins": 5.037107944488525, | |
| "rewards/rejected": -4.910342216491699, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 4.203389830508475, | |
| "grad_norm": 8.684915932739882, | |
| "learning_rate": 2.721989738307337e-07, | |
| "logits/chosen": -1.5974880456924438, | |
| "logits/rejected": -1.5572988986968994, | |
| "logps/chosen": -29.11380386352539, | |
| "logps/rejected": -43.61833953857422, | |
| "loss": 0.0524, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.09620954096317291, | |
| "rewards/margins": 3.6240577697753906, | |
| "rewards/rejected": -3.527848482131958, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 4.220338983050848, | |
| "grad_norm": 8.422615969153167, | |
| "learning_rate": 2.7035334429111955e-07, | |
| "logits/chosen": -1.4860804080963135, | |
| "logits/rejected": -1.4129899740219116, | |
| "logps/chosen": -38.593204498291016, | |
| "logps/rejected": -55.535247802734375, | |
| "loss": 0.0493, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.007733196020126343, | |
| "rewards/margins": 4.608980178833008, | |
| "rewards/rejected": -4.616713047027588, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 4.237288135593221, | |
| "grad_norm": 8.150205311828831, | |
| "learning_rate": 2.685065973691107e-07, | |
| "logits/chosen": -1.509846806526184, | |
| "logits/rejected": -1.440779447555542, | |
| "logps/chosen": -32.75022506713867, | |
| "logps/rejected": -53.621826171875, | |
| "loss": 0.0429, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.10550498962402344, | |
| "rewards/margins": 5.389019966125488, | |
| "rewards/rejected": -5.49452543258667, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 4.254237288135593, | |
| "grad_norm": 8.158516596968319, | |
| "learning_rate": 2.6665883444964277e-07, | |
| "logits/chosen": -1.2622435092926025, | |
| "logits/rejected": -1.2126017808914185, | |
| "logps/chosen": -22.526697158813477, | |
| "logps/rejected": -49.31510543823242, | |
| "loss": 0.0487, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.2732163071632385, | |
| "rewards/margins": 5.386449337005615, | |
| "rewards/rejected": -5.659666061401367, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 4.271186440677966, | |
| "grad_norm": 8.708471545136758, | |
| "learning_rate": 2.6481015697342856e-07, | |
| "logits/chosen": -1.3532803058624268, | |
| "logits/rejected": -1.263253092765808, | |
| "logps/chosen": -18.96270751953125, | |
| "logps/rejected": -40.195404052734375, | |
| "loss": 0.0488, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.013826161623001099, | |
| "rewards/margins": 4.144740581512451, | |
| "rewards/rejected": -4.130914688110352, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 4.288135593220339, | |
| "grad_norm": 7.692204259130146, | |
| "learning_rate": 2.629606664313896e-07, | |
| "logits/chosen": -1.4633291959762573, | |
| "logits/rejected": -1.2908146381378174, | |
| "logps/chosen": -26.60018539428711, | |
| "logps/rejected": -42.72929763793945, | |
| "loss": 0.0435, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.05975392460823059, | |
| "rewards/margins": 3.8384809494018555, | |
| "rewards/rejected": -3.8982346057891846, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 4.305084745762712, | |
| "grad_norm": 7.840483273382048, | |
| "learning_rate": 2.611104643590838e-07, | |
| "logits/chosen": -1.3723235130310059, | |
| "logits/rejected": -1.3505971431732178, | |
| "logps/chosen": -22.22472381591797, | |
| "logps/rejected": -48.225982666015625, | |
| "loss": 0.0488, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.1287948191165924, | |
| "rewards/margins": 4.58843994140625, | |
| "rewards/rejected": -4.4596452713012695, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 4.322033898305085, | |
| "grad_norm": 7.007309716178494, | |
| "learning_rate": 2.592596523311317e-07, | |
| "logits/chosen": -1.6482080221176147, | |
| "logits/rejected": -1.5536173582077026, | |
| "logps/chosen": -34.07274627685547, | |
| "logps/rejected": -39.57206344604492, | |
| "loss": 0.0379, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.029447555541992188, | |
| "rewards/margins": 4.474746227264404, | |
| "rewards/rejected": -4.445298194885254, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 4.338983050847458, | |
| "grad_norm": 8.655754602681508, | |
| "learning_rate": 2.5740833195563994e-07, | |
| "logits/chosen": -1.4583147764205933, | |
| "logits/rejected": -1.4426932334899902, | |
| "logps/chosen": -28.750776290893555, | |
| "logps/rejected": -42.55610656738281, | |
| "loss": 0.0613, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.13989472389221191, | |
| "rewards/margins": 4.060596942901611, | |
| "rewards/rejected": -4.200491905212402, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 4.3559322033898304, | |
| "grad_norm": 7.666532517465646, | |
| "learning_rate": 2.5555660486862293e-07, | |
| "logits/chosen": -1.3961790800094604, | |
| "logits/rejected": -1.3545511960983276, | |
| "logps/chosen": -30.55471420288086, | |
| "logps/rejected": -46.441307067871094, | |
| "loss": 0.0527, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.01744025945663452, | |
| "rewards/margins": 4.828250885009766, | |
| "rewards/rejected": -4.810810565948486, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 4.372881355932203, | |
| "grad_norm": 7.38904178241979, | |
| "learning_rate": 2.5370457272842315e-07, | |
| "logits/chosen": -1.2144039869308472, | |
| "logits/rejected": -1.1987119913101196, | |
| "logps/chosen": -31.387115478515625, | |
| "logps/rejected": -44.41786193847656, | |
| "loss": 0.0499, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.37475213408470154, | |
| "rewards/margins": 4.554241180419922, | |
| "rewards/rejected": -4.179489612579346, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 4.389830508474576, | |
| "grad_norm": 7.357227804203978, | |
| "learning_rate": 2.5185233721013053e-07, | |
| "logits/chosen": -1.564170479774475, | |
| "logits/rejected": -1.5079408884048462, | |
| "logps/chosen": -24.936302185058594, | |
| "logps/rejected": -44.029632568359375, | |
| "loss": 0.0424, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.1059635728597641, | |
| "rewards/margins": 4.297895908355713, | |
| "rewards/rejected": -4.403859615325928, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 4.406779661016949, | |
| "grad_norm": 9.22648891874881, | |
| "learning_rate": 2.5e-07, | |
| "logits/chosen": -1.3310877084732056, | |
| "logits/rejected": -1.3538789749145508, | |
| "logps/chosen": -24.936809539794922, | |
| "logps/rejected": -49.063575744628906, | |
| "loss": 0.0569, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.07093064486980438, | |
| "rewards/margins": 4.918918132781982, | |
| "rewards/rejected": -4.989849090576172, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 4.423728813559322, | |
| "grad_norm": 6.836226567179652, | |
| "learning_rate": 2.4814766278986944e-07, | |
| "logits/chosen": -1.6707507371902466, | |
| "logits/rejected": -1.5290323495864868, | |
| "logps/chosen": -29.512426376342773, | |
| "logps/rejected": -54.653114318847656, | |
| "loss": 0.0401, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.0224665105342865, | |
| "rewards/margins": 5.460002899169922, | |
| "rewards/rejected": -5.437536239624023, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 4.440677966101695, | |
| "grad_norm": 8.63432296696573, | |
| "learning_rate": 2.462954272715768e-07, | |
| "logits/chosen": -1.5188168287277222, | |
| "logits/rejected": -1.4816163778305054, | |
| "logps/chosen": -31.95643424987793, | |
| "logps/rejected": -42.647403717041016, | |
| "loss": 0.0511, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.3785313367843628, | |
| "rewards/margins": 4.157118320465088, | |
| "rewards/rejected": -4.535649299621582, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 4.4576271186440675, | |
| "grad_norm": 8.141729762323788, | |
| "learning_rate": 2.4444339513137716e-07, | |
| "logits/chosen": -1.590928316116333, | |
| "logits/rejected": -1.5425117015838623, | |
| "logps/chosen": -30.981969833374023, | |
| "logps/rejected": -55.22663116455078, | |
| "loss": 0.0517, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.1410723626613617, | |
| "rewards/margins": 5.534295082092285, | |
| "rewards/rejected": -5.393222808837891, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 4.47457627118644, | |
| "grad_norm": 6.553586136504455, | |
| "learning_rate": 2.4259166804436003e-07, | |
| "logits/chosen": -1.6111406087875366, | |
| "logits/rejected": -1.5660130977630615, | |
| "logps/chosen": -30.944381713867188, | |
| "logps/rejected": -48.914039611816406, | |
| "loss": 0.0361, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.1980370730161667, | |
| "rewards/margins": 5.023958683013916, | |
| "rewards/rejected": -5.2219953536987305, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 4.491525423728813, | |
| "grad_norm": 7.7695916088798045, | |
| "learning_rate": 2.4074034766886826e-07, | |
| "logits/chosen": -1.4081194400787354, | |
| "logits/rejected": -1.4115426540374756, | |
| "logps/chosen": -26.77755355834961, | |
| "logps/rejected": -44.787628173828125, | |
| "loss": 0.0479, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.2828730642795563, | |
| "rewards/margins": 5.210730075836182, | |
| "rewards/rejected": -5.493603229522705, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 4.508474576271187, | |
| "grad_norm": 9.138228801008614, | |
| "learning_rate": 2.3888953564091616e-07, | |
| "logits/chosen": -1.5158981084823608, | |
| "logits/rejected": -1.5783250331878662, | |
| "logps/chosen": -34.667503356933594, | |
| "logps/rejected": -53.5840950012207, | |
| "loss": 0.056, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.20706957578659058, | |
| "rewards/margins": 5.503992080688477, | |
| "rewards/rejected": -5.711061477661133, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 4.52542372881356, | |
| "grad_norm": 6.507174183795552, | |
| "learning_rate": 2.3703933356861044e-07, | |
| "logits/chosen": -1.399531602859497, | |
| "logits/rejected": -1.419585108757019, | |
| "logps/chosen": -33.15267562866211, | |
| "logps/rejected": -45.78288269042969, | |
| "loss": 0.0436, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.5228755474090576, | |
| "rewards/margins": 4.130328178405762, | |
| "rewards/rejected": -4.653203964233398, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 4.5423728813559325, | |
| "grad_norm": 7.507653099421495, | |
| "learning_rate": 2.3518984302657144e-07, | |
| "logits/chosen": -1.5033714771270752, | |
| "logits/rejected": -1.427080750465393, | |
| "logps/chosen": -24.752490997314453, | |
| "logps/rejected": -57.89167022705078, | |
| "loss": 0.0403, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.4027895927429199, | |
| "rewards/margins": 5.957125186920166, | |
| "rewards/rejected": -6.359914779663086, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 4.559322033898305, | |
| "grad_norm": 7.12313349874887, | |
| "learning_rate": 2.333411655503572e-07, | |
| "logits/chosen": -1.4686946868896484, | |
| "logits/rejected": -1.3002254962921143, | |
| "logps/chosen": -29.05103302001953, | |
| "logps/rejected": -53.72389221191406, | |
| "loss": 0.0382, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.0036399289965629578, | |
| "rewards/margins": 5.409298419952393, | |
| "rewards/rejected": -5.412938117980957, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 4.576271186440678, | |
| "grad_norm": 7.93618380947219, | |
| "learning_rate": 2.3149340263088927e-07, | |
| "logits/chosen": -1.7106562852859497, | |
| "logits/rejected": -1.5941462516784668, | |
| "logps/chosen": -23.763492584228516, | |
| "logps/rejected": -47.54367446899414, | |
| "loss": 0.0517, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3024110496044159, | |
| "rewards/margins": 5.066253185272217, | |
| "rewards/rejected": -4.7638421058654785, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 4.593220338983051, | |
| "grad_norm": 7.441146999396367, | |
| "learning_rate": 2.296466557088805e-07, | |
| "logits/chosen": -1.432921051979065, | |
| "logits/rejected": -1.415549635887146, | |
| "logps/chosen": -27.215051651000977, | |
| "logps/rejected": -50.206668853759766, | |
| "loss": 0.0465, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.3797184228897095, | |
| "rewards/margins": 5.635693550109863, | |
| "rewards/rejected": -6.015412330627441, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 4.610169491525424, | |
| "grad_norm": 7.203519404793784, | |
| "learning_rate": 2.278010261692663e-07, | |
| "logits/chosen": -1.6040518283843994, | |
| "logits/rejected": -1.5052812099456787, | |
| "logps/chosen": -27.241743087768555, | |
| "logps/rejected": -47.83567810058594, | |
| "loss": 0.0384, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.10971636325120926, | |
| "rewards/margins": 5.601438999176025, | |
| "rewards/rejected": -5.711155891418457, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 4.627118644067797, | |
| "grad_norm": 7.1515886159837505, | |
| "learning_rate": 2.2595661533563887e-07, | |
| "logits/chosen": -1.5058000087738037, | |
| "logits/rejected": -1.4968637228012085, | |
| "logps/chosen": -31.407421112060547, | |
| "logps/rejected": -50.27019500732422, | |
| "loss": 0.0469, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.42635661363601685, | |
| "rewards/margins": 4.159647464752197, | |
| "rewards/rejected": -4.58600378036499, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 4.6440677966101696, | |
| "grad_norm": 7.65650118033261, | |
| "learning_rate": 2.2411352446468424e-07, | |
| "logits/chosen": -1.3374431133270264, | |
| "logits/rejected": -1.3260188102722168, | |
| "logps/chosen": -21.7060489654541, | |
| "logps/rejected": -47.063289642333984, | |
| "loss": 0.0408, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.14809060096740723, | |
| "rewards/margins": 4.805155277252197, | |
| "rewards/rejected": -4.657064437866211, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 4.661016949152542, | |
| "grad_norm": 8.277053566352693, | |
| "learning_rate": 2.2227185474062374e-07, | |
| "logits/chosen": -1.49006986618042, | |
| "logits/rejected": -1.3998165130615234, | |
| "logps/chosen": -24.71628761291504, | |
| "logps/rejected": -47.67393493652344, | |
| "loss": 0.051, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.02221532166004181, | |
| "rewards/margins": 4.3083624839782715, | |
| "rewards/rejected": -4.286147117614746, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 4.677966101694915, | |
| "grad_norm": 7.8454820880953715, | |
| "learning_rate": 2.2043170726965857e-07, | |
| "logits/chosen": -1.4486889839172363, | |
| "logits/rejected": -1.4862079620361328, | |
| "logps/chosen": -27.138608932495117, | |
| "logps/rejected": -44.235687255859375, | |
| "loss": 0.0498, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.02510838210582733, | |
| "rewards/margins": 4.74564266204834, | |
| "rewards/rejected": -4.720534324645996, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 4.694915254237288, | |
| "grad_norm": 7.405600784416248, | |
| "learning_rate": 2.1859318307441966e-07, | |
| "logits/chosen": -1.444726586341858, | |
| "logits/rejected": -1.4401050806045532, | |
| "logps/chosen": -32.68350601196289, | |
| "logps/rejected": -45.99779510498047, | |
| "loss": 0.0469, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.024221569299697876, | |
| "rewards/margins": 5.507167816162109, | |
| "rewards/rejected": -5.482946872711182, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 4.711864406779661, | |
| "grad_norm": 8.104155066599384, | |
| "learning_rate": 2.1675638308842142e-07, | |
| "logits/chosen": -1.2842341661453247, | |
| "logits/rejected": -1.330397605895996, | |
| "logps/chosen": -27.432506561279297, | |
| "logps/rejected": -40.2528076171875, | |
| "loss": 0.0472, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.19671624898910522, | |
| "rewards/margins": 4.634299278259277, | |
| "rewards/rejected": -4.4375834465026855, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 4.728813559322034, | |
| "grad_norm": 5.864508514550939, | |
| "learning_rate": 2.149214081505205e-07, | |
| "logits/chosen": -1.5249521732330322, | |
| "logits/rejected": -1.404898762702942, | |
| "logps/chosen": -32.8936653137207, | |
| "logps/rejected": -37.96503829956055, | |
| "loss": 0.0318, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.00440371036529541, | |
| "rewards/margins": 4.577935218811035, | |
| "rewards/rejected": -4.573531627655029, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 4.745762711864407, | |
| "grad_norm": 7.425009165674246, | |
| "learning_rate": 2.1308835899937972e-07, | |
| "logits/chosen": -1.463742733001709, | |
| "logits/rejected": -1.4091339111328125, | |
| "logps/chosen": -28.56261444091797, | |
| "logps/rejected": -44.104896545410156, | |
| "loss": 0.0411, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.04231783747673035, | |
| "rewards/margins": 4.795005798339844, | |
| "rewards/rejected": -4.837324142456055, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 4.762711864406779, | |
| "grad_norm": 8.25860440969276, | |
| "learning_rate": 2.112573362679379e-07, | |
| "logits/chosen": -1.4964463710784912, | |
| "logits/rejected": -1.355745553970337, | |
| "logps/chosen": -37.27642059326172, | |
| "logps/rejected": -61.14379119873047, | |
| "loss": 0.0524, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.36257362365722656, | |
| "rewards/margins": 5.591864109039307, | |
| "rewards/rejected": -5.22929048538208, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 4.779661016949152, | |
| "grad_norm": 8.825180205529128, | |
| "learning_rate": 2.09428440477885e-07, | |
| "logits/chosen": -1.5772864818572998, | |
| "logits/rejected": -1.3255836963653564, | |
| "logps/chosen": -27.248455047607422, | |
| "logps/rejected": -47.95212173461914, | |
| "loss": 0.0507, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.1756860911846161, | |
| "rewards/margins": 6.540701866149902, | |
| "rewards/rejected": -6.71638822555542, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 4.796610169491525, | |
| "grad_norm": 6.459544380114144, | |
| "learning_rate": 2.0760177203414366e-07, | |
| "logits/chosen": -1.4836134910583496, | |
| "logits/rejected": -1.5253633260726929, | |
| "logps/chosen": -28.740909576416016, | |
| "logps/rejected": -38.53902816772461, | |
| "loss": 0.0349, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.034703388810157776, | |
| "rewards/margins": 4.7387285232543945, | |
| "rewards/rejected": -4.773431777954102, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 4.813559322033898, | |
| "grad_norm": 8.201593648267453, | |
| "learning_rate": 2.0577743121935682e-07, | |
| "logits/chosen": -1.5238087177276611, | |
| "logits/rejected": -1.479065179824829, | |
| "logps/chosen": -23.523155212402344, | |
| "logps/rejected": -49.87913131713867, | |
| "loss": 0.0576, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.09851184487342834, | |
| "rewards/margins": 4.452397346496582, | |
| "rewards/rejected": -4.550909519195557, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 4.830508474576272, | |
| "grad_norm": 8.037104047323464, | |
| "learning_rate": 2.0395551818838243e-07, | |
| "logits/chosen": -1.5587154626846313, | |
| "logits/rejected": -1.473785638809204, | |
| "logps/chosen": -38.94770812988281, | |
| "logps/rejected": -56.7118034362793, | |
| "loss": 0.0483, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.5131532549858093, | |
| "rewards/margins": 5.551197052001953, | |
| "rewards/rejected": -6.06434965133667, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 4.847457627118644, | |
| "grad_norm": 6.619262748940799, | |
| "learning_rate": 2.021361329627953e-07, | |
| "logits/chosen": -1.5830734968185425, | |
| "logits/rejected": -1.4701610803604126, | |
| "logps/chosen": -25.216768264770508, | |
| "logps/rejected": -52.16200637817383, | |
| "loss": 0.0422, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.12712815403938293, | |
| "rewards/margins": 5.188778877258301, | |
| "rewards/rejected": -5.3159074783325195, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 4.864406779661017, | |
| "grad_norm": 7.108190229695481, | |
| "learning_rate": 2.003193754253957e-07, | |
| "logits/chosen": -1.499477744102478, | |
| "logits/rejected": -1.442081093788147, | |
| "logps/chosen": -29.464977264404297, | |
| "logps/rejected": -44.91366958618164, | |
| "loss": 0.0467, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.14971515536308289, | |
| "rewards/margins": 4.699173927307129, | |
| "rewards/rejected": -4.848889350891113, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 4.88135593220339, | |
| "grad_norm": 6.501653298714645, | |
| "learning_rate": 1.9850534531472544e-07, | |
| "logits/chosen": -1.384242057800293, | |
| "logits/rejected": -1.2570266723632812, | |
| "logps/chosen": -27.370962142944336, | |
| "logps/rejected": -41.63351058959961, | |
| "loss": 0.0416, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.09189403057098389, | |
| "rewards/margins": 5.2285356521606445, | |
| "rewards/rejected": -5.136641502380371, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 4.898305084745763, | |
| "grad_norm": 6.999053945822559, | |
| "learning_rate": 1.966941422195933e-07, | |
| "logits/chosen": -1.4301297664642334, | |
| "logits/rejected": -1.3571842908859253, | |
| "logps/chosen": -30.701709747314453, | |
| "logps/rejected": -47.53770065307617, | |
| "loss": 0.0442, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.2589109241962433, | |
| "rewards/margins": 4.9735002517700195, | |
| "rewards/rejected": -5.232410907745361, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 4.915254237288136, | |
| "grad_norm": 7.2234737195409275, | |
| "learning_rate": 1.94885865573607e-07, | |
| "logits/chosen": -1.5849264860153198, | |
| "logits/rejected": -1.5440596342086792, | |
| "logps/chosen": -22.58307647705078, | |
| "logps/rejected": -42.317161560058594, | |
| "loss": 0.0549, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.05918128788471222, | |
| "rewards/margins": 4.245813846588135, | |
| "rewards/rejected": -4.304995536804199, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 4.932203389830509, | |
| "grad_norm": 7.881613271557854, | |
| "learning_rate": 1.930806146497146e-07, | |
| "logits/chosen": -1.5185688734054565, | |
| "logits/rejected": -1.498981237411499, | |
| "logps/chosen": -27.832983016967773, | |
| "logps/rejected": -44.11700439453125, | |
| "loss": 0.0452, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.012274429202079773, | |
| "rewards/margins": 4.80919885635376, | |
| "rewards/rejected": -4.796924591064453, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 4.9491525423728815, | |
| "grad_norm": 7.929413571170103, | |
| "learning_rate": 1.912784885547541e-07, | |
| "logits/chosen": -1.4354244470596313, | |
| "logits/rejected": -1.2839109897613525, | |
| "logps/chosen": -28.58751678466797, | |
| "logps/rejected": -52.627559661865234, | |
| "loss": 0.05, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.09678968787193298, | |
| "rewards/margins": 3.94634747505188, | |
| "rewards/rejected": -4.043137550354004, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 4.966101694915254, | |
| "grad_norm": 8.10839834515343, | |
| "learning_rate": 1.8947958622401328e-07, | |
| "logits/chosen": -1.2912473678588867, | |
| "logits/rejected": -1.3136945962905884, | |
| "logps/chosen": -28.488567352294922, | |
| "logps/rejected": -44.88406753540039, | |
| "loss": 0.0492, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.5085054636001587, | |
| "rewards/margins": 4.020839691162109, | |
| "rewards/rejected": -4.529345512390137, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 4.983050847457627, | |
| "grad_norm": 6.68942425307024, | |
| "learning_rate": 1.876840064157976e-07, | |
| "logits/chosen": -1.4644904136657715, | |
| "logits/rejected": -1.4102816581726074, | |
| "logps/chosen": -26.936355590820312, | |
| "logps/rejected": -47.06075668334961, | |
| "loss": 0.0416, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.07672211527824402, | |
| "rewards/margins": 4.622737884521484, | |
| "rewards/rejected": -4.699460029602051, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 8.672333086068912, | |
| "learning_rate": 1.858918477060089e-07, | |
| "logits/chosen": -1.4006508588790894, | |
| "logits/rejected": -1.4321238994598389, | |
| "logps/chosen": -25.137971878051758, | |
| "logps/rejected": -42.432716369628906, | |
| "loss": 0.0478, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.02563352882862091, | |
| "rewards/margins": 4.927007675170898, | |
| "rewards/rejected": -4.952641010284424, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 5.016949152542373, | |
| "grad_norm": 6.442205099947888, | |
| "learning_rate": 1.8410320848273313e-07, | |
| "logits/chosen": -1.4923574924468994, | |
| "logits/rejected": -1.4842016696929932, | |
| "logps/chosen": -26.232227325439453, | |
| "logps/rejected": -48.02200698852539, | |
| "loss": 0.0339, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.0712697505950928, | |
| "rewards/margins": 5.136242866516113, | |
| "rewards/rejected": -6.207512855529785, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 5.033898305084746, | |
| "grad_norm": 6.132421236608343, | |
| "learning_rate": 1.8231818694083938e-07, | |
| "logits/chosen": -1.4439387321472168, | |
| "logits/rejected": -1.4443602561950684, | |
| "logps/chosen": -39.26667404174805, | |
| "logps/rejected": -56.92470169067383, | |
| "loss": 0.0313, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.1077936589717865, | |
| "rewards/margins": 6.438662528991699, | |
| "rewards/rejected": -6.546456336975098, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 5.0508474576271185, | |
| "grad_norm": 5.496092091450637, | |
| "learning_rate": 1.8053688107658905e-07, | |
| "logits/chosen": -1.2399489879608154, | |
| "logits/rejected": -1.1487674713134766, | |
| "logps/chosen": -24.482892990112305, | |
| "logps/rejected": -38.75669860839844, | |
| "loss": 0.0391, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.1850048452615738, | |
| "rewards/margins": 4.235044479370117, | |
| "rewards/rejected": -4.050039291381836, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 5.067796610169491, | |
| "grad_norm": 6.371224857913382, | |
| "learning_rate": 1.787593886822556e-07, | |
| "logits/chosen": -1.5012649297714233, | |
| "logits/rejected": -1.5702931880950928, | |
| "logps/chosen": -26.034366607666016, | |
| "logps/rejected": -54.3376579284668, | |
| "loss": 0.0339, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.206559419631958, | |
| "rewards/margins": 5.6937360763549805, | |
| "rewards/rejected": -5.900295257568359, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 5.084745762711864, | |
| "grad_norm": 6.572183005650043, | |
| "learning_rate": 1.7698580734075607e-07, | |
| "logits/chosen": -1.48176908493042, | |
| "logits/rejected": -1.4655022621154785, | |
| "logps/chosen": -27.89720344543457, | |
| "logps/rejected": -45.596282958984375, | |
| "loss": 0.0356, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.14924107491970062, | |
| "rewards/margins": 4.559385299682617, | |
| "rewards/rejected": -4.7086262702941895, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 5.101694915254237, | |
| "grad_norm": 6.034869634249788, | |
| "learning_rate": 1.7521623442029388e-07, | |
| "logits/chosen": -1.5313202142715454, | |
| "logits/rejected": -1.564162015914917, | |
| "logps/chosen": -22.828670501708984, | |
| "logps/rejected": -48.36279296875, | |
| "loss": 0.0363, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2727990448474884, | |
| "rewards/margins": 4.790719985961914, | |
| "rewards/rejected": -4.51792049407959, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 5.11864406779661, | |
| "grad_norm": 6.764929177752538, | |
| "learning_rate": 1.7345076706901326e-07, | |
| "logits/chosen": -1.4571880102157593, | |
| "logits/rejected": -1.4540932178497314, | |
| "logps/chosen": -30.096073150634766, | |
| "logps/rejected": -55.99176788330078, | |
| "loss": 0.0389, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.1132214367389679, | |
| "rewards/margins": 5.543183326721191, | |
| "rewards/rejected": -5.656404972076416, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 5.135593220338983, | |
| "grad_norm": 7.398683409083888, | |
| "learning_rate": 1.7168950220966614e-07, | |
| "logits/chosen": -1.430177927017212, | |
| "logits/rejected": -1.482927918434143, | |
| "logps/chosen": -30.467327117919922, | |
| "logps/rejected": -45.27473831176758, | |
| "loss": 0.0516, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.06274604797363281, | |
| "rewards/margins": 4.2044267654418945, | |
| "rewards/rejected": -4.267173767089844, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 5.1525423728813555, | |
| "grad_norm": 5.9915990011633555, | |
| "learning_rate": 1.6993253653429062e-07, | |
| "logits/chosen": -1.4992166757583618, | |
| "logits/rejected": -1.4459744691848755, | |
| "logps/chosen": -33.046180725097656, | |
| "logps/rejected": -52.499366760253906, | |
| "loss": 0.0365, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.5871363878250122, | |
| "rewards/margins": 5.686320781707764, | |
| "rewards/rejected": -6.2734575271606445, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 5.169491525423728, | |
| "grad_norm": 6.387235286093505, | |
| "learning_rate": 1.681799664989033e-07, | |
| "logits/chosen": -1.4004420042037964, | |
| "logits/rejected": -1.3709527254104614, | |
| "logps/chosen": -26.409391403198242, | |
| "logps/rejected": -37.188838958740234, | |
| "loss": 0.0425, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3655552864074707, | |
| "rewards/margins": 4.699047565460205, | |
| "rewards/rejected": -4.333492279052734, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 5.186440677966102, | |
| "grad_norm": 5.71477855668128, | |
| "learning_rate": 1.6643188831820374e-07, | |
| "logits/chosen": -1.4717719554901123, | |
| "logits/rejected": -1.5952577590942383, | |
| "logps/chosen": -27.931270599365234, | |
| "logps/rejected": -50.50065231323242, | |
| "loss": 0.0397, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.6083439588546753, | |
| "rewards/margins": 5.1790900230407715, | |
| "rewards/rejected": -5.787433624267578, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 5.203389830508475, | |
| "grad_norm": 5.369095807459635, | |
| "learning_rate": 1.6468839796029198e-07, | |
| "logits/chosen": -1.5481698513031006, | |
| "logits/rejected": -1.4016451835632324, | |
| "logps/chosen": -33.04383850097656, | |
| "logps/rejected": -58.85445022583008, | |
| "loss": 0.0321, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.07381519675254822, | |
| "rewards/margins": 5.094147205352783, | |
| "rewards/rejected": -5.167962074279785, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 5.220338983050848, | |
| "grad_norm": 6.478677176582888, | |
| "learning_rate": 1.6294959114140033e-07, | |
| "logits/chosen": -1.6205213069915771, | |
| "logits/rejected": -1.5254000425338745, | |
| "logps/chosen": -29.354225158691406, | |
| "logps/rejected": -43.66874694824219, | |
| "loss": 0.0357, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.08936242759227753, | |
| "rewards/margins": 4.020651817321777, | |
| "rewards/rejected": -4.110013961791992, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 5.237288135593221, | |
| "grad_norm": 6.290350105724975, | |
| "learning_rate": 1.6121556332063861e-07, | |
| "logits/chosen": -1.3170948028564453, | |
| "logits/rejected": -1.281385898590088, | |
| "logps/chosen": -37.28167724609375, | |
| "logps/rejected": -45.006961822509766, | |
| "loss": 0.0352, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.06686612963676453, | |
| "rewards/margins": 4.521815776824951, | |
| "rewards/rejected": -4.588682174682617, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 5.254237288135593, | |
| "grad_norm": 7.295722008430091, | |
| "learning_rate": 1.5948640969475345e-07, | |
| "logits/chosen": -1.5421946048736572, | |
| "logits/rejected": -1.406693935394287, | |
| "logps/chosen": -26.546001434326172, | |
| "logps/rejected": -40.070228576660156, | |
| "loss": 0.0464, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.11074566841125488, | |
| "rewards/margins": 4.666855812072754, | |
| "rewards/rejected": -4.77760124206543, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 5.271186440677966, | |
| "grad_norm": 6.299381642433967, | |
| "learning_rate": 1.5776222519290204e-07, | |
| "logits/chosen": -1.5537474155426025, | |
| "logits/rejected": -1.501166820526123, | |
| "logps/chosen": -26.153173446655273, | |
| "logps/rejected": -50.52534103393555, | |
| "loss": 0.0353, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.541397213935852, | |
| "rewards/margins": 5.92680025100708, | |
| "rewards/rejected": -6.468197822570801, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 5.288135593220339, | |
| "grad_norm": 6.901736790648853, | |
| "learning_rate": 1.560431044714405e-07, | |
| "logits/chosen": -1.547554850578308, | |
| "logits/rejected": -1.4092559814453125, | |
| "logps/chosen": -32.46575927734375, | |
| "logps/rejected": -52.17520523071289, | |
| "loss": 0.042, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.18227432668209076, | |
| "rewards/margins": 5.195803642272949, | |
| "rewards/rejected": -5.378078460693359, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 5.305084745762712, | |
| "grad_norm": 6.537170506793814, | |
| "learning_rate": 1.5432914190872756e-07, | |
| "logits/chosen": -1.394778847694397, | |
| "logits/rejected": -1.3562414646148682, | |
| "logps/chosen": -26.12644386291504, | |
| "logps/rejected": -38.51447677612305, | |
| "loss": 0.036, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.43324902653694153, | |
| "rewards/margins": 4.649719715118408, | |
| "rewards/rejected": -4.216470718383789, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 5.322033898305085, | |
| "grad_norm": 6.822269638404017, | |
| "learning_rate": 1.5262043159994314e-07, | |
| "logits/chosen": -1.603257179260254, | |
| "logits/rejected": -1.41001558303833, | |
| "logps/chosen": -27.069252014160156, | |
| "logps/rejected": -54.39444351196289, | |
| "loss": 0.037, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2375459223985672, | |
| "rewards/margins": 5.968645095825195, | |
| "rewards/rejected": -5.731099605560303, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 5.338983050847458, | |
| "grad_norm": 6.852429555090263, | |
| "learning_rate": 1.5091706735192266e-07, | |
| "logits/chosen": -1.5043295621871948, | |
| "logits/rejected": -1.4766517877578735, | |
| "logps/chosen": -21.581401824951172, | |
| "logps/rejected": -50.377071380615234, | |
| "loss": 0.0566, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.24750420451164246, | |
| "rewards/margins": 5.493175506591797, | |
| "rewards/rejected": -5.24567174911499, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 5.3559322033898304, | |
| "grad_norm": 6.421398270201425, | |
| "learning_rate": 1.4921914267800699e-07, | |
| "logits/chosen": -1.4200242757797241, | |
| "logits/rejected": -1.3987623453140259, | |
| "logps/chosen": -19.360551834106445, | |
| "logps/rejected": -35.37394714355469, | |
| "loss": 0.0374, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.14433270692825317, | |
| "rewards/margins": 3.7558863162994385, | |
| "rewards/rejected": -3.611553430557251, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 5.372881355932203, | |
| "grad_norm": 6.030116263122066, | |
| "learning_rate": 1.4752675079290848e-07, | |
| "logits/chosen": -1.3269891738891602, | |
| "logits/rejected": -1.2499594688415527, | |
| "logps/chosen": -29.73949432373047, | |
| "logps/rejected": -37.97323989868164, | |
| "loss": 0.0307, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.44939878582954407, | |
| "rewards/margins": 3.9090001583099365, | |
| "rewards/rejected": -4.358399391174316, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 5.389830508474576, | |
| "grad_norm": 6.16190997278292, | |
| "learning_rate": 1.458399846075942e-07, | |
| "logits/chosen": -1.5378894805908203, | |
| "logits/rejected": -1.5542147159576416, | |
| "logps/chosen": -33.65290832519531, | |
| "logps/rejected": -57.606727600097656, | |
| "loss": 0.0361, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.47632092237472534, | |
| "rewards/margins": 5.512063980102539, | |
| "rewards/rejected": -5.98838472366333, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 5.406779661016949, | |
| "grad_norm": 6.084242732250159, | |
| "learning_rate": 1.441589367241846e-07, | |
| "logits/chosen": -1.3787221908569336, | |
| "logits/rejected": -1.2671631574630737, | |
| "logps/chosen": -25.762453079223633, | |
| "logps/rejected": -41.90483093261719, | |
| "loss": 0.0351, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.07579733431339264, | |
| "rewards/margins": 4.506519317626953, | |
| "rewards/rejected": -4.430721759796143, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 5.423728813559322, | |
| "grad_norm": 6.400287756731778, | |
| "learning_rate": 1.4248369943086995e-07, | |
| "logits/chosen": -1.609413504600525, | |
| "logits/rejected": -1.440029501914978, | |
| "logps/chosen": -29.613908767700195, | |
| "logps/rejected": -45.63286590576172, | |
| "loss": 0.0406, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.1749686449766159, | |
| "rewards/margins": 4.805515766143799, | |
| "rewards/rejected": -4.980484485626221, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 5.440677966101695, | |
| "grad_norm": 5.58673826227153, | |
| "learning_rate": 1.4081436469684337e-07, | |
| "logits/chosen": -1.4598129987716675, | |
| "logits/rejected": -1.3000261783599854, | |
| "logps/chosen": -26.955453872680664, | |
| "logps/rejected": -49.03952407836914, | |
| "loss": 0.0351, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.2353724241256714, | |
| "rewards/margins": 5.155057907104492, | |
| "rewards/rejected": -5.390429973602295, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 5.4576271186440675, | |
| "grad_norm": 5.7806024491841885, | |
| "learning_rate": 1.3915102416725286e-07, | |
| "logits/chosen": -1.5266464948654175, | |
| "logits/rejected": -1.406123161315918, | |
| "logps/chosen": -21.365182876586914, | |
| "logps/rejected": -44.53782653808594, | |
| "loss": 0.0369, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.19776439666748047, | |
| "rewards/margins": 4.6383209228515625, | |
| "rewards/rejected": -4.440556526184082, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 5.47457627118644, | |
| "grad_norm": 6.663910066076515, | |
| "learning_rate": 1.3749376915816885e-07, | |
| "logits/chosen": -1.5701993703842163, | |
| "logits/rejected": -1.552007794380188, | |
| "logps/chosen": -36.67780685424805, | |
| "logps/rejected": -47.98915481567383, | |
| "loss": 0.0398, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.8618345856666565, | |
| "rewards/margins": 4.562122821807861, | |
| "rewards/rejected": -5.423957347869873, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 5.491525423728813, | |
| "grad_norm": 6.220022143578053, | |
| "learning_rate": 1.3584269065157172e-07, | |
| "logits/chosen": -1.3734331130981445, | |
| "logits/rejected": -1.3631477355957031, | |
| "logps/chosen": -36.97685623168945, | |
| "logps/rejected": -53.4107780456543, | |
| "loss": 0.0326, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.10500967502593994, | |
| "rewards/margins": 4.521078109741211, | |
| "rewards/rejected": -4.6260881423950195, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 5.508474576271187, | |
| "grad_norm": 6.094670518205716, | |
| "learning_rate": 1.341978792903568e-07, | |
| "logits/chosen": -1.4123265743255615, | |
| "logits/rejected": -1.3930362462997437, | |
| "logps/chosen": -24.757308959960938, | |
| "logps/rejected": -50.27605438232422, | |
| "loss": 0.0323, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.1657731980085373, | |
| "rewards/margins": 6.393916606903076, | |
| "rewards/rejected": -6.22814416885376, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 5.52542372881356, | |
| "grad_norm": 7.344149893164419, | |
| "learning_rate": 1.3255942537335804e-07, | |
| "logits/chosen": -1.4898028373718262, | |
| "logits/rejected": -1.529905915260315, | |
| "logps/chosen": -30.63724136352539, | |
| "logps/rejected": -47.23683547973633, | |
| "loss": 0.0557, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.2261982262134552, | |
| "rewards/margins": 4.666712760925293, | |
| "rewards/rejected": -4.892910957336426, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 5.5423728813559325, | |
| "grad_norm": 5.4257600584522185, | |
| "learning_rate": 1.3092741885039085e-07, | |
| "logits/chosen": -1.430129051208496, | |
| "logits/rejected": -1.2972102165222168, | |
| "logps/chosen": -29.192171096801758, | |
| "logps/rejected": -61.473453521728516, | |
| "loss": 0.0452, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.42023560404777527, | |
| "rewards/margins": 5.649378299713135, | |
| "rewards/rejected": -6.069613933563232, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 5.559322033898305, | |
| "grad_norm": 6.750041211234325, | |
| "learning_rate": 1.2930194931731382e-07, | |
| "logits/chosen": -1.5282580852508545, | |
| "logits/rejected": -1.464250087738037, | |
| "logps/chosen": -21.08121109008789, | |
| "logps/rejected": -36.48762512207031, | |
| "loss": 0.0373, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.21070019900798798, | |
| "rewards/margins": 4.739938735961914, | |
| "rewards/rejected": -4.950639247894287, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 5.576271186440678, | |
| "grad_norm": 6.346382606047745, | |
| "learning_rate": 1.2768310601110993e-07, | |
| "logits/chosen": -1.544804334640503, | |
| "logits/rejected": -1.5036790370941162, | |
| "logps/chosen": -25.425878524780273, | |
| "logps/rejected": -57.694950103759766, | |
| "loss": 0.0329, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.15332584083080292, | |
| "rewards/margins": 6.2687883377075195, | |
| "rewards/rejected": -6.115462303161621, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 5.593220338983051, | |
| "grad_norm": 5.891747930904324, | |
| "learning_rate": 1.260709778049877e-07, | |
| "logits/chosen": -1.4886832237243652, | |
| "logits/rejected": -1.5345224142074585, | |
| "logps/chosen": -24.217357635498047, | |
| "logps/rejected": -38.936588287353516, | |
| "loss": 0.031, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.14570964872837067, | |
| "rewards/margins": 3.9201903343200684, | |
| "rewards/rejected": -3.7744803428649902, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 5.610169491525424, | |
| "grad_norm": 6.479009923461722, | |
| "learning_rate": 1.2446565320350182e-07, | |
| "logits/chosen": -1.6119954586029053, | |
| "logits/rejected": -1.5358697175979614, | |
| "logps/chosen": -23.517757415771484, | |
| "logps/rejected": -43.86408233642578, | |
| "loss": 0.0457, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.01591832935810089, | |
| "rewards/margins": 5.240238189697266, | |
| "rewards/rejected": -5.2243194580078125, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 5.627118644067797, | |
| "grad_norm": 5.354023576639168, | |
| "learning_rate": 1.2286722033769492e-07, | |
| "logits/chosen": -1.630919337272644, | |
| "logits/rejected": -1.538849949836731, | |
| "logps/chosen": -28.93423080444336, | |
| "logps/rejected": -53.66632080078125, | |
| "loss": 0.0289, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.17018574476242065, | |
| "rewards/margins": 5.786593437194824, | |
| "rewards/rejected": -5.9567790031433105, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 5.6440677966101696, | |
| "grad_norm": 5.571846024722403, | |
| "learning_rate": 1.2127576696025826e-07, | |
| "logits/chosen": -1.3449130058288574, | |
| "logits/rejected": -1.3209434747695923, | |
| "logps/chosen": -28.3823299407959, | |
| "logps/rejected": -57.359275817871094, | |
| "loss": 0.0353, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.19171355664730072, | |
| "rewards/margins": 6.2648420333862305, | |
| "rewards/rejected": -6.4565558433532715, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 5.661016949152542, | |
| "grad_norm": 6.138956177743218, | |
| "learning_rate": 1.19691380440715e-07, | |
| "logits/chosen": -1.358786702156067, | |
| "logits/rejected": -1.3909467458724976, | |
| "logps/chosen": -30.639657974243164, | |
| "logps/rejected": -48.24367141723633, | |
| "loss": 0.0411, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.4908216595649719, | |
| "rewards/margins": 4.40742301940918, | |
| "rewards/rejected": -4.898244857788086, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 5.677966101694915, | |
| "grad_norm": 4.507338496018484, | |
| "learning_rate": 1.1811414776062365e-07, | |
| "logits/chosen": -1.2691717147827148, | |
| "logits/rejected": -1.2181487083435059, | |
| "logps/chosen": -32.79166793823242, | |
| "logps/rejected": -39.962955474853516, | |
| "loss": 0.0227, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.39959853887557983, | |
| "rewards/margins": 4.66073751449585, | |
| "rewards/rejected": -4.261138916015625, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 5.694915254237288, | |
| "grad_norm": 6.700687388710383, | |
| "learning_rate": 1.1654415550880242e-07, | |
| "logits/chosen": -1.4832508563995361, | |
| "logits/rejected": -1.4977812767028809, | |
| "logps/chosen": -25.554107666015625, | |
| "logps/rejected": -42.260276794433594, | |
| "loss": 0.0325, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.07877922058105469, | |
| "rewards/margins": 4.69774055480957, | |
| "rewards/rejected": -4.776519775390625, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 5.711864406779661, | |
| "grad_norm": 6.852779788605717, | |
| "learning_rate": 1.1498148987657549e-07, | |
| "logits/chosen": -1.2651898860931396, | |
| "logits/rejected": -1.2874916791915894, | |
| "logps/chosen": -31.26464080810547, | |
| "logps/rejected": -56.77597427368164, | |
| "loss": 0.0453, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.4548006057739258, | |
| "rewards/margins": 6.219876289367676, | |
| "rewards/rejected": -6.67467737197876, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 5.728813559322034, | |
| "grad_norm": 5.857910583133337, | |
| "learning_rate": 1.1342623665304207e-07, | |
| "logits/chosen": -1.5274604558944702, | |
| "logits/rejected": -1.5151126384735107, | |
| "logps/chosen": -26.850894927978516, | |
| "logps/rejected": -47.350067138671875, | |
| "loss": 0.0367, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.5475198030471802, | |
| "rewards/margins": 4.643810272216797, | |
| "rewards/rejected": -5.1913299560546875, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 5.745762711864407, | |
| "grad_norm": 7.203472778094057, | |
| "learning_rate": 1.1187848122036562e-07, | |
| "logits/chosen": -1.6401658058166504, | |
| "logits/rejected": -1.6699111461639404, | |
| "logps/chosen": -28.927499771118164, | |
| "logps/rejected": -36.704795837402344, | |
| "loss": 0.0422, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.15964704751968384, | |
| "rewards/margins": 4.4911370277404785, | |
| "rewards/rejected": -4.3314900398254395, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 5.762711864406779, | |
| "grad_norm": 7.2779221460952, | |
| "learning_rate": 1.1033830854908691e-07, | |
| "logits/chosen": -1.502543330192566, | |
| "logits/rejected": -1.4199045896530151, | |
| "logps/chosen": -23.966299057006836, | |
| "logps/rejected": -46.33234786987305, | |
| "loss": 0.0506, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.10161617398262024, | |
| "rewards/margins": 5.133113861083984, | |
| "rewards/rejected": -5.2347307205200195, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 5.779661016949152, | |
| "grad_norm": 7.302109736865858, | |
| "learning_rate": 1.0880580319345919e-07, | |
| "logits/chosen": -1.4564778804779053, | |
| "logits/rejected": -1.5227388143539429, | |
| "logps/chosen": -33.69976806640625, | |
| "logps/rejected": -42.49582290649414, | |
| "loss": 0.0396, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.1525229811668396, | |
| "rewards/margins": 4.674422264099121, | |
| "rewards/rejected": -4.8269453048706055, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 5.796610169491525, | |
| "grad_norm": 7.067583835233995, | |
| "learning_rate": 1.0728104928680623e-07, | |
| "logits/chosen": -1.5719774961471558, | |
| "logits/rejected": -1.502136468887329, | |
| "logps/chosen": -21.33773422241211, | |
| "logps/rejected": -42.372676849365234, | |
| "loss": 0.0433, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.12551027536392212, | |
| "rewards/margins": 5.258235454559326, | |
| "rewards/rejected": -5.3837456703186035, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 5.813559322033898, | |
| "grad_norm": 5.957807006157361, | |
| "learning_rate": 1.0576413053690326e-07, | |
| "logits/chosen": -1.4468128681182861, | |
| "logits/rejected": -1.3548585176467896, | |
| "logps/chosen": -24.791799545288086, | |
| "logps/rejected": -44.49803161621094, | |
| "loss": 0.039, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.11925530433654785, | |
| "rewards/margins": 5.184385299682617, | |
| "rewards/rejected": -5.06512975692749, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 5.830508474576272, | |
| "grad_norm": 6.401362630388005, | |
| "learning_rate": 1.0425513022138202e-07, | |
| "logits/chosen": -1.491408348083496, | |
| "logits/rejected": -1.3371365070343018, | |
| "logps/chosen": -29.460378646850586, | |
| "logps/rejected": -53.63321304321289, | |
| "loss": 0.0354, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.3325144648551941, | |
| "rewards/margins": 5.398627758026123, | |
| "rewards/rejected": -5.731142520904541, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 5.847457627118644, | |
| "grad_norm": 5.5542840449584405, | |
| "learning_rate": 1.0275413118315798e-07, | |
| "logits/chosen": -1.3796604871749878, | |
| "logits/rejected": -1.377408504486084, | |
| "logps/chosen": -26.488426208496094, | |
| "logps/rejected": -46.708282470703125, | |
| "loss": 0.0331, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.1414252519607544, | |
| "rewards/margins": 5.361431121826172, | |
| "rewards/rejected": -5.220005989074707, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 5.864406779661017, | |
| "grad_norm": 6.360073336005713, | |
| "learning_rate": 1.0126121582588315e-07, | |
| "logits/chosen": -1.5218093395233154, | |
| "logits/rejected": -1.3046934604644775, | |
| "logps/chosen": -41.216285705566406, | |
| "logps/rejected": -43.98678970336914, | |
| "loss": 0.0402, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.5423004627227783, | |
| "rewards/margins": 4.604074001312256, | |
| "rewards/rejected": -5.146374702453613, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 5.88135593220339, | |
| "grad_norm": 5.539790259394075, | |
| "learning_rate": 9.977646610942201e-08, | |
| "logits/chosen": -1.4420253038406372, | |
| "logits/rejected": -1.4948465824127197, | |
| "logps/chosen": -37.67629623413086, | |
| "logps/rejected": -53.65730667114258, | |
| "loss": 0.0283, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.8092373609542847, | |
| "rewards/margins": 5.268472671508789, | |
| "rewards/rejected": -6.077709674835205, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 5.898305084745763, | |
| "grad_norm": 6.554303392876224, | |
| "learning_rate": 9.829996354535172e-08, | |
| "logits/chosen": -1.5454871654510498, | |
| "logits/rejected": -1.5333365201950073, | |
| "logps/chosen": -21.350933074951172, | |
| "logps/rejected": -49.00358200073242, | |
| "loss": 0.0408, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.24133503437042236, | |
| "rewards/margins": 4.911167144775391, | |
| "rewards/rejected": -5.152502059936523, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 5.915254237288136, | |
| "grad_norm": 6.546704494284942, | |
| "learning_rate": 9.68317891924871e-08, | |
| "logits/chosen": -1.5749708414077759, | |
| "logits/rejected": -1.433280110359192, | |
| "logps/chosen": -34.201988220214844, | |
| "logps/rejected": -51.28511428833008, | |
| "loss": 0.0356, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.3138273358345032, | |
| "rewards/margins": 4.917507648468018, | |
| "rewards/rejected": -5.231335163116455, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 5.932203389830509, | |
| "grad_norm": 6.294270520592479, | |
| "learning_rate": 9.53720236524313e-08, | |
| "logits/chosen": -1.5306050777435303, | |
| "logits/rejected": -1.442819356918335, | |
| "logps/chosen": -39.66340255737305, | |
| "logps/rejected": -47.36227798461914, | |
| "loss": 0.034, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.3249303102493286, | |
| "rewards/margins": 4.536349296569824, | |
| "rewards/rejected": -4.861279487609863, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 5.9491525423728815, | |
| "grad_norm": 6.24196436709209, | |
| "learning_rate": 9.392074706515002e-08, | |
| "logits/chosen": -1.3876663446426392, | |
| "logits/rejected": -1.3757574558258057, | |
| "logps/chosen": -29.42030143737793, | |
| "logps/rejected": -51.045021057128906, | |
| "loss": 0.0347, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.15947160124778748, | |
| "rewards/margins": 4.901638031005859, | |
| "rewards/rejected": -5.06110954284668, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 5.966101694915254, | |
| "grad_norm": 7.215999575793012, | |
| "learning_rate": 9.247803910457225e-08, | |
| "logits/chosen": -1.5945814847946167, | |
| "logits/rejected": -1.5649042129516602, | |
| "logps/chosen": -23.595874786376953, | |
| "logps/rejected": -45.547088623046875, | |
| "loss": 0.0492, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.08822253346443176, | |
| "rewards/margins": 4.996213912963867, | |
| "rewards/rejected": -5.084437370300293, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 5.983050847457627, | |
| "grad_norm": 6.19260019151604, | |
| "learning_rate": 9.104397897421623e-08, | |
| "logits/chosen": -1.4307911396026611, | |
| "logits/rejected": -1.3201934099197388, | |
| "logps/chosen": -26.220760345458984, | |
| "logps/rejected": -51.94208526611328, | |
| "loss": 0.0311, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.05383840203285217, | |
| "rewards/margins": 5.268675804138184, | |
| "rewards/rejected": -5.322514057159424, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 5.881521162946629, | |
| "learning_rate": 8.961864540284119e-08, | |
| "logits/chosen": -1.46354079246521, | |
| "logits/rejected": -1.369706392288208, | |
| "logps/chosen": -21.57988739013672, | |
| "logps/rejected": -39.70952606201172, | |
| "loss": 0.0307, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.02779284119606018, | |
| "rewards/margins": 4.516615867614746, | |
| "rewards/rejected": -4.488822937011719, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 6.016949152542373, | |
| "grad_norm": 6.0145355879659625, | |
| "learning_rate": 8.82021166401253e-08, | |
| "logits/chosen": -1.124272108078003, | |
| "logits/rejected": -1.1505348682403564, | |
| "logps/chosen": -46.4174919128418, | |
| "logps/rejected": -55.31433868408203, | |
| "loss": 0.0382, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.7474560737609863, | |
| "rewards/margins": 5.104282379150391, | |
| "rewards/rejected": -5.851738452911377, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 6.033898305084746, | |
| "grad_norm": 5.799187572293291, | |
| "learning_rate": 8.679447045236962e-08, | |
| "logits/chosen": -1.4048717021942139, | |
| "logits/rejected": -1.479861855506897, | |
| "logps/chosen": -21.200166702270508, | |
| "logps/rejected": -38.02268600463867, | |
| "loss": 0.0352, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.14473703503608704, | |
| "rewards/margins": 5.024458885192871, | |
| "rewards/rejected": -5.169196128845215, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 6.0508474576271185, | |
| "grad_norm": 7.041713270895366, | |
| "learning_rate": 8.539578411822901e-08, | |
| "logits/chosen": -1.3607594966888428, | |
| "logits/rejected": -1.3202852010726929, | |
| "logps/chosen": -27.951833724975586, | |
| "logps/rejected": -46.871917724609375, | |
| "loss": 0.0439, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.13125565648078918, | |
| "rewards/margins": 4.497035026550293, | |
| "rewards/rejected": -4.365779399871826, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 6.067796610169491, | |
| "grad_norm": 4.947363488557164, | |
| "learning_rate": 8.400613442446947e-08, | |
| "logits/chosen": -1.571787714958191, | |
| "logits/rejected": -1.4330024719238281, | |
| "logps/chosen": -28.946853637695312, | |
| "logps/rejected": -45.973785400390625, | |
| "loss": 0.0311, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.589256763458252, | |
| "rewards/margins": 5.187601566314697, | |
| "rewards/rejected": -5.776858329772949, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 6.084745762711864, | |
| "grad_norm": 5.199584429039793, | |
| "learning_rate": 8.262559766175253e-08, | |
| "logits/chosen": -1.4506189823150635, | |
| "logits/rejected": -1.3975802659988403, | |
| "logps/chosen": -24.713668823242188, | |
| "logps/rejected": -51.15769577026367, | |
| "loss": 0.0267, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.11659543216228485, | |
| "rewards/margins": 6.322598457336426, | |
| "rewards/rejected": -6.439194202423096, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 6.101694915254237, | |
| "grad_norm": 5.228661804484342, | |
| "learning_rate": 8.125424962044741e-08, | |
| "logits/chosen": -1.679395079612732, | |
| "logits/rejected": -1.5920395851135254, | |
| "logps/chosen": -32.77759552001953, | |
| "logps/rejected": -51.247283935546875, | |
| "loss": 0.0256, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.6732353568077087, | |
| "rewards/margins": 5.932892322540283, | |
| "rewards/rejected": -6.6061272621154785, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 6.11864406779661, | |
| "grad_norm": 4.99263433146697, | |
| "learning_rate": 7.989216558646941e-08, | |
| "logits/chosen": -1.5995450019836426, | |
| "logits/rejected": -1.5377026796340942, | |
| "logps/chosen": -30.13187026977539, | |
| "logps/rejected": -47.109947204589844, | |
| "loss": 0.0269, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.2468140721321106, | |
| "rewards/margins": 5.169532299041748, | |
| "rewards/rejected": -5.416346073150635, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 6.135593220338983, | |
| "grad_norm": 5.703896702738386, | |
| "learning_rate": 7.853942033714736e-08, | |
| "logits/chosen": -1.5143556594848633, | |
| "logits/rejected": -1.3968244791030884, | |
| "logps/chosen": -37.41975402832031, | |
| "logps/rejected": -56.349769592285156, | |
| "loss": 0.0326, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.31496256589889526, | |
| "rewards/margins": 5.113517761230469, | |
| "rewards/rejected": -5.428481101989746, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 6.1525423728813555, | |
| "grad_norm": 5.916237621370921, | |
| "learning_rate": 7.719608813711847e-08, | |
| "logits/chosen": -1.5250320434570312, | |
| "logits/rejected": -1.475509762763977, | |
| "logps/chosen": -26.13006591796875, | |
| "logps/rejected": -35.852359771728516, | |
| "loss": 0.0313, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.13443481922149658, | |
| "rewards/margins": 4.1385908126831055, | |
| "rewards/rejected": -4.00415563583374, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 6.169491525423728, | |
| "grad_norm": 6.510656924645236, | |
| "learning_rate": 7.586224273425081e-08, | |
| "logits/chosen": -1.3499259948730469, | |
| "logits/rejected": -1.244497299194336, | |
| "logps/chosen": -34.813541412353516, | |
| "logps/rejected": -47.603271484375, | |
| "loss": 0.0405, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.11887124180793762, | |
| "rewards/margins": 4.864650726318359, | |
| "rewards/rejected": -4.745779514312744, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 6.186440677966102, | |
| "grad_norm": 5.630103461727817, | |
| "learning_rate": 7.45379573555947e-08, | |
| "logits/chosen": -1.4300577640533447, | |
| "logits/rejected": -1.4099435806274414, | |
| "logps/chosen": -34.18559646606445, | |
| "logps/rejected": -45.739933013916016, | |
| "loss": 0.0314, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.07826349139213562, | |
| "rewards/margins": 4.771849155426025, | |
| "rewards/rejected": -4.8501129150390625, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 6.203389830508475, | |
| "grad_norm": 5.238611793677947, | |
| "learning_rate": 7.322330470336313e-08, | |
| "logits/chosen": -1.5490187406539917, | |
| "logits/rejected": -1.6131647825241089, | |
| "logps/chosen": -29.01973533630371, | |
| "logps/rejected": -48.46769332885742, | |
| "loss": 0.0323, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.043659090995788574, | |
| "rewards/margins": 4.948011875152588, | |
| "rewards/rejected": -4.991670608520508, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 6.220338983050848, | |
| "grad_norm": 7.178172383273588, | |
| "learning_rate": 7.19183569509398e-08, | |
| "logits/chosen": -1.4348180294036865, | |
| "logits/rejected": -1.4342725276947021, | |
| "logps/chosen": -26.032867431640625, | |
| "logps/rejected": -35.465248107910156, | |
| "loss": 0.0433, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.11982511729001999, | |
| "rewards/margins": 4.798130035400391, | |
| "rewards/rejected": -4.678304672241211, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 6.237288135593221, | |
| "grad_norm": 5.80114325638975, | |
| "learning_rate": 7.062318573891715e-08, | |
| "logits/chosen": -1.3932957649230957, | |
| "logits/rejected": -1.3466596603393555, | |
| "logps/chosen": -26.193689346313477, | |
| "logps/rejected": -43.341983795166016, | |
| "loss": 0.035, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.26808837056159973, | |
| "rewards/margins": 5.052790641784668, | |
| "rewards/rejected": -4.784702301025391, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 6.254237288135593, | |
| "grad_norm": 5.8991450258769875, | |
| "learning_rate": 6.933786217116364e-08, | |
| "logits/chosen": -1.5599933862686157, | |
| "logits/rejected": -1.4719116687774658, | |
| "logps/chosen": -26.387096405029297, | |
| "logps/rejected": -41.33942794799805, | |
| "loss": 0.0386, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.40677589178085327, | |
| "rewards/margins": 4.713696479797363, | |
| "rewards/rejected": -4.306921005249023, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 6.271186440677966, | |
| "grad_norm": 5.827867650343381, | |
| "learning_rate": 6.806245681091944e-08, | |
| "logits/chosen": -1.4101459980010986, | |
| "logits/rejected": -1.3021355867385864, | |
| "logps/chosen": -27.591569900512695, | |
| "logps/rejected": -47.71153259277344, | |
| "loss": 0.0338, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.1557801067829132, | |
| "rewards/margins": 5.925251483917236, | |
| "rewards/rejected": -5.769471168518066, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 6.288135593220339, | |
| "grad_norm": 5.390338841256569, | |
| "learning_rate": 6.679703967692321e-08, | |
| "logits/chosen": -1.584707498550415, | |
| "logits/rejected": -1.4573631286621094, | |
| "logps/chosen": -21.927671432495117, | |
| "logps/rejected": -49.91468048095703, | |
| "loss": 0.0317, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.393250048160553, | |
| "rewards/margins": 5.488755226135254, | |
| "rewards/rejected": -5.095505714416504, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 6.305084745762712, | |
| "grad_norm": 4.571609396535961, | |
| "learning_rate": 6.554168023956816e-08, | |
| "logits/chosen": -1.4562060832977295, | |
| "logits/rejected": -1.3005712032318115, | |
| "logps/chosen": -25.927963256835938, | |
| "logps/rejected": -44.458621978759766, | |
| "loss": 0.0284, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.2479822039604187, | |
| "rewards/margins": 4.508713722229004, | |
| "rewards/rejected": -4.756695747375488, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 6.322033898305085, | |
| "grad_norm": 6.286732270647367, | |
| "learning_rate": 6.429644741708779e-08, | |
| "logits/chosen": -1.4326915740966797, | |
| "logits/rejected": -1.3545727729797363, | |
| "logps/chosen": -25.131105422973633, | |
| "logps/rejected": -40.29548263549805, | |
| "loss": 0.0324, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.04497765004634857, | |
| "rewards/margins": 4.998070240020752, | |
| "rewards/rejected": -4.953092575073242, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 6.338983050847458, | |
| "grad_norm": 6.303722387593113, | |
| "learning_rate": 6.306140957177225e-08, | |
| "logits/chosen": -1.452634334564209, | |
| "logits/rejected": -1.427751898765564, | |
| "logps/chosen": -26.491840362548828, | |
| "logps/rejected": -47.47591781616211, | |
| "loss": 0.0331, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.015405148267745972, | |
| "rewards/margins": 5.423872947692871, | |
| "rewards/rejected": -5.439278602600098, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 6.3559322033898304, | |
| "grad_norm": 5.766682311294022, | |
| "learning_rate": 6.183663450621607e-08, | |
| "logits/chosen": -1.506758213043213, | |
| "logits/rejected": -1.4310308694839478, | |
| "logps/chosen": -38.03307342529297, | |
| "logps/rejected": -46.39859390258789, | |
| "loss": 0.0361, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.16017396748065948, | |
| "rewards/margins": 4.915417194366455, | |
| "rewards/rejected": -5.075592041015625, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 6.372881355932203, | |
| "grad_norm": 4.912000082420141, | |
| "learning_rate": 6.062218945959496e-08, | |
| "logits/chosen": -1.4634639024734497, | |
| "logits/rejected": -1.5328123569488525, | |
| "logps/chosen": -34.974857330322266, | |
| "logps/rejected": -43.447689056396484, | |
| "loss": 0.0265, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.06525677442550659, | |
| "rewards/margins": 5.0226850509643555, | |
| "rewards/rejected": -5.087942123413086, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 6.389830508474576, | |
| "grad_norm": 4.923323155963838, | |
| "learning_rate": 5.9418141103975026e-08, | |
| "logits/chosen": -1.4239155054092407, | |
| "logits/rejected": -1.4835269451141357, | |
| "logps/chosen": -30.768068313598633, | |
| "logps/rejected": -60.89370346069336, | |
| "loss": 0.0235, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.3536137342453003, | |
| "rewards/margins": 6.844842910766602, | |
| "rewards/rejected": -7.198456764221191, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 6.406779661016949, | |
| "grad_norm": 6.31358841065967, | |
| "learning_rate": 5.822455554065217e-08, | |
| "logits/chosen": -1.3272855281829834, | |
| "logits/rejected": -1.2906978130340576, | |
| "logps/chosen": -25.13174819946289, | |
| "logps/rejected": -41.51945877075195, | |
| "loss": 0.0377, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2586425840854645, | |
| "rewards/margins": 5.224736213684082, | |
| "rewards/rejected": -4.966093063354492, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 6.423728813559322, | |
| "grad_norm": 5.9217345486120365, | |
| "learning_rate": 5.704149829652341e-08, | |
| "logits/chosen": -1.5300796031951904, | |
| "logits/rejected": -1.4160737991333008, | |
| "logps/chosen": -29.946929931640625, | |
| "logps/rejected": -49.227298736572266, | |
| "loss": 0.0318, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.1305888444185257, | |
| "rewards/margins": 5.067564487457275, | |
| "rewards/rejected": -5.198153495788574, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 6.440677966101695, | |
| "grad_norm": 5.394022162647855, | |
| "learning_rate": 5.586903432048942e-08, | |
| "logits/chosen": -1.5190213918685913, | |
| "logits/rejected": -1.4037725925445557, | |
| "logps/chosen": -31.318918228149414, | |
| "logps/rejected": -49.82917404174805, | |
| "loss": 0.0302, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.9247037768363953, | |
| "rewards/margins": 5.2036919593811035, | |
| "rewards/rejected": -6.128396034240723, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 6.4576271186440675, | |
| "grad_norm": 5.307172264281174, | |
| "learning_rate": 5.470722797988883e-08, | |
| "logits/chosen": -1.3760260343551636, | |
| "logits/rejected": -1.3698216676712036, | |
| "logps/chosen": -24.693878173828125, | |
| "logps/rejected": -37.211387634277344, | |
| "loss": 0.0353, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.05154569447040558, | |
| "rewards/margins": 4.664517879486084, | |
| "rewards/rejected": -4.612972736358643, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 6.47457627118644, | |
| "grad_norm": 5.039925613354933, | |
| "learning_rate": 5.355614305696468e-08, | |
| "logits/chosen": -1.56493079662323, | |
| "logits/rejected": -1.5143284797668457, | |
| "logps/chosen": -27.657060623168945, | |
| "logps/rejected": -43.6398811340332, | |
| "loss": 0.0262, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.09710407257080078, | |
| "rewards/margins": 4.8609700202941895, | |
| "rewards/rejected": -4.958074569702148, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 6.491525423728813, | |
| "grad_norm": 6.304781751843407, | |
| "learning_rate": 5.241584274536259e-08, | |
| "logits/chosen": -1.3160762786865234, | |
| "logits/rejected": -1.426667332649231, | |
| "logps/chosen": -31.549245834350586, | |
| "logps/rejected": -49.771148681640625, | |
| "loss": 0.0322, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.3802323043346405, | |
| "rewards/margins": 5.496129035949707, | |
| "rewards/rejected": -5.876360893249512, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 6.508474576271187, | |
| "grad_norm": 5.063738993506685, | |
| "learning_rate": 5.1286389646661654e-08, | |
| "logits/chosen": -1.396496057510376, | |
| "logits/rejected": -1.3829492330551147, | |
| "logps/chosen": -28.830703735351562, | |
| "logps/rejected": -47.824317932128906, | |
| "loss": 0.0246, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.2612743079662323, | |
| "rewards/margins": 4.95106315612793, | |
| "rewards/rejected": -5.212337017059326, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 6.52542372881356, | |
| "grad_norm": 5.637690264352778, | |
| "learning_rate": 5.0167845766937806e-08, | |
| "logits/chosen": -1.4855574369430542, | |
| "logits/rejected": -1.3984534740447998, | |
| "logps/chosen": -27.114734649658203, | |
| "logps/rejected": -43.69512939453125, | |
| "loss": 0.0414, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.21004855632781982, | |
| "rewards/margins": 4.845144748687744, | |
| "rewards/rejected": -5.055192947387695, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 6.5423728813559325, | |
| "grad_norm": 6.025392312759281, | |
| "learning_rate": 4.906027251335917e-08, | |
| "logits/chosen": -1.4550882577896118, | |
| "logits/rejected": -1.4542597532272339, | |
| "logps/chosen": -23.857240676879883, | |
| "logps/rejected": -47.817237854003906, | |
| "loss": 0.0352, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.31768307089805603, | |
| "rewards/margins": 4.948706150054932, | |
| "rewards/rejected": -5.266389846801758, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 6.559322033898305, | |
| "grad_norm": 5.587911152077424, | |
| "learning_rate": 4.7963730690815467e-08, | |
| "logits/chosen": -1.4407036304473877, | |
| "logits/rejected": -1.3747568130493164, | |
| "logps/chosen": -19.481971740722656, | |
| "logps/rejected": -42.736568450927734, | |
| "loss": 0.0309, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.19361047446727753, | |
| "rewards/margins": 5.198715686798096, | |
| "rewards/rejected": -5.392325401306152, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 6.576271186440678, | |
| "grad_norm": 6.586187305125031, | |
| "learning_rate": 4.687828049857967e-08, | |
| "logits/chosen": -1.3945553302764893, | |
| "logits/rejected": -1.322791337966919, | |
| "logps/chosen": -29.22484016418457, | |
| "logps/rejected": -40.85685348510742, | |
| "loss": 0.0546, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.17647796869277954, | |
| "rewards/margins": 4.56741189956665, | |
| "rewards/rejected": -4.390933990478516, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 6.593220338983051, | |
| "grad_norm": 6.218480481228633, | |
| "learning_rate": 4.580398152700304e-08, | |
| "logits/chosen": -1.6018644571304321, | |
| "logits/rejected": -1.4887254238128662, | |
| "logps/chosen": -25.33257484436035, | |
| "logps/rejected": -45.91154479980469, | |
| "loss": 0.0352, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.20426103472709656, | |
| "rewards/margins": 5.0216498374938965, | |
| "rewards/rejected": -5.225910186767578, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 6.610169491525424, | |
| "grad_norm": 6.569813291973876, | |
| "learning_rate": 4.47408927542435e-08, | |
| "logits/chosen": -1.3770880699157715, | |
| "logits/rejected": -1.286376714706421, | |
| "logps/chosen": -23.488061904907227, | |
| "logps/rejected": -38.757720947265625, | |
| "loss": 0.0388, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.2643284499645233, | |
| "rewards/margins": 3.8822245597839355, | |
| "rewards/rejected": -4.146553039550781, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 6.627118644067797, | |
| "grad_norm": 5.39352903238084, | |
| "learning_rate": 4.368907254302837e-08, | |
| "logits/chosen": -1.5808112621307373, | |
| "logits/rejected": -1.5275750160217285, | |
| "logps/chosen": -20.95781707763672, | |
| "logps/rejected": -38.6187744140625, | |
| "loss": 0.0366, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.05846305191516876, | |
| "rewards/margins": 4.244011878967285, | |
| "rewards/rejected": -4.302474498748779, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 6.6440677966101696, | |
| "grad_norm": 5.323944978561868, | |
| "learning_rate": 4.264857863744956e-08, | |
| "logits/chosen": -1.5523847341537476, | |
| "logits/rejected": -1.39801824092865, | |
| "logps/chosen": -22.804271697998047, | |
| "logps/rejected": -38.31208419799805, | |
| "loss": 0.0275, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.4400111436843872, | |
| "rewards/margins": 5.238194465637207, | |
| "rewards/rejected": -4.798183917999268, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 6.661016949152542, | |
| "grad_norm": 7.086981471555236, | |
| "learning_rate": 4.161946815979403e-08, | |
| "logits/chosen": -1.4906023740768433, | |
| "logits/rejected": -1.4377923011779785, | |
| "logps/chosen": -37.56535720825195, | |
| "logps/rejected": -52.383487701416016, | |
| "loss": 0.0451, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.3086543679237366, | |
| "rewards/margins": 5.0754618644714355, | |
| "rewards/rejected": -5.384116172790527, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 6.677966101694915, | |
| "grad_norm": 5.2671704490811075, | |
| "learning_rate": 4.0601797607407505e-08, | |
| "logits/chosen": -1.4550049304962158, | |
| "logits/rejected": -1.3966783285140991, | |
| "logps/chosen": -22.132400512695312, | |
| "logps/rejected": -41.57554626464844, | |
| "loss": 0.0324, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.2699892520904541, | |
| "rewards/margins": 4.588739395141602, | |
| "rewards/rejected": -4.858728408813477, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 6.694915254237288, | |
| "grad_norm": 5.32453491166638, | |
| "learning_rate": 3.9595622849593e-08, | |
| "logits/chosen": -1.512937307357788, | |
| "logits/rejected": -1.3621115684509277, | |
| "logps/chosen": -28.885116577148438, | |
| "logps/rejected": -50.2530517578125, | |
| "loss": 0.0366, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.5175858736038208, | |
| "rewards/margins": 5.390725135803223, | |
| "rewards/rejected": -5.908310890197754, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 6.711864406779661, | |
| "grad_norm": 5.792264114569628, | |
| "learning_rate": 3.8600999124543455e-08, | |
| "logits/chosen": -1.5662391185760498, | |
| "logits/rejected": -1.42247474193573, | |
| "logps/chosen": -25.263622283935547, | |
| "logps/rejected": -45.11002731323242, | |
| "loss": 0.0315, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.27481862902641296, | |
| "rewards/margins": 5.28562593460083, | |
| "rewards/rejected": -5.010807037353516, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 6.728813559322034, | |
| "grad_norm": 5.997208149184034, | |
| "learning_rate": 3.7617981036309533e-08, | |
| "logits/chosen": -1.5715502500534058, | |
| "logits/rejected": -1.6154096126556396, | |
| "logps/chosen": -23.581790924072266, | |
| "logps/rejected": -45.5838623046875, | |
| "loss": 0.0377, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.18038499355316162, | |
| "rewards/margins": 4.884888172149658, | |
| "rewards/rejected": -5.065273284912109, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 6.745762711864407, | |
| "grad_norm": 5.245999983483119, | |
| "learning_rate": 3.664662255180134e-08, | |
| "logits/chosen": -1.4324719905853271, | |
| "logits/rejected": -1.329777717590332, | |
| "logps/chosen": -26.615516662597656, | |
| "logps/rejected": -41.86264419555664, | |
| "loss": 0.0394, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.05792872607707977, | |
| "rewards/margins": 4.163052082061768, | |
| "rewards/rejected": -4.105123519897461, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 6.762711864406779, | |
| "grad_norm": 4.518676670270046, | |
| "learning_rate": 3.5686976997826245e-08, | |
| "logits/chosen": -1.6349799633026123, | |
| "logits/rejected": -1.6590909957885742, | |
| "logps/chosen": -41.426727294921875, | |
| "logps/rejected": -52.470558166503906, | |
| "loss": 0.0236, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.4126446843147278, | |
| "rewards/margins": 5.763060569763184, | |
| "rewards/rejected": -6.175705432891846, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 6.779661016949152, | |
| "grad_norm": 5.721240252343364, | |
| "learning_rate": 3.473909705816111e-08, | |
| "logits/chosen": -1.4431383609771729, | |
| "logits/rejected": -1.3899328708648682, | |
| "logps/chosen": -36.3721923828125, | |
| "logps/rejected": -52.30086898803711, | |
| "loss": 0.0351, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.106335163116455, | |
| "rewards/margins": 5.6671295166015625, | |
| "rewards/rejected": -6.773464679718018, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 6.796610169491525, | |
| "grad_norm": 5.164348258065598, | |
| "learning_rate": 3.3803034770659824e-08, | |
| "logits/chosen": -1.6824212074279785, | |
| "logits/rejected": -1.5971221923828125, | |
| "logps/chosen": -37.007022857666016, | |
| "logps/rejected": -74.48016357421875, | |
| "loss": 0.0286, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.6765580177307129, | |
| "rewards/margins": 7.421613693237305, | |
| "rewards/rejected": -8.09817123413086, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 6.813559322033898, | |
| "grad_norm": 4.808650804090459, | |
| "learning_rate": 3.287884152439646e-08, | |
| "logits/chosen": -1.3810476064682007, | |
| "logits/rejected": -1.4028950929641724, | |
| "logps/chosen": -29.925891876220703, | |
| "logps/rejected": -49.08871841430664, | |
| "loss": 0.0311, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.16381314396858215, | |
| "rewards/margins": 6.113283157348633, | |
| "rewards/rejected": -5.949470520019531, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 6.830508474576272, | |
| "grad_norm": 6.236701718893695, | |
| "learning_rate": 3.19665680568445e-08, | |
| "logits/chosen": -1.5909409523010254, | |
| "logits/rejected": -1.5455948114395142, | |
| "logps/chosen": -33.62629699707031, | |
| "logps/rejected": -39.20241165161133, | |
| "loss": 0.0374, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.20260879397392273, | |
| "rewards/margins": 4.192948341369629, | |
| "rewards/rejected": -4.395556926727295, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 6.847457627118644, | |
| "grad_norm": 4.563195691909389, | |
| "learning_rate": 3.106626445109081e-08, | |
| "logits/chosen": -1.4258350133895874, | |
| "logits/rejected": -1.471103549003601, | |
| "logps/chosen": -32.27821731567383, | |
| "logps/rejected": -55.883445739746094, | |
| "loss": 0.0274, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.37013059854507446, | |
| "rewards/margins": 5.775850296020508, | |
| "rewards/rejected": -6.1459808349609375, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 6.864406779661017, | |
| "grad_norm": 6.319828827539619, | |
| "learning_rate": 3.017798013308645e-08, | |
| "logits/chosen": -1.6110063791275024, | |
| "logits/rejected": -1.5970886945724487, | |
| "logps/chosen": -33.942317962646484, | |
| "logps/rejected": -42.78126525878906, | |
| "loss": 0.0394, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.08474293351173401, | |
| "rewards/margins": 4.63005256652832, | |
| "rewards/rejected": -4.545310020446777, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 6.88135593220339, | |
| "grad_norm": 6.956985946783924, | |
| "learning_rate": 2.9301763868933153e-08, | |
| "logits/chosen": -1.4388988018035889, | |
| "logits/rejected": -1.4342849254608154, | |
| "logps/chosen": -23.345102310180664, | |
| "logps/rejected": -40.68294906616211, | |
| "loss": 0.0458, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.11982996761798859, | |
| "rewards/margins": 4.858503818511963, | |
| "rewards/rejected": -4.978332996368408, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 6.898305084745763, | |
| "grad_norm": 5.4291102026527716, | |
| "learning_rate": 2.843766376220616e-08, | |
| "logits/chosen": -1.7093875408172607, | |
| "logits/rejected": -1.6210181713104248, | |
| "logps/chosen": -29.290470123291016, | |
| "logps/rejected": -50.43269729614258, | |
| "loss": 0.0278, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.7307620048522949, | |
| "rewards/margins": 5.429152488708496, | |
| "rewards/rejected": -6.159914016723633, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 6.915254237288136, | |
| "grad_norm": 5.393300173055395, | |
| "learning_rate": 2.7585727251313195e-08, | |
| "logits/chosen": -1.3670405149459839, | |
| "logits/rejected": -1.1494945287704468, | |
| "logps/chosen": -38.649288177490234, | |
| "logps/rejected": -55.98222351074219, | |
| "loss": 0.0388, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.4593397378921509, | |
| "rewards/margins": 5.185694694519043, | |
| "rewards/rejected": -5.645034313201904, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 6.932203389830509, | |
| "grad_norm": 4.837513841498393, | |
| "learning_rate": 2.6746001106890377e-08, | |
| "logits/chosen": -1.656294584274292, | |
| "logits/rejected": -1.5977482795715332, | |
| "logps/chosen": -29.096288681030273, | |
| "logps/rejected": -46.76082229614258, | |
| "loss": 0.0252, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.21669504046440125, | |
| "rewards/margins": 5.154686450958252, | |
| "rewards/rejected": -5.371381759643555, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 6.9491525423728815, | |
| "grad_norm": 5.248560918966433, | |
| "learning_rate": 2.5918531429234364e-08, | |
| "logits/chosen": -1.592848300933838, | |
| "logits/rejected": -1.540168285369873, | |
| "logps/chosen": -31.085020065307617, | |
| "logps/rejected": -53.143741607666016, | |
| "loss": 0.0298, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.6363227963447571, | |
| "rewards/margins": 5.588076591491699, | |
| "rewards/rejected": -6.224399089813232, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 6.966101694915254, | |
| "grad_norm": 5.2195974344109715, | |
| "learning_rate": 2.5103363645771536e-08, | |
| "logits/chosen": -1.65935218334198, | |
| "logits/rejected": -1.6805386543273926, | |
| "logps/chosen": -34.833702087402344, | |
| "logps/rejected": -48.904422760009766, | |
| "loss": 0.0352, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.1815856397151947, | |
| "rewards/margins": 5.786221981048584, | |
| "rewards/rejected": -5.967808246612549, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 6.983050847457627, | |
| "grad_norm": 5.470555151396108, | |
| "learning_rate": 2.4300542508564114e-08, | |
| "logits/chosen": -1.4322288036346436, | |
| "logits/rejected": -1.3301326036453247, | |
| "logps/chosen": -28.12655258178711, | |
| "logps/rejected": -46.921104431152344, | |
| "loss": 0.0395, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.31704282760620117, | |
| "rewards/margins": 4.567241191864014, | |
| "rewards/rejected": -4.884284019470215, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 5.578953824776314, | |
| "learning_rate": 2.3510112091853357e-08, | |
| "logits/chosen": -1.276556134223938, | |
| "logits/rejected": -1.2297403812408447, | |
| "logps/chosen": -21.443805694580078, | |
| "logps/rejected": -48.807159423828125, | |
| "loss": 0.0301, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.017134033143520355, | |
| "rewards/margins": 5.347229957580566, | |
| "rewards/rejected": -5.330096244812012, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 7.016949152542373, | |
| "grad_norm": 6.999197685418954, | |
| "learning_rate": 2.27321157896396e-08, | |
| "logits/chosen": -1.5116349458694458, | |
| "logits/rejected": -1.3849687576293945, | |
| "logps/chosen": -27.718652725219727, | |
| "logps/rejected": -47.36982345581055, | |
| "loss": 0.0412, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.1311918944120407, | |
| "rewards/margins": 5.171109676361084, | |
| "rewards/rejected": -5.039917469024658, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 7.033898305084746, | |
| "grad_norm": 4.883886921689459, | |
| "learning_rate": 2.1966596313300362e-08, | |
| "logits/chosen": -1.6032100915908813, | |
| "logits/rejected": -1.518362045288086, | |
| "logps/chosen": -29.143009185791016, | |
| "logps/rejected": -40.435001373291016, | |
| "loss": 0.0341, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.24970372021198273, | |
| "rewards/margins": 4.1580705642700195, | |
| "rewards/rejected": -4.407774448394775, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 7.0508474576271185, | |
| "grad_norm": 5.855476388920514, | |
| "learning_rate": 2.1213595689245384e-08, | |
| "logits/chosen": -1.350822925567627, | |
| "logits/rejected": -1.3723689317703247, | |
| "logps/chosen": -23.788740158081055, | |
| "logps/rejected": -40.06031799316406, | |
| "loss": 0.0317, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.003228999674320221, | |
| "rewards/margins": 4.06948184967041, | |
| "rewards/rejected": -4.0727105140686035, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 7.067796610169491, | |
| "grad_norm": 5.942075006867736, | |
| "learning_rate": 2.0473155256609363e-08, | |
| "logits/chosen": -1.6869771480560303, | |
| "logits/rejected": -1.6048226356506348, | |
| "logps/chosen": -27.607175827026367, | |
| "logps/rejected": -46.171112060546875, | |
| "loss": 0.0478, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.15510573983192444, | |
| "rewards/margins": 4.710604667663574, | |
| "rewards/rejected": -4.865710258483887, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 7.084745762711864, | |
| "grad_norm": 5.9322015599453595, | |
| "learning_rate": 1.9745315664982277e-08, | |
| "logits/chosen": -1.5960508584976196, | |
| "logits/rejected": -1.4311569929122925, | |
| "logps/chosen": -20.32171630859375, | |
| "logps/rejected": -41.58867645263672, | |
| "loss": 0.0372, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.00745101273059845, | |
| "rewards/margins": 5.94549560546875, | |
| "rewards/rejected": -5.938044548034668, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 7.101694915254237, | |
| "grad_norm": 5.918275937210799, | |
| "learning_rate": 1.9030116872178314e-08, | |
| "logits/chosen": -1.699689269065857, | |
| "logits/rejected": -1.5470737218856812, | |
| "logps/chosen": -28.80027198791504, | |
| "logps/rejected": -46.22833251953125, | |
| "loss": 0.0378, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.4729538857936859, | |
| "rewards/margins": 4.527037143707275, | |
| "rewards/rejected": -4.999991416931152, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 7.11864406779661, | |
| "grad_norm": 6.2740518918594015, | |
| "learning_rate": 1.8327598142041656e-08, | |
| "logits/chosen": -1.4966247081756592, | |
| "logits/rejected": -1.5006301403045654, | |
| "logps/chosen": -39.597660064697266, | |
| "logps/rejected": -59.670684814453125, | |
| "loss": 0.0464, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.10089424252510071, | |
| "rewards/margins": 5.958191871643066, | |
| "rewards/rejected": -5.857297420501709, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 7.135593220338983, | |
| "grad_norm": 5.848230208495223, | |
| "learning_rate": 1.7637798042291125e-08, | |
| "logits/chosen": -1.4466344118118286, | |
| "logits/rejected": -1.4158880710601807, | |
| "logps/chosen": -34.30101013183594, | |
| "logps/rejected": -41.198646545410156, | |
| "loss": 0.0301, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.5382071733474731, | |
| "rewards/margins": 4.64283561706543, | |
| "rewards/rejected": -5.1810431480407715, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 7.1525423728813555, | |
| "grad_norm": 4.83673555496754, | |
| "learning_rate": 1.696075444240305e-08, | |
| "logits/chosen": -1.481490135192871, | |
| "logits/rejected": -1.3944220542907715, | |
| "logps/chosen": -23.669601440429688, | |
| "logps/rejected": -44.325191497802734, | |
| "loss": 0.0259, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.12855800986289978, | |
| "rewards/margins": 4.879583835601807, | |
| "rewards/rejected": -5.008142471313477, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 7.169491525423728, | |
| "grad_norm": 5.610847549034329, | |
| "learning_rate": 1.6296504511531834e-08, | |
| "logits/chosen": -1.418038249015808, | |
| "logits/rejected": -1.3723573684692383, | |
| "logps/chosen": -27.353422164916992, | |
| "logps/rejected": -49.684974670410156, | |
| "loss": 0.0335, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.7425976395606995, | |
| "rewards/margins": 4.4584550857543945, | |
| "rewards/rejected": -5.201053142547607, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 7.186440677966102, | |
| "grad_norm": 5.7865923023012975, | |
| "learning_rate": 1.5645084716469776e-08, | |
| "logits/chosen": -1.6516090631484985, | |
| "logits/rejected": -1.5412788391113281, | |
| "logps/chosen": -33.868499755859375, | |
| "logps/rejected": -49.333709716796875, | |
| "loss": 0.0308, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.2233295440673828, | |
| "rewards/margins": 5.964905261993408, | |
| "rewards/rejected": -6.188235282897949, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 7.203389830508475, | |
| "grad_norm": 5.701505367137342, | |
| "learning_rate": 1.5006530819644923e-08, | |
| "logits/chosen": -1.416776418685913, | |
| "logits/rejected": -1.5832912921905518, | |
| "logps/chosen": -32.08591842651367, | |
| "logps/rejected": -47.3320198059082, | |
| "loss": 0.0324, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.342715859413147, | |
| "rewards/margins": 5.240063667297363, | |
| "rewards/rejected": -5.582779884338379, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 7.220338983050848, | |
| "grad_norm": 4.437542789195646, | |
| "learning_rate": 1.4380877877157832e-08, | |
| "logits/chosen": -1.4847077131271362, | |
| "logits/rejected": -1.4457132816314697, | |
| "logps/chosen": -32.51253128051758, | |
| "logps/rejected": -53.59151077270508, | |
| "loss": 0.0218, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.6102309226989746, | |
| "rewards/margins": 5.707812786102295, | |
| "rewards/rejected": -6.3180437088012695, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 7.237288135593221, | |
| "grad_norm": 5.065631167693018, | |
| "learning_rate": 1.3768160236856674e-08, | |
| "logits/chosen": -1.4158098697662354, | |
| "logits/rejected": -1.4317882061004639, | |
| "logps/chosen": -29.830453872680664, | |
| "logps/rejected": -54.25461196899414, | |
| "loss": 0.024, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.038821518421173096, | |
| "rewards/margins": 5.01283597946167, | |
| "rewards/rejected": -5.051657676696777, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 7.254237288135593, | |
| "grad_norm": 5.220392921602965, | |
| "learning_rate": 1.316841153645215e-08, | |
| "logits/chosen": -1.560931921005249, | |
| "logits/rejected": -1.5064420700073242, | |
| "logps/chosen": -30.01360321044922, | |
| "logps/rejected": -48.26612854003906, | |
| "loss": 0.0276, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.3863828778266907, | |
| "rewards/margins": 5.301400184631348, | |
| "rewards/rejected": -5.687783241271973, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 7.271186440677966, | |
| "grad_norm": 5.688311376504444, | |
| "learning_rate": 1.2581664701670296e-08, | |
| "logits/chosen": -1.52787446975708, | |
| "logits/rejected": -1.4021316766738892, | |
| "logps/chosen": -28.248119354248047, | |
| "logps/rejected": -41.31006622314453, | |
| "loss": 0.0392, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.3207528591156006, | |
| "rewards/margins": 5.154188632965088, | |
| "rewards/rejected": -5.474941253662109, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 7.288135593220339, | |
| "grad_norm": 5.800049450685864, | |
| "learning_rate": 1.2007951944445121e-08, | |
| "logits/chosen": -1.4032049179077148, | |
| "logits/rejected": -1.3456979990005493, | |
| "logps/chosen": -24.016460418701172, | |
| "logps/rejected": -41.04709243774414, | |
| "loss": 0.0278, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.04568904638290405, | |
| "rewards/margins": 3.968392848968506, | |
| "rewards/rejected": -4.0140814781188965, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 7.305084745762712, | |
| "grad_norm": 6.591249552173131, | |
| "learning_rate": 1.144730476115019e-08, | |
| "logits/chosen": -1.4663312435150146, | |
| "logits/rejected": -1.437060832977295, | |
| "logps/chosen": -25.586219787597656, | |
| "logps/rejected": -59.44462585449219, | |
| "loss": 0.0416, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.418854296207428, | |
| "rewards/margins": 6.551999092102051, | |
| "rewards/rejected": -6.970853328704834, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 7.322033898305085, | |
| "grad_norm": 5.1436479741963765, | |
| "learning_rate": 1.0899753930869394e-08, | |
| "logits/chosen": -1.479379415512085, | |
| "logits/rejected": -1.4856846332550049, | |
| "logps/chosen": -24.105491638183594, | |
| "logps/rejected": -42.0874137878418, | |
| "loss": 0.0283, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.016550958156585693, | |
| "rewards/margins": 4.623503684997559, | |
| "rewards/rejected": -4.640054702758789, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 7.338983050847458, | |
| "grad_norm": 6.13135090791532, | |
| "learning_rate": 1.036532951370736e-08, | |
| "logits/chosen": -1.5114121437072754, | |
| "logits/rejected": -1.487284541130066, | |
| "logps/chosen": -30.636520385742188, | |
| "logps/rejected": -55.582489013671875, | |
| "loss": 0.0421, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.0013150721788406372, | |
| "rewards/margins": 6.1965837478637695, | |
| "rewards/rejected": -6.1952691078186035, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 7.3559322033898304, | |
| "grad_norm": 5.332642447483903, | |
| "learning_rate": 9.844060849138997e-09, | |
| "logits/chosen": -1.599272608757019, | |
| "logits/rejected": -1.610948085784912, | |
| "logps/chosen": -24.916534423828125, | |
| "logps/rejected": -39.19096755981445, | |
| "loss": 0.0336, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.01982739567756653, | |
| "rewards/margins": 4.651766777038574, | |
| "rewards/rejected": -4.671594142913818, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 7.372881355932203, | |
| "grad_norm": 6.134889313671512, | |
| "learning_rate": 9.335976554398912e-09, | |
| "logits/chosen": -1.540908932685852, | |
| "logits/rejected": -1.5109443664550781, | |
| "logps/chosen": -32.279666900634766, | |
| "logps/rejected": -38.13484191894531, | |
| "loss": 0.0326, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.6547858715057373, | |
| "rewards/margins": 3.962502956390381, | |
| "rewards/rejected": -4.617289066314697, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 7.389830508474576, | |
| "grad_norm": 5.165828974962686, | |
| "learning_rate": 8.841104522910342e-09, | |
| "logits/chosen": -1.5303874015808105, | |
| "logits/rejected": -1.4300156831741333, | |
| "logps/chosen": -33.03150939941406, | |
| "logps/rejected": -49.22145080566406, | |
| "loss": 0.0242, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.05898144841194153, | |
| "rewards/margins": 5.506203651428223, | |
| "rewards/rejected": -5.565184593200684, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 7.406779661016949, | |
| "grad_norm": 5.59136345566115, | |
| "learning_rate": 8.359471922753714e-09, | |
| "logits/chosen": -1.5242373943328857, | |
| "logits/rejected": -1.3456315994262695, | |
| "logps/chosen": -29.918407440185547, | |
| "logps/rejected": -53.36083984375, | |
| "loss": 0.0359, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.07045301795005798, | |
| "rewards/margins": 5.991230487823486, | |
| "rewards/rejected": -5.920777320861816, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 7.423728813559322, | |
| "grad_norm": 5.700922848308937, | |
| "learning_rate": 7.891105195175356e-09, | |
| "logits/chosen": -1.421931505203247, | |
| "logits/rejected": -1.4313148260116577, | |
| "logps/chosen": -30.72463607788086, | |
| "logps/rejected": -39.33094787597656, | |
| "loss": 0.0314, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.35566258430480957, | |
| "rewards/margins": 4.320624351501465, | |
| "rewards/rejected": -4.676286697387695, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 7.440677966101695, | |
| "grad_norm": 4.895447924779327, | |
| "learning_rate": 7.4360300531355894e-09, | |
| "logits/chosen": -1.2597088813781738, | |
| "logits/rejected": -1.1974666118621826, | |
| "logps/chosen": -34.747318267822266, | |
| "logps/rejected": -60.813480377197266, | |
| "loss": 0.028, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.4029719829559326, | |
| "rewards/margins": 5.782626628875732, | |
| "rewards/rejected": -6.185598373413086, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 7.4576271186440675, | |
| "grad_norm": 4.753436790014126, | |
| "learning_rate": 6.994271479897313e-09, | |
| "logits/chosen": -1.2228598594665527, | |
| "logits/rejected": -1.2148244380950928, | |
| "logps/chosen": -23.159412384033203, | |
| "logps/rejected": -39.971435546875, | |
| "loss": 0.0222, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20260876417160034, | |
| "rewards/margins": 4.695430278778076, | |
| "rewards/rejected": -4.49282169342041, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 7.47457627118644, | |
| "grad_norm": 5.465778826325062, | |
| "learning_rate": 6.565853727654502e-09, | |
| "logits/chosen": -1.6256568431854248, | |
| "logits/rejected": -1.6748077869415283, | |
| "logps/chosen": -35.98029327392578, | |
| "logps/rejected": -51.9576530456543, | |
| "loss": 0.0343, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.7835733890533447, | |
| "rewards/margins": 5.101205825805664, | |
| "rewards/rejected": -5.884779453277588, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 7.491525423728813, | |
| "grad_norm": 5.19068823671553, | |
| "learning_rate": 6.150800316200605e-09, | |
| "logits/chosen": -1.5622632503509521, | |
| "logits/rejected": -1.6051169633865356, | |
| "logps/chosen": -26.639972686767578, | |
| "logps/rejected": -39.97083282470703, | |
| "loss": 0.0279, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.18523569405078888, | |
| "rewards/margins": 5.357626914978027, | |
| "rewards/rejected": -5.172390937805176, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 7.508474576271187, | |
| "grad_norm": 5.290094026909458, | |
| "learning_rate": 5.7491340316373485e-09, | |
| "logits/chosen": -1.3767602443695068, | |
| "logits/rejected": -1.2797472476959229, | |
| "logps/chosen": -26.007776260375977, | |
| "logps/rejected": -53.1204719543457, | |
| "loss": 0.031, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.03208550810813904, | |
| "rewards/margins": 6.148884296417236, | |
| "rewards/rejected": -6.18096923828125, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 7.52542372881356, | |
| "grad_norm": 5.384685543036387, | |
| "learning_rate": 5.360876925123992e-09, | |
| "logits/chosen": -1.660988450050354, | |
| "logits/rejected": -1.495483160018921, | |
| "logps/chosen": -38.71052551269531, | |
| "logps/rejected": -63.9692497253418, | |
| "loss": 0.026, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.767485499382019, | |
| "rewards/margins": 6.337566375732422, | |
| "rewards/rejected": -7.105052947998047, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 7.5423728813559325, | |
| "grad_norm": 6.204818072148143, | |
| "learning_rate": 4.9860503116665176e-09, | |
| "logits/chosen": -1.7063543796539307, | |
| "logits/rejected": -1.692810297012329, | |
| "logps/chosen": -25.562942504882812, | |
| "logps/rejected": -48.51639175415039, | |
| "loss": 0.041, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.01574818789958954, | |
| "rewards/margins": 4.9537529945373535, | |
| "rewards/rejected": -4.969500541687012, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 7.559322033898305, | |
| "grad_norm": 5.655663554463141, | |
| "learning_rate": 4.624674768947484e-09, | |
| "logits/chosen": -1.598649024963379, | |
| "logits/rejected": -1.4650629758834839, | |
| "logps/chosen": -27.018341064453125, | |
| "logps/rejected": -46.08781051635742, | |
| "loss": 0.0369, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.0027409642934799194, | |
| "rewards/margins": 4.6786322593688965, | |
| "rewards/rejected": -4.681373596191406, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 7.576271186440678, | |
| "grad_norm": 5.701136400159739, | |
| "learning_rate": 4.2767701361964835e-09, | |
| "logits/chosen": -1.3610193729400635, | |
| "logits/rejected": -1.3563426733016968, | |
| "logps/chosen": -35.585716247558594, | |
| "logps/rejected": -50.32651901245117, | |
| "loss": 0.0389, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.6664023995399475, | |
| "rewards/margins": 4.827932834625244, | |
| "rewards/rejected": -5.494334697723389, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 7.593220338983051, | |
| "grad_norm": 6.209794879281624, | |
| "learning_rate": 3.942355513100792e-09, | |
| "logits/chosen": -1.4733314514160156, | |
| "logits/rejected": -1.4402073621749878, | |
| "logps/chosen": -28.033966064453125, | |
| "logps/rejected": -54.08881378173828, | |
| "loss": 0.0345, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.519957423210144, | |
| "rewards/margins": 5.857639312744141, | |
| "rewards/rejected": -6.377596855163574, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 7.610169491525424, | |
| "grad_norm": 4.997367172232428, | |
| "learning_rate": 3.6214492587569313e-09, | |
| "logits/chosen": -1.5576658248901367, | |
| "logits/rejected": -1.6168861389160156, | |
| "logps/chosen": -32.894317626953125, | |
| "logps/rejected": -41.69158172607422, | |
| "loss": 0.0284, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.22915619611740112, | |
| "rewards/margins": 4.693375110626221, | |
| "rewards/rejected": -4.922531604766846, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 7.627118644067797, | |
| "grad_norm": 5.4083679319206555, | |
| "learning_rate": 3.314068990662805e-09, | |
| "logits/chosen": -1.619524359703064, | |
| "logits/rejected": -1.469055414199829, | |
| "logps/chosen": -25.54304313659668, | |
| "logps/rejected": -38.69428634643555, | |
| "loss": 0.0327, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.059964776039123535, | |
| "rewards/margins": 5.082298278808594, | |
| "rewards/rejected": -5.02233362197876, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 7.6440677966101696, | |
| "grad_norm": 5.217030218721459, | |
| "learning_rate": 3.0202315837502545e-09, | |
| "logits/chosen": -1.4242594242095947, | |
| "logits/rejected": -1.439193844795227, | |
| "logps/chosen": -31.73979949951172, | |
| "logps/rejected": -39.235633850097656, | |
| "loss": 0.0321, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.5601508617401123, | |
| "rewards/margins": 4.23047399520874, | |
| "rewards/rejected": -4.790624618530273, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 7.661016949152542, | |
| "grad_norm": 5.102186940922818, | |
| "learning_rate": 2.7399531694589917e-09, | |
| "logits/chosen": -1.5301525592803955, | |
| "logits/rejected": -1.5297596454620361, | |
| "logps/chosen": -27.26993179321289, | |
| "logps/rejected": -46.745147705078125, | |
| "loss": 0.0316, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.15520310401916504, | |
| "rewards/margins": 5.617114543914795, | |
| "rewards/rejected": -5.772317409515381, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 7.677966101694915, | |
| "grad_norm": 5.403786442321042, | |
| "learning_rate": 2.473249134850808e-09, | |
| "logits/chosen": -1.3538662195205688, | |
| "logits/rejected": -1.3246500492095947, | |
| "logps/chosen": -22.38913345336914, | |
| "logps/rejected": -45.14683532714844, | |
| "loss": 0.0325, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.044996634125709534, | |
| "rewards/margins": 4.89539098739624, | |
| "rewards/rejected": -4.850394248962402, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 7.694915254237288, | |
| "grad_norm": 6.421766143852716, | |
| "learning_rate": 2.220134121764833e-09, | |
| "logits/chosen": -1.4915080070495605, | |
| "logits/rejected": -1.452484130859375, | |
| "logps/chosen": -16.578571319580078, | |
| "logps/rejected": -37.22169876098633, | |
| "loss": 0.0441, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.8915931582450867, | |
| "rewards/margins": 5.497744560241699, | |
| "rewards/rejected": -4.606151580810547, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 7.711864406779661, | |
| "grad_norm": 5.924650241904462, | |
| "learning_rate": 1.9806220260137065e-09, | |
| "logits/chosen": -1.5789787769317627, | |
| "logits/rejected": -1.4192255735397339, | |
| "logps/chosen": -30.962202072143555, | |
| "logps/rejected": -46.20201873779297, | |
| "loss": 0.0511, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.06508506834506989, | |
| "rewards/margins": 5.4069318771362305, | |
| "rewards/rejected": -5.341846466064453, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 7.728813559322034, | |
| "grad_norm": 5.712693057718049, | |
| "learning_rate": 1.7547259966207705e-09, | |
| "logits/chosen": -1.5617210865020752, | |
| "logits/rejected": -1.489423394203186, | |
| "logps/chosen": -30.215980529785156, | |
| "logps/rejected": -46.79343795776367, | |
| "loss": 0.0302, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.2140485942363739, | |
| "rewards/margins": 6.268076419830322, | |
| "rewards/rejected": -6.482125282287598, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 7.745762711864407, | |
| "grad_norm": 4.765004207763158, | |
| "learning_rate": 1.5424584350981485e-09, | |
| "logits/chosen": -1.5075204372406006, | |
| "logits/rejected": -1.446047067642212, | |
| "logps/chosen": -25.82567596435547, | |
| "logps/rejected": -43.98649597167969, | |
| "loss": 0.0296, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.2662041187286377, | |
| "rewards/margins": 5.002760887145996, | |
| "rewards/rejected": -5.268965244293213, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 7.762711864406779, | |
| "grad_norm": 4.613094215982073, | |
| "learning_rate": 1.343830994765982e-09, | |
| "logits/chosen": -1.5371060371398926, | |
| "logits/rejected": -1.4451144933700562, | |
| "logps/chosen": -26.201602935791016, | |
| "logps/rejected": -57.06352996826172, | |
| "loss": 0.0367, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.1458522379398346, | |
| "rewards/margins": 6.390477657318115, | |
| "rewards/rejected": -6.53632926940918, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 7.779661016949152, | |
| "grad_norm": 4.4872892882814535, | |
| "learning_rate": 1.1588545801125837e-09, | |
| "logits/chosen": -1.7496167421340942, | |
| "logits/rejected": -1.6176550388336182, | |
| "logps/chosen": -36.218074798583984, | |
| "logps/rejected": -55.342689514160156, | |
| "loss": 0.0286, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.4042947292327881, | |
| "rewards/margins": 5.6281538009643555, | |
| "rewards/rejected": -6.032447814941406, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 7.796610169491525, | |
| "grad_norm": 5.575240209393775, | |
| "learning_rate": 9.87539346195776e-10, | |
| "logits/chosen": -1.4099677801132202, | |
| "logits/rejected": -1.2582119703292847, | |
| "logps/chosen": -29.023414611816406, | |
| "logps/rejected": -42.5074577331543, | |
| "loss": 0.0318, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.27348968386650085, | |
| "rewards/margins": 4.847697734832764, | |
| "rewards/rejected": -5.121187210083008, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 7.813559322033898, | |
| "grad_norm": 5.199396752692828, | |
| "learning_rate": 8.298946980855315e-10, | |
| "logits/chosen": -1.472285509109497, | |
| "logits/rejected": -1.330193281173706, | |
| "logps/chosen": -29.86899185180664, | |
| "logps/rejected": -41.80485153198242, | |
| "loss": 0.0294, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.15588414669036865, | |
| "rewards/margins": 5.636477470397949, | |
| "rewards/rejected": -5.792361259460449, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 7.830508474576272, | |
| "grad_norm": 4.521518393613341, | |
| "learning_rate": 6.8592929034747e-10, | |
| "logits/chosen": -1.4151493310928345, | |
| "logits/rejected": -1.4073522090911865, | |
| "logps/chosen": -27.720748901367188, | |
| "logps/rejected": -53.330909729003906, | |
| "loss": 0.0224, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.1728513091802597, | |
| "rewards/margins": 5.166114807128906, | |
| "rewards/rejected": -5.338965892791748, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 7.847457627118644, | |
| "grad_norm": 4.567347653583189, | |
| "learning_rate": 5.556510265678771e-10, | |
| "logits/chosen": -1.570703387260437, | |
| "logits/rejected": -1.5427438020706177, | |
| "logps/chosen": -24.184301376342773, | |
| "logps/rejected": -44.638912200927734, | |
| "loss": 0.0259, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.29552435874938965, | |
| "rewards/margins": 5.448472499847412, | |
| "rewards/rejected": -5.743997573852539, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 7.864406779661017, | |
| "grad_norm": 5.544082314499855, | |
| "learning_rate": 4.390670589196621e-10, | |
| "logits/chosen": -1.6661534309387207, | |
| "logits/rejected": -1.5269910097122192, | |
| "logps/chosen": -24.99061393737793, | |
| "logps/rejected": -45.773014068603516, | |
| "loss": 0.0279, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.4490078091621399, | |
| "rewards/margins": 5.556340217590332, | |
| "rewards/rejected": -6.005348205566406, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 7.88135593220339, | |
| "grad_norm": 4.867190786041254, | |
| "learning_rate": 3.3618378776981147e-10, | |
| "logits/chosen": -1.6171151399612427, | |
| "logits/rejected": -1.5128624439239502, | |
| "logps/chosen": -28.165693283081055, | |
| "logps/rejected": -41.43475341796875, | |
| "loss": 0.0235, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2984722852706909, | |
| "rewards/margins": 4.441769599914551, | |
| "rewards/rejected": -4.14329719543457, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 7.898305084745763, | |
| "grad_norm": 5.660749716669039, | |
| "learning_rate": 2.4700686132803075e-10, | |
| "logits/chosen": -1.5283398628234863, | |
| "logits/rejected": -1.484012484550476, | |
| "logps/chosen": -29.490188598632812, | |
| "logps/rejected": -45.15831756591797, | |
| "loss": 0.035, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.029849261045455933, | |
| "rewards/margins": 5.0221357345581055, | |
| "rewards/rejected": -4.992286682128906, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 7.915254237288136, | |
| "grad_norm": 6.365487263229808, | |
| "learning_rate": 1.715411753365481e-10, | |
| "logits/chosen": -1.649171233177185, | |
| "logits/rejected": -1.701267957687378, | |
| "logps/chosen": -25.891307830810547, | |
| "logps/rejected": -47.70183181762695, | |
| "loss": 0.0396, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.6622889041900635, | |
| "rewards/margins": 5.317337989807129, | |
| "rewards/rejected": -5.9796271324157715, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 7.932203389830509, | |
| "grad_norm": 4.885763801093592, | |
| "learning_rate": 1.0979087280141297e-10, | |
| "logits/chosen": -1.2668333053588867, | |
| "logits/rejected": -1.3214877843856812, | |
| "logps/chosen": -21.41905403137207, | |
| "logps/rejected": -38.722625732421875, | |
| "loss": 0.0272, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.182024747133255, | |
| "rewards/margins": 4.632743835449219, | |
| "rewards/rejected": -4.814768314361572, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 7.9491525423728815, | |
| "grad_norm": 5.3854848332900085, | |
| "learning_rate": 6.175934376509429e-11, | |
| "logits/chosen": -1.5046411752700806, | |
| "logits/rejected": -1.5680880546569824, | |
| "logps/chosen": -28.198741912841797, | |
| "logps/rejected": -59.644596099853516, | |
| "loss": 0.0301, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.0053573548793792725, | |
| "rewards/margins": 6.178599834442139, | |
| "rewards/rejected": -6.173242568969727, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 7.966101694915254, | |
| "grad_norm": 4.75232754040335, | |
| "learning_rate": 2.7449225120268482e-11, | |
| "logits/chosen": -1.4780237674713135, | |
| "logits/rejected": -1.427555799484253, | |
| "logps/chosen": -27.112102508544922, | |
| "logps/rejected": -52.26079177856445, | |
| "loss": 0.0277, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.19119581580162048, | |
| "rewards/margins": 6.167375087738037, | |
| "rewards/rejected": -6.3585710525512695, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 7.983050847457627, | |
| "grad_norm": 5.233750895631027, | |
| "learning_rate": 6.862400465157403e-12, | |
| "logits/chosen": -1.5693848133087158, | |
| "logits/rejected": -1.5205610990524292, | |
| "logps/chosen": -33.29484558105469, | |
| "logps/rejected": -41.09092712402344, | |
| "loss": 0.0276, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.30308201909065247, | |
| "rewards/margins": 4.717187881469727, | |
| "rewards/rejected": -5.020270347595215, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 6.397324083693935, | |
| "learning_rate": 0.0, | |
| "logits/chosen": -1.639676570892334, | |
| "logits/rejected": -1.6202343702316284, | |
| "logps/chosen": -33.1722526550293, | |
| "logps/rejected": -40.651100158691406, | |
| "loss": 0.0363, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.057152003049850464, | |
| "rewards/margins": 4.614887237548828, | |
| "rewards/rejected": -4.672039031982422, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "step": 472, | |
| "total_flos": 0.0, | |
| "train_loss": 0.15622181608736263, | |
| "train_runtime": 4721.8854, | |
| "train_samples_per_second": 12.788, | |
| "train_steps_per_second": 0.1 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 472, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 8, | |
| "save_steps": 400, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |