Llama-3.1-8B-Instruct-ppo-250 / trainer_log.jsonl
chchen's picture
Upload 14 files
4557d00 verified
{"current_steps": 10, "total_steps": 150, "loss": 5.6043, "reward": 14.4635, "lr": 5.625e-06, "epoch": 0.6, "percentage": 6.67, "elapsed_time": "0:12:54", "remaining_time": "3:00:43"}
{"current_steps": 20, "total_steps": 150, "loss": 2.8511, "reward": 14.7847, "lr": 9.989294616193018e-06, "epoch": 1.27, "percentage": 13.33, "elapsed_time": "0:26:23", "remaining_time": "2:51:33"}
{"current_steps": 30, "total_steps": 150, "loss": 1.8762, "reward": 14.1969, "lr": 9.800249271929645e-06, "epoch": 1.93, "percentage": 20.0, "elapsed_time": "0:39:28", "remaining_time": "2:37:53"}
{"current_steps": 40, "total_steps": 150, "loss": 1.5917, "reward": 13.6521, "lr": 9.38363377853754e-06, "epoch": 2.6, "percentage": 26.67, "elapsed_time": "0:53:09", "remaining_time": "2:26:10"}
{"current_steps": 50, "total_steps": 150, "loss": 1.1798, "reward": 13.6497, "lr": 8.759199037394888e-06, "epoch": 3.27, "percentage": 33.33, "elapsed_time": "1:07:16", "remaining_time": "2:14:32"}
{"current_steps": 60, "total_steps": 150, "loss": 0.7525, "reward": 14.6839, "lr": 7.956548241817914e-06, "epoch": 3.93, "percentage": 40.0, "elapsed_time": "1:21:19", "remaining_time": "2:01:59"}
{"current_steps": 70, "total_steps": 150, "loss": 0.8999, "reward": 15.1988, "lr": 7.0137334492936875e-06, "epoch": 4.6, "percentage": 46.67, "elapsed_time": "1:33:42", "remaining_time": "1:47:05"}
{"current_steps": 80, "total_steps": 150, "loss": 0.9507, "reward": 15.1846, "lr": 5.975451610080643e-06, "epoch": 5.27, "percentage": 53.33, "elapsed_time": "1:45:36", "remaining_time": "1:32:24"}
{"current_steps": 90, "total_steps": 150, "loss": 0.7962, "reward": 15.0285, "lr": 4.890925574827195e-06, "epoch": 5.93, "percentage": 60.0, "elapsed_time": "1:59:30", "remaining_time": "1:19:40"}
{"current_steps": 100, "total_steps": 150, "loss": 0.9013, "reward": 15.9189, "lr": 3.8115705383691354e-06, "epoch": 6.6, "percentage": 66.67, "elapsed_time": "2:13:16", "remaining_time": "1:06:38"}
{"current_steps": 110, "total_steps": 150, "loss": 0.6388, "reward": 15.8048, "lr": 2.7885565489049948e-06, "epoch": 7.27, "percentage": 73.33, "elapsed_time": "2:23:51", "remaining_time": "0:52:18"}
{"current_steps": 120, "total_steps": 150, "loss": 0.3787, "reward": 15.6162, "lr": 1.8703826390797047e-06, "epoch": 7.93, "percentage": 80.0, "elapsed_time": "2:35:31", "remaining_time": "0:38:52"}
{"current_steps": 130, "total_steps": 150, "loss": 0.6496, "reward": 16.7622, "lr": 1.100577584535592e-06, "epoch": 8.6, "percentage": 86.67, "elapsed_time": "2:45:54", "remaining_time": "0:25:31"}
{"current_steps": 140, "total_steps": 150, "loss": 0.3453, "reward": 16.8219, "lr": 5.156362923365587e-07, "epoch": 9.27, "percentage": 93.33, "elapsed_time": "2:57:49", "remaining_time": "0:12:42"}
{"current_steps": 150, "total_steps": 150, "loss": 0.3208, "reward": 16.369, "lr": 1.4328965093369284e-07, "epoch": 9.93, "percentage": 100.0, "elapsed_time": "3:10:51", "remaining_time": "0:00:00"}