gemmalorafull / trainer_log.jsonl
jgayed's picture
Upload folder using huggingface_hub
5c53087 verified
{"current_steps": 5, "total_steps": 480, "loss": 39.5171, "lr": 4.9986614686909146e-05, "epoch": 0.08333333333333333, "percentage": 1.04, "elapsed_time": "0:00:32", "remaining_time": "0:51:25", "throughput": 1449.05, "total_tokens": 47056}
{"current_steps": 10, "total_steps": 480, "loss": 3.2466, "lr": 4.994647308096509e-05, "epoch": 0.16666666666666666, "percentage": 2.08, "elapsed_time": "0:01:03", "remaining_time": "0:49:59", "throughput": 1465.67, "total_tokens": 93544}
{"current_steps": 15, "total_steps": 480, "loss": 3.3122, "lr": 4.987961816680492e-05, "epoch": 0.25, "percentage": 3.12, "elapsed_time": "0:01:35", "remaining_time": "0:49:28", "throughput": 1473.55, "total_tokens": 141120}
{"current_steps": 20, "total_steps": 480, "loss": 3.0265, "lr": 4.9786121534345265e-05, "epoch": 0.3333333333333333, "percentage": 4.17, "elapsed_time": "0:02:07", "remaining_time": "0:49:00", "throughput": 1474.8, "total_tokens": 188544}
{"current_steps": 25, "total_steps": 480, "loss": 2.6989, "lr": 4.966608330212198e-05, "epoch": 0.4166666666666667, "percentage": 5.21, "elapsed_time": "0:02:39", "remaining_time": "0:48:19", "throughput": 1475.96, "total_tokens": 235128}
{"current_steps": 30, "total_steps": 480, "loss": 2.2284, "lr": 4.951963201008076e-05, "epoch": 0.5, "percentage": 6.25, "elapsed_time": "0:03:10", "remaining_time": "0:47:41", "throughput": 1477.49, "total_tokens": 281904}
{"current_steps": 35, "total_steps": 480, "loss": 2.2164, "lr": 4.934692448193334e-05, "epoch": 0.5833333333333334, "percentage": 7.29, "elapsed_time": "0:03:43", "remaining_time": "0:47:16", "throughput": 1479.53, "total_tokens": 330096}
{"current_steps": 40, "total_steps": 480, "loss": 2.6854, "lr": 4.914814565722671e-05, "epoch": 0.6666666666666666, "percentage": 8.33, "elapsed_time": "0:04:14", "remaining_time": "0:46:41", "throughput": 1479.57, "total_tokens": 376824}
{"current_steps": 45, "total_steps": 480, "loss": 2.4108, "lr": 4.892350839330522e-05, "epoch": 0.75, "percentage": 9.38, "elapsed_time": "0:04:46", "remaining_time": "0:46:10", "throughput": 1479.42, "total_tokens": 424048}
{"current_steps": 50, "total_steps": 480, "loss": 2.419, "lr": 4.867325323737765e-05, "epoch": 0.8333333333333334, "percentage": 10.42, "elapsed_time": "0:05:18", "remaining_time": "0:45:35", "throughput": 1478.92, "total_tokens": 470432}
{"current_steps": 55, "total_steps": 480, "loss": 2.3183, "lr": 4.839764816893315e-05, "epoch": 0.9166666666666666, "percentage": 11.46, "elapsed_time": "0:05:50", "remaining_time": "0:45:04", "throughput": 1478.9, "total_tokens": 517624}
{"current_steps": 60, "total_steps": 480, "loss": 1.9734, "lr": 4.8096988312782174e-05, "epoch": 1.0, "percentage": 12.5, "elapsed_time": "0:06:21", "remaining_time": "0:44:33", "throughput": 1479.13, "total_tokens": 564920}
{"current_steps": 65, "total_steps": 480, "loss": 1.8086, "lr": 4.7771595623029394e-05, "epoch": 1.0833333333333333, "percentage": 13.54, "elapsed_time": "0:06:53", "remaining_time": "0:44:00", "throughput": 1478.11, "total_tokens": 611240}
{"current_steps": 70, "total_steps": 480, "loss": 2.4591, "lr": 4.742181853831721e-05, "epoch": 1.1666666666666667, "percentage": 14.58, "elapsed_time": "0:07:25", "remaining_time": "0:43:31", "throughput": 1477.8, "total_tokens": 658904}
{"current_steps": 75, "total_steps": 480, "loss": 1.7169, "lr": 4.7048031608708876e-05, "epoch": 1.25, "percentage": 15.62, "elapsed_time": "0:07:58", "remaining_time": "0:43:01", "throughput": 1477.85, "total_tokens": 706480}
{"current_steps": 80, "total_steps": 480, "loss": 1.809, "lr": 4.665063509461097e-05, "epoch": 1.3333333333333333, "percentage": 16.67, "elapsed_time": "0:08:29", "remaining_time": "0:42:28", "throughput": 1477.14, "total_tokens": 752904}
{"current_steps": 85, "total_steps": 480, "loss": 2.3907, "lr": 4.6230054538164474e-05, "epoch": 1.4166666666666667, "percentage": 17.71, "elapsed_time": "0:09:01", "remaining_time": "0:41:57", "throughput": 1477.29, "total_tokens": 800304}
{"current_steps": 90, "total_steps": 480, "loss": 2.0662, "lr": 4.5786740307563636e-05, "epoch": 1.5, "percentage": 18.75, "elapsed_time": "0:09:33", "remaining_time": "0:41:24", "throughput": 1477.1, "total_tokens": 846792}
{"current_steps": 95, "total_steps": 480, "loss": 2.0185, "lr": 4.5321167114790385e-05, "epoch": 1.5833333333333335, "percentage": 19.79, "elapsed_time": "0:10:05", "remaining_time": "0:40:53", "throughput": 1477.68, "total_tokens": 894552}
{"current_steps": 100, "total_steps": 480, "loss": 1.9831, "lr": 4.4833833507280884e-05, "epoch": 1.6666666666666665, "percentage": 20.83, "elapsed_time": "0:10:36", "remaining_time": "0:40:20", "throughput": 1477.46, "total_tokens": 941040}
{"current_steps": 105, "total_steps": 480, "loss": 2.0873, "lr": 4.4325261334068426e-05, "epoch": 1.75, "percentage": 21.88, "elapsed_time": "0:11:15", "remaining_time": "0:40:11", "throughput": 1462.63, "total_tokens": 987776}
{"current_steps": 110, "total_steps": 480, "loss": 1.9163, "lr": 4.379599518697444e-05, "epoch": 1.8333333333333335, "percentage": 22.92, "elapsed_time": "0:11:47", "remaining_time": "0:39:40", "throughput": 1464.01, "total_tokens": 1036072}
{"current_steps": 115, "total_steps": 480, "loss": 1.4848, "lr": 4.324660181744589e-05, "epoch": 1.9166666666666665, "percentage": 23.96, "elapsed_time": "0:12:19", "remaining_time": "0:39:07", "throughput": 1464.7, "total_tokens": 1083328}
{"current_steps": 120, "total_steps": 480, "loss": 1.7719, "lr": 4.267766952966369e-05, "epoch": 2.0, "percentage": 25.0, "elapsed_time": "0:12:50", "remaining_time": "0:38:32", "throughput": 1465.6, "total_tokens": 1129840}
{"current_steps": 125, "total_steps": 480, "loss": 1.4241, "lr": 4.208980755057178e-05, "epoch": 2.0833333333333335, "percentage": 26.04, "elapsed_time": "0:13:22", "remaining_time": "0:38:00", "throughput": 1466.59, "total_tokens": 1177616}
{"current_steps": 130, "total_steps": 480, "loss": 1.6277, "lr": 4.148364537750172e-05, "epoch": 2.1666666666666665, "percentage": 27.08, "elapsed_time": "0:13:55", "remaining_time": "0:37:28", "throughput": 1467.19, "total_tokens": 1225296}
{"current_steps": 135, "total_steps": 480, "loss": 0.9761, "lr": 4.085983210409114e-05, "epoch": 2.25, "percentage": 28.12, "elapsed_time": "0:14:26", "remaining_time": "0:36:54", "throughput": 1467.83, "total_tokens": 1272160}
{"current_steps": 140, "total_steps": 480, "loss": 1.0144, "lr": 4.021903572521802e-05, "epoch": 2.3333333333333335, "percentage": 29.17, "elapsed_time": "0:14:58", "remaining_time": "0:36:22", "throughput": 1467.95, "total_tokens": 1319432}
{"current_steps": 145, "total_steps": 480, "loss": 1.3858, "lr": 3.956194242169506e-05, "epoch": 2.4166666666666665, "percentage": 30.21, "elapsed_time": "0:15:30", "remaining_time": "0:35:49", "throughput": 1467.95, "total_tokens": 1365992}
{"current_steps": 150, "total_steps": 480, "loss": 0.9969, "lr": 3.888925582549006e-05, "epoch": 2.5, "percentage": 31.25, "elapsed_time": "0:16:02", "remaining_time": "0:35:16", "throughput": 1468.23, "total_tokens": 1412712}
{"current_steps": 155, "total_steps": 480, "loss": 1.0721, "lr": 3.82016962662592e-05, "epoch": 2.5833333333333335, "percentage": 32.29, "elapsed_time": "0:16:33", "remaining_time": "0:34:43", "throughput": 1468.7, "total_tokens": 1459448}
{"current_steps": 160, "total_steps": 480, "loss": 1.6622, "lr": 3.7500000000000003e-05, "epoch": 2.6666666666666665, "percentage": 33.33, "elapsed_time": "0:17:05", "remaining_time": "0:34:11", "throughput": 1469.07, "total_tokens": 1506944}
{"current_steps": 165, "total_steps": 480, "loss": 1.059, "lr": 3.678491842064995e-05, "epoch": 2.75, "percentage": 34.38, "elapsed_time": "0:17:37", "remaining_time": "0:33:39", "throughput": 1469.47, "total_tokens": 1554216}
{"current_steps": 170, "total_steps": 480, "loss": 0.774, "lr": 3.6057217255475034e-05, "epoch": 2.8333333333333335, "percentage": 35.42, "elapsed_time": "0:18:08", "remaining_time": "0:33:05", "throughput": 1469.8, "total_tokens": 1600608}
{"current_steps": 175, "total_steps": 480, "loss": 1.0403, "lr": 3.5317675745109866e-05, "epoch": 2.9166666666666665, "percentage": 36.46, "elapsed_time": "0:18:40", "remaining_time": "0:32:32", "throughput": 1470.15, "total_tokens": 1647272}
{"current_steps": 180, "total_steps": 480, "loss": 2.173, "lr": 3.456708580912725e-05, "epoch": 3.0, "percentage": 37.5, "elapsed_time": "0:19:12", "remaining_time": "0:32:00", "throughput": 1470.48, "total_tokens": 1694760}
{"current_steps": 185, "total_steps": 480, "loss": 1.9583, "lr": 3.380625119803084e-05, "epoch": 3.0833333333333335, "percentage": 38.54, "elapsed_time": "0:19:44", "remaining_time": "0:31:28", "throughput": 1470.8, "total_tokens": 1741936}
{"current_steps": 190, "total_steps": 480, "loss": 1.8407, "lr": 3.303598663257904e-05, "epoch": 3.1666666666666665, "percentage": 39.58, "elapsed_time": "0:20:15", "remaining_time": "0:30:55", "throughput": 1471.27, "total_tokens": 1788624}
{"current_steps": 195, "total_steps": 480, "loss": 1.1398, "lr": 3.225711693136156e-05, "epoch": 3.25, "percentage": 40.62, "elapsed_time": "0:20:47", "remaining_time": "0:30:23", "throughput": 1471.34, "total_tokens": 1835336}
{"current_steps": 200, "total_steps": 480, "loss": 0.8378, "lr": 3.147047612756302e-05, "epoch": 3.3333333333333335, "percentage": 41.67, "elapsed_time": "0:21:19", "remaining_time": "0:29:51", "throughput": 1471.31, "total_tokens": 1882584}
{"current_steps": 205, "total_steps": 480, "loss": 1.4633, "lr": 3.0676906575859334e-05, "epoch": 3.4166666666666665, "percentage": 42.71, "elapsed_time": "0:21:57", "remaining_time": "0:29:27", "throughput": 1464.23, "total_tokens": 1929344}
{"current_steps": 210, "total_steps": 480, "loss": 0.7289, "lr": 2.9877258050403212e-05, "epoch": 3.5, "percentage": 43.75, "elapsed_time": "0:22:29", "remaining_time": "0:28:54", "throughput": 1464.39, "total_tokens": 1976064}
{"current_steps": 215, "total_steps": 480, "loss": 0.9374, "lr": 2.9072386834864724e-05, "epoch": 3.5833333333333335, "percentage": 44.79, "elapsed_time": "0:23:01", "remaining_time": "0:28:22", "throughput": 1464.9, "total_tokens": 2023392}
{"current_steps": 220, "total_steps": 480, "loss": 0.8814, "lr": 2.8263154805501297e-05, "epoch": 3.6666666666666665, "percentage": 45.83, "elapsed_time": "0:23:33", "remaining_time": "0:27:50", "throughput": 1465.37, "total_tokens": 2070752}
{"current_steps": 225, "total_steps": 480, "loss": 0.8873, "lr": 2.7450428508239024e-05, "epoch": 3.75, "percentage": 46.88, "elapsed_time": "0:24:05", "remaining_time": "0:27:17", "throughput": 1465.82, "total_tokens": 2118464}
{"current_steps": 230, "total_steps": 480, "loss": 0.5864, "lr": 2.663507823075358e-05, "epoch": 3.8333333333333335, "percentage": 47.92, "elapsed_time": "0:24:37", "remaining_time": "0:26:45", "throughput": 1466.13, "total_tokens": 2165488}
{"current_steps": 235, "total_steps": 480, "loss": 0.7186, "lr": 2.5817977070544407e-05, "epoch": 3.9166666666666665, "percentage": 48.96, "elapsed_time": "0:25:08", "remaining_time": "0:26:12", "throughput": 1466.44, "total_tokens": 2212104}
{"current_steps": 240, "total_steps": 480, "loss": 0.8153, "lr": 2.5e-05, "epoch": 4.0, "percentage": 50.0, "elapsed_time": "0:25:40", "remaining_time": "0:25:40", "throughput": 1466.85, "total_tokens": 2259680}
{"current_steps": 245, "total_steps": 480, "loss": 0.5078, "lr": 2.41820229294556e-05, "epoch": 4.083333333333333, "percentage": 51.04, "elapsed_time": "0:26:12", "remaining_time": "0:25:08", "throughput": 1467.26, "total_tokens": 2307200}
{"current_steps": 250, "total_steps": 480, "loss": 0.5766, "lr": 2.3364921769246423e-05, "epoch": 4.166666666666667, "percentage": 52.08, "elapsed_time": "0:26:44", "remaining_time": "0:24:35", "throughput": 1467.56, "total_tokens": 2354416}
{"current_steps": 255, "total_steps": 480, "loss": 0.2176, "lr": 2.2549571491760986e-05, "epoch": 4.25, "percentage": 53.12, "elapsed_time": "0:27:15", "remaining_time": "0:24:03", "throughput": 1467.82, "total_tokens": 2400880}
{"current_steps": 260, "total_steps": 480, "loss": 0.6257, "lr": 2.173684519449872e-05, "epoch": 4.333333333333333, "percentage": 54.17, "elapsed_time": "0:27:47", "remaining_time": "0:23:30", "throughput": 1468.14, "total_tokens": 2448016}
{"current_steps": 265, "total_steps": 480, "loss": 0.4594, "lr": 2.0927613165135285e-05, "epoch": 4.416666666666667, "percentage": 55.21, "elapsed_time": "0:28:19", "remaining_time": "0:22:58", "throughput": 1468.36, "total_tokens": 2495032}
{"current_steps": 270, "total_steps": 480, "loss": 0.1073, "lr": 2.0122741949596797e-05, "epoch": 4.5, "percentage": 56.25, "elapsed_time": "0:28:50", "remaining_time": "0:22:26", "throughput": 1468.7, "total_tokens": 2542264}
{"current_steps": 275, "total_steps": 480, "loss": 0.7155, "lr": 1.932309342414067e-05, "epoch": 4.583333333333333, "percentage": 57.29, "elapsed_time": "0:29:22", "remaining_time": "0:21:54", "throughput": 1468.92, "total_tokens": 2589368}
{"current_steps": 280, "total_steps": 480, "loss": 1.0752, "lr": 1.852952387243698e-05, "epoch": 4.666666666666667, "percentage": 58.33, "elapsed_time": "0:29:54", "remaining_time": "0:21:21", "throughput": 1469.22, "total_tokens": 2636712}
{"current_steps": 285, "total_steps": 480, "loss": 0.7887, "lr": 1.7742883068638447e-05, "epoch": 4.75, "percentage": 59.38, "elapsed_time": "0:30:25", "remaining_time": "0:20:49", "throughput": 1469.35, "total_tokens": 2682328}
{"current_steps": 290, "total_steps": 480, "loss": 0.1958, "lr": 1.6964013367420966e-05, "epoch": 4.833333333333333, "percentage": 60.42, "elapsed_time": "0:30:57", "remaining_time": "0:20:16", "throughput": 1469.56, "total_tokens": 2729264}
{"current_steps": 295, "total_steps": 480, "loss": 0.1749, "lr": 1.6193748801969163e-05, "epoch": 4.916666666666667, "percentage": 61.46, "elapsed_time": "0:31:29", "remaining_time": "0:19:44", "throughput": 1469.97, "total_tokens": 2777032}
{"current_steps": 300, "total_steps": 480, "loss": 0.6815, "lr": 1.5432914190872757e-05, "epoch": 5.0, "percentage": 62.5, "elapsed_time": "0:32:01", "remaining_time": "0:19:12", "throughput": 1470.29, "total_tokens": 2824600}
{"current_steps": 305, "total_steps": 480, "loss": 0.4818, "lr": 1.4682324254890134e-05, "epoch": 5.083333333333333, "percentage": 63.54, "elapsed_time": "0:32:39", "remaining_time": "0:18:44", "throughput": 1465.58, "total_tokens": 2871552}
{"current_steps": 310, "total_steps": 480, "loss": 0.0944, "lr": 1.3942782744524973e-05, "epoch": 5.166666666666667, "percentage": 64.58, "elapsed_time": "0:33:11", "remaining_time": "0:18:11", "throughput": 1465.88, "total_tokens": 2918968}
{"current_steps": 315, "total_steps": 480, "loss": 0.0347, "lr": 1.3215081579350058e-05, "epoch": 5.25, "percentage": 65.62, "elapsed_time": "0:33:43", "remaining_time": "0:17:39", "throughput": 1466.19, "total_tokens": 2966784}
{"current_steps": 320, "total_steps": 480, "loss": 0.1513, "lr": 1.2500000000000006e-05, "epoch": 5.333333333333333, "percentage": 66.67, "elapsed_time": "0:34:14", "remaining_time": "0:17:07", "throughput": 1466.34, "total_tokens": 3013000}
{"current_steps": 325, "total_steps": 480, "loss": 0.6741, "lr": 1.1798303733740802e-05, "epoch": 5.416666666666667, "percentage": 67.71, "elapsed_time": "0:34:46", "remaining_time": "0:16:34", "throughput": 1466.57, "total_tokens": 3059640}
{"current_steps": 330, "total_steps": 480, "loss": 0.1384, "lr": 1.1110744174509952e-05, "epoch": 5.5, "percentage": 68.75, "elapsed_time": "0:35:17", "remaining_time": "0:16:02", "throughput": 1466.8, "total_tokens": 3106528}
{"current_steps": 335, "total_steps": 480, "loss": 0.1767, "lr": 1.043805757830495e-05, "epoch": 5.583333333333333, "percentage": 69.79, "elapsed_time": "0:35:49", "remaining_time": "0:15:30", "throughput": 1467.21, "total_tokens": 3154400}
{"current_steps": 340, "total_steps": 480, "loss": 0.0103, "lr": 9.780964274781984e-06, "epoch": 5.666666666666667, "percentage": 70.83, "elapsed_time": "0:36:21", "remaining_time": "0:14:58", "throughput": 1467.41, "total_tokens": 3201256}
{"current_steps": 345, "total_steps": 480, "loss": 0.2753, "lr": 9.140167895908867e-06, "epoch": 5.75, "percentage": 71.88, "elapsed_time": "0:36:53", "remaining_time": "0:14:26", "throughput": 1467.76, "total_tokens": 3248464}
{"current_steps": 350, "total_steps": 480, "loss": 0.1367, "lr": 8.51635462249828e-06, "epoch": 5.833333333333333, "percentage": 72.92, "elapsed_time": "0:37:24", "remaining_time": "0:13:53", "throughput": 1467.89, "total_tokens": 3294448}
{"current_steps": 355, "total_steps": 480, "loss": 1.0681, "lr": 7.910192449428217e-06, "epoch": 5.916666666666667, "percentage": 73.96, "elapsed_time": "0:37:55", "remaining_time": "0:13:21", "throughput": 1468.08, "total_tokens": 3341088}
{"current_steps": 360, "total_steps": 480, "loss": 0.0855, "lr": 7.3223304703363135e-06, "epoch": 6.0, "percentage": 75.0, "elapsed_time": "0:38:28", "remaining_time": "0:12:49", "throughput": 1468.36, "total_tokens": 3389520}
{"current_steps": 365, "total_steps": 480, "loss": 0.007, "lr": 6.753398182554116e-06, "epoch": 6.083333333333333, "percentage": 76.04, "elapsed_time": "0:38:59", "remaining_time": "0:12:17", "throughput": 1468.35, "total_tokens": 3435816}
{"current_steps": 370, "total_steps": 480, "loss": 0.0161, "lr": 6.204004813025568e-06, "epoch": 6.166666666666667, "percentage": 77.08, "elapsed_time": "0:39:31", "remaining_time": "0:11:44", "throughput": 1468.6, "total_tokens": 3482312}
{"current_steps": 375, "total_steps": 480, "loss": 0.002, "lr": 5.674738665931575e-06, "epoch": 6.25, "percentage": 78.12, "elapsed_time": "0:40:03", "remaining_time": "0:11:12", "throughput": 1468.57, "total_tokens": 3529280}
{"current_steps": 380, "total_steps": 480, "loss": 0.0501, "lr": 5.166166492719124e-06, "epoch": 6.333333333333333, "percentage": 79.17, "elapsed_time": "0:40:34", "remaining_time": "0:10:40", "throughput": 1468.72, "total_tokens": 3575952}
{"current_steps": 385, "total_steps": 480, "loss": 0.2303, "lr": 4.678832885209622e-06, "epoch": 6.416666666666667, "percentage": 80.21, "elapsed_time": "0:41:06", "remaining_time": "0:10:08", "throughput": 1468.95, "total_tokens": 3623744}
{"current_steps": 390, "total_steps": 480, "loss": 0.0015, "lr": 4.213259692436367e-06, "epoch": 6.5, "percentage": 81.25, "elapsed_time": "0:41:39", "remaining_time": "0:09:36", "throughput": 1469.08, "total_tokens": 3671256}
{"current_steps": 395, "total_steps": 480, "loss": 0.0139, "lr": 3.769945461835531e-06, "epoch": 6.583333333333333, "percentage": 82.29, "elapsed_time": "0:42:10", "remaining_time": "0:09:04", "throughput": 1469.13, "total_tokens": 3718144}
{"current_steps": 400, "total_steps": 480, "loss": 0.0001, "lr": 3.3493649053890326e-06, "epoch": 6.666666666666667, "percentage": 83.33, "elapsed_time": "0:42:43", "remaining_time": "0:08:32", "throughput": 1469.39, "total_tokens": 3766064}
{"current_steps": 405, "total_steps": 480, "loss": 0.0013, "lr": 2.9519683912911266e-06, "epoch": 6.75, "percentage": 84.38, "elapsed_time": "0:43:21", "remaining_time": "0:08:01", "throughput": 1465.66, "total_tokens": 3812744}
{"current_steps": 410, "total_steps": 480, "loss": 0.0275, "lr": 2.578181461682794e-06, "epoch": 6.833333333333333, "percentage": 85.42, "elapsed_time": "0:43:53", "remaining_time": "0:07:29", "throughput": 1465.89, "total_tokens": 3859792}
{"current_steps": 415, "total_steps": 480, "loss": 0.0002, "lr": 2.2284043769706027e-06, "epoch": 6.916666666666667, "percentage": 86.46, "elapsed_time": "0:44:24", "remaining_time": "0:06:57", "throughput": 1466.07, "total_tokens": 3906864}
{"current_steps": 420, "total_steps": 480, "loss": 0.0004, "lr": 1.9030116872178316e-06, "epoch": 7.0, "percentage": 87.5, "elapsed_time": "0:44:56", "remaining_time": "0:06:25", "throughput": 1466.41, "total_tokens": 3954440}
{"current_steps": 425, "total_steps": 480, "loss": 0.0001, "lr": 1.6023518310668618e-06, "epoch": 7.083333333333333, "percentage": 88.54, "elapsed_time": "0:45:28", "remaining_time": "0:05:53", "throughput": 1466.6, "total_tokens": 4001944}
{"current_steps": 430, "total_steps": 480, "loss": 0.0003, "lr": 1.3267467626223606e-06, "epoch": 7.166666666666667, "percentage": 89.58, "elapsed_time": "0:45:59", "remaining_time": "0:05:20", "throughput": 1466.67, "total_tokens": 4047736}
{"current_steps": 435, "total_steps": 480, "loss": 0.0001, "lr": 1.0764916066947794e-06, "epoch": 7.25, "percentage": 90.62, "elapsed_time": "0:46:31", "remaining_time": "0:04:48", "throughput": 1466.93, "total_tokens": 4095480}
{"current_steps": 440, "total_steps": 480, "loss": 0.0001, "lr": 8.51854342773295e-07, "epoch": 7.333333333333333, "percentage": 91.67, "elapsed_time": "0:47:03", "remaining_time": "0:04:16", "throughput": 1467.12, "total_tokens": 4142520}
{"current_steps": 445, "total_steps": 480, "loss": 0.001, "lr": 6.530755180666592e-07, "epoch": 7.416666666666667, "percentage": 92.71, "elapsed_time": "0:47:35", "remaining_time": "0:03:44", "throughput": 1467.18, "total_tokens": 4188920}
{"current_steps": 450, "total_steps": 480, "loss": 0.0001, "lr": 4.803679899192392e-07, "epoch": 7.5, "percentage": 93.75, "elapsed_time": "0:48:06", "remaining_time": "0:03:12", "throughput": 1467.43, "total_tokens": 4236368}
{"current_steps": 455, "total_steps": 480, "loss": 0.0, "lr": 3.339166978780256e-07, "epoch": 7.583333333333333, "percentage": 94.79, "elapsed_time": "0:48:38", "remaining_time": "0:02:40", "throughput": 1467.65, "total_tokens": 4283360}
{"current_steps": 460, "total_steps": 480, "loss": 0.0001, "lr": 2.1387846565474045e-07, "epoch": 7.666666666666667, "percentage": 95.83, "elapsed_time": "0:49:10", "remaining_time": "0:02:08", "throughput": 1467.8, "total_tokens": 4330912}
{"current_steps": 465, "total_steps": 480, "loss": 0.0191, "lr": 1.2038183319507955e-07, "epoch": 7.75, "percentage": 96.88, "elapsed_time": "0:49:42", "remaining_time": "0:01:36", "throughput": 1468.1, "total_tokens": 4378920}
{"current_steps": 470, "total_steps": 480, "loss": 0.0016, "lr": 5.352691903491303e-08, "epoch": 7.833333333333333, "percentage": 97.92, "elapsed_time": "0:50:14", "remaining_time": "0:01:04", "throughput": 1468.22, "total_tokens": 4425608}
{"current_steps": 475, "total_steps": 480, "loss": 0.0016, "lr": 1.3385313090857887e-08, "epoch": 7.916666666666667, "percentage": 98.96, "elapsed_time": "0:50:46", "remaining_time": "0:00:32", "throughput": 1468.37, "total_tokens": 4472872}
{"current_steps": 480, "total_steps": 480, "loss": 0.0, "lr": 0.0, "epoch": 8.0, "percentage": 100.0, "elapsed_time": "0:51:17", "remaining_time": "0:00:00", "throughput": 1468.54, "total_tokens": 4519360}
{"current_steps": 480, "total_steps": 480, "epoch": 8.0, "percentage": 100.0, "elapsed_time": "0:51:24", "remaining_time": "0:00:00", "throughput": 1465.38, "total_tokens": 4519360}