Training in progress, step 2900, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5991064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:322d27566c97d95eb19d51961c27a563d5536f16940b4a27a9956d7bb2578261
|
| 3 |
size 5991064
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 3875258
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b14f31da3b0eeb58f975f764db512d31a1cb3b846a2d135ff547d999136e23f2
|
| 3 |
size 3875258
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f673b50c9d76e7f4aba905deba6bab9362132836c548cdeb5ff0494bb47b8d67
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0753cd8573da61b5febd8fa3eea8c43dff8632c42f6883cac9734506ab6aa43
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -2003,6 +2003,41 @@
|
|
| 2003 |
"learning_rate": 0.0002,
|
| 2004 |
"loss": 0.7436,
|
| 2005 |
"step": 2850
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2006 |
}
|
| 2007 |
],
|
| 2008 |
"logging_steps": 10,
|
|
@@ -2022,7 +2057,7 @@
|
|
| 2022 |
"attributes": {}
|
| 2023 |
}
|
| 2024 |
},
|
| 2025 |
-
"total_flos":
|
| 2026 |
"train_batch_size": 2,
|
| 2027 |
"trial_name": null,
|
| 2028 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.1912108924273893,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 2900,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 2003 |
"learning_rate": 0.0002,
|
| 2004 |
"loss": 0.7436,
|
| 2005 |
"step": 2850
|
| 2006 |
+
},
|
| 2007 |
+
{
|
| 2008 |
+
"epoch": 0.1885735008077012,
|
| 2009 |
+
"grad_norm": 1.677764892578125,
|
| 2010 |
+
"learning_rate": 0.0002,
|
| 2011 |
+
"loss": 0.6904,
|
| 2012 |
+
"step": 2860
|
| 2013 |
+
},
|
| 2014 |
+
{
|
| 2015 |
+
"epoch": 0.18923284871262322,
|
| 2016 |
+
"grad_norm": 3.1064493656158447,
|
| 2017 |
+
"learning_rate": 0.0002,
|
| 2018 |
+
"loss": 0.7331,
|
| 2019 |
+
"step": 2870
|
| 2020 |
+
},
|
| 2021 |
+
{
|
| 2022 |
+
"epoch": 0.18989219661754525,
|
| 2023 |
+
"grad_norm": 1.320894479751587,
|
| 2024 |
+
"learning_rate": 0.0002,
|
| 2025 |
+
"loss": 0.7432,
|
| 2026 |
+
"step": 2880
|
| 2027 |
+
},
|
| 2028 |
+
{
|
| 2029 |
+
"epoch": 0.19055154452246728,
|
| 2030 |
+
"grad_norm": 2.0653398036956787,
|
| 2031 |
+
"learning_rate": 0.0002,
|
| 2032 |
+
"loss": 0.7793,
|
| 2033 |
+
"step": 2890
|
| 2034 |
+
},
|
| 2035 |
+
{
|
| 2036 |
+
"epoch": 0.1912108924273893,
|
| 2037 |
+
"grad_norm": 1.7600677013397217,
|
| 2038 |
+
"learning_rate": 0.0002,
|
| 2039 |
+
"loss": 0.7675,
|
| 2040 |
+
"step": 2900
|
| 2041 |
}
|
| 2042 |
],
|
| 2043 |
"logging_steps": 10,
|
|
|
|
| 2057 |
"attributes": {}
|
| 2058 |
}
|
| 2059 |
},
|
| 2060 |
+
"total_flos": 5017928334422016.0,
|
| 2061 |
"train_batch_size": 2,
|
| 2062 |
"trial_name": null,
|
| 2063 |
"trial_params": null
|