Training in progress, step 300, checkpoint
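A minimal sketch of resuming from this checkpoint with the Hugging Face Trainer, assuming the original model, dataset, and remaining TrainingArguments, none of which are recorded in this commit; the batch size and logging interval below mirror the values in last-checkpoint/trainer_state.json.

from transformers import Trainer, TrainingArguments

def resume_from_step_300(model, train_dataset):
    # Hypothetical helper: model and train_dataset are whatever the original
    # run used; only the checkpoint directory layout comes from this commit.
    args = TrainingArguments(
        output_dir="outputs",            # placeholder output directory
        per_device_train_batch_size=2,   # mirrors "train_batch_size": 2 in trainer_state.json
        logging_steps=10,                # mirrors "logging_steps": 10 in trainer_state.json
    )
    trainer = Trainer(model=model, args=args, train_dataset=train_dataset)
    # resume_from_checkpoint restores optimizer.pt, scheduler.pt, rng_state.pth,
    # and trainer_state.json, so training continues from global_step 300.
    return trainer.train(resume_from_checkpoint="last-checkpoint")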
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:55e1f5b78f15e58120b2f1bf0bff556d90c4c57af08b524a65470f4ab427f3a0
 size 5991064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:99f145932c39d3ff2605a3afad7d70eeed6f6568267d4bec8925f640fbbaf402
+size 3875258
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:73110530bc5eb0ad8c3e2aa0e744d5f1c37ffb681f8000a9fca35801abeb420f
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c79c66b60a0abe3dc1f1792ced2b6c99f10b3ada4ba94ee60000ba5931c603a9
 size 1064
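The four files above are Git LFS pointers; only the object hash (and, for optimizer.pt, the size) changes in this commit. A small sketch of reading such a pointer, assuming the standard three-line version/oid/size layout shown in the diffs:

def parse_lfs_pointer(path):
    # Parse a Git LFS pointer file into a dict of its fields.
    # Expects the three-line layout: version, oid, size.
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            if key:
                fields[key] = value
    return {
        "version": fields["version"],  # e.g. https://git-lfs.github.com/spec/v1
        "oid": fields["oid"],          # e.g. sha256:55e1f5b7...
        "size": int(fields["size"]),   # payload size in bytes
    }

Note that this applies only to the pointer text itself; after git lfs pull, the files are replaced by the actual binary payloads.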
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.019780437147660965,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -183,6 +183,41 @@
       "learning_rate": 0.0002,
       "loss": 0.9725,
       "step": 250
+    },
+    {
+      "epoch": 0.017143045527972835,
+      "grad_norm": 1.7628215551376343,
+      "learning_rate": 0.0002,
+      "loss": 1.2322,
+      "step": 260
+    },
+    {
+      "epoch": 0.017802393432894865,
+      "grad_norm": 1.614889144897461,
+      "learning_rate": 0.0002,
+      "loss": 1.1429,
+      "step": 270
+    },
+    {
+      "epoch": 0.0184617413378169,
+      "grad_norm": 1.440961241722107,
+      "learning_rate": 0.0002,
+      "loss": 1.1346,
+      "step": 280
+    },
+    {
+      "epoch": 0.01912108924273893,
+      "grad_norm": 1.3889188766479492,
+      "learning_rate": 0.0002,
+      "loss": 1.0588,
+      "step": 290
+    },
+    {
+      "epoch": 0.019780437147660965,
+      "grad_norm": 1.4845261573791504,
+      "learning_rate": 0.0002,
+      "loss": 0.9927,
+      "step": 300
     }
   ],
   "logging_steps": 10,
@@ -202,7 +237,7 @@
       "attributes": {}
     }
   },
-  "total_flos":
+  "total_flos": 518942362387968.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
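For quick inspection of the entries added in this commit, a minimal sketch that reads the updated trainer_state.json, assuming the standard log_history list written by the Trainer:

import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# Print the records added here (steps 260-300): step, loss, grad norm, learning rate.
for entry in state["log_history"]:
    if entry.get("step", 0) >= 260:
        print(entry["step"], entry["loss"], entry["grad_norm"], entry["learning_rate"])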