Training in progress, step 50, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +189 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:95d4eed27b90a2856facececdd97411fbc1ce64e19ffccaf3c1ad8d39a335b4d
 size 550593184

 version https://git-lfs.github.com/spec/v1
+oid sha256:2a12350b58ec624b0d5664c33aa3c9e18b286d78254c630bdcd3fc874e788783
 size 550593184

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7b63645f6f3e47d9b08c5622e993699a69c3e78972be33cd155e174a1678cd2a
 size 1101572914

 version https://git-lfs.github.com/spec/v1
+oid sha256:ca0d84cc821acbe166e6cdca05fa841d3551e5ea880721e4120893e178a78cdc
 size 1101572914

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a74aa4fed1e29c9d1a6ff8828642558706da3fd173127349b6461eb754a1bd1f
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:3754ff440b748b1ae3f3b4064b7cf64d1380ccfccc70101f8fd849d41bc3e9e0
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1f978d2ddde786dd6cb5ab8c79875852cc1b866a8c5495be20066800cc1246f9
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:57a36dce534393303291909c399e47c890439a2a31832f034884be355e7af794
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ca6b6a9ee86f60f9c39412d312cf1f5ee1842c52c0eac31c9f059f0709283c9a
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:8404c96e5ae00c5be76d7e364c876cad8492c3e6b0f8c80891a7ed04c27f8297
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:349a6eaf951fd8093bdabbc9a24355bbbde2573a95c9aa25813bec8e1a4b5aaf
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:3cf6ee1e7fdb5e1f4b27b2e73d309fd4d1dd1759513800464493dcd513969c0c
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f37b2aa490ccb1598b01e14cda36e9081f7ce646deab4d3c2d03de0d2169a755
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b1df0528620c07325b8faa7567e59b0c1e86a1f1ee6af1245a69c6c0463fe4e2
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.6034004092216492,
-  "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 0.7952286282306164,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,189 @@
       "eval_samples_per_second": 9.356,
       "eval_steps_per_second": 1.192,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -221,12 +404,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.555785010511872e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.38056910037994385,
+  "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 1.6003976143141152,
   "eval_steps": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 9.356,
       "eval_steps_per_second": 1.192,
       "step": 25
+    },
+    {
+      "epoch": 0.827037773359841,
+      "grad_norm": 1.3676831722259521,
+      "learning_rate": 5e-05,
+      "loss": 0.6656,
+      "step": 26
+    },
+    {
+      "epoch": 0.8588469184890656,
+      "grad_norm": 1.1924246549606323,
+      "learning_rate": 4.6729843538492847e-05,
+      "loss": 0.5322,
+      "step": 27
+    },
+    {
+      "epoch": 0.8906560636182903,
+      "grad_norm": 1.3114556074142456,
+      "learning_rate": 4.347369038899744e-05,
+      "loss": 0.536,
+      "step": 28
+    },
+    {
+      "epoch": 0.9224652087475149,
+      "grad_norm": 1.1508691310882568,
+      "learning_rate": 4.0245483899193595e-05,
+      "loss": 0.4105,
+      "step": 29
+    },
+    {
+      "epoch": 0.9542743538767395,
+      "grad_norm": 2.0720202922821045,
+      "learning_rate": 3.705904774487396e-05,
+      "loss": 0.5614,
+      "step": 30
+    },
+    {
+      "epoch": 0.9860834990059643,
+      "grad_norm": 1.2711116075515747,
+      "learning_rate": 3.392802673484193e-05,
+      "loss": 0.2281,
+      "step": 31
+    },
+    {
+      "epoch": 1.0278330019880715,
+      "grad_norm": 2.973681926727295,
+      "learning_rate": 3.086582838174551e-05,
+      "loss": 1.0804,
+      "step": 32
+    },
+    {
+      "epoch": 1.0596421471172963,
+      "grad_norm": 0.9690569043159485,
+      "learning_rate": 2.7885565489049946e-05,
+      "loss": 0.3993,
+      "step": 33
+    },
+    {
+      "epoch": 1.091451292246521,
+      "grad_norm": 1.1462165117263794,
+      "learning_rate": 2.500000000000001e-05,
+      "loss": 0.4802,
+      "step": 34
+    },
+    {
+      "epoch": 1.1232604373757455,
+      "grad_norm": 1.1109638214111328,
+      "learning_rate": 2.2221488349019903e-05,
+      "loss": 0.3798,
+      "step": 35
+    },
+    {
+      "epoch": 1.1550695825049702,
+      "grad_norm": 1.2443093061447144,
+      "learning_rate": 1.9561928549563968e-05,
+      "loss": 0.4002,
+      "step": 36
+    },
+    {
+      "epoch": 1.1868787276341948,
+      "grad_norm": 1.0544307231903076,
+      "learning_rate": 1.703270924499656e-05,
+      "loss": 0.2902,
+      "step": 37
+    },
+    {
+      "epoch": 1.2186878727634194,
+      "grad_norm": 1.2791643142700195,
+      "learning_rate": 1.4644660940672627e-05,
+      "loss": 0.3088,
+      "step": 38
+    },
+    {
+      "epoch": 1.250497017892644,
+      "grad_norm": 1.7745587825775146,
+      "learning_rate": 1.2408009626051137e-05,
+      "loss": 0.2056,
+      "step": 39
+    },
+    {
+      "epoch": 1.2823061630218688,
+      "grad_norm": 1.2029690742492676,
+      "learning_rate": 1.0332332985438248e-05,
+      "loss": 0.4316,
+      "step": 40
+    },
+    {
+      "epoch": 1.3141153081510935,
+      "grad_norm": 1.0333634614944458,
+      "learning_rate": 8.426519384872733e-06,
+      "loss": 0.3804,
+      "step": 41
+    },
+    {
+      "epoch": 1.345924453280318,
+      "grad_norm": 1.0389782190322876,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 0.2648,
+      "step": 42
+    },
+    {
+      "epoch": 1.3777335984095427,
+      "grad_norm": 1.212583065032959,
+      "learning_rate": 5.156362923365588e-06,
+      "loss": 0.3425,
+      "step": 43
+    },
+    {
+      "epoch": 1.4095427435387675,
+      "grad_norm": 1.3398654460906982,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 0.3015,
+      "step": 44
+    },
+    {
+      "epoch": 1.4413518886679921,
+      "grad_norm": 1.8134361505508423,
+      "learning_rate": 2.653493525244721e-06,
+      "loss": 0.3699,
+      "step": 45
+    },
+    {
+      "epoch": 1.4731610337972167,
+      "grad_norm": 1.1156195402145386,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 0.1551,
+      "step": 46
+    },
+    {
+      "epoch": 1.5049701789264414,
+      "grad_norm": 1.3077094554901123,
+      "learning_rate": 9.607359798384785e-07,
+      "loss": 0.2284,
+      "step": 47
+    },
+    {
+      "epoch": 1.536779324055666,
+      "grad_norm": 1.3075495958328247,
+      "learning_rate": 4.277569313094809e-07,
+      "loss": 0.3622,
+      "step": 48
+    },
+    {
+      "epoch": 1.5685884691848906,
+      "grad_norm": 1.1181570291519165,
+      "learning_rate": 1.0705383806982606e-07,
+      "loss": 0.3798,
+      "step": 49
+    },
+    {
+      "epoch": 1.6003976143141152,
+      "grad_norm": 1.0557467937469482,
+      "learning_rate": 0.0,
+      "loss": 0.2514,
+      "step": 50
+    },
+    {
+      "epoch": 1.6003976143141152,
+      "eval_loss": 0.38056910037994385,
+      "eval_runtime": 22.5548,
+      "eval_samples_per_second": 9.399,
+      "eval_steps_per_second": 1.197,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.1111570021023744e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null