Training in progress, step 18100, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/adapter_config.json +4 -4
last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +241 -3
last-checkpoint/training_args.bin +1 -1

last-checkpoint/adapter_config.json CHANGED Viewed

@@ -25,12 +25,12 @@
   "revision": null,
   "target_modules": [
     "q_proj",
-    "o_proj",
-    "gate_proj",
-    "down_proj",
     "v_proj",
     "k_proj",
-    "up_proj"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,

   "revision": null,
   "target_modules": [
     "q_proj",
+    "up_proj",
     "v_proj",
+    "gate_proj",
+    "o_proj",
     "k_proj",
+    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f91e3c5f21cf129551bcf7c69bf4d8d16a4ef7f114f3eb09634873a47bacc359
 size 161533160

 version https://git-lfs.github.com/spec/v1
+oid sha256:0503d72fbbe734bd1aa1b8c5ab147618e57fdf12c863564129dd2fd420090e10
 size 161533160

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4014d31deb4cce624d0816b7ea019aa4bc828b0109dcbd09f7797bdcac508304
 size 323292202

 version https://git-lfs.github.com/spec/v1
+oid sha256:ce7e3b0e28911f6b755418ff9c22e6d5a076b7ea9b6a3985fb3ac3d87c247dd4
 size 323292202

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6863575074205cdae5bd9a3add89cf69a3951c4f077fb3f56f28f0fe9457c14f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:8463e4bd1b3065b876119025f33b75a288e72e59b85af3eab3c7b7c3838477e6
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.7152317880794703,
   "eval_steps": 100,
-  "global_step": 16400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2336,6 +2336,244 @@
       "learning_rate": 2.7228170036058153e-06,
       "loss": 0.2617,
       "step": 16400
     }
   ],
   "logging_steps": 50,
@@ -2355,7 +2593,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.2874605314388787e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 2.9966887417218544,
   "eval_steps": 100,
+  "global_step": 18100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 2.7228170036058153e-06,
       "loss": 0.2617,
       "step": 16400
+    },
+    {
+      "epoch": 2.7235099337748343,
+      "grad_norm": 1.3112074136734009,
+      "learning_rate": 2.5682530093579626e-06,
+      "loss": 0.2751,
+      "step": 16450
+    },
+    {
+      "epoch": 2.7317880794701987,
+      "grad_norm": 1.3641598224639893,
+      "learning_rate": 2.418089536248769e-06,
+      "loss": 0.2741,
+      "step": 16500
+    },
+    {
+      "epoch": 2.7400662251655628,
+      "grad_norm": 1.2237251996994019,
+      "learning_rate": 2.2723405158236455e-06,
+      "loss": 0.2772,
+      "step": 16550
+    },
+    {
+      "epoch": 2.748344370860927,
+      "grad_norm": 1.171391248703003,
+      "learning_rate": 2.1310194700733697e-06,
+      "loss": 0.2715,
+      "step": 16600
+    },
+    {
+      "epoch": 2.756622516556291,
+      "grad_norm": 1.3004522323608398,
+      "learning_rate": 1.9941395101795655e-06,
+      "loss": 0.2712,
+      "step": 16650
+    },
+    {
+      "epoch": 2.7649006622516556,
+      "grad_norm": 1.0566428899765015,
+      "learning_rate": 1.861713335298282e-06,
+      "loss": 0.2758,
+      "step": 16700
+    },
+    {
+      "epoch": 2.77317880794702,
+      "grad_norm": 1.5244101285934448,
+      "learning_rate": 1.7337532313818294e-06,
+      "loss": 0.2786,
+      "step": 16750
+    },
+    {
+      "epoch": 2.781456953642384,
+      "grad_norm": 1.2633907794952393,
+      "learning_rate": 1.6102710700389656e-06,
+      "loss": 0.272,
+      "step": 16800
+    },
+    {
+      "epoch": 2.789735099337748,
+      "grad_norm": 1.2915066480636597,
+      "learning_rate": 1.4912783074334469e-06,
+      "loss": 0.2839,
+      "step": 16850
+    },
+    {
+      "epoch": 2.7980132450331126,
+      "grad_norm": 1.2640457153320312,
+      "learning_rate": 1.3767859832212016e-06,
+      "loss": 0.2756,
+      "step": 16900
+    },
+    {
+      "epoch": 2.806291390728477,
+      "grad_norm": 1.3004051446914673,
+      "learning_rate": 1.2668047195261379e-06,
+      "loss": 0.2747,
+      "step": 16950
+    },
+    {
+      "epoch": 2.814569536423841,
+      "grad_norm": 1.3884724378585815,
+      "learning_rate": 1.1613447199546158e-06,
+      "loss": 0.2707,
+      "step": 17000
+    },
+    {
+      "epoch": 2.8228476821192054,
+      "grad_norm": 1.416150689125061,
+      "learning_rate": 1.0604157686488313e-06,
+      "loss": 0.2702,
+      "step": 17050
+    },
+    {
+      "epoch": 2.8311258278145695,
+      "grad_norm": 1.3139573335647583,
+      "learning_rate": 9.640272293790998e-07,
+      "loss": 0.2721,
+      "step": 17100
+    },
+    {
+      "epoch": 2.839403973509934,
+      "grad_norm": 1.2322123050689697,
+      "learning_rate": 8.721880446750708e-07,
+      "loss": 0.2734,
+      "step": 17150
+    },
+    {
+      "epoch": 2.847682119205298,
+      "grad_norm": 1.3550212383270264,
+      "learning_rate": 7.849067349961381e-07,
+      "loss": 0.2757,
+      "step": 17200
+    },
+    {
+      "epoch": 2.8559602649006623,
+      "grad_norm": 1.4651058912277222,
+      "learning_rate": 7.02191397940899e-07,
+      "loss": 0.274,
+      "step": 17250
+    },
+    {
+      "epoch": 2.8642384105960264,
+      "grad_norm": 1.3420034646987915,
+      "learning_rate": 6.240497074959162e-07,
+      "loss": 0.2721,
+      "step": 17300
+    },
+    {
+      "epoch": 2.872516556291391,
+      "grad_norm": 1.2362934350967407,
+      "learning_rate": 5.504889133237656e-07,
+      "loss": 0.279,
+      "step": 17350
+    },
+    {
+      "epoch": 2.880794701986755,
+      "grad_norm": 1.1310467720031738,
+      "learning_rate": 4.815158400904185e-07,
+      "loss": 0.2744,
+      "step": 17400
+    },
+    {
+      "epoch": 2.8890728476821192,
+      "grad_norm": 1.2838894128799438,
+      "learning_rate": 4.1713688683208686e-07,
+      "loss": 0.2795,
+      "step": 17450
+    },
+    {
+      "epoch": 2.8973509933774837,
+      "grad_norm": 1.2171522378921509,
+      "learning_rate": 3.573580263615539e-07,
+      "loss": 0.2778,
+      "step": 17500
+    },
+    {
+      "epoch": 2.9056291390728477,
+      "grad_norm": 0.9482976198196411,
+      "learning_rate": 3.0218480471403965e-07,
+      "loss": 0.2757,
+      "step": 17550
+    },
+    {
+      "epoch": 2.9139072847682117,
+      "grad_norm": 1.2617802619934082,
+      "learning_rate": 2.5162234063264567e-07,
+      "loss": 0.274,
+      "step": 17600
+    },
+    {
+      "epoch": 2.922185430463576,
+      "grad_norm": 1.1305478811264038,
+      "learning_rate": 2.0567532509348508e-07,
+      "loss": 0.2701,
+      "step": 17650
+    },
+    {
+      "epoch": 2.9304635761589406,
+      "grad_norm": 1.1746705770492554,
+      "learning_rate": 1.6434802087046397e-07,
+      "loss": 0.2693,
+      "step": 17700
+    },
+    {
+      "epoch": 2.9387417218543046,
+      "grad_norm": 1.2440382242202759,
+      "learning_rate": 1.276442621397922e-07,
+      "loss": 0.2637,
+      "step": 17750
+    },
+    {
+      "epoch": 2.9470198675496686,
+      "grad_norm": 0.7597376704216003,
+      "learning_rate": 9.556745412425683e-08,
+      "loss": 0.2657,
+      "step": 17800
+    },
+    {
+      "epoch": 2.955298013245033,
+      "grad_norm": 1.2113037109375,
+      "learning_rate": 6.812057277733042e-08,
+      "loss": 0.272,
+      "step": 17850
+    },
+    {
+      "epoch": 2.9635761589403975,
+      "grad_norm": 1.07718825340271,
+      "learning_rate": 4.530616450704184e-08,
+      "loss": 0.2696,
+      "step": 17900
+    },
+    {
+      "epoch": 2.9718543046357615,
+      "grad_norm": 1.29874849319458,
+      "learning_rate": 2.712634593974861e-08,
+      "loss": 0.2712,
+      "step": 17950
+    },
+    {
+      "epoch": 2.980132450331126,
+      "grad_norm": 1.1109284162521362,
+      "learning_rate": 1.358280372376064e-08,
+      "loss": 0.2695,
+      "step": 18000
+    },
+    {
+      "epoch": 2.98841059602649,
+      "grad_norm": 1.177689790725708,
+      "learning_rate": 4.6767943728598386e-09,
+      "loss": 0.277,
+      "step": 18050
+    },
+    {
+      "epoch": 2.9966887417218544,
+      "grad_norm": 1.1074708700180054,
+      "learning_rate": 4.091441497322546e-10,
+      "loss": 0.2837,
+      "step": 18100
     }
   ],
   "logging_steps": 50,
       "attributes": {}
     }
   },
+  "total_flos": 1.4196983428389274e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4e7fd7045b4a15373c4d905f915edbb1436664635b44fe1df1bfa95de5188043
 size 5752

 version https://git-lfs.github.com/spec/v1
+oid sha256:e16cd32295896d2f57f1f19dbc49310a6ac6c29a2ec738cf4fb0c11d34db588b
 size 5752