Ahil1991 committed
Commit f80b1ba (verified)
1 Parent(s): 8a7661a

Training in progress, step 300, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6ae1e7ff551fbccad491f1d6f3e5f47820ac501264a51aca47a7b2f7fe3db7cc
+oid sha256:55e1f5b78f15e58120b2f1bf0bff556d90c4c57af08b524a65470f4ab427f3a0
 size 5991064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:32e2d46a90431b0d5cd0a89b78e7583455332324fe7359755d84e2904414eebd
-size 3875002
+oid sha256:99f145932c39d3ff2605a3afad7d70eeed6f6568267d4bec8925f640fbbaf402
+size 3875258
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:62a586579a715a0a81b961015595a1bc5750b1ca116b28fcb423cdee1b35f175
+oid sha256:73110530bc5eb0ad8c3e2aa0e744d5f1c37ffb681f8000a9fca35801abeb420f
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1d1650f5062195d8ee65b24ab00a137ab48cccbff41f41ba060d4208547a763c
+oid sha256:c79c66b60a0abe3dc1f1792ced2b6c99f10b3ada4ba94ee60000ba5931c603a9
 size 1064
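
Each file above is tracked with Git LFS, so the diff only touches the small pointer files (the version / oid sha256 / size lines): the oid is the SHA-256 of the real payload and size is its length in bytes. Below is a minimal sketch of how a downloaded file could be checked against its pointer; the helper name and the example call are illustrative, not part of this repository.

import hashlib
from pathlib import Path

def matches_pointer(file_path: str, expected_oid: str, expected_size: int) -> bool:
    """Return True if the local file matches a Git LFS pointer's oid and size."""
    data = Path(file_path).read_bytes()
    return hashlib.sha256(data).hexdigest() == expected_oid and len(data) == expected_size

# Example with the new adapter weights recorded in this commit (hypothetical local path):
# matches_pointer("last-checkpoint/adapter_model.safetensors",
#                 "55e1f5b78f15e58120b2f1bf0bff556d90c4c57af08b524a65470f4ab427f3a0",
#                 5991064)
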
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.016483697623050804,
+  "epoch": 0.019780437147660965,
   "eval_steps": 500,
-  "global_step": 250,
+  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -183,6 +183,41 @@
       "learning_rate": 0.0002,
       "loss": 0.9725,
       "step": 250
+    },
+    {
+      "epoch": 0.017143045527972835,
+      "grad_norm": 1.7628215551376343,
+      "learning_rate": 0.0002,
+      "loss": 1.2322,
+      "step": 260
+    },
+    {
+      "epoch": 0.017802393432894865,
+      "grad_norm": 1.614889144897461,
+      "learning_rate": 0.0002,
+      "loss": 1.1429,
+      "step": 270
+    },
+    {
+      "epoch": 0.0184617413378169,
+      "grad_norm": 1.440961241722107,
+      "learning_rate": 0.0002,
+      "loss": 1.1346,
+      "step": 280
+    },
+    {
+      "epoch": 0.01912108924273893,
+      "grad_norm": 1.3889188766479492,
+      "learning_rate": 0.0002,
+      "loss": 1.0588,
+      "step": 290
+    },
+    {
+      "epoch": 0.019780437147660965,
+      "grad_norm": 1.4845261573791504,
+      "learning_rate": 0.0002,
+      "loss": 0.9927,
+      "step": 300
     }
   ],
   "logging_steps": 10,
@@ -202,7 +237,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 432422594402304.0,
+  "total_flos": 518942362387968.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null