DeepDream2045 committed on
Commit
eb68afe
·
verified ·
1 Parent(s): 2820925

Training in progress, step 50, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95d4eed27b90a2856facececdd97411fbc1ce64e19ffccaf3c1ad8d39a335b4d
3
  size 550593184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a12350b58ec624b0d5664c33aa3c9e18b286d78254c630bdcd3fc874e788783
3
  size 550593184
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b63645f6f3e47d9b08c5622e993699a69c3e78972be33cd155e174a1678cd2a
3
  size 1101572914
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca0d84cc821acbe166e6cdca05fa841d3551e5ea880721e4120893e178a78cdc
3
  size 1101572914
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a74aa4fed1e29c9d1a6ff8828642558706da3fd173127349b6461eb754a1bd1f
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3754ff440b748b1ae3f3b4064b7cf64d1380ccfccc70101f8fd849d41bc3e9e0
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f978d2ddde786dd6cb5ab8c79875852cc1b866a8c5495be20066800cc1246f9
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57a36dce534393303291909c399e47c890439a2a31832f034884be355e7af794
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca6b6a9ee86f60f9c39412d312cf1f5ee1842c52c0eac31c9f059f0709283c9a
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8404c96e5ae00c5be76d7e364c876cad8492c3e6b0f8c80891a7ed04c27f8297
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:349a6eaf951fd8093bdabbc9a24355bbbde2573a95c9aa25813bec8e1a4b5aaf
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cf6ee1e7fdb5e1f4b27b2e73d309fd4d1dd1759513800464493dcd513969c0c
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f37b2aa490ccb1598b01e14cda36e9081f7ce646deab4d3c2d03de0d2169a755
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1df0528620c07325b8faa7567e59b0c1e86a1f1ee6af1245a69c6c0463fe4e2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.6034004092216492,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-25",
4
- "epoch": 0.7952286282306164,
5
  "eval_steps": 25,
6
- "global_step": 25,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -198,6 +198,189 @@
198
  "eval_samples_per_second": 9.356,
199
  "eval_steps_per_second": 1.192,
200
  "step": 25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
  }
202
  ],
203
  "logging_steps": 1,
@@ -221,12 +404,12 @@
221
  "should_evaluate": false,
222
  "should_log": false,
223
  "should_save": true,
224
- "should_training_stop": false
225
  },
226
  "attributes": {}
227
  }
228
  },
229
- "total_flos": 5.555785010511872e+17,
230
  "train_batch_size": 2,
231
  "trial_name": null,
232
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.38056910037994385,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-50",
4
+ "epoch": 1.6003976143141152,
5
  "eval_steps": 25,
6
+ "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
198
  "eval_samples_per_second": 9.356,
199
  "eval_steps_per_second": 1.192,
200
  "step": 25
201
+ },
202
+ {
203
+ "epoch": 0.827037773359841,
204
+ "grad_norm": 1.3676831722259521,
205
+ "learning_rate": 5e-05,
206
+ "loss": 0.6656,
207
+ "step": 26
208
+ },
209
+ {
210
+ "epoch": 0.8588469184890656,
211
+ "grad_norm": 1.1924246549606323,
212
+ "learning_rate": 4.6729843538492847e-05,
213
+ "loss": 0.5322,
214
+ "step": 27
215
+ },
216
+ {
217
+ "epoch": 0.8906560636182903,
218
+ "grad_norm": 1.3114556074142456,
219
+ "learning_rate": 4.347369038899744e-05,
220
+ "loss": 0.536,
221
+ "step": 28
222
+ },
223
+ {
224
+ "epoch": 0.9224652087475149,
225
+ "grad_norm": 1.1508691310882568,
226
+ "learning_rate": 4.0245483899193595e-05,
227
+ "loss": 0.4105,
228
+ "step": 29
229
+ },
230
+ {
231
+ "epoch": 0.9542743538767395,
232
+ "grad_norm": 2.0720202922821045,
233
+ "learning_rate": 3.705904774487396e-05,
234
+ "loss": 0.5614,
235
+ "step": 30
236
+ },
237
+ {
238
+ "epoch": 0.9860834990059643,
239
+ "grad_norm": 1.2711116075515747,
240
+ "learning_rate": 3.392802673484193e-05,
241
+ "loss": 0.2281,
242
+ "step": 31
243
+ },
244
+ {
245
+ "epoch": 1.0278330019880715,
246
+ "grad_norm": 2.973681926727295,
247
+ "learning_rate": 3.086582838174551e-05,
248
+ "loss": 1.0804,
249
+ "step": 32
250
+ },
251
+ {
252
+ "epoch": 1.0596421471172963,
253
+ "grad_norm": 0.9690569043159485,
254
+ "learning_rate": 2.7885565489049946e-05,
255
+ "loss": 0.3993,
256
+ "step": 33
257
+ },
258
+ {
259
+ "epoch": 1.091451292246521,
260
+ "grad_norm": 1.1462165117263794,
261
+ "learning_rate": 2.500000000000001e-05,
262
+ "loss": 0.4802,
263
+ "step": 34
264
+ },
265
+ {
266
+ "epoch": 1.1232604373757455,
267
+ "grad_norm": 1.1109638214111328,
268
+ "learning_rate": 2.2221488349019903e-05,
269
+ "loss": 0.3798,
270
+ "step": 35
271
+ },
272
+ {
273
+ "epoch": 1.1550695825049702,
274
+ "grad_norm": 1.2443093061447144,
275
+ "learning_rate": 1.9561928549563968e-05,
276
+ "loss": 0.4002,
277
+ "step": 36
278
+ },
279
+ {
280
+ "epoch": 1.1868787276341948,
281
+ "grad_norm": 1.0544307231903076,
282
+ "learning_rate": 1.703270924499656e-05,
283
+ "loss": 0.2902,
284
+ "step": 37
285
+ },
286
+ {
287
+ "epoch": 1.2186878727634194,
288
+ "grad_norm": 1.2791643142700195,
289
+ "learning_rate": 1.4644660940672627e-05,
290
+ "loss": 0.3088,
291
+ "step": 38
292
+ },
293
+ {
294
+ "epoch": 1.250497017892644,
295
+ "grad_norm": 1.7745587825775146,
296
+ "learning_rate": 1.2408009626051137e-05,
297
+ "loss": 0.2056,
298
+ "step": 39
299
+ },
300
+ {
301
+ "epoch": 1.2823061630218688,
302
+ "grad_norm": 1.2029690742492676,
303
+ "learning_rate": 1.0332332985438248e-05,
304
+ "loss": 0.4316,
305
+ "step": 40
306
+ },
307
+ {
308
+ "epoch": 1.3141153081510935,
309
+ "grad_norm": 1.0333634614944458,
310
+ "learning_rate": 8.426519384872733e-06,
311
+ "loss": 0.3804,
312
+ "step": 41
313
+ },
314
+ {
315
+ "epoch": 1.345924453280318,
316
+ "grad_norm": 1.0389782190322876,
317
+ "learning_rate": 6.698729810778065e-06,
318
+ "loss": 0.2648,
319
+ "step": 42
320
+ },
321
+ {
322
+ "epoch": 1.3777335984095427,
323
+ "grad_norm": 1.212583065032959,
324
+ "learning_rate": 5.156362923365588e-06,
325
+ "loss": 0.3425,
326
+ "step": 43
327
+ },
328
+ {
329
+ "epoch": 1.4095427435387675,
330
+ "grad_norm": 1.3398654460906982,
331
+ "learning_rate": 3.8060233744356633e-06,
332
+ "loss": 0.3015,
333
+ "step": 44
334
+ },
335
+ {
336
+ "epoch": 1.4413518886679921,
337
+ "grad_norm": 1.8134361505508423,
338
+ "learning_rate": 2.653493525244721e-06,
339
+ "loss": 0.3699,
340
+ "step": 45
341
+ },
342
+ {
343
+ "epoch": 1.4731610337972167,
344
+ "grad_norm": 1.1156195402145386,
345
+ "learning_rate": 1.70370868554659e-06,
346
+ "loss": 0.1551,
347
+ "step": 46
348
+ },
349
+ {
350
+ "epoch": 1.5049701789264414,
351
+ "grad_norm": 1.3077094554901123,
352
+ "learning_rate": 9.607359798384785e-07,
353
+ "loss": 0.2284,
354
+ "step": 47
355
+ },
356
+ {
357
+ "epoch": 1.536779324055666,
358
+ "grad_norm": 1.3075495958328247,
359
+ "learning_rate": 4.277569313094809e-07,
360
+ "loss": 0.3622,
361
+ "step": 48
362
+ },
363
+ {
364
+ "epoch": 1.5685884691848906,
365
+ "grad_norm": 1.1181570291519165,
366
+ "learning_rate": 1.0705383806982606e-07,
367
+ "loss": 0.3798,
368
+ "step": 49
369
+ },
370
+ {
371
+ "epoch": 1.6003976143141152,
372
+ "grad_norm": 1.0557467937469482,
373
+ "learning_rate": 0.0,
374
+ "loss": 0.2514,
375
+ "step": 50
376
+ },
377
+ {
378
+ "epoch": 1.6003976143141152,
379
+ "eval_loss": 0.38056910037994385,
380
+ "eval_runtime": 22.5548,
381
+ "eval_samples_per_second": 9.399,
382
+ "eval_steps_per_second": 1.197,
383
+ "step": 50
384
  }
385
  ],
386
  "logging_steps": 1,
 
404
  "should_evaluate": false,
405
  "should_log": false,
406
  "should_save": true,
407
+ "should_training_stop": true
408
  },
409
  "attributes": {}
410
  }
411
  },
412
+ "total_flos": 1.1111570021023744e+18,
413
  "train_batch_size": 2,
414
  "trial_name": null,
415
  "trial_params": null