rootxhacker commited on
Commit
412136c
·
verified ·
1 Parent(s): 69ec7a2

Training in progress, step 38500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f14125fba192c696e9ea3e36ca9f9429c24cac19e9b8c350f6135ab01f7ad59
3
  size 132187888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78bb7fd938fc79d04fd8027a0abe1e34028a04e2b63e0cc950fef379ab50d87e
3
  size 132187888
last-checkpoint/ar_diffusion_info.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea672d9cf3b33b67a7d7775ed77487a43c2ed825e9c82fdac570ac86bb7b82e0
3
  size 1800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d987e9657df7f151748ac91d327188ea5727ed2126f4ab712064c12bbf47258
3
  size 1800
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8bcbfe92a4edd63a8b647a0117b3f07614d7a6540fc40ddbb287c8b91dd837c1
3
  size 264665786
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1fa81680adbb395cf42af1d0162aa26889a89b2e0a8b194c7f5b489f789ba80
3
  size 264665786
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc11b8250e89187182c146356c8dd099d66b6a75fadfa56262487e142dbe4b90
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddf4785afde6da18af71573e1d2e7789bcefbfcd7bcf8163ade4a7bd4cb547c8
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:102e993b93214454fb03b9ca20f482147915e5a724733ab537e52178df0f84f1
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a79402dde3815fb10cc451e7d2fb9bb473a43858a3148dbc3bddcddce2fe949e
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:541cb6dea9c80e2359e5134078f161ec83a5de3af8e29b3e9b6b880e5dd9058f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3dea87c40748698e240c93bc3eea5482b3f627ab3a1ed391f6cbe2fe5d7da89
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 37750,
3
- "best_metric": 0.5946066975593567,
4
- "best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-37500",
5
- "epoch": 2.9228520883008997,
6
  "eval_steps": 250,
7
- "global_step": 38000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -6544,6 +6544,92 @@
6544
  "eval_samples_per_second": 22.573,
6545
  "eval_steps_per_second": 5.643,
6546
  "step": 38000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6547
  }
6548
  ],
6549
  "logging_steps": 50,
 
1
  {
2
+ "best_global_step": 38500,
3
+ "best_metric": 0.5919152498245239,
4
+ "best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-38500",
5
+ "epoch": 2.961310668410122,
6
  "eval_steps": 250,
7
+ "global_step": 38500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
6544
  "eval_samples_per_second": 22.573,
6545
  "eval_steps_per_second": 5.643,
6546
  "step": 38000
6547
+ },
6548
+ {
6549
+ "epoch": 2.926697946311822,
6550
+ "grad_norm": 0.9351138472557068,
6551
+ "learning_rate": 5.033374022803418e-06,
6552
+ "loss": 0.6027,
6553
+ "step": 38050
6554
+ },
6555
+ {
6556
+ "epoch": 2.9305438043227445,
6557
+ "grad_norm": 0.425923615694046,
6558
+ "learning_rate": 4.773654000986936e-06,
6559
+ "loss": 0.578,
6560
+ "step": 38100
6561
+ },
6562
+ {
6563
+ "epoch": 2.9343896623336665,
6564
+ "grad_norm": 0.728718638420105,
6565
+ "learning_rate": 4.513933979170454e-06,
6566
+ "loss": 0.6103,
6567
+ "step": 38150
6568
+ },
6569
+ {
6570
+ "epoch": 2.938235520344589,
6571
+ "grad_norm": 0.9447925090789795,
6572
+ "learning_rate": 4.254213957353972e-06,
6573
+ "loss": 0.6281,
6574
+ "step": 38200
6575
+ },
6576
+ {
6577
+ "epoch": 2.9420813783555113,
6578
+ "grad_norm": 1.3824400901794434,
6579
+ "learning_rate": 3.994493935537491e-06,
6580
+ "loss": 0.5825,
6581
+ "step": 38250
6582
+ },
6583
+ {
6584
+ "epoch": 2.9420813783555113,
6585
+ "eval_loss": 0.5938913226127625,
6586
+ "eval_runtime": 21.2182,
6587
+ "eval_samples_per_second": 23.565,
6588
+ "eval_steps_per_second": 5.891,
6589
+ "step": 38250
6590
+ },
6591
+ {
6592
+ "epoch": 2.9459272363664333,
6593
+ "grad_norm": 1.073121190071106,
6594
+ "learning_rate": 3.734773913721009e-06,
6595
+ "loss": 0.6468,
6596
+ "step": 38300
6597
+ },
6598
+ {
6599
+ "epoch": 2.9497730943773557,
6600
+ "grad_norm": 1.4922077655792236,
6601
+ "learning_rate": 3.475053891904527e-06,
6602
+ "loss": 0.6511,
6603
+ "step": 38350
6604
+ },
6605
+ {
6606
+ "epoch": 2.953618952388278,
6607
+ "grad_norm": 0.7231102585792542,
6608
+ "learning_rate": 3.215333870088045e-06,
6609
+ "loss": 0.5952,
6610
+ "step": 38400
6611
+ },
6612
+ {
6613
+ "epoch": 2.9574648103992,
6614
+ "grad_norm": 1.1157270669937134,
6615
+ "learning_rate": 2.9556138482715634e-06,
6616
+ "loss": 0.5715,
6617
+ "step": 38450
6618
+ },
6619
+ {
6620
+ "epoch": 2.961310668410122,
6621
+ "grad_norm": 0.7627404928207397,
6622
+ "learning_rate": 2.6958938264550813e-06,
6623
+ "loss": 0.6085,
6624
+ "step": 38500
6625
+ },
6626
+ {
6627
+ "epoch": 2.961310668410122,
6628
+ "eval_loss": 0.5919152498245239,
6629
+ "eval_runtime": 22.1697,
6630
+ "eval_samples_per_second": 22.553,
6631
+ "eval_steps_per_second": 5.638,
6632
+ "step": 38500
6633
  }
6634
  ],
6635
  "logging_steps": 50,