Ahil1991 commited on
Commit
4e5cc73
·
verified ·
1 Parent(s): f9a6a2d

Training in progress, step 2900, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0abce668b3eb4d44dc784d9560c9e8eaf1d78e218c11316de74bb009145f23e
3
  size 5991064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:322d27566c97d95eb19d51961c27a563d5536f16940b4a27a9956d7bb2578261
3
  size 5991064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5af9af34cee614f2f3b9fcd4f30272cd7939d050f865cdf0b26c2e35e2ffb59c
3
  size 3875258
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b14f31da3b0eeb58f975f764db512d31a1cb3b846a2d135ff547d999136e23f2
3
  size 3875258
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0823cffba25b0205e4aaa4829d080da287f93b4b12f54638f5eb7ec2af7f34c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f673b50c9d76e7f4aba905deba6bab9362132836c548cdeb5ff0494bb47b8d67
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7f1cdaac6e6d07ba74169a3057ad987f49c993cbd8847dc15c52a36f581d092
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0753cd8573da61b5febd8fa3eea8c43dff8632c42f6883cac9734506ab6aa43
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.18791415290277916,
6
  "eval_steps": 500,
7
- "global_step": 2850,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2003,6 +2003,41 @@
2003
  "learning_rate": 0.0002,
2004
  "loss": 0.7436,
2005
  "step": 2850
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2006
  }
2007
  ],
2008
  "logging_steps": 10,
@@ -2022,7 +2057,7 @@
2022
  "attributes": {}
2023
  }
2024
  },
2025
- "total_flos": 4931223928266240.0,
2026
  "train_batch_size": 2,
2027
  "trial_name": null,
2028
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.1912108924273893,
6
  "eval_steps": 500,
7
+ "global_step": 2900,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2003
  "learning_rate": 0.0002,
2004
  "loss": 0.7436,
2005
  "step": 2850
2006
+ },
2007
+ {
2008
+ "epoch": 0.1885735008077012,
2009
+ "grad_norm": 1.677764892578125,
2010
+ "learning_rate": 0.0002,
2011
+ "loss": 0.6904,
2012
+ "step": 2860
2013
+ },
2014
+ {
2015
+ "epoch": 0.18923284871262322,
2016
+ "grad_norm": 3.1064493656158447,
2017
+ "learning_rate": 0.0002,
2018
+ "loss": 0.7331,
2019
+ "step": 2870
2020
+ },
2021
+ {
2022
+ "epoch": 0.18989219661754525,
2023
+ "grad_norm": 1.320894479751587,
2024
+ "learning_rate": 0.0002,
2025
+ "loss": 0.7432,
2026
+ "step": 2880
2027
+ },
2028
+ {
2029
+ "epoch": 0.19055154452246728,
2030
+ "grad_norm": 2.0653398036956787,
2031
+ "learning_rate": 0.0002,
2032
+ "loss": 0.7793,
2033
+ "step": 2890
2034
+ },
2035
+ {
2036
+ "epoch": 0.1912108924273893,
2037
+ "grad_norm": 1.7600677013397217,
2038
+ "learning_rate": 0.0002,
2039
+ "loss": 0.7675,
2040
+ "step": 2900
2041
  }
2042
  ],
2043
  "logging_steps": 10,
 
2057
  "attributes": {}
2058
  }
2059
  },
2060
+ "total_flos": 5017928334422016.0,
2061
  "train_batch_size": 2,
2062
  "trial_name": null,
2063
  "trial_params": null