Ahil1991 committed on
Commit
794edd4
·
verified ·
1 Parent(s): daf9316

Training in progress, step 50, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -23,10 +23,10 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "v_proj",
27
- "o_proj",
28
  "k_proj",
29
- "q_proj"
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
+ "q_proj",
 
27
  "k_proj",
28
+ "v_proj",
29
+ "o_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:14851adaa9e05434ee1f37e933208a49edac7f16ff4ae446ef063be05cef3f23
3
  size 5991064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:788f721f77b1d913a8b39bfad7d410c920b93d9228bd39aa0818a75a6d1ba7e5
3
  size 5991064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:33c3cc3ee5bb0b305a4fee738020f7834021669ef0f94f68acde11f366850bec
3
  size 3875002
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62bf1f6c6259d7ba8524d5501488f59e70f0242d124e45bbdcc8a547ff5660f3
3
  size 3875002
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76b9fde5dce7e940b3874aa3cffeaaf4c858dbb7bcd23b53c0ae370065b6c604
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdc875138895871a875d3239f8cb1e091d137772294fc52a0696aafd459bed5c
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d31dc31a119769737d72f3df4c8cdf99522596cafc12bf2eea05a4ff374f599c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b80fcc7599efca0c6313d990c467c2eb3001742b23ddaadc22e3499c12cea79
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,117 +2,47 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.009890218573830482,
6
  "eval_steps": 500,
7
- "global_step": 150,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 0.0006593479049220321,
14
- "grad_norm": 5.685938835144043,
15
  "learning_rate": 0.0002,
16
- "loss": 4.2492,
17
  "step": 10
18
  },
19
  {
20
  "epoch": 0.0013186958098440641,
21
- "grad_norm": 7.900416374206543,
22
  "learning_rate": 0.0002,
23
- "loss": 2.1794,
24
  "step": 20
25
  },
26
  {
27
  "epoch": 0.001978043714766096,
28
- "grad_norm": 2.213648557662964,
29
  "learning_rate": 0.0002,
30
- "loss": 1.7103,
31
  "step": 30
32
  },
33
  {
34
  "epoch": 0.0026373916196881283,
35
- "grad_norm": 2.4948573112487793,
36
  "learning_rate": 0.0002,
37
- "loss": 1.5154,
38
  "step": 40
39
  },
40
  {
41
  "epoch": 0.0032967395246101604,
42
- "grad_norm": 2.5954508781433105,
43
  "learning_rate": 0.0002,
44
- "loss": 1.2728,
45
  "step": 50
46
- },
47
- {
48
- "epoch": 0.003956087429532192,
49
- "grad_norm": 1.7071881294250488,
50
- "learning_rate": 0.0002,
51
- "loss": 1.6193,
52
- "step": 60
53
- },
54
- {
55
- "epoch": 0.004615435334454225,
56
- "grad_norm": 2.5582172870635986,
57
- "learning_rate": 0.0002,
58
- "loss": 1.3123,
59
- "step": 70
60
- },
61
- {
62
- "epoch": 0.0052747832393762566,
63
- "grad_norm": 1.731862187385559,
64
- "learning_rate": 0.0002,
65
- "loss": 1.2874,
66
- "step": 80
67
- },
68
- {
69
- "epoch": 0.005934131144298289,
70
- "grad_norm": 1.6525731086730957,
71
- "learning_rate": 0.0002,
72
- "loss": 1.222,
73
- "step": 90
74
- },
75
- {
76
- "epoch": 0.006593479049220321,
77
- "grad_norm": 1.7500087022781372,
78
- "learning_rate": 0.0002,
79
- "loss": 1.0816,
80
- "step": 100
81
- },
82
- {
83
- "epoch": 0.007252826954142353,
84
- "grad_norm": 2.4738411903381348,
85
- "learning_rate": 0.0002,
86
- "loss": 1.4273,
87
- "step": 110
88
- },
89
- {
90
- "epoch": 0.007912174859064385,
91
- "grad_norm": 1.6450600624084473,
92
- "learning_rate": 0.0002,
93
- "loss": 1.272,
94
- "step": 120
95
- },
96
- {
97
- "epoch": 0.008571522763986417,
98
- "grad_norm": 1.6056126356124878,
99
- "learning_rate": 0.0002,
100
- "loss": 1.2377,
101
- "step": 130
102
- },
103
- {
104
- "epoch": 0.00923087066890845,
105
- "grad_norm": 1.706062912940979,
106
- "learning_rate": 0.0002,
107
- "loss": 1.2175,
108
- "step": 140
109
- },
110
- {
111
- "epoch": 0.009890218573830482,
112
- "grad_norm": 2.413339614868164,
113
- "learning_rate": 0.0002,
114
- "loss": 1.1407,
115
- "step": 150
116
  }
117
  ],
118
  "logging_steps": 10,
@@ -132,7 +62,7 @@
132
  "attributes": {}
133
  }
134
  },
135
- "total_flos": 259097708531712.0,
136
  "train_batch_size": 2,
137
  "trial_name": null,
138
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.0032967395246101604,
6
  "eval_steps": 500,
7
+ "global_step": 50,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 0.0006593479049220321,
14
+ "grad_norm": 4.714463233947754,
15
  "learning_rate": 0.0002,
16
+ "loss": 4.214,
17
  "step": 10
18
  },
19
  {
20
  "epoch": 0.0013186958098440641,
21
+ "grad_norm": 7.59287691116333,
22
  "learning_rate": 0.0002,
23
+ "loss": 2.1247,
24
  "step": 20
25
  },
26
  {
27
  "epoch": 0.001978043714766096,
28
+ "grad_norm": 1.9915987253189087,
29
  "learning_rate": 0.0002,
30
+ "loss": 1.6737,
31
  "step": 30
32
  },
33
  {
34
  "epoch": 0.0026373916196881283,
35
+ "grad_norm": 1.6922399997711182,
36
  "learning_rate": 0.0002,
37
+ "loss": 1.495,
38
  "step": 40
39
  },
40
  {
41
  "epoch": 0.0032967395246101604,
42
+ "grad_norm": 2.0545172691345215,
43
  "learning_rate": 0.0002,
44
+ "loss": 1.2314,
45
  "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  }
47
  ],
48
  "logging_steps": 10,
 
62
  "attributes": {}
63
  }
64
  },
65
+ "total_flos": 86209240154112.0,
66
  "train_batch_size": 2,
67
  "trial_name": null,
68
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c7511702f18d8ac213dc65da12f19fd71052f783eb5d7f8d09cb8b9aec226f3
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f35053563362c1def6be0aee6e36b447f4c27182c14ba0c6bf80a20f0a3c3cf
3
  size 5368