alessiobezzi committed on
Commit
a25ee72
·
verified ·
1 Parent(s): d688fef

Training in progress, epoch 1

Browse files
adapter_config.json CHANGED
@@ -1,7 +1,10 @@
1
  {
2
  "alpha_pattern": {},
3
- "auto_mapping": null,
4
- "base_model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
 
 
 
5
  "bias": "none",
6
  "corda_config": null,
7
  "eva_config": null,
@@ -13,7 +16,7 @@
13
  "layers_pattern": null,
14
  "layers_to_transform": null,
15
  "loftq_config": {},
16
- "lora_alpha": 8,
17
  "lora_bias": false,
18
  "lora_dropout": 0.05,
19
  "megatron_config": null,
@@ -23,19 +26,22 @@
23
  "embed_tokens"
24
  ],
25
  "peft_type": "LORA",
26
- "r": 8,
27
  "rank_pattern": {},
28
  "revision": null,
29
  "target_modules": [
30
- "q_proj",
31
- "up_proj",
 
32
  "gate_proj",
33
- "v_proj",
 
34
  "o_proj",
35
  "k_proj",
36
- "down_proj"
 
37
  ],
38
- "task_type": "CAUSAL_LM",
39
  "trainable_token_indices": null,
40
  "use_dora": false,
41
  "use_rslora": false
 
1
  {
2
  "alpha_pattern": {},
3
+ "auto_mapping": {
4
+ "base_model_class": "Gemma3ForConditionalGeneration",
5
+ "parent_library": "transformers.models.gemma3.modeling_gemma3"
6
+ },
7
+ "base_model_name_or_path": "google/gemma-3-27b-it",
8
  "bias": "none",
9
  "corda_config": null,
10
  "eva_config": null,
 
16
  "layers_pattern": null,
17
  "layers_to_transform": null,
18
  "loftq_config": {},
19
+ "lora_alpha": 16,
20
  "lora_bias": false,
21
  "lora_dropout": 0.05,
22
  "megatron_config": null,
 
26
  "embed_tokens"
27
  ],
28
  "peft_type": "LORA",
29
+ "r": 16,
30
  "rank_pattern": {},
31
  "revision": null,
32
  "target_modules": [
33
+ "out_proj",
34
+ "fc2",
35
+ "down_proj",
36
  "gate_proj",
37
+ "up_proj",
38
+ "fc1",
39
  "o_proj",
40
  "k_proj",
41
+ "v_proj",
42
+ "q_proj"
43
  ],
44
+ "task_type": null,
45
  "trainable_token_indices": null,
46
  "use_dora": false,
47
  "use_rslora": false
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3103739f20055b097aa7e8eb1f5835b25753fb77386dc4c348b868c0bf7a5224
3
- size 3382826216
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:740908d9140c8ef11d44c015ad7e4fc8c06f674d02e721c5f96dc9aab4109742
3
+ size 6127551176
runs/May05_07-34-31_code-smells-gpu/events.out.tfevents.1746430489.code-smells-gpu.5094.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:131cda81cb0d1aa7a3952614df76588a9f4c723bb1f195fc5835e160e5128f7f
3
+ size 18286
special_tokens_map.json CHANGED
@@ -1,20 +1,30 @@
1
  {
 
2
  "bos_token": {
3
- "content": "<|begin▁of▁sentence|>",
4
  "lstrip": false,
5
  "normalized": false,
6
  "rstrip": false,
7
  "single_word": false
8
  },
 
9
  "eos_token": {
10
- "content": "<|end▁of▁sentence|>",
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
15
  },
 
16
  "pad_token": {
17
- "content": "<|end▁of▁sentence|>",
 
 
 
 
 
 
 
18
  "lstrip": false,
19
  "normalized": false,
20
  "rstrip": false,
 
1
  {
2
+ "boi_token": "<start_of_image>",
3
  "bos_token": {
4
+ "content": "<bos>",
5
  "lstrip": false,
6
  "normalized": false,
7
  "rstrip": false,
8
  "single_word": false
9
  },
10
+ "eoi_token": "<end_of_image>",
11
  "eos_token": {
12
+ "content": "<eos>",
13
  "lstrip": false,
14
  "normalized": false,
15
  "rstrip": false,
16
  "single_word": false
17
  },
18
+ "image_token": "<image_soft_token>",
19
  "pad_token": {
20
+ "content": "<pad>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false
25
+ },
26
+ "unk_token": {
27
+ "content": "<unk>",
28
  "lstrip": false,
29
  "normalized": false,
30
  "rstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893
3
- size 11422778
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4667f2089529e8e7657cfb6d1c19910ae71ff5f28aa7ab2ff2763330affad795
3
+ size 33384568
tokenizer_config.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e755e1fb8c5b095b9aa2eb663b64de0c6ac5602912cbaba6997903f592fc8e6b
3
  size 5816
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb68ee0a206c5ab6f71dbae93b6c2ba860e5f97388483e3c285141814c228dca
3
  size 5816