danielhanchen committed on
Commit
a356171
·
verified ·
1 Parent(s): 987d952

Add files using upload-large-folder tool

Browse files
config.json CHANGED
@@ -4,7 +4,6 @@
4
  ],
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
7
- "bos_token_id": 151643,
8
  "eos_token_id": 151645,
9
  "head_dim": 128,
10
  "hidden_act": "silu",
@@ -17,23 +16,25 @@
17
  "num_attention_heads": 40,
18
  "num_hidden_layers": 40,
19
  "num_key_value_heads": 8,
 
 
 
 
 
 
 
 
 
 
20
  "rms_norm_eps": 1e-06,
21
  "rope_scaling": null,
22
  "rope_theta": 1000000,
23
  "sliding_window": null,
24
  "tie_word_embeddings": false,
25
  "torch_dtype": "bfloat16",
26
- "transformers_version": "4.51.0",
 
27
  "use_cache": true,
28
  "use_sliding_window": false,
29
- "vocab_size": 151936,
30
- "quantization_config": {
31
- "activation_scheme": "dynamic",
32
- "fmt": "e4m3",
33
- "quant_method": "fp8",
34
- "weight_block_size": [
35
- 128,
36
- 128
37
- ]
38
- }
39
- }
 
4
  ],
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
 
7
  "eos_token_id": 151645,
8
  "head_dim": 128,
9
  "hidden_act": "silu",
 
16
  "num_attention_heads": 40,
17
  "num_hidden_layers": 40,
18
  "num_key_value_heads": 8,
19
+ "pad_token_id": 151654,
20
+ "quantization_config": {
21
+ "activation_scheme": "dynamic",
22
+ "modules_to_not_convert": null,
23
+ "quant_method": "fp8",
24
+ "weight_block_size": [
25
+ 128,
26
+ 128
27
+ ]
28
+ },
29
  "rms_norm_eps": 1e-06,
30
  "rope_scaling": null,
31
  "rope_theta": 1000000,
32
  "sliding_window": null,
33
  "tie_word_embeddings": false,
34
  "torch_dtype": "bfloat16",
35
+ "transformers_version": "4.52.0.dev0",
36
+ "unsloth_fixed": true,
37
  "use_cache": true,
38
  "use_sliding_window": false,
39
+ "vocab_size": 151936
40
+ }
 
 
 
 
 
 
 
 
 
generation_config.json CHANGED
@@ -1,13 +1,14 @@
1
  {
2
- "bos_token_id": 151643,
3
- "do_sample": true,
4
- "eos_token_id": [
5
- 151645,
6
- 151643
7
- ],
8
- "pad_token_id": 151643,
9
- "temperature": 0.6,
10
- "top_k": 20,
11
- "top_p": 0.95,
12
- "transformers_version": "4.51.0"
 
13
  }
 
1
  {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "max_length": 40960,
9
+ "pad_token_id": 151654,
10
+ "temperature": 0.6,
11
+ "top_k": 20,
12
+ "top_p": 0.95,
13
+ "transformers_version": "4.52.0.dev0"
14
  }
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_size": 16339276800
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00004-of-00004.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_size": 16326169600
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00004-of-00004.safetensors",
tokenizer_config.json CHANGED
@@ -231,7 +231,7 @@
231
  "eos_token": "<|im_end|>",
232
  "errors": "replace",
233
  "extra_special_tokens": {},
234
- "model_max_length": 131072,
235
  "pad_token": "<|vision_pad|>",
236
  "padding_side": "left",
237
  "split_special_tokens": false,
 
231
  "eos_token": "<|im_end|>",
232
  "errors": "replace",
233
  "extra_special_tokens": {},
234
+ "model_max_length": 40960,
235
  "pad_token": "<|vision_pad|>",
236
  "padding_side": "left",
237
  "split_special_tokens": false,