danielhanchen committed on
Commit
7ece404
·
verified ·
1 Parent(s): e1fc1b4

Upload folder using huggingface_hub

Browse files
chat_template.jinja CHANGED
@@ -7,7 +7,7 @@
7
 
8
  ## {{ function.name }}
9
 
10
- {{ function | tojson(indent=4, ensure_ascii=False) }}
11
  在调用上述函数时,请使用 Json 格式表示调用的参数。
12
  {%- endfor %}
13
  {%- endif -%}
 
7
 
8
  ## {{ function.name }}
9
 
10
+ {{ function | tojson(indent=4)|string }}
11
  在调用上述函数时,请使用 Json 格式表示调用的参数。
12
  {%- endfor %}
13
  {%- endif -%}
config.json CHANGED
@@ -19,13 +19,14 @@
19
  "num_attention_heads": 32,
20
  "num_hidden_layers": 40,
21
  "num_key_value_heads": 2,
22
- "pad_token_id": 151329,
23
  "partial_rotary_factor": 0.5,
24
  "rms_norm_eps": 1e-05,
25
  "rope_theta": 10000.0,
26
  "tie_word_embeddings": false,
27
  "torch_dtype": "bfloat16",
28
- "transformers_version": "4.52.0.dev0",
 
29
  "use_cache": true,
30
  "vocab_size": 151552
31
- }
 
19
  "num_attention_heads": 32,
20
  "num_hidden_layers": 40,
21
  "num_key_value_heads": 2,
22
+ "pad_token_id": 151330,
23
  "partial_rotary_factor": 0.5,
24
  "rms_norm_eps": 1e-05,
25
  "rope_theta": 10000.0,
26
  "tie_word_embeddings": false,
27
  "torch_dtype": "bfloat16",
28
+ "transformers_version": "4.52.4",
29
+ "unsloth_fixed": true,
30
  "use_cache": true,
31
  "vocab_size": 151552
32
+ }
special_tokens_map.json CHANGED
@@ -16,14 +16,14 @@
16
  "<|end_of_video|>"
17
  ],
18
  "eos_token": {
19
- "content": "<|endoftext|>",
20
  "lstrip": false,
21
  "normalized": false,
22
  "rstrip": false,
23
  "single_word": false
24
  },
25
  "pad_token": {
26
- "content": "<|endoftext|>",
27
  "lstrip": false,
28
  "normalized": false,
29
  "rstrip": false,
 
16
  "<|end_of_video|>"
17
  ],
18
  "eos_token": {
19
+ "content": "<|user|>",
20
  "lstrip": false,
21
  "normalized": false,
22
  "rstrip": false,
23
  "single_word": false
24
  },
25
  "pad_token": {
26
+ "content": "[MASK]",
27
  "lstrip": false,
28
  "normalized": false,
29
  "rstrip": false,
tokenizer_config.json CHANGED
@@ -129,7 +129,7 @@
129
  "<|begin_of_video|>",
130
  "<|end_of_video|>"
131
  ],
132
- "chat_template": "[gMASK]<sop>{%- if tools -%}<|system|>\n# 可用工具\n{% for tool in tools %}{%- set function = tool.function if tool.get(\"function\") else tool %}\n\n## {{ function.name }}\n\n{{ function | tojson(indent=4, ensure_ascii=False) }}\n在调用上述函数时,请使用 Json 格式表示调用的参数。{%- endfor %}{%- endif -%}{%- for msg in messages %}{%- if msg.role == 'system' %}<|system|>\n{{ msg.content }}{%- endif %}{%- endfor %}{%- for message in messages if message.role != 'system' %}{%- set role = message['role'] %}{%- set content = message['content'] %}{%- set meta = message.get(\"metadata\", \"\") %}{%- if role == 'user' %}<|user|>\n{{ content }}{%- elif role == 'assistant' and not meta %}<|assistant|>\n{{ content }}{%- elif role == 'assistant' and meta %}<|assistant|>{{ meta }} \n{{ content }}{%- elif role == 'observation' %}<|observation|>\n{{ content }}{%- endif %}{%- endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}",
133
  "clean_up_tokenization_spaces": false,
134
  "do_lower_case": false,
135
  "eos_token": "<|user|>",
@@ -138,9 +138,11 @@
138
  "input_ids",
139
  "attention_mask"
140
  ],
141
- "model_max_length": 128000,
142
- "pad_token": "<|endoftext|>",
143
  "padding_side": "left",
144
  "remove_space": false,
145
- "tokenizer_class": "PreTrainedTokenizer"
146
- }
 
 
 
129
  "<|begin_of_video|>",
130
  "<|end_of_video|>"
131
  ],
132
+ "bos_token": null,
133
  "clean_up_tokenization_spaces": false,
134
  "do_lower_case": false,
135
  "eos_token": "<|user|>",
 
138
  "input_ids",
139
  "attention_mask"
140
  ],
141
+ "model_max_length": 32768,
142
+ "pad_token": "[MASK]",
143
  "padding_side": "left",
144
  "remove_space": false,
145
+ "tokenizer_class": "PreTrainedTokenizer",
146
+ "unk_token": null,
147
+ "chat_template": "[gMASK]<sop>\n{%- if tools -%}\n<|system|>\n# 可用工具\n{% for tool in tools %}\n {%- set function = tool.function if tool.get(\"function\") else tool %}\n\n## {{ function.name }}\n\n{{ function | tojson(indent=4)|string }}\n在调用上述函数时,请使用 Json 格式表示调用的参数。\n{%- endfor %}\n{%- endif -%}\n\n{%- for msg in messages %}\n {%- if msg.role == 'system' %}\n<|system|>\n{{ msg.content }}\n {%- endif %}\n{%- endfor %}\n\n{%- for message in messages if message.role != 'system' %}\n {%- set role = message['role'] %}\n {%- set content = message['content'] %}\n {%- set meta = message.get(\"metadata\", \"\") %}\n\n {%- if role == 'user' %}\n<|user|>\n{{ content }}\n {%- elif role == 'assistant' and not meta %}\n<|assistant|>\n{{ content }}\n {%- elif role == 'assistant' and meta %}\n<|assistant|>{{ meta }}\n{{ content }}\n {%- elif role == 'observation' %}\n<|observation|>\n{{ content }}\n {%- endif %}\n{%- endfor %}\n{% if add_generation_prompt %}<|assistant|>{% endif %}"
148
+ }