qqc1989 commited on
Commit
cc0c66c
·
verified ·
1 Parent(s): c5f7e10

Upload 49 files

Browse files
Files changed (50) hide show
  1. .gitattributes +4 -0
  2. config.json +0 -0
  3. main_ax650 +3 -0
  4. main_axcl_aarch64 +3 -0
  5. main_axcl_x86 +3 -0
  6. post_config.json +14 -0
  7. qwen2.5_tokenizer/merges.txt +0 -0
  8. qwen2.5_tokenizer/tokenizer.json +0 -0
  9. qwen2.5_tokenizer/tokenizer_config.json +207 -0
  10. qwen2.5_tokenizer/vocab.json +0 -0
  11. qwen3-0.6b-ax650/model.embed_tokens.weight.bfloat16.bin +3 -0
  12. qwen3-0.6b-ax650/qwen3_p128_l0_together.axmodel +3 -0
  13. qwen3-0.6b-ax650/qwen3_p128_l10_together.axmodel +3 -0
  14. qwen3-0.6b-ax650/qwen3_p128_l11_together.axmodel +3 -0
  15. qwen3-0.6b-ax650/qwen3_p128_l12_together.axmodel +3 -0
  16. qwen3-0.6b-ax650/qwen3_p128_l13_together.axmodel +3 -0
  17. qwen3-0.6b-ax650/qwen3_p128_l14_together.axmodel +3 -0
  18. qwen3-0.6b-ax650/qwen3_p128_l15_together.axmodel +3 -0
  19. qwen3-0.6b-ax650/qwen3_p128_l16_together.axmodel +3 -0
  20. qwen3-0.6b-ax650/qwen3_p128_l17_together.axmodel +3 -0
  21. qwen3-0.6b-ax650/qwen3_p128_l18_together.axmodel +3 -0
  22. qwen3-0.6b-ax650/qwen3_p128_l19_together.axmodel +3 -0
  23. qwen3-0.6b-ax650/qwen3_p128_l1_together.axmodel +3 -0
  24. qwen3-0.6b-ax650/qwen3_p128_l20_together.axmodel +3 -0
  25. qwen3-0.6b-ax650/qwen3_p128_l21_together.axmodel +3 -0
  26. qwen3-0.6b-ax650/qwen3_p128_l22_together.axmodel +3 -0
  27. qwen3-0.6b-ax650/qwen3_p128_l23_together.axmodel +3 -0
  28. qwen3-0.6b-ax650/qwen3_p128_l24_together.axmodel +3 -0
  29. qwen3-0.6b-ax650/qwen3_p128_l25_together.axmodel +3 -0
  30. qwen3-0.6b-ax650/qwen3_p128_l26_together.axmodel +3 -0
  31. qwen3-0.6b-ax650/qwen3_p128_l27_together.axmodel +3 -0
  32. qwen3-0.6b-ax650/qwen3_p128_l2_together.axmodel +3 -0
  33. qwen3-0.6b-ax650/qwen3_p128_l3_together.axmodel +3 -0
  34. qwen3-0.6b-ax650/qwen3_p128_l4_together.axmodel +3 -0
  35. qwen3-0.6b-ax650/qwen3_p128_l5_together.axmodel +3 -0
  36. qwen3-0.6b-ax650/qwen3_p128_l6_together.axmodel +3 -0
  37. qwen3-0.6b-ax650/qwen3_p128_l7_together.axmodel +3 -0
  38. qwen3-0.6b-ax650/qwen3_p128_l8_together.axmodel +3 -0
  39. qwen3-0.6b-ax650/qwen3_p128_l9_together.axmodel +3 -0
  40. qwen3-0.6b-ax650/qwen3_post.axmodel +3 -0
  41. qwen3_tokenizer/config.json +30 -0
  42. qwen3_tokenizer/generation_config.json +13 -0
  43. qwen3_tokenizer/merges.txt +0 -0
  44. qwen3_tokenizer/tokenizer.json +3 -0
  45. qwen3_tokenizer/tokenizer_config.json +239 -0
  46. qwen3_tokenizer/vocab.json +0 -0
  47. qwen3_tokenizer_uid.py +214 -0
  48. run_qwen3_0.6b_int8_ctx_ax650.sh +14 -0
  49. run_qwen3_0.6b_int8_ctx_axcl_aarch64.sh +15 -0
  50. run_qwen3_0.6b_int8_ctx_axcl_x86.sh +15 -0
.gitattributes CHANGED
@@ -34,3 +34,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *.axmodel filter=lfs diff=lfs merge=lfs -text
36
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *.axmodel filter=lfs diff=lfs merge=lfs -text
36
  *tfevents* filter=lfs diff=lfs merge=lfs -text
37
+ main_ax650 filter=lfs diff=lfs merge=lfs -text
38
+ main_axcl_aarch64 filter=lfs diff=lfs merge=lfs -text
39
+ main_axcl_x86 filter=lfs diff=lfs merge=lfs -text
40
+ qwen3_tokenizer/tokenizer.json filter=lfs diff=lfs merge=lfs -text
config.json ADDED
File without changes
main_ax650 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdc5392fa0454f782562d64d964c708b2829847772db08ac21406d3b5e58c0b8
3
+ size 981152
main_axcl_aarch64 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e27934d17d671208441f65a31ee40ddeff7cbade19bf20cf2dc88be9e042def
3
+ size 1724632
main_axcl_x86 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d427677213e3b50b22ed55a1d0af10223f5b0711c1bf050d0655aedd3ffc8df9
3
+ size 1839824
post_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "enable_temperature" : false,
3
+ "temperature" : 0.9,
4
+
5
+ "enable_repetition_penalty" : false,
6
+ "repetition_penalty" : 1.2,
7
+ "penalty_window" : 20,
8
+
9
+ "enable_top_p_sampling" : false,
10
+ "top_p" : 0.8,
11
+
12
+ "enable_top_k_sampling" : true,
13
+ "top_k" : 1
14
+ }
qwen2.5_tokenizer/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
qwen2.5_tokenizer/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
qwen2.5_tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "151643": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "151644": {
13
+ "content": "<|im_start|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "151645": {
21
+ "content": "<|im_end|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "151646": {
29
+ "content": "<|object_ref_start|>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "151647": {
37
+ "content": "<|object_ref_end|>",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "151648": {
45
+ "content": "<|box_start|>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "151649": {
53
+ "content": "<|box_end|>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "151650": {
61
+ "content": "<|quad_start|>",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "151651": {
69
+ "content": "<|quad_end|>",
70
+ "lstrip": false,
71
+ "normalized": false,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "151652": {
77
+ "content": "<|vision_start|>",
78
+ "lstrip": false,
79
+ "normalized": false,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "151653": {
85
+ "content": "<|vision_end|>",
86
+ "lstrip": false,
87
+ "normalized": false,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "151654": {
93
+ "content": "<|vision_pad|>",
94
+ "lstrip": false,
95
+ "normalized": false,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": true
99
+ },
100
+ "151655": {
101
+ "content": "<|image_pad|>",
102
+ "lstrip": false,
103
+ "normalized": false,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": true
107
+ },
108
+ "151656": {
109
+ "content": "<|video_pad|>",
110
+ "lstrip": false,
111
+ "normalized": false,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": true
115
+ },
116
+ "151657": {
117
+ "content": "<tool_call>",
118
+ "lstrip": false,
119
+ "normalized": false,
120
+ "rstrip": false,
121
+ "single_word": false,
122
+ "special": false
123
+ },
124
+ "151658": {
125
+ "content": "</tool_call>",
126
+ "lstrip": false,
127
+ "normalized": false,
128
+ "rstrip": false,
129
+ "single_word": false,
130
+ "special": false
131
+ },
132
+ "151659": {
133
+ "content": "<|fim_prefix|>",
134
+ "lstrip": false,
135
+ "normalized": false,
136
+ "rstrip": false,
137
+ "single_word": false,
138
+ "special": false
139
+ },
140
+ "151660": {
141
+ "content": "<|fim_middle|>",
142
+ "lstrip": false,
143
+ "normalized": false,
144
+ "rstrip": false,
145
+ "single_word": false,
146
+ "special": false
147
+ },
148
+ "151661": {
149
+ "content": "<|fim_suffix|>",
150
+ "lstrip": false,
151
+ "normalized": false,
152
+ "rstrip": false,
153
+ "single_word": false,
154
+ "special": false
155
+ },
156
+ "151662": {
157
+ "content": "<|fim_pad|>",
158
+ "lstrip": false,
159
+ "normalized": false,
160
+ "rstrip": false,
161
+ "single_word": false,
162
+ "special": false
163
+ },
164
+ "151663": {
165
+ "content": "<|repo_name|>",
166
+ "lstrip": false,
167
+ "normalized": false,
168
+ "rstrip": false,
169
+ "single_word": false,
170
+ "special": false
171
+ },
172
+ "151664": {
173
+ "content": "<|file_sep|>",
174
+ "lstrip": false,
175
+ "normalized": false,
176
+ "rstrip": false,
177
+ "single_word": false,
178
+ "special": false
179
+ }
180
+ },
181
+ "additional_special_tokens": [
182
+ "<|im_start|>",
183
+ "<|im_end|>",
184
+ "<|object_ref_start|>",
185
+ "<|object_ref_end|>",
186
+ "<|box_start|>",
187
+ "<|box_end|>",
188
+ "<|quad_start|>",
189
+ "<|quad_end|>",
190
+ "<|vision_start|>",
191
+ "<|vision_end|>",
192
+ "<|vision_pad|>",
193
+ "<|image_pad|>",
194
+ "<|video_pad|>"
195
+ ],
196
+ "bos_token": null,
197
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
198
+ "clean_up_tokenization_spaces": false,
199
+ "eos_token": "<|im_end|>",
200
+ "errors": "replace",
201
+ "model_max_length": 131072,
202
+ "pad_token": "<|endoftext|>",
203
+ "split_special_tokens": false,
204
+ "tokenizer_class": "Qwen2Tokenizer",
205
+ "unk_token": null,
206
+ "add_bos_token": false
207
+ }
qwen2.5_tokenizer/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
qwen3-0.6b-ax650/model.embed_tokens.weight.bfloat16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f29acf519434862d95613b2b4f6b9d14933a5e4d16baebf8ac0b33b410acfb6
3
+ size 311164928
qwen3-0.6b-ax650/qwen3_p128_l0_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcd24167b78cc772c2d3701cbb436e258db587d4168c52ec4c15bd6002ca6ed6
3
+ size 27123984
qwen3-0.6b-ax650/qwen3_p128_l10_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc6dc71b56599417a72592e2a9cd6b391978b02ab3ab980de574ca2bf7f0a897
3
+ size 27124240
qwen3-0.6b-ax650/qwen3_p128_l11_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9983e94cbc475867f3bec3f5961e899070149085a7da39cfc6383f4ce5e20fec
3
+ size 27123984
qwen3-0.6b-ax650/qwen3_p128_l12_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31d383fd27db82be6a7fff240838f8bc68f9e508ebbe4afa8862ab84fafeb9da
3
+ size 27124208
qwen3-0.6b-ax650/qwen3_p128_l13_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4b1564133162afc9e08bb46fa3a7e91ad681b5ef7a3f0a5707827bd18d618da
3
+ size 27124176
qwen3-0.6b-ax650/qwen3_p128_l14_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c498c17de6ce20587715ede473b941239bf234bffe4f5462fdae5b4f5f1e013
3
+ size 27124144
qwen3-0.6b-ax650/qwen3_p128_l15_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb2f79708412674f7299ea3655b866af763abb311bb7eb72a71bb8c831d0a898
3
+ size 27124048
qwen3-0.6b-ax650/qwen3_p128_l16_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:602830bed5841fc0eb3bc7f6675c5810b226db2b4d2bcb798ee4125dc9a5d59a
3
+ size 27124048
qwen3-0.6b-ax650/qwen3_p128_l17_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca191339394a1e0e405151fd62f30748feb351e53fa489ff5b41480e5a073e35
3
+ size 27123984
qwen3-0.6b-ax650/qwen3_p128_l18_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53df7abbed452f8261987f89f7cc66a001c7b168508a1ef14971b76abed65c50
3
+ size 27124240
qwen3-0.6b-ax650/qwen3_p128_l19_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8efb0386f4eeacb3a9550813b65637b566f9c513308c26c6a8d03d74b885ef0
3
+ size 27124112
qwen3-0.6b-ax650/qwen3_p128_l1_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dafd423a5c3081a48af896d6a8090a1ebd27efec5ba105c3ce52f34f22e29e42
3
+ size 27124368
qwen3-0.6b-ax650/qwen3_p128_l20_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:835e214b8a0836d3affd8e7359036b7a10dbfc00787928de66aa14d864a81e51
3
+ size 27123984
qwen3-0.6b-ax650/qwen3_p128_l21_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:836f708d519f1b99d48fd264e8efcbc3e58fc9c55454ca8d617b939315f77648
3
+ size 27124176
qwen3-0.6b-ax650/qwen3_p128_l22_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6d014696658bc86036055c2efba67ef40e19242f45a83a6604e41479f40b8ea
3
+ size 27124240
qwen3-0.6b-ax650/qwen3_p128_l23_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6892f65949dd0a8a460072e60fee94e3a7c443c35f78303ffa613790165d12a
3
+ size 27124240
qwen3-0.6b-ax650/qwen3_p128_l24_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3156e644a4ca87993e196ddb1c12a49e82c4f92f651021d0232471993a23d9b0
3
+ size 27124496
qwen3-0.6b-ax650/qwen3_p128_l25_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9114ab2f42e70990607cfb5dbc6c89ea6ee485a0425f4f617e4a2309bcd08358
3
+ size 27124368
qwen3-0.6b-ax650/qwen3_p128_l26_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7f5845950fb97073131cebb52c7fadfef026fed0b16dcee2ab792b65b3f721a
3
+ size 27123984
qwen3-0.6b-ax650/qwen3_p128_l27_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4796dcb182378de1d15315a6b65e19c4d191982da33fe8169cf7ef78a97d805f
3
+ size 27124496
qwen3-0.6b-ax650/qwen3_p128_l2_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b171ca124ae369079d443b4e2e6a5a207f09315995ea8cc235937bf2d1d68e77
3
+ size 27124112
qwen3-0.6b-ax650/qwen3_p128_l3_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23c9ae2fbce26f77f0a875ab94ac5096ca92c94e47e07ba8a01b2e56ee25fbfe
3
+ size 27123984
qwen3-0.6b-ax650/qwen3_p128_l4_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1a6408092998e01dbcc3cd378c10e051107d4d54697347b384aeb294703d360
3
+ size 27123984
qwen3-0.6b-ax650/qwen3_p128_l5_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8966d4d9dc75951bebec922c2cb824328f7ddc7e20e4322fe1f5749c96f02881
3
+ size 27124048
qwen3-0.6b-ax650/qwen3_p128_l6_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37893a55f23d7582cb36423e7f1e4c42733a26940fc5d1181312219ad5ed95fe
3
+ size 27123984
qwen3-0.6b-ax650/qwen3_p128_l7_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e61ebc34ef8c3c483c5b85b48b4589979f4c1d07388926c6e91dbf2dc58f474
3
+ size 27124368
qwen3-0.6b-ax650/qwen3_p128_l8_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f3989668b8774aa73d3d6cd56b4353cfc7a90d3b99bb513dc27123750f17f1e
3
+ size 27124240
qwen3-0.6b-ax650/qwen3_p128_l9_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3da368a35ca79046c64ebc839a5ddadd11a46a7cac91a21494ba8f548add440
3
+ size 27123984
qwen3-0.6b-ax650/qwen3_post.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d7cf72f5c066432591e6f8ab890c00ccfb92efe591bebd8cb4ce755b3e9124a
3
+ size 169743543
qwen3_tokenizer/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151645,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "max_position_embeddings": 40960,
15
+ "max_window_layers": 28,
16
+ "model_type": "qwen3",
17
+ "num_attention_heads": 16,
18
+ "num_hidden_layers": 28,
19
+ "num_key_value_heads": 8,
20
+ "rms_norm_eps": 1e-06,
21
+ "rope_scaling": null,
22
+ "rope_theta": 1000000,
23
+ "sliding_window": null,
24
+ "tie_word_embeddings": true,
25
+ "torch_dtype": "bfloat16",
26
+ "transformers_version": "4.51.0",
27
+ "use_cache": true,
28
+ "use_sliding_window": false,
29
+ "vocab_size": 151936
30
+ }
qwen3_tokenizer/generation_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "temperature": 0.6,
10
+ "top_k": 20,
11
+ "top_p": 0.95,
12
+ "transformers_version": "4.51.0"
13
+ }
qwen3_tokenizer/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
qwen3_tokenizer/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
qwen3_tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ },
181
+ "151665": {
182
+ "content": "<tool_response>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": false
188
+ },
189
+ "151666": {
190
+ "content": "</tool_response>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "151667": {
198
+ "content": "<think>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": false
204
+ },
205
+ "151668": {
206
+ "content": "</think>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": false
212
+ }
213
+ },
214
+ "additional_special_tokens": [
215
+ "<|im_start|>",
216
+ "<|im_end|>",
217
+ "<|object_ref_start|>",
218
+ "<|object_ref_end|>",
219
+ "<|box_start|>",
220
+ "<|box_end|>",
221
+ "<|quad_start|>",
222
+ "<|quad_end|>",
223
+ "<|vision_start|>",
224
+ "<|vision_end|>",
225
+ "<|vision_pad|>",
226
+ "<|image_pad|>",
227
+ "<|video_pad|>"
228
+ ],
229
+ "bos_token": null,
230
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set content = message.content %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is defined and message.reasoning_content is not none %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '</think>' in message.content %}\n {%- set content = message.content.split('</think>')[-1].lstrip('\\n') %}\n {%- set reasoning_content = 
message.content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n {%- if enable_thinking is defined and enable_thinking is false %}\n {{- '<think>\\n\\n</think>\\n\\n' }}\n {%- endif %}\n{%- endif %}",
231
+ "clean_up_tokenization_spaces": false,
232
+ "eos_token": "<|im_end|>",
233
+ "errors": "replace",
234
+ "model_max_length": 131072,
235
+ "pad_token": "<|endoftext|>",
236
+ "split_special_tokens": false,
237
+ "tokenizer_class": "Qwen2Tokenizer",
238
+ "unk_token": null
239
+ }
qwen3_tokenizer/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
qwen3_tokenizer_uid.py ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoTokenizer, PreTrainedTokenizerFast
2
+ from http.server import HTTPServer, BaseHTTPRequestHandler
3
+ import json
4
+ import argparse
5
+ import uuid
6
+
7
+ # 全局字典:存储 uid 到 Tokenizer_Http 实例的映射
8
+ tokenizers = {}
9
+
10
class Tokenizer_Http():
    """Chat-session tokenizer state for Qwen3.

    Keeps the running message list, the token ids of the conversation
    rendered so far (used to compute per-turn diffs), and a small cache
    used to stream-decode partial UTF-8 sequences.
    """

    def __init__(self):
        # Tokenizer that actually encodes/decodes ids for the model.
        self.tokenizer = AutoTokenizer.from_pretrained("qwen3_tokenizer")
        # Qwen2.5 tokenizer is used only to render the chat-template text.
        self.tokenizer_25 = AutoTokenizer.from_pretrained("qwen2.5_tokenizer")
        self.messages = [
            {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
        ]
        # Ids of the rendered conversation so far.
        self.token_ids = []
        # Pending ids whose decoded text was not yet valid UTF-8.
        self.token_ids_cache = []

    def remove_think(self, text: str):
        # Drop the <think>/</think> markers and trim surrounding newlines.
        for tag in ("<think>", "</think>"):
            text = text.replace(tag, "")
        return text.strip("\n")

    def encode(self, prompt: str, last_reply: str = None):
        """Append the new user turn (and optionally the previous assistant
        reply) to the history; return (all token ids, newly-added ids)."""
        if last_reply is not None:
            # Record the assistant's previous answer without its think block,
            # then re-sync self.token_ids with the rendered history.
            self.messages.append({"role": "assistant", "content": self.remove_think(last_reply)})
            text = self.tokenizer_25.apply_chat_template(
                self.messages,
                tokenize=False,
                add_generation_prompt=True
            )
            print("fff生成的文本:\n============\n", text, "============\n")
            # NOTE(review): [:-3] drops the trailing generation-prompt ids —
            # assumes they always encode to exactly 3 tokens; confirm.
            self.token_ids = self.tokenizer.encode(text)[:-3]
            print("diff:", self.decode(self.token_ids))
        if not prompt.endswith("/no_think"):
            prompt += "/no_think"
        print("prompt:", prompt)
        self.messages.append({"role": "user", "content": prompt})

        text = self.tokenizer_25.apply_chat_template(
            self.messages,
            tokenize=False,
            add_generation_prompt=True
        )
        print("生成的文本:\n============\n", text, "============\n")
        token_ids = self.tokenizer.encode(text)
        # Only the ids appended by this turn.
        diff = token_ids[len(self.token_ids):]
        self.token_ids = token_ids
        print("diff:", self.decode(diff))
        return token_ids, diff

    def decode(self, token_ids):
        """Stream-decode ids: buffer them until they decode without U+FFFD,
        so multi-token UTF-8 characters are emitted only once complete."""
        self.token_ids_cache += token_ids
        text = self.tokenizer.decode(self.token_ids_cache)
        if "\ufffd" in text:
            # Incomplete character — keep the cache and emit nothing yet.
            print("text 中包含非法字符")
            return ""
        self.token_ids_cache.clear()
        return text

    @property
    def bos_id(self):
        return self.tokenizer.bos_token_id

    @property
    def eos_id(self):
        return self.tokenizer.eos_token_id

    @property
    def bos_token(self):
        return self.tokenizer.bos_token

    @property
    def eos_token(self):
        return self.tokenizer.eos_token

    def reset(self, system_prompt="You are Qwen, created by Alibaba Cloud. You are a helpful assistant."):
        """Restart the conversation with a fresh system prompt; return the
        ids of the rendered (trimmed) prompt."""
        self.messages = [
            {"role": "system", "content": system_prompt},
        ]
        text = self.tokenizer_25.apply_chat_template(
            self.messages,
            tokenize=False,
            add_generation_prompt=True
        )
        # NOTE(review): same 3-token trim assumption as in encode().
        token_ids = self.tokenizer.encode(text)[:-3]
        self.token_ids = token_ids
        print(self.decode(token_ids))
        return token_ids
101
+
102
+
103
class Request(BaseHTTPRequestHandler):
    """HTTP front-end that routes GET/POST endpoints to per-uid
    Tokenizer_Http instances stored in the module-level `tokenizers` dict."""
    timeout = 5
    server_version = 'Apache'

    def _instance(self, uid):
        # Resolve a uid to its Tokenizer_Http instance (None if unknown).
        return tokenizers.get(uid)

    def do_GET(self):
        print("GET 请求路径:", self.path)
        self.send_response(200)
        self.send_header("Content-Type", "application/json")
        self.end_headers()

        if '/get_uid' in self.path:
            # Allocate a fresh session: new uid, new tokenizer instance.
            new_uid = str(uuid.uuid4())
            print("新 uid:", new_uid)
            tokenizers[new_uid] = Tokenizer_Http()
            msg = json.dumps({'uid': new_uid})
        elif '/bos_id' in self.path:
            inst: Tokenizer_Http = self._instance(self.get_query_param("uid"))
            if inst is None:
                msg = json.dumps({'error': 'Invalid uid'})
            else:
                bos_id = inst.bos_id
                # -1 stands in for "tokenizer has no such special token".
                msg = json.dumps({'bos_id': bos_id if bos_id is not None else -1})
        elif '/eos_id' in self.path:
            inst: Tokenizer_Http = self._instance(self.get_query_param("uid"))
            if inst is None:
                msg = json.dumps({'error': 'Invalid uid'})
            else:
                eos_id = inst.eos_id
                msg = json.dumps({'eos_id': eos_id if eos_id is not None else -1})
        else:
            msg = json.dumps({'error': 'Invalid GET endpoint'})

        print("响应消息:", msg)
        self.wfile.write(msg.encode())

    def do_POST(self):
        length = int(self.headers.get('content-length', 0))
        data = self.rfile.read(length).decode()
        print("POST 请求路径:", self.path)
        print("接收到的数据:", data)
        req = json.loads(data)

        self.send_response(200)
        self.send_header("Content-Type", "application/json")
        self.end_headers()

        if '/encode' in self.path:
            # Body must carry uid and text; last_reply is optional.
            inst: Tokenizer_Http = self._instance(req.get('uid'))
            if inst is None:
                msg = json.dumps({'error': 'Invalid uid'})
            else:
                token_ids, diff = inst.encode(req.get('text'), req.get('last_reply'))
                msg = json.dumps({'token_ids': token_ids, 'diff': diff})
        elif '/decode' in self.path:
            inst: Tokenizer_Http = self._instance(req.get('uid'))
            if inst is None:
                msg = json.dumps({'error': 'Invalid uid'})
            else:
                msg = json.dumps({'text': inst.decode(req.get('token_ids'))})
        elif '/reset' in self.path:
            inst: Tokenizer_Http = self._instance(req.get("uid"))
            if inst is None:
                msg = json.dumps({'error': 'Invalid uid'})
            else:
                system_prompt = req.get("system_prompt")
                if system_prompt is not None:
                    print("system_prompt:", system_prompt)
                    msg = json.dumps({'token_ids': inst.reset(system_prompt)})
                else:
                    msg = json.dumps({'token_ids': inst.reset()})
        else:
            msg = json.dumps({'error': 'Invalid POST endpoint'})

        print("响应消息:", msg)
        self.wfile.write(msg.encode())

    def get_query_param(self, key):
        """Return the value of `key` from the request's query string,
        e.g. /bos_id?uid=xxx -> 'xxx'; None when absent."""
        from urllib.parse import urlparse, parse_qs
        values = parse_qs(urlparse(self.path).query).get(key)
        return values[0] if values else None
204
+
205
if __name__ == "__main__":
    # Command-line options for the listening address.
    cli = argparse.ArgumentParser()
    cli.add_argument('--host', type=str, default='0.0.0.0')
    cli.add_argument('--port', type=int, default=12345)
    opts = cli.parse_args()

    address = (opts.host, opts.port)
    print('Server running at http://%s:%s' % address)
    # Blocking single-threaded server loop.
    HTTPServer(address, Request).serve_forever()
run_qwen3_0.6b_int8_ctx_ax650.sh ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Launch the Qwen3-0.6B INT8 runner natively on AX650.
# Requires the tokenizer HTTP service (qwen3_tokenizer_uid.py) to be
# listening on 127.0.0.1:12345 before this script is started.
# --axmodel_num 28 matches the per-layer files qwen3_p128_l0..l27;
# %d in the template is expanded to the layer index.
./main_ax650 \
--system_prompt "You are Qwen, created by Alibaba Cloud. You are a helpful assistant." \
--template_filename_axmodel "qwen3-0.6b-ax650/qwen3_p128_l%d_together.axmodel" \
--axmodel_num 28 \
--tokenizer_type 2 \
--url_tokenizer_model "http://127.0.0.1:12345" \
--filename_post_axmodel qwen3-0.6b-ax650/qwen3_post.axmodel \
--filename_tokens_embed qwen3-0.6b-ax650/model.embed_tokens.weight.bfloat16.bin \
--tokens_embed_num 151936 \
--tokens_embed_size 1024 \
--use_mmap_load_embed 1 \
--live_print 1

# Optional: persist/restore the KV cache (path is host-specific).
# --kvcache_path /home/axera/ax-llm/build/kvcache_yuanqi \
run_qwen3_0.6b_int8_ctx_axcl_aarch64.sh ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Launch the Qwen3-0.6B INT8 runner through AXCL on an aarch64 host,
# offloading to accelerator card 0 (--devices 0).
# Requires the tokenizer HTTP service (qwen3_tokenizer_uid.py) to be
# listening on 127.0.0.1:12345 before this script is started.
./main_axcl_aarch64 \
--system_prompt "You are Qwen, created by Alibaba Cloud. You are a helpful assistant." \
--template_filename_axmodel "qwen3-0.6b-ax650/qwen3_p128_l%d_together.axmodel" \
--axmodel_num 28 \
--tokenizer_type 2 \
--url_tokenizer_model "http://127.0.0.1:12345" \
--filename_post_axmodel qwen3-0.6b-ax650/qwen3_post.axmodel \
--filename_tokens_embed qwen3-0.6b-ax650/model.embed_tokens.weight.bfloat16.bin \
--tokens_embed_num 151936 \
--tokens_embed_size 1024 \
--use_mmap_load_embed 1 \
--live_print 1 \
--devices 0

# Optional: persist/restore the KV cache (path is host-specific).
# --kvcache_path /home/axera/ax-llm/build/kvcache_yuanqi \
run_qwen3_0.6b_int8_ctx_axcl_x86.sh ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Launch the Qwen3-0.6B INT8 runner through AXCL on an x86 host,
# offloading to accelerator card 0 (--devices 0).
# Requires the tokenizer HTTP service (qwen3_tokenizer_uid.py) to be
# listening on 127.0.0.1:12345 before this script is started.
./main_axcl_x86 \
--system_prompt "You are Qwen, created by Alibaba Cloud. You are a helpful assistant." \
--template_filename_axmodel "qwen3-0.6b-ax650/qwen3_p128_l%d_together.axmodel" \
--axmodel_num 28 \
--tokenizer_type 2 \
--url_tokenizer_model "http://127.0.0.1:12345" \
--filename_post_axmodel qwen3-0.6b-ax650/qwen3_post.axmodel \
--filename_tokens_embed qwen3-0.6b-ax650/model.embed_tokens.weight.bfloat16.bin \
--tokens_embed_num 151936 \
--tokens_embed_size 1024 \
--use_mmap_load_embed 1 \
--live_print 1 \
--devices 0

# Optional: persist/restore the KV cache (path is host-specific).
# --kvcache_path /home/axera/ax-llm/build/kvcache_yuanqi \