feihu.hf committed · Commit 0db3ae2 · Parent(s): 34ba78d
update README
README.md
CHANGED
@@ -48,7 +48,7 @@ The following contains a code snippet illustrating how to use the model to generate
 ```python
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-model_name = "Qwen/Qwen3-32B"
+model_name = "Qwen/Qwen3-32B-AWQ"
 
 # load the tokenizer and the model
 tokenizer = AutoTokenizer.from_pretrained(model_name)
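For context, the snippet this hunk edits continues past what the diff window shows. A minimal sketch of the full generation flow, assuming the standard `transformers` API (the prompt text, `max_new_tokens`, and the dtype/device settings are illustrative, not taken from the diff):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "Qwen/Qwen3-32B-AWQ"

# load the tokenizer and the model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",   # let transformers pick the checkpoint dtype
    device_map="auto"     # place the weights on the available GPU(s)
)

# render a chat prompt; enable_thinking toggles Qwen3's reasoning mode
prompt = "Give me a short introduction to large language models."
messages = [{"role": "user", "content": prompt}]
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=True
)
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

# generate, then strip the prompt tokens from the output
generated_ids = model.generate(**model_inputs, max_new_tokens=1024)
output_ids = generated_ids[0][len(model_inputs.input_ids[0]):]
print(tokenizer.decode(output_ids, skip_special_tokens=True))
```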
@@ -95,11 +95,11 @@ print("content:", content)
 For deployment, you can use `sglang>=0.4.6.post1` or `vllm>=0.8.5` to create an OpenAI-compatible API endpoint:
 - SGLang:
 ```shell
-python -m sglang.launch_server --model-path Qwen/Qwen3-32B --reasoning-parser qwen3
+python -m sglang.launch_server --model-path Qwen/Qwen3-32B-AWQ --reasoning-parser qwen3
 ```
 - vLLM:
 ```shell
-vllm serve Qwen/Qwen3-32B --enable-reasoning --reasoning-parser deepseek_r1
+vllm serve Qwen/Qwen3-32B-AWQ --enable-reasoning --reasoning-parser deepseek_r1
 ```
 
 Also check out our [AWQ documentation](https://qwen.readthedocs.io/en/latest/quantization/awq.html) for more usage guidance.
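Either command exposes an OpenAI-compatible endpoint, so any OpenAI client can talk to it. A minimal sketch using the official `openai` Python package (the base URL and the `EMPTY` api key are assumptions about a default local launch; SGLang listens on port 30000 by default, vLLM on 8000):

```python
from openai import OpenAI

# point the client at the local OpenAI-compatible server;
# adjust base_url to match how the server was launched
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

response = client.chat.completions.create(
    model="Qwen/Qwen3-32B-AWQ",
    messages=[
        {"role": "user", "content": "Give me a short introduction to large language models."}
    ],
)
print(response.choices[0].message.content)
```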
@@ -157,7 +157,7 @@ Here is an example of a multi-turn conversation:
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 class QwenChatbot:
-    def __init__(self, model_name="Qwen/Qwen3-32B"):
+    def __init__(self, model_name="Qwen/Qwen3-32B-AWQ"):
         self.tokenizer = AutoTokenizer.from_pretrained(model_name)
         self.model = AutoModelForCausalLM.from_pretrained(model_name)
         self.history = []
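The hunk cuts the class off after `__init__`. A self-contained sketch of how such a chatbot class could carry the multi-turn conversation (the method name, `max_new_tokens`, and generation settings are assumptions, not taken from the diff):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

class QwenChatbot:
    def __init__(self, model_name="Qwen/Qwen3-32B-AWQ"):
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(model_name)
        self.history = []

    def generate_response(self, user_input):
        # build the prompt from the accumulated history plus the new turn
        messages = self.history + [{"role": "user", "content": user_input}]
        text = self.tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        inputs = self.tokenizer(text, return_tensors="pt").to(self.model.device)
        output_ids = self.model.generate(**inputs, max_new_tokens=1024)
        response = self.tokenizer.decode(
            output_ids[0][inputs.input_ids.shape[1]:], skip_special_tokens=True
        )
        # record both turns so later calls see the whole conversation
        self.history.append({"role": "user", "content": user_input})
        self.history.append({"role": "assistant", "content": response})
        return response

# usage
bot = QwenChatbot()
print(bot.generate_response("Hello, who are you?"))
```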
@@ -220,7 +220,7 @@ from qwen_agent.agents import Assistant
 
 # Define LLM
 llm_cfg = {
-    'model': 'Qwen3-32B',
+    'model': 'Qwen3-32B-AWQ',
 
     # Use the endpoint provided by Alibaba Model Studio:
     # 'model_type': 'qwen_dashscope',
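The hunk shows only the head of the Qwen-Agent config. A sketch of how it might be completed against a local OpenAI-compatible server (the `model_server`/`api_key` keys follow Qwen-Agent's config shape; the URL and the bare `Assistant` usage are assumptions about a local deployment):

```python
from qwen_agent.agents import Assistant

# Define LLM: point qwen-agent at a local OpenAI-compatible server,
# e.g. one started with the vLLM/SGLang commands above (assumed setup)
llm_cfg = {
    'model': 'Qwen3-32B-AWQ',
    'model_server': 'http://localhost:8000/v1',  # api_base of the local server
    'api_key': 'EMPTY',
}

bot = Assistant(llm=llm_cfg)
messages = [{'role': 'user', 'content': 'Introduce yourself in one sentence.'}]
for responses in bot.run(messages=messages):
    pass  # bot.run streams partial results; keep the last chunk
print(responses)
```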