Update README.md
Browse files
README.md
CHANGED
|
@@ -214,7 +214,7 @@ class EosListStoppingCriteria(StoppingCriteria):
|
|
| 214 |
return self.eos_sequence in last_ids
|
| 215 |
|
| 216 |
# Initialize the model with automatic device mapping
|
| 217 |
-llm = pipeline("text-generation", model="yentinglin/Llama-3-Taiwan-70B-Instruct
|
| 218 |
tokenizer = llm.tokenizer
|
| 219 |
|
| 220 |
# Define a conversation example
|
|
@@ -258,7 +258,7 @@ docker run \
|
|
| 258 |
-p "${PORT}:8000" \
|
| 259 |
--ipc=host \
|
| 260 |
vllm/vllm-openai:v0.4.0.post1 \
|
| 261 |
-    --model "yentinglin/Llama-3-Taiwan-70B-Instruct
|
| 262 |
-tp "${NUM_GPUS}"
|
| 263 |
```
|
| 264 |
|
|
@@ -277,7 +277,7 @@ client = OpenAI(
|
|
| 277 |
)
|
| 278 |
|
| 279 |
chat_response = client.chat.completions.create(
|
| 280 |
-    model="yentinglin/Llama-3-Taiwan-70B-Instruct
|
| 281 |
messages=[
|
| 282 |
{"role": "system", "content": "You are a helpful assistant."},
|
| 283 |
{"role": "user", "content": "Tell me a joke."},
|
|
|
|
| 214 |
return self.eos_sequence in last_ids
|
| 215 |
|
| 216 |
# Initialize the model with automatic device mapping
|
| 217 |
+llm = pipeline("text-generation", model="yentinglin/Llama-3-Taiwan-70B-Instruct", device_map="auto")
|
| 218 |
tokenizer = llm.tokenizer
|
| 219 |
|
| 220 |
# Define a conversation example
|
|
|
|
| 258 |
-p "${PORT}:8000" \
|
| 259 |
--ipc=host \
|
| 260 |
vllm/vllm-openai:v0.4.0.post1 \
|
| 261 |
+    --model "yentinglin/Llama-3-Taiwan-70B-Instruct" \
|
| 262 |
-tp "${NUM_GPUS}"
|
| 263 |
```
|
| 264 |
|
|
|
|
| 277 |
)
|
| 278 |
|
| 279 |
chat_response = client.chat.completions.create(
|
| 280 |
+    model="yentinglin/Llama-3-Taiwan-70B-Instruct",
|
| 281 |
messages=[
|
| 282 |
{"role": "system", "content": "You are a helpful assistant."},
|
| 283 |
{"role": "user", "content": "Tell me a joke."},
|