Update README.md
Browse files
README.md
CHANGED
|
@@ -235,7 +235,7 @@ messages = [
 sampling_params = SamplingParams(max_tokens=128_000)

 # note that running this model on GPU requires over 300 GB of GPU RAM
-llm = LLM(model=model_name, tokenizer_mode="mistral", tensor_parallel_size=8, limit_mm_per_prompt={"image": 4})
+llm = LLM(model=model_name, tokenizer_mode="mistral", tensor_parallel_size=8, limit_mm_per_prompt={"image": 4}, max_model_len=32768)

 outputs = llm.chat(messages, sampling_params=sampling_params)