patrickvonplaten committed on
Commit
68cfa17
·
verified ·
1 Parent(s): dfb704c

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +1 -1
README.md CHANGED
@@ -235,7 +235,7 @@ messages = [
235
  sampling_params = SamplingParams(max_tokens=128_000)
236
 
237
  # note that running this model on GPU requires over 300 GB of GPU RAM
238
- llm = LLM(model=model_name, tokenizer_mode="mistral", tensor_parallel_size=8, limit_mm_per_prompt={"image": 4})
239
 
240
  outputs = llm.chat(messages, sampling_params=sampling_params)
241
 
 
235
  sampling_params = SamplingParams(max_tokens=128_000)
236
 
237
  # note that running this model on GPU requires over 300 GB of GPU RAM
238
+ llm = LLM(model=model_name, tokenizer_mode="mistral", tensor_parallel_size=8, limit_mm_per_prompt={"image": 4}, max_model_len=32768)
239
 
240
  outputs = llm.chat(messages, sampling_params=sampling_params)
241