Update model card with paper link and library details
#18
opened by nielsr (HF Staff)

README.md CHANGED
@@ -8,20 +8,22 @@ language:
 - pt
 - nl
 - hi
+library_name: transformers
 license: apache-2.0
-library_name: vllm
-inference: false
-extra_gated_description: >-
-  If you want to learn more about how we process your personal data, please read
-  our <a href="https://mistral.ai/terms/">Privacy Policy</a>.
 pipeline_tag: audio-text-to-text
 tags:
 - transformers
+- vllm
+inference: false
+extra_gated_description: If you want to learn more about how we process your personal
+  data, please read our <a href="https://mistral.ai/terms/">Privacy Policy</a>.
 ---
+
 # Voxtral Mini 1.0 (3B) - 2507
 
 Voxtral Mini is an enhancement of [Ministral 3B](https://mistral.ai/news/ministraux), incorporating state-of-the-art audio input capabilities while retaining best-in-class text performance. It excels at speech transcription, translation and audio understanding.
 
+The model was presented in the paper [Voxtral](https://huggingface.co/papers/2507.13264).
 Learn more about Voxtral in our blog post [here](https://mistral.ai/news/voxtral).
 
 ## Key Features
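The headline metadata change is `library_name: vllm` becoming `library_name: transformers`, which points the Hub's auto-generated snippets at the Transformers loading path, while vLLM usage stays discoverable through the new `vllm` tag. A minimal sketch of that loading path, assuming the `mistralai/Voxtral-Mini-3B-2507` repo id and the `VoxtralForConditionalGeneration` class that recent Transformers releases ship for this architecture (neither appears in this diff, so treat both as assumptions); the generate-and-decode lines mirror the card's own context lines in the hunks below:

```python
# Sketch only: the loading path implied by `library_name: transformers`.
# Assumptions: a transformers release with Voxtral support, and this repo id.
import torch
from transformers import AutoProcessor, VoxtralForConditionalGeneration

repo_id = "mistralai/Voxtral-Mini-3B-2507"  # assumed repo id for this card
device = "cuda" if torch.cuda.is_available() else "cpu"

processor = AutoProcessor.from_pretrained(repo_id)
model = VoxtralForConditionalGeneration.from_pretrained(
    repo_id, torch_dtype=torch.bfloat16, device_map=device
)

# Text-only turn for brevity; the card's examples add audio chunks the same way.
conversation = [{"role": "user", "content": [{"type": "text", "text": "Why is the sky blue?"}]}]
inputs = processor.apply_chat_template(conversation)
inputs = inputs.to(device, dtype=torch.bfloat16)

outputs = model.generate(**inputs, max_new_tokens=500)
decoded = processor.batch_decode(outputs[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)
print(decoded[0])
```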
@@ -151,7 +153,9 @@ user_msg = UserMessage(content=[file_to_chunk(obama_file), file_to_chunk(bcn_fil
 
 print(30 * "=" + "USER 1" + 30 * "=")
 print(text_chunk.text)
-print("\n")
+print("
+
+")
 
 response = client.chat.completions.create(
     model=model,
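One thing worth flagging in this hunk, and in every hunk that follows: on the `+` side, the `\n` escape has been replaced by a literal line break inside the string, leaving an unterminated string literal that Python rejects with a `SyntaxError`. The hunk line counts (7 old lines becoming 9 new ones) confirm the split exists in the file itself, not just in this view. A minimal sketch of the single-line forms the snippets presumably intend, with the strings taken from the hunks themselves:

```python
# Valid single-line equivalents of the split strings introduced on the `+` side:
print("\n")                      # blank separator between turns (API-example hunks)
print("\nGenerated response:")   # header before a decoded output (Transformers hunks)
print("\nGenerated responses:")  # plural variant in the batched examples
```

A bare `print()` would work just as well wherever only a blank line is wanted.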
@@ -163,7 +167,9 @@ content = response.choices[0].message.content
 
 print(30 * "=" + "BOT 1" + 30 * "=")
 print(content)
-print("\n")
+print("
+
+")
 # The speaker who is more inspiring is the one who delivered the farewell address, as they express
 # gratitude, optimism, and a strong commitment to the nation and its citizens. They emphasize the importance of
 # self-government and active citizenship, encouraging everyone to participate in the democratic process. In contrast,
@@ -182,7 +188,9 @@ messages = [
 ]
 print(30 * "=" + "USER 2" + 30 * "=")
 print(messages[-1]["content"])
-print("\n")
+print("
+
+")
 
 response = client.chat.completions.create(
     model=model,
@@ -292,7 +300,8 @@ inputs = inputs.to(device, dtype=torch.bfloat16)
 outputs = model.generate(**inputs, max_new_tokens=500)
 decoded_outputs = processor.batch_decode(outputs[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)
 
-print("\nGenerated response:")
+print("
+Generated response:")
 print("=" * 80)
 print(decoded_outputs[0])
 print("=" * 80)
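A side note on the context lines that recur in these Transformers hunks: `model.generate` returns the prompt ids with the newly generated ids appended, so the slice `outputs[:, inputs.input_ids.shape[1]:]` drops the echoed prompt before `batch_decode`. A tiny sketch of that slice with made-up shapes:

```python
import torch

# Pretend generate() returned a batch of 2 sequences, 8 token ids each,
# where the first 5 ids of every row are the prompt that was fed in.
outputs = torch.arange(16).reshape(2, 8)
prompt_len = 5  # plays the role of inputs.input_ids.shape[1]

new_tokens = outputs[:, prompt_len:]  # keep only the generated continuation
assert new_tokens.shape == (2, 3)
```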
@@ -350,7 +359,8 @@ inputs = inputs.to(device, dtype=torch.bfloat16)
 outputs = model.generate(**inputs, max_new_tokens=500)
 decoded_outputs = processor.batch_decode(outputs[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)
 
-print("\nGenerated response:")
+print("
+Generated response:")
 print("=" * 80)
 print(decoded_outputs[0])
 print("=" * 80)
@@ -389,7 +399,8 @@ inputs = inputs.to(device, dtype=torch.bfloat16)
 outputs = model.generate(**inputs, max_new_tokens=500)
 decoded_outputs = processor.batch_decode(outputs[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)
 
-print("\nGenerated response:")
+print("
+Generated response:")
 print("=" * 80)
 print(decoded_outputs[0])
 print("=" * 80)
@@ -428,7 +439,8 @@ inputs = inputs.to(device, dtype=torch.bfloat16)
 outputs = model.generate(**inputs, max_new_tokens=500)
 decoded_outputs = processor.batch_decode(outputs[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)
 
-print("\nGenerated response:")
+print("
+Generated response:")
 print("=" * 80)
 print(decoded_outputs[0])
 print("=" * 80)
@@ -489,7 +501,8 @@ inputs = inputs.to(device, dtype=torch.bfloat16)
 outputs = model.generate(**inputs, max_new_tokens=500)
 decoded_outputs = processor.batch_decode(outputs[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)
 
-print("\nGenerated responses:")
+print("
+Generated responses:")
 print("=" * 80)
 for decoded_output in decoded_outputs:
     print(decoded_output)
@@ -518,7 +531,8 @@ inputs = inputs.to(device, dtype=torch.bfloat16)
 outputs = model.generate(**inputs, max_new_tokens=500)
 decoded_outputs = processor.batch_decode(outputs[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)
 
-print("\nGenerated responses:")
+print("
+Generated responses:")
 print("=" * 80)
 for decoded_output in decoded_outputs:
     print(decoded_output)