Update model card with paper link and library details

#18
by nielsr HF Staff - opened
Files changed (1) hide show
  1. README.md +28 -14
README.md CHANGED
@@ -8,20 +8,22 @@ language:
8
  - pt
9
  - nl
10
  - hi
 
11
  license: apache-2.0
12
- library_name: vllm
13
- inference: false
14
- extra_gated_description: >-
15
- If you want to learn more about how we process your personal data, please read
16
- our <a href="https://mistral.ai/terms/">Privacy Policy</a>.
17
  pipeline_tag: audio-text-to-text
18
  tags:
19
  - transformers
 
 
 
 
20
  ---
 
21
  # Voxtral Mini 1.0 (3B) - 2507
22
 
23
  Voxtral Mini is an enhancement of [Ministral 3B](https://mistral.ai/news/ministraux), incorporating state-of-the-art audio input capabilities while retaining best-in-class text performance. It excels at speech transcription, translation and audio understanding.
24
 
 
25
  Learn more about Voxtral in our blog post [here](https://mistral.ai/news/voxtral).
26
 
27
  ## Key Features
@@ -151,7 +153,9 @@ user_msg = UserMessage(content=[file_to_chunk(obama_file), file_to_chunk(bcn_fil
151
 
152
  print(30 * "=" + "USER 1" + 30 * "=")
153
  print(text_chunk.text)
154
- print("\n\n")
 
 
155
 
156
  response = client.chat.completions.create(
157
  model=model,
@@ -163,7 +167,9 @@ content = response.choices[0].message.content
163
 
164
  print(30 * "=" + "BOT 1" + 30 * "=")
165
  print(content)
166
- print("\n\n")
 
 
167
  # The speaker who is more inspiring is the one who delivered the farewell address, as they express
168
  # gratitude, optimism, and a strong commitment to the nation and its citizens. They emphasize the importance of
169
  # self-government and active citizenship, encouraging everyone to participate in the democratic process. In contrast,
@@ -182,7 +188,9 @@ messages = [
182
  ]
183
  print(30 * "=" + "USER 2" + 30 * "=")
184
  print(messages[-1]["content"])
185
- print("\n\n")
 
 
186
 
187
  response = client.chat.completions.create(
188
  model=model,
@@ -292,7 +300,8 @@ inputs = inputs.to(device, dtype=torch.bfloat16)
292
  outputs = model.generate(**inputs, max_new_tokens=500)
293
  decoded_outputs = processor.batch_decode(outputs[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)
294
 
295
- print("\nGenerated response:")
 
296
  print("=" * 80)
297
  print(decoded_outputs[0])
298
  print("=" * 80)
@@ -350,7 +359,8 @@ inputs = inputs.to(device, dtype=torch.bfloat16)
350
  outputs = model.generate(**inputs, max_new_tokens=500)
351
  decoded_outputs = processor.batch_decode(outputs[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)
352
 
353
- print("\nGenerated response:")
 
354
  print("=" * 80)
355
  print(decoded_outputs[0])
356
  print("=" * 80)
@@ -389,7 +399,8 @@ inputs = inputs.to(device, dtype=torch.bfloat16)
389
  outputs = model.generate(**inputs, max_new_tokens=500)
390
  decoded_outputs = processor.batch_decode(outputs[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)
391
 
392
- print("\nGenerated response:")
 
393
  print("=" * 80)
394
  print(decoded_outputs[0])
395
  print("=" * 80)
@@ -428,7 +439,8 @@ inputs = inputs.to(device, dtype=torch.bfloat16)
428
  outputs = model.generate(**inputs, max_new_tokens=500)
429
  decoded_outputs = processor.batch_decode(outputs[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)
430
 
431
- print("\nGenerated response:")
 
432
  print("=" * 80)
433
  print(decoded_outputs[0])
434
  print("=" * 80)
@@ -489,7 +501,8 @@ inputs = inputs.to(device, dtype=torch.bfloat16)
489
  outputs = model.generate(**inputs, max_new_tokens=500)
490
  decoded_outputs = processor.batch_decode(outputs[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)
491
 
492
- print("\nGenerated responses:")
 
493
  print("=" * 80)
494
  for decoded_output in decoded_outputs:
495
  print(decoded_output)
@@ -518,7 +531,8 @@ inputs = inputs.to(device, dtype=torch.bfloat16)
518
  outputs = model.generate(**inputs, max_new_tokens=500)
519
  decoded_outputs = processor.batch_decode(outputs[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)
520
 
521
- print("\nGenerated responses:")
 
522
  print("=" * 80)
523
  for decoded_output in decoded_outputs:
524
  print(decoded_output)
 
8
  - pt
9
  - nl
10
  - hi
11
+ library_name: transformers
12
  license: apache-2.0
 
 
 
 
 
13
  pipeline_tag: audio-text-to-text
14
  tags:
15
  - transformers
16
+ - vllm
17
+ inference: false
18
+ extra_gated_description: If you want to learn more about how we process your personal
19
+ data, please read our <a href="https://mistral.ai/terms/">Privacy Policy</a>.
20
  ---
21
+
22
  # Voxtral Mini 1.0 (3B) - 2507
23
 
24
  Voxtral Mini is an enhancement of [Ministral 3B](https://mistral.ai/news/ministraux), incorporating state-of-the-art audio input capabilities while retaining best-in-class text performance. It excels at speech transcription, translation and audio understanding.
25
 
26
+ The model was presented in the paper [Voxtral](https://huggingface.co/papers/2507.13264).
27
  Learn more about Voxtral in our blog post [here](https://mistral.ai/news/voxtral).
28
 
29
  ## Key Features
 
153
 
154
  print(30 * "=" + "USER 1" + 30 * "=")
155
  print(text_chunk.text)
156
 + print("\n\n")
159
 
160
  response = client.chat.completions.create(
161
  model=model,
 
167
 
168
  print(30 * "=" + "BOT 1" + 30 * "=")
169
  print(content)
170
 + print("\n\n")
173
  # The speaker who is more inspiring is the one who delivered the farewell address, as they express
174
  # gratitude, optimism, and a strong commitment to the nation and its citizens. They emphasize the importance of
175
  # self-government and active citizenship, encouraging everyone to participate in the democratic process. In contrast,
 
188
  ]
189
  print(30 * "=" + "USER 2" + 30 * "=")
190
  print(messages[-1]["content"])
191
 + print("\n\n")
194
 
195
  response = client.chat.completions.create(
196
  model=model,
 
300
  outputs = model.generate(**inputs, max_new_tokens=500)
301
  decoded_outputs = processor.batch_decode(outputs[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)
302
 
303
 + print("\nGenerated response:")
305
  print("=" * 80)
306
  print(decoded_outputs[0])
307
  print("=" * 80)
 
359
  outputs = model.generate(**inputs, max_new_tokens=500)
360
  decoded_outputs = processor.batch_decode(outputs[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)
361
 
362
 + print("\nGenerated response:")
364
  print("=" * 80)
365
  print(decoded_outputs[0])
366
  print("=" * 80)
 
399
  outputs = model.generate(**inputs, max_new_tokens=500)
400
  decoded_outputs = processor.batch_decode(outputs[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)
401
 
402
 + print("\nGenerated response:")
404
  print("=" * 80)
405
  print(decoded_outputs[0])
406
  print("=" * 80)
 
439
  outputs = model.generate(**inputs, max_new_tokens=500)
440
  decoded_outputs = processor.batch_decode(outputs[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)
441
 
442
 + print("\nGenerated response:")
444
  print("=" * 80)
445
  print(decoded_outputs[0])
446
  print("=" * 80)
 
501
  outputs = model.generate(**inputs, max_new_tokens=500)
502
  decoded_outputs = processor.batch_decode(outputs[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)
503
 
504
 + print("\nGenerated responses:")
506
  print("=" * 80)
507
  for decoded_output in decoded_outputs:
508
  print(decoded_output)
 
531
  outputs = model.generate(**inputs, max_new_tokens=500)
532
  decoded_outputs = processor.batch_decode(outputs[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)
533
 
534
 + print("\nGenerated responses:")
536
  print("=" * 80)
537
  for decoded_output in decoded_outputs:
538
  print(decoded_output)