|
from openai import OpenAI |
|
from mistral_common.audio import Audio |
|
from mistral_common.protocol.instruct.messages import AudioChunk, TextChunk, UserMessage |
|
import gradio as gr |
|
import httpx |
|
|
|
|
|
class NoAuth(httpx.Auth):
    """httpx auth handler that passes every request through untouched.

    An instance is handed to the ``httpx.Client`` that backs the OpenAI
    client in this file.
    NOTE(review): presumably this exists so httpx applies no authentication
    of its own to requests sent to the local server -- confirm the intent.
    """

    def auth_flow(self, request):
        """Yield ``request`` exactly as received, adding no credentials."""
        yield request
|
|
|
# OpenAI-compatible client aimed at a server on localhost:8000.
# NOTE(review): "dddd" looks like a placeholder key -- confirm the server
# does not validate it.
client = OpenAI(
    api_key="dddd",
    base_url="http://localhost:8000/v1",
    http_client=httpx.Client(auth=NoAuth()),
)

# Model identifier passed as `model=` to the chat-completions call.
model = "mistralai/Voxtral-Mini-3B-2507"
|
|
|
def transcribe(audio_file):
    """Transcribe a recorded audio file via the chat-completions endpoint.

    Args:
        audio_file: Path of the audio file to transcribe (the Gradio input
            in this file is configured with ``type="filepath"``). A falsy
            value such as ``None`` or ``""`` means nothing was recorded.

    Returns:
        The text content of the first completion choice, an empty string
        if the response carries no text content, or a short hint asking
        the user to record something when no file was supplied.

    Raises:
        OSError: If ``audio_file`` cannot be opened or read.
    """
    # Guard clause: nothing recorded, so skip file and network access.
    if not audio_file:
        return "Please record something."

    with open(audio_file, "rb") as f:
        audio_bytes = f.read()

    # Wrap the raw bytes in the mistral_common message types and convert the
    # user message to the OpenAI wire format expected by the client.
    audio = Audio.from_bytes(audio_bytes, strict=False)
    chunk = AudioChunk.from_audio(audio)
    prompt = TextChunk(text="Please transcribe this audio.")
    user_msg = UserMessage(content=[chunk, prompt]).to_openai()

    response = client.chat.completions.create(
        model=model,
        messages=[user_msg],
        temperature=0.0,
    )

    # `content` may be None in a chat-completion message; always hand the
    # UI a string.
    return response.choices[0].message.content or ""
|
|
|
# Build the Gradio UI (audio input passed to transcribe() as a file path,
# textbox output) and start serving it as soon as this module runs.
gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath", label="Record your voice"),
    outputs=gr.Textbox(label="Transcription"),
    title="Transcribe Audio with Voxtral",
).launch()
|
|