"""Gradio demo: classify an uploaded audio clip with a Hugging Face pipeline."""

from typing import Dict, Optional

import gradio as gr
import librosa
from transformers import pipeline

# Sampling rate every clip is resampled to before being handed to the pipeline.
TARGET_SAMPLE_RATE = 16000

# Number of labels requested from the pipeline and displayed by the Label
# widget. Kept in one place so the two cannot drift apart (the pipeline's own
# default would otherwise cap the output below what the UI asks for).
MAX_LABELS = 8

# Load pipeline once at import time so every request reuses the same model.
# NOTE(review): the model card for this checkpoint describes a dimensional
# (arousal / dominance / valence) regression head with a custom model class.
# Confirm that the generic "audio-classification" pipeline actually loads the
# head weights and that the returned scores are meaningful.
pipe = pipeline(
    "audio-classification",
    model="audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim",
)


def classify_audio(audio_path: Optional[str]) -> Dict[str, float]:
    """Classify the audio file at *audio_path* and return label scores.

    Args:
        audio_path: Filesystem path to the clip, as supplied by the
            ``gr.Audio(type="filepath")`` input. ``None`` (or an empty
            string) when the user submits without providing audio.

    Returns:
        A mapping of label name to score, suitable for ``gr.Label``.

    Raises:
        gr.Error: If no audio was provided or the clip contains no samples.
    """
    # Gradio passes None when the form is submitted without a clip; fail with
    # a readable message instead of an opaque error from inside librosa.
    if not audio_path:
        raise gr.Error("Please upload an audio clip before submitting.")

    # Load audio (always convert to 16k); librosa resamples when sr is given.
    waveform, _ = librosa.load(audio_path, sr=TARGET_SAMPLE_RATE)
    if waveform.size == 0:
        raise gr.Error("The uploaded audio clip is empty.")

    # Just pass the waveform, NOT a tuple. The pipeline clamps top_k to the
    # number of labels the model defines, so asking for MAX_LABELS is safe.
    results = pipe(waveform, top_k=MAX_LABELS)
    return {item["label"]: float(item["score"]) for item in results}


iface = gr.Interface(
    fn=classify_audio,
    inputs=gr.Audio(type="filepath", label="Upload Audio (WAV, MP3, etc.)"),
    outputs=gr.Label(num_top_classes=MAX_LABELS, label="Emotion Classification"),
    title="Speech Emotion Classification",
    description="Upload an audio clip to classify the speaker's emotion.",
)

iface.launch()