import gradio as gr
import librosa
from transformers import pipeline

# Build the Hugging Face inference pipeline once, at import time, so the
# loaded model is shared by every call instead of being re-created per request.
# NOTE(review): the checkpoint name ends in "msp-dim", which suggests it was
# trained for dimensional outputs (arousal/dominance/valence) rather than
# categorical emotions -- confirm against the model card that the stock
# "audio-classification" head loads its weights and yields meaningful labels.
pipe = pipeline(
    task="audio-classification",
    model="audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim",
)

def classify_audio(audio_path):
    """Classify an audio file and return a score for each predicted label.

    Args:
        audio_path: Path to an audio file readable by ``librosa.load``.
            May be ``None`` (or empty) when no audio was supplied.

    Returns:
        A dict mapping each label string to its score as a ``float``.
        The dict is empty when no audio path was given.
    """
    # Gradio invokes the handler with None when the user submits without
    # providing audio; librosa.load would raise on that. An empty mapping
    # leaves the Label output blank instead of surfacing a stack trace.
    if not audio_path:
        return {}

    # Decode and resample to 16 kHz. The bare array is handed to the pipeline
    # without a sampling rate, so it must already be at the rate the
    # pipeline's feature extractor expects (presumably 16 kHz for this
    # wav2vec2 checkpoint -- verify against the model card).
    waveform, _ = librosa.load(audio_path, sr=16000)

    # Pass only the waveform, NOT a (waveform, rate) tuple.
    # top_k=None requests every label rather than the pipeline's default
    # cut-off, so the UI widget decides how many classes to display.
    results = pipe(waveform, top_k=None)

    return {r["label"]: float(r["score"]) for r in results}

# Input widget: configured with type="filepath", so the handler receives a
# path string rather than raw samples.
audio_input = gr.Audio(type="filepath", label="Upload Audio (WAV, MP3, etc.)")

# Output widget: renders the label -> score mapping, capped at eight classes.
emotion_output = gr.Label(num_top_classes=8, label="Emotion Classification")

# Wire the handler and both widgets into a single-page demo.
iface = gr.Interface(
    title="Speech Emotion Classification",
    description="Upload an audio clip to classify the speaker's emotion.",
    fn=classify_audio,
    inputs=audio_input,
    outputs=emotion_output,
)

# Start serving the demo.
iface.launch()