audio_ranges / app.py
Pseudo-Sid's picture
Update app.py
a84ae16 verified
raw
history blame contribute delete
781 Bytes
import gradio as gr
import librosa
from transformers import pipeline
# Checkpoint on the Hugging Face Hub that backs the classifier.
# NOTE(review): the model card for this checkpoint appears to describe
# dimensional outputs (arousal / dominance / valence) produced by a custom
# head -- confirm the generic audio-classification pipeline loads the head
# weights correctly before trusting the scores.
_MODEL_ID = "audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim"

# Build the pipeline once at import time; classify_audio reuses this object
# on every call instead of reloading the model.
pipe = pipeline(task="audio-classification", model=_MODEL_ID)
def classify_audio(audio_path):
    """Classify an audio file and return a score per label.

    Args:
        audio_path: Path to the audio file on disk (the Gradio ``Audio``
            input in this file is configured with ``type="filepath"``).
            May be ``None`` or empty when the form is submitted without
            a clip.

    Returns:
        A dict mapping each label reported by the pipeline to its score
        as a ``float``. An empty dict is returned when no audio was given.
    """
    # Gradio hands over None when nothing was uploaded; bail out with an
    # empty result rather than letting librosa.load fail on a non-path.
    if not audio_path:
        return {}

    # Load the clip and resample it to 16 kHz in one step. The returned
    # sample rate is not needed afterwards, so it is discarded.
    waveform, _ = librosa.load(audio_path, sr=16000)

    # Pass the bare waveform array, not a (waveform, sample_rate) tuple.
    predictions = pipe(waveform)

    # Cast scores to plain floats so the result is a simple label -> score map.
    return {item["label"]: float(item["score"]) for item in predictions}
# Input widget: configured with type="filepath", so the handler receives a
# path on disk rather than raw samples.
_audio_input = gr.Audio(
    type="filepath",
    label="Upload Audio (WAV, MP3, etc.)",
)

# Output widget: shows up to eight of the returned label/score pairs.
_label_output = gr.Label(
    num_top_classes=8,
    label="Emotion Classification",
)

# Wire the handler and widgets into a single-page Gradio app.
iface = gr.Interface(
    fn=classify_audio,
    inputs=_audio_input,
    outputs=_label_output,
    title="Speech Emotion Classification",
    description="Upload an audio clip to classify the speaker's emotion.",
)

# Start the web server as soon as the module is executed.
iface.launch()