Spaces:

CGIAR
/

luganda-asr

Running on T4

File size: 2,455 Bytes

a30854f
dde51bf
 
 
 
 
d1ae0b8
dde51bf
 
 
 
 
 
 
 
 
b605946
dde51bf
 
ceeeba7
 
 
dde51bf
ceeeba7
 
dde51bf
 
ab7d4d2
 
dde51bf
 
ceeeba7
 
dde51bf
 
ceeeba7
dde51bf
00f33d3
dde51bf
 
 
 
 
 
 
 
 
 
 
ceeeba7
 
dde51bf
 
ceeeba7
dde51bf
00f33d3
dde51bf
 
 
 
 
36dc535
ceeeba7
 
99054f6
 
36dc535
 
dde51bf
 
 
 
 
 
ceeeba7

import os
import torch
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read
import gradio as gr

# Hub checkpoint served by this demo; also interpolated into the UI descriptions.
MODEL_NAME = "FarmRadioInternational/luganda-whisper-asr"
# Batch size handed to the pipeline on every transcription call.
BATCH_SIZE = 8

# Use CUDA device 0 when torch reports a GPU, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Shared speech-recognition pipeline, constructed once at import time.
# The access token is read from the HF_TOKEN environment variable and is
# None when that variable is unset.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    device=device,
    chunk_length_s=30,
    token=os.environ.get('HF_TOKEN'),
)

def transcribe(inputs, task):
    """Run the shared ASR pipeline on one audio input and return its text.

    Args:
        inputs: Audio value supplied by the Gradio audio component
            (the components in this file use ``type="filepath"``), or
            ``None`` when nothing was uploaded or recorded.
        task: Task name forwarded to generation; the UI offers
            ``"transcribe"`` and ``"translate"``.

    Returns:
        The ``"text"`` entry of the pipeline result.

    Raises:
        gr.Error: If ``inputs`` is ``None``.
    """
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    # NOTE(review): timestamps are requested although only the text is used;
    # presumably needed for long-form decoding -- confirm before removing.
    result = pipe(
        inputs,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task},
        return_timestamps=True,
    )
    return result["text"]

# Top-level Blocks container: the tabbed interface is created inside it
# further down, and it is the object that gets queued and launched.
demo = gr.Blocks()

# Interface for the "Transcribe Microphone" tab: a microphone recording plus
# a task selector are passed to transcribe(), and the result is shown as text.
# NOTE(review): "huggingface" is a legacy theme name -- confirm the installed
# Gradio version still recognises it.
mic_transcribe = gr.Interface(
    title="Luganda Whisper Demo: Transcribe Audio",
    description=(
        "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
        f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
        " of arbitrary length."
    ),
    fn=transcribe,
    inputs=[
        gr.Audio(type="filepath", sources="microphone"),
        gr.Radio(choices=["transcribe", "translate"], value="transcribe", label="Task"),
    ],
    outputs="text",
    theme="huggingface",
    allow_flagging="never",
)

# Interface for the "Transcribe Audio File" tab: an uploaded audio file plus
# a task selector are passed to transcribe(), and the result is shown as text.
# The bundled example recordings are listed with the "transcribe" task and
# cache_examples=True is set for them.
# NOTE(review): "huggingface" is a legacy theme name -- confirm the installed
# Gradio version still recognises it.
file_transcribe = gr.Interface(
    title="Luganda Whisper Demo: Transcribe Audio",
    description=(
        "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
        f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
        " of arbitrary length."
    ),
    fn=transcribe,
    inputs=[
        gr.Audio(type="filepath", sources="upload", label="Audio file"),
        gr.Radio(choices=["transcribe", "translate"], value="transcribe", label="Task"),
    ],
    outputs="text",
    examples=[
        ["./ama_log-1514-E30_17.wav", "transcribe"],
        ["./ng_log-1614-E2_364.wav", "transcribe"],
        ["./New Recording.wav", "transcribe"],
        ["./New Recording 3.wav", "transcribe"],
    ],
    cache_examples=True,
    theme="huggingface",
    allow_flagging="never",
)

# Mount both interfaces as tabs inside the top-level Blocks container.
with demo:
    gr.TabbedInterface(
        interface_list=[mic_transcribe, file_transcribe],
        tab_names=["Transcribe Microphone", "Transcribe Audio File"],
    )

# Enable the request queue with max_size=10, then start the app.
demo.queue(max_size=10)
demo.launch()