Spaces:
Running
on
T4
Running
on
T4
File size: 2,455 Bytes
a30854f dde51bf d1ae0b8 dde51bf b605946 dde51bf ceeeba7 dde51bf ceeeba7 dde51bf ab7d4d2 dde51bf ceeeba7 dde51bf ceeeba7 dde51bf 00f33d3 dde51bf ceeeba7 dde51bf ceeeba7 dde51bf 00f33d3 dde51bf 36dc535 ceeeba7 99054f6 36dc535 dde51bf ceeeba7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
import os
import torch
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read
import gradio as gr
# Hub checkpoint loaded by the pipeline and linked from the UI descriptions.
MODEL_NAME = "FarmRadioInternational/luganda-whisper-asr"
# Passed as ``batch_size`` on every pipeline call made by ``transcribe``.
BATCH_SIZE = 8

# Use CUDA device index 0 when torch reports a GPU; otherwise run on the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Speech-recognition pipeline, built once when the module is loaded.
# The access token comes from the HF_TOKEN environment variable and is
# None when that variable is unset.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
    token=os.environ.get("HF_TOKEN"),
)
def transcribe(inputs, task):
    """Run the speech-recognition pipeline on one audio input.

    Args:
        inputs: Audio handed over by the Gradio audio component (the UI
            components in this file are configured with ``type="filepath"``),
            or ``None`` when nothing was recorded or uploaded.
        task: Value forwarded to the model as ``generate_kwargs["task"]``;
            the UI offers ``"transcribe"`` and ``"translate"``.

    Returns:
        The ``"text"`` entry of the pipeline output.

    Raises:
        gr.Error: If ``inputs`` is ``None``.
    """
    # Guard clause: surface a readable error in the UI instead of letting
    # the pipeline fail on a missing input.
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    result = pipe(
        inputs,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task},
        return_timestamps=True,
    )
    return result["text"]
# Top-level Blocks container; the tabbed interface is rendered inside it
# further down in this file.
demo = gr.Blocks()

# Interface for audio recorded from the microphone. The recording is passed
# to ``transcribe`` as a file path together with the selected task.
mic_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources="microphone", type="filepath"),
        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
    ],
    outputs="text",
    theme="huggingface",
    title="Luganda Whisper Demo: Transcribe Audio",
    description=(
        "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
        # \U0001F917 is the hugging-face emoji; written as an escape so it
        # cannot be corrupted again by a wrong file encoding (it had been
        # mangled into the mojibake "π€").
        f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and \U0001F917 Transformers to transcribe audio files"
        " of arbitrary length."
    ),
    allow_flagging="never",
)
# Interface for uploaded audio files. The upload is passed to ``transcribe``
# as a file path together with the selected task.
file_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources="upload", label="Audio file", type="filepath"),
        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
    ],
    outputs="text",
    theme="huggingface",
    title="Luganda Whisper Demo: Transcribe Audio",
    description=(
        "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
        # \U0001F917 is the hugging-face emoji; written as an escape so it
        # cannot be corrupted again by a wrong file encoding (it had been
        # mangled into the mojibake "π€").
        f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and \U0001F917 Transformers to transcribe audio files"
        " of arbitrary length."
    ),
    # Each example row is [audio path, task]; the paths are relative to the
    # working directory, so the .wav files must sit next to this script.
    examples=[
        ["./ama_log-1514-E30_17.wav", "transcribe"],
        ["./ng_log-1614-E2_364.wav", "transcribe"],
        ["./New Recording.wav", "transcribe"],
        ["./New Recording 3.wav", "transcribe"],
    ],
    cache_examples=True,
    allow_flagging="never",
)
# Render the two interfaces as tabs inside the Blocks container.
with demo:
    gr.TabbedInterface(
        [mic_transcribe, file_transcribe],
        ["Transcribe Microphone", "Transcribe Audio File"],
    )

# Enable request queuing with max_size=10, then start the app.
# (A stray trailing "|" after launch() made this line a syntax error.)
demo.queue(max_size=10)
demo.launch()