Spaces:
Sleeping
Sleeping
Completely new app.py, use torchaudio
Browse files- README.md +27 -0
- app.py +35 -12
- requirements.txt +2 -1
README.md
CHANGED
|
@@ -12,3 +12,30 @@ short_description: Transcribing the audio file with Whisper
|
|
| 12 |
---
|
| 13 |
|
| 14 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
---
|
| 13 |
|
| 14 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
| 15 |
+
|
| 16 |
+
# Audio Transcription with Whisper
|
| 17 |
+
|
| 18 |
+
This Hugging Face Space uses the `openai/whisper-large-v3` model to transcribe audio files (wav, m4a, mp3). It handles long audio files gracefully.
|
| 19 |
+
|
| 20 |
+
## How to Use
|
| 21 |
+
|
| 22 |
+
1. Upload your audio file or record audio directly in the browser.
|
| 23 |
+
2. Click the "Transcribe" button.
|
| 24 |
+
3. The transcribed text will be displayed in the textbox below.
|
| 25 |
+
|
| 26 |
+
## Dependencies
|
| 27 |
+
|
| 28 |
+
- `transformers`
|
| 29 |
+
- `torch`
|
| 30 |
+
- `torchaudio`
|
| 31 |
+
- `gradio`
|
| 32 |
+
|
| 33 |
+
## Model
|
| 34 |
+
|
| 35 |
+
`openai/whisper-large-v3`
|
| 36 |
+
|
| 37 |
+
## Notes
|
| 38 |
+
|
| 39 |
+
- This Space is designed to handle long audio files.
|
| 40 |
+
- The audio is resampled to 16 kHz if necessary.
|
| 41 |
+
- Error messages are displayed if transcription fails.
|
app.py
CHANGED
|
@@ -1,23 +1,46 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import torch
|
| 3 |
from transformers import pipeline
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
# Load the Whisper pipeline
|
| 6 |
-
|
| 7 |
|
| 8 |
def transcribe_audio(audio_file):
|
| 9 |
-
if audio_file is
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
|
| 16 |
with gr.Blocks() as demo:
|
| 17 |
-
gr.
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
|
|
|
|
|
|
| 22 |
|
| 23 |
-
demo.launch()
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import torch
|
| 3 |
from transformers import pipeline
|
| 4 |
+
import torchaudio
|
| 5 |
+
|
| 6 |
+
# Prefer the GPU when CUDA is usable; otherwise run inference on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Build the Whisper speech-recognition pipeline once, at import time, so that
# every transcription request reuses the same loaded model.
whisper_pipeline = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-large-v3",
    device=device,
)
|
| 14 |
|
| 15 |
def transcribe_audio(audio_file):
    """Transcribe an audio file to text with the module-level Whisper pipeline.

    Args:
        audio_file: Path of the audio file to transcribe, or ``None`` when
            the user has not provided any audio.

    Returns:
        The transcribed text on success, a prompt asking for audio when
        ``audio_file`` is ``None``, or an ``"An error occurred: ..."``
        message when loading or transcription fails.
    """
    if audio_file is None:
        return "Please upload or record an audio file."

    # Sampling rate the audio is converted to before it is handed to Whisper.
    target_rate = 16000

    try:
        waveform, sample_rate = torchaudio.load(audio_file)

        # torchaudio.load returns a (channels, frames) tensor by default.
        # The ASR pipeline only accepts 1-D input, and squeeze() would leave
        # a stereo tensor 2-D, so average the channels down to mono instead.
        if waveform.dim() > 1:
            waveform = waveform.mean(dim=0)

        if sample_rate != target_rate:
            resampler = torchaudio.transforms.Resample(sample_rate, target_rate)
            waveform = resampler(waveform)

        # Pass the sampling rate explicitly with the samples, and ask for
        # chunked inference so recordings longer than Whisper's 30-second
        # window are transcribed in full.
        result = whisper_pipeline(
            {"raw": waveform.numpy(), "sampling_rate": target_rate},
            chunk_length_s=30,
        )
        return result["text"]

    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing the app.
        return f"An error occurred: {e}"
|
| 35 |
|
| 36 |
|
| 37 |
# Minimal UI: an audio input, a button, and a textbox for the transcription.
with gr.Blocks() as demo:
    with gr.Row():
        # `sources` (Gradio 4+) replaces the removed `source` keyword.
        # Listing both entries enables recording as well as upload, which is
        # what the component label advertises.
        audio_input = gr.Audio(
            sources=["upload", "microphone"],
            type="filepath",
            label="Upload or Record Audio",
        )

    transcribe_button = gr.Button("Transcribe")
    transcription_output = gr.Textbox(label="Transcription")

    # type="filepath" makes Gradio pass a file path (or None) to the callback.
    transcribe_button.click(
        transcribe_audio,
        inputs=audio_input,
        outputs=transcription_output,
    )

demo.launch()
|
requirements.txt
CHANGED
|
@@ -1,3 +1,4 @@
|
|
| 1 |
transformers
|
| 2 |
-
gradio
|
| 3 |
torch
|
|
|
|
|
|
|
|
|
| 1 |
transformers
|
|
|
|
| 2 |
torch
|
| 3 |
+
torchaudio
|
| 4 |
+
gradio
|