import gradio as gr
from src.transcriber import transcriber
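# Expected shape of src.transcriber.transcriber, inferred from the components
# wired into gr.Interface below; the parameter names and return types here are
# an assumption, not confirmed by this file:
#   transcriber(file, file_type, max_words_per_line, task, model_version, device_type)
#   -> (srt_text, srt_file_path, plain_text, segments_json)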
def main():
    with gr.Blocks(title='multilang-asr-transcriber', delete_cache=(86400, 86400), theme=gr.themes.Base()) as demo:
        gr.Markdown('## Multilang ASR Transcriber')
        gr.Markdown('An automatic speech recognition tool using [faster-whisper](https://github.com/SYSTRAN/faster-whisper). Supports multilingual video transcription and translation to English. Users may set the max words per line.')
        with gr.Tabs(selected="video") as tabs:
            with gr.Tab("Video", id="video"):
                file = gr.File(file_types=["video"], type="filepath", label="Upload a video")
file_type = gr.Radio(choices=["video"], value="video", label="File Type", visible=False)
max_words_per_line = gr.Number(value=6, label="Max words per line")
task = gr.Radio(choices=["transcribe", "translate"], value="transcribe", label="Select Task")
model_version = gr.Radio(choices=["deepdml/faster-whisper-large-v3-turbo-ct2",
"turbo",
"large-v3"], value="deepdml/faster-whisper-large-v3-turbo-ct2", label="Select Model")
device_type = gr.Radio(choices=["desktop", "mobile"], value="desktop", label="Select Device")
text_output = gr.Textbox(label="SRT Text transcription", show_copy_button=True)
srt_file = gr.File(file_count="single", type="filepath", file_types=[".srt"], label="SRT file")
text_clean_output = gr.Textbox(label="Text transcription", show_copy_button=True)
json_output = gr.JSON(label="JSON Transcription")
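                # gr.Interface wires the components above to the transcriber callable
                # and supplies the submit/clear controls; flagging is disabled.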
                gr.Interface(
                    fn=transcriber,
                    inputs=[file, file_type, max_words_per_line, task, model_version, device_type],
                    outputs=[text_output, srt_file, text_clean_output, json_output],
                    allow_flagging="never"
                )
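            # The Audio tab mirrors the Video tab; only the accepted file types
            # and the hidden file_type value differ.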
with gr.Tab("Audio", id = "audio"):
file = gr.File(file_types=["audio"],type="filepath", label="Upload an audio file")
file_type = gr.Radio(choices=["audio"], value="audio", label="File Type", visible=False)
max_words_per_line = gr.Number(value=6, label="Max words per line")
task = gr.Radio(choices=["transcribe", "translate"], value="transcribe", label="Select Task")
model_version = gr.Radio(choices=["deepdml/faster-whisper-large-v3-turbo-ct2",
"turbo",
"large-v3"], value="deepdml/faster-whisper-large-v3-turbo-ct2", label="Select Model")
device_type = gr.Radio(choices=["desktop", "mobile"], value="desktop", label="Select Device")
text_output = gr.Textbox(label="SRT Text transcription", show_copy_button=True)
srt_file = gr.File(file_count="single", type="filepath", file_types=[".srt"], label="SRT file")
text_clean_output = gr.Textbox(label="Text transcription", show_copy_button=True)
json_output = gr.JSON(label="JSON Transcription")
                gr.Interface(
                    fn=transcriber,
                    inputs=[file, file_type, max_words_per_line, task, model_version, device_type],
                    outputs=[text_output, srt_file, text_clean_output, json_output],
                    allow_flagging="never"
                )
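    # Listen on all interfaces on Gradio's default port so the app is reachable
    # from outside a container (typical for Docker / Hugging Face Spaces deployments).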
    demo.launch(server_name="0.0.0.0", server_port=7860)
if __name__ == '__main__':
    main()