import gradio as gr
from src.transcriber import transcriber
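# Expected shape of src.transcriber.transcriber, inferred from the components
# wired into gr.Interface below; the parameter names and return types here are
# an assumption, not confirmed by this file:
#   transcriber(file, file_type, max_words_per_line, task, model_version, device_type)
#   -> (srt_text, srt_file_path, plain_text, segments_json)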
def main():
    with gr.Blocks(title='multilang-asr-transcriber', delete_cache=(86400, 86400), theme=gr.themes.Base()) as demo:
        gr.Markdown('## Multilang ASR Transcriber')
        gr.Markdown('An automatic speech recognition tool using [faster-whisper](https://github.com/SYSTRAN/faster-whisper). Supports multilingual video transcription and translation to English. Users may set the max words per line.')
        with gr.Tabs(selected="video") as tabs:
            with gr.Tab("Video", id="video"):
                file = gr.File(file_types=["video"], type="filepath", label="Upload a video")
file_type = gr.Radio(choices=["video"], value="video", label="File Type", visible=False)
max_words_per_line = gr.Number(value=6, label="Max words per line")
task = gr.Radio(choices=["transcribe", "translate"], value="transcribe", label="Select Task")
model_version = gr.Radio(choices=["deepdml/faster-whisper-large-v3-turbo-ct2",
"turbo",
"large-v3"], value="deepdml/faster-whisper-large-v3-turbo-ct2", label="Select Model")
device_type = gr.Radio(choices=["desktop", "mobile"], value="desktop", label="Select Device")
text_output = gr.Textbox(label="SRT Text transcription", show_copy_button=True)
srt_file = gr.File(file_count="single", type="filepath", file_types=[".srt"], label="SRT file")
text_clean_output = gr.Textbox(label="Text transcription", show_copy_button=True)
json_output = gr.JSON(label="JSON Transcription")
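                # gr.Interface wires the components above to the transcriber callable
                # and supplies the submit/clear controls; flagging is disabled.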
                gr.Interface(
                    fn=transcriber,
                    inputs=[file, file_type, max_words_per_line, task, model_version, device_type],
                    outputs=[text_output, srt_file, text_clean_output, json_output],
                    allow_flagging="never"
                )
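            # The Audio tab mirrors the Video tab; only the accepted file types
            # and the hidden file_type value differ.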
with gr.Tab("Audio", id = "audio"):
file = gr.File(file_types=["audio"],type="filepath", label="Upload an audio file")
file_type = gr.Radio(choices=["audio"], value="audio", label="File Type", visible=False)
max_words_per_line = gr.Number(value=6, label="Max words per line")
task = gr.Radio(choices=["transcribe", "translate"], value="transcribe", label="Select Task")
model_version = gr.Radio(choices=["deepdml/faster-whisper-large-v3-turbo-ct2",
"turbo",
"large-v3"], value="deepdml/faster-whisper-large-v3-turbo-ct2", label="Select Model")
device_type = gr.Radio(choices=["desktop", "mobile"], value="desktop", label="Select Device")
text_output = gr.Textbox(label="SRT Text transcription", show_copy_button=True)
srt_file = gr.File(file_count="single", type="filepath", file_types=[".srt"], label="SRT file")
text_clean_output = gr.Textbox(label="Text transcription", show_copy_button=True)
json_output = gr.JSON(label="JSON Transcription")
                gr.Interface(
                    fn=transcriber,
                    inputs=[file, file_type, max_words_per_line, task, model_version, device_type],
                    outputs=[text_output, srt_file, text_clean_output, json_output],
                    allow_flagging="never"
                )
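    # Listen on all interfaces on Gradio's default port so the app is reachable
    # from outside a container (typical for Docker / Hugging Face Spaces deployments).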
    demo.launch(server_name="0.0.0.0", server_port=7860)
if __name__ == '__main__':
    main()