"""Gradio demo for Chinese speech recognition with OpenAI Whisper.

Loads the ``openai/whisper-large`` model through the transformers
automatic-speech-recognition pipeline, forces Chinese transcription,
and serves a Gradio UI built directly from the pipeline.
"""
import gradio as gr
import torch
from transformers import pipeline

# NOTE(review): hard-coded to GPU index 2 — confirm that device exists on the
# target machine, or parameterize (e.g. "cuda:0" / an env var) for portability.
device = "cuda:2" if torch.cuda.is_available() else "cpu"

pipe = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-large",
    chunk_length_s=30,  # chunked inference so audio longer than 30 s works
    device=device,
)

# Force the decoder to transcribe in Chinese regardless of detected language.
pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(
    language="zh", task="transcribe"
)


def greet(name):
    """Return a greeting string for *name* (demo helper, not wired to the UI)."""
    return "Hello " + name + "!"


def asr(inputs):
    """Run speech recognition on *inputs*.

    *inputs* is whatever the pipeline accepts (e.g. an audio filepath);
    returns the pipeline's result, e.g. ``{"text": "..."}``.
    """
    print(inputs)  # debug: log the incoming audio reference
    return pipe(inputs)


# Build the interface straight from the pipeline (audio in, text out) and serve.
gr.Interface.from_pipeline(pipe).launch()