"""Gradio demo for Lao speech recognition with selectable fine-tuned Whisper models."""

import gradio as gr
import torch
from transformers import WhisperForConditionalGeneration, WhisperProcessor, pipeline

# Friendly display label -> Hugging Face model repository ID.
MODEL_OPTIONS = {
    "Whisper Lao default": "LuoYiSULIXAY/whisper-lao-finetuned_laonlp",
    "Whisper Lao update": "LuoYiSULIXAY/whisper-lao-finetuned_laonlp_2",
    "Whisper Lao v1": "LuoYiSULIXAY/whisper-lao-finetuned_1",
    "Whisper Lao v2": "LuoYiSULIXAY/whisper-lao-finetuned_2",
    "OpenAI Whisper Medium": "openai/whisper-medium",
    "small aug": "LuoYiSULIXAY/whisper_small_aug",
}

# Cache of already-built pipelines keyed by model ID, so switching back to a
# previously used model does not pay the (slow) load cost again.
loaded_pipelines = {}


def load_asr_pipeline(model_name):
    """Return a cached or freshly built ASR pipeline for *model_name*.

    Args:
        model_name: Hugging Face repository ID of a Whisper checkpoint.

    Returns:
        A transformers ``automatic-speech-recognition`` pipeline. The
        tokenizer/feature extractor always come from ``openai/whisper-medium``
        configured for Lao (``language="lo"``); only the model weights vary.
    """
    if model_name in loaded_pipelines:
        return loaded_pipelines[model_name]

    processor = WhisperProcessor.from_pretrained("openai/whisper-medium", language="lo")
    model = WhisperForConditionalGeneration.from_pretrained(model_name)
    # Use GPU 0 when available, otherwise fall back to CPU. (The original
    # hard-coded device=0, which crashes on CUDA-less hosts.)
    device = 0 if torch.cuda.is_available() else -1
    asr_pipeline = pipeline(
        "automatic-speech-recognition",
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        device=device,
    )
    loaded_pipelines[model_name] = asr_pipeline
    return asr_pipeline


def transcribe(audio, model_choice):
    """Transcribe an audio file to Lao text with the selected model.

    Args:
        audio: Path to the uploaded audio file (Gradio ``type="filepath"``).
        model_choice: Either a friendly label from ``MODEL_OPTIONS`` or a raw
            model repository ID (both accepted for backward compatibility).

    Returns:
        The transcribed text.
    """
    model_id = MODEL_OPTIONS.get(model_choice, model_choice)
    asr = load_asr_pipeline(model_id)
    result = asr(audio, generate_kwargs={"language": "lao", "task": "transcribe"})
    return result["text"]


demo = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(type="filepath", label="Upload Audio"),
        # Show human-readable labels; transcribe() maps them to repo IDs.
        gr.Dropdown(
            choices=list(MODEL_OPTIONS),
            value="Whisper Lao default",
            label="Select Whisper Model",
        ),
    ],
    outputs=gr.Textbox(label="Transcription"),
    title="Whisper Lao",
    description="Realtime demo for Lao speech recognition using different Whisper fine-tuned models.",
)

if __name__ == "__main__":
    demo.launch(share=True)