import gradio as gr
import wespeaker

# Load the speaker model once at import time so that every request handled by
# `speaker_verification` reuses the same instance.
# NOTE(review): "vblinkp" is presumably the identifier of a pretrained
# WeSpeaker model — confirm against the WeSpeaker documentation.
model = wespeaker.load_model("vblinkp")


def speaker_verification(audio_path1, audio_path2, threshold):
    """Compare two recordings and report whether they share a speaker.

    Args:
        audio_path1: File path of the first recording, or ``None`` when the
            user has not provided one.
        audio_path2: File path of the second recording, or ``None`` when the
            user has not provided one.
        threshold: Similarity score that must be strictly exceeded for the
            two recordings to be reported as the same speaker.

    Returns:
        A two-line message (in Chinese) containing the similarity score as a
        percentage and a yes/no verdict, or an empty string when either
        recording is missing.
    """
    if audio_path1 is None or audio_path2 is None:
        # Gradio's toast helper is `gr.Warning` (capital W). The lowercase
        # `gr.warning` does not exist, so the previous call raised
        # AttributeError instead of notifying the user.
        gr.Warning("Please enter two audios")
        return ""

    cos_score = model.compute_similarity(audio_path1, audio_path2)
    verdict = "是" if cos_score > threshold else "否"

    return f"語者相似機率: {cos_score * 100:.2f}%\n是否為相同語者: {verdict}"


# Input components, in the order their values are handed to
# `speaker_verification`: first recording, second recording, threshold.
inputs = [
    gr.Audio(label="Speaker#1", type="filepath", sources=["upload", "microphone"]),
    gr.Audio(label="Speaker#2", type="filepath", sources=["upload", "microphone"]),
    gr.Slider(
        label="Similarity Threshold",
        value=0.5,
        minimum=0,
        maximum=1,
        step=0.01,
        interactive=True,
    ),
]

# Unlabelled text field that shows the message returned by the handler.
output = gr.Textbox(label="")

interface = gr.Interface(
    title="Speaker Verification",
    fn=speaker_verification,
    inputs=inputs,
    outputs=output,
    flagging_mode="never",
)

# `queue()` returns the interface itself, so enabling the request queue
# (limited to 20 entries) and starting the server can be chained.
interface.queue(max_size=20).launch()