import os
import re
import subprocess
import tempfile

import fitz  # PyMuPDF
import gradio as gr
from TTS.utils.download import download_url
from TTS.utils.synthesizer import Synthesizer

# Names offered in the model picker. Each name is also used as the directory
# from which the model checkpoint and config are loaded.
MODEL_NAMES = [
    "vits male1 (best)",
    "vits female (best)",
    "vits-male",
    "vits female1",
    "glowtts-male",
    "glowtts-female",
    "female tacotron2",
]

# Upper bound, in characters, on the text passed to the synthesizer per call.
MAX_TXT_LEN = 800

# Load/download models if not already present.
# Each entry is [local directory, remote checkpoint file, remote config file,
# base URL]. Whatever the remote names are, files are stored locally as
# "best_model.pth" and "config.json" inside the entry's directory.
modelInfo = [
    ["vits-male", "best_model_65633.pth", "config-0.json", "https://huggingface.co/Kamtera/persian-tts-male-vits/resolve/main/"],
    ["vits female (best)", "checkpoint_48000.pth", "config-2.json", "https://huggingface.co/Kamtera/persian-tts-female-vits/resolve/main/"],
    ["glowtts-male", "best_model_77797.pth", "config-1.json", "https://huggingface.co/Kamtera/persian-tts-male-glow_tts/resolve/main/"],
    ["glowtts-female", "best_model.pth", "config.json", "https://huggingface.co/Kamtera/persian-tts-female-glow_tts/resolve/main/"],
    ["vits male1 (best)", "checkpoint_88000.pth", "config.json", "https://huggingface.co/Kamtera/persian-tts-male1-vits/resolve/main/"],
    ["vits female1", "checkpoint_50000.pth", "config.json", "https://huggingface.co/Kamtera/persian-tts-female1-vits/resolve/main/"],
    ["female tacotron2", "checkpoint_313000.pth", "config-2.json", "https://huggingface.co/Kamtera/persian-tts-female-tacotron2/resolve/main/"],
]

for model_dir, remote_ckpt, remote_cfg, base_url in modelInfo:
    os.makedirs(model_dir, exist_ok=True)
    # Check every file on its own rather than only the directory: a run that
    # was interrupted after creating the directory would otherwise leave the
    # model permanently incomplete.
    for remote_name, local_name in (
        (remote_ckpt, "best_model.pth"),
        (remote_cfg, "config.json"),
    ):
        if not os.path.exists(os.path.join(model_dir, local_name)):
            download_url(base_url + remote_name, model_dir, local_name)

# A heading is any line whose first non-blank text is "فصل" or "بخش".
# Anchoring with MULTILINE (instead of consuming the surrounding newlines)
# lets a heading match on the very first line, on the last line, and directly
# after another heading.
_CHAPTER_HEADING = re.compile(r'^\s*(?:فصل|بخش)[^\n]*', re.MULTILINE)


def split_chapters(text):
    """Split *text* into chapter bodies at heading lines.

    A heading line is one that starts (after optional whitespace) with
    "فصل" or "بخش". Heading lines themselves are dropped from the result.

    Args:
        text: The full text to split.

    Returns:
        A list of non-empty chapter strings with surrounding whitespace
        stripped, in document order. Text before the first heading, if any,
        is returned as the first item.
    """
    pieces = _CHAPTER_HEADING.split(text)
    return [piece.strip() for piece in pieces if piece.strip()]

def _chunk_text(text, limit):
    """Split *text* into pieces of at most *limit* characters, cutting at whitespace when possible."""
    chunks = []
    start = 0
    length = len(text)
    while start < length:
        end = start + limit
        if end < length and not text[end].isspace():
            # The window would end inside a word: walk back to the last
            # whitespace in the window so the word is not cut in half.
            cut = end
            while cut > start and not text[cut - 1].isspace():
                cut -= 1
            # If the window has no whitespace at all, fall back to a hard cut.
            if cut > start:
                end = cut
        piece = text[start:end].strip()
        if piece:
            chunks.append(piece)
        start = end
    return chunks


def synthesize_text(text, synthesizer):
    """Synthesize *text* into one or more temporary WAV files.

    The text is processed in chunks of at most ``MAX_TXT_LEN`` characters.
    Chunks are broken at whitespace where possible so that words are not
    split across two synthesis calls.

    Args:
        text: The text to speak.
        synthesizer: Object providing ``tts(text)`` and
            ``save_wav(wav, path)``.

    Returns:
        A list of paths to the WAV files, one per chunk, in text order.
        The files are created with ``delete=False``; the caller is
        responsible for removing them.
    """
    audio_paths = []
    for chunk in _chunk_text(text, MAX_TXT_LEN):
        wav = synthesizer.tts(chunk)
        # Reserve a unique file name, then close the handle before the
        # synthesizer writes to that path.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
            wav_path = fp.name
        synthesizer.save_wav(wav, wav_path)
        audio_paths.append(wav_path)
    return audio_paths

def _merge_wavs(paths):
    """Concatenate the WAV files in *paths* with sox into a new temp file and delete the inputs."""
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        merged_path = fp.name
    try:
        # Argument list instead of a shell string: paths are passed verbatim,
        # and check=True surfaces a failed merge instead of returning a path
        # to an empty file.
        subprocess.run(["sox", *paths, merged_path], check=True)
    finally:
        for path in paths:
            try:
                os.remove(path)
            except OSError:
                # Cleanup is best-effort; a leftover temp chunk must not
                # mask the result (or the error) of the merge itself.
                pass
    return merged_path


def pdf_to_chapter_audio(pdf_file, model_name):
    """Convert an uploaded PDF into one WAV file per detected chapter.

    Args:
        pdf_file: The uploaded PDF, given either as a filesystem path or as
            an object exposing the path through a ``name`` attribute.
            ``None`` (nothing uploaded) yields an empty result.
        model_name: Name of the directory that holds ``best_model.pth`` and
            ``config.json`` for the model to use.

    Returns:
        A list of ``(label, wav_path)`` tuples in chapter order, where the
        label is ``"Chapter N"``.

    Raises:
        subprocess.CalledProcessError: If sox exits with an error while
            merging the chunks of a chapter.
    """
    if pdf_file is None:
        return []

    # Load model
    synthesizer = Synthesizer(f"{model_name}/best_model.pth", f"{model_name}/config.json")

    # Extract PDF text; the document is closed as soon as the text is read.
    pdf_path = getattr(pdf_file, "name", pdf_file)
    with fitz.open(pdf_path) as doc:
        text = "\n".join(page.get_text() for page in doc)
    chapters = split_chapters(text)

    chapter_audios = []
    for i, chapter in enumerate(chapters):
        paths = synthesize_text(chapter, synthesizer)
        if not paths:
            # Nothing was synthesized for this chapter, so there is no audio
            # to merge or return.
            continue
        if len(paths) == 1:
            # A single chunk already is the chapter audio; no merge needed.
            chapter_path = paths[0]
        else:
            chapter_path = _merge_wavs(paths)
        chapter_audios.append((f"Chapter {i+1}", chapter_path))
    return chapter_audios

# Input components: the PDF upload and the model picker.
pdf_input = gr.File(label="Upload Persian PDF Book")
model_picker = gr.Radio(label="Pick a TTS Model", choices=MODEL_NAMES, value="vits female (best)")

# Output: a dataset whose rows pair a text label with an audio file path.
chapter_audio = gr.Audio(label="Chapter Audio", type='filepath')
chapter_table = gr.Dataset(components=["text", chapter_audio])

demo = gr.Interface(
    fn=pdf_to_chapter_audio,
    inputs=[pdf_input, model_picker],
    outputs=chapter_table,
    title="📚 Persian Book to Audio Chapters",
    description="Upload a Persian PDF book and convert each chapter into audio using a TTS model.",
)
demo.launch()