Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
import os
import re
import subprocess
import tempfile

import fitz  # PyMuPDF
import gradio as gr
from TTS.utils.download import download_url
from TTS.utils.synthesizer import Synthesizer
# Voices offered in the UI. Each name doubles as the local directory that
# holds that model's "best_model.pth" and "config.json".
MODEL_NAMES = [
    "vits male1 (best)",
    "vits female (best)",
    "vits-male",
    "vits female1",
    "glowtts-male",
    "glowtts-female",
    "female tacotron2",
]

# Upper bound, in characters, on the text handed to the synthesizer per call.
MAX_TXT_LEN = 800
# Load/download models if not already present.
# Each entry is [local directory, remote checkpoint file, remote config file,
# base URL]. Whatever the remote file names are, the files are stored locally
# as "best_model.pth" and "config.json" inside the model's directory.
modelInfo = [
    ["vits-male", "best_model_65633.pth", "config-0.json", "https://huggingface.co/Kamtera/persian-tts-male-vits/resolve/main/"],
    ["vits female (best)", "checkpoint_48000.pth", "config-2.json", "https://huggingface.co/Kamtera/persian-tts-female-vits/resolve/main/"],
    ["glowtts-male", "best_model_77797.pth", "config-1.json", "https://huggingface.co/Kamtera/persian-tts-male-glow_tts/resolve/main/"],
    ["glowtts-female", "best_model.pth", "config.json", "https://huggingface.co/Kamtera/persian-tts-female-glow_tts/resolve/main/"],
    ["vits male1 (best)", "checkpoint_88000.pth", "config.json", "https://huggingface.co/Kamtera/persian-tts-male1-vits/resolve/main/"],
    ["vits female1", "checkpoint_50000.pth", "config.json", "https://huggingface.co/Kamtera/persian-tts-female1-vits/resolve/main/"],
    ["female tacotron2", "checkpoint_313000.pth", "config-2.json", "https://huggingface.co/Kamtera/persian-tts-female-tacotron2/resolve/main/"],
]

for model_dir, checkpoint_name, config_name, base_url in modelInfo:
    os.makedirs(model_dir, exist_ok=True)
    # Check each file rather than the directory: if an earlier start-up was
    # interrupted after the directory was created, the missing file(s) are
    # fetched on the next run instead of being skipped forever.
    for remote_name, local_name in (
        (checkpoint_name, "best_model.pth"),
        (config_name, "config.json"),
    ):
        if not os.path.exists(os.path.join(model_dir, local_name)):
            download_url(base_url + remote_name, model_dir, local_name)
# A heading is a line that starts (after optional whitespace) with one of the
# Persian keywords "فصل" (fasl, "chapter") or "بخش" (bakhsh, "section").
# The keywords are spelled with \u escapes so the pattern stays correct even
# if this file is re-encoded. The trailing newline is only looked at, not
# consumed, so a heading directly following another heading is still matched.
_CHAPTER_HEADING_RE = re.compile(
    r'(?:^|\n)\s*(?:\u0641\u0635\u0644|\u0628\u062e\u0634)[^\n]*(?=\n|$)'
)


def split_chapters(text):
    """Split book text into chapter bodies at Persian heading lines.

    Heading lines themselves are dropped. Text before the first heading is
    returned as its own leading piece.

    Args:
        text: Full text of the book.

    Returns:
        A list of non-empty, whitespace-stripped text pieces in document
        order. Text without any heading yields a single piece; empty or
        whitespace-only text yields an empty list.
    """
    chapters = []
    for piece in _CHAPTER_HEADING_RE.split(text):
        body = piece.strip()
        if body:
            chapters.append(body)
    return chapters
def _split_into_chunks(text, max_len):
    """Cut *text* into stripped, non-empty pieces of at most *max_len* chars.

    Each cut is moved back to the nearest whitespace inside the window so a
    word is not split in two; a run with no whitespace is cut at *max_len*.
    """
    chunks = []
    start, total = 0, len(text)
    while start < total:
        end = min(start + max_len, total)
        if end < total:
            # Search backwards from the hard limit for a whitespace character
            # to break on; fall back to the hard limit if there is none.
            end = next(
                (pos for pos in range(end, start, -1) if text[pos].isspace()),
                end,
            )
        piece = text[start:end].strip()
        if piece:
            chunks.append(piece)
        start = end
    return chunks


def synthesize_text(text, synthesizer):
    """Synthesize *text* to speech, one WAV file per chunk.

    The text is cut into chunks of at most MAX_TXT_LEN characters, each chunk
    is passed to ``synthesizer.tts`` and the result is written with
    ``synthesizer.save_wav`` to a fresh temporary ``.wav`` file.

    Args:
        text: Text to speak.
        synthesizer: Object providing ``tts(text)`` and ``save_wav(wav, path)``.

    Returns:
        Paths of the generated WAV files, in reading order. The files are not
        deleted here; the caller owns them. Empty or whitespace-only text
        yields an empty list.
    """
    audio_paths = []
    for chunk in _split_into_chunks(text, MAX_TXT_LEN):
        wav = synthesizer.tts(chunk)
        # Only reserve a unique file name here. The handle is closed before
        # save_wav writes to the path, because reopening a still-open named
        # temporary file is not portable across platforms.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
            wav_path = fp.name
        synthesizer.save_wav(wav, wav_path)
        audio_paths.append(wav_path)
    return audio_paths
def _merge_chunk_audio(paths):
    """Join the WAV files in *paths* into one file and return its path.

    A single file is returned as-is. Several files are concatenated with the
    external ``sox`` program into a new temporary ``.wav`` file, after which
    the per-chunk files are removed.

    Raises:
        FileNotFoundError: ``sox`` is not installed.
        subprocess.CalledProcessError: ``sox`` exited with an error.
    """
    if len(paths) == 1:
        return paths[0]
    # Reserve an output name and close the handle so sox can write to it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as merged:
        merged_path = merged.name
    try:
        # Argument list, no shell: paths are never interpreted by a shell.
        subprocess.run(["sox", *paths, merged_path], check=True)
    finally:
        for chunk_path in paths:
            try:
                os.remove(chunk_path)
            except OSError:
                pass  # best-effort cleanup of intermediate files
    return merged_path


def pdf_to_chapter_audio(pdf_file, model_name):
    """Convert an uploaded PDF book into one audio file per chapter.

    Args:
        pdf_file: The upload from the UI: either an object with a ``name``
            attribute holding the file path, or the path itself. ``None``
            (nothing uploaded) yields an empty result.
        model_name: Name of the model directory containing
            ``best_model.pth`` and ``config.json``.

    Returns:
        A list of ``("Chapter N", wav_path)`` tuples in document order.

    Raises:
        FileNotFoundError, subprocess.CalledProcessError: merging a chapter's
            audio chunks with ``sox`` failed.
    """
    if pdf_file is None:
        return []
    pdf_path = getattr(pdf_file, "name", pdf_file)

    # Load model
    synthesizer = Synthesizer(f"{model_name}/best_model.pth", f"{model_name}/config.json")

    # Extract PDF text; the document is closed as soon as the text is read.
    with fitz.open(pdf_path) as doc:
        text = "\n".join(page.get_text() for page in doc)

    chapter_audios = []
    for number, chapter in enumerate(split_chapters(text), start=1):
        paths = synthesize_text(chapter, synthesizer)
        if not paths:
            continue  # nothing was synthesized for this chapter
        chapter_audios.append((f"Chapter {number}", _merge_chunk_audio(paths)))
    return chapter_audios
# Build the web UI (PDF upload + model picker -> chapter audio) and start
# serving it. The leading title emoji is written as an escape so it survives
# re-encoding of this file.
# NOTE(review): confirm that gr.Dataset, used as an output, accepts the list
# of (label, wav_path) tuples returned by pdf_to_chapter_audio.
gr.Interface(
    fn=pdf_to_chapter_audio,
    inputs=[
        gr.File(label="Upload Persian PDF Book"),
        gr.Radio(label="Pick a TTS Model", choices=MODEL_NAMES, value="vits female (best)"),
    ],
    outputs=gr.Dataset(components=["text", gr.Audio(label="Chapter Audio", type='filepath')]),
    title="\U0001F4DA Persian Book to Audio Chapters",
    description="Upload a Persian PDF book and convert each chapter into audio using a TTS model.",
).launch()