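"""Persian Book to Audio Chapters.

Gradio app that takes a Persian PDF book, splits its text into chapters,
and synthesizes each chapter to audio with a selectable Persian TTS model.
"""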
import tempfile, os, re
import gradio as gr
import fitz # PyMuPDF
from TTS.utils.synthesizer import Synthesizer
from TTS.utils.download import download_url
MODEL_NAMES = [
    "vits male1 (best)", "vits female (best)", "vits-male", "vits female1",
    "glowtts-male", "glowtts-female", "female tacotron2"
]
MAX_TXT_LEN = 800  # max characters per synthesis call; longer text is chunked in synthesize_text
# Load/download models if not already present.
# Each entry: [directory/display name, checkpoint file, config file, base download URL].
modelInfo = [
    ["vits-male", "best_model_65633.pth", "config-0.json", "https://huggingface.co/Kamtera/persian-tts-male-vits/resolve/main/"],
    ["vits female (best)", "checkpoint_48000.pth", "config-2.json", "https://huggingface.co/Kamtera/persian-tts-female-vits/resolve/main/"],
    ["glowtts-male", "best_model_77797.pth", "config-1.json", "https://huggingface.co/Kamtera/persian-tts-male-glow_tts/resolve/main/"],
    ["glowtts-female", "best_model.pth", "config.json", "https://huggingface.co/Kamtera/persian-tts-female-glow_tts/resolve/main/"],
    ["vits male1 (best)", "checkpoint_88000.pth", "config.json", "https://huggingface.co/Kamtera/persian-tts-male1-vits/resolve/main/"],
    ["vits female1", "checkpoint_50000.pth", "config.json", "https://huggingface.co/Kamtera/persian-tts-female1-vits/resolve/main/"],
    ["female tacotron2", "checkpoint_313000.pth", "config-2.json", "https://huggingface.co/Kamtera/persian-tts-female-tacotron2/resolve/main/"],
]
for d in modelInfo:
    if not os.path.exists(d[0]):
        os.makedirs(d[0])
        download_url(d[3] + d[1], d[0], "best_model.pth")
        download_url(d[3] + d[2], d[0], "config.json")
def split_chapters(text):
    # Split on heading lines that begin with "فصل" (chapter) or "بخش" (section).
    chapters = re.split(r'\n\s*(?:فصل|بخش)[^\n]*\n', text)
    return [ch.strip() for ch in chapters if ch.strip()]
def synthesize_text(text, synthesizer):
    # Synthesize in MAX_TXT_LEN-character chunks, writing each chunk to its own WAV file.
    chunks = [text[i:i + MAX_TXT_LEN] for i in range(0, len(text), MAX_TXT_LEN)]
    audio_paths = []
    for chunk in chunks:
        wav = synthesizer.tts(chunk)
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
            synthesizer.save_wav(wav, fp.name)
            audio_paths.append(fp.name)
    return audio_paths
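# Optional alternative to the external `sox` command used in pdf_to_chapter_audio
# below: a minimal sketch that concatenates the chunk WAVs with the standard-library
# `wave` module. It assumes every chunk shares the same channel count, sample width,
# and sample rate (true when they come from the same synthesizer). The helper name
# `merge_wavs` is illustrative and not part of the original app.
import wave

def merge_wavs(paths, out_path):
    # Copy the format parameters from the first chunk, then append all frames in order.
    with wave.open(paths[0], "rb") as first:
        params = first.getparams()
    with wave.open(out_path, "wb") as out:
        out.setparams(params)
        for p in paths:
            with wave.open(p, "rb") as w:
                out.writeframes(w.readframes(w.getnframes()))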
def pdf_to_chapter_audio(pdf_file, model_name):
    # Load the selected model.
    synthesizer = Synthesizer(f"{model_name}/best_model.pth", f"{model_name}/config.json")
    # Extract the PDF text.
    doc = fitz.open(pdf_file.name)
    text = "\n".join(page.get_text() for page in doc)
    chapters = split_chapters(text)
    chapter_audios = []
    for i, chapter in enumerate(chapters):
        paths = synthesize_text(chapter, synthesizer)
        chapter_path = tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name
        # Merge the chunk WAVs into one file per chapter; requires the sox CLI on PATH.
        os.system(f"sox {' '.join(paths)} {chapter_path}")
        chapter_audios.append((f"Chapter {i + 1}", chapter_path))
    return chapter_audios
gr.Interface(
    fn=pdf_to_chapter_audio,
    inputs=[
        gr.File(label="Upload Persian PDF Book"),
        gr.Radio(label="Pick a TTS Model", choices=MODEL_NAMES, value="vits female (best)"),
    ],
    outputs=gr.Dataset(components=["text", gr.Audio(label="Chapter Audio", type="filepath")]),
    title="📚 Persian Book to Audio Chapters",
    description="Upload a Persian PDF book and convert each chapter into audio using a TTS model.",
).launch()