Spaces:
Sleeping
Sleeping
Update app_quant.py
Browse files- app_quant.py +90 -41
app_quant.py
CHANGED
|
@@ -185,66 +185,115 @@ def generate_audio_from_model(model_to_use, description, text, fname="tts.wav"):
|
|
| 185 |
# -------------------------
|
| 186 |
# Gradio UI
|
| 187 |
# -------------------------
|
| 188 |
-
css = "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
with gr.Blocks(title="NAVA β VEEN + LoRA + SNAC (Optimized)", css=css) as demo:
|
| 190 |
-
|
| 191 |
-
|
|
|
|
| 192 |
|
| 193 |
with gr.Row():
|
|
|
|
| 194 |
with gr.Column(scale=3):
|
| 195 |
-
gr.Markdown("## Inference (CPU/GPU auto)
|
|
|
|
| 196 |
text_in = gr.Textbox(label="Enter Hindi text", value=DEFAULT_TEXT, lines=3)
|
| 197 |
-
preset_select = gr.Dropdown(
|
| 198 |
-
|
| 199 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
gen_btn = gr.Button("π Generate Audio (Base + LoRA)")
|
| 201 |
gen_logs = gr.Textbox(label="Logs", lines=10)
|
| 202 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
with gr.Column(scale=2):
|
| 204 |
gr.Markdown("### π§ Audio Results Comparison")
|
| 205 |
audio_output_base = gr.Audio(label="Base Model Audio", type="filepath")
|
| 206 |
audio_output_lora = gr.Audio(label="LoRA Model Audio", type="filepath")
|
| 207 |
|
| 208 |
-
#
|
| 209 |
-
# Example section
|
| 210 |
-
# -----------------------------
|
| 211 |
-
gr.Markdown("### Example")
|
| 212 |
-
example_text = DEFAULT_TEXT
|
| 213 |
-
example_audio_path = "audio.wav"
|
| 214 |
-
|
| 215 |
-
gr.Textbox(label="Example Text", value=example_text, lines=2, interactive=False)
|
| 216 |
-
gr.Audio(label="Example Audio", value=example_audio_path, type="filepath", interactive=False)
|
| 217 |
-
with gr.Blocks(css=".video_box {width: 600px; height: 350px;}") as demo:
|
| 218 |
-
gr.Video(
|
| 219 |
-
label="Example Video",
|
| 220 |
-
value="gen_31ff9f64b1.mp4",
|
| 221 |
-
autoplay=False,
|
| 222 |
-
loop=False,
|
| 223 |
-
interactive=False,
|
| 224 |
-
elem_classes=["video_box"]
|
| 225 |
-
)
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
# preset -> description update
|
| 230 |
def _update_desc(preset_name):
    """Return the description text registered for ``preset_name``.

    Looks the preset up in ``PRESET_CHARACTERS``; an unknown preset, or a
    preset without a ``"description"`` key, yields an empty string.
    """
    preset_entry = PRESET_CHARACTERS.get(preset_name, {})
    return preset_entry.get("description", "")
|
| 232 |
-
preset_select.change(fn=_update_desc, inputs=[preset_select], outputs=[description_box])
|
| 233 |
|
| 234 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
def _generate(text, preset_name, description, emotion):
|
| 236 |
desc = description or PRESET_CHARACTERS.get(preset_name, {}).get("description", "")
|
| 237 |
combined = f"{emotion} {desc}".strip()
|
| 238 |
-
|
| 239 |
-
base_path, log_base = generate_audio_from_model(
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 248 |
|
| 249 |
if __name__ == "__main__":
    # Launch the UI only when this file is run as a script, not on import.
    demo.launch()
|
|
|
|
| 185 |
# -------------------------
|
| 186 |
# Gradio UI
|
| 187 |
# -------------------------
|
| 188 |
+
css = """
|
| 189 |
+
.gradio-container {max-width: 1400px}
|
| 190 |
+
.example-box {
|
| 191 |
+
border: 1px solid #ccc;
|
| 192 |
+
padding: 12px;
|
| 193 |
+
border-radius: 8px;
|
| 194 |
+
background: #f8f8f8;
|
| 195 |
+
}
|
| 196 |
+
.video_box video {
|
| 197 |
+
width: 260px !important;
|
| 198 |
+
height: 160px !important;
|
| 199 |
+
object-fit: cover;
|
| 200 |
+
}
|
| 201 |
+
"""
|
| 202 |
+
|
| 203 |
with gr.Blocks(title="NAVA β VEEN + LoRA + SNAC (Optimized)", css=css) as demo:
|
| 204 |
+
|
| 205 |
+
gr.Markdown("# πͺΆ NAVA β VEEN + LoRA + SNAC (Optimized)")
|
| 206 |
+
gr.Markdown("Generate emotional Hindi speech using Maya1 base + your LoRA adapter.")
|
| 207 |
|
| 208 |
with gr.Row():
|
| 209 |
+
# ---------------- LEFT SIDE ----------------
|
| 210 |
with gr.Column(scale=3):
|
| 211 |
+
gr.Markdown("## π€ Inference (CPU/GPU auto)")
|
| 212 |
+
|
| 213 |
text_in = gr.Textbox(label="Enter Hindi text", value=DEFAULT_TEXT, lines=3)
|
| 214 |
+
preset_select = gr.Dropdown(
|
| 215 |
+
label="Select Preset Character",
|
| 216 |
+
choices=list(PRESET_CHARACTERS.keys()),
|
| 217 |
+
value="Male American"
|
| 218 |
+
)
|
| 219 |
+
description_box = gr.Textbox(
|
| 220 |
+
label="Voice Description (editable)",
|
| 221 |
+
value=PRESET_CHARACTERS["Male American"]["description"],
|
| 222 |
+
lines=2
|
| 223 |
+
)
|
| 224 |
+
emotion_select = gr.Dropdown(
|
| 225 |
+
label="Select Emotion",
|
| 226 |
+
choices=EMOTION_TAGS,
|
| 227 |
+
value="<neutral>"
|
| 228 |
+
)
|
| 229 |
gen_btn = gr.Button("π Generate Audio (Base + LoRA)")
|
| 230 |
gen_logs = gr.Textbox(label="Logs", lines=10)
|
| 231 |
|
| 232 |
+
# ---------------- EXAMPLES ----------------
|
| 233 |
+
gr.Markdown("## π Example")
|
| 234 |
+
|
| 235 |
+
with gr.Column(elem_classes=["example-box"]):
|
| 236 |
+
example_text = DEFAULT_TEXT
|
| 237 |
+
example_audio_path = "audio.wav"
|
| 238 |
+
example_video = "gen_31ff9f64b1.mp4"
|
| 239 |
+
|
| 240 |
+
gr.Textbox(
|
| 241 |
+
label="Example Text",
|
| 242 |
+
value=example_text,
|
| 243 |
+
lines=2,
|
| 244 |
+
interactive=False
|
| 245 |
+
)
|
| 246 |
+
gr.Audio(
|
| 247 |
+
label="Example Audio",
|
| 248 |
+
value=example_audio_path,
|
| 249 |
+
type="filepath",
|
| 250 |
+
interactive=False
|
| 251 |
+
)
|
| 252 |
+
gr.Video(
|
| 253 |
+
label="Example Video",
|
| 254 |
+
value=example_video,
|
| 255 |
+
autoplay=False,
|
| 256 |
+
loop=False,
|
| 257 |
+
interactive=False,
|
| 258 |
+
elem_classes=["video_box"]
|
| 259 |
+
)
|
| 260 |
+
|
| 261 |
+
# ---------------- RIGHT SIDE ----------------
|
| 262 |
with gr.Column(scale=2):
|
| 263 |
gr.Markdown("### π§ Audio Results Comparison")
|
| 264 |
audio_output_base = gr.Audio(label="Base Model Audio", type="filepath")
|
| 265 |
audio_output_lora = gr.Audio(label="LoRA Model Audio", type="filepath")
|
| 266 |
|
| 267 |
+
# ---------------- PRESET UPDATE ----------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 268 |
def _update_desc(preset_name):
    """Return the description text registered for ``preset_name``.

    Looks the preset up in ``PRESET_CHARACTERS``; an unknown preset, or a
    preset without a ``"description"`` key, yields an empty string.
    """
    preset_entry = PRESET_CHARACTERS.get(preset_name, {})
    return preset_entry.get("description", "")
|
|
|
|
| 270 |
|
| 271 |
+
# Whenever the preset dropdown changes, push that preset's description
# into the editable description box.
preset_select.change(fn=_update_desc, inputs=[preset_select], outputs=[description_box])
|
| 276 |
+
|
| 277 |
+
# ---------------- GENERATION HANDLER ----------------
|
| 278 |
def _generate(text, preset_name, description, emotion):
    """Generate audio for ``text`` with the base model and then the LoRA model.

    Args:
        text: Text handed to ``generate_audio_from_model`` for synthesis.
        preset_name: Key into ``PRESET_CHARACTERS``; only consulted when
            ``description`` is empty/falsy.
        description: Voice description; takes precedence over the preset.
        emotion: Emotion tag placed in front of the voice description.

    Returns:
        A 3-tuple ``(base_path, lora_path, logs)``. The first two items are
        the first element returned by each ``generate_audio_from_model`` call
        (presumably the written audio file path - confirm against that
        helper); ``logs`` is both second elements joined under ``[Base]`` and
        ``[LoRA]`` headings.
    """
    # An explicit description wins; otherwise fall back to the preset's text.
    if description:
        voice = description
    else:
        voice = PRESET_CHARACTERS.get(preset_name, {}).get("description", "")

    # Emotion tag first, then the voice description; strip stray edge spaces.
    prompt = f"{emotion} {voice}".strip()

    # Base model runs first, LoRA second; each writes to its own file name.
    base_path, base_log = generate_audio_from_model(
        base_model, prompt, text, fname="tts_base.wav"
    )
    lora_path, lora_log = generate_audio_from_model(
        model, prompt, text, fname="tts_lora.wav"
    )

    report = "\n\n".join((f"[Base]\n{base_log}", f"[LoRA]\n{lora_log}"))
    return base_path, lora_path, report
|
| 291 |
+
|
| 292 |
+
# Clicking the button runs _generate; its three return values fill the
# base-audio player, the LoRA-audio player and the log box, in that order.
gen_btn.click(
    _generate,
    [text_in, preset_select, description_box, emotion_select],
    [audio_output_base, audio_output_lora, gen_logs],
)
|
| 297 |
|
| 298 |
if __name__ == "__main__":
    # Launch the UI only when this file is run as a script, not on import.
    demo.launch()
|