Spaces:

rahul7star
/

Nava-Infrence

Sleeping

App Files Files Community

rahul7star committed 26 days ago

Commit

58f87f6

verified ·

1 Parent(s): b189838

Update app_quant.py

Browse files

Files changed (1) hide show

app_quant.py +90 -41

app_quant.py CHANGED Viewed

@@ -185,66 +185,115 @@ def generate_audio_from_model(model_to_use, description, text, fname="tts.wav"):
 # -------------------------
 # Gradio UI
 # -------------------------
-css = ".gradio-container {max-width: 1400px}"
 with gr.Blocks(title="NAVA — VEEN + LoRA + SNAC (Optimized)", css=css) as demo:
-    gr.Markdown("# 🪶 NAVA — VEEN + LoRA + SNAC (Optimized)\nGenerate emotional Hindi speech using Maya1 base + your LoRA adapter.")
     with gr.Row():
         with gr.Column(scale=3):
-            gr.Markdown("## Inference (CPU/GPU auto)\nType text + pick a preset or write description manually.")
             text_in = gr.Textbox(label="Enter Hindi text", value=DEFAULT_TEXT, lines=3)
-            preset_select = gr.Dropdown(label="Select Preset Character", choices=list(PRESET_CHARACTERS.keys()), value="Male American")
-            description_box = gr.Textbox(label="Voice Description (editable)", value=PRESET_CHARACTERS["Male American"]["description"], lines=2)
-            emotion_select = gr.Dropdown(label="Select Emotion", choices=EMOTION_TAGS, value="<neutral>")
             gen_btn = gr.Button("🔊 Generate Audio (Base + LoRA)")
             gen_logs = gr.Textbox(label="Logs", lines=10)
         with gr.Column(scale=2):
             gr.Markdown("### 🎧 Audio Results Comparison")
             audio_output_base = gr.Audio(label="Base Model Audio", type="filepath")
             audio_output_lora = gr.Audio(label="LoRA Model Audio", type="filepath")
-    # -----------------------------
-    # Example section
-    # -----------------------------
-    gr.Markdown("### Example")
-    example_text = DEFAULT_TEXT
-    example_audio_path = "audio.wav"
-    gr.Textbox(label="Example Text", value=example_text, lines=2, interactive=False)
-    gr.Audio(label="Example Audio", value=example_audio_path, type="filepath", interactive=False)
-    with gr.Blocks(css=".video_box {width: 600px; height: 350px;}") as demo:
-      gr.Video(
-        label="Example Video",
-        value="gen_31ff9f64b1.mp4",
-        autoplay=False,
-        loop=False,
-        interactive=False,
-        elem_classes=["video_box"]
-       )
-    # preset -> description update
     def _update_desc(preset_name):
         return PRESET_CHARACTERS.get(preset_name, {}).get("description", "")
-    preset_select.change(fn=_update_desc, inputs=[preset_select], outputs=[description_box])
-    # generation wrapper
     def _generate(text, preset_name, description, emotion):
         desc = description or PRESET_CHARACTERS.get(preset_name, {}).get("description", "")
         combined = f"{emotion} {desc}".strip()
-        # base
-        base_path, log_base = generate_audio_from_model(base_model, combined, text, fname="tts_base.wav")
-        # lora
-        lora_path, log_lora = generate_audio_from_model(model, combined, text, fname="tts_lora.wav")
-        combined_logs = f"[Base]\n{log_base}\n\n[LoRA]\n{log_lora}"
-        return base_path, lora_path, combined_logs
-    gen_btn.click(fn=_generate,
-                  inputs=[text_in, preset_select, description_box, emotion_select],
-                  outputs=[audio_output_base, audio_output_lora, gen_logs])
 if __name__ == "__main__":
     demo.launch()

 # -------------------------
 # Gradio UI
 # -------------------------
+css = """
+.gradio-container {max-width: 1400px}
+.example-box {
+    border: 1px solid #ccc;
+    padding: 12px;
+    border-radius: 8px;
+    background: #f8f8f8;
+}
+.video_box video {
+    width: 260px !important;
+    height: 160px !important;
+    object-fit: cover;
+}
+"""
 with gr.Blocks(title="NAVA — VEEN + LoRA + SNAC (Optimized)", css=css) as demo:
+    gr.Markdown("# 🪶 NAVA — VEEN + LoRA + SNAC (Optimized)")
+    gr.Markdown("Generate emotional Hindi speech using Maya1 base + your LoRA adapter.")
     with gr.Row():
+        # ---------------- LEFT SIDE ----------------
         with gr.Column(scale=3):
+            gr.Markdown("## 🎤 Inference (CPU/GPU auto)")
             text_in = gr.Textbox(label="Enter Hindi text", value=DEFAULT_TEXT, lines=3)
+            preset_select = gr.Dropdown(
+                label="Select Preset Character",
+                choices=list(PRESET_CHARACTERS.keys()),
+                value="Male American"
+            )
+            description_box = gr.Textbox(
+                label="Voice Description (editable)",
+                value=PRESET_CHARACTERS["Male American"]["description"],
+                lines=2
+            )
+            emotion_select = gr.Dropdown(
+                label="Select Emotion",
+                choices=EMOTION_TAGS,
+                value="<neutral>"
+            )
             gen_btn = gr.Button("🔊 Generate Audio (Base + LoRA)")
             gen_logs = gr.Textbox(label="Logs", lines=10)
+            # ---------------- EXAMPLES ----------------
+            gr.Markdown("## 📎 Example")
+            with gr.Column(elem_classes=["example-box"]):
+                example_text = DEFAULT_TEXT
+                example_audio_path = "audio.wav"
+                example_video = "gen_31ff9f64b1.mp4"
+                gr.Textbox(
+                    label="Example Text",
+                    value=example_text,
+                    lines=2,
+                    interactive=False
+                )
+                gr.Audio(
+                    label="Example Audio",
+                    value=example_audio_path,
+                    type="filepath",
+                    interactive=False
+                )
+                gr.Video(
+                    label="Example Video",
+                    value=example_video,
+                    autoplay=False,
+                    loop=False,
+                    interactive=False,
+                    elem_classes=["video_box"]
+                )
+        # ---------------- RIGHT SIDE ----------------
         with gr.Column(scale=2):
             gr.Markdown("### 🎧 Audio Results Comparison")
             audio_output_base = gr.Audio(label="Base Model Audio", type="filepath")
             audio_output_lora = gr.Audio(label="LoRA Model Audio", type="filepath")
+    # ---------------- PRESET UPDATE ----------------
     def _update_desc(preset_name):
         return PRESET_CHARACTERS.get(preset_name, {}).get("description", "")
+    preset_select.change(
+        fn=_update_desc,
+        inputs=[preset_select],
+        outputs=[description_box]
+    )
+    # ---------------- GENERATION HANDLER ----------------
     def _generate(text, preset_name, description, emotion):
         desc = description or PRESET_CHARACTERS.get(preset_name, {}).get("description", "")
         combined = f"{emotion} {desc}".strip()
+        base_path, log_base = generate_audio_from_model(
+            base_model, combined, text, fname="tts_base.wav"
+        )
+        lora_path, log_lora = generate_audio_from_model(
+            model, combined, text, fname="tts_lora.wav"
+        )
+        logs = f"[Base]\n{log_base}\n\n[LoRA]\n{log_lora}"
+        return base_path, lora_path, logs
+    gen_btn.click(
+        fn=_generate,
+        inputs=[text_in, preset_select, description_box, emotion_select],
+        outputs=[audio_output_base, audio_output_lora, gen_logs]
+    )
 if __name__ == "__main__":
     demo.launch()