rahul7star committed
Commit 58f87f6 · verified · 1 Parent(s): b189838

Update app_quant.py

Files changed (1)
  1. app_quant.py +90 -41
app_quant.py CHANGED
@@ -185,66 +185,115 @@ def generate_audio_from_model(model_to_use, description, text, fname="tts.wav"):
 # -------------------------
 # Gradio UI
 # -------------------------
-css = ".gradio-container {max-width: 1400px}"
+css = """
+.gradio-container {max-width: 1400px}
+.example-box {
+    border: 1px solid #ccc;
+    padding: 12px;
+    border-radius: 8px;
+    background: #f8f8f8;
+}
+.video_box video {
+    width: 260px !important;
+    height: 160px !important;
+    object-fit: cover;
+}
+"""
+
 with gr.Blocks(title="NAVA — VEEN + LoRA + SNAC (Optimized)", css=css) as demo:
-    gr.Markdown("# 🪶 NAVA — VEEN + LoRA + SNAC (Optimized)\nGenerate emotional Hindi speech using Maya1 base + your LoRA adapter.")
-
+
+    gr.Markdown("# 🪶 NAVA — VEEN + LoRA + SNAC (Optimized)")
+    gr.Markdown("Generate emotional Hindi speech using Maya1 base + your LoRA adapter.")
 
     with gr.Row():
+        # ---------------- LEFT SIDE ----------------
         with gr.Column(scale=3):
-            gr.Markdown("## Inference (CPU/GPU auto)\nType text + pick a preset or write description manually.")
+            gr.Markdown("## 🎤 Inference (CPU/GPU auto)")
+
             text_in = gr.Textbox(label="Enter Hindi text", value=DEFAULT_TEXT, lines=3)
-            preset_select = gr.Dropdown(label="Select Preset Character", choices=list(PRESET_CHARACTERS.keys()), value="Male American")
-            description_box = gr.Textbox(label="Voice Description (editable)", value=PRESET_CHARACTERS["Male American"]["description"], lines=2)
-            emotion_select = gr.Dropdown(label="Select Emotion", choices=EMOTION_TAGS, value="<neutral>")
+            preset_select = gr.Dropdown(
+                label="Select Preset Character",
+                choices=list(PRESET_CHARACTERS.keys()),
+                value="Male American"
+            )
+            description_box = gr.Textbox(
+                label="Voice Description (editable)",
+                value=PRESET_CHARACTERS["Male American"]["description"],
+                lines=2
+            )
+            emotion_select = gr.Dropdown(
+                label="Select Emotion",
+                choices=EMOTION_TAGS,
+                value="<neutral>"
+            )
             gen_btn = gr.Button("🔊 Generate Audio (Base + LoRA)")
             gen_logs = gr.Textbox(label="Logs", lines=10)
 
+            # ---------------- EXAMPLES ----------------
+            gr.Markdown("## 📎 Example")
+
+            with gr.Column(elem_classes=["example-box"]):
+                example_text = DEFAULT_TEXT
+                example_audio_path = "audio.wav"
+                example_video = "gen_31ff9f64b1.mp4"
+
+                gr.Textbox(
+                    label="Example Text",
+                    value=example_text,
+                    lines=2,
+                    interactive=False
+                )
+                gr.Audio(
+                    label="Example Audio",
+                    value=example_audio_path,
+                    type="filepath",
+                    interactive=False
+                )
+                gr.Video(
+                    label="Example Video",
+                    value=example_video,
+                    autoplay=False,
+                    loop=False,
+                    interactive=False,
+                    elem_classes=["video_box"]
+                )
+
+        # ---------------- RIGHT SIDE ----------------
         with gr.Column(scale=2):
             gr.Markdown("### 🎧 Audio Results Comparison")
             audio_output_base = gr.Audio(label="Base Model Audio", type="filepath")
             audio_output_lora = gr.Audio(label="LoRA Model Audio", type="filepath")
 
-    # -----------------------------
-    # Example section
-    # -----------------------------
-    gr.Markdown("### Example")
-    example_text = DEFAULT_TEXT
-    example_audio_path = "audio.wav"
-
-    gr.Textbox(label="Example Text", value=example_text, lines=2, interactive=False)
-    gr.Audio(label="Example Audio", value=example_audio_path, type="filepath", interactive=False)
-    with gr.Blocks(css=".video_box {width: 600px; height: 350px;}") as demo:
-        gr.Video(
-            label="Example Video",
-            value="gen_31ff9f64b1.mp4",
-            autoplay=False,
-            loop=False,
-            interactive=False,
-            elem_classes=["video_box"]
-        )
-
-    # preset -> description update
+    # ---------------- PRESET UPDATE ----------------
     def _update_desc(preset_name):
         return PRESET_CHARACTERS.get(preset_name, {}).get("description", "")
-    preset_select.change(fn=_update_desc, inputs=[preset_select], outputs=[description_box])
 
-    # generation wrapper
+    preset_select.change(
+        fn=_update_desc,
+        inputs=[preset_select],
+        outputs=[description_box]
+    )
+
+    # ---------------- GENERATION HANDLER ----------------
     def _generate(text, preset_name, description, emotion):
         desc = description or PRESET_CHARACTERS.get(preset_name, {}).get("description", "")
         combined = f"{emotion} {desc}".strip()
-        # base
-        base_path, log_base = generate_audio_from_model(base_model, combined, text, fname="tts_base.wav")
-        # lora
-        lora_path, log_lora = generate_audio_from_model(model, combined, text, fname="tts_lora.wav")
-        combined_logs = f"[Base]\n{log_base}\n\n[LoRA]\n{log_lora}"
-        return base_path, lora_path, combined_logs
-
-    gen_btn.click(fn=_generate,
-                  inputs=[text_in, preset_select, description_box, emotion_select],
-                  outputs=[audio_output_base, audio_output_lora, gen_logs])
+
+        base_path, log_base = generate_audio_from_model(
+            base_model, combined, text, fname="tts_base.wav"
+        )
+        lora_path, log_lora = generate_audio_from_model(
+            model, combined, text, fname="tts_lora.wav"
+        )
+
+        logs = f"[Base]\n{log_base}\n\n[LoRA]\n{log_lora}"
+        return base_path, lora_path, logs
+
+    gen_btn.click(
+        fn=_generate,
+        inputs=[text_in, preset_select, description_box, emotion_select],
+        outputs=[audio_output_base, audio_output_lora, gen_logs]
+    )
 
 if __name__ == "__main__":
     demo.launch()
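
For reference, the preset-to-description wiring added in this commit follows Gradio's standard event pattern: component.change(fn, inputs, outputs) re-runs fn and writes its return value into the output component. A minimal self-contained sketch of that pattern follows; the preset dictionary below is illustrative, not the PRESET_CHARACTERS defined elsewhere in app_quant.py.

import gradio as gr

# Illustrative stand-in for PRESET_CHARACTERS (hypothetical values).
PRESETS = {
    "Male American": {"description": "Deep, calm male narrator"},
    "Female British": {"description": "Bright, crisp female narrator"},
}

with gr.Blocks() as demo:
    preset = gr.Dropdown(label="Preset", choices=list(PRESETS.keys()),
                         value="Male American")
    desc = gr.Textbox(label="Description (editable)",
                      value=PRESETS["Male American"]["description"])

    def update_desc(name):
        # Fall back to an empty string for unknown presets,
        # mirroring the .get() chain in _update_desc above.
        return PRESETS.get(name, {}).get("description", "")

    # Re-fill the textbox whenever the dropdown selection changes.
    preset.change(fn=update_desc, inputs=[preset], outputs=[desc])

if __name__ == "__main__":
    demo.launch()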