thankfulcarp committed
Commit 97b7208 · 1 Parent(s): 2b02e16
Files changed (1)
  1. app.py +65 -38
app.py CHANGED
@@ -3,24 +3,6 @@
 Hugging Face Space for Text-to-Video generation using the Wan 2.1 model,
 enhanced with a base `FusionX` LoRA, dynamic user-selectable style LoRAs,
 and an LLM-based prompt enhancer.
-
-Correction: Replaced `unload_lora_weights` with the correct `delete_adapters`
-method for robust and selective LoRA management.
-
-Fix (July 3, 2024): Corrected a tensor device mismatch error in the
-`enhance_prompt_with_llm` function for ZeroGPU compatibility. The fix
-involves manually tokenizing the input and moving the resulting tensors
-to the CUDA device before calling the model's generate method.
-
-Fix (July 3, 2025): Addressed an attention_mask warning and clarified ZeroGPU
-behavior. The model is now correctly fed both input_ids and attention_mask.
-Added comments explaining why the model is moved to CUDA on each call, which
-is the expected behavior for ZeroGPU Spaces.
-
-Fix (July 3, 2025): Corrected the GPU duration estimation logic (`get_t2v_duration`)
-to prevent timeouts on longer or high-resolution videos. The new logic now
-considers video resolution (width and height) in addition to steps and duration,
-and uses more generous time allocations.
 """
 
 # --- 1. Imports ---
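For reference, `delete_adapters` (named in the removed changelog above) is part of diffusers' PEFT-backed LoRA API. A minimal sketch of the selective-unload pattern it enables, with placeholder repo and adapter names rather than the Space's actual values:

```python
# Sketch: selective LoRA management with diffusers (placeholder names).
import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    "some/base-model", torch_dtype=torch.float16  # placeholder repo
)
pipe.load_lora_weights("some/lora-repo", adapter_name="style")  # placeholder
pipe.set_adapters(["style"], adapter_weights=[0.8])

# unload_lora_weights() would strip *every* loaded LoRA, including an
# always-on base LoRA; delete_adapters() removes only the named adapter.
pipe.delete_adapters("style")
```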
@@ -157,14 +139,18 @@ def parse_lset_prompt(lset_prompt):
         resolved_prompt = resolved_prompt.replace(f"{{{key}}}", highlighted_value)
     return resolved_prompt
 
-def handle_lora_selection_change(preset_name: str, current_prompt: str):
+def handle_lora_selection_change(preset_name: str, base_prompt: str):
     """
-    When a preset is selected, this function finds the corresponding .lset file,
-    parses it, and appends the suggested trigger words to the current prompt.
+    When a preset is selected, this function combines the base_prompt (from state)
+    with the new LoRA's trigger words. This makes the update idempotent.
     """
+    # Default state for the slider is hidden and non-interactive
+    lora_slider_update = gr.update(visible=False, interactive=False)
+
+    # If "None" is selected, the displayed prompt is just the base prompt.
     if not preset_name or preset_name == "None":
-        gr.Info("LoRA cleared. Prompt remains unchanged.")
-        return gr.update(value=current_prompt), gr.update(interactive=False)
+        gr.Info("LoRA cleared.")
+        return gr.update(value=base_prompt), lora_slider_update
 
     try:
         lset_filename = f"{preset_name}.lset"
@@ -177,27 +163,41 @@ def handle_lora_selection_change(preset_name: str, current_prompt: str):
 
         lset_prompt_raw = None
         try:
-            # Attempt to parse as JSON, which is one of the formats in the repo
             lset_data = json.loads(lset_content)
             lset_prompt_raw = lset_data.get("prompt")
         except json.JSONDecodeError:
-            # If not JSON, assume the entire file content is the prompt template
            print(f"Info: '{lset_filename}' is not JSON. Treating as plain text prompt.")
            lset_prompt_raw = lset_content
 
        if lset_prompt_raw:
            resolved_prompt = parse_lset_prompt(lset_prompt_raw)
-            separator = ", " if current_prompt and not current_prompt.endswith( (",", " ")) else ""
-            new_prompt = f"{current_prompt}{separator}{resolved_prompt}".strip()
+            separator = ", " if base_prompt and not base_prompt.endswith((",", " ")) else ""
+            # The new prompt is always constructed from the base prompt and the new triggers.
+            new_prompt = f"{base_prompt}{separator}{resolved_prompt}".strip()
            gr.Info(f"✅ Appended triggers from '{preset_name}'. Replace highlighted text like __this__.")
-            return gr.update(value=new_prompt), gr.update(interactive=True)
+
+            # On success, update the prompt and make the slider visible and interactive
+            new_prompt_update = gr.update(value=new_prompt)
+            lora_slider_update = gr.update(visible=True, interactive=True)
+            return new_prompt_update, lora_slider_update
        else:
-            gr.Info(f"ℹ️ No prompt found in '{preset_name}.lset'.")
-            return gr.update(value=current_prompt), gr.update(interactive=False)
+            gr.Info(f"ℹ️ No prompt found in '{preset_name}.lset'. Prompt unchanged.")
+            # If no triggers, the prompt is just the base prompt.
+            return gr.update(value=base_prompt), lora_slider_update
+
    except Exception as e:
        print(f"Info: Could not process .lset for '{preset_name}'. Reason: {e}")
        gr.Warning(f"⚠️ Could not load triggers for '{preset_name}'.")
-        return gr.update(value=current_prompt), gr.update(interactive=False)
+        # On error, revert to just the base prompt.
+        return gr.update(value=base_prompt), lora_slider_update
+
+def update_base_prompt(new_prompt_text):
+    """
+    Called when the user manually edits the prompt textbox.
+    This updates the base prompt state and resets the LoRA selection, as the
+    old triggers are no longer relevant to the new, manually-entered prompt.
+    """
+    return new_prompt_text, "None", gr.update(visible=False, interactive=False)
 
 
 def _manage_lora_state(pipe, selected_lora: str, lora_weight: float) -> bool:
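For illustration, the two `.lset` formats this handler accepts can be exercised in isolation; the file contents below are invented examples, not presets from the actual LoRA repo:

```python
import json

# Invented contents for a hypothetical "InkStyle.lset" preset.
json_lset = '{"prompt": "ink wash painting, {subject} in motion"}'
plain_lset = "ink wash painting, __subject__ in motion"

# JSON variant: the prompt template lives under the "prompt" key.
print(json.loads(json_lset)["prompt"])

# Plain-text variant: json.loads() raises JSONDecodeError, so the whole
# file body is treated as the prompt template instead.
try:
    json.loads(plain_lset)
except json.JSONDecodeError:
    print(plain_lset)
```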
@@ -379,13 +379,14 @@ def enhance_prompt_with_llm(prompt: str, enhancer_pipeline):
         final_answer = tokenizer.decode(newly_generated_ids[0], skip_special_tokens=True)
 
         print(f"Enhanced prompt: '{final_answer.strip()}'")
-        return final_answer.strip()
+        # Return the enhanced prompt and also reset the LoRA dropdown
+        return final_answer.strip(), "None", gr.update(visible=False, interactive=False)
     except Exception as e:
         print(f"❌ Error during prompt enhancement: {e}")
         # Adding full traceback for better debugging in the console
         traceback.print_exc()
         gr.Warning(f"An error occurred during prompt enhancement. See console for details.")
-        return prompt
+        return prompt, gr.update(), gr.update()  # Return original prompt, don't change LoRA
     finally:
         # Explicitly empty the CUDA cache to help release GPU memory.
         # This can help resolve intermittent issues where the GPU remains active.
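The ZeroGPU pattern the removed module docstring described (manual tokenization so both `input_ids` and `attention_mask` are supplied, with tensors moved to CUDA inside the handler) looks roughly like this; the model name is a placeholder, not the Space's actual enhancer:

```python
# Sketch of the ZeroGPU-friendly generation pattern (placeholder model).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("some/instruct-model")
model = AutoModelForCausalLM.from_pretrained("some/instruct-model")

def enhance(prompt: str) -> str:
    # On ZeroGPU, the GPU is attached per call, so the model is moved
    # to CUDA inside the handler rather than once at startup.
    model.to("cuda")
    # Tokenizing manually produces both input_ids and attention_mask;
    # .to("cuda") moves every tensor to the same device as the model.
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    with torch.no_grad():
        output_ids = model.generate(**inputs, max_new_tokens=128)
    # Decode only the newly generated tokens, not the echoed prompt.
    new_ids = output_ids[:, inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_ids[0], skip_special_tokens=True).strip()
```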
@@ -487,6 +488,9 @@ def generate_t2v_video(
 def build_ui(t2v_pipe, enhancer_pipe, available_loras):
     """Creates and configures the Gradio UI."""
     with gr.Blocks(theme=gr.themes.Soft(), css=".main-container { max-width: 1080px; margin: auto; }") as demo:
+        # --- FIX: Add a state component to reliably store the user's base prompt ---
+        base_prompt_state = gr.State(value=DEFAULT_PROMPT_T2V)
+
         gr.Markdown("# ✨ Wan 2.1 Text-to-Video Suite with Dynamic LoRAs")
         gr.Markdown("Generate videos from text, enhanced by the base `FusionX` LoRA and your choice of dynamic style LoRA.")
 
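`gr.State` holds a per-session value on the server without rendering anything; a self-contained sketch of the base-prompt pattern used here, with invented component names:

```python
# Minimal sketch of the gr.State pattern above: the state stores the
# user's "base" text, and handlers derive the visible textbox from it.
import gradio as gr

with gr.Blocks() as demo:
    base_state = gr.State(value="a cat")  # per-session, never rendered
    textbox = gr.Textbox(value="a cat", label="Prompt")
    suffix = gr.Dropdown(choices=["None", "in space"], value="None")

    def apply_suffix(choice, base):
        # Always rebuild from the stored base, so repeated selections
        # never stack suffixes on top of each other (idempotent update).
        return base if choice == "None" else f"{base}, {choice}"

    suffix.change(apply_suffix, inputs=[suffix, base_state], outputs=[textbox])

demo.launch()
```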
@@ -512,10 +516,11 @@ def build_ui(t2v_pipe, enhancer_pipe, available_loras):
                 label="🎨 Dynamic Style LoRA (Optional)",
                 choices=available_loras,
                 value="None",
-                info="Adds a secondary style LoRA. Appends trigger words to your prompt."
+                info="Adds a secondary style LoRA. Replaces previous LoRA triggers."
             )
             t2v_lora_weight = gr.Slider(
-                label="💪 LoRA Weight", minimum=0.0, maximum=2.0, step=0.05, value=0.8, interactive=False
+                label="💪 LoRA Weight", minimum=0.0, maximum=2.0, step=0.05, value=0.8,
+                interactive=False, visible=False
             )
 
             t2v_duration = gr.Slider(
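For context on the two slider flags: `interactive=False` renders the component but makes it read-only, while `visible=False` removes it from the layout entirely; both can be flipped later from an event handler via `gr.update`. A toy sketch with invented names:

```python
import gradio as gr

with gr.Blocks() as demo:
    show = gr.Checkbox(label="Show weight slider")
    weight = gr.Slider(0.0, 2.0, value=0.8, visible=False, interactive=False)

    # gr.update can flip visibility/interactivity from an event handler.
    show.change(
        fn=lambda on: gr.update(visible=on, interactive=on),
        inputs=[show],
        outputs=[weight],
    )

demo.launch()
```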
@@ -543,16 +548,38 @@ def build_ui(t2v_pipe, enhancer_pipe, available_loras):
         # Create a partial function that has the enhancer_pipe "baked in".
         # This avoids the need to pass the complex object through Gradio's state.
         enhance_fn = partial(enhance_prompt_with_llm, enhancer_pipeline=enhancer_pipe)
+
+        # --- FIX: Wire up the new state-based event handlers ---
+
+        # 1. When the user manually types in the prompt box
+        t2v_prompt.change(
+            fn=update_base_prompt,
+            inputs=[t2v_prompt],
+            outputs=[base_prompt_state, t2v_lora_preset, t2v_lora_weight],
+            queue=False  # This should be a quick update
+        )
+
+        # 2. When the user enhances the prompt with the LLM
         t2v_enhance_btn.click(
             fn=enhance_fn,
             inputs=[t2v_prompt],
-            outputs=[t2v_prompt]
+            # The enhance function now also resets the LoRA dropdown
+            outputs=[t2v_prompt, t2v_lora_preset, t2v_lora_weight]
+        ).then(
+            fn=lambda p: p,  # A simple function to pass the new prompt through
+            inputs=[t2v_prompt],
+            outputs=[base_prompt_state]  # Update the base prompt state with the enhanced version
         )
+
+        # 3. When the user selects a LoRA from the dropdown
         t2v_lora_preset.change(
             fn=handle_lora_selection_change,
-            inputs=[t2v_lora_preset, t2v_prompt],
+            # The input is now the reliable base_prompt_state, not the textbox
+            inputs=[t2v_lora_preset, base_prompt_state],
             outputs=[t2v_prompt, t2v_lora_weight]
         )
+
+        # 4. When the user clicks the final generate button
         t2v_generate_btn.click(
             fn=generate_t2v_video,
             inputs=[
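`.then()` chains a second step that runs only after the first event handler finishes, so it observes the already-updated component values. A standalone sketch of the pattern, with invented names:

```python
# Sketch of chaining events with .then(): the second step runs only
# after the first completes, so it sees the updated textbox value.
import gradio as gr

def shout(text):
    return text.upper()

with gr.Blocks() as demo:
    box = gr.Textbox(value="hello")
    saved = gr.State(value="hello")
    btn = gr.Button("Shout")

    btn.click(fn=shout, inputs=[box], outputs=[box]).then(
        fn=lambda t: t,   # copy the new textbox value...
        inputs=[box],
        outputs=[saved],  # ...into the session state
    )

demo.launch()
```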
@@ -575,4 +602,4 @@ if __name__ == "__main__":
     available_loras = get_available_presets(DYNAMIC_LORA_REPO_ID, DYNAMIC_LORA_SUBFOLDER)
 
     app_ui = build_ui(t2v_pipe, enhancer_pipe, available_loras)
-    app_ui.queue(max_size=10).launch()
+    app_ui.queue(max_size=10).launch()
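The removed changelog also referenced a resolution-aware `get_t2v_duration` used for ZeroGPU duration budgeting (the `spaces` library's `@spaces.GPU(duration=...)` can take a per-call estimate). A rough sketch of what such an estimator might look like; the frame rate, coefficients, and ceiling below are invented, not the Space's actual values:

```python
# Hypothetical sketch of a resolution-aware duration estimator for
# @spaces.GPU(duration=...); every constant below is invented.
def get_t2v_duration(steps: int, video_seconds: float, width: int, height: int) -> int:
    frames = video_seconds * 16                         # assume ~16 fps output
    resolution_factor = (width * height) / (832 * 480)  # cost relative to 480p
    estimate = 60 + 0.5 * steps * frames * resolution_factor
    return int(min(estimate, 600))                      # generous hard ceiling
```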
 