Commit 97b7208 ("Fixes")
1 parent: 2b02e16

app.py CHANGED
@@ -3,24 +3,6 @@
 Hugging Face Space for Text-to-Video generation using the Wan 2.1 model,
 enhanced with a base `FusionX` LoRA, dynamic user-selectable style LoRAs,
 and an LLM-based prompt enhancer.
-
-Correction: Replaced `unload_lora_weights` with the correct `delete_adapters`
-method for robust and selective LoRA management.
-
-Fix (July 3, 2024): Corrected a tensor device mismatch error in the
-`enhance_prompt_with_llm` function for ZeroGPU compatibility. The fix
-involves manually tokenizing the input and moving the resulting tensors
-to the CUDA device before calling the model's generate method.
-
-Fix (July 3, 2025): Addressed an attention_mask warning and clarified ZeroGPU
-behavior. The model is now correctly fed both input_ids and attention_mask.
-Added comments explaining why the model is moved to CUDA on each call, which
-is the expected behavior for ZeroGPU Spaces.
-
-Fix (July 3, 2025): Corrected the GPU duration estimation logic (`get_t2v_duration`)
-to prevent timeouts on longer or high-resolution videos. The new logic now
-considers video resolution (width and height) in addition to steps and duration,
-and uses more generous time allocations.
 """

 # --- 1. Imports ---
@@ -157,14 +139,18 @@ def parse_lset_prompt(lset_prompt):
         resolved_prompt = resolved_prompt.replace(f"{{{key}}}", highlighted_value)
     return resolved_prompt

-def handle_lora_selection_change(preset_name: str, current_prompt: str):
+def handle_lora_selection_change(preset_name: str, base_prompt: str):
     """
-    When a preset is selected, this function
-
+    When a preset is selected, this function combines the base_prompt (from state)
+    with the new LoRA's trigger words. This makes the update idempotent.
     """
+    # Default state for the slider is hidden and non-interactive
+    lora_slider_update = gr.update(visible=False, interactive=False)
+
+    # If "None" is selected, the displayed prompt is just the base prompt.
     if not preset_name or preset_name == "None":
-        gr.Info("LoRA cleared.
-        return gr.update(value=
+        gr.Info("LoRA cleared.")
+        return gr.update(value=base_prompt), lora_slider_update

     try:
         lset_filename = f"{preset_name}.lset"
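The heart of this change is visible in the new signature: the handler now reads a stored base prompt instead of whatever currently sits in the textbox, so re-selecting presets rebuilds the prompt rather than stacking triggers. Below is a minimal, self-contained sketch of that state pattern; every name in it (TRIGGERS, apply_preset, base_state) is illustrative, not from app.py.

import gradio as gr

# Illustrative trigger table; the real app resolves triggers from .lset files.
TRIGGERS = {"None": "", "anime": "anime style, vivid colors", "noir": "film noir, hard shadows"}

def apply_preset(preset: str, base_prompt: str):
    trigger = TRIGGERS.get(preset, "")
    if not trigger:
        return gr.update(value=base_prompt)  # rebuild from base, so the update is idempotent
    sep = ", " if base_prompt and not base_prompt.endswith((",", " ")) else ""
    return gr.update(value=f"{base_prompt}{sep}{trigger}".strip())

with gr.Blocks() as demo:
    base_state = gr.State(value="a cat on a skateboard")
    prompt = gr.Textbox(value="a cat on a skateboard", label="Prompt")
    preset = gr.Dropdown(choices=list(TRIGGERS), value="None", label="Style")
    # A manual edit redefines the base prompt and clears the preset.
    prompt.input(lambda p: (p, "None"), inputs=[prompt], outputs=[base_state, preset])
    preset.change(apply_preset, inputs=[preset, base_state], outputs=[prompt])

if __name__ == "__main__":
    demo.launch()

The sketch hooks manual edits with .input, which in recent Gradio releases fires only on user edits; the commit itself uses .change, which also fires when a handler writes to the textbox programmatically, so the ordering of those events is worth testing.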
@@ -177,27 +163,41 @@ def handle_lora_selection_change(preset_name: str, current_prompt: str):

         lset_prompt_raw = None
         try:
-            # Attempt to parse as JSON, which is one of the formats in the repo
             lset_data = json.loads(lset_content)
             lset_prompt_raw = lset_data.get("prompt")
         except json.JSONDecodeError:
-            # If not JSON, assume the entire file content is the prompt template
             print(f"Info: '{lset_filename}' is not JSON. Treating as plain text prompt.")
             lset_prompt_raw = lset_content

         if lset_prompt_raw:
             resolved_prompt = parse_lset_prompt(lset_prompt_raw)
-            separator = ", " if
-
+            separator = ", " if base_prompt and not base_prompt.endswith((",", " ")) else ""
+            # The new prompt is always constructed from the base prompt and the new triggers.
+            new_prompt = f"{base_prompt}{separator}{resolved_prompt}".strip()
             gr.Info(f"✅ Appended triggers from '{preset_name}'. Replace highlighted text like __this__.")
-
+
+            # On success, update the prompt and make the slider visible and interactive
+            new_prompt_update = gr.update(value=new_prompt)
+            lora_slider_update = gr.update(visible=True, interactive=True)
+            return new_prompt_update, lora_slider_update
         else:
-            gr.Info(f"ℹ️ No prompt found in '{preset_name}.lset'.")
-
+            gr.Info(f"ℹ️ No prompt found in '{preset_name}.lset'. Prompt unchanged.")
+            # If no triggers, the prompt is just the base prompt.
+            return gr.update(value=base_prompt), lora_slider_update
+
     except Exception as e:
         print(f"Info: Could not process .lset for '{preset_name}'. Reason: {e}")
         gr.Warning(f"⚠️ Could not load triggers for '{preset_name}'.")
-
+        # On error, revert to just the base prompt.
+        return gr.update(value=base_prompt), lora_slider_update
+
+def update_base_prompt(new_prompt_text):
+    """
+    Called when the user manually edits the prompt textbox.
+    This updates the base prompt state and resets the LoRA selection, as the
+    old triggers are no longer relevant to the new, manually-entered prompt.
+    """
+    return new_prompt_text, "None", gr.update(visible=False, interactive=False)


 def _manage_lora_state(pipe, selected_lora: str, lora_weight: float) -> bool:
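The context lines above also document the two .lset formats the repo supports: a JSON object carrying a "prompt" key, or a plain-text prompt template. A compact sketch of that fallback, with made-up inputs (read_lset_prompt is a hypothetical helper, and the JSON branch assumes the file holds an object, as app.py does):

import json

def read_lset_prompt(lset_content: str):
    try:
        # JSON form: {"prompt": "..."}
        return json.loads(lset_content).get("prompt")
    except json.JSONDecodeError:
        # Plain-text form: the whole file is the prompt template
        return lset_content

assert read_lset_prompt('{"prompt": "a {style} scene"}') == "a {style} scene"
assert read_lset_prompt("a plain text prompt") == "a plain text prompt"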
@@ -379,13 +379,14 @@ def enhance_prompt_with_llm(prompt: str, enhancer_pipeline):
         final_answer = tokenizer.decode(newly_generated_ids[0], skip_special_tokens=True)

         print(f"Enhanced prompt: '{final_answer.strip()}'")
-
+        # Return the enhanced prompt and also reset the LoRA dropdown
+        return final_answer.strip(), "None", gr.update(visible=False, interactive=False)
     except Exception as e:
         print(f"❌ Error during prompt enhancement: {e}")
         # Adding full traceback for better debugging in the console
         traceback.print_exc()
         gr.Warning(f"An error occurred during prompt enhancement. See console for details.")
-        return prompt
+        return prompt, gr.update(), gr.update()  # Return original prompt, don't change LoRA
     finally:
         # Explicitly empty the CUDA cache to help release GPU memory.
         # This can help resolve intermittent issues where the GPU remains active.
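The enhance handler now feeds three components (prompt textbox, LoRA dropdown, weight slider), so its error path returns bare gr.update() sentinels to leave the unrelated components untouched while still handing back the original prompt. A hedged sketch of that return contract; fake_llm is a stand-in, not the Space's enhancer pipeline:

import gradio as gr

def fake_llm(p: str) -> str:
    return p + ", cinematic lighting, 4k"

def enhance(prompt: str):
    try:
        enhanced = fake_llm(prompt)
        # Success: new prompt, reset the dropdown, hide the slider.
        return enhanced.strip(), "None", gr.update(visible=False, interactive=False)
    except Exception:
        # Failure: keep the user's prompt and change nothing else.
        return prompt, gr.update(), gr.update()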
@@ -487,6 +488,9 @@ def generate_t2v_video(
 def build_ui(t2v_pipe, enhancer_pipe, available_loras):
     """Creates and configures the Gradio UI."""
     with gr.Blocks(theme=gr.themes.Soft(), css=".main-container { max-width: 1080px; margin: auto; }") as demo:
+        # --- FIX: Add a state component to reliably store the user's base prompt ---
+        base_prompt_state = gr.State(value=DEFAULT_PROMPT_T2V)
+
         gr.Markdown("# ✨ Wan 2.1 Text-to-Video Suite with Dynamic LoRAs")
         gr.Markdown("Generate videos from text, enhanced by the base `FusionX` LoRA and your choice of dynamic style LoRA.")

@@ -512,10 +516,11 @@ def build_ui(t2v_pipe, enhancer_pipe, available_loras):
                 label="🎨 Dynamic Style LoRA (Optional)",
                 choices=available_loras,
                 value="None",
-                info="Adds a secondary style LoRA.
+                info="Adds a secondary style LoRA. Replaces previous LoRA triggers."
             )
             t2v_lora_weight = gr.Slider(
-                label="💪 LoRA Weight", minimum=0.0, maximum=2.0, step=0.05, value=0.8,
+                label="💪 LoRA Weight", minimum=0.0, maximum=2.0, step=0.05, value=0.8,
+                interactive=False, visible=False
             )

             t2v_duration = gr.Slider(
@@ -543,16 +548,38 @@ def build_ui(t2v_pipe, enhancer_pipe, available_loras):
         # Create a partial function that has the enhancer_pipe "baked in".
         # This avoids the need to pass the complex object through Gradio's state.
         enhance_fn = partial(enhance_prompt_with_llm, enhancer_pipeline=enhancer_pipe)
+
+        # --- FIX: Wire up the new state-based event handlers ---
+
+        # 1. When the user manually types in the prompt box
+        t2v_prompt.change(
+            fn=update_base_prompt,
+            inputs=[t2v_prompt],
+            outputs=[base_prompt_state, t2v_lora_preset, t2v_lora_weight],
+            queue=False  # This should be a quick update
+        )
+
+        # 2. When the user enhances the prompt with the LLM
         t2v_enhance_btn.click(
             fn=enhance_fn,
             inputs=[t2v_prompt],
-
+            # The enhance function now also resets the LoRA dropdown
+            outputs=[t2v_prompt, t2v_lora_preset, t2v_lora_weight]
+        ).then(
+            fn=lambda p: p,  # A simple function to pass the new prompt through
+            inputs=[t2v_prompt],
+            outputs=[base_prompt_state]  # Update the base prompt state with the enhanced version
         )
+
+        # 3. When the user selects a LoRA from the dropdown
         t2v_lora_preset.change(
             fn=handle_lora_selection_change,
-
+            # The input is now the reliable base_prompt_state, not the textbox
+            inputs=[t2v_lora_preset, base_prompt_state],
             outputs=[t2v_prompt, t2v_lora_weight]
         )
+
+        # 4. When the user clicks the final generate button
         t2v_generate_btn.click(
             fn=generate_t2v_video,
             inputs=[
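Two Gradio event features carry the wiring above: queue=False runs the lightweight state sync without waiting in the queue, and .then() chains a follow-up step so base_prompt_state is updated only after the enhance call has finished writing the textbox. A minimal sketch of that chaining; the component names here are illustrative:

import gradio as gr

with gr.Blocks() as demo:
    state = gr.State(value="")
    prompt = gr.Textbox(label="Prompt")
    enhance_btn = gr.Button("Enhance")

    enhance_btn.click(
        fn=lambda p: p + ", cinematic lighting",  # stand-in for the LLM step
        inputs=[prompt],
        outputs=[prompt],
    ).then(
        fn=lambda p: p,      # runs only after the click handler completes
        inputs=[prompt],
        outputs=[state],     # sync the state with the enhanced prompt
    )

if __name__ == "__main__":
    demo.launch()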
@@ -575,4 +602,4 @@ if __name__ == "__main__":
     available_loras = get_available_presets(DYNAMIC_LORA_REPO_ID, DYNAMIC_LORA_SUBFOLDER)

     app_ui = build_ui(t2v_pipe, enhancer_pipe, available_loras)
-    app_ui.queue(max_size=10).launch()
+    app_ui.queue(max_size=10).launch()