thankfulcarp committed
Commit 97b7208 · 1 Parent(s): 2b02e16
Files changed (1)
  1. app.py +65 -38
app.py CHANGED
@@ -3,24 +3,6 @@
 Hugging Face Space for Text-to-Video generation using the Wan 2.1 model,
 enhanced with a base `FusionX` LoRA, dynamic user-selectable style LoRAs,
 and an LLM-based prompt enhancer.
-
-Correction: Replaced `unload_lora_weights` with the correct `delete_adapters`
-method for robust and selective LoRA management.
-
-Fix (July 3, 2024): Corrected a tensor device mismatch error in the
-`enhance_prompt_with_llm` function for ZeroGPU compatibility. The fix
-involves manually tokenizing the input and moving the resulting tensors
-to the CUDA device before calling the model's generate method.
-
-Fix (July 3, 2025): Addressed an attention_mask warning and clarified ZeroGPU
-behavior. The model is now correctly fed both input_ids and attention_mask.
-Added comments explaining why the model is moved to CUDA on each call, which
-is the expected behavior for ZeroGPU Spaces.
-
-Fix (July 3, 2025): Corrected the GPU duration estimation logic (`get_t2v_duration`)
-to prevent timeouts on longer or high-resolution videos. The new logic now
-considers video resolution (width and height) in addition to steps and duration,
-and uses more generous time allocations.
 """
 
 # --- 1. Imports ---
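For reference, `delete_adapters` (named in the removed changelog above) is part of diffusers' PEFT-backed LoRA API. A minimal sketch of the selective-unload pattern it enables, with placeholder repo and adapter names rather than the Space's actual values:

```python
# Sketch: selective LoRA management with diffusers (placeholder names).
import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    "some/base-model", torch_dtype=torch.float16  # placeholder repo
)
pipe.load_lora_weights("some/lora-repo", adapter_name="style")  # placeholder
pipe.set_adapters(["style"], adapter_weights=[0.8])

# unload_lora_weights() would strip *every* loaded LoRA, including an
# always-on base LoRA; delete_adapters() removes only the named adapter.
pipe.delete_adapters("style")
```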
@@ -157,14 +139,18 @@ def parse_lset_prompt(lset_prompt):
         resolved_prompt = resolved_prompt.replace(f"{{{key}}}", highlighted_value)
     return resolved_prompt
 
-def handle_lora_selection_change(preset_name: str, current_prompt: str):
+def handle_lora_selection_change(preset_name: str, base_prompt: str):
     """
-    When a preset is selected, this function finds the corresponding .lset file,
-    parses it, and appends the suggested trigger words to the current prompt.
+    When a preset is selected, this function combines the base_prompt (from state)
+    with the new LoRA's trigger words. This makes the update idempotent.
     """
+    # Default state for the slider is hidden and non-interactive
+    lora_slider_update = gr.update(visible=False, interactive=False)
+
+    # If "None" is selected, the displayed prompt is just the base prompt.
     if not preset_name or preset_name == "None":
-        gr.Info("LoRA cleared. Prompt remains unchanged.")
-        return gr.update(value=current_prompt), gr.update(interactive=False)
+        gr.Info("LoRA cleared.")
+        return gr.update(value=base_prompt), lora_slider_update
 
     try:
         lset_filename = f"{preset_name}.lset"
@@ -177,27 +163,41 @@ def handle_lora_selection_change(preset_name: str, current_prompt: str):
 
         lset_prompt_raw = None
         try:
-            # Attempt to parse as JSON, which is one of the formats in the repo
             lset_data = json.loads(lset_content)
             lset_prompt_raw = lset_data.get("prompt")
         except json.JSONDecodeError:
-            # If not JSON, assume the entire file content is the prompt template
            print(f"Info: '{lset_filename}' is not JSON. Treating as plain text prompt.")
            lset_prompt_raw = lset_content
 
        if lset_prompt_raw:
            resolved_prompt = parse_lset_prompt(lset_prompt_raw)
-            separator = ", " if current_prompt and not current_prompt.endswith( (",", " ")) else ""
-            new_prompt = f"{current_prompt}{separator}{resolved_prompt}".strip()
+            separator = ", " if base_prompt and not base_prompt.endswith((",", " ")) else ""
+            # The new prompt is always constructed from the base prompt and the new triggers.
+            new_prompt = f"{base_prompt}{separator}{resolved_prompt}".strip()
            gr.Info(f"✅ Appended triggers from '{preset_name}'. Replace highlighted text like __this__.")
-            return gr.update(value=new_prompt), gr.update(interactive=True)
+
+            # On success, update the prompt and make the slider visible and interactive
+            new_prompt_update = gr.update(value=new_prompt)
+            lora_slider_update = gr.update(visible=True, interactive=True)
+            return new_prompt_update, lora_slider_update
        else:
-            gr.Info(f"ℹ️ No prompt found in '{preset_name}.lset'.")
-            return gr.update(value=current_prompt), gr.update(interactive=False)
+            gr.Info(f"ℹ️ No prompt found in '{preset_name}.lset'. Prompt unchanged.")
+            # If no triggers, the prompt is just the base prompt.
+            return gr.update(value=base_prompt), lora_slider_update
+
    except Exception as e:
        print(f"Info: Could not process .lset for '{preset_name}'. Reason: {e}")
        gr.Warning(f"⚠️ Could not load triggers for '{preset_name}'.")
-        return gr.update(value=current_prompt), gr.update(interactive=False)
+        # On error, revert to just the base prompt.
+        return gr.update(value=base_prompt), lora_slider_update
+
+def update_base_prompt(new_prompt_text):
+    """
+    Called when the user manually edits the prompt textbox.
+    This updates the base prompt state and resets the LoRA selection, as the
+    old triggers are no longer relevant to the new, manually-entered prompt.
+    """
+    return new_prompt_text, "None", gr.update(visible=False, interactive=False)
 
 
 def _manage_lora_state(pipe, selected_lora: str, lora_weight: float) -> bool:
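For illustration, the two `.lset` formats this handler accepts can be exercised in isolation; the file contents below are invented examples, not presets from the actual LoRA repo:

```python
import json

# Invented contents for a hypothetical "InkStyle.lset" preset.
json_lset = '{"prompt": "ink wash painting, {subject} in motion"}'
plain_lset = "ink wash painting, __subject__ in motion"

# JSON variant: the prompt template lives under the "prompt" key.
print(json.loads(json_lset)["prompt"])

# Plain-text variant: json.loads() raises JSONDecodeError, so the whole
# file body is treated as the prompt template instead.
try:
    json.loads(plain_lset)
except json.JSONDecodeError:
    print(plain_lset)
```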
@@ -379,13 +379,14 @@ def enhance_prompt_with_llm(prompt: str, enhancer_pipeline):
         final_answer = tokenizer.decode(newly_generated_ids[0], skip_special_tokens=True)
 
         print(f"Enhanced prompt: '{final_answer.strip()}'")
-        return final_answer.strip()
+        # Return the enhanced prompt and also reset the LoRA dropdown
+        return final_answer.strip(), "None", gr.update(visible=False, interactive=False)
     except Exception as e:
         print(f"❌ Error during prompt enhancement: {e}")
         # Adding full traceback for better debugging in the console
         traceback.print_exc()
         gr.Warning(f"An error occurred during prompt enhancement. See console for details.")
-        return prompt
+        return prompt, gr.update(), gr.update()  # Return original prompt, don't change LoRA
     finally:
         # Explicitly empty the CUDA cache to help release GPU memory.
         # This can help resolve intermittent issues where the GPU remains active.
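The ZeroGPU pattern the removed module docstring described (manual tokenization so both `input_ids` and `attention_mask` are supplied, with tensors moved to CUDA inside the handler) looks roughly like this; the model name is a placeholder, not the Space's actual enhancer:

```python
# Sketch of the ZeroGPU-friendly generation pattern (placeholder model).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("some/instruct-model")
model = AutoModelForCausalLM.from_pretrained("some/instruct-model")

def enhance(prompt: str) -> str:
    # On ZeroGPU, the GPU is attached per call, so the model is moved
    # to CUDA inside the handler rather than once at startup.
    model.to("cuda")
    # Tokenizing manually produces both input_ids and attention_mask;
    # .to("cuda") moves every tensor to the same device as the model.
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    with torch.no_grad():
        output_ids = model.generate(**inputs, max_new_tokens=128)
    # Decode only the newly generated tokens, not the echoed prompt.
    new_ids = output_ids[:, inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_ids[0], skip_special_tokens=True).strip()
```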
@@ -487,6 +488,9 @@ def generate_t2v_video(
 def build_ui(t2v_pipe, enhancer_pipe, available_loras):
     """Creates and configures the Gradio UI."""
     with gr.Blocks(theme=gr.themes.Soft(), css=".main-container { max-width: 1080px; margin: auto; }") as demo:
+        # --- FIX: Add a state component to reliably store the user's base prompt ---
+        base_prompt_state = gr.State(value=DEFAULT_PROMPT_T2V)
+
         gr.Markdown("# ✨ Wan 2.1 Text-to-Video Suite with Dynamic LoRAs")
         gr.Markdown("Generate videos from text, enhanced by the base `FusionX` LoRA and your choice of dynamic style LoRA.")
 
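`gr.State` holds a per-session value on the server without rendering anything; a self-contained sketch of the base-prompt pattern used here, with invented component names:

```python
# Minimal sketch of the gr.State pattern above: the state stores the
# user's "base" text, and handlers derive the visible textbox from it.
import gradio as gr

with gr.Blocks() as demo:
    base_state = gr.State(value="a cat")  # per-session, never rendered
    textbox = gr.Textbox(value="a cat", label="Prompt")
    suffix = gr.Dropdown(choices=["None", "in space"], value="None")

    def apply_suffix(choice, base):
        # Always rebuild from the stored base, so repeated selections
        # never stack suffixes on top of each other (idempotent update).
        return base if choice == "None" else f"{base}, {choice}"

    suffix.change(apply_suffix, inputs=[suffix, base_state], outputs=[textbox])

demo.launch()
```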
@@ -512,10 +516,11 @@ def build_ui(t2v_pipe, enhancer_pipe, available_loras):
                 label="🎨 Dynamic Style LoRA (Optional)",
                 choices=available_loras,
                 value="None",
-                info="Adds a secondary style LoRA. Appends trigger words to your prompt."
+                info="Adds a secondary style LoRA. Replaces previous LoRA triggers."
             )
             t2v_lora_weight = gr.Slider(
-                label="💪 LoRA Weight", minimum=0.0, maximum=2.0, step=0.05, value=0.8, interactive=False
+                label="💪 LoRA Weight", minimum=0.0, maximum=2.0, step=0.05, value=0.8,
+                interactive=False, visible=False
             )
 
             t2v_duration = gr.Slider(
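For context on the two slider flags: `interactive=False` renders the component but makes it read-only, while `visible=False` removes it from the layout entirely; both can be flipped later from an event handler via `gr.update`. A toy sketch with invented names:

```python
import gradio as gr

with gr.Blocks() as demo:
    show = gr.Checkbox(label="Show weight slider")
    weight = gr.Slider(0.0, 2.0, value=0.8, visible=False, interactive=False)

    # gr.update can flip visibility/interactivity from an event handler.
    show.change(
        fn=lambda on: gr.update(visible=on, interactive=on),
        inputs=[show],
        outputs=[weight],
    )

demo.launch()
```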
@@ -543,16 +548,38 @@ def build_ui(t2v_pipe, enhancer_pipe, available_loras):
         # Create a partial function that has the enhancer_pipe "baked in".
         # This avoids the need to pass the complex object through Gradio's state.
         enhance_fn = partial(enhance_prompt_with_llm, enhancer_pipeline=enhancer_pipe)
+
+        # --- FIX: Wire up the new state-based event handlers ---
+
+        # 1. When the user manually types in the prompt box
+        t2v_prompt.change(
+            fn=update_base_prompt,
+            inputs=[t2v_prompt],
+            outputs=[base_prompt_state, t2v_lora_preset, t2v_lora_weight],
+            queue=False  # This should be a quick update
+        )
+
+        # 2. When the user enhances the prompt with the LLM
         t2v_enhance_btn.click(
             fn=enhance_fn,
             inputs=[t2v_prompt],
-            outputs=[t2v_prompt]
+            # The enhance function now also resets the LoRA dropdown
+            outputs=[t2v_prompt, t2v_lora_preset, t2v_lora_weight]
+        ).then(
+            fn=lambda p: p,  # A simple function to pass the new prompt through
+            inputs=[t2v_prompt],
+            outputs=[base_prompt_state]  # Update the base prompt state with the enhanced version
         )
+
+        # 3. When the user selects a LoRA from the dropdown
         t2v_lora_preset.change(
             fn=handle_lora_selection_change,
-            inputs=[t2v_lora_preset, t2v_prompt],
+            # The input is now the reliable base_prompt_state, not the textbox
+            inputs=[t2v_lora_preset, base_prompt_state],
             outputs=[t2v_prompt, t2v_lora_weight]
         )
+
+        # 4. When the user clicks the final generate button
         t2v_generate_btn.click(
             fn=generate_t2v_video,
             inputs=[
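`.then()` chains a second step that runs only after the first event handler finishes, so it observes the already-updated component values. A standalone sketch of the pattern, with invented names:

```python
# Sketch of chaining events with .then(): the second step runs only
# after the first completes, so it sees the updated textbox value.
import gradio as gr

def shout(text):
    return text.upper()

with gr.Blocks() as demo:
    box = gr.Textbox(value="hello")
    saved = gr.State(value="hello")
    btn = gr.Button("Shout")

    btn.click(fn=shout, inputs=[box], outputs=[box]).then(
        fn=lambda t: t,   # copy the new textbox value...
        inputs=[box],
        outputs=[saved],  # ...into the session state
    )

demo.launch()
```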
@@ -575,4 +602,4 @@ if __name__ == "__main__":
     available_loras = get_available_presets(DYNAMIC_LORA_REPO_ID, DYNAMIC_LORA_SUBFOLDER)
 
     app_ui = build_ui(t2v_pipe, enhancer_pipe, available_loras)
-    app_ui.queue(max_size=10).launch()
+    app_ui.queue(max_size=10).launch()
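The removed changelog also referenced a resolution-aware `get_t2v_duration` used for ZeroGPU duration budgeting (the `spaces` library's `@spaces.GPU(duration=...)` can take a per-call estimate). A rough sketch of what such an estimator might look like; the frame rate, coefficients, and ceiling below are invented, not the Space's actual values:

```python
# Hypothetical sketch of a resolution-aware duration estimator for
# @spaces.GPU(duration=...); every constant below is invented.
def get_t2v_duration(steps: int, video_seconds: float, width: int, height: int) -> int:
    frames = video_seconds * 16                         # assume ~16 fps output
    resolution_factor = (width * height) / (832 * 480)  # cost relative to 480p
    estimate = 60 + 0.5 * steps * frames * resolution_factor
    return int(min(estimate, 600))                      # generous hard ceiling
```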
 