Spaces:

Econogoat
/

KryptoCreator

Running

App Files Community

Econogoat committed on Jul 13

Commit

38b2790

verified ·

1 Parent(s): a563432

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -12

app.py CHANGED Viewed

@@ -3,6 +3,8 @@ import gradio as gr
 import torch
 from PIL import Image
 import spaces
 from diffusers import DiffusionPipeline, AutoencoderTiny, AutoencoderKL
 from live_preview_helpers import calculate_shift, retrieve_timesteps, flux_pipe_call_that_returns_an_iterable_of_images
 from diffusers.utils import load_image
@@ -18,7 +20,7 @@ KRYPTO_LORA = {
 }
 # --- Lazy Loading Setup ---
-# CORRECTION : On ne charge rien ici. Les modèles sont initialisés à None.
 pipe = None
 good_vae = None
 taef1 = None
@@ -35,27 +37,37 @@ HF_TOKEN = os.getenv("HF_TOKEN")
 def load_models():
     """
-    Cette fonction charge tous les modèles et les place sur le GPU.
-    Elle n'est appelée qu'une seule fois, lors de la première génération.
     """
     global pipe, good_vae, taef1
-    print("Performing first-time setup: loading models to GPU...")
-    # On charge tout directement sur CUDA car cette fonction est appelée depuis un contexte GPU.
     taef1 = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=dtype).to("cuda")
     good_vae = AutoencoderKL.from_pretrained(base_model, subfolder="vae", torch_dtype=dtype, token=HF_TOKEN).to("cuda")
-    pipe = DiffusionPipeline.from_pretrained(base_model, torch_dtype=dtype, vae=taef1, token=HF_TOKEN).to("cuda")
     print(f"Loading on-board LoRA: {KRYPTO_LORA['repo']}")
     pipe.load_lora_weights(
         KRYPTO_LORA['repo'],
-        low_cpu_mem_usage=True,
         adapter_name=KRYPTO_LORA['adapter_name'],
         token=HF_TOKEN
     )
-    # Monkey-patch the pipeline for live preview
     pipe.flux_pipe_call_that_returns_an_iterable_of_images = flux_pipe_call_that_returns_an_iterable_of_images.__get__(pipe)
     print("Models loaded and ready on GPU.")
@@ -108,13 +120,11 @@ def update_history(new_image, history):
 @spaces.GPU(duration=75)
 def run_generation(prompt, lora_scale, cfg_scale, steps, randomize_seed, seed, aspect_ratio, base_resolution, progress=gr.Progress(track_tqdm=True)):
-    global pipe
     if not prompt:
         raise gr.Error("Prompt cannot be empty.")
-    # CORRECTION : On charge les modèles seulement si ce n'est pas déjà fait.
-    if pipe is None:
-        load_models()
     prompt_mash = f"{KRYPTO_LORA['trigger']}, {prompt}"
     print("Final prompt:", prompt_mash)

 import torch
 from PIL import Image
 import spaces
+# On importe BitsAndBytesConfig comme suggéré par votre analyse
+from transformers import BitsAndBytesConfig
 from diffusers import DiffusionPipeline, AutoencoderTiny, AutoencoderKL
 from live_preview_helpers import calculate_shift, retrieve_timesteps, flux_pipe_call_that_returns_an_iterable_of_images
 from diffusers.utils import load_image
 }
 # --- Lazy Loading Setup ---
+# Conformément à l'analyse, RIEN n'est chargé ici. Les modèles sont initialisés à None.
 pipe = None
 good_vae = None
 taef1 = None
 def load_models():
     """
+    Charge tous les modèles et les place sur le GPU.
+    N'est appelée qu'une seule fois, depuis un contexte GPU.
     """
     global pipe, good_vae, taef1
+    # Évite de recharger si déjà fait.
+    if pipe is not None:
+        return
+    print("Performing first-time setup: loading all models directly to GPU...")
+    # On charge tout directement sur CUDA.
     taef1 = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=dtype).to("cuda")
     good_vae = AutoencoderKL.from_pretrained(base_model, subfolder="vae", torch_dtype=dtype, token=HF_TOKEN).to("cuda")
+    # La pipe principale est aussi chargée directement sur le GPU.
+    pipe = DiffusionPipeline.from_pretrained(
+        base_model,
+        torch_dtype=dtype,
+        vae=taef1,
+        token=HF_TOKEN
+    ).to("cuda")
     print(f"Loading on-board LoRA: {KRYPTO_LORA['repo']}")
     pipe.load_lora_weights(
         KRYPTO_LORA['repo'],
         adapter_name=KRYPTO_LORA['adapter_name'],
         token=HF_TOKEN
     )
+    # Appliquer le patch pour la prévisualisation en direct.
     pipe.flux_pipe_call_that_returns_an_iterable_of_images = flux_pipe_call_that_returns_an_iterable_of_images.__get__(pipe)
     print("Models loaded and ready on GPU.")
 @spaces.GPU(duration=75)
 def run_generation(prompt, lora_scale, cfg_scale, steps, randomize_seed, seed, aspect_ratio, base_resolution, progress=gr.Progress(track_tqdm=True)):
     if not prompt:
         raise gr.Error("Prompt cannot be empty.")
+    # C'est la clé : on ne charge les modèles que si c'est la première fois.
+    load_models()
     prompt_mash = f"{KRYPTO_LORA['trigger']}, {prompt}"
     print("Final prompt:", prompt_mash)