Update app.py
app.py CHANGED
@@ -3,6 +3,8 @@ import gradio as gr
 import torch
 from PIL import Image
 import spaces
+# Import BitsAndBytesConfig as suggested by your analysis
+from transformers import BitsAndBytesConfig
 from diffusers import DiffusionPipeline, AutoencoderTiny, AutoencoderKL
 from live_preview_helpers import calculate_shift, retrieve_timesteps, flux_pipe_call_that_returns_an_iterable_of_images
 from diffusers.utils import load_image
@@ -18,7 +20,7 @@ KRYPTO_LORA = {
 }

 # --- Lazy Loading Setup ---
-#
+# As per the analysis, NOTHING is loaded here. The models are initialized to None.
 pipe = None
 good_vae = None
 taef1 = None
@@ -35,27 +37,37 @@ HF_TOKEN = os.getenv("HF_TOKEN")

 def load_models():
     """
-
-
+    Loads all models and places them on the GPU.
+    Only called once, from a GPU context.
     """
     global pipe, good_vae, taef1

-
+    # Avoid reloading if this has already been done.
+    if pipe is not None:
+        return
+
+    print("Performing first-time setup: loading all models directly to GPU...")

-    # Load everything directly onto CUDA
+    # Load everything directly onto CUDA.
     taef1 = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=dtype).to("cuda")
     good_vae = AutoencoderKL.from_pretrained(base_model, subfolder="vae", torch_dtype=dtype, token=HF_TOKEN).to("cuda")
-
+
+    # The main pipeline is also loaded directly onto the GPU.
+    pipe = DiffusionPipeline.from_pretrained(
+        base_model,
+        torch_dtype=dtype,
+        vae=taef1,
+        token=HF_TOKEN
+    ).to("cuda")

     print(f"Loading on-board LoRA: {KRYPTO_LORA['repo']}")
     pipe.load_lora_weights(
         KRYPTO_LORA['repo'],
-        low_cpu_mem_usage=True,
         adapter_name=KRYPTO_LORA['adapter_name'],
         token=HF_TOKEN
     )

-    #
+    # Apply the live-preview patch.
     pipe.flux_pipe_call_that_returns_an_iterable_of_images = flux_pipe_call_that_returns_an_iterable_of_images.__get__(pipe)

     print("Models loaded and ready on GPU.")
@@ -108,13 +120,11 @@ def update_history(new_image, history):

 @spaces.GPU(duration=75)
 def run_generation(prompt, lora_scale, cfg_scale, steps, randomize_seed, seed, aspect_ratio, base_resolution, progress=gr.Progress(track_tqdm=True)):
-    global pipe
     if not prompt:
         raise gr.Error("Prompt cannot be empty.")

-    #
-
-    load_models()
+    # This is the key: the models are only loaded the first time.
+    load_models()

     prompt_mash = f"{KRYPTO_LORA['trigger']}, {prompt}"
     print("Final prompt:", prompt_mash)
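
The change boils down to one pattern for ZeroGPU Spaces: keep the heavy objects as module-level globals set to None, and defer every .from_pretrained(...).to("cuda") call into a guarded load_models() that is only invoked from inside a function decorated with @spaces.GPU. A stripped-down sketch of that pattern follows; the model id, dtype, and decorator duration are illustrative placeholders, not the Space's actual values.

import spaces
import torch
from diffusers import DiffusionPipeline

# Nothing is loaded at import time: under ZeroGPU no GPU is attached yet.
pipe = None

def load_models():
    """Load the pipeline onto the GPU, doing the work only on the first call."""
    global pipe
    if pipe is not None:
        return  # already loaded by a previous request
    pipe = DiffusionPipeline.from_pretrained(
        "black-forest-labs/FLUX.1-dev",  # illustrative model id
        torch_dtype=torch.bfloat16,      # illustrative dtype
    ).to("cuda")

@spaces.GPU(duration=75)
def generate(prompt):
    # The first call pays the loading cost; later calls reuse the cached pipe.
    load_models()
    return pipe(prompt, num_inference_steps=28).images[0]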