Spaces:

Econogoat
/

KryptoCreator

Sleeping

App Files Files Community

Econogoat committed on Jul 13

Commit

2fffe4a

verified ·

1 Parent(s): 4e07f4e

Update app.py

Browse files

Files changed (1) hide show

app.py +90 -128

app.py CHANGED Viewed

@@ -9,8 +9,6 @@ from diffusers.utils import load_image
 import pandas as pd
 import random
 import time
-# --- NOUVEAU : Imports pour le LLM (Gemma) ---
 from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 # --- Configuration Principale ---
@@ -19,6 +17,7 @@ KRYPTO_LORA = {
     "trigger": "Krypt0",
     "adapter_name": "krypt0"
 }
 # Charger les prompts
 df = pd.read_csv('prompts.csv', header=None)
@@ -27,149 +26,134 @@ prompt_values = df.values.flatten()
 # Récupérer le token
 HF_TOKEN = os.getenv("HF_TOKEN")
-# --- Initialisation des Modèles ---
-device = "cuda" if torch.cuda.is_available() else "cpu"
-print(f"Using device: {device}")
 dtype = torch.bfloat16
 base_model = "black-forest-labs/FLUX.1-dev"
-# --- NOUVEAU : Chargement du LLM Gemma pour l'amélioration des prompts ---
-gemma_model_id = "google/gemma-2-9b-it"
-print(f"Chargement du LLM pour l'amélioration des prompts : {gemma_model_id}")
-# Configuration de la quantization pour charger le modèle en 4-bit.
-# Cela réduit considérablement l'utilisation de la mémoire.
 quantization_config = BitsAndBytesConfig(load_in_4bit=True)
-gemma_tokenizer = AutoTokenizer.from_pretrained(gemma_model_id, token=HF_TOKEN)
 gemma_model = AutoModelForCausalLM.from_pretrained(
-    gemma_model_id,
     quantization_config=quantization_config,
     token=HF_TOKEN,
-    device_map="auto" # Laisse accelerate gérer le placement sur le GPU/CPU
 )
-print("Modèle Gemma chargé avec succès.")
-# --- Chargement des modèles d'image ---
-print("Loading model components...")
-taef1 = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=dtype).to(device)
-good_vae = AutoencoderKL.from_pretrained(base_model, subfolder="vae", torch_dtype=dtype, token=HF_TOKEN).to(device)
-pipe = DiffusionPipeline.from_pretrained(base_model, torch_dtype=dtype, vae=taef1, token=HF_TOKEN).to(device)
-print("Models loaded.")
-# Charger le LoRA
-print(f"Loading on-board LoRA: {KRYPTO_LORA['repo']}")
 pipe.load_lora_weights(
     KRYPTO_LORA['repo'],
     low_cpu_mem_usage=True,
     adapter_name=KRYPTO_LORA['adapter_name'],
     token=HF_TOKEN
 )
-print("LoRA loaded successfully.")
 MAX_SEED = 2**32 - 1
 pipe.flux_pipe_call_that_returns_an_iterable_of_images = flux_pipe_call_that_returns_an_iterable_of_images.__get__(pipe)
-# --- NOUVEAU : Fonction d'amélioration du prompt ---
-def improve_prompt_with_gemma(user_prompt):
-    system_prompt = (
-        "Tu dois toujours répondre au user : 'Krypt0 the white scruffy superdog eat banana' "
-    )
-    # Formatage pour Gemma
-    chat = [
-        {"role": "user", "content": f"{system_prompt}\n\nUser idea: \"{user_prompt}\""}
-    ]
-    prompt_for_gemma = gemma_tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
-    inputs = gemma_tokenizer(prompt_for_gemma, return_tensors="pt").to(device)
-    # Génération de la réponse
-    outputs = gemma_model.generate(**inputs, max_new_tokens=150, do_sample=True, temperature=0.7)
-    # Décodage et nettoyage de la réponse
-    input_length = inputs["input_ids"].shape[1]
-    enhanced_prompt = gemma_tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
-    return enhanced_prompt.strip()
-def calculate_dimensions(aspect_ratio, resolution):
-    resolution = int(resolution)
-    if aspect_ratio == "Square (1:1)":
-        width, height = resolution, resolution
-    elif aspect_ratio == "Portrait (9:16)":
-        width, height = int(resolution * 9 / 16), resolution
-    elif aspect_ratio == "Landscape (16:9)":
-        width, height = resolution, int(resolution * 9 / 16)
-    elif aspect_ratio == "Ultrawide (21:9)":
-        width, height = resolution, int(resolution * 9 / 21)
-    else:
-        width, height = resolution, resolution
-    width = (width // 64) * 64
-    height = (height // 64) * 64
-    return width, height
-def generate_image(prompt_mash, steps, seed, cfg_scale, width, height, progress):
-    pipe.to(device)
-    generator = torch.Generator(device=device).manual_seed(seed)
-    image_generator = pipe.flux_pipe_call_that_returns_an_iterable_of_images(
-        prompt=prompt_mash,
-        num_inference_steps=steps,
-        guidance_scale=cfg_scale,
-        width=width,
-        height=height,
-        generator=generator,
-        joint_attention_kwargs={"scale": 1.0},
-        output_type="pil",
-        good_vae=good_vae,
-    )
-    final_image = None
-    for i, image in enumerate(image_generator):
-        final_image = image
-        progress_bar = f'<div class="progress-container"><div class="progress-bar" style="--current: {i + 1}; --total: {steps};"></div></div>'
-        yield image, gr.update(value=progress_bar, visible=True)
-    yield final_image, gr.update(visible=False)
-def update_history(new_image, history):
-    if new_image is None:
-        return history
-    if history is None:
-        history = []
-    history.insert(0, new_image)
-    return history
-@spaces.GPU(duration=75)
 def run_generation(prompt, enhance_prompt, lora_scale, cfg_scale, steps, randomize_seed, seed, aspect_ratio, base_resolution, progress=gr.Progress(track_tqdm=True)):
     if not prompt:
         raise gr.Error("Prompt cannot be empty.")
-    # --- NOUVEAU : Logique d'amélioration du prompt ---
     final_prompt = prompt
     if enhance_prompt:
         print(f"Amélioration du prompt '{prompt}' avec Gemma...")
-        final_prompt = improve_prompt_with_gemma(prompt)
         print(f"Prompt amélioré : {final_prompt}")
     prompt_mash = f"{KRYPTO_LORA['trigger']}, {final_prompt}"
     print("Prompt final envoyé au modèle d'image:", prompt_mash)
     pipe.set_adapters([KRYPTO_LORA['adapter_name']], adapter_weights=[lora_scale])
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
     width, height = calculate_dimensions(aspect_ratio, base_resolution)
-    print(f"Generating a {width}x{height} image.")
-    for image, progress_update in generate_image(prompt_mash, steps, seed, cfg_scale, width, height, progress):
-        yield image, seed, progress_update
-run_generation.zerogpu = True
-# --- Interface Utilisateur (Gradio) ---
 css = '''
 #title_container { text-align: center; margin-bottom: 1em; }
 #title_line { display: flex; justify-content: center; align-items: center; }
@@ -182,7 +166,6 @@ css = '''
 '''
 with gr.Blocks(css=css, theme=gr.themes.Soft()) as app:
-    # --- Header ---
     gr.HTML(
         """
         <div id="title_container">
@@ -196,26 +179,18 @@ with gr.Blocks(css=css, theme=gr.themes.Soft()) as app:
         </div>
         """
     )
     with gr.Row():
-        # --- LEFT COLUMN: CONTROLS ---
         with gr.Column(scale=3):
-            # Prompt Controls
             with gr.Group():
                 with gr.Row():
                     with gr.Column(scale=1, min_width=150):
                         random_prompt_btn = gr.Button("Random Prompt", elem_id="random_prompt_btn")
                     with gr.Column(scale=5):
                         prompt = gr.Textbox(label="Prompt", lines=2, placeholder="e.g., a portrait of a warrior queen")
-                # --- NOUVEAU : Case à cocher pour l'amélioration AI ---
                 enhance_prompt_checkbox = gr.Checkbox(
-                    label="Improve prompt with AI",
-                    value=True,
                     info="Uses Gemma to automatically enrich your prompt with more details before generation."
                 )
-            # Image Shape and Style Controls
             with gr.Group():
                 aspect_ratio = gr.Radio(
                     label="Aspect Ratio",
@@ -223,44 +198,31 @@ with gr.Blocks(css=css, theme=gr.themes.Soft()) as app:
                     value="Square (1:1)"
                 )
                 lora_scale = gr.Slider(
-                    label="Krypt0 Style Strength",
-                    minimum=0, maximum=2, step=0.05, value=0.9,
                     info="Controls how strongly the artistic style is applied. Higher values mean a more stylized image."
                 )
-            # Advanced Settings
             with gr.Accordion("Advanced Settings", open=False):
                 base_resolution = gr.Slider(label="Resolution (longest side)", minimum=768, maximum=1408, step=64, value=1024)
                 steps = gr.Slider(label="Generation Steps", minimum=4, maximum=50, step=1, value=20)
                 cfg_scale = gr.Slider(label="Guidance (CFG Scale)", minimum=1, maximum=10, step=0.5, value=3.5)
                 with gr.Row():
                     randomize_seed = gr.Checkbox(True, label="Random Seed")
                     seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
             generate_button = gr.Button("Generate", variant="primary")
-        # --- RIGHT COLUMN: RESULTS ---
         with gr.Column(scale=2):
             progress_bar = gr.Markdown(elem_id="progress", visible=False)
             result = gr.Image(label="Generated Image", interactive=False, show_share_button=True)
             with gr.Accordion("History", open=False):
                 history_gallery = gr.Gallery(label="History", columns=4, object_fit="contain", interactive=False)
-    # --- Event Logic ---
     def get_random_prompt():
         return random.choice(prompt_values)
     random_prompt_btn.click(fn=get_random_prompt, inputs=[], outputs=[prompt])
-    # MODIFIÉ : Ajout de `enhance_prompt_checkbox` dans les entrées
     generation_event = gr.on(
         triggers=[generate_button.click, prompt.submit],
         fn=run_generation,
         inputs=[prompt, enhance_prompt_checkbox, lora_scale, cfg_scale, steps, randomize_seed, seed, aspect_ratio, base_resolution],
         outputs=[result, seed, progress_bar]
     )
     generation_event.then(fn=update_history, inputs=[result, history_gallery], outputs=history_gallery)
 app.queue(max_size=20)

 import pandas as pd
 import random
 import time
 from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 # --- Configuration Principale ---
     "trigger": "Krypt0",
     "adapter_name": "krypt0"
 }
+GEMMA_MODEL_ID = "google/gemma-2-9b-it"
 # Charger les prompts
 df = pd.read_csv('prompts.csv', header=None)
 # Récupérer le token
 HF_TOKEN = os.getenv("HF_TOKEN")
+# --- Model initialization (CPU only) ---
+# FIX: force loading on the CPU to avoid initializing CUDA at import time.
+device_cpu = "cpu"
 dtype = torch.bfloat16
 base_model = "black-forest-labs/FLUX.1-dev"
+# --- NEW: load Gemma on the CPU ---
+print(f"Chargement du LLM {GEMMA_MODEL_ID} sur CPU...")
 quantization_config = BitsAndBytesConfig(load_in_4bit=True)
+gemma_tokenizer = AutoTokenizer.from_pretrained(GEMMA_MODEL_ID, token=HF_TOKEN)
+# FIX: pass `device_map` explicitly so the whole model is placed on the CPU at startup.
 gemma_model = AutoModelForCausalLM.from_pretrained(
+    GEMMA_MODEL_ID,
     quantization_config=quantization_config,
     token=HF_TOKEN,
+    device_map={'':device_cpu}
 )
+print("Modèle Gemma chargé.")
+# --- Load the image-model components on the CPU (tiny VAE for previews, full VAE, FLUX pipeline) ---
+print("Chargement des composants du modèle d'image sur CPU...")
+taef1 = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=dtype).to(device_cpu)
+good_vae = AutoencoderKL.from_pretrained(base_model, subfolder="vae", torch_dtype=dtype, token=HF_TOKEN).to(device_cpu)
+pipe = DiffusionPipeline.from_pretrained(base_model, torch_dtype=dtype, vae=taef1, token=HF_TOKEN).to(device_cpu)
+print("Modèles d'image chargés.")
+# Load the LoRA weights into the pipeline (which is still on the CPU at this point)
+print(f"Chargement du LoRA : {KRYPTO_LORA['repo']}")
 pipe.load_lora_weights(
     KRYPTO_LORA['repo'],
     low_cpu_mem_usage=True,
     adapter_name=KRYPTO_LORA['adapter_name'],
     token=HF_TOKEN
 )
+print("LoRA chargé.")
 MAX_SEED = 2**32 - 1
 pipe.flux_pipe_call_that_returns_an_iterable_of_images = flux_pipe_call_that_returns_an_iterable_of_images.__get__(pipe)
+@spaces.GPU(duration=120) # Longer GPU window to leave time for moving the models between devices
 def run_generation(prompt, enhance_prompt, lora_scale, cfg_scale, steps, randomize_seed, seed, aspect_ratio, base_resolution, progress=gr.Progress(track_tqdm=True)):
+    """Generate an image for `prompt`, streaming intermediate previews.
+
+    When `enhance_prompt` is true, the prompt is first rewritten by Gemma
+    (moved to the GPU for the call, then back to the CPU). The LoRA trigger
+    word is prepended, the image pipeline and the full VAE are moved to the
+    GPU, and every image produced by the iterable pipeline call is yielded
+    together with a progress bar. Afterwards the pipeline is moved back to
+    the CPU and the CUDA cache is emptied.
+
+    Yields:
+        Tuples of (image, seed, progress-bar update). The last tuple carries
+        the final image and hides the progress bar.
+
+    Raises:
+        gr.Error: if `prompt` is empty.
+    """
     if not prompt:
         raise gr.Error("Prompt cannot be empty.")
+    # The move to the GPU happens here, inside the @spaces.GPU-decorated call.
+    device_gpu = "cuda"
     final_prompt = prompt
     if enhance_prompt:
+        print("Déplacement de Gemma sur le GPU...")
+        # NOTE(review): the model is loaded 4-bit quantized; confirm the pinned transformers/bitsandbytes versions allow .to() on it.
+        gemma_model.to(device_gpu)
         print(f"Amélioration du prompt '{prompt}' avec Gemma...")
+        system_prompt = (
+            "You are an expert prompt engineer for a text-to-image AI. "
+            "Your task is to take a user's simple idea and transform it into a rich, detailed, and visually descriptive prompt. "
+            "Focus on describing the scene, the subject, the environment, the lighting, the colors, and a potential artistic style. "
+            "Do not add any conversational text or refuse the request. Only output the enhanced prompt."
+        )
+        # Fixed: the user's text lives in `prompt`; `user_prompt` was never defined here (NameError).
+        chat = [{"role": "user", "content": f"{system_prompt}\n\nUser idea: \"{prompt}\""}]
+        prompt_for_gemma = gemma_tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
+        inputs = gemma_tokenizer(prompt_for_gemma, return_tensors="pt").to(device_gpu)
+        outputs = gemma_model.generate(**inputs, max_new_tokens=150, do_sample=True, temperature=0.7)
+        # Decode only the newly generated tokens, skipping the echoed input.
+        input_length = inputs["input_ids"].shape[1]
+        final_prompt = gemma_tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True).strip()
         print(f"Prompt amélioré : {final_prompt}")
+        print("Libération de la mémoire de Gemma (déplacement vers CPU)...")
+        gemma_model.to(device_cpu) # Frees the GPU VRAM before the image pipeline is moved over
     prompt_mash = f"{KRYPTO_LORA['trigger']}, {final_prompt}"
     print("Prompt final envoyé au modèle d'image:", prompt_mash)
+    # --- Move the image pipeline to the GPU ---
+    print("Déplacement du pipeline d'image sur le GPU...")
+    pipe.to(device_gpu)
+    good_vae.to(device_gpu)
     pipe.set_adapters([KRYPTO_LORA['adapter_name']], adapter_weights=[lora_scale])
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
     width, height = calculate_dimensions(aspect_ratio, base_resolution)
+    print(f"Génération d'une image de {width}x{height} pixels.")
+    generator = torch.Generator(device=device_gpu).manual_seed(seed)
+    image_generator = pipe.flux_pipe_call_that_returns_an_iterable_of_images(
+        prompt=prompt_mash, num_inference_steps=steps, guidance_scale=cfg_scale,
+        width=width, height=height, generator=generator, output_type="pil", good_vae=good_vae,
+    )
+    final_image = None
+    for i, image in enumerate(image_generator):
+        final_image = image
+        progress_bar = f'<div class="progress-container"><div class="progress-bar" style="--current: {i + 1}; --total: {steps};"></div></div>'
+        yield image, seed, gr.update(value=progress_bar, visible=True)
+    # --- Release the VRAM ---
+    print("Libération de la mémoire du pipeline d'image (déplacement vers CPU)...")
+    pipe.to(device_cpu)
+    good_vae.to(device_cpu)
+    torch.cuda.empty_cache()
+    yield final_image, seed, gr.update(visible=False)
+# Le reste du code (fonctions d'aide et interface) reste le même
+def calculate_dimensions(aspect_ratio, resolution):
+    """Return (width, height) for an aspect-ratio label and a longest-side size.
+
+    The longest side equals `resolution`; the other side is scaled by the
+    ratio named in the label. Unknown labels fall back to a square. Both
+    sides are rounded down to a multiple of 64.
+    """
+    longest = int(resolution)
+    if aspect_ratio == "Portrait (9:16)":
+        raw_width, raw_height = int(longest * 9 / 16), longest
+    elif aspect_ratio == "Landscape (16:9)":
+        raw_width, raw_height = longest, int(longest * 9 / 16)
+    elif aspect_ratio == "Ultrawide (21:9)":
+        raw_width, raw_height = longest, int(longest * 9 / 21)
+    else:
+        # "Square (1:1)" and any unrecognized label both give a square.
+        raw_width, raw_height = longest, longest
+    # Snap each side down to the nearest multiple of 64.
+    return raw_width - raw_width % 64, raw_height - raw_height % 64
+def update_history(new_image, history):
+    """Put `new_image` at the front of `history` and return the history.
+
+    A missing image leaves `history` untouched; a missing history is
+    replaced by a fresh list before the image is inserted.
+    """
+    if new_image is not None:
+        if history is None:
+            history = []
+        # Newest image first.
+        history.insert(0, new_image)
+    return history
 css = '''
 #title_container { text-align: center; margin-bottom: 1em; }
 #title_line { display: flex; justify-content: center; align-items: center; }
 '''
 with gr.Blocks(css=css, theme=gr.themes.Soft()) as app:
     gr.HTML(
         """
         <div id="title_container">
         </div>
         """
     )
     with gr.Row():
         with gr.Column(scale=3):
             with gr.Group():
                 with gr.Row():
                     with gr.Column(scale=1, min_width=150):
                         random_prompt_btn = gr.Button("Random Prompt", elem_id="random_prompt_btn")
                     with gr.Column(scale=5):
                         prompt = gr.Textbox(label="Prompt", lines=2, placeholder="e.g., a portrait of a warrior queen")
                 enhance_prompt_checkbox = gr.Checkbox(
+                    label="Improve prompt with AI", value=True,
                     info="Uses Gemma to automatically enrich your prompt with more details before generation."
                 )
             with gr.Group():
                 aspect_ratio = gr.Radio(
                     label="Aspect Ratio",
                     value="Square (1:1)"
                 )
                 lora_scale = gr.Slider(
+                    label="Krypt0 Style Strength", minimum=0, maximum=2, step=0.05, value=0.9,
                     info="Controls how strongly the artistic style is applied. Higher values mean a more stylized image."
                 )
             with gr.Accordion("Advanced Settings", open=False):
                 base_resolution = gr.Slider(label="Resolution (longest side)", minimum=768, maximum=1408, step=64, value=1024)
                 steps = gr.Slider(label="Generation Steps", minimum=4, maximum=50, step=1, value=20)
                 cfg_scale = gr.Slider(label="Guidance (CFG Scale)", minimum=1, maximum=10, step=0.5, value=3.5)
                 with gr.Row():
                     randomize_seed = gr.Checkbox(True, label="Random Seed")
                     seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
             generate_button = gr.Button("Generate", variant="primary")
         with gr.Column(scale=2):
             progress_bar = gr.Markdown(elem_id="progress", visible=False)
             result = gr.Image(label="Generated Image", interactive=False, show_share_button=True)
             with gr.Accordion("History", open=False):
                 history_gallery = gr.Gallery(label="History", columns=4, object_fit="contain", interactive=False)
     def get_random_prompt():
         return random.choice(prompt_values)
     random_prompt_btn.click(fn=get_random_prompt, inputs=[], outputs=[prompt])
     generation_event = gr.on(
         triggers=[generate_button.click, prompt.submit],
         fn=run_generation,
         inputs=[prompt, enhance_prompt_checkbox, lora_scale, cfg_scale, steps, randomize_seed, seed, aspect_ratio, base_resolution],
         outputs=[result, seed, progress_bar]
     )
     generation_event.then(fn=update_history, inputs=[result, history_gallery], outputs=history_gallery)
 app.queue(max_size=20)