Econogoat committed
Commit a6ee92e · verified · 1 Parent(s): c09e4fb

Update app.py

Files changed (1)
  1. app.py +80 -36
app.py CHANGED
@@ -10,51 +10,93 @@ import pandas as pd
 import random
 import time
 
-# --- Main Configuration ---
+# --- NEW: Imports for the LLM (Gemma) ---
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+
+# --- Main Configuration ---
 KRYPTO_LORA = {
-    # FIX: The repo name was misspelled (a capital O instead of a zero).
-    "repo": "Econogoat/Krypt0_LORA",
+    "repo": "Econogoat/Krypt0_LORA",
     "trigger": "Krypt0",
     "adapter_name": "krypt0"
 }
 
-# Load prompts for the randomize button
+# Load the prompts
 df = pd.read_csv('prompts.csv', header=None)
 prompt_values = df.values.flatten()
 
-# Get access token from Space secrets
+# Get the access token
 HF_TOKEN = os.getenv("HF_TOKEN")
-if not HF_TOKEN:
-    print("WARNING: HF_TOKEN secret is not set. Gated model downloads may fail.")
 
-# --- Model Initialization ---
+# --- Model Initialization ---
 device = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"Using device: {device}")
 dtype = torch.bfloat16
 base_model = "black-forest-labs/FLUX.1-dev"
 
-# Load model components
+# --- NEW: Load the Gemma LLM for prompt enhancement ---
+gemma_model_id = "google/gemma-2-9b-it"
+print(f"Loading the prompt-enhancement LLM: {gemma_model_id}")
+
+# Quantization config to load the model in 4-bit.
+# This considerably reduces memory usage.
+quantization_config = BitsAndBytesConfig(load_in_4bit=True)
+
+gemma_tokenizer = AutoTokenizer.from_pretrained(gemma_model_id, token=HF_TOKEN)
+gemma_model = AutoModelForCausalLM.from_pretrained(
+    gemma_model_id,
+    quantization_config=quantization_config,
+    token=HF_TOKEN,
+    device_map="auto"  # let accelerate handle GPU/CPU placement
+)
+print("Gemma model loaded successfully.")
+
+
+# --- Load the image models ---
 print("Loading model components...")
 taef1 = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=dtype).to(device)
 good_vae = AutoencoderKL.from_pretrained(base_model, subfolder="vae", torch_dtype=dtype, token=HF_TOKEN).to(device)
 pipe = DiffusionPipeline.from_pretrained(base_model, torch_dtype=dtype, vae=taef1, token=HF_TOKEN).to(device)
 print("Models loaded.")
 
-# Load the LoRA adapter once on startup
+# Load the LoRA
 print(f"Loading on-board LoRA: {KRYPTO_LORA['repo']}")
 pipe.load_lora_weights(
     KRYPTO_LORA['repo'],
     low_cpu_mem_usage=True,
     adapter_name=KRYPTO_LORA['adapter_name'],
-    token=HF_TOKEN  # also pass the token here for private/gated LoRAs
+    token=HF_TOKEN
 )
 print("LoRA loaded successfully.")
 
 MAX_SEED = 2**32 - 1
-
-# Monkey-patch the pipeline for live preview
 pipe.flux_pipe_call_that_returns_an_iterable_of_images = flux_pipe_call_that_returns_an_iterable_of_images.__get__(pipe)
 
+# --- NEW: Prompt enhancement function ---
+def improve_prompt_with_gemma(user_prompt):
+    system_prompt = (
+        "You are an expert prompt engineer for a text-to-image AI. "
+        "Your task is to take a user's simple idea and transform it into a rich, detailed, and visually descriptive prompt. "
+        "Focus on describing the scene, the subject, the environment, the lighting, the colors, and a potential artistic style. "
+        "Do not add any conversational text or refuse the request. Only output the enhanced prompt."
+    )
+
+    # Format for Gemma's chat template
+    chat = [
+        {"role": "user", "content": f"{system_prompt}\n\nUser idea: \"{user_prompt}\""}
+    ]
+    prompt_for_gemma = gemma_tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
+
+    inputs = gemma_tokenizer(prompt_for_gemma, return_tensors="pt").to(device)
+
+    # Generate the response
+    outputs = gemma_model.generate(**inputs, max_new_tokens=150, do_sample=True, temperature=0.7)
+
+    # Decode only the newly generated tokens
+    input_length = inputs["input_ids"].shape[1]
+    enhanced_prompt = gemma_tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
+
+    return enhanced_prompt.strip()
+
 
 def calculate_dimensions(aspect_ratio, resolution):
     resolution = int(resolution)
@@ -103,15 +145,21 @@ def update_history(new_image, history):
     return history
 
 @spaces.GPU(duration=75)
-def run_generation(prompt, lora_scale, cfg_scale, steps, randomize_seed, seed, aspect_ratio, base_resolution, progress=gr.Progress(track_tqdm=True)):
+def run_generation(prompt, enhance_prompt, lora_scale, cfg_scale, steps, randomize_seed, seed, aspect_ratio, base_resolution, progress=gr.Progress(track_tqdm=True)):
     if not prompt:
         raise gr.Error("Prompt cannot be empty.")
 
-    prompt_mash = f"{KRYPTO_LORA['trigger']}, {prompt}"
-    print("Final prompt:", prompt_mash)
+    # --- NEW: Prompt enhancement logic ---
+    final_prompt = prompt
+    if enhance_prompt:
+        print(f"Enhancing prompt '{prompt}' with Gemma...")
+        final_prompt = improve_prompt_with_gemma(prompt)
+        print(f"Enhanced prompt: {final_prompt}")
+
+    prompt_mash = f"{KRYPTO_LORA['trigger']}, {final_prompt}"
+    print("Final prompt sent to the image model:", prompt_mash)
 
     pipe.set_adapters([KRYPTO_LORA['adapter_name']], adapter_weights=[lora_scale])
-    print(f"Adapter '{KRYPTO_LORA['adapter_name']}' activated with weight {lora_scale}.")
 
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
@@ -124,7 +172,7 @@ def run_generation(prompt, lora_scale, cfg_scale, steps, randomize_seed, seed, aspect_ratio, base_resolution, progress=gr.Progress(track_tqdm=True)):
 
 run_generation.zerogpu = True
 
-# --- User Interface (Gradio) ---
+# --- User Interface (Gradio) ---
 css = '''
 #title_container { text-align: center; margin-bottom: 1em; }
 #title_line { display: flex; justify-content: center; align-items: center; }
@@ -142,7 +190,7 @@ with gr.Blocks(css=css, theme=gr.themes.Soft()) as app:
     """
     <div id="title_container">
         <div id="title_line">
-            <img src="/file=LogoKrypto.png" alt="Krypt0 Logo">
+            <img src="/file=logo.png" alt="Krypt0 Logo">
             <h1>Krypto Image Generator - beta v1</h1>
         </div>
         <div id="subtitle">
@@ -162,7 +210,14 @@ with gr.Blocks(css=css, theme=gr.themes.Soft()) as app:
                 random_prompt_btn = gr.Button("Random Prompt", elem_id="random_prompt_btn")
             with gr.Column(scale=5):
                 prompt = gr.Textbox(label="Prompt", lines=2, placeholder="e.g., a portrait of a warrior queen")
-
+
+                # --- NEW: Checkbox for AI prompt enhancement ---
+                enhance_prompt_checkbox = gr.Checkbox(
+                    label="Improve prompt with AI",
+                    value=True,
+                    info="Uses Gemma to automatically enrich your prompt with more details before generation."
+                )
+
             # Image Shape and Style Controls
             with gr.Group():
                 aspect_ratio = gr.Radio(
@@ -172,15 +227,11 @@ with gr.Blocks(css=css, theme=gr.themes.Soft()) as app:
                 )
                 lora_scale = gr.Slider(
                     label="Krypt0 Style Strength",
-                    minimum=0,
-                    maximum=2,
-                    step=0.05,
-                    value=0.9,
+                    minimum=0, maximum=2, step=0.05, value=0.9,
                     info="Controls how strongly the artistic style is applied. Higher values mean a more stylized image."
                 )
 
             # Advanced Settings
-            # FIX: The accordion should be closed by default.
             with gr.Accordion("Advanced Settings", open=False):
                 base_resolution = gr.Slider(label="Resolution (longest side)", minimum=768, maximum=1408, step=64, value=1024)
                 steps = gr.Slider(label="Generation Steps", minimum=4, maximum=50, step=1, value=20)
@@ -203,24 +254,17 @@ with gr.Blocks(css=css, theme=gr.themes.Soft()) as app:
     def get_random_prompt():
         return random.choice(prompt_values)
 
-    random_prompt_btn.click(
-        fn=get_random_prompt,
-        inputs=[],
-        outputs=[prompt]
-    )
+    random_prompt_btn.click(fn=get_random_prompt, inputs=[], outputs=[prompt])
 
+    # CHANGED: added `enhance_prompt_checkbox` to the inputs
     generation_event = gr.on(
         triggers=[generate_button.click, prompt.submit],
        fn=run_generation,
-        inputs=[prompt, lora_scale, cfg_scale, steps, randomize_seed, seed, aspect_ratio, base_resolution],
+        inputs=[prompt, enhance_prompt_checkbox, lora_scale, cfg_scale, steps, randomize_seed, seed, aspect_ratio, base_resolution],
         outputs=[result, seed, progress_bar]
     )
 
-    generation_event.then(
-        fn=update_history,
-        inputs=[result, history_gallery],
-        outputs=history_gallery,
-    )
+    generation_event.then(fn=update_history, inputs=[result, history_gallery], outputs=history_gallery)
 
 app.queue(max_size=20)
 app.launch()
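
For reviewers who want to exercise the new path on its own, below is a minimal standalone sketch of the enhance-then-generate flow this commit introduces. It is not part of the commit: it reuses the same google/gemma-2-9b-it checkpoint and generation settings as app.py, but stubs out the FLUX image pipeline, and it assumes transformers, accelerate, and bitsandbytes are installed and that HF_TOKEN grants access to the gated Gemma weights.

# Standalone sketch: enhance a short idea with Gemma, then print the string
# that app.py would hand to the FLUX pipeline (image generation stubbed out).
import os

from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

HF_TOKEN = os.getenv("HF_TOKEN")
MODEL_ID = "google/gemma-2-9b-it"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),  # 4-bit, as in app.py
    token=HF_TOKEN,
    device_map="auto",
)

def enhance(user_prompt: str) -> str:
    # Single-turn chat, same shape as improve_prompt_with_gemma in app.py
    # (the instruction wording here is a simplified stand-in).
    chat = [{"role": "user", "content": f'Expand this into a rich, detailed image prompt: "{user_prompt}"'}]
    text = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=150, do_sample=True, temperature=0.7)
    # Keep only the newly generated tokens, dropping the echoed input.
    return tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True).strip()

print(f"Krypt0, {enhance('a portrait of a warrior queen')}")  # stand-in for the FLUX call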