import gc

import gradio as gr
import torch
from diffusers import DiffusionPipeline

# Shared state: keeps the loaded pipeline alive between button clicks.
model_cache = {}


def load_flux_model():
    """Load the FLUX pipeline on CPU in float32."""
    model_id = "LPX55/FLUX.1-merged_lightning_v2"
    pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float32)
    pipe = pipe.to("cpu")
    pipe.enable_attention_slicing()  # reduces peak memory during attention
    return pipe


def unload_flux_model():
    """Drop the cached pipeline and reclaim memory."""
    if "flux" in model_cache:
        del model_cache["flux"]
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()


def run_flux(prompt, width, height, steps):
    """Generate one image, or report that the model isn't loaded yet."""
    if "flux" not in model_cache:
        return None, "Model not loaded!"
    pipe = model_cache["flux"]
    image = pipe(
        prompt=prompt,
        width=int(width),   # sliders may deliver floats; the pipeline expects ints
        height=int(height),
        num_inference_steps=int(steps),
    ).images[0]
    return image, "Success!"


with gr.Blocks() as demo:
    with gr.Tab("FLUX Diffusion"):
        status = gr.Markdown("Model not loaded.")
        load_btn = gr.Button("Load Model")
        unload_btn = gr.Button("Unload Model")
        prompt = gr.Textbox(label="Prompt", value="A cat holding a sign that says hello world")
        width = gr.Slider(256, 1536, value=768, step=64, label="Width")
        height = gr.Slider(256, 1536, value=1152, step=64, label="Height")
        steps = gr.Slider(1, 50, value=8, step=1, label="Inference Steps")
        run_btn = gr.Button("Generate Image")
        output_img = gr.Image(label="Output Image")
        output_msg = gr.Textbox(label="Status", interactive=False)

        def do_load():
            model_cache["flux"] = load_flux_model()
            return "Model loaded!"

        def do_unload():
            unload_flux_model()
            return "Model unloaded!"

        load_btn.click(do_load, None, status)
        unload_btn.click(do_unload, None, status)
        run_btn.click(run_flux, [prompt, width, height, steps], [output_img, output_msg])

demo.launch()
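# --- Optional GPU variant (a hedged sketch, not exercised by the app above) ---
# If a CUDA GPU is available, loading in float16 and enabling model CPU offload
# is typically much faster than the CPU/float32 path used in load_flux_model().
# enable_model_cpu_offload() is a real diffusers API (it requires accelerate);
# swapping this loader in for load_flux_model() is an assumption on my part,
# not part of the original app.
#
# def load_flux_model_gpu():
#     pipe = DiffusionPipeline.from_pretrained(
#         "LPX55/FLUX.1-merged_lightning_v2", torch_dtype=torch.float16
#     )
#     # Moves submodules to the GPU on demand, keeping VRAM usage low.
#     pipe.enable_model_cpu_offload()
#     return pipe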