|
|
|
import spaces |
|
|
|
import gradio as gr |
|
import torch |
|
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline |
|
from diffusers import StableDiffusionPipeline, DiffusionPipeline |
|
import requests |
|
from PIL import Image |
|
import io |
|
import base64 |
|
import os
import sys
|
import time |
|
import numpy as np |
|
import random |
|
from huggingface_hub import login |
|
from fastapi import FastAPI, HTTPException |
|
from fastapi.middleware.cors import CORSMiddleware |
|
from pydantic import BaseModel |
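# Note: FastAPI, CORSMiddleware and pydantic are imported but not used below; presumably kept for an external REST wrapper around this Space.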
|
|
|
print("🚀 Iniciando NTIA Space con ZeroGPU H200...") |
|
print(f"📁 Directorio actual: {os.getcwd()}") |
|
print(f"🐍 Python version: {os.sys.version}") |
|
|
|
|
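# ZeroGPU request limits (assumption: these variables are read by the Spaces runtime to cap GPU time and memory per call)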
|
os.environ["SPACES_GPU_TIMEOUT"] = "30" |
|
os.environ["SPACES_GPU_MEMORY"] = "8" |
|
|
|
|
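# Detect the compute device and pick the dtype / CUDA flags accordingly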
|
device = "cuda" if torch.cuda.is_available() else "cpu" |
|
print(f"🖥️ Dispositivo detectado: {device}") |
|
print(f"🔥 CUDA disponible: {torch.cuda.is_available()}") |
|
|
|
if torch.cuda.is_available(): |
|
print(f"🎮 GPU: {torch.cuda.get_device_name(0)}") |
|
print(f"💾 Memoria GPU: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB") |
|
print("🚀 ZeroGPU H200 detectado - Optimizando para máximo rendimiento") |
|
|
|
|
|
torch_dtype = torch.float16 |
|
print("⚡ Usando torch.float16 para H200") |
|
|
|
|
|
torch.backends.cudnn.benchmark = True |
|
torch.backends.cuda.matmul.allow_tf32 = True |
|
torch.backends.cudnn.allow_tf32 = True |
|
print("🔧 Optimizaciones CUDA habilitadas para H200") |
|
else: |
|
torch_dtype = torch.float32 |
|
print("🐌 Usando torch.float32 para CPU") |
|
|
|
|
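# Hugging Face authentication: gated models (e.g. FLUX, SD 3.5) require a valid HF_TOKEN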
|
HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGING_FACE_HUB_TOKEN") |
|
if HF_TOKEN: |
|
try: |
|
print(f"🔑 Token detectado: {HF_TOKEN[:10]}...") |
|
login(token=HF_TOKEN) |
|
print("✅ Autenticado con Hugging Face") |
|
except Exception as e: |
|
print(f"⚠️ Error de autenticación: {e}") |
|
else: |
|
print("⚠️ No se encontró HF_TOKEN - modelos gated no estarán disponibles") |
|
|
|
|
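# Registry of model IDs exposed in the UI, grouped by task (text, image, video, chat)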
|
MODELS = { |
|
"text": { |
|
"microsoft/DialoGPT-medium": "Chat conversacional", |
|
"microsoft/DialoGPT-large": "Chat conversacional avanzado", |
|
"microsoft/DialoGPT-small": "Chat conversacional rápido", |
|
"gpt2": "Generación de texto", |
|
"gpt2-medium": "GPT-2 mediano", |
|
"gpt2-large": "GPT-2 grande", |
|
"distilgpt2": "GPT-2 optimizado", |
|
"EleutherAI/gpt-neo-125M": "GPT-Neo pequeño", |
|
"EleutherAI/gpt-neo-1.3B": "GPT-Neo mediano", |
|
"facebook/opt-125m": "OPT pequeño", |
|
"facebook/opt-350m": "OPT mediano", |
|
"bigscience/bloom-560m": "BLOOM multilingüe", |
|
"bigscience/bloom-1b1": "BLOOM grande", |
|
"Helsinki-NLP/opus-mt-es-en": "Traductor español-inglés", |
|
"Helsinki-NLP/opus-mt-en-es": "Traductor inglés-español", |
|
|
|
"mistralai/Voxtral-Mini-3B-2507": "Voxtral Mini 3B - Multimodal", |
|
"tiiuae/falcon-7b-instruct": "Falcon 7B Instruct", |
|
"google/flan-t5-base": "Flan-T5 Base - Tareas múltiples" |
|
}, |
|
"image": { |
|
|
|
"stabilityai/sdxl-turbo": "⚡ SDXL Turbo", |
|
"stabilityai/sd-turbo": "⚡ SD Turbo", |
|
"ByteDance/SDXL-Lightning": "⚡ SDXL Lightning", |
|
|
|
|
|
"stabilityai/stable-diffusion-3.5-large": "🌟 SD 3.5 Large (Premium)", |
|
|
|
|
|
"stabilityai/stable-diffusion-xl-base-1.0": "SDXL Base", |
|
"stabilityai/stable-diffusion-2-1": "Stable Diffusion 2.1", |
|
"CompVis/stable-diffusion-v1-4": "Stable Diffusion v1.4 (Libre)", |
|
"runwayml/stable-diffusion-v1-5": "Stable Diffusion v1.5", |
|
|
|
|
|
"prompthero/openjourney": "Midjourney Style", |
|
"prompthero/openjourney-v4": "OpenJourney v4", |
|
"WarriorMama777/OrangeMixs": "Orange Mixs", |
|
"hakurei/waifu-diffusion": "Waifu Diffusion", |
|
"SG161222/Realistic_Vision_V5.1_noVAE": "Realistic Vision", |
|
"Linaqruf/anything-v3.0": "Anything v3", |
|
"XpucT/deliberate-v2": "Deliberate v2", |
|
"dreamlike-art/dreamlike-diffusion-1.0": "Dreamlike Diffusion", |
|
"KBlueLeaf/kohaku-v2.1": "Kohaku V2.1", |
|
|
|
|
|
"black-forest-labs/FLUX.1-dev": "FLUX.1 Dev (Requiere acceso)", |
|
"black-forest-labs/FLUX.1-schnell": "FLUX.1 Schnell (Requiere acceso)", |
|
|
|
|
|
"stabilityai/stable-diffusion-3.5-large": "🌟 SD 3.5 Large (Alta Calidad)", |
|
|
|
|
|
"CompVis/ldm-text2im-large-256": "Latent Diffusion Model 256" |
|
}, |
|
"video": { |
|
|
|
"ByteDance/AnimateDiff-Lightning": "⚡ AnimateDiff Lightning (Más rápido)", |
|
"cerspense/zeroscope_v2_576w": "⚡ Zeroscope v2 576w (Rápido)", |
|
"damo-vilab/text-to-video-ms-1.7b": "⚡ Text-to-Video MS 1.7B (Rápido)", |
|
|
|
|
|
"cerspense/zeroscope_v2_XL": "🎬 Zeroscope v2 XL (Alta calidad)", |
|
|
|
|
|
"Wan-AI/Wan2.1-T2V-14B-Diffusers": "🌟 Wan2.1 T2V 14B (Máxima calidad - Requiere mucho espacio)", |
|
|
|
|
|
"ali-vilab/modelscope-damo-text-to-video-synthesis": "🔄 ModelScope Text-to-Video (Experimental)" |
|
}, |
|
"chat": { |
|
"microsoft/DialoGPT-medium": "Chat conversacional", |
|
"microsoft/DialoGPT-large": "Chat conversacional avanzado", |
|
"microsoft/DialoGPT-small": "Chat conversacional rápido", |
|
"facebook/opt-350m": "OPT conversacional", |
|
"bigscience/bloom-560m": "BLOOM multilingüe", |
|
|
|
"mistralai/Voxtral-Mini-3B-2507": "Voxtral Mini 3B - Multimodal", |
|
"tiiuae/falcon-7b-instruct": "Falcon 7B Instruct" |
|
} |
|
} |
|
|
|
|
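# In-memory cache of loaded models/pipelines, keyed by model ID, so repeated requests reuse the same weights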
|
model_cache = {} |
|
|
|
|
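# Simple single-flight flag so only one video generation runs at a time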
|
video_generation_in_progress = False |
|
|
|
def load_text_model(model_name): |
|
"""Cargar modelo de texto""" |
|
if model_name not in model_cache: |
|
print(f"Cargando modelo de texto: {model_name}") |
|
|
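        # Note: the Helsinki-NLP models in MODELS["text"] are seq2seq (MarianMT) translators;
        # loading them with AutoModelForCausalLM will not give real translations (that would
        # need AutoModelForSeq2SeqLM or a "translation" pipeline).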
|
try: |
|
tokenizer = AutoTokenizer.from_pretrained(model_name) |
|
model = AutoModelForCausalLM.from_pretrained(model_name) |
|
|
|
if "dialogpt" in model_name.lower(): |
|
tokenizer.pad_token = tokenizer.eos_token |
|
model.config.pad_token_id = model.config.eos_token_id |
|
|
|
model_cache[model_name] = { |
|
"tokenizer": tokenizer, |
|
"model": model, |
|
"type": "text" |
|
} |
|
|
|
except Exception as e: |
|
print(f"Error cargando modelo de texto {model_name}: {e}") |
|
|
|
tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium") |
|
model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium") |
|
tokenizer.pad_token = tokenizer.eos_token |
|
model.config.pad_token_id = model.config.eos_token_id |
|
|
|
model_cache[model_name] = { |
|
"tokenizer": tokenizer, |
|
"model": model, |
|
"type": "text" |
|
} |
|
|
|
return model_cache[model_name] |
|
|
|
def load_image_model(model_name): |
|
"""Cargar modelo de imagen optimizado para H200""" |
|
if model_name not in model_cache: |
|
print(f"\n🔄 Iniciando carga del modelo: {model_name}") |
|
|
|
try: |
|
start_time = time.time() |
|
|
|
|
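            # Only request the fp16 weight variant for repos known to publish one; other repos may fail on variant="fp16"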
|
use_fp16_variant = False |
|
if torch.cuda.is_available(): |
|
|
|
fp16_supported_models = [ |
|
"stabilityai/sdxl-turbo", |
|
"stabilityai/sd-turbo", |
|
"stabilityai/stable-diffusion-xl-base-1.0", |
|
"runwayml/stable-diffusion-v1-5", |
|
"CompVis/stable-diffusion-v1-4" |
|
] |
|
use_fp16_variant = any(model in model_name for model in fp16_supported_models) |
|
print(f"🔧 FP16 variant: {'✅ Habilitado' if use_fp16_variant else '❌ Deshabilitado'} para {model_name}") |
|
|
|
|
|
if "flux" in model_name.lower() or "black-forest" in model_name.lower(): |
|
if not HF_TOKEN: |
|
print("❌ No hay acceso a modelos gated. Configura HF_TOKEN en el Space.") |
|
raise Exception("Acceso denegado a modelos FLUX. Configura HF_TOKEN en las variables de entorno del Space.") |
|
|
|
try: |
|
from diffusers import FluxPipeline |
|
print("🚀 Cargando FLUX Pipeline...") |
|
print(f"🔧 Modelo: {model_name}") |
|
print(f"🔑 Usando token de autenticación: {'Sí' if HF_TOKEN else 'No'}") |
|
|
|
|
|
pipe = FluxPipeline.from_pretrained( |
|
model_name, |
|
torch_dtype=torch_dtype, |
|
use_auth_token=HF_TOKEN, |
|
variant="fp16" if use_fp16_variant else None |
|
) |
|
|
|
print("✅ FLUX Pipeline cargado exitosamente") |
|
|
|
except Exception as e: |
|
print(f"❌ Error cargando FLUX: {e}") |
|
print(f"🔍 Tipo de error: {type(e).__name__}") |
|
|
|
|
|
if "NVML_SUCCESS" in str(e) or "CUDACachingAllocator" in str(e): |
|
print("🚨 Error de CUDA detectado en FLUX.1-schnell") |
|
print("🔧 Intentando solución alternativa...") |
|
|
|
try: |
|
|
|
if torch.cuda.is_available(): |
|
torch.cuda.empty_cache() |
|
print("🧹 Memoria CUDA limpiada") |
|
|
|
|
|
pipe = FluxPipeline.from_pretrained( |
|
model_name, |
|
torch_dtype=torch.float32, |
|
use_auth_token=HF_TOKEN, |
|
device_map="auto" |
|
) |
|
print("✅ FLUX cargado con configuración alternativa") |
|
|
|
except Exception as e2: |
|
print(f"❌ Error persistente: {e2}") |
|
print("🔄 Fallback a FLUX.1-dev...") |
|
|
|
try: |
|
pipe = FluxPipeline.from_pretrained( |
|
"black-forest-labs/FLUX.1-dev", |
|
torch_dtype=torch.float32, |
|
use_auth_token=HF_TOKEN |
|
) |
|
print("✅ FLUX.1-dev cargado como fallback") |
|
except Exception as e3: |
|
print(f"❌ Error con FLUX.1-dev: {e3}") |
|
print("🔄 Fallback final a Stable Diffusion...") |
|
pipe = StableDiffusionPipeline.from_pretrained( |
|
"CompVis/stable-diffusion-v1-4", |
|
torch_dtype=torch_dtype, |
|
safety_checker=None |
|
) |
|
|
|
|
|
elif "401" in str(e) or "unauthorized" in str(e).lower(): |
|
print("🔐 Error de autenticación. Asegúrate de:") |
|
print(" 1. Tener acceso al modelo FLUX en Hugging Face") |
|
print(" 2. Configurar HF_TOKEN en las variables de entorno del Space") |
|
print(" 3. Que el token tenga permisos para acceder a modelos gated") |
|
|
|
|
|
print("🔄 Fallback a Stable Diffusion...") |
|
pipe = StableDiffusionPipeline.from_pretrained( |
|
"CompVis/stable-diffusion-v1-4", |
|
torch_dtype=torch_dtype, |
|
safety_checker=None |
|
) |
|
|
|
|
|
else: |
|
print("🔄 Fallback a Stable Diffusion...") |
|
pipe = StableDiffusionPipeline.from_pretrained( |
|
"CompVis/stable-diffusion-v1-4", |
|
torch_dtype=torch_dtype, |
|
safety_checker=None |
|
) |
|
|
|
|
|
elif "stable-diffusion-3.5-large" in model_name: |
|
try: |
|
from diffusers import StableDiffusion3Pipeline |
|
print("🌟 Cargando SD 3.5 Large (Premium)...") |
|
print(f"🔧 Modelo: {model_name}") |
|
|
|
|
|
pipe = StableDiffusion3Pipeline.from_pretrained( |
|
model_name, |
|
torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32, |
|
use_auth_token=HF_TOKEN if HF_TOKEN else None |
|
) |
|
|
|
print("✅ SD 3.5 Large cargado exitosamente") |
|
|
|
except Exception as e: |
|
print(f"❌ Error cargando SD 3.5 Large: {e}") |
|
print(f"🔍 Tipo de error: {type(e).__name__}") |
|
|
|
|
|
if "401" in str(e) or "unauthorized" in str(e).lower(): |
|
print("🔐 Error de autenticación. Asegúrate de:") |
|
print(" 1. Tener acceso al modelo SD 3.5 Large en Hugging Face") |
|
print(" 2. Configurar HF_TOKEN en las variables de entorno del Space") |
|
print(" 3. Que el token tenga permisos para acceder a modelos gated") |
|
|
|
|
|
print("🔄 Fallback a SDXL...") |
|
pipe = StableDiffusionPipeline.from_pretrained( |
|
"stabilityai/stable-diffusion-xl-base-1.0", |
|
torch_dtype=torch_dtype, |
|
safety_checker=None |
|
) |
|
|
|
|
|
elif "stable-diffusion-2-1" in model_name: |
|
try: |
|
pipe = StableDiffusionPipeline.from_pretrained( |
|
model_name, |
|
torch_dtype=torch_dtype, |
|
safety_checker=None, |
|
requires_safety_checker=False, |
|
variant="fp16" if use_fp16_variant else None |
|
) |
|
except Exception as e: |
|
print(f"Error cargando SD 2.1: {e}") |
|
|
|
pipe = StableDiffusionPipeline.from_pretrained( |
|
"CompVis/stable-diffusion-v1-4", |
|
torch_dtype=torch_dtype, |
|
safety_checker=None |
|
) |
|
|
|
|
|
elif "sdxl-turbo" in model_name: |
|
try: |
|
print("⚡ Cargando SDXL Turbo con configuración específica...") |
|
from diffusers import AutoPipelineForText2Image |
|
pipe = AutoPipelineForText2Image.from_pretrained( |
|
model_name, |
|
torch_dtype=torch_dtype, |
|
variant="fp16" if use_fp16_variant else None |
|
) |
|
print("✅ SDXL Turbo cargado exitosamente") |
|
except Exception as e: |
|
print(f"❌ Error cargando SDXL Turbo: {e}") |
|
print(f"🔍 Tipo de error: {type(e).__name__}") |
|
|
|
print("🔄 Fallback a SD Turbo...") |
|
pipe = StableDiffusionPipeline.from_pretrained( |
|
"stabilityai/sd-turbo", |
|
torch_dtype=torch_dtype, |
|
safety_checker=None |
|
) |
|
|
|
|
|
elif "sd-turbo" in model_name: |
|
try: |
|
print("⚡ Cargando SD Turbo...") |
|
pipe = StableDiffusionPipeline.from_pretrained( |
|
model_name, |
|
torch_dtype=torch_dtype, |
|
safety_checker=None, |
|
requires_safety_checker=False, |
|
variant="fp16" if use_fp16_variant else None |
|
) |
|
print("✅ SD Turbo cargado exitosamente") |
|
except Exception as e: |
|
print(f"❌ Error cargando SD Turbo: {e}") |
|
|
|
pipe = StableDiffusionPipeline.from_pretrained( |
|
"CompVis/stable-diffusion-v1-4", |
|
torch_dtype=torch_dtype, |
|
safety_checker=None |
|
) |
|
|
|
|
|
elif "ldm-text2im" in model_name: |
|
try: |
|
print("🎨 Cargando Latent Diffusion Model con optimizaciones...") |
|
from diffusers import DiffusionPipeline |
|
|
|
|
|
pipe = DiffusionPipeline.from_pretrained( |
|
model_name, |
|
torch_dtype=torch.float32, |
|
safety_checker=None, |
|
low_cpu_mem_usage=True, |
|
device_map="auto" |
|
) |
|
|
|
print("✅ LDM cargado con optimizaciones de memoria") |
|
|
|
except Exception as e: |
|
print(f"❌ Error cargando LDM: {e}") |
|
print("🔄 Fallback a SD 1.4 (más eficiente)...") |
|
|
|
pipe = StableDiffusionPipeline.from_pretrained( |
|
"CompVis/stable-diffusion-v1-4", |
|
torch_dtype=torch_dtype, |
|
safety_checker=None |
|
) |
|
|
|
|
|
else: |
|
try: |
|
pipe = StableDiffusionPipeline.from_pretrained( |
|
model_name, |
|
torch_dtype=torch_dtype, |
|
safety_checker=None, |
|
variant="fp16" if use_fp16_variant else None |
|
) |
|
except Exception as e: |
|
print(f"Error cargando {model_name}: {e}") |
|
|
|
pipe = StableDiffusionPipeline.from_pretrained( |
|
"CompVis/stable-diffusion-v1-4", |
|
torch_dtype=torch_dtype, |
|
safety_checker=None |
|
) |
|
|
|
load_time = time.time() - start_time |
|
print(f"⏱️ Tiempo de carga: {load_time:.2f} segundos") |
|
|
|
print(f"🚀 Moviendo modelo a dispositivo: {device}") |
|
            try:

                # Note: DiffusionPipeline has no to_empty(); .to() is the standard way to move the pipeline
                pipe = pipe.to(device)

                print("✅ Modelo movido exitosamente usando to()")

            except Exception as e:

                print(f"❌ Error cargando modelo {model_name}: {e}")

                print(f"🔍 Tipo de error: {type(e).__name__}")

                print(f"📋 Detalles del error: {e}")

                raise
|
|
|
|
|
if torch.cuda.is_available(): |
|
print("🔧 Aplicando optimizaciones para H200...") |
|
|
|
|
|
if hasattr(pipe, 'enable_attention_slicing'): |
|
pipe.enable_attention_slicing() |
|
print("✅ Attention slicing habilitado") |
|
|
|
|
|
|
|
|
|
|
|
|
|
if hasattr(pipe, 'enable_vae_slicing'): |
|
pipe.enable_vae_slicing() |
|
print("✅ VAE slicing habilitado") |
|
|
|
|
|
if hasattr(pipe, 'enable_xformers_memory_efficient_attention'): |
|
|
|
if "flux" in model_name.lower() or "black-forest" in model_name.lower(): |
|
print("⚠️ XFormers deshabilitado para modelos FLUX (incompatible)") |
|
else: |
|
try: |
|
pipe.enable_xformers_memory_efficient_attention() |
|
print("✅ XFormers memory efficient attention habilitado") |
|
except Exception as e: |
|
print(f"⚠️ XFormers no disponible: {e}") |
|
print("🔄 Usando atención estándar") |
|
|
|
print(f"✅ Modelo {model_name} cargado exitosamente") |
|
|
|
if torch.cuda.is_available(): |
|
memory_used = torch.cuda.memory_allocated() / 1024**3 |
|
memory_reserved = torch.cuda.memory_reserved() / 1024**3 |
|
print(f"💾 Memoria GPU utilizada: {memory_used:.2f} GB") |
|
print(f"💾 Memoria GPU reservada: {memory_reserved:.2f} GB") |
|
|
|
|
|
if memory_used < 0.1: |
|
print("⚠️ ADVERTENCIA: Memoria GPU muy baja - posible problema de carga") |
|
else: |
|
print("💾 Memoria CPU") |
|
|
|
|
|
model_cache[model_name] = pipe |
|
|
|
except Exception as e: |
|
print(f"❌ Error cargando modelo {model_name}: {e}") |
|
print(f"🔍 Tipo de error: {type(e).__name__}") |
|
|
|
|
|
if "variant" in str(e) and "fp16" in str(e): |
|
print("🔄 Reintentando sin variant fp16...") |
|
try: |
|
pipe = StableDiffusionPipeline.from_pretrained( |
|
model_name, |
|
torch_dtype=torch_dtype, |
|
use_auth_token=HF_TOKEN if HF_TOKEN and ("flux" in model_name.lower() or "black-forest" in model_name.lower()) else None |
|
) |
|
pipe = pipe.to(device) |
|
model_cache[model_name] = pipe |
|
print(f"✅ Modelo {model_name} cargado exitosamente (sin fp16 variant)") |
|
except Exception as e2: |
|
print(f"❌ Error en segundo intento: {e2}") |
|
raise e2 |
|
else: |
|
raise e |
|
else: |
|
print(f"♻️ Modelo {model_name} ya está cargado, reutilizando...") |
|
|
|
return model_cache[model_name] |
|
|
|
def load_video_model(model_name): |
|
"""Cargar modelo de video optimizado para H200 con ZeroGPU""" |
|
if model_name not in model_cache: |
|
print(f"\n🔄 Iniciando carga del modelo de video: {model_name}") |
|
|
|
try: |
|
start_time = time.time() |
|
|
|
|
|
use_fp16 = torch.cuda.is_available() and torch_dtype == torch.float16 |
|
|
|
|
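            # Models treated as "fast": they get reduced default frames/steps later to stay inside the ZeroGPU window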
|
fast_models = [ |
|
"ByteDance/AnimateDiff-Lightning", |
|
"cerspense/zeroscope_v2_576w", |
|
"damo-vilab/text-to-video-ms-1.7b" |
|
] |
|
|
|
|
|
if "wan2.1-t2v-14b" in model_name.lower(): |
|
|
|
print("⚠️ ADVERTENCIA: Wan2.1 T2V 14B es un modelo muy grande (50GB+)") |
|
print("⚠️ Puede agotar la cuota de ZeroGPU rápidamente") |
|
print("🌟 Cargando Wan2.1 T2V 14B (modelo de alta calidad)...") |
|
|
|
try: |
|
from diffusers import AutoencoderKLWan, WanPipeline |
|
|
|
|
|
vae = AutoencoderKLWan.from_pretrained( |
|
model_name, |
|
subfolder="vae", |
|
torch_dtype=torch.float32 |
|
) |
|
|
|
|
|
pipe = WanPipeline.from_pretrained( |
|
model_name, |
|
vae=vae, |
|
torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32 |
|
) |
|
print("✅ Wan2.1 T2V 14B cargado exitosamente") |
|
except Exception as e: |
|
print(f"❌ Error cargando Wan2.1: {e}") |
|
print("🔄 Fallback a modelo rápido...") |
|
|
|
from diffusers import DiffusionPipeline |
|
pipe = DiffusionPipeline.from_pretrained( |
|
"damo-vilab/text-to-video-ms-1.7b", |
|
torch_dtype=torch_dtype, |
|
variant="fp16" if use_fp16 else None |
|
) |
|
print("✅ Fallback exitoso con modelo rápido") |
|
|
|
elif "animatediff-lightning" in model_name.lower(): |
|
|
|
from diffusers import DiffusionPipeline |
|
pipe = DiffusionPipeline.from_pretrained( |
|
model_name, |
|
torch_dtype=torch_dtype, |
|
variant="fp16" if use_fp16 else None |
|
) |
|
print("⚡ Cargando AnimateDiff Lightning (modelo rápido)") |
|
|
|
elif "zeroscope" in model_name.lower(): |
|
|
|
from diffusers import DiffusionPipeline |
|
pipe = DiffusionPipeline.from_pretrained( |
|
model_name, |
|
torch_dtype=torch_dtype, |
|
variant="fp16" if use_fp16 else None |
|
) |
|
print("⚡ Cargando Zeroscope (modelo rápido)") |
|
|
|
elif "text-to-video-ms-1.7b" in model_name.lower(): |
|
|
|
from diffusers import DiffusionPipeline |
|
pipe = DiffusionPipeline.from_pretrained( |
|
model_name, |
|
torch_dtype=torch_dtype, |
|
variant="fp16" if use_fp16 else None |
|
) |
|
print("⚡ Cargando Text-to-Video MS 1.7B (modelo rápido)") |
|
|
|
elif "modelscope-damo" in model_name.lower(): |
|
|
|
from diffusers import DiffusionPipeline |
|
pipe = DiffusionPipeline.from_pretrained( |
|
model_name, |
|
torch_dtype=torch_dtype, |
|
variant="fp16" if use_fp16 else None |
|
) |
|
print("🔄 Cargando ModelScope Text-to-Video (experimental)") |
|
|
|
else: |
|
|
|
print("🔄 Modelo no reconocido, usando fallback...") |
|
from diffusers import DiffusionPipeline |
|
pipe = DiffusionPipeline.from_pretrained( |
|
"damo-vilab/text-to-video-ms-1.7b", |
|
torch_dtype=torch_dtype, |
|
variant="fp16" if use_fp16 else None |
|
) |
|
print("✅ Fallback exitoso con modelo rápido") |
|
|
|
|
|
print("🔧 Aplicando optimizaciones para H200...") |
|
|
|
|
|
if hasattr(pipe, 'enable_attention_slicing'): |
|
pipe.enable_attention_slicing() |
|
print("✅ Attention slicing habilitado") |
|
|
|
|
|
if hasattr(pipe, 'enable_model_cpu_offload'): |
|
pipe.enable_model_cpu_offload() |
|
print("✅ Model CPU offload habilitado") |
|
|
|
|
|
if hasattr(pipe, 'enable_xformers_memory_efficient_attention'): |
|
try: |
|
pipe.enable_xformers_memory_efficient_attention() |
|
print("✅ XFormers memory efficient attention habilitado") |
|
except Exception as e: |
|
print(f"⚠️ XFormers no disponible: {e}") |
|
|
|
|
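            # Note: in some diffusers versions, calling .to(device) after enable_model_cpu_offload() is rejected
            # or undoes the offload hooks; if that happens, skip this explicit move.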
|
if torch.cuda.is_available(): |
|
pipe = pipe.to(device) |
|
print(f"✅ Modelo movido a {device}") |
|
|
|
load_time = time.time() - start_time |
|
print(f"✅ Modelo de video cargado en {load_time:.2f}s") |
|
|
|
model_cache[model_name] = { |
|
"pipeline": pipe, |
|
"type": "video", |
|
"is_fast_model": any(fast_model.lower() in model_name.lower() for fast_model in fast_models) |
|
} |
|
|
|
except Exception as e: |
|
print(f"❌ Error cargando modelo de video {model_name}: {e}") |
|
|
|
try: |
|
print("🔄 Intentando fallback a modelo rápido...") |
|
from diffusers import DiffusionPipeline |
|
pipe = DiffusionPipeline.from_pretrained( |
|
"damo-vilab/text-to-video-ms-1.7b", |
|
torch_dtype=torch_dtype, |
|
variant="fp16" if use_fp16 else None |
|
) |
|
|
|
|
|
if hasattr(pipe, 'enable_attention_slicing'): |
|
pipe.enable_attention_slicing() |
|
if torch.cuda.is_available(): |
|
pipe = pipe.to(device) |
|
|
|
model_cache[model_name] = { |
|
"pipeline": pipe, |
|
"type": "video", |
|
"is_fast_model": True |
|
} |
|
print("✅ Fallback exitoso con modelo rápido") |
|
except Exception as fallback_error: |
|
print(f"❌ Error crítico en fallback de video: {fallback_error}") |
|
raise |
|
|
|
return model_cache[model_name] |
|
|
|
@spaces.GPU(duration=60)
|
def generate_video(prompt, model_name, num_frames=16, num_inference_steps=20): |
|
"""Generar video optimizado con ZeroGPU H200""" |
|
global video_generation_in_progress |
|
|
|
|
|
if video_generation_in_progress: |
|
print("⚠️ Ya hay una generación de video en progreso, esperando...") |
|
return "⏳ Ya hay una generación de video en progreso. Espera a que termine." |
|
|
|
video_generation_in_progress = True |
|
|
|
try: |
|
print(f"🎬 Iniciando generación de video...") |
|
print(f"📝 Modelo: {model_name}") |
|
print(f"📝 Prompt: {prompt}") |
|
print(f"🎞️ Frames: {num_frames}") |
|
print(f"⚡ Pasos: {num_inference_steps}") |
|
|
|
start_time = time.time() |
|
|
|
model_data = load_video_model(model_name) |
|
pipeline = model_data["pipeline"] |
|
is_fast_model = model_data.get("is_fast_model", False) |
|
|
|
|
|
if is_fast_model: |
|
print("⚡ Usando configuración rápida para modelo optimizado") |
|
|
|
optimized_steps = min(num_inference_steps, 15) |
|
optimized_frames = min(num_frames, 16) |
|
else: |
|
print("🎬 Usando configuración estándar") |
|
optimized_steps = num_inference_steps |
|
optimized_frames = num_frames |
|
|
|
print(f"🔧 Parámetros optimizados - Frames: {optimized_frames}, Pasos: {optimized_steps}") |
|
|
|
|
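        # Per-model generation branches: each family gets resolution/guidance defaults that match its training setup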
|
if "wan2.1-t2v-14b" in model_name.lower(): |
|
|
|
print("🌟 Generando video con Wan2.1 T2V 14B...") |
|
|
|
|
|
result = pipeline( |
|
prompt=prompt, |
|
negative_prompt="Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards", |
|
height=480, |
|
width=832, |
|
num_frames=optimized_frames, |
|
guidance_scale=5.0, |
|
num_inference_steps=optimized_steps |
|
) |
|
print("✅ Video Wan2.1 T2V 14B generado") |
|
|
|
elif "zeroscope" in model_name.lower(): |
|
|
|
result = pipeline( |
|
prompt, |
|
num_inference_steps=optimized_steps, |
|
num_frames=optimized_frames, |
|
height=256, |
|
width=256, |
|
guidance_scale=7.5 |
|
) |
|
print("✅ Video Zeroscope generado") |
|
|
|
elif "animatediff-lightning" in model_name.lower(): |
|
|
|
result = pipeline( |
|
prompt, |
|
num_inference_steps=optimized_steps, |
|
num_frames=optimized_frames, |
|
guidance_scale=7.5 |
|
) |
|
print("✅ Video AnimateDiff Lightning generado") |
|
|
|
elif "text-to-video-ms-1.7b" in model_name.lower(): |
|
|
|
result = pipeline( |
|
prompt, |
|
num_inference_steps=optimized_steps, |
|
num_frames=optimized_frames, |
|
guidance_scale=7.5 |
|
) |
|
print("✅ Video Text-to-Video MS 1.7B generado") |
|
|
|
elif "modelscope-damo" in model_name.lower(): |
|
|
|
result = pipeline( |
|
prompt, |
|
num_inference_steps=optimized_steps, |
|
num_frames=optimized_frames, |
|
guidance_scale=7.5 |
|
) |
|
print("✅ Video ModelScope generado") |
|
|
|
else: |
|
|
|
result = pipeline( |
|
prompt, |
|
num_inference_steps=optimized_steps, |
|
num_frames=optimized_frames, |
|
guidance_scale=7.5 |
|
) |
|
print("✅ Video generado con configuración genérica") |
|
|
|
generation_time = time.time() - start_time |
|
print(f"⏱️ Tiempo de generación: {generation_time:.2f}s") |
|
|
|
|
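        # Normalize the pipeline output: different pipelines return .frames, .videos, or a raw array/list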
|
if hasattr(result, 'frames'): |
|
video_frames = result.frames |
|
elif hasattr(result, 'videos'): |
|
video_frames = result.videos |
|
else: |
|
video_frames = result |
|
|
|
|
|
if "wan2.1" in model_name.lower(): |
|
|
|
if hasattr(result, 'frames') and len(result.frames) > 0: |
|
video_frames = result.frames[0] |
|
print(f"📐 Forma del video Wan2.1: {video_frames.shape}") |
|
else: |
|
video_frames = result |
|
|
|
|
|
if isinstance(video_frames, list): |
|
if len(video_frames) == 1: |
|
return video_frames[0] |
|
else: |
|
return video_frames |
|
else: |
|
|
|
if hasattr(video_frames, 'shape'): |
|
import numpy as np |
|
print(f"📐 Forma del video: {video_frames.shape}") |
|
|
|
|
|
if len(video_frames.shape) == 4: |
|
|
|
frames_list = [] |
|
for i in range(video_frames.shape[0]): |
|
frame = video_frames[i] |
|
|
|
if frame.dtype == np.float32 or frame.dtype == np.float16: |
|
frame = (frame * 255).astype(np.uint8) |
|
frames_list.append(frame) |
|
|
|
|
|
import imageio |
|
import tempfile |
|
import os |
|
|
|
|
|
with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmp_file: |
|
temp_path = tmp_file.name |
|
|
|
|
|
fps = 8 if is_fast_model else 6 |
|
imageio.mimsave(temp_path, frames_list, fps=fps) |
|
|
|
print(f"💾 Video guardado en: {temp_path}") |
|
print(f"🎬 FPS del video: {fps}") |
|
return temp_path |
|
|
|
elif len(video_frames.shape) == 5: |
|
|
|
print("🔄 Procesando video con forma (batch, frames, height, width, channels)") |
|
frames = video_frames[0] |
|
|
|
|
|
frames_list = [] |
|
for i in range(frames.shape[0]): |
|
frame = frames[i] |
|
|
|
if frame.dtype == np.float32 or frame.dtype == np.float16: |
|
frame = (frame * 255).astype(np.uint8) |
|
frames_list.append(frame) |
|
|
|
|
|
import imageio |
|
import tempfile |
|
import os |
|
|
|
|
|
with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmp_file: |
|
temp_path = tmp_file.name |
|
|
|
|
|
fps = 8 if is_fast_model else 6 |
|
imageio.mimsave(temp_path, frames_list, fps=fps) |
|
|
|
print(f"💾 Video guardado en: {temp_path}") |
|
print(f"🎬 FPS del video: {fps}") |
|
return temp_path |
|
else: |
|
print(f"❌ Forma no reconocida: {video_frames.shape}") |
|
return None |
|
else: |
|
return video_frames |
|
|
|
except Exception as e: |
|
error_message = str(e) |
|
print(f"❌ Error generando video: {error_message}") |
|
print(f"🔍 Tipo de error: {type(e).__name__}") |
|
|
|
|
|
if "quota exceeded" in error_message.lower() or "gpu quota" in error_message.lower(): |
|
raise Exception(f"🚫 Cuota de ZeroGPU agotada. Intenta en unos minutos. Error: {error_message}") |
|
|
|
if "401" in error_message or "unauthorized" in error_message: |
|
raise Exception(f"🔐 Error de autenticación. Verifica el acceso al modelo {model_name}. Error: {error_message}") |
|
|
|
if "404" in error_message or "not found" in error_message: |
|
raise Exception(f"❌ Modelo {model_name} no encontrado. Error: {error_message}") |
|
|
|
if "timeout" in error_message.lower() or "timed out" in error_message.lower(): |
|
raise Exception(f"⏰ Timeout en la generación. El modelo {model_name} puede estar sobrecargado. Error: {error_message}") |
|
|
|
if "out of memory" in error_message.lower() or "oom" in error_message.lower(): |
|
raise Exception(f"💾 Error de memoria GPU. Intenta con menos frames o pasos. Error: {error_message}") |
|
|
|
import traceback |
|
traceback.print_exc() |
|
raise Exception(f"Error generando video con {model_name}: {error_message}") |
|
|
|
finally: |
|
|
|
video_generation_in_progress = False |
|
print("✅ Generación de video completada, liberando recursos") |
|
|
|
def generate_text(prompt, model_name, max_length=100): |
|
"""Generar texto con el modelo seleccionado""" |
|
try: |
|
model_data = load_text_model(model_name) |
|
tokenizer = model_data["tokenizer"] |
|
model = model_data["model"] |
|
|
|
inputs = tokenizer.encode(prompt, return_tensors="pt") |
|
|
|
with torch.no_grad(): |
|
outputs = model.generate( |
|
inputs, |
|
max_length=max_length, |
|
num_return_sequences=1, |
|
temperature=0.7, |
|
do_sample=True, |
|
pad_token_id=tokenizer.eos_token_id |
|
) |
|
|
|
response = tokenizer.decode(outputs[0], skip_special_tokens=True) |
|
|
|
if "dialogpt" in model_name.lower(): |
|
response = response.replace(prompt, "").strip() |
|
|
|
return response |
|
|
|
except Exception as e: |
|
return f"Error generando texto: {str(e)}" |
|
|
|
@spaces.GPU(duration=30)
|
def generate_image(prompt, model_name, negative_prompt="", seed=0, width=1024, height=1024, guidance_scale=7.5, num_inference_steps=20): |
|
"""Generar imagen optimizada para H200 con estimación precisa de cuota""" |
|
try: |
|
print(f"\n🎨 Iniciando generación de imagen con H200...") |
|
print(f"📝 Prompt: {prompt}") |
|
print(f"🚫 Negative prompt: {negative_prompt}") |
|
print(f"🎯 Modelo seleccionado: {model_name}") |
|
print(f"🔄 Inference steps: {num_inference_steps}") |
|
print(f"🎲 Seed: {seed}") |
|
print(f"📐 Dimensiones: {width}x{height}") |
|
print(f"🎯 Guidance scale: {guidance_scale}") |
|
|
|
|
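        # Rough wall-clock estimate (seconds), used only for logging against the ZeroGPU quota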
|
estimated_time = 5 |
|
if "turbo" in model_name.lower(): |
|
estimated_time = 2 |
|
elif "ldm-text2im" in model_name.lower(): |
|
estimated_time = 8 |
|
        if num_inference_steps > 20:

            estimated_time += (num_inference_steps - 20) * 0.2

        if width > 512 or height > 512:

            estimated_time += 2
|
|
|
print(f"⏱️ Tiempo estimado: {estimated_time:.1f} segundos") |
|
|
|
start_time = time.time() |
|
|
|
|
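        # Coerce string parameters (e.g. when the function is called through an HTTP API) to numbers, with safe defaults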
|
if isinstance(num_inference_steps, str): |
|
try: |
|
num_inference_steps = int(num_inference_steps) |
|
except ValueError: |
|
num_inference_steps = 20 |
|
print(f"⚠️ No se pudo convertir '{num_inference_steps}' a entero, usando 20") |
|
|
|
if isinstance(seed, str): |
|
try: |
|
seed = int(seed) |
|
except ValueError: |
|
seed = 0 |
|
print(f"⚠️ No se pudo convertir '{seed}' a entero, usando 0") |
|
|
|
if isinstance(width, str): |
|
try: |
|
width = int(width) |
|
except ValueError: |
|
width = 1024 |
|
print(f"⚠️ No se pudo convertir '{width}' a entero, usando 1024") |
|
|
|
if isinstance(height, str): |
|
try: |
|
height = int(height) |
|
except ValueError: |
|
height = 1024 |
|
print(f"⚠️ No se pudo convertir '{height}' a entero, usando 1024") |
|
|
|
if isinstance(guidance_scale, str): |
|
try: |
|
guidance_scale = float(guidance_scale) |
|
except ValueError: |
|
guidance_scale = 7.5 |
|
print(f"⚠️ No se pudo convertir '{guidance_scale}' a float, usando 7.5") |
|
|
|
|
|
pipe = load_image_model(model_name) |
|
|
|
|
|
if pipe is None: |
|
print("❌ Error: El modelo no se pudo cargar") |
|
raise Exception("Modelo no disponible") |
|
|
|
print(f"✅ Modelo cargado correctamente: {type(pipe).__name__}") |
|
|
|
|
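        # Clamp guidance scale and steps to each model family's recommended operating range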
|
if "sdxl-turbo" in model_name.lower(): |
|
|
|
guidance_scale = 0.0 |
|
num_inference_steps = 1 |
|
print(f"⚡ SDXL Turbo - Ajustando parámetros: guidance={guidance_scale}, steps={num_inference_steps}") |
|
elif "sd-turbo" in model_name.lower(): |
|
guidance_scale = min(guidance_scale, 1.0) |
|
num_inference_steps = min(num_inference_steps, 2) |
|
print(f"⚡ SD Turbo - Ajustando parámetros: guidance={guidance_scale}, steps={num_inference_steps}") |
|
elif "turbo" in model_name.lower(): |
|
guidance_scale = min(guidance_scale, 1.0) |
|
num_inference_steps = min(num_inference_steps, 2) |
|
print(f"⚡ Modelo turbo - Ajustando parámetros: guidance={guidance_scale}, steps={num_inference_steps}") |
|
elif "lightning" in model_name.lower(): |
|
guidance_scale = min(guidance_scale, 1.0) |
|
num_inference_steps = max(num_inference_steps, 2) |
|
print(f"⚡ Modelo lightning - Ajustando parámetros: guidance={guidance_scale}, steps={num_inference_steps}") |
|
elif "flux" in model_name.lower(): |
|
guidance_scale = max(3.5, min(guidance_scale, 7.5)) |
|
num_inference_steps = max(10, num_inference_steps) |
|
print(f"🔐 Modelo FLUX - Ajustando parámetros: guidance={guidance_scale}, steps={num_inference_steps}") |
|
elif "stable-diffusion-3.5-large" in model_name: |
|
|
|
guidance_scale = max(3.5, min(guidance_scale, 4.5)) |
|
num_inference_steps = max(20, num_inference_steps) |
|
print(f"🌟 SD 3.5 Large - Ajustando parámetros: guidance={guidance_scale}, steps={num_inference_steps}") |
|
elif "ldm-text2im" in model_name.lower(): |
|
|
|
guidance_scale = min(guidance_scale, 2.0) |
|
num_inference_steps = min(num_inference_steps, 8) |
|
width = min(width, 512) |
|
height = min(height, 512) |
|
print(f"🎨 LDM - Ajustando parámetros ultra conservadores: guidance={guidance_scale}, steps={num_inference_steps}, size={width}x{height}") |
|
else: |
|
|
|
num_inference_steps = min(num_inference_steps, 15) |
|
print(f"🎨 Modelo estándar - Ajustando parámetros: guidance={guidance_scale}, steps={num_inference_steps}") |
|
|
|
generator = torch.Generator(device=device).manual_seed(seed) |
|
|
|
print("🎨 Iniciando generación de imagen con H200...") |
|
inference_start = time.time() |
|
|
|
|
|
if torch.cuda.is_available(): |
|
print("🚀 Aplicando optimizaciones específicas para H200...") |
|
|
|
|
|
torch.cuda.empty_cache() |
|
|
|
|
|
|
|
|
|
|
if "sdxl-turbo" in model_name.lower(): |
|
|
|
generation_kwargs = { |
|
"prompt": prompt, |
|
"height": height, |
|
"width": width, |
|
"guidance_scale": 0.0, |
|
"num_inference_steps": 1, |
|
"generator": generator |
|
} |
|
|
|
print("⚡ Usando configuración específica para SDXL Turbo (sin negative_prompt)") |
|
else: |
|
|
|
generation_kwargs = { |
|
"prompt": prompt, |
|
"height": height, |
|
"width": width, |
|
"guidance_scale": guidance_scale, |
|
"num_inference_steps": num_inference_steps, |
|
"generator": generator |
|
} |
|
|
|
|
|
if negative_prompt and negative_prompt.strip(): |
|
generation_kwargs["negative_prompt"] = negative_prompt.strip() |
|
|
|
|
|
result = pipe(**generation_kwargs) |
|
|
|
|
|
if hasattr(result, 'images') and len(result.images) > 0: |
|
image = result.images[0] |
|
|
|
|
|
if image is not None: |
|
|
|
img_array = np.array(image) |
|
if img_array.size > 0: |
|
|
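                        # Heuristic retry: an (almost) all-black output is often a symptom of numerical issues (e.g. fp16 NaNs), so regenerate with softer settings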
|
if np.all(img_array == 0) or np.all(img_array < 10): |
|
print("⚠️ ADVERTENCIA: Imagen generada es completamente negra") |
|
print("🔄 Reintentando con parámetros ajustados...") |
|
|
|
|
|
generation_kwargs["guidance_scale"] = max(1.0, guidance_scale * 0.8) |
|
generation_kwargs["num_inference_steps"] = max(10, num_inference_steps) |
|
|
|
result = pipe(**generation_kwargs) |
|
image = result.images[0] |
|
else: |
|
print("✅ Imagen generada correctamente") |
|
else: |
|
print("❌ Error: Imagen vacía") |
|
raise Exception("Imagen vacía generada") |
|
else: |
|
print("❌ Error: Imagen es None") |
|
raise Exception("Imagen es None") |
|
else: |
|
print("❌ Error: No se generaron imágenes") |
|
raise Exception("No se generaron imágenes") |
|
else: |
|
|
|
generation_kwargs = { |
|
"prompt": prompt, |
|
"height": height, |
|
"width": width, |
|
"guidance_scale": guidance_scale, |
|
"num_inference_steps": num_inference_steps, |
|
"generator": generator |
|
} |
|
|
|
if negative_prompt and negative_prompt.strip(): |
|
generation_kwargs["negative_prompt"] = negative_prompt.strip() |
|
|
|
result = pipe(**generation_kwargs) |
|
image = result.images[0] |
|
|
|
inference_time = time.time() - inference_start |
|
total_time = time.time() - start_time |
|
|
|
print(f"✅ Imagen generada exitosamente con H200!") |
|
print(f"⏱️ Tiempo de inferencia: {inference_time:.2f} segundos") |
|
print(f"⏱️ Tiempo total: {total_time:.2f} segundos") |
|
print(f"🎲 Seed final: {seed}") |
|
|
|
if torch.cuda.is_available(): |
|
print(f"💾 Memoria GPU utilizada: {torch.cuda.memory_allocated() / 1024**3:.2f} GB") |
|
print(f"💾 Memoria GPU libre: {torch.cuda.memory_reserved() / 1024**3:.2f} GB") |
|
print(f"🚀 Velocidad H200: {num_inference_steps/inference_time:.1f} steps/segundo") |
|
else: |
|
print("💾 Memoria CPU") |
|
|
|
return image |
|
|
|
except Exception as e: |
|
print(f"❌ Error en inferencia: {e}") |
|
print(f"🔍 Tipo de error: {type(e).__name__}") |
|
print(f"📋 Detalles del error: {str(e)}") |
|
|
|
error_image = Image.new('RGB', (512, 512), color='red') |
|
return error_image |
|
|
|
def chat_with_model(message, history, model_name): |
|
"""Función de chat para DialoGPT""" |
|
try: |
|
model_data = load_text_model(model_name) |
|
tokenizer = model_data["tokenizer"] |
|
model = model_data["model"] |
|
|
|
conversation = "" |
|
for msg in history: |
|
if msg["role"] == "user": |
|
conversation += f"User: {msg['content']}\n" |
|
elif msg["role"] == "assistant": |
|
conversation += f"Assistant: {msg['content']}\n" |
|
|
|
conversation += f"User: {message}\nAssistant:" |
|
|
|
inputs = tokenizer.encode(conversation, return_tensors="pt", truncation=True, max_length=512) |
|
|
|
with torch.no_grad(): |
|
outputs = model.generate( |
|
inputs, |
|
max_length=inputs.shape[1] + 50, |
|
temperature=0.7, |
|
do_sample=True, |
|
pad_token_id=tokenizer.eos_token_id |
|
) |
|
|
|
response = tokenizer.decode(outputs[0], skip_special_tokens=True) |
|
response = response.split("Assistant:")[-1].strip() |
|
|
|
history.append({"role": "user", "content": message}) |
|
history.append({"role": "assistant", "content": response}) |
|
|
|
return history |
|
|
|
except Exception as e: |
|
error_msg = f"Error en el chat: {str(e)}" |
|
history.append({"role": "user", "content": message}) |
|
history.append({"role": "assistant", "content": error_msg}) |
|
return history |
|
|
|
|
|
def check_gated_model_access(): |
|
"""Verificar si tenemos acceso a modelos gated""" |
|
if not HF_TOKEN: |
|
return False |
|
|
|
try: |
|
|
|
from huggingface_hub import model_info |
|
info = model_info("black-forest-labs/FLUX.1-dev", token=HF_TOKEN) |
|
print(f"✅ Acceso verificado a FLUX.1-dev: {info.modelId}") |
|
return True |
|
except Exception as e: |
|
print(f"❌ No se pudo verificar acceso a modelos gated: {e}") |
|
return False |
|
|
|
|
|
GATED_ACCESS = check_gated_model_access() |
|
|
|
|
|
print("=" * 60) |
|
print("🚀 SPACE NTIA - ESTADO DE CONFIGURACIÓN") |
|
print("=" * 60) |
|
print(f"🔑 Token HF configurado: {'✅' if HF_TOKEN else '❌'}") |
|
print(f"🔐 Acceso a modelos gated: {'✅' if GATED_ACCESS else '❌'}") |
|
print(f"🎨 Modelos FLUX disponibles: {'✅' if GATED_ACCESS else '❌'}") |
|
print("=" * 60) |
|
|
|
if not GATED_ACCESS: |
|
print("⚠️ Para usar modelos FLUX:") |
|
print(" 1. Configura HF_TOKEN en las variables de entorno del Space") |
|
print(" 2. Solicita acceso a los modelos FLUX en Hugging Face") |
|
print(" 3. Acepta los términos de licencia") |
|
print("=" * 60) |
|
|
|
|
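# Gradio UI: one tab per task (text, chat, translation, image, video)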
|
with gr.Blocks(title="Modelos Libres de IA", theme=gr.themes.Soft()) as demo: |
|
gr.Markdown("# 🤖 Modelos Libres de IA") |
|
gr.Markdown("### Genera texto, imágenes y videos sin límites de cuota") |
|
|
|
with gr.Tabs(): |
|
|
|
with gr.TabItem("📝 Generación de Texto"): |
|
with gr.Row(): |
|
with gr.Column(): |
|
text_model = gr.Dropdown( |
|
choices=list(MODELS["text"].keys()), |
|
value="microsoft/DialoGPT-medium", |
|
label="Modelo de Texto" |
|
) |
|
text_prompt = gr.Textbox( |
|
label="Prompt", |
|
placeholder="Escribe tu prompt aquí...", |
|
lines=3 |
|
) |
|
max_length = gr.Slider( |
|
minimum=50, |
|
maximum=200, |
|
value=100, |
|
step=10, |
|
label="Longitud máxima" |
|
) |
|
text_btn = gr.Button("Generar Texto", variant="primary") |
|
|
|
with gr.Column(): |
|
text_output = gr.Textbox( |
|
label="Resultado", |
|
lines=10, |
|
interactive=False |
|
) |
|
|
|
text_btn.click( |
|
generate_text, |
|
inputs=[text_prompt, text_model, max_length], |
|
outputs=text_output |
|
) |
|
|
|
|
|
with gr.TabItem("💬 Chat"): |
|
with gr.Row(): |
|
with gr.Column(): |
|
chat_model = gr.Dropdown( |
|
choices=list(MODELS["chat"].keys()), |
|
value="microsoft/DialoGPT-medium", |
|
label="Modelo de Chat" |
|
) |
|
|
|
with gr.Column(): |
|
chatbot = gr.Chatbot( |
|
label="Chat", |
|
height=400, |
|
type="messages" |
|
) |
|
chat_input = gr.Textbox( |
|
label="Mensaje", |
|
placeholder="Escribe tu mensaje...", |
|
lines=2 |
|
) |
|
chat_btn = gr.Button("Enviar", variant="primary") |
|
|
|
chat_btn.click( |
|
chat_with_model, |
|
inputs=[chat_input, chatbot, chat_model], |
|
outputs=[chatbot] |
|
) |
|
|
|
chat_input.submit( |
|
chat_with_model, |
|
inputs=[chat_input, chatbot, chat_model], |
|
outputs=[chatbot] |
|
) |
|
|
|
|
|
with gr.TabItem("🌐 Traducción"): |
|
with gr.Row(): |
|
with gr.Column(): |
|
translate_model = gr.Dropdown( |
|
choices=["Helsinki-NLP/opus-mt-es-en", "Helsinki-NLP/opus-mt-en-es"], |
|
value="Helsinki-NLP/opus-mt-es-en", |
|
label="Modelo de Traducción" |
|
) |
|
translate_text = gr.Textbox( |
|
label="Texto a traducir", |
|
placeholder="Escribe el texto que quieres traducir...", |
|
lines=3 |
|
) |
|
translate_btn = gr.Button("Traducir", variant="primary") |
|
|
|
with gr.Column(): |
|
translate_output = gr.Textbox( |
|
label="Traducción", |
|
lines=3, |
|
interactive=False |
|
) |
|
|
|
translate_btn.click( |
|
generate_text, |
|
inputs=[translate_text, translate_model, gr.Slider(value=100, visible=False)], |
|
outputs=translate_output |
|
) |
|
|
|
|
|
with gr.TabItem("🎨 Generación de Imágenes"): |
|
with gr.Row(): |
|
with gr.Column(): |
|
|
|
image_model = gr.Dropdown( |
|
choices=list(MODELS["image"].keys()), |
|
value="CompVis/stable-diffusion-v1-4", |
|
label="Modelo", |
|
info="Select a high-quality model (FLUX models require HF_TOKEN)" |
|
) |
|
|
|
|
|
image_prompt = gr.Textbox( |
|
label="Prompt", |
|
placeholder="Describe la imagen que quieres generar...", |
|
lines=3 |
|
) |
|
|
|
|
|
negative_prompt = gr.Textbox( |
|
label="Negative prompt", |
|
placeholder="Enter a negative prompt (optional)", |
|
lines=2 |
|
) |
|
|
|
|
|
with gr.Accordion("Advanced Settings", open=False): |
|
with gr.Row(): |
|
with gr.Column(): |
|
seed = gr.Slider( |
|
minimum=0, |
|
maximum=2147483647, |
|
value=324354329, |
|
step=1, |
|
label="Seed", |
|
info="Random seed for generation" |
|
) |
|
|
|
with gr.Column(): |
|
guidance_scale = gr.Slider( |
|
minimum=0, |
|
maximum=20, |
|
value=7.5, |
|
step=0.1, |
|
label="Guidance scale", |
|
info="Controls how closely the image follows the prompt (higher = more adherence)" |
|
) |
|
|
|
with gr.Row(): |
|
with gr.Column(): |
|
width = gr.Slider( |
|
minimum=256, |
|
maximum=1024, |
|
value=1024, |
|
step=64, |
|
label="Width" |
|
) |
|
height = gr.Slider( |
|
minimum=256, |
|
maximum=1024, |
|
value=1024, |
|
step=64, |
|
label="Height" |
|
) |
|
|
|
with gr.Column(): |
|
num_inference_steps = gr.Slider( |
|
minimum=1, |
|
maximum=100, |
|
value=20, |
|
step=1, |
|
label="Number of inference steps", |
|
info="More steps = higher quality but slower generation" |
|
) |
|
|
|
|
|
image_btn = gr.Button("Generar Imagen", variant="primary") |
|
|
|
with gr.Column(): |
|
|
|
model_info = gr.Markdown( |
|
value="**Model Info:** CompVis/stable-diffusion-v1-4\n\n" |
|
"🎨 Stable Diffusion v1.4 • Recommended steps: 20-50 • " |
|
"Guidance scale: 7.5-15 • Best for: General purpose\n\n" |
|
"**Status:** ✅ Available" |
|
) |
|
|
|
|
|
examples = gr.Examples( |
|
examples=[ |
|
["Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"], |
|
["An astronaut riding a green horse"], |
|
["A delicious ceviche cheesecake slice"], |
|
["Futuristic AI assistant in a glowing galaxy, neon lights, sci-fi style, cinematic"], |
|
["Portrait of a beautiful woman, realistic, high quality, detailed"], |
|
["Anime girl with blue hair, detailed, high quality"], |
|
["Cyberpunk city at night, neon lights, detailed, 8k"], |
|
["Fantasy landscape with mountains and dragons, epic, detailed"] |
|
], |
|
inputs=image_prompt |
|
) |
|
|
|
|
|
image_output = gr.Image( |
|
label="Imagen Generada", |
|
type="pil" |
|
) |
|
|
|
|
|
def update_model_info(model_name): |
|
model_descriptions = { |
|
"CompVis/stable-diffusion-v1-4": "🎨 Stable Diffusion v1.4 • Recommended steps: 20-50 • Guidance scale: 7.5-15 • Best for: General purpose", |
|
"stabilityai/stable-diffusion-2-1": "🎨 Stable Diffusion 2.1 • Recommended steps: 20-50 • Guidance scale: 7.5-15 • Best for: High quality", |
|
"stabilityai/stable-diffusion-xl-base-1.0": "🎨 SDXL Base • Recommended steps: 25-50 • Guidance scale: 7.5-15 • Best for: High resolution", |
|
"stabilityai/sdxl-turbo": "⚡ SDXL Turbo • Recommended steps: 1-4 • Guidance scale: 1.0 • Best for: Fast generation", |
|
"stabilityai/sd-turbo": "⚡ SD Turbo • Recommended steps: 1-4 • Guidance scale: 1.0 • Best for: Fast generation", |
|
"black-forest-labs/FLUX.1-dev": "🔐 FLUX Model - High quality • Recommended steps: 20-50 • Guidance scale: 3.5-7.5 • Best for: Professional results", |
|
"black-forest-labs/FLUX.1-schnell": "🔐 FLUX Schnell - Fast quality • Recommended steps: 15-30 • Guidance scale: 3.5-7.5 • Best for: Quick professional results" |
|
} |
|
|
|
description = model_descriptions.get(model_name, "🎨 Model • Recommended steps: 20-50 • Guidance scale: 7.5-15 • Best for: General purpose") |
|
return f"**Model Info:** {model_name}\n\n{description}\n\n**Status:** ✅ Available" |
|
|
|
|
|
image_model.change( |
|
update_model_info, |
|
inputs=[image_model], |
|
outputs=[model_info] |
|
) |
|
|
|
image_btn.click( |
|
generate_image, |
|
inputs=[ |
|
image_prompt, |
|
image_model, |
|
negative_prompt, |
|
seed, |
|
width, |
|
height, |
|
guidance_scale, |
|
num_inference_steps |
|
], |
|
outputs=image_output |
|
) |
|
|
|
|
|
with gr.TabItem("🎬 Generación de Videos"): |
|
with gr.Row(): |
|
with gr.Column(): |
|
video_model = gr.Dropdown( |
|
choices=list(MODELS["video"].keys()), |
|
value="damo-vilab/text-to-video-ms-1.7b", |
|
label="Modelo de Video", |
|
info="⚡ Modelos marcados son más rápidos" |
|
) |
|
video_prompt = gr.Textbox( |
|
label="Prompt de Video", |
|
placeholder="Describe el video que quieres generar...", |
|
lines=3 |
|
) |
|
|
|
with gr.Accordion("⚡ Configuración Rápida", open=True): |
|
with gr.Row(): |
|
with gr.Column(): |
|
num_frames = gr.Slider( |
|
minimum=8, |
|
maximum=32, |
|
value=16, |
|
step=4, |
|
label="Número de frames", |
|
info="Menos frames = más rápido" |
|
) |
|
with gr.Column(): |
|
video_steps = gr.Slider( |
|
minimum=5, |
|
maximum=50, |
|
value=15, |
|
step=5, |
|
label="Pasos de inferencia", |
|
info="Menos pasos = más rápido" |
|
) |
|
|
|
video_btn = gr.Button("🎬 Generar Video", variant="primary") |
|
|
|
with gr.Column(): |
|
|
|
video_model_info = gr.Markdown( |
|
value="**Modelo:** damo-vilab/text-to-video-ms-1.7b\n\n" |
|
"⚡ Text-to-Video MS 1.7B • Frames recomendados: 8-16 • " |
|
"Pasos recomendados: 10-20 • Velocidad: Rápida\n\n" |
|
"**Estado:** ✅ Disponible • **Optimizado para ZeroGPU**" |
|
) |
|
|
|
|
|
video_examples = gr.Examples( |
|
examples=[ |
|
["A cat walking in a garden"], |
|
["A car driving on a highway"], |
|
["A person dancing"], |
|
["Waves crashing on the beach"], |
|
["A butterfly flying in a flower field"], |
|
["A rocket launching into space"], |
|
["A robot walking in a futuristic city"], |
|
["A bird flying over mountains"] |
|
], |
|
inputs=video_prompt |
|
) |
|
|
|
video_output = gr.Video( |
|
label="Video Generado", |
|
format="mp4" |
|
) |
|
|
|
|
|
def update_video_model_info(model_name): |
|
model_descriptions = { |
|
"ByteDance/AnimateDiff-Lightning": "⚡ AnimateDiff Lightning • Frames recomendados: 8-16 • Pasos recomendados: 10-20 • Velocidad: Muy rápida", |
|
"cerspense/zeroscope_v2_576w": "⚡ Zeroscope v2 576w • Frames recomendados: 8-16 • Pasos recomendados: 10-20 • Velocidad: Rápida", |
|
"damo-vilab/text-to-video-ms-1.7b": "⚡ Text-to-Video MS 1.7B • Frames recomendados: 8-16 • Pasos recomendados: 10-20 • Velocidad: Rápida", |
|
"cerspense/zeroscope_v2_XL": "🎬 Zeroscope v2 XL • Frames recomendados: 12-24 • Pasos recomendados: 20-30 • Velocidad: Media", |
|
"Wan-AI/Wan2.1-T2V-14B-Diffusers": "🌟 Wan2.1 T2V 14B • Frames recomendados: 16-32 • Pasos recomendados: 25-40 • Velocidad: Lenta • ⚠️ Requiere mucho espacio (50GB+)", |
|
"ali-vilab/modelscope-damo-text-to-video-synthesis": "🔄 ModelScope Text-to-Video • Frames recomendados: 8-16 • Pasos recomendados: 15-25 • Velocidad: Experimental" |
|
} |
|
|
|
description = model_descriptions.get(model_name, "🎬 Modelo • Frames recomendados: 12-24 • Pasos recomendados: 20-30 • Velocidad: Media") |
|
is_fast = "⚡" in description |
|
status = "✅ Disponible • **Optimizado para ZeroGPU**" if is_fast else "✅ Disponible" |
|
|
|
return f"**Modelo:** {model_name}\n\n{description}\n\n**Estado:** {status}" |
|
|
|
|
|
video_model.change( |
|
update_video_model_info, |
|
inputs=[video_model], |
|
outputs=[video_model_info] |
|
) |
|
|
|
video_btn.click( |
|
generate_video, |
|
inputs=[video_prompt, video_model, num_frames, video_steps], |
|
outputs=video_output |
|
) |
|
|
|
|
|
if __name__ == "__main__": |
|
demo.launch() |