import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from diffusers import StableDiffusionPipeline, DiffusionPipeline
import requests
from PIL import Image
import io
import base64
import os
from huggingface_hub import login


HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN:
    try:
        login(token=HF_TOKEN)
        print("✅ Authenticated with Hugging Face")
    except Exception as e:
        print(f"⚠️ Authentication error: {e}")
else:
    print("⚠️ HF_TOKEN not found - gated models will not be available")

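# Note: the token is read from the environment. On a Hugging Face Space it can be stored as
# a repository secret named HF_TOKEN; locally it can be exported before launching, e.g.
#   export HF_TOKEN=hf_xxx        # hf_xxx is a placeholder for a real access token
#   python app.py                 # or whatever this script is saved as
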
MODELS = {
    "text": {
        "microsoft/DialoGPT-medium": "Conversational chat",
        "microsoft/DialoGPT-large": "Advanced conversational chat",
        "microsoft/DialoGPT-small": "Fast conversational chat",
        "gpt2": "Text generation",
        "gpt2-medium": "Medium GPT-2",
        "gpt2-large": "Large GPT-2",
        "distilgpt2": "Optimized GPT-2",
        "EleutherAI/gpt-neo-125M": "Small GPT-Neo",
        "EleutherAI/gpt-neo-1.3B": "Medium GPT-Neo",
        "facebook/opt-125m": "Small OPT",
        "facebook/opt-350m": "Medium OPT",
        "bigscience/bloom-560m": "Multilingual BLOOM",
        "bigscience/bloom-1b1": "Large BLOOM",
        "Helsinki-NLP/opus-mt-es-en": "Spanish-English translator",
        "Helsinki-NLP/opus-mt-en-es": "English-Spanish translator"
    },
    "image": {
        "CompVis/stable-diffusion-v1-4": "Stable Diffusion v1.4 (Free)",
        "stabilityai/stable-diffusion-2-1": "Stable Diffusion 2.1",
        "stabilityai/stable-diffusion-xl-base-1.0": "SDXL Base",
        "stabilityai/stable-diffusion-3-medium": "SD 3 Medium",
        "prompthero/openjourney": "Midjourney Style",
        "WarriorMama777/OrangeMixs": "Orange Mixs",
        "hakurei/waifu-diffusion": "Waifu Diffusion",
        "black-forest-labs/FLUX.1-schnell": "FLUX.1 Schnell (Requires access)",
        "black-forest-labs/FLUX.1-dev": "FLUX.1 Dev (Requires access)"
    },
    "video": {
        "damo-vilab/text-to-video-ms-1.7b": "Text-to-Video MS 1.7B (Free)",
        "ali-vilab/text-to-video-ms-1.7b": "Text-to-Video MS 1.7B Alt",
        "cerspense/zeroscope_v2_576w": "Zeroscope v2 576w (Free)",
        "cerspense/zeroscope_v2_XL": "Zeroscope v2 XL (Free)",
        "ByteDance/AnimateDiff-Lightning": "AnimateDiff Lightning (Free)",
        "THUDM/CogVideoX-5b": "CogVideoX 5B (Free)",
        "rain1011/pyramid-flow-sd3": "Pyramid Flow SD3 (Free)"
    },
    "chat": {
        "microsoft/DialoGPT-medium": "Conversational chat",
        "microsoft/DialoGPT-large": "Advanced conversational chat",
        "microsoft/DialoGPT-small": "Fast conversational chat",
        "facebook/opt-350m": "Conversational OPT",
        "bigscience/bloom-560m": "Multilingual BLOOM"
    }
}


# Cache of already-loaded models so each one is only downloaded and initialized once
model_cache = {}

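# Optional helper (illustrative sketch, not wired into the UI): some of the models listed
# above are gated, so an accessibility check before attempting a download would let the app
# flag them up front instead of failing mid-request. `model_info` raises for repos that are
# missing or that the current token cannot access.
from huggingface_hub import model_info

def is_model_accessible(repo_id):
    """Return True if the repo can be resolved with the current credentials."""
    try:
        model_info(repo_id, token=HF_TOKEN)
        return True
    except Exception:
        return False
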
def load_text_model(model_name):
    """Load a text model, with support for different model types."""
    if model_name not in model_cache:
        print(f"Loading text model: {model_name}")

        if "opus-mt" in model_name.lower():
            # Helsinki-NLP translation models use the MarianMT architecture
            from transformers import MarianMTModel, MarianTokenizer
            tokenizer = MarianTokenizer.from_pretrained(model_name)
            model = MarianMTModel.from_pretrained(model_name)
        else:
            # Causal language models (GPT-2, DialoGPT, GPT-Neo, OPT, BLOOM, ...)
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForCausalLM.from_pretrained(model_name)

        # DialoGPT ships without a pad token; reuse the EOS token so generate() can pad
        if "dialogpt" in model_name.lower():
            tokenizer.pad_token = tokenizer.eos_token
            model.config.pad_token_id = model.config.eos_token_id

        model_cache[model_name] = {
            "tokenizer": tokenizer,
            "model": model,
            "type": "text"
        }

    return model_cache[model_name]

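# Note (optional, illustrative): everything below runs the models on CPU. On a machine with
# a GPU, a loaded model or pipeline could be moved over after loading, e.g.
#   device = "cuda" if torch.cuda.is_available() else "cpu"
#   model.to(device)
# in which case the input tensors produced by the tokenizers would also need .to(device).
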
def load_image_model(model_name):
    """Load an image model - simplified version with FLUX support."""
    if model_name not in model_cache:
        print(f"Loading image model: {model_name}")

        if "flux" in model_name.lower():
            try:
                from diffusers import FluxPipeline
                pipe = FluxPipeline.from_pretrained(
                    model_name,
                    torch_dtype=torch.bfloat16
                )
                if torch.cuda.is_available():
                    # Offloading moves submodules to the GPU on demand; skip it on CPU-only machines
                    pipe.enable_model_cpu_offload()
            except Exception as e:
                print(f"Error loading FLUX: {e}")
                # Fall back to a freely available Stable Diffusion checkpoint
                pipe = StableDiffusionPipeline.from_pretrained(
                    "CompVis/stable-diffusion-v1-4",
                    torch_dtype=torch.float32,
                    safety_checker=None
                )
        else:
            # DiffusionPipeline picks the right pipeline class for each repo
            # (SD 1.x/2.x, SDXL, SD3, ...), unlike hard-coding StableDiffusionPipeline
            pipe = DiffusionPipeline.from_pretrained(
                model_name,
                torch_dtype=torch.float32,
                safety_checker=None
            )

        # Reduce memory usage where the pipeline supports it
        if hasattr(pipe, "enable_attention_slicing"):
            pipe.enable_attention_slicing()

        model_cache[model_name] = {
            "pipeline": pipe,
            "type": "image"
        }

    return model_cache[model_name]

def load_video_model(model_name):
    """Load a video model, with support for different model types."""
    if model_name not in model_cache:
        print(f"Loading video model: {model_name}")

        try:
            if "text-to-video" in model_name.lower():
                # ModelScope text-to-video repos ship an fp16 variant; download that and
                # cast to float32 so inference also works on CPU
                pipe = DiffusionPipeline.from_pretrained(
                    model_name,
                    torch_dtype=torch.float32,
                    variant="fp16"
                )
            else:
                # Zeroscope, AnimateDiff, CogVideoX and the rest share the same loading path
                pipe = DiffusionPipeline.from_pretrained(
                    model_name,
                    torch_dtype=torch.float32
                )

            # Reduce memory usage where the pipeline supports it
            if hasattr(pipe, "enable_attention_slicing"):
                pipe.enable_attention_slicing()
            if torch.cuda.is_available():
                pipe.enable_model_cpu_offload()

            model_cache[model_name] = {
                "pipeline": pipe,
                "type": "video"
            }

        except Exception as e:
            print(f"Error loading video model {model_name}: {e}")
            # Fall back to the freely available ModelScope checkpoint
            pipe = DiffusionPipeline.from_pretrained(
                "damo-vilab/text-to-video-ms-1.7b",
                torch_dtype=torch.float32
            )
            pipe.enable_attention_slicing()

            model_cache[model_name] = {
                "pipeline": pipe,
                "type": "video"
            }

    return model_cache[model_name]

def generate_text(prompt, model_name, max_length=100):
    """Generate text with the selected model - handles the different model types."""
    try:
        model_data = load_text_model(model_name)
        tokenizer = model_data["tokenizer"]
        model = model_data["model"]

        if "opus-mt" in model_name.lower():
            # Translation models: deterministic beam search
            inputs = tokenizer.encode(prompt, return_tensors="pt", max_length=512, truncation=True)
            with torch.no_grad():
                outputs = model.generate(inputs, max_length=max_length, num_beams=4, early_stopping=True)
            response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        else:
            # Causal language models: sampling with temperature
            inputs = tokenizer.encode(prompt, return_tensors="pt")

            with torch.no_grad():
                outputs = model.generate(
                    inputs,
                    max_length=max_length,
                    num_return_sequences=1,
                    temperature=0.7,
                    do_sample=True,
                    pad_token_id=tokenizer.eos_token_id
                )

            response = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # DialoGPT echoes the prompt; strip it so only the reply is returned
        if "dialogpt" in model_name.lower():
            response = response.replace(prompt, "").strip()

        return response

    except Exception as e:
        return f"Error generating text: {str(e)}"

def generate_image(prompt, model_name, num_inference_steps=20):
    """Generate an image with the selected model - simplified version with FLUX support."""
    try:
        print(f"Generating image with model: {model_name}")
        print(f"Prompt: {prompt}")
        print(f"Steps: {num_inference_steps}")

        model_data = load_image_model(model_name)
        pipeline = model_data["pipeline"]

        # FLUX has its own call signature. Check the pipeline class rather than the model
        # name, because the loader may have fallen back to Stable Diffusion for a FLUX name.
        if pipeline.__class__.__name__.lower().startswith("flux"):
            image = pipeline(
                prompt,
                guidance_scale=0.0,
                num_inference_steps=4,
                max_sequence_length=256,
                generator=torch.Generator("cpu").manual_seed(0)
            ).images[0]
        else:
            image = pipeline(
                prompt,
                num_inference_steps=num_inference_steps,
                guidance_scale=7.5
            ).images[0]

        print("Image generated successfully")
        return image

    except Exception as e:
        print(f"Error generating image: {str(e)}")
        # A string is not a valid gr.Image value, so surface the failure in the UI instead
        raise gr.Error(f"Error generating image: {str(e)}")

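# Quick smoke test (illustrative; run in a Python shell, downloads the model on first use):
#   img = generate_image("a watercolor fox", "CompVis/stable-diffusion-v1-4", num_inference_steps=15)
#   img.save("test.png")   # generate_image returns a PIL image on success
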
def generate_video(prompt, model_name, num_frames=16, num_inference_steps=20):
    """Generate a video with the selected model."""
    try:
        print(f"Generating video with model: {model_name}")
        print(f"Prompt: {prompt}")
        print(f"Frames: {num_frames}")
        print(f"Steps: {num_inference_steps}")

        model_data = load_video_model(model_name)
        pipeline = model_data["pipeline"]

        if "zeroscope" in model_name.lower():
            # Zeroscope is trained for low resolutions, so pin height/width
            output = pipeline(
                prompt,
                num_inference_steps=num_inference_steps,
                num_frames=num_frames,
                height=256,
                width=256
            )
        else:
            output = pipeline(
                prompt,
                num_inference_steps=num_inference_steps,
                num_frames=num_frames
            )

        # gr.Video expects a file path, not raw frames, so export the frames to an mp4.
        # In recent diffusers versions .frames is batched; take the first sample.
        from diffusers.utils import export_to_video
        video_path = export_to_video(output.frames[0])

        print("Video generated successfully")
        return video_path

    except Exception as e:
        print(f"Error generating video: {str(e)}")
        # A string is not a valid gr.Video value, so surface the failure in the UI instead
        raise gr.Error(f"Error generating video: {str(e)}")

def chat_with_model(message, history, model_name):
    """Chat function for DialoGPT using the messages-style history format."""
    try:
        model_data = load_text_model(model_name)
        tokenizer = model_data["tokenizer"]
        model = model_data["model"]

        # Rebuild the conversation as plain text from the messages-style history
        conversation = ""
        for msg in history:
            if msg["role"] == "user":
                conversation += f"User: {msg['content']}\n"
            elif msg["role"] == "assistant":
                conversation += f"Assistant: {msg['content']}\n"

        conversation += f"User: {message}\nAssistant:"

        inputs = tokenizer.encode(conversation, return_tensors="pt", truncation=True, max_length=512)

        with torch.no_grad():
            outputs = model.generate(
                inputs,
                max_length=inputs.shape[1] + 50,
                temperature=0.7,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id
            )

        response = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Keep only the text generated after the last "Assistant:" marker
        response = response.split("Assistant:")[-1].strip()

        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": response})

        return history

    except Exception as e:
        error_msg = f"Chat error: {str(e)}"
        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": error_msg})
        return history

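# Note (illustrative alternative): DialoGPT was trained on turns separated by the EOS token
# rather than "User:"/"Assistant:" prefixes, so an encoding closer to the model card would be
#   ids = tokenizer.encode(previous_turns + message + tokenizer.eos_token, return_tensors="pt")
# where previous_turns already has each turn followed by tokenizer.eos_token. The plain-text
# format above is kept because it also works with the non-DialoGPT chat models in the list.
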
with gr.Blocks(title="Free AI Models", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🤖 Free AI Models")
    gr.Markdown("### Generate text, images and videos without quota limits")

    with gr.Tabs():

        with gr.TabItem("📝 Text Generation"):
            with gr.Row():
                with gr.Column():
                    text_model = gr.Dropdown(
                        choices=list(MODELS["text"].keys()),
                        value="microsoft/DialoGPT-medium",
                        label="Text Model"
                    )
                    text_prompt = gr.Textbox(
                        label="Prompt",
                        placeholder="Write your prompt here...",
                        lines=3
                    )
                    max_length = gr.Slider(
                        minimum=50,
                        maximum=200,
                        value=100,
                        step=10,
                        label="Maximum length"
                    )
                    text_btn = gr.Button("Generate Text", variant="primary")

                with gr.Column():
                    text_output = gr.Textbox(
                        label="Result",
                        lines=10,
                        interactive=False
                    )

            text_btn.click(
                generate_text,
                inputs=[text_prompt, text_model, max_length],
                outputs=text_output
            )

        with gr.TabItem("💬 Chat"):
            with gr.Row():
                with gr.Column():
                    chat_model = gr.Dropdown(
                        choices=list(MODELS["chat"].keys()),
                        value="microsoft/DialoGPT-medium",
                        label="Chat Model"
                    )

                with gr.Column():
                    chatbot = gr.Chatbot(
                        label="Chat",
                        height=400,
                        type="messages"
                    )
                    chat_input = gr.Textbox(
                        label="Message",
                        placeholder="Write your message...",
                        lines=2
                    )
                    chat_btn = gr.Button("Send", variant="primary")

            chat_btn.click(
                chat_with_model,
                inputs=[chat_input, chatbot, chat_model],
                outputs=[chatbot]
            )

            chat_input.submit(
                chat_with_model,
                inputs=[chat_input, chatbot, chat_model],
                outputs=[chatbot]
            )

        with gr.TabItem("🌐 Translation"):
            with gr.Row():
                with gr.Column():
                    translate_model = gr.Dropdown(
                        choices=["Helsinki-NLP/opus-mt-es-en", "Helsinki-NLP/opus-mt-en-es"],
                        value="Helsinki-NLP/opus-mt-es-en",
                        label="Translation Model"
                    )
                    translate_text = gr.Textbox(
                        label="Text to translate",
                        placeholder="Write the text you want to translate...",
                        lines=3
                    )
                    translate_btn = gr.Button("Translate", variant="primary")

                with gr.Column():
                    translate_output = gr.Textbox(
                        label="Translation",
                        lines=3,
                        interactive=False
                    )

            translate_btn.click(
                # Pass a fixed max_length instead of instantiating a hidden Slider inline,
                # which would render a stray invisible component just to carry a constant
                lambda text, model: generate_text(text, model, max_length=100),
                inputs=[translate_text, translate_model],
                outputs=translate_output
            )

        with gr.TabItem("🎨 Image Generation"):
            with gr.Row():
                with gr.Column():
                    image_model = gr.Dropdown(
                        choices=list(MODELS["image"].keys()),
                        value="CompVis/stable-diffusion-v1-4",
                        label="Image Model"
                    )
                    image_prompt = gr.Textbox(
                        label="Image Prompt",
                        placeholder="Describe the image you want to generate...",
                        lines=3
                    )
                    steps = gr.Slider(
                        minimum=10,
                        maximum=50,
                        value=15,
                        step=5,
                        label="Inference steps"
                    )
                    image_btn = gr.Button("Generate Image", variant="primary")

                with gr.Column():
                    image_output = gr.Image(
                        label="Generated Image",
                        type="pil"
                    )

            image_btn.click(
                generate_image,
                inputs=[image_prompt, image_model, steps],
                outputs=image_output
            )

        with gr.TabItem("🎬 Video Generation"):
            with gr.Row():
                with gr.Column():
                    video_model = gr.Dropdown(
                        choices=list(MODELS["video"].keys()),
                        value="damo-vilab/text-to-video-ms-1.7b",
                        label="Video Model"
                    )
                    video_prompt = gr.Textbox(
                        label="Video Prompt",
                        placeholder="Describe the video you want to generate...",
                        lines=3
                    )
                    num_frames = gr.Slider(
                        minimum=8,
                        maximum=32,
                        value=16,
                        step=4,
                        label="Number of frames"
                    )
                    video_steps = gr.Slider(
                        minimum=10,
                        maximum=50,
                        value=20,
                        step=5,
                        label="Inference steps"
                    )
                    video_btn = gr.Button("Generate Video", variant="primary")

                with gr.Column():
                    video_output = gr.Video(
                        label="Generated Video",
                        format="mp4"
                    )

            video_btn.click(
                generate_video,
                inputs=[video_prompt, video_model, num_frames, video_steps],
                outputs=video_output
            )

if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )