Adding full support for video generation with free models
- app.py +161 -0
- requirements.txt +4 -1
app.py CHANGED

@@ -52,6 +52,19 @@ MODELS = {
         "black-forest-labs/FLUX.1-schnell": "FLUX.1 Schnell (Requiere acceso)",
         "black-forest-labs/FLUX.1-dev": "FLUX.1 Dev (Requiere acceso)"
     },
+    "video": {
+        "damo-vilab/text-to-video-ms-1.7b": "Text-to-Video MS 1.7B (Libre)",
+        "ali-vilab/text-to-video-ms-1.7b": "Text-to-Video MS 1.7B Alt",
+        "cerspense/zeroscope_v2_576w": "Zeroscope v2 576w (Libre)",
+        "cerspense/zeroscope_v2_XL": "Zeroscope v2 XL (Libre)",
+        "damo-vilab/text-to-video-ms-1.7b": "Text-to-Video MS 1.7B",
+        "ali-vilab/text-to-video-ms-1.7b": "Text-to-Video MS 1.7B Alt",
+        "cerspense/zeroscope_v2_576w": "Zeroscope v2 576w",
+        "cerspense/zeroscope_v2_XL": "Zeroscope v2 XL",
+        "ByteDance/AnimateDiff-Lightning": "AnimateDiff Lightning (Libre)",
+        "THUDM/CogVideoX-5b": "CogVideoX 5B (Libre)",
+        "rain1011/pyramid-flow-sd3": "Pyramid Flow SD3 (Libre)"
+    },
     "chat": {
         "microsoft/DialoGPT-medium": "Chat conversacional",
         "microsoft/DialoGPT-large": "Chat conversacional avanzado",
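Note that the "video" dict literal above repeats four model IDs, once with "(Libre)" labels and once without. In a Python dict literal a repeated key simply keeps the last value, so only one entry per model survives at runtime and the "(Libre)" labels for those four models are discarded. A minimal check of that behavior (illustrative only, not part of the commit):

labels = {
    "cerspense/zeroscope_v2_576w": "Zeroscope v2 576w (Libre)",
    "cerspense/zeroscope_v2_576w": "Zeroscope v2 576w",
}
assert labels == {"cerspense/zeroscope_v2_576w": "Zeroscope v2 576w"}  # the later duplicate wins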
@@ -133,6 +146,69 @@ def load_image_model(model_name):

     return model_cache[model_name]

+def load_video_model(model_name):
+    """Cargar modelo de video con soporte para diferentes tipos"""
+    if model_name not in model_cache:
+        print(f"Cargando modelo de video: {model_name}")
+
+        try:
+            # Detectar tipo de modelo de video
+            if "text-to-video" in model_name.lower():
+                # Modelos de texto a video
+                from diffusers import DiffusionPipeline
+                pipe = DiffusionPipeline.from_pretrained(
+                    model_name,
+                    torch_dtype=torch.float32,
+                    variant="fp16"
+                )
+            elif "zeroscope" in model_name.lower():
+                # Zeroscope models
+                from diffusers import DiffusionPipeline
+                pipe = DiffusionPipeline.from_pretrained(
+                    model_name,
+                    torch_dtype=torch.float32
+                )
+            elif "animatediff" in model_name.lower():
+                # AnimateDiff models
+                from diffusers import DiffusionPipeline
+                pipe = DiffusionPipeline.from_pretrained(
+                    model_name,
+                    torch_dtype=torch.float32
+                )
+            else:
+                # Fallback a text-to-video genérico
+                from diffusers import DiffusionPipeline
+                pipe = DiffusionPipeline.from_pretrained(
+                    model_name,
+                    torch_dtype=torch.float32
+                )
+
+            # Optimizaciones básicas
+            pipe.enable_attention_slicing()
+            pipe.enable_model_cpu_offload()
+
+            model_cache[model_name] = {
+                "pipeline": pipe,
+                "type": "video"
+            }
+
+        except Exception as e:
+            print(f"Error cargando modelo de video {model_name}: {e}")
+            # Fallback a un modelo básico
+            from diffusers import DiffusionPipeline
+            pipe = DiffusionPipeline.from_pretrained(
+                "damo-vilab/text-to-video-ms-1.7b",
+                torch_dtype=torch.float32
+            )
+            pipe.enable_attention_slicing()
+
+            model_cache[model_name] = {
+                "pipeline": pipe,
+                "type": "video"
+            }
+
+    return model_cache[model_name]
+
 def generate_text(prompt, model_name, max_length=100):
     """Generar texto con el modelo seleccionado - mejorado para diferentes tipos"""
     try:
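For context, the loader above follows the pattern documented for the ModelScope text-to-video checkpoint in diffusers. A hedged, self-contained sketch of that documented flow (the prompt text, the output file name, and the .frames[0] indexing follow recent diffusers releases and are illustrative, not part of this commit):

import torch
from diffusers import DiffusionPipeline
from diffusers.utils import export_to_video

# Load the fp16 variant of the ModelScope text-to-video weights (a GPU is assumed here).
pipe = DiffusionPipeline.from_pretrained(
    "damo-vilab/text-to-video-ms-1.7b",
    torch_dtype=torch.float16,
    variant="fp16",
)
pipe.enable_model_cpu_offload()  # requires accelerate; offloads submodules to save VRAM

# Generate 16 frames and encode them into an mp4 file.
result = pipe("un oso panda tocando la guitarra", num_inference_steps=25, num_frames=16)
frames = result.frames[0]  # first (and only) video in the batch
video_path = export_to_video(frames, "panda.mp4")
print(video_path)

On a CPU-only Space, enable_model_cpu_offload() and the fp16 weights would likely have to be dropped in favor of a plain float32 load, which is roughly what the committed loader does.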
@@ -208,6 +284,49 @@ def generate_image(prompt, model_name, num_inference_steps=20):
         print(f"Error generando imagen: {str(e)}")
         return f"Error generando imagen: {str(e)}"

+def generate_video(prompt, model_name, num_frames=16, num_inference_steps=20):
+    """Generar video con el modelo seleccionado"""
+    try:
+        print(f"Generando video con modelo: {model_name}")
+        print(f"Prompt: {prompt}")
+        print(f"Frames: {num_frames}")
+        print(f"Pasos: {num_inference_steps}")
+
+        model_data = load_video_model(model_name)
+        pipeline = model_data["pipeline"]
+
+        # Configuración específica por tipo de modelo
+        if "zeroscope" in model_name.lower():
+            # Zeroscope models
+            video_frames = pipeline(
+                prompt,
+                num_inference_steps=num_inference_steps,
+                num_frames=num_frames,
+                height=256,
+                width=256
+            ).frames
+        elif "animatediff" in model_name.lower():
+            # AnimateDiff models
+            video_frames = pipeline(
+                prompt,
+                num_inference_steps=num_inference_steps,
+                num_frames=num_frames
+            ).frames
+        else:
+            # Text-to-video models (default)
+            video_frames = pipeline(
+                prompt,
+                num_inference_steps=num_inference_steps,
+                num_frames=num_frames
+            ).frames
+
+        print("Video generado exitosamente")
+        return video_frames
+
+    except Exception as e:
+        print(f"Error generando video: {str(e)}")
+        return f"Error generando video: {str(e)}"
+
 def chat_with_model(message, history, model_name):
     """Función de chat para DialoGPT con formato de mensajes actualizado"""
     try:
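A practical note on wiring: generate_video returns raw frames, while the gr.Video output added further down expects a playable file (typically a path to an mp4), so the frames still need to be encoded somewhere between the two. A hedged sketch of such a bridge using the imageio / imageio-ffmpeg packages added to requirements.txt below (the helper name frames_to_mp4 and the fps default are illustrative, not from the commit):

import tempfile
import numpy as np
import imageio

def frames_to_mp4(frames, fps=8):
    """Encode a list of frames (PIL images or HxWxC arrays) into a temporary mp4 and return its path."""
    path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
    with imageio.get_writer(path, fps=fps) as writer:  # uses the ffmpeg plugin from imageio-ffmpeg
        for frame in frames:
            arr = np.asarray(frame)
            if arr.dtype != np.uint8:  # diffusers pipelines often return float arrays in [0, 1]
                arr = (arr.clip(0, 1) * 255).astype(np.uint8)
            writer.append_data(arr)
    return path

With a helper like this, generate_video could return frames_to_mp4(video_frames) so the Gradio component receives a file path instead of raw frames.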
@@ -394,6 +513,48 @@ with gr.Blocks(title="Modelos Libres de IA", theme=gr.themes.Soft()) as demo:
                 inputs=[image_prompt, image_model, steps],
                 outputs=image_output
             )
+
+        # Tab de Generación de Videos
+        with gr.TabItem("🎬 Generación de Videos"):
+            with gr.Row():
+                with gr.Column():
+                    video_model = gr.Dropdown(
+                        choices=list(MODELS["video"].keys()),
+                        value="damo-vilab/text-to-video-ms-1.7b",
+                        label="Modelo de Video"
+                    )
+                    video_prompt = gr.Textbox(
+                        label="Prompt de Video",
+                        placeholder="Describe el video que quieres generar...",
+                        lines=3
+                    )
+                    num_frames = gr.Slider(
+                        minimum=8,
+                        maximum=32,
+                        value=16,
+                        step=4,
+                        label="Número de frames"
+                    )
+                    video_steps = gr.Slider(
+                        minimum=10,
+                        maximum=50,
+                        value=20,
+                        step=5,
+                        label="Pasos de inferencia"
+                    )
+                    video_btn = gr.Button("Generar Video", variant="primary")
+
+                with gr.Column():
+                    video_output = gr.Video(
+                        label="Video Generado",
+                        format="mp4"
+                    )
+
+            video_btn.click(
+                generate_video,
+                inputs=[video_prompt, video_model, num_frames, video_steps],
+                outputs=video_output
+            )

 # Configuración para Hugging Face Spaces
 if __name__ == "__main__":
requirements.txt CHANGED

@@ -8,4 +8,7 @@ numpy>=1.21.0
 requests>=2.28.0
 safetensors>=0.3.0
 xformers>=0.0.20
-huggingface_hub>=0.19.0
+huggingface_hub>=0.19.0
+opencv-python>=4.8.0
+imageio>=2.31.0
+imageio-ffmpeg>=0.4.8
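The opencv-python pin added above suggests OpenCV is another intended route for assembling clips. A hedged alternative sketch of the same frames-to-mp4 step built on cv2.VideoWriter (the helper frames_to_mp4_cv2 and its defaults are illustrative, not from the commit):

import cv2
import numpy as np

def frames_to_mp4_cv2(frames, path="salida.mp4", fps=8):
    """Alternative mp4 writer using OpenCV's VideoWriter instead of imageio."""
    first = np.asarray(frames[0])
    h, w = first.shape[:2]
    writer = cv2.VideoWriter(path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
    for frame in frames:
        arr = np.asarray(frame)
        if arr.dtype != np.uint8:
            arr = (arr.clip(0, 1) * 255).astype(np.uint8)
        writer.write(cv2.cvtColor(arr, cv2.COLOR_RGB2BGR))  # OpenCV expects BGR channel order
    writer.release()
    return path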
|