Ntdeseb committed on
Commit f775172
1 Parent(s): 65a882e

Adding full support for video generation with free models

Files changed (2):
  1. app.py +161 -0
  2. requirements.txt +4 -1
app.py CHANGED
@@ -52,6 +52,19 @@ MODELS = {
        "black-forest-labs/FLUX.1-schnell": "FLUX.1 Schnell (Requires access)",
        "black-forest-labs/FLUX.1-dev": "FLUX.1 Dev (Requires access)"
    },
+   "video": {
+       "damo-vilab/text-to-video-ms-1.7b": "Text-to-Video MS 1.7B (Free)",
+       "ali-vilab/text-to-video-ms-1.7b": "Text-to-Video MS 1.7B Alt (Free)",
+       "cerspense/zeroscope_v2_576w": "Zeroscope v2 576w (Free)",
+       "cerspense/zeroscope_v2_XL": "Zeroscope v2 XL (Free)",
+       "ByteDance/AnimateDiff-Lightning": "AnimateDiff Lightning (Free)",
+       "THUDM/CogVideoX-5b": "CogVideoX 5B (Free)",
+       "rain1011/pyramid-flow-sd3": "Pyramid Flow SD3 (Free)"
+   },
    "chat": {
        "microsoft/DialoGPT-medium": "Conversational chat",
        "microsoft/DialoGPT-large": "Advanced conversational chat",
 
@@ -133,6 +146,69 @@ def load_image_model(model_name):

    return model_cache[model_name]

+ def load_video_model(model_name):
+     """Load a video model, handling the supported pipeline families"""
+     if model_name not in model_cache:
+         print(f"Loading video model: {model_name}")
+
+         try:
+             # Detect the video model family from its name
+             if "text-to-video" in model_name.lower():
+                 # ModelScope-style text-to-video models
+                 from diffusers import DiffusionPipeline
+                 pipe = DiffusionPipeline.from_pretrained(
+                     model_name,
+                     torch_dtype=torch.float32,
+                     variant="fp16"  # fetch the fp16 weight files, upcast to float32
+                 )
+             elif "zeroscope" in model_name.lower():
+                 # Zeroscope models
+                 from diffusers import DiffusionPipeline
+                 pipe = DiffusionPipeline.from_pretrained(
+                     model_name,
+                     torch_dtype=torch.float32
+                 )
+             elif "animatediff" in model_name.lower():
+                 # AnimateDiff models (bare checkpoints may fail here and hit the fallback)
+                 from diffusers import DiffusionPipeline
+                 pipe = DiffusionPipeline.from_pretrained(
+                     model_name,
+                     torch_dtype=torch.float32
+                 )
+             else:
+                 # Fall back to a generic text-to-video pipeline
+                 from diffusers import DiffusionPipeline
+                 pipe = DiffusionPipeline.from_pretrained(
+                     model_name,
+                     torch_dtype=torch.float32
+                 )
+
+             # Basic memory optimizations
+             pipe.enable_attention_slicing()
+             pipe.enable_model_cpu_offload()
+
+             model_cache[model_name] = {
+                 "pipeline": pipe,
+                 "type": "video"
+             }
+
+         except Exception as e:
+             print(f"Error loading video model {model_name}: {e}")
+             # Fall back to a basic model
+             from diffusers import DiffusionPipeline
+             pipe = DiffusionPipeline.from_pretrained(
+                 "damo-vilab/text-to-video-ms-1.7b",
+                 torch_dtype=torch.float32
+             )
+             pipe.enable_attention_slicing()
+
+             model_cache[model_name] = {
+                 "pipeline": pipe,
+                 "type": "video"
+             }
+
+     return model_cache[model_name]
+
def generate_text(prompt, model_name, max_length=100):
    """Generate text with the selected model - improved for different model types"""
    try:
 
@@ -208,6 +284,49 @@ def generate_image(prompt, model_name, num_inference_steps=20):
        print(f"Error generating image: {str(e)}")
        return f"Error generating image: {str(e)}"

+ def generate_video(prompt, model_name, num_frames=16, num_inference_steps=20):
+     """Generate a video with the selected model"""
+     try:
+         print(f"Generating video with model: {model_name}")
+         print(f"Prompt: {prompt}")
+         print(f"Frames: {num_frames}")
+         print(f"Steps: {num_inference_steps}")
+
+         model_data = load_video_model(model_name)
+         pipeline = model_data["pipeline"]
+
+         # Model-specific generation settings
+         if "zeroscope" in model_name.lower():
+             # Zeroscope models
+             video_frames = pipeline(
+                 prompt,
+                 num_inference_steps=num_inference_steps,
+                 num_frames=num_frames,
+                 height=256,
+                 width=256
+             ).frames
+         elif "animatediff" in model_name.lower():
+             # AnimateDiff models
+             video_frames = pipeline(
+                 prompt,
+                 num_inference_steps=num_inference_steps,
+                 num_frames=num_frames
+             ).frames
+         else:
+             # Text-to-video models (default)
+             video_frames = pipeline(
+                 prompt,
+                 num_inference_steps=num_inference_steps,
+                 num_frames=num_frames
+             ).frames
+
+         # gr.Video expects a file path, so export the frames to an .mp4
+         # (recent diffusers returns a batch of frame stacks, hence the [0])
+         from diffusers.utils import export_to_video
+         video_path = export_to_video(video_frames[0])
+
+         print("Video generated successfully")
+         return video_path
+
+     except Exception as e:
+         print(f"Error generating video: {str(e)}")
+         return f"Error generating video: {str(e)}"
+
def chat_with_model(message, history, model_name):
    """Chat function for DialoGPT, using the updated message format"""
    try:
513
  inputs=[image_prompt, image_model, steps],
514
  outputs=image_output
515
  )
516
+
517
+ # Tab de Generaci贸n de Videos
518
+ with gr.TabItem("馃幀 Generaci贸n de Videos"):
519
+ with gr.Row():
520
+ with gr.Column():
521
+ video_model = gr.Dropdown(
522
+ choices=list(MODELS["video"].keys()),
523
+ value="damo-vilab/text-to-video-ms-1.7b",
524
+ label="Modelo de Video"
525
+ )
526
+ video_prompt = gr.Textbox(
527
+ label="Prompt de Video",
528
+ placeholder="Describe el video que quieres generar...",
529
+ lines=3
530
+ )
531
+ num_frames = gr.Slider(
532
+ minimum=8,
533
+ maximum=32,
534
+ value=16,
535
+ step=4,
536
+ label="N煤mero de frames"
537
+ )
538
+ video_steps = gr.Slider(
539
+ minimum=10,
540
+ maximum=50,
541
+ value=20,
542
+ step=5,
543
+ label="Pasos de inferencia"
544
+ )
545
+ video_btn = gr.Button("Generar Video", variant="primary")
546
+
547
+ with gr.Column():
548
+ video_output = gr.Video(
549
+ label="Video Generado",
550
+ format="mp4"
551
+ )
552
+
553
+ video_btn.click(
554
+ generate_video,
555
+ inputs=[video_prompt, video_model, num_frames, video_steps],
556
+ outputs=video_output
557
+ )
558
 
559
  # Configuraci贸n para Hugging Face Spaces
560
  if __name__ == "__main__":
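
For reference, here is a minimal standalone sketch of the text-to-video path the diff above wires into the Gradio UI. The model ID, step count, and frame count come from the diff's defaults; the prompt, fps, and output filename are illustrative assumptions:

    import torch
    from diffusers import DiffusionPipeline
    from diffusers.utils import export_to_video

    # Same load settings as load_video_model: float32 keeps the pipeline CPU-safe
    pipe = DiffusionPipeline.from_pretrained(
        "damo-vilab/text-to-video-ms-1.7b",
        torch_dtype=torch.float32,
    )
    pipe.enable_attention_slicing()  # memory optimization also applied in the diff

    result = pipe("a panda surfing a wave", num_inference_steps=20, num_frames=16)
    frames = result.frames[0]  # recent diffusers returns a batch of frame stacks

    # export_to_video writes an .mp4 and returns its path, which gr.Video can play
    video_path = export_to_video(frames, "sample.mp4", fps=8)
    print(video_path)

On a CPU-only Space a 16-frame clip can take several minutes, which is presumably why the sliders keep the default frame and step counts small.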
requirements.txt CHANGED
@@ -8,4 +8,7 @@ numpy>=1.21.0
requests>=2.28.0
safetensors>=0.3.0
xformers>=0.0.20
- huggingface_hub>=0.19.0
+ huggingface_hub>=0.19.0
+ opencv-python>=4.8.0
+ imageio>=2.31.0
+ imageio-ffmpeg>=0.4.8
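
The three new packages presumably support the video output path: imageio with the imageio-ffmpeg backend writes frame arrays to .mp4, and opencv-python covers general frame handling. A minimal sketch of that write path with dummy frames (filename and fps are illustrative):

    import numpy as np
    import imageio

    # 16 dummy RGB frames as 256x256 uint8 arrays, the shape video pipelines emit
    frames = [np.zeros((256, 256, 3), dtype=np.uint8) for _ in range(16)]

    # imageio-ffmpeg supplies the ffmpeg writer behind this call
    imageio.mimsave("out.mp4", frames, fps=8)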