Spaces:

oriolgds
/

title-generator

Running

App Files Community

oriolgds committed 21 days ago

Commit

d6aca47

unverified ·

1 Parent(s): 766694a

Revert "Some optimizations"

Browse files

This reverts commit 766694a1e0d3cd95cd563db918de82dad3f192dd.

Files changed (2) hide show

app.py +67 -85
script.py +208 -0

app.py CHANGED Viewed

@@ -2,117 +2,105 @@ import gradio as gr
 import transformers
 import torch
 import os
-import time # Para medir el tiempo
-# --- 0. Optimización de Hilos (Threads) ---
-# Los Spaces gratuitos de HF suelen tener 2 o 4 CPUs.
-# Forzamos a PyTorch a usar todos los núcleos disponibles
-# para operaciones que SÍ puede paralelizar (ej. matmul)
-try:
-    cpu_count = os.cpu_count()
-    torch.set_num_threads(cpu_count)
-    print(f"INFO: Configurando PyTorch para usar {cpu_count} hilos.")
-except Exception as e:
-    print(f"ADVERTENCIA: No se pudo configurar el número de hilos de PyTorch: {e}")
 # --- 1. Configuración del Modelo ---
 # Obtenemos el HF_TOKEN de los "Secrets" del Space.
 HF_TOKEN = os.environ.get("HF_TOKEN")
 if not HF_TOKEN:
     print("ADVERTENCIA: No se ha configurado el secret 'HF_TOKEN'.")
     # raise ValueError("Falta el HF_TOKEN. Configúralo en los secrets del Space.")
 # Cargamos el modelo Llama-3.2-1B-Instruct
 try:
-    print("Iniciando carga del pipeline...")
-    start_load = time.time()
     generator = transformers.pipeline(
         "text-generation",
         model="meta-llama/Llama-3.2-1B-Instruct",
-        dtype=torch.bfloat16,
-        device_map="auto",  # En un Space de CPU, esto será "cpu"
-        token=HF_TOKEN
     )
     if generator.tokenizer.pad_token_id is None:
         generator.tokenizer.pad_token_id = generator.tokenizer.eos_token_id
         print("INFO: pad_token_id no estaba configurado. Se ha establecido en eos_token_id.")
-    end_load = time.time()
-    print(f"Pipeline de Llama-3.2-1B cargado exitosamente en {end_load - start_load:.2f} segundos.")
 except Exception as e:
     print(f"Error cargando el pipeline: {e}")
-    generator = None
-# --- 2. Lógica de Generación (Modificada para Batching) ---
-# Esta función ahora recibirá una LISTA de textos
-def generate_title(text_inputs_list):
     """
-    Toma una LISTA de textos y genera un título para CADA uno.
     """
     if not generator:
-        return ["Error: El modelo no pudo cargarse."] * len(text_inputs_list)
-    print(f"Procesando un lote (batch) de {len(text_inputs_list)} peticiones.")
-    start_gen = time.time()
-    prompts = []
-    for text_input in text_inputs_list:
-        if not text_input or text_input.strip() == "":
-            prompts.append(None) # Marcamos para saltar
-            continue
-        system_prompt = "Eres un experto en resumir textos en títulos cortos y llamativos. Te daré un texto o un historial de chat y tú generarás un título de entre 3 y 7 palabras. Responde SOLAMENTE con el título y nada más."
-        user_prompt = f"Genera un título para el siguiente contenido:\n\n---\n{text_input}\n---"
-        messages = [
-            {"role": "system", "content": system_prompt},
-            {"role": "user", "content": user_prompt},
-        ]
-        prompts.append(messages)
     terminators_safe = list(set([
         generator.tokenizer.eos_token_id,
         13  # ID del token de nueva línea (\n)
     ]))
     try:
-        # El pipeline maneja la lista de prompts automáticamente
-        outputs_list = generator(
-            prompts, # Pasamos la lista de prompts
-            max_new_tokens=20,
-            eos_token_id=terminators_safe,
-            do_sample=False,
-            temperature=None,
-            top_p=None,
-            pad_token_id=generator.tokenizer.eos_token_id
         )
-        titles = []
-        for i, outputs in enumerate(outputs_list):
-            if prompts[i] is None:
-                titles.append("Por favor, introduce un texto.")
-                continue
-            # Extraemos la respuesta del asistente
-            title = outputs[0]["generated_text"][-1]["content"]
-            title = title.strip().replace('"', '').replace("Título:", "").strip()
-            if not title:
-                titles.append("No se pudo generar un título.")
-            else:
-                titles.append(title)
-        end_gen = time.time()
-        print(f"Lote de {len(text_inputs_list)} procesado en {end_gen - start_gen:.2f} segundos.")
-        return titles
     except Exception as e:
         print(f"Error durante la generación: {e}")
-        return [f"Error al generar: {e}"] * len(text_inputs_list)
 # --- 3. Interfaz de Gradio ---
@@ -128,27 +116,22 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         text_input = gr.Textbox(
             lines=15,
             label="Texto o Historial de Chat",
-            placeholder="Pega tu contenido aquí..."
         )
         title_output = gr.Textbox(
             label="Título Generado",
-            interactive=False
         )
     generate_btn = gr.Button("🚀 Generar Título", variant="primary")
-    # --- Optimización de Batching ---
-    # 1. fn: La función ahora espera una lista y devuelve una lista.
-    # 2. batch=True: Le dice a Gradio que agrupe las peticiones.
-    # 3. max_batch_size=4: Agrupa hasta 4 peticiones juntas.
-    #    Ajusta este número según la RAM de tu Space.
     generate_btn.click(
         fn=generate_title,
         inputs=text_input,
         outputs=title_output,
-        api_name="generate_title",
-        batch=True,          # <-- Habilitar batching
-        max_batch_size=4     # <-- Definir tamaño del lote
     )
     gr.Examples(
@@ -164,9 +147,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         label="Ejemplos de Entrada"
     )
-# --- Optimización de Fila (Queue) ---
-# enable_queue=True es ESENCIAL para que el batching funcione.
-# Permite que Gradio ponga las peticiones en una fila mientras espera
-# a que se llene el lote (batch).
 if __name__ == "__main__":
-    demo.queue().launch() # <-- Usamos .queue()

 import transformers
 import torch
 import os
 # --- 1. Model configuration ---
 # Read HF_TOKEN from the Space "Secrets" (exposed as an environment variable).
+# NEVER hard-code your token in the source!
 HF_TOKEN = os.environ.get("HF_TOKEN")
 if not HF_TOKEN:
     print("ADVERTENCIA: No se ha configurado el secret 'HF_TOKEN'.")
+    # Without a token the app may fail to load the gated model.
+    # For local testing, create a .env file or set the variable yourself.
     # raise ValueError("Falta el HF_TOKEN. Configúralo en los secrets del Space.")
 # Load the Llama-3.2-1B-Instruct model
+# A pipeline is used to simplify text generation
 try:
     generator = transformers.pipeline(
         "text-generation",
         model="meta-llama/Llama-3.2-1B-Instruct",
+        dtype=torch.bfloat16, # Speed/memory optimisation (replaces torch_dtype)
+        device_map="auto",  # Uses a GPU if one is available
+        token=HF_TOKEN      # Token used to access the gated model
     )
+    # --- START OF FIX ---
+    # FIX 1: make sure pad_token_id is set
+    # Some models ship without a default pad_token_id, which causes the 'NoneType' error
     if generator.tokenizer.pad_token_id is None:
         generator.tokenizer.pad_token_id = generator.tokenizer.eos_token_id
         print("INFO: pad_token_id no estaba configurado. Se ha establecido en eos_token_id.")
+    # --- END OF FIX ---
+    print("Pipeline de Llama-3.2-1B cargado exitosamente.")
 except Exception as e:
     print(f"Error cargando el pipeline: {e}")
+    # A failure here is most likely caused by the token or missing model access.
+    generator = None # Marks that loading failed, so generate_title can report it
+# --- 2. Lógica de Generación ---
+def generate_title(text_input):
     """
+    Generate a short title for a text or a pasted chat transcript.
+
+    Args:
+        text_input: Raw content to summarise (plain text or chat history).
+
+    Returns:
+        The cleaned title, or a user-facing message when the model is not
+        loaded, the input is blank, nothing usable was generated, or the
+        generation step raised an exception.
     """
     if not generator:
+        return "Error: El modelo no pudo cargarse. ¿Configuraste el HF_TOKEN y tienes acceso a meta-llama/Llama-3.2-1B-Instruct?"
+    if not text_input or not text_input.strip():
+        return "Por favor, introduce un texto."
+    # Prompt engineering: give the model clear instructions.
+    # The request is passed as chat messages (system turn + user turn).
+    system_prompt = "Eres un experto en resumir textos en títulos cortos y llamativos. Te daré un texto o un historial de chat y tú generarás un título de entre 3 y 7 palabras. Responde SOLAMENTE con el título y nada más."
+    user_prompt = f"Genera un título para el siguiente contenido:\n\n---\n{text_input}\n---"
+    messages = [
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": user_prompt},
+    ]
     try:
+        tokenizer = generator.tokenizer
+        # Stop on end-of-turn or on the first line break. The line-break ids
+        # are looked up in the tokenizer instead of being hard-coded: 13 is
+        # "\n" only in the Llama 2 vocabulary, whereas in the Llama 3
+        # vocabulary id 13 is "." and would cut titles at the first full stop.
+        stop_ids = {tokenizer.eos_token_id}
+        for line_break in ("\n", "\n\n"):
+            encoded = tokenizer.encode(line_break, add_special_tokens=False)
+            # Only single-token encodings can be used as stop ids.
+            if len(encoded) == 1:
+                stop_ids.add(encoded[0])
+        # Drop a missing eos id so the list only ever contains integers.
+        stop_ids.discard(None)
+        terminators_safe = list(stop_ids)
+        outputs = generator(
+            messages,
+            max_new_tokens=20,          # A title does not need more than 20 tokens
+            eos_token_id=terminators_safe,
+            do_sample=False,            # Greedy decoding: most likely answer, not a creative one
+            temperature=None,           # Unused when do_sample=False
+            top_p=None,                 # Unused when do_sample=False
+            pad_token_id=tokenizer.eos_token_id
         )
+        # outputs[0]["generated_text"] is the list of chat messages;
+        # the last entry is the assistant reply.
+        raw_title = outputs[0]["generated_text"][-1]["content"]
+        # Remove quotes and a leading "Título:" label, then keep the first
+        # non-empty line as a backstop in case generation ran past a line break.
+        cleaned = raw_title.replace('"', '').replace("Título:", "")
+        title_lines = [line.strip() for line in cleaned.splitlines() if line.strip()]
+        if not title_lines:
+            return "No se pudo generar un título."
+        return title_lines[0]
     except Exception as e:
         print(f"Error durante la generación: {e}")
+        return f"Error al generar: {e}"
 # --- 3. Interfaz de Gradio ---
         text_input = gr.Textbox(
             lines=15,
             label="Texto o Historial de Chat",
+            placeholder="Pega tu contenido aquí. Por ejemplo:\n\nUser: ¿Qué es la IA?\nAssistant: La IA es...\nUser: ¿Y el machine learning?\n\nO simplemente pega un artículo largo."
         )
         title_output = gr.Textbox(
             label="Título Generado",
+            interactive=False # El usuario no puede editar esto
         )
     generate_btn = gr.Button("🚀 Generar Título", variant="primary")
+    # Conectamos el botón a la función
+    # api_name="generate_title" habilita el endpoint /api/generate_title
     generate_btn.click(
         fn=generate_title,
         inputs=text_input,
         outputs=title_output,
+        api_name="generate_title"
     )
     gr.Examples(
         label="Ejemplos de Entrada"
     )
+# Launch the application when the file is run as a script
 if __name__ == "__main__":
+    demo.launch()

script.py ADDED Viewed

	@@ -0,0 +1,208 @@

+# Crear el código completo para un espacio de Hugging Face que genera títulos
+# usando Llama-3.2-1B-Instruct con interfaz Gradio y API
+app_code = '''import gradio as gr
+import os
+from huggingface_hub import InferenceClient
+# Obtener el token de HF desde los secrets
+HF_TOKEN = os.environ.get("HF_TOKEN")
+# Inicializar el cliente de inferencia con el modelo Llama
+client = InferenceClient(
+    model="meta-llama/Llama-3.2-1B-Instruct",
+    token=HF_TOKEN
+)
+def generate_title(text_or_history, max_length=50):
+    """
+    Genera un título a partir de texto o historial de conversación
+    Args:
+        text_or_history: Puede ser texto simple o una lista de mensajes
+        max_length: Longitud máxima del título
+    Returns:
+        El título generado
+    """
+    try:
+        # Si es una lista (historial), convertirla a texto
+        if isinstance(text_or_history, list):
+            # Formatear el historial como conversación
+            conversation_text = "\\n".join([
+                f"{msg.get('role', 'user')}: {msg.get('content', '')}"
+                for msg in text_or_history
+            ])
+        else:
+            conversation_text = str(text_or_history)
+        # Crear el prompt para generar título
+        prompt = f"""Based on the following conversation or text, generate a short, concise title (maximum 10 words):
+{conversation_text}
+Title:"""
+        # Generar el título usando el modelo
+        messages = [
+            {"role": "user", "content": prompt}
+        ]
+        response = ""
+        for message in client.chat_completion(
+            messages=messages,
+            max_tokens=max_length,
+            temperature=0.7,
+            stream=True
+        ):
+            token = message.choices[0].delta.content
+            if token:
+                response += token
+        # Limpiar el título (quitar saltos de línea extra, etc.)
+        title = response.strip().split("\\n")[0]
+        return title
+    except Exception as e:
+        return f"Error: {str(e)}"
+# Crear la interfaz de Gradio
+with gr.Blocks(title="Title Generator with Llama 3.2") as demo:
+    gr.Markdown("# 📝 AI Title Generator")
+    gr.Markdown("Generate concise titles from text or conversation history using Llama 3.2-1B-Instruct")
+    with gr.Tab("Text Input"):
+        text_input = gr.Textbox(
+            label="Enter your text",
+            placeholder="Paste your text or conversation here...",
+            lines=10
+        )
+        text_button = gr.Button("Generate Title", variant="primary")
+        text_output = gr.Textbox(label="Generated Title", lines=2)
+        text_button.click(
+            fn=generate_title,
+            inputs=[text_input],
+            outputs=[text_output]
+        )
+    with gr.Tab("History/List Input"):
+        gr.Markdown("Enter conversation history as JSON format:")
+        gr.Markdown('Example: `[{"role": "user", "content": "Hello"}, {"role": "assistant", "content": "Hi there!"}]`')
+        history_input = gr.Textbox(
+            label="Conversation History (JSON)",
+            placeholder='[{"role": "user", "content": "Your message here"}]',
+            lines=10
+        )
+        history_button = gr.Button("Generate Title", variant="primary")
+        history_output = gr.Textbox(label="Generated Title", lines=2)
+        def process_history(history_json):
+            try:
+                import json
+                history_list = json.loads(history_json)
+                return generate_title(history_list)
+            except json.JSONDecodeError:
+                return "Error: Invalid JSON format"
+        history_button.click(
+            fn=process_history,
+            inputs=[history_input],
+            outputs=[history_output]
+        )
+    gr.Markdown("---")
+    gr.Markdown("### API Usage")
+    gr.Markdown("""
+    You can use this API with CURL:
+    ```bash
+    curl -X POST "https://YOUR-SPACE-URL/call/generate_title" \\
+      -H "Content-Type: application/json" \\
+      -d '{"data": ["Your text here"]}'
+    ```
+    """)
+# Lanzar la aplicación con API habilitada
+if __name__ == "__main__":
+    demo.launch(show_api=True)
+'''
+# Save the generated code to app.py in the current working directory
+# (mode 'w' truncates any existing app.py there)
+with open('app.py', 'w', encoding='utf-8') as f:
+    f.write(app_code)
+# Create the requirements.txt file
+requirements = '''gradio>=4.0.0
+huggingface_hub>=0.19.0
+'''
+with open('requirements.txt', 'w', encoding='utf-8') as f:
+    f.write(requirements)
+# Crear el README con instrucciones
+readme = '''---
+title: Title Generator with Llama 3.2
+emoji: 📝
+colorFrom: blue
+colorTo: purple
+sdk: gradio
+sdk_version: 4.44.0
+app_file: app.py
+pinned: false
+license: mit
+---
+# Title Generator with Llama 3.2-1B-Instruct
+Generate concise titles from text or conversation history using Meta's Llama 3.2-1B-Instruct model.
+## Features
+- 📝 Generate titles from plain text
+- 💬 Generate titles from conversation history
+- 🚀 Fast inference with Llama 3.2-1B
+- 🔌 RESTful API support for integration
+## Setup
+1. Go to your Space settings
+2. Add a new secret: `HF_TOKEN` with your Hugging Face token
+3. Make sure you have access to `meta-llama/Llama-3.2-1B-Instruct` (accept the gated model)
+## API Usage
+### CURL Example
+```bash
+curl -X POST "https://YOUR-SPACE-URL/call/generate_title" \\
+  -H "Content-Type: application/json" \\
+  -d '{"data": ["Your text or conversation here"]}'
+```
+### Python Example
+```python
+from gradio_client import Client
+client = Client("YOUR-SPACE-URL")
+result = client.predict("Your text here", api_name="/generate_title")
+print(result)
+```
+## License
+MIT License
+'''
+# Write the README alongside the other generated files.
+with open('README.md', 'w', encoding='utf-8') as f:
+    f.write(readme)
+# Report what was produced, one message per output line.
+summary_lines = (
+    "✅ Archivos generados exitosamente:",
+    "- app.py",
+    "- requirements.txt",
+    "- README.md",
+    "\n📦 Archivos listos para subir a Hugging Face Space",
+)
+print(*summary_lines, sep="\n")