kulia-moon committed on
Commit 424d82d · verified · 1 Parent(s): 6e3ff91

Changed:))

Files changed (1)
  1. app.py +83 -48
app.py CHANGED
@@ -6,12 +6,12 @@ import json
 import os
 from tqdm import tqdm
 from huggingface_hub import HfApi, login
-import datetime # For timestamping logs and commits
+import datetime
 
 # --- Configuration for the Gradio app's internal logic ---
 # Local cache directory (data will be accumulated here first)
 OUTPUT_DIR = "generated"
-DATA_FILE = os.path.join(OUTPUT_DIR, "conversations.jsonl")
+DATA_FILE = os.path.join(OUTPUT_DIR, f"conversations_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.jsonl")
 
 # Hugging Face Dataset repository to push to
 HF_DATASET_REPO_ID = "kulia-moon/LimeStory-1.0" # This is the target dataset
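
Note on the timestamped filename above: with a plain `import datetime`, `now()` must be called as `datetime.datetime.now()` (a bare `datetime.now()` raises AttributeError), and the default string form of a timestamp contains spaces and colons that make poor file names and repo paths. A minimal sketch of the filesystem-safe pattern:

import datetime
import os

OUTPUT_DIR = "generated"

# Digits and underscores only, so the name is safe as a file name and as a repo path
stamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
DATA_FILE = os.path.join(OUTPUT_DIR, f"conversations_{stamp}.jsonl")
print(DATA_FILE)  # e.g. generated/conversations_20240101_120000.jsonl
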
@@ -22,12 +22,27 @@ client = openai.OpenAI(
     api_key="none" # Pollinations.ai doesn't require an API key
 )
 
-# Define models (prioritizing fast ones)
+# Define ALL available models from https://text.pollinations.ai/models
+# This list is more comprehensive. Speeds are approximate relative to each other.
 AVAILABLE_MODELS = {
     "openai": {"description": "GPT-4o mini (generally fast, good all-rounder)", "speed": "Fast"},
     "gemini": {"description": "Gemini 2.0 Flash (designed for speed)", "speed": "Very Fast"},
     "mistral": {"description": "Mistral 3.1 (often performant for its size)", "speed": "Fast"},
-    "llama": {"description": "Llama 3.3 70B (larger, might be slower, but good for diversity)", "speed": "Moderate"},
+    "llama": {"description": "Llama 3.3 70B (larger, good for diversity)", "speed": "Moderate"},
+    "claude": {"description": "Claude 3.5 Haiku (via Pollinations gateway, good for chat)", "speed": "Moderate"},
+    "qwen-coder": {"description": "Qwen 2.5 Coder 32B (coder-focused, general chat is okay)", "speed": "Moderate"},
+    "gemma": {"description": "Gemma 7B (Google's open model, good generalist)", "speed": "Moderate"},
+    "dbrx": {"description": "DBRX (Databricks's large open model, might be slower)", "speed": "Slow"},
+    "mixtral": {"description": "Mixtral 8x7B (Mixture of Experts, good balance of speed/quality)", "speed": "Fast/Moderate"},
+    "command-r": {"description": "Command R (Cohere's powerful model)", "speed": "Moderate"},
+    "cohere-chat": {"description": "Cohere's general chat model", "speed": "Moderate"},
+    "pplx-7b": {"description": "Perplexity Llama 2 7B (fast, good code/text)", "speed": "Fast"},
+    "pplx-70b": {"description": "Perplexity Llama 2 70B (larger, more capable Perplexity model)", "speed": "Moderate"},
+    "yi-34b": {"description": "Yi 34B (zero-one.ai model, capable generalist)", "speed": "Moderate"},
+    "grok": {"description": "Grok (X.ai's model, may have specific tone/style)", "speed": "Moderate"},
+    "stable-lm": {"description": "Stable LM (Stability AI's model)", "speed": "Fast"},
+    "nous-hermes": {"description": "Nous Hermes (fine-tune of Mistral)", "speed": "Fast"},
+    "openchat": {"description": "OpenChat 3.5 (fine-tune of Mistral)", "speed": "Fast"},
 }
 
 # Diverse Names Dataset
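
Each key in AVAILABLE_MODELS is what ends up as the `model` argument to the OpenAI-compatible client declared at the top of app.py. A minimal standalone sketch, assuming the Pollinations base URL (https://text.pollinations.ai/openai) that the diff context implies but does not show, and using a trimmed stand-in for the dict above:

import random
import openai

AVAILABLE_MODELS = {"openai": {}, "gemini": {}, "mistral": {}}  # trimmed stand-in for the full dict

client = openai.OpenAI(
    base_url="https://text.pollinations.ai/openai",  # assumed endpoint, not shown in this hunk
    api_key="none"  # Pollinations.ai doesn't require an API key
)

model_id = random.choice(list(AVAILABLE_MODELS.keys()))
response = client.chat.completions.create(
    model=model_id,
    messages=[{"role": "user", "content": "Say hello in five words."}],
    max_tokens=20,
)
print(model_id, "->", response.choices[0].message.content)
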
@@ -38,7 +53,7 @@ DIVERSE_NAMES = [
     "Eva", "Omar", "Anya", "Arthur", "Zoe", "Dante", "Freya", "Ivan", "Layla", "Milo"
 ]
 
-# Role-playing system prompts
+# Role-playing system prompts (defaults if user doesn't provide one)
 role_play_prompts = [
     "You are a mischievous but sweet little dragon, Puff, who loves shiny objects and telling riddles. Respond with playful fire sparks and curious questions.",
     "You are a fluffy cloud, Nimbus, who enjoys floating peacefully and bringing gentle rain to flowers. Speak with soft, dreamy words and comforting observations.",
@@ -80,40 +95,39 @@ def chat(system, prompt, selected_model_name, seed=None, num_exchanges=5):
     ]
 
     try:
-        for i in range(num_exchanges):
-            response = client.chat.completions.create(
-                model=selected_model_name,
-                messages=messages,
-                max_tokens=150,
-                temperature=0.9,
-                seed=seed
-            )
-            gpt_response = response.choices[0].message.content.strip()
-
-            conversation.append({"from": "gpt", "value": gpt_response})
-
-            if i < num_exchanges - 1:
-                follow_up_prompt_messages = [
-                    {"role": "system", "content": f"You are a helpful and engaging assistant. Based on the last response, generate a polite, open-ended, and cute follow-up question or statement to keep a friendly conversation going. Make it relevant to the last message and consistent with a 'cute' and positive tone."},
-                    {"role": "assistant", "content": gpt_response},
-                    {"role": "user", "content": "Generate a cute and friendly follow-up."}
-                ]
-
-                follow_up_response = client.chat.completions.create(
-                    model=selected_model_name,
-                    messages=follow_up_prompt_messages,
-                    max_tokens=70,
-                    temperature=0.8,
-                    seed=seed + 1000
-                )
-                follow_up = follow_up_response.choices[0].message.content.strip()
-
-                conversation.append({"from": "human", "value": follow_up})
-
-            messages.append({"role": "assistant", "content": gpt_response})
-            messages.append({"role": "user", "content": follow_up})
-            seed += 1
+        # First exchange: the model answers the initial prompt
+        response = client.chat.completions.create(
+            model=selected_model_name,
+            messages=messages,
+            max_tokens=150,
+            temperature=0.9,
+            seed=seed
+        )
+        gpt_response = response.choices[0].message.content.strip()
+        conversation.append({"from": "gpt", "value": gpt_response})
+        messages.append({"role": "assistant", "content": gpt_response})
+
+        for i in range(num_exchanges - 1): # Loop for subsequent exchanges
+            follow_up_prompt_messages = [
+                {"role": "system", "content": "You are a helpful and engaging assistant. Based on the last response, generate a polite, open-ended, and cute follow-up question or statement to keep a friendly conversation going. Make it relevant to the last message and consistent with a 'cute' and positive tone."},
+                {"role": "assistant", "content": gpt_response},
+                {"role": "user", "content": "Generate a cute and friendly follow-up."}
+            ]
+
+            follow_up_response = client.chat.completions.create(
+                model=selected_model_name,
+                messages=follow_up_prompt_messages,
+                max_tokens=70,
+                temperature=0.8,
+                seed=seed + 1000 + i # Vary seed for follow-ups
+            )
+            follow_up = follow_up_response.choices[0].message.content.strip()
+            conversation.append({"from": "human", "value": follow_up})
+            messages.append({"role": "user", "content": follow_up})
+
+            # Have the model answer the follow-up so human/gpt turns keep alternating
+            response = client.chat.completions.create(
+                model=selected_model_name,
+                messages=messages,
+                max_tokens=150,
+                temperature=0.9,
+                seed=seed + i + 1
+            )
+            gpt_response = response.choices[0].message.content.strip()
+            conversation.append({"from": "gpt", "value": gpt_response})
+            messages.append({"role": "assistant", "content": gpt_response})
 
         return conversation
     except Exception as e:
         error_message = f"An error occurred with model {selected_model_name}: {e}"
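
The loop above is meant to produce strictly alternating human/gpt turns (assuming the initial user prompt is recorded in `conversation` before the `try:` block, as the surrounding code suggests). A small hypothetical checker, not part of app.py, makes that invariant explicit:

def turns_alternate(conversation, first="human"):
    """Return True if entries alternate between 'human' and 'gpt' roles."""
    expected = first
    for turn in conversation:
        if turn.get("from") != expected:
            return False
        expected = "gpt" if expected == "human" else "human"
    return True

# Shape of a well-formed result:
convo = [
    {"from": "human", "value": "Hi Puff! Tell me a riddle?"},
    {"from": "gpt", "value": "*sparkle* What has keys but no locks?"},
    {"from": "human", "value": "Ooh, is it a piano?"},
    {"from": "gpt", "value": "Yes! A shiny piano!"},
]
assert turns_alternate(convo)
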
@@ -155,10 +169,11 @@ def push_to_huggingface_dataset():
             f.write(json.dumps(conv) + "\n")
 
         # Push the temporary file to the dataset repo
-        commit_message = f"Update conversations.jsonl from Gradio app on {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
+        current_time_str = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+        commit_message = f"Update conversations.jsonl from Gradio app on {current_time_str} (An Nhơn, Binh Dinh, Vietnam)"
         api.upload_file(
-            path_or_fileobj=temp_data_file,
-            path_in_repo="conversations.jsonl", # The target file name within the dataset repo
+            path_or_fileobj=DATA_FILE,
+            path_in_repo=os.path.basename(DATA_FILE), # The target file name within the dataset repo
             repo_id=HF_DATASET_REPO_ID,
             repo_type="dataset", # Specify repo_type="dataset"
             commit_message=commit_message,
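
For reference, `upload_file` pushes a single local file into the target repo as one commit, and `repo_type="dataset"` directs it at a dataset rather than a model repo. A minimal standalone sketch using the same repo id and the `HF_TOKEN` Space Secret described at the bottom of app.py (the local path is illustrative):

import os
from huggingface_hub import HfApi, login

login(token=os.environ["HF_TOKEN"])  # token comes from the Space Secret
api = HfApi()

api.upload_file(
    path_or_fileobj="generated/conversations_20240101_120000.jsonl",  # illustrative local path
    path_in_repo="conversations_20240101_120000.jsonl",  # name inside the repo, no local dir prefix
    repo_id="kulia-moon/LimeStory-1.0",
    repo_type="dataset",
    commit_message="Update conversations from Gradio app",
)
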
@@ -179,7 +194,7 @@ def push_to_huggingface_dataset():
 
 # --- Gradio Interface Logic ---
 
-def generate_and_display_conversations(num_conversations_input, custom_prompts_input):
+def generate_and_display_conversations(num_conversations_input, custom_prompts_input, custom_system_prompt_input):
     """
     Function to be called by Gradio to generate and return conversations,
     and then automatically push to the dataset.
@@ -207,27 +222,37 @@ def generate_and_display_conversations(num_conversations_input, custom_prompts_i
     model_names_to_use = list(AVAILABLE_MODELS.keys())
 
     generation_log = []
-    generation_log.append(f"Starting conversation generation at {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+    current_time_loc = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + " (An Nhơn, Binh Dinh, Vietnam)"
+    generation_log.append(f"Starting conversation generation at {current_time_loc}")
     generation_log.append(f"Generating {num_conversations} conversations.")
+    generation_log.append(f"Models to be used: {', '.join(model_names_to_use)}")
 
     for i in tqdm(range(num_conversations), desc="Generating conversations"):
         seed = random.randint(0, 1000000)
-        system = random.choice(role_play_prompts)
+
+        # Select system prompt: user's custom prompt if provided, else random from defaults
+        if custom_system_prompt_input:
+            system = custom_system_prompt_input.strip()
+        else:
+            system = random.choice(role_play_prompts)
 
         random_name = random.choice(DIVERSE_NAMES)
         prompt_template = random.choice(current_prompts)
+        # If [NAME] is absent from the template, replace() simply leaves the prompt unchanged
         prompt = prompt_template.replace("[NAME]", random_name)
 
-        selected_model_name = random.choice(model_names_to_use)
+        selected_model_name = random.choice(model_names_to_use) # Randomly pick from ALL models
 
+        generation_log.append(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] Generating conv {i+1}/{num_conversations} with '{selected_model_name}' (System: '{system[:50]}...')") # Log first 50 chars of system prompt
         conversation = chat(system, prompt, selected_model_name, seed=seed, num_exchanges=5)
+
         if len(conversation) > 1 and not any(d.get("from") == "error" for d in conversation):
             new_conversations.append({"model_used": selected_model_name, "conversations": conversation})
-            generation_log.append(f"Generated conversation {i+1}/{num_conversations} with model '{selected_model_name}'.")
+            generation_log.append(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] Successfully generated conv {i+1}/{num_conversations}.")
         else:
-            generation_log.append(f"Skipping conversation {i+1}/{num_conversations} due to error or no content.")
+            generation_log.append(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] Skipping conv {i+1}/{num_conversations} due to error or no content.")
             if conversation and conversation[-1].get("from") == "error":
-                generation_log.append(f"Error details: {conversation[-1]['value']}")
+                generation_log.append(f"  Error details: {conversation[-1]['value']}")
 
     all_conversations = existing_conversations + new_conversations
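
The system-prompt fallback above leans on truthiness: an empty Gradio textbox arrives as "" and falls through to a random default. A small sketch of just that selection step (the function name is illustrative; this slightly stricter variant also treats whitespace-only input as empty):

import random

role_play_prompts = ["You are Puff the dragon.", "You are Nimbus the cloud."]  # trimmed defaults

def pick_system_prompt(custom_system_prompt_input):
    # Blank or whitespace-only input falls back to a random role-play default
    if custom_system_prompt_input and custom_system_prompt_input.strip():
        return custom_system_prompt_input.strip()
    return random.choice(role_play_prompts)

assert pick_system_prompt("") in role_play_prompts
assert pick_system_prompt("   ") in role_play_prompts
assert pick_system_prompt("You are a pirate.") == "You are a pirate."
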
@@ -242,7 +267,7 @@ def generate_and_display_conversations(num_conversations_input, custom_prompts_i
     # --- Auto-push to Hugging Face Dataset ---
     push_status = push_to_huggingface_dataset()
     generation_log.append(push_status)
-    generation_log.append(f"Process complete at {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+    generation_log.append(f"Process complete at {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} (An Nhơn, Binh Dinh, Vietnam)")
 
     return json.dumps(all_conversations, indent=2), "\n".join(generation_log)
@@ -257,6 +282,13 @@ with gr.Blocks() as demo:
     with gr.Row():
         num_conversations_input = gr.Slider(minimum=1, maximum=20, value=3, step=1, label="Number of Conversations to Generate", info="More conversations take longer and might hit API limits.")
 
+    custom_system_prompt_input = gr.Textbox(
+        label="Custom System Prompt (optional)",
+        placeholder="e.g., You are a helpful and kind AI assistant.",
+        info="Define the AI's role or personality. If left empty, a random cute role-play prompt will be used.",
+        lines=3
+    )
+
     custom_prompts_input = gr.Textbox(
         label="Custom Initial Prompts (optional)",
         placeholder="e.g., What's your favorite color?, Tell me a joke, What makes you happy?",
@@ -267,11 +299,11 @@ with gr.Blocks() as demo:
     generate_button = gr.Button("Generate & Push Conversations")
 
     output_conversations = gr.JSON(label="Generated Conversations (Content of conversations.jsonl)")
-    output_log = gr.Textbox(label="Process Log", interactive=False, lines=10)
+    output_log = gr.Textbox(label="Process Log", interactive=False, lines=10, max_lines=20) # Increased max_lines for more log visibility
 
     generate_button.click(
         fn=generate_and_display_conversations,
-        inputs=[num_conversations_input, custom_prompts_input],
+        inputs=[num_conversations_input, custom_prompts_input, custom_system_prompt_input],
         outputs=[output_conversations, output_log],
         show_progress=True
     )
@@ -282,6 +314,9 @@ with gr.Blocks() as demo:
         f"`{HF_DATASET_REPO_ID}` using a Hugging Face token securely stored as a Space Secret (`HF_TOKEN`). "
         "User tokens are not required."
     )
+    current_datetime_vietnam = datetime.datetime.now(datetime.timezone(datetime.timedelta(hours=7))).strftime('%Y-%m-%d %H:%M:%S %Z%z')
+    gr.Markdown(f"Current server time: {current_datetime_vietnam} (Vietnam)")
 
 # Launch the Gradio app
 if __name__ == "__main__":
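
One caveat on the fixed-offset timestamp added at the end: a `datetime.timezone` built from an offset alone has no name, so `%Z` renders as `UTC+07:00` and the combined `'%Z%z'` prints the offset twice (`UTC+07:00+0700`). Passing a name gives a cleaner label; a minimal sketch:

import datetime

ict = datetime.timezone(datetime.timedelta(hours=7), "ICT")  # Indochina Time, UTC+7
now = datetime.datetime.now(ict)
print(now.strftime('%Y-%m-%d %H:%M:%S %Z%z'))  # e.g. 2025-01-01 12:00:00 ICT+0700
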
 