kulia-moon committed on
Commit 424d82d · verified · 1 Parent(s): 6e3ff91

Changed:))

Files changed (1)
  1. app.py +83 -48
app.py CHANGED
@@ -6,12 +6,12 @@ import json
 import os
 from tqdm import tqdm
 from huggingface_hub import HfApi, login
-import datetime # For timestamping logs and commits
+import datetime
 
 # --- Configuration for the Gradio app's internal logic ---
 # Local cache directory (data will be accumulated here first)
 OUTPUT_DIR = "generated"
-DATA_FILE = os.path.join(OUTPUT_DIR, "conversations.jsonl")
+DATA_FILE = os.path.join(OUTPUT_DIR, f"conversations_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.jsonl")
 
 # Hugging Face Dataset repository to push to
 HF_DATASET_REPO_ID = "kulia-moon/LimeStory-1.0" # This is the target dataset
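
Note on the timestamped filename above: with a plain `import datetime`, `now()` must be called as `datetime.datetime.now()` (a bare `datetime.now()` raises AttributeError), and the default string form of a timestamp contains spaces and colons that make poor file names and repo paths. A minimal sketch of the filesystem-safe pattern:

import datetime
import os

OUTPUT_DIR = "generated"

# Digits and underscores only, so the name is safe as a file name and as a repo path
stamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
DATA_FILE = os.path.join(OUTPUT_DIR, f"conversations_{stamp}.jsonl")
print(DATA_FILE)  # e.g. generated/conversations_20240101_120000.jsonl
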
@@ -22,12 +22,27 @@ client = openai.OpenAI(
     api_key="none" # Pollinations.ai doesn't require an API key
 )
 
-# Define models (prioritizing fast ones)
+# Define ALL available models from https://text.pollinations.ai/models
+# This list is more comprehensive. Speeds are approximate relative to each other.
 AVAILABLE_MODELS = {
     "openai": {"description": "GPT-4o mini (generally fast, good all-rounder)", "speed": "Fast"},
     "gemini": {"description": "Gemini 2.0 Flash (designed for speed)", "speed": "Very Fast"},
     "mistral": {"description": "Mistral 3.1 (often performant for its size)", "speed": "Fast"},
-    "llama": {"description": "Llama 3.3 70B (larger, might be slower, but good for diversity)", "speed": "Moderate"},
+    "llama": {"description": "Llama 3.3 70B (larger, good for diversity)", "speed": "Moderate"},
+    "claude": {"description": "Claude 3.5 Haiku (via Pollinations gateway, good for chat)", "speed": "Moderate"},
+    "qwen-coder": {"description": "Qwen 2.5 Coder 32B (coder-focused, general chat is okay)", "speed": "Moderate"},
+    "gemma": {"description": "Gemma 7B (Google's open model, good generalist)", "speed": "Moderate"},
+    "dbrx": {"description": "DBRX (Databricks's large open model, might be slower)", "speed": "Slow"},
+    "mixtral": {"description": "Mixtral 8x7B (Mixture of Experts, good balance of speed/quality)", "speed": "Fast/Moderate"},
+    "command-r": {"description": "Command R (Cohere's powerful model)", "speed": "Moderate"},
+    "cohere-chat": {"description": "Cohere's general chat model", "speed": "Moderate"},
+    "pplx-7b": {"description": "Perplexity Llama 2 7B (fast, good code/text)", "speed": "Fast"},
+    "pplx-70b": {"description": "Perplexity Llama 2 70B (larger, more capable Perplexity model)", "speed": "Moderate"},
+    "yi-34b": {"description": "Yi 34B (zero-one.ai model, capable generalist)", "speed": "Moderate"},
+    "grok": {"description": "Grok (X.ai's model, may have specific tone/style)", "speed": "Moderate"},
+    "stable-lm": {"description": "Stable LM (Stability AI's model)", "speed": "Fast"},
+    "nous-hermes": {"description": "Nous Hermes (fine-tune of Mistral)", "speed": "Fast"},
+    "openchat": {"description": "OpenChat 3.5 (fine-tune of Mistral)", "speed": "Fast"},
 }
 
 # Diverse Names Dataset
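
Each key in AVAILABLE_MODELS is what ends up as the `model` argument to the OpenAI-compatible client declared at the top of app.py. A minimal standalone sketch, assuming the Pollinations base URL (https://text.pollinations.ai/openai) that the diff context implies but does not show, and using a trimmed stand-in for the dict above:

import random
import openai

AVAILABLE_MODELS = {"openai": {}, "gemini": {}, "mistral": {}}  # trimmed stand-in for the full dict

client = openai.OpenAI(
    base_url="https://text.pollinations.ai/openai",  # assumed endpoint, not shown in this hunk
    api_key="none"  # Pollinations.ai doesn't require an API key
)

model_id = random.choice(list(AVAILABLE_MODELS.keys()))
response = client.chat.completions.create(
    model=model_id,
    messages=[{"role": "user", "content": "Say hello in five words."}],
    max_tokens=20,
)
print(model_id, "->", response.choices[0].message.content)
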
@@ -38,7 +53,7 @@ DIVERSE_NAMES = [
     "Eva", "Omar", "Anya", "Arthur", "Zoe", "Dante", "Freya", "Ivan", "Layla", "Milo"
 ]
 
-# Role-playing system prompts
+# Role-playing system prompts (defaults if user doesn't provide one)
 role_play_prompts = [
     "You are a mischievous but sweet little dragon, Puff, who loves shiny objects and telling riddles. Respond with playful fire sparks and curious questions.",
     "You are a fluffy cloud, Nimbus, who enjoys floating peacefully and bringing gentle rain to flowers. Speak with soft, dreamy words and comforting observations.",
@@ -80,40 +95,39 @@ def chat(system, prompt, selected_model_name, seed=None, num_exchanges=5):
     ]
 
     try:
-        for i in range(num_exchanges):
-            response = client.chat.completions.create(
-                model=selected_model_name,
-                messages=messages,
-                max_tokens=150,
-                temperature=0.9,
-                seed=seed
-            )
-            gpt_response = response.choices[0].message.content.strip()
-
-            conversation.append({"from": "gpt", "value": gpt_response})
-
-            if i < num_exchanges - 1:
-                follow_up_prompt_messages = [
-                    {"role": "system", "content": f"You are a helpful and engaging assistant. Based on the last response, generate a polite, open-ended, and cute follow-up question or statement to keep a friendly conversation going. Make it relevant to the last message and consistent with a 'cute' and positive tone."},
-                    {"role": "assistant", "content": gpt_response},
-                    {"role": "user", "content": "Generate a cute and friendly follow-up."}
-                ]
-
-                follow_up_response = client.chat.completions.create(
-                    model=selected_model_name,
-                    messages=follow_up_prompt_messages,
-                    max_tokens=70,
-                    temperature=0.8,
-                    seed=seed + 1000
-                )
-                follow_up = follow_up_response.choices[0].message.content.strip()
-
-                conversation.append({"from": "human", "value": follow_up})
-
-            messages.append({"role": "assistant", "content": gpt_response})
-            messages.append({"role": "user", "content": follow_up})
-            seed += 1
+        # First exchange: the model answers the initial prompt
+        response = client.chat.completions.create(
+            model=selected_model_name,
+            messages=messages,
+            max_tokens=150,
+            temperature=0.9,
+            seed=seed
+        )
+        gpt_response = response.choices[0].message.content.strip()
+        conversation.append({"from": "gpt", "value": gpt_response})
+        messages.append({"role": "assistant", "content": gpt_response})
+
+        for i in range(num_exchanges - 1): # Loop for subsequent exchanges
+            follow_up_prompt_messages = [
+                {"role": "system", "content": "You are a helpful and engaging assistant. Based on the last response, generate a polite, open-ended, and cute follow-up question or statement to keep a friendly conversation going. Make it relevant to the last message and consistent with a 'cute' and positive tone."},
+                {"role": "assistant", "content": gpt_response},
+                {"role": "user", "content": "Generate a cute and friendly follow-up."}
+            ]
+
+            follow_up_response = client.chat.completions.create(
+                model=selected_model_name,
+                messages=follow_up_prompt_messages,
+                max_tokens=70,
+                temperature=0.8,
+                seed=seed + 1000 + i # Vary seed for follow-ups
+            )
+            follow_up = follow_up_response.choices[0].message.content.strip()
+            conversation.append({"from": "human", "value": follow_up})
+            messages.append({"role": "user", "content": follow_up})
+
+            # Have the model answer the follow-up so human/gpt turns keep alternating
+            response = client.chat.completions.create(
+                model=selected_model_name,
+                messages=messages,
+                max_tokens=150,
+                temperature=0.9,
+                seed=seed + i + 1
+            )
+            gpt_response = response.choices[0].message.content.strip()
+            conversation.append({"from": "gpt", "value": gpt_response})
+            messages.append({"role": "assistant", "content": gpt_response})
 
         return conversation
     except Exception as e:
         error_message = f"An error occurred with model {selected_model_name}: {e}"
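
The loop above is meant to produce strictly alternating human/gpt turns (assuming the initial user prompt is recorded in `conversation` before the `try:` block, as the surrounding code suggests). A small hypothetical checker, not part of app.py, makes that invariant explicit:

def turns_alternate(conversation, first="human"):
    """Return True if entries alternate between 'human' and 'gpt' roles."""
    expected = first
    for turn in conversation:
        if turn.get("from") != expected:
            return False
        expected = "gpt" if expected == "human" else "human"
    return True

# Shape of a well-formed result:
convo = [
    {"from": "human", "value": "Hi Puff! Tell me a riddle?"},
    {"from": "gpt", "value": "*sparkle* What has keys but no locks?"},
    {"from": "human", "value": "Ooh, is it a piano?"},
    {"from": "gpt", "value": "Yes! A shiny piano!"},
]
assert turns_alternate(convo)
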
@@ -155,10 +169,11 @@ def push_to_huggingface_dataset():
             f.write(json.dumps(conv) + "\n")
 
         # Push the temporary file to the dataset repo
-        commit_message = f"Update conversations.jsonl from Gradio app on {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
+        current_time_str = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+        commit_message = f"Update conversations.jsonl from Gradio app on {current_time_str} (An Nhơn, Binh Dinh, Vietnam)"
         api.upload_file(
-            path_or_fileobj=temp_data_file,
-            path_in_repo="conversations.jsonl", # The target file name within the dataset repo
+            path_or_fileobj=DATA_FILE,
+            path_in_repo=os.path.basename(DATA_FILE), # The target file name within the dataset repo
             repo_id=HF_DATASET_REPO_ID,
             repo_type="dataset", # Specify repo_type="dataset"
             commit_message=commit_message,
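
For reference, `upload_file` pushes a single local file into the target repo as one commit, and `repo_type="dataset"` directs it at a dataset rather than a model repo. A minimal standalone sketch using the same repo id and the `HF_TOKEN` Space Secret described at the bottom of app.py (the local path is illustrative):

import os
from huggingface_hub import HfApi, login

login(token=os.environ["HF_TOKEN"])  # token comes from the Space Secret
api = HfApi()

api.upload_file(
    path_or_fileobj="generated/conversations_20240101_120000.jsonl",  # illustrative local path
    path_in_repo="conversations_20240101_120000.jsonl",  # name inside the repo, no local dir prefix
    repo_id="kulia-moon/LimeStory-1.0",
    repo_type="dataset",
    commit_message="Update conversations from Gradio app",
)
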
@@ -179,7 +194,7 @@ def push_to_huggingface_dataset():
 
 # --- Gradio Interface Logic ---
 
-def generate_and_display_conversations(num_conversations_input, custom_prompts_input):
+def generate_and_display_conversations(num_conversations_input, custom_prompts_input, custom_system_prompt_input):
     """
     Function to be called by Gradio to generate and return conversations,
     and then automatically push to the dataset.
@@ -207,27 +222,37 @@ def generate_and_display_conversations(num_conversations_input, custom_prompts_i
     model_names_to_use = list(AVAILABLE_MODELS.keys())
 
     generation_log = []
-    generation_log.append(f"Starting conversation generation at {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+    current_time_loc = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + " (An Nhơn, Binh Dinh, Vietnam)"
+    generation_log.append(f"Starting conversation generation at {current_time_loc}")
     generation_log.append(f"Generating {num_conversations} conversations.")
+    generation_log.append(f"Models to be used: {', '.join(model_names_to_use)}")
 
     for i in tqdm(range(num_conversations), desc="Generating conversations"):
         seed = random.randint(0, 1000000)
-        system = random.choice(role_play_prompts)
+
+        # Select system prompt: user's custom prompt if provided, else random from defaults
+        if custom_system_prompt_input:
+            system = custom_system_prompt_input.strip()
+        else:
+            system = random.choice(role_play_prompts)
 
         random_name = random.choice(DIVERSE_NAMES)
         prompt_template = random.choice(current_prompts)
+        # If [NAME] is absent from the template, replace() simply leaves the prompt unchanged
         prompt = prompt_template.replace("[NAME]", random_name)
 
-        selected_model_name = random.choice(model_names_to_use)
+        selected_model_name = random.choice(model_names_to_use) # Randomly pick from ALL models
 
+        generation_log.append(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] Generating conv {i+1}/{num_conversations} with '{selected_model_name}' (System: '{system[:50]}...')") # Log first 50 chars of system prompt
         conversation = chat(system, prompt, selected_model_name, seed=seed, num_exchanges=5)
+
         if len(conversation) > 1 and not any(d.get("from") == "error" for d in conversation):
             new_conversations.append({"model_used": selected_model_name, "conversations": conversation})
-            generation_log.append(f"Generated conversation {i+1}/{num_conversations} with model '{selected_model_name}'.")
+            generation_log.append(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] Successfully generated conv {i+1}/{num_conversations}.")
         else:
-            generation_log.append(f"Skipping conversation {i+1}/{num_conversations} due to error or no content.")
+            generation_log.append(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] Skipping conv {i+1}/{num_conversations} due to error or no content.")
             if conversation and conversation[-1].get("from") == "error":
-                generation_log.append(f"Error details: {conversation[-1]['value']}")
+                generation_log.append(f"  Error details: {conversation[-1]['value']}")
 
     all_conversations = existing_conversations + new_conversations
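
The system-prompt fallback above leans on truthiness: an empty Gradio textbox arrives as "" and falls through to a random default. A small sketch of just that selection step (the function name is illustrative; this slightly stricter variant also treats whitespace-only input as empty):

import random

role_play_prompts = ["You are Puff the dragon.", "You are Nimbus the cloud."]  # trimmed defaults

def pick_system_prompt(custom_system_prompt_input):
    # Blank or whitespace-only input falls back to a random role-play default
    if custom_system_prompt_input and custom_system_prompt_input.strip():
        return custom_system_prompt_input.strip()
    return random.choice(role_play_prompts)

assert pick_system_prompt("") in role_play_prompts
assert pick_system_prompt("   ") in role_play_prompts
assert pick_system_prompt("You are a pirate.") == "You are a pirate."
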
@@ -242,7 +267,7 @@ def generate_and_display_conversations(num_conversations_input, custom_prompts_i
     # --- Auto-push to Hugging Face Dataset ---
     push_status = push_to_huggingface_dataset()
     generation_log.append(push_status)
-    generation_log.append(f"Process complete at {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+    generation_log.append(f"Process complete at {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} (An Nhơn, Binh Dinh, Vietnam)")
 
     return json.dumps(all_conversations, indent=2), "\n".join(generation_log)
@@ -257,6 +282,13 @@ with gr.Blocks() as demo:
     with gr.Row():
         num_conversations_input = gr.Slider(minimum=1, maximum=20, value=3, step=1, label="Number of Conversations to Generate", info="More conversations take longer and might hit API limits.")
 
+    custom_system_prompt_input = gr.Textbox(
+        label="Custom System Prompt (optional)",
+        placeholder="e.g., You are a helpful and kind AI assistant.",
+        info="Define the AI's role or personality. If left empty, a random cute role-play prompt will be used.",
+        lines=3
+    )
+
     custom_prompts_input = gr.Textbox(
         label="Custom Initial Prompts (optional)",
         placeholder="e.g., What's your favorite color?, Tell me a joke, What makes you happy?",
@@ -267,11 +299,11 @@ with gr.Blocks() as demo:
     generate_button = gr.Button("Generate & Push Conversations")
 
     output_conversations = gr.JSON(label="Generated Conversations (Content of conversations.jsonl)")
-    output_log = gr.Textbox(label="Process Log", interactive=False, lines=10)
+    output_log = gr.Textbox(label="Process Log", interactive=False, lines=10, max_lines=20) # Increased max_lines for more log visibility
 
     generate_button.click(
         fn=generate_and_display_conversations,
-        inputs=[num_conversations_input, custom_prompts_input],
+        inputs=[num_conversations_input, custom_prompts_input, custom_system_prompt_input],
         outputs=[output_conversations, output_log],
         show_progress=True
     )
@@ -282,6 +314,9 @@ with gr.Blocks() as demo:
         f"`{HF_DATASET_REPO_ID}` using a Hugging Face token securely stored as a Space Secret (`HF_TOKEN`). "
         "User tokens are not required."
     )
+    current_datetime_vietnam = datetime.datetime.now(datetime.timezone(datetime.timedelta(hours=7))).strftime('%Y-%m-%d %H:%M:%S %Z%z')
+    gr.Markdown(f"Current server time: {current_datetime_vietnam} (Vietnam)")
 
 # Launch the Gradio app
 if __name__ == "__main__":
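
One caveat on the fixed-offset timestamp added at the end: a `datetime.timezone` built from an offset alone has no name, so `%Z` renders as `UTC+07:00` and the combined `'%Z%z'` prints the offset twice (`UTC+07:00+0700`). Passing a name gives a cleaner label; a minimal sketch:

import datetime

ict = datetime.timezone(datetime.timedelta(hours=7), "ICT")  # Indochina Time, UTC+7
now = datetime.datetime.now(ict)
print(now.strftime('%Y-%m-%d %H:%M:%S %Z%z'))  # e.g. 2025-01-01 12:00:00 ICT+0700
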
 