yongdong committed
Commit 1f6f70b · 1 Parent(s): c6b828a

Disable sampling for deterministic JSON output

Files changed (1)
  1. app.py +8 -29
app.py CHANGED
@@ -92,7 +92,7 @@ def load_model_on_gpu():
         raise load_error
 
 @spaces.GPU(duration=60)  # GPU inference
-def generate_response_gpu(prompt, max_tokens=200, temperature=0.7, top_p=0.9):
+def generate_response_gpu(prompt, max_tokens=512):
     """Generate response - executed on GPU"""
     global model
 
@@ -109,7 +109,6 @@ def generate_response_gpu(prompt, max_tokens=200, temperature=0.7, top_p=0.9):
 
     try:
         formatted_prompt = (
-            "You are a JSON generator. Please output only a valid JSON object and no additional text.\n\n"
             "### Instruction:\n"
             f"{prompt.strip()}\n\n"
             "### Response:\n"
@@ -128,9 +127,7 @@
         outputs = model.generate(
             **inputs,
             max_new_tokens=max_tokens,
-            do_sample=True,
-            temperature=temperature,
-            top_p=top_p,
+            do_sample=False,
             pad_token_id=tokenizer.pad_token_id,
             eos_token_id=tokenizer.eos_token_id,
             repetition_penalty=1.1,
@@ -152,7 +149,7 @@
     except Exception as generation_error:
         return f"❌ Generation Error: {str(generation_error)}"
 
-def chat_interface(message, history, max_tokens, temperature, top_p):
+def chat_interface(message, history, max_tokens):
     """Chat interface - runs on CPU, calls GPU functions"""
     if not message.strip():
         return history, ""
@@ -163,7 +160,7 @@ def chat_interface(message, history, max_tokens, temperature, top_p):
 
     try:
         # Call GPU function to generate response
-        response = generate_response_gpu(message, max_tokens, temperature, top_p)
+        response = generate_response_gpu(message, max_tokens)
         history.append((message, response))
         return history, ""
     except Exception as chat_error:
@@ -226,31 +223,13 @@ with gr.Blocks(
 
         max_tokens = gr.Slider(
             minimum=50,
-            maximum=500,
-            value=200,
+            maximum=5000,
+            value=512,
             step=10,
             label="Max Tokens",
             info="Maximum number of tokens to generate"
         )
 
-        temperature = gr.Slider(
-            minimum=0.1,
-            maximum=2.0,
-            value=0.7,
-            step=0.1,
-            label="Temperature",
-            info="Controls randomness (lower = more focused)"
-        )
-
-        top_p = gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.9,
-            step=0.05,
-            label="Top-p",
-            info="Nucleus sampling threshold"
-        )
-
         gr.Markdown("""
         ### 📊 Model Status
        - **Hardware**: ZeroGPU (Dynamic Nvidia H200)
@@ -268,13 +247,13 @@
     # Event handling
     msg.submit(
         chat_interface,
-        inputs=[msg, chatbot, max_tokens, temperature, top_p],
+        inputs=[msg, chatbot, max_tokens],
         outputs=[chatbot, msg]
     )
 
     send_btn.click(
         chat_interface,
-        inputs=[msg, chatbot, max_tokens, temperature, top_p],
+        inputs=[msg, chatbot, max_tokens],
         outputs=[chatbot, msg]
     )
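For context on the change: with `do_sample=False`, the `transformers` `generate()` call uses greedy decoding, so `temperature` and `top_p` no longer influence the output and the same prompt maps to the same completion. Below is a minimal sketch of that behavior outside the Space, using `gpt2` as a placeholder checkpoint (the Space loads its own model in app.py; all names here are illustrative only):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder checkpoint for illustration; app.py loads the Space's real model.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

prompt = '### Instruction:\nReturn {"status": "ok"} as JSON.\n\n### Response:\n'
inputs = tokenizer(prompt, return_tensors="pt")

def gen():
    # do_sample=False -> greedy decoding; temperature/top_p are ignored.
    return model.generate(
        **inputs,
        max_new_tokens=32,
        do_sample=False,
        repetition_penalty=1.1,
        pad_token_id=tokenizer.eos_token_id,  # gpt2 has no pad token
    )

out_a, out_b = gen(), gen()
assert (out_a == out_b).all()  # same prompt -> same tokens
print(tokenizer.decode(out_a[0], skip_special_tokens=True))
```

Greedy decoding is deterministic up to floating-point and kernel-level nondeterminism on GPU hardware, which is also why the now-unused Temperature and Top-p sliders are dropped from the UI in this commit.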