Upload app.py
app.py
CHANGED
@@ -359,80 +359,137 @@ def analyze_solution(question: str, solution: str):
-    """
-    Classify the math solution
-    Returns: (classification_label, confidence_score, explanation)
-    """
+def classify_solution_stream(question: str, solution: str, progress=gr.Progress()):
+    # always yield EXACTLY THREE outputs (classification, explanation, status)
     if not question.strip() or not solution.strip():
+        yield "Please fill in both fields", "", "⚠️ Provide a question and a solution."
+        return
+
+    # lazy init if needed
     if not models_ready():
+        yield "⏳ Loading models…", "", "Booting models…"
+        msg = load_model()
+        progress(0.1, desc="Loading models")
+        if not models_ready():
+            yield "Models not loaded", "", f"❌ {msg}"
+            return
+
     try:
+        # Stage 1 — conceptual
+        progress(0.2, desc="Stage 1: Conceptual check")
+        yield "⏳ Working…", "Starting conceptual check…", "🔎 **Stage 1:** running classifier…"
+        conceptual = run_conceptual_check(question, solution, classifier_model, classifier_tokenizer)
+        conf = conceptual['probabilities'][conceptual['prediction']]
+        yield "⏳ Working…", f"Stage 1: model predicts **{conceptual['prediction']}** (confidence {conf:.2%}). Now checking calculations…", \
+            f"✅ **Stage 1 done** — prediction: **{conceptual['prediction']}** (p={conf:.2%})."
+
+        # Stage 2 — computational
+        progress(0.6, desc="Stage 2: Computational check")
+        yield "⏳ Working…", "Running computational check…", "🧮 **Stage 2:** extracting & evaluating equations…"
+        computational = run_computational_check(solution, gemma_model, gemma_tokenizer)
+
+        # Final verdict
+        if computational["error"]:
+            classification = "computational_error"
+            explanation = (
+                f"A calculation error was found.\n"
+                f"On the line: \"{computational['line_text']}\"\n"
+                f"The correct calculation should be: {computational['correct_calc']}"
+            )
+            status = "🟥 **Final:** computational error detected."
+        else:
+            if conceptual['prediction'] == 'correct':
+                classification = 'correct'
+                explanation = "All calculations are correct and the overall logic appears to be sound."
+                status = "🟩 **Final:** correct."
+            else:
+                classification = 'conceptual_error'
+                explanation = "All calculations are correct, but there appears to be a conceptual error in the logic or setup of the solution."
+                status = "🟨 **Final:** conceptual issue."
+
+        progress(1.0, desc="Done")
+        yield classification, explanation, status
+
+    except Exception as e:
         logger.exception("inference failed")
+        yield "Runtime error", f"{type(e).__name__}: {e}", "❌ Exception during inference."

-# Create Gradio interface
+# Create Gradio interface (streaming UI)
 with gr.Blocks(title="Math Solution Classifier", theme=gr.themes.Soft()) as app:
     gr.Markdown("# 🧮 Math Solution Classifier")
-    gr.Markdown("Classify math solutions as correct
+    gr.Markdown("Classify math solutions as **correct**, **conceptually flawed**, or **computationally flawed**. "
+                "Status updates stream below as the two-stage pipeline runs.")
+
     with gr.Row():
+        # ---------- Left: inputs ----------
+        with gr.Column(scale=1):
            question_input = gr.Textbox(
                label="Math Question",
                placeholder="e.g., Solve for x: 2x + 5 = 13",
-               lines=3
+               lines=3,
            )
            solution_input = gr.Textbox(
                label="Proposed Solution",
                placeholder="e.g., 2x + 5 = 13\n2x = 13 - 5\n2x = 8\nx = 4",
+               lines=8,
            )
+            with gr.Row():
+                classify_btn = gr.Button("Classify Solution", variant="primary")
+                clear_btn = gr.Button("Clear")
+
+        # ---------- Right: outputs ----------
+        with gr.Column(scale=1):
            classification_output = gr.Textbox(label="Classification", interactive=False)
+            explanation_output = gr.Textbox(label="Explanation", interactive=False, lines=6)
+            status_output = gr.Markdown(value="*(idle)*")  # live stage updates
+
-    # Examples
+    # ---------- Examples ----------
     gr.Examples(
         examples=[
             [
                 "Solve for x: 2x + 5 = 13",
-                "2x + 5 = 13\n2x = 13 - 5\n2x = 8\nx = 4"
+                "2x + 5 = 13\n2x = 13 - 5\n2x = 8\nx = 4",
             ],
             [
                 "John has three apples and Mary has seven, how many apples do they have together?",
-                "They have 7 + 3 = 11 apples."
+                "They have 7 + 3 = 11 apples.",
             ],
             [
                 "What is 15% of 200?",
-                "15% = 15/100 = 0.15\n0.15 × 200 = 30"
-            ]
+                "15% = 15/100 = 0.15\n0.15 × 200 = 30",
+            ],
         ],
-        inputs=[question_input, solution_input]
+        inputs=[question_input, solution_input],
     )
+
+    # ---------- Wiring ----------
     classify_btn.click(
+        fn=classify_solution_stream,  # <- your generator function
         inputs=[question_input, solution_input],
-        outputs=[classification_output, explanation_output]
+        outputs=[classification_output, explanation_output, status_output],
+        show_progress="full",
     )

+    # Clear everything
+    clear_btn.click(
+        lambda: ("", "", "", "", "*(idle)*"),
+        inputs=None,
+        outputs=[
+            question_input,
+            solution_input,
+            classification_output,
+            explanation_output,
+            status_output,
+        ],
+        queue=False,
+    )
+
 if __name__ == "__main__":
     app.launch()
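
The streamed handler relies on four helpers defined earlier in app.py (`models_ready`, `load_model`, `run_conceptual_check`, `run_computational_check`) plus the module-level model and tokenizer globals; their bodies sit outside this hunk. Below is a minimal sketch of the interfaces the new code appears to assume, with return shapes inferred only from how the results are used in the diff; every body here is a placeholder stand-in, not the Space's real implementation.

```python
# Hypothetical stand-ins illustrating the interfaces classify_solution_stream expects.
# Return shapes are inferred from the diff above; the real helpers live elsewhere in app.py.

def models_ready() -> bool:
    # True once both the classifier and the Gemma-based checker are loaded.
    return True

def load_model() -> str:
    # Loads the models and returns a human-readable status message.
    return "models loaded"

def run_conceptual_check(question, solution, model, tokenizer) -> dict:
    # Stage 1: label the solution and report per-class probabilities.
    return {
        "prediction": "correct",
        "probabilities": {"correct": 0.91, "conceptual_error": 0.09},
    }

def run_computational_check(solution, model, tokenizer) -> dict:
    # Stage 2: re-evaluate each extracted equation; flag the first bad line, if any.
    return {"error": False, "line_text": "", "correct_calc": ""}
```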
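Because the click handler is a generator, every `yield` pushes a partial update to the three wired outputs in order (classification, explanation, status). One way to sanity-check that contract without launching the UI is to drive the generator directly; this is a hypothetical snippet, with `progress` replaced by a no-op so it can run outside a Gradio event.

```python
# Exercise the streaming handler outside the UI and print each partial update.
# Assumes app.py exposes classify_solution_stream exactly as in the diff above.
from app import classify_solution_stream

question = "Solve for x: 2x + 5 = 13"
solution = "2x + 5 = 13\n2x = 13 - 5\n2x = 8\nx = 4"

for classification, explanation, status in classify_solution_stream(
    question, solution, progress=lambda *args, **kwargs: None
):
    print(f"{status}\n  -> {classification}: {explanation}\n")
```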
|