Spaces:

gaonkarrs
/

RAG_Evaluation_System

Sleeping

App Files Files Community

gaonkarrs committed on Aug 2

Commit

d95e5de

1 Parent(s): eef9432

New changes

Browse files

Files changed (2) hide show

.gitignore +2 -0
app.py +47 -23

.gitignore CHANGED Viewed

	@@ -1 +1,3 @@
1	.env

 .env
+app copy.py
+new_gradio.py

app.py CHANGED Viewed

@@ -505,6 +505,22 @@ def compute_ragbench_metrics(judge_response: dict, retrieved_sentence_keys: list
         "Adherence": adherence
     }
 def evaluate_rag_pipeline(domain, q_indices):
     import torch
@@ -597,39 +613,47 @@ def evaluate_rag_pipeline(domain, q_indices):
 # Updated wrapper
 def evaluate_rag_gradio(domain, q_indices_str):
-    # Capture logs
     log_stream = io.StringIO()
     sys.stdout = log_stream
     try:
-        # Parse comma-separated indices
         q_indices = [int(x.strip()) for x in q_indices_str.split(",") if x.strip().isdigit()]
         results = evaluate_rag_pipeline(domain, q_indices)
         logs = log_stream.getvalue()
         return results, logs
     except Exception as e:
         traceback.print_exc()
         return {"error": str(e)}, log_stream.getvalue()
     finally:
-        sys.stdout = sys.__stdout__  # Restore stdout
-# Gradio interface
-iface = gr.Interface(
-    fn=evaluate_rag_gradio,
-    inputs=[
-        gr.Dropdown(choices=["Legal", "Medical", "GK", "CS", "Finance"], label="Domain"),
-        gr.Textbox(label="Comma-separated Query Indices (e.g. 89,121,245)", lines=1),
-    ],
-    outputs=[
-        gr.JSON(label="Evaluation Metrics (RMSE & AUC-ROC)"),
-        gr.Textbox(label="Execution Log", lines=10, interactive=True),
-    ],
-    title="RAG Evaluation Dashboard",
-    description="Evaluate your RAG pipeline across selected queries using GPT-based generation and judgment."
-)
-# Launch app
-iface.launch(server_name="0.0.0.0", server_port=7860, debug=True)

         "Adherence": adherence
     }
+# --- Dataset dictionary ---
+domain_datasets = {
+    "Legal": legal_dataset,
+    "Medical": med_dataset,
+    "GK": gk_dataset,
+    "CS": cs_dataset,
+    "Finance": fin_dataset
+}
+# --- Get questions for selected domain ---
+def get_questions_for_domain(domain):
+    dataset = domain_datasets.get(domain, [])
+    if not dataset:
+        return "⚠️ No dataset found for the selected domain."
+    return "\n".join([f"{i}. {item['question']}" for i, item in enumerate(dataset)])
 def evaluate_rag_pipeline(domain, q_indices):
     import torch
 # Updated wrapper
 def evaluate_rag_gradio(domain, q_indices_str):
     """Run ``evaluate_rag_pipeline`` and return its result with the captured log.
 
     Args:
         domain: Domain name, passed through unchanged to
             ``evaluate_rag_pipeline``.
         q_indices_str: Comma-separated query indices (e.g. ``"89,121,245"``).
             Tokens that are not made up solely of digits are ignored.
 
     Returns:
         A ``(results, logs)`` tuple. ``results`` is whatever the pipeline
         returns, or ``{"error": <message>}`` if anything raised. ``logs`` is
         the text printed to stdout during the run, followed by the traceback
         when an error occurred.
     """
     # Imported locally so this function does not depend on a module-level import.
     from contextlib import redirect_stdout
 
     log_stream = io.StringIO()
     try:
         # redirect_stdout puts back whichever stream was active before the
         # call (not unconditionally sys.__stdout__) once the block exits.
         # NOTE(review): the redirect is process-wide, so output from
         # concurrent calls could interleave in the captured log.
         with redirect_stdout(log_stream):
             q_indices = [
                 int(token.strip())
                 for token in q_indices_str.split(",")
                 if token.strip().isdigit()
             ]
             results = evaluate_rag_pipeline(domain, q_indices)
         return results, log_stream.getvalue()
     except Exception as e:
         # print_exc() defaults to sys.stderr, which is not captured; write the
         # traceback into the log buffer so it shows up in the returned log.
         traceback.print_exc(file=log_stream)
         return {"error": str(e)}, log_stream.getvalue()
+# === Gradio UI using Blocks ===
+    with gr.Blocks(title="RAG Evaluation Dashboard") as demo:
+        gr.Markdown("## 📊 RAG Evaluation Dashboard")
+        gr.Markdown("Evaluate your RAG pipeline and also browse the questions available for each domain.")
+    with gr.Row():
+        domain_input = gr.Dropdown(choices=list(domain_datasets.keys()), label="Select Domain")
+        q_index_input = gr.Textbox(label="Enter Query Indices (e.g., 89,121,245)", lines=1)
+    with gr.Row():
+        view_btn = gr.Button("📋 View Questions for Selected Domain")
+        questions_display = gr.Textbox(label="Domain Questions", lines=10, interactive=False)
+    with gr.Row():
+        run_btn = gr.Button("🚀 Run Evaluation")
+    result_output = gr.JSON(label="Evaluation Metrics (RMSE & AUC-ROC)")
+    log_output = gr.Textbox(label="Execution Log", lines=10, interactive=True)
+    # Bindings
+    view_btn.click(fn=get_questions_for_domain, inputs=domain_input, outputs=questions_display)
+    run_btn.click(
+        fn=evaluate_rag_gradio,
+        inputs=[domain_input, q_index_input],
+        outputs=[result_output, log_output]
+    )
+# === Launch ===
+demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)