Spaces:

taesiri
/

SubmitQuestions

Paused

App Files Files Community

taesiri committed on Jan 10

Commit

71783c2

1 Parent(s): b522908

backup

Browse files

Files changed (1) hide show

app.py +603 -0

app.py ADDED Viewed

	@@ -0,0 +1,603 @@

+import gradio as gr
+import base64
+import json
+import os
+import shutil
+import uuid
+import shortuuid
+from huggingface_hub import CommitScheduler
# Background uploader: pushes the contents of the local ./data folder to the
# "data" directory of the taesiri/EdgeQuest dataset repository.
# NOTE(review): `every` is presumably a number of minutes, per the
# huggingface_hub CommitScheduler documentation — confirm.
scheduler = CommitScheduler(
    every=1,
    folder_path="./data",
    path_in_repo="data",
    repo_type="dataset",
    repo_id="taesiri/EdgeQuest",
)
def _parse_categories(raw):
    """Split a comma-separated tag string into a list of trimmed, non-empty tags."""
    if not raw:
        return []
    # Drop empty pieces so "a,,b" or a trailing comma does not produce "" tags.
    return [tag for tag in (piece.strip() for piece in raw.split(",")) if tag]


def _save_request_images(request_folder, labeled_images):
    """Store every provided image in *request_folder* as ``<label>.png``.

    Returns the ``(label, saved_path)`` pairs in input order.
    """
    saved = []
    for label, image in labeled_images:
        # An empty slot arrives as None; an empty path is treated the same way
        # instead of failing inside shutil.
        if image is None or (isinstance(image, str) and not image):
            continue
        # NOTE: the stored file is always named *.png, whatever the format of
        # the uploaded source file is.
        target = os.path.join(request_folder, f"{label}.png")
        if isinstance(image, str):
            # A string is a path on disk: copy it, keeping file metadata.
            shutil.copy2(image, target)
        else:
            # Anything else is presumably in-memory image data (e.g. a numpy
            # array) — handled by Gradio's own writer.
            gr.processing_utils.save_image(image, target)
        saved.append((label, target))
    return saved


def _image_entry(label, url_key, url_value):
    """Build one ``image_url`` content item in the layout shared by both JSON files."""
    return {
        "type": "image_url",
        "label": label,
        "image_url": {"url": {url_key: url_value}},
    }


def _write_json_line(path, record):
    """Serialize *record* as a single UTF-8 JSON line at *path*."""
    with open(path, "w", encoding="utf-8") as handle:
        handle.write(json.dumps(record, ensure_ascii=False) + "\n")


def generate_json_files(
    system_message,
    # New fields
    name,
    email_address,
    institution,
    openreview_profile,
    question_categories,
    subquestion_1_text,
    subquestion_1_answer,
    subquestion_2_text,
    subquestion_2_answer,
    # Existing fields
    question,
    final_answer,
    rationale_text,
    # Question images
    image1,
    image2,
    image3,
    image4,
    # Rationale images
    rationale_image1,
    rationale_image2,
):
    """Persist one submission under ``./data/<uuid>/`` and return its JSON paths.

    For each request:
      1) Create a unique folder under ./data/
      2) Copy uploaded images (question + rationale) into that folder
      3) Produce two JSON files:
         - request_urls.json   (relative file paths for the images)
         - request_base64.json (base64-encoded images)
      4) Return the paths of both files

    Args:
        system_message: System prompt; falls back to
            "You are a helpful assistant" when empty or None.
        name, email_address, institution, openreview_profile: Author
            metadata; None becomes "".
        question_categories: Comma-separated tags; stored as a list of
            trimmed, non-empty strings.
        subquestion_1_text, subquestion_1_answer, subquestion_2_text,
        subquestion_2_answer, question, final_answer, rationale_text:
            Free-text fields; None becomes "".
        image1..image4: Question images (file path, image data, or None).
        rationale_image1, rationale_image2: Rationale images (same forms).

    Returns:
        Tuple ``(urls_json_path, base64_json_path)``.

    Raises:
        OSError: If an image cannot be copied or a file cannot be written.
            The request folder is removed before the error propagates, so
            no half-written submission is left behind.
    """

    def _text(value):
        # Unset text inputs arrive as None; store them as empty strings.
        return value if value is not None else ""

    if not system_message:
        system_message = "You are a helpful assistant"

    name = _text(name)
    email_address = _text(email_address)
    institution = _text(institution)
    openreview_profile = _text(openreview_profile)
    question_categories = _parse_categories(question_categories)
    subquestion_1_text = _text(subquestion_1_text)
    subquestion_1_answer = _text(subquestion_1_answer)
    subquestion_2_text = _text(subquestion_2_text)
    subquestion_2_answer = _text(subquestion_2_answer)
    question = _text(question)
    final_answer = _text(final_answer)
    rationale_text = _text(rationale_text)

    # Every request gets its own folder named after a fresh UUID.
    parent_data_folder = "./data"
    os.makedirs(parent_data_folder, exist_ok=True)
    request_folder = os.path.join(parent_data_folder, str(uuid.uuid4()))
    os.makedirs(request_folder)

    try:
        saved_images = _save_request_images(
            request_folder,
            [
                ("question_image_1", image1),
                ("question_image_2", image2),
                ("question_image_3", image3),
                ("question_image_4", image4),
                ("rationale_image_1", rationale_image1),
                ("rationale_image_2", rationale_image2),
            ],
        )

        # Each saved image is referenced twice: by relative path and as base64.
        question_urls, rationale_urls = [], []
        question_b64, rationale_b64 = [], []
        for label, path in saved_images:
            rel_path = os.path.join(".", os.path.basename(path))
            with open(path, "rb") as handle:
                encoded = base64.b64encode(handle.read()).decode("utf-8")
            url_item = _image_entry(label, "data:image/png;path", rel_path)
            b64_item = _image_entry(label, "data:image/png;base64", encoded)
            if label.startswith("question_image"):
                question_urls.append(url_item)
                question_b64.append(b64_item)
            else:
                rationale_urls.append(url_item)
                rationale_b64.append(b64_item)

        # NOTE: every record carries the same custom_id; the unique identifier
        # of a submission is its folder name.
        custom_id = "request______1"
        subquestions = [
            {"text": subquestion_1_text, "answer": subquestion_1_answer},
            {"text": subquestion_2_text, "answer": subquestion_2_answer},
        ]

        # A) URLs JSON
        item_urls = {
            "custom_id": custom_id,
            # Metadata at top level
            "name": name,
            "email_address": email_address,
            "institution": institution,
            "openreview_profile": openreview_profile,
            "question_categories": question_categories,
            "question": {
                "messages": [
                    {"role": "system", "content": system_message},
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "label": "question", "value": question},
                            *question_urls,
                        ],
                    },
                ],
            },
            "subquestions": subquestions,
            "answer": {
                "final_answer": final_answer,
                "rationale_text": rationale_text,
                "rationale_images": rationale_urls,
            },
        }

        # B) Base64 JSON
        item_base64 = {
            "custom_id": custom_id,
            # Metadata at top level
            "name": name,
            "email_address": email_address,
            "institution": institution,
            "openreview_profile": openreview_profile,
            # Question-related fields at top level
            "question_categories": question_categories,
            "subquestions": subquestions,
            "final_answer": final_answer,
            "rationale_text": rationale_text,
            "body": {
                "model": "MODEL_NAME",
                "messages": [
                    {"role": "system", "content": system_message},
                    {
                        "role": "user",
                        "content": [
                            {"type": "field", "label": "question", "value": question},
                            *question_b64,
                        ],
                    },
                    {
                        "role": "assistant",
                        "content": [
                            {"type": "text", "text": rationale_text},
                            {"type": "text", "text": final_answer},
                            *rationale_b64,
                        ],
                    },
                ],
            },
        }

        urls_jsonl_path = os.path.join(request_folder, "request_urls.json")
        base64_jsonl_path = os.path.join(request_folder, "request_base64.json")
        _write_json_line(urls_jsonl_path, item_urls)
        _write_json_line(base64_jsonl_path, item_base64)
    except Exception:
        # Do not leave a partial submission in the data folder.
        shutil.rmtree(request_folder, ignore_errors=True)
        raise

    return urls_jsonl_path, base64_jsonl_path
# Markup for the "Instructions" accordion. The indentation inside the literal
# is kept exactly as authored so the rendered HTML string does not change.
INSTRUCTIONS_HTML = """
            <h3>Instructions:</h3>
            <p>Welcome to the Hugging Face space for collecting questions for new benchmark datasets.</p>
            <table style="width:100%; border-collapse: collapse; margin: 10px 0;">
                <tr>
                    <th style="width:50%; background-color: #3366f0; padding: 8px; text-align: left; border: 1px solid #ddd;">
                        Required Fields
                    </th>
                    <th style="width:50%; background-color: #3366f0; padding: 8px; text-align: left; border: 1px solid #ddd;">
                        Optional Fields
                    </th>
                </tr>
                <tr>
                    <td style="vertical-align: top; padding: 8px; border: 1px solid #ddd;">
                        <ul style="margin: 0;">
                            <li>Author Information</li>
                            <li>At least <b>one question image</b></li>
                            <li>The <b>question text</b></li>
                            <li>The <b>final answer</b></li>
                        </ul>
                    </td>
                    <td style="vertical-align: top; padding: 8px; border: 1px solid #ddd;">
                        <ul style="margin: 0;">
                            <li>Up to four question images</li>
                            <li>Supporting images for your answer</li>
                            <li><b>Rationale text</b> to explain your reasoning</li>
                            <li><b>Sub-questions</b> with their answers</li>
                        </ul>
                    </td>
                </tr>
            </table>
            <p>While not all fields are mandatory, providing additional context through optional fields will help create a more comprehensive dataset. After submitting a question, you can clear up the form to submit another one.</p>
            """

# Build the Gradio app: the components are created top to bottom in the order
# they appear on the page.
with gr.Blocks() as demo:
    gr.Markdown("# Dataset Builder")

    with gr.Accordion("Instructions", open=True):
        gr.HTML(INSTRUCTIONS_HTML)

    # --- Author information: four single-line text boxes in one row ---
    gr.Markdown("## Author Information")
    with gr.Row():
        name_input = gr.Textbox(lines=1, label="Name")
        email_address_input = gr.Textbox(lines=1, label="Email Address")
        institution_input = gr.Textbox(
            lines=1,
            label="Institution or 'Independent'",
            placeholder="e.g. MIT, Google, Independent, etc.",
        )
        openreview_profile_input = gr.Textbox(
            lines=1,
            label="OpenReview Profile Name",
            placeholder="Your OpenReview username or profile name",
        )

    # --- Question: up to four images (one tab each), text and tags ---
    gr.Markdown("## Question Information")
    with gr.Tabs():
        question_images = []
        for number in range(1, 5):
            # Only the first tab has no "(Optional)" suffix.
            tab_title = "Image 1" if number == 1 else f"Image {number} (Optional)"
            with gr.Tab(tab_title):
                question_images.append(
                    gr.Image(label=f"Question Image {number}", type="filepath")
                )
    image1, image2, image3, image4 = question_images

    question_input = gr.Textbox(
        lines=15,
        label="Question",
        placeholder="Type your question here...",
    )
    question_categories_input = gr.Textbox(
        lines=1,
        label="Question Categories",
        placeholder="Comma-separated tags, e.g. math, geometry",
    )

    # --- Answer: short final answer, rationale text, rationale images ---
    gr.Markdown("## Answer ")
    final_answer_input = gr.Textbox(
        lines=1,
        label="Final Answer",
        placeholder="Enter the short/concise final answer...",
    )
    rationale_text_input = gr.Textbox(
        lines=5,
        label="Rationale Text",
        placeholder="Enter the reasoning or explanation for the answer...",
    )
    with gr.Tabs():
        rationale_images = []
        for number in (1, 2):
            with gr.Tab(f"Rationale {number} (Optional)"):
                rationale_images.append(
                    gr.Image(label=f"Rationale Image {number}", type="filepath")
                )
    rationale_image1, rationale_image2 = rationale_images

    # --- Sub-questions: two text/answer pairs, one pair per row ---
    gr.Markdown("## Subquestions")
    with gr.Row():
        subquestion_1_text_input = gr.Textbox(
            lines=2,
            label="Subquestion 1 Text",
            placeholder="First sub-question...",
        )
        subquestion_1_answer_input = gr.Textbox(
            lines=2,
            label="Subquestion 1 Answer",
            placeholder="Answer to sub-question 1...",
        )
    with gr.Row():
        subquestion_2_text_input = gr.Textbox(
            lines=2,
            label="Subquestion 2 Text",
            placeholder="Second sub-question...",
        )
        subquestion_2_answer_input = gr.Textbox(
            lines=2,
            label="Subquestion 2 Answer",
            placeholder="Answer to sub-question 2...",
        )

    system_message_input = gr.Textbox(
        lines=2,
        label="System Message",
        value="You are a helpful assistant",
        placeholder="Enter the system message that defines the AI assistant's role and behavior...",
    )

    # --- Actions ---
    with gr.Row():
        submit_button = gr.Button("Submit")
        clear_button = gr.Button("Clear Form")

    # Download slots for the generated files; both are created hidden.
    with gr.Row():
        output_file_urls = gr.File(
            visible=False, interactive=False, label="Download URLs JSON"
        )
        output_file_base64 = gr.File(
            visible=False, interactive=False, label="Download Base64 JSON"
        )
+    # On Submit, we call generate_json_files with all relevant fields
+    def validate_and_generate(
+        sys_msg,
+        nm,
+        em,
+        inst,
+        orp,
+        qcats,
+        sq1t,
+        sq1a,
+        sq2t,
+        sq2a,
+        q,
+        fa,
+        rt,
+        i1,
+        i2,
+        i3,
+        i4,
+        ri1,
+        ri2,
+    ):
+        # Check all required fields
+        missing_fields = []
+        if not nm or not nm.strip():
+            missing_fields.append("Name")
+        if not em or not em.strip():
+            missing_fields.append("Email Address")
+        if not inst or not inst.strip():
+            missing_fields.append("Institution")
+        if not q or not q.strip():
+            missing_fields.append("Question")
+        if not fa or not fa.strip():
+            missing_fields.append("Final Answer")
+        if not i1:
+            missing_fields.append("First Question Image")
+        # If any required fields are missing, return a warning and keep all fields as is
+        if missing_fields:
+            warning_msg = f"Required fields missing: {', '.join(missing_fields)} ⛔️"
+            # Return all inputs unchanged plus the warning
+            gr.Warning(warning_msg, duration=5)
+            return gr.Button(interactive=True)
+        # Only after successful validation, generate files but keep all fields
+        results = generate_json_files(
+            sys_msg,
+            nm,
+            em,
+            inst,
+            orp,
+            qcats,
+            sq1t,
+            sq1a,
+            sq2t,
+            sq2a,
+            q,
+            fa,
+            rt,
+            i1,
+            i2,
+            i3,
+            i4,
+            ri1,
+            ri2,
+        )
+        gr.Info(
+            "Dataset item created successfully! 🎉, Clear the form to submit a new one"
+        )
+        return gr.update(interactive=False)
+    submit_button.click(
+        fn=validate_and_generate,
+        inputs=[
+            system_message_input,
+            name_input,
+            email_address_input,
+            institution_input,
+            openreview_profile_input,
+            question_categories_input,
+            subquestion_1_text_input,
+            subquestion_1_answer_input,
+            subquestion_2_text_input,
+            subquestion_2_answer_input,
+            question_input,
+            final_answer_input,
+            rationale_text_input,
+            image1,
+            image2,
+            image3,
+            image4,
+            rationale_image1,
+            rationale_image2,
+        ],
+        outputs=[submit_button],
+    )
+    # Clear button functionality
+    def clear_form_fields(sys_msg, name, email, inst, openreview, *args):
+        # Preserve personal info fields
+        return [
+            "You are a helpful assistant",  # Reset system message to default
+            name,  # Preserve name
+            email,  # Preserve email
+            inst,  # Preserve institution
+            openreview,  # Preserve OpenReview profile
+            None,  # Clear question categories
+            None,  # Clear subquestion 1 text
+            None,  # Clear subquestion 1 answer
+            None,  # Clear subquestion 2 text
+            None,  # Clear subquestion 2 answer
+            None,  # Clear question
+            None,  # Clear final answer
+            None,  # Clear rationale text
+            None,  # Clear image1
+            None,  # Clear image2
+            None,  # Clear image3
+            None,  # Clear image4
+            None,  # Clear rationale image1
+            None,  # Clear rationale image2
+            None,  # Clear output file urls
+            None,  # Clear output file base64
+            gr.update(interactive=True),  # Re-enable submit button
+        ]
+    clear_button.click(
+        fn=clear_form_fields,
+        inputs=[
+            system_message_input,
+            name_input,
+            email_address_input,
+            institution_input,
+            openreview_profile_input,
+        ],
+        outputs=[
+            system_message_input,
+            name_input,
+            email_address_input,
+            institution_input,
+            openreview_profile_input,
+            question_categories_input,
+            subquestion_1_text_input,
+            subquestion_1_answer_input,
+            subquestion_2_text_input,
+            subquestion_2_answer_input,
+            question_input,
+            final_answer_input,
+            rationale_text_input,
+            image1,
+            image2,
+            image3,
+            image4,
+            rationale_image1,
+            rationale_image2,
+            output_file_urls,
+            output_file_base64,
+            submit_button,
+        ],
+    )
# Start the web server only when this file is run as a script, so importing
# the module (for tooling or to reuse `demo`) does not launch the app.
if __name__ == "__main__":
    demo.launch()