Spaces:

taesiri
/

SubmitQuestions

Paused

App Files Files Community

taesiri committed on Jan 13

Commit

5943a36

1 Parent(s): ae2c2e9

backup

Browse files

Files changed (1) hide show

app.py +267 -27

app.py CHANGED Viewed

@@ -4,6 +4,8 @@ import json
 import os
 import shutil
 import uuid
 from huggingface_hub import CommitScheduler, HfApi
 api = HfApi(token=os.environ["HF_TOKEN"])
@@ -17,6 +19,112 @@ scheduler = CommitScheduler(
 )
 def generate_json_files(
     name,
     email_address,
@@ -44,25 +152,41 @@ def generate_json_files(
     image4,
     rationale_image1,
     rationale_image2,
 ):
     """
     For each request:
-      1) Create a unique folder under ./data/
       2) Copy uploaded images (question + rationale) into that folder
-      3) Produce two JSON files:
-         - question.json   (local file paths in content)
-         - request_base64.json (base64-encoded images in content)
-      4) Return paths to both files for Gradio to provide as download links
     """
-    # 1) Create parent data folder if it doesn't exist
     parent_data_folder = "./data"
     os.makedirs(parent_data_folder, exist_ok=True)
-    # 2) Generate a unique request ID and create a subfolder
-    request_id = str(uuid.uuid4())  # unique ID
     request_folder = os.path.join(parent_data_folder, request_id)
-    os.makedirs(request_folder)
     # Convert None strings
     def safe_str(val):
@@ -104,21 +228,71 @@ def generate_json_files(
         ("rationale_image_2", rationale_image2),
     ]
     files_list = []
     for idx, (img_label, img_obj) in enumerate(all_images):
         if img_obj is not None:
             temp_path = os.path.join(request_folder, f"{img_label}.png")
             if isinstance(img_obj, str):
                 # If image is a file path
-                shutil.copy2(img_obj, temp_path)
             else:
                 # If image is a numpy array
                 gr.processing_utils.save_image(img_obj, temp_path)
-            # Keep track of the saved path + label
-            files_list.append((img_label, temp_path))
     # Build user content in two flavors: local file paths vs base64
-    # We’ll store text fields as simple dictionaries, and then images separately.
     content_list_urls = [
         {"type": "field", "label": "name", "value": name},
         {"type": "field", "label": "email_address", "value": email_address},
@@ -218,7 +392,7 @@ def generate_json_files(
     # Convert each to JSON line format
     urls_json_line = json.dumps(item_urls, ensure_ascii=False)
-    # 3) Write out two JSON files in request_folder
     urls_jsonl_path = os.path.join(request_folder, "question.json")
     with open(urls_jsonl_path, "w", encoding="utf-8") as f:
@@ -230,6 +404,7 @@ def generate_json_files(
 # Build the Gradio app
 with gr.Blocks() as demo:
     gr.Markdown("# Dataset Builder")
     with gr.Accordion("Instructions", open=True):
         gr.HTML(
             """
@@ -436,7 +611,59 @@ with gr.Blocks() as demo:
             label="Download Base64 JSON", interactive=False, visible=False
         )
-    # On Submit, we call generate_json_files with all relevant fields
     def validate_and_generate(
         nm,
         em,
@@ -464,8 +691,9 @@ with gr.Blocks() as demo:
         i4,
         ri1,
         ri2,
     ):
-        # Check all required fields
         missing_fields = []
         if not nm or not nm.strip():
             missing_fields.append("Name")
@@ -492,14 +720,18 @@ with gr.Blocks() as demo:
         if not sq5t or not sq5t.strip() or not sq5a or not sq5a.strip():
             missing_fields.append("Fifth Sub-question and Answer")
-        # If any required fields are missing, return a warning and keep all fields as is
         if missing_fields:
             warning_msg = f"Required fields missing: {', '.join(missing_fields)} ⛔️"
-            # Return all inputs unchanged plus the warning
             gr.Warning(warning_msg, duration=5)
-            return gr.Button(interactive=True)
-        # Only after successful validation, generate files but keep all fields
         results = generate_json_files(
             nm,
             em,
@@ -527,14 +759,19 @@ with gr.Blocks() as demo:
             i4,
             ri1,
             ri2,
         )
         gr.Info(
-            "Dataset item created successfully! 🎉, Clear the form to submit a new one"
         )
-        return gr.update(interactive=False)
     submit_button.click(
         fn=validate_and_generate,
         inputs=[
@@ -564,15 +801,14 @@ with gr.Blocks() as demo:
             image4,
             rationale_image1,
             rationale_image2,
         ],
-        outputs=[submit_button],
     )
-    # Clear button functionality
     def clear_form_fields(name, email, inst, openreview, authorship, *args):
-        # Preserve personal info fields and re-enable submit button
-        gr.Info("Form cleared! Ready for new submission 🔄")
-        return [
             name,  # Preserve name
             email,  # Preserve email
             inst,  # Preserve institution
@@ -601,7 +837,10 @@ with gr.Blocks() as demo:
             None,  # Clear rationale image2
             None,  # Clear output file urls
             gr.Button(interactive=True),  # Re-enable submit button
         ]
     clear_button.click(
         fn=clear_form_fields,
@@ -641,6 +880,7 @@ with gr.Blocks() as demo:
             rationale_image2,
             output_file_urls,
             submit_button,
         ],
     )

 import os
 import shutil
 import uuid
+import glob
 from huggingface_hub import CommitScheduler, HfApi
 api = HfApi(token=os.environ["HF_TOKEN"])
 )
def load_existing_questions():
    """
    Load all existing questions from the data directory.

    Every sub-directory of ``./data`` that contains a ``question.json`` file
    is read, and a short preview is built from its ``"question"`` field.

    Returns:
        A list of ``(question_id, label)`` tuples sorted by ``label``.
        ``question_id`` is the sub-directory name and ``label`` is
        ``"<question_id>: <preview>"``, where the preview is the question
        text cut to 100 characters plus ``"..."`` when it is longer.
        The list is empty when ``./data`` does not exist.
    """
    questions = []
    data_dir = "./data"
    if not os.path.exists(data_dir):
        return questions

    for question_dir in glob.glob(os.path.join(data_dir, "*")):
        if not os.path.isdir(question_dir):
            continue
        json_path = os.path.join(question_dir, "question.json")
        if not os.path.exists(json_path):
            # Folders without a question file are simply not listed.
            continue

        try:
            with open(json_path, "r", encoding="utf-8") as f:
                data = json.loads(f.read().strip())
            question_text = data["question"]
            preview = (
                f"{question_text[:100]}..."
                if len(question_text) > 100
                else question_text
            )
        except (OSError, ValueError, KeyError, TypeError) as e:
            # Best effort: one unreadable or malformed file must not hide the
            # remaining questions. ValueError covers invalid JSON and bad
            # encodings; KeyError/TypeError cover an unexpected JSON shape.
            # Unlike a bare ``except``, this lets KeyboardInterrupt and
            # SystemExit propagate.
            print(f"Skipping unreadable question file {json_path}: {e!r}")
            continue

        question_id = os.path.basename(question_dir)
        questions.append((question_id, f"{question_id}: {preview}"))

    return sorted(questions, key=lambda x: x[1])
def load_question_data(question_id):
    """
    Load a specific question's data for the form.

    Args:
        question_id: Either a bare question id (the folder name under
            ``./data``) or a dropdown label of the form
            ``"<id>: <preview>"``; only the part before the first colon is
            used.

    Returns:
        A list of 26 values, in this order: name, email address,
        institution, OpenReview profile, authorship interest (bool),
        question categories (comma-joined when stored as a list), the five
        sub-question text/answer pairs, question, final answer, rationale
        text, image attribution, four question image paths and two rationale
        image paths. An image slot is ``None`` when no file with that name
        exists in the question folder. Every entry is ``None`` when
        ``question_id`` is empty, is not a plain folder name, the question
        file is missing, or loading fails.
    """
    num_fields = 26  # Total number of values returned.
    num_question_images = 4
    num_rationale_images = 2

    if not question_id:
        return [None] * num_fields  # Return None for all fields

    # Extract the ID part before the colon from the dropdown selection
    if ":" in question_id:
        question_id = question_id.split(":")[0].strip()

    # The id becomes a path component, so it must be a plain folder name;
    # anything containing a separator or a dot-directory would point outside
    # of ./data/<id>.
    if (
        question_id in ("", ".", "..")
        or os.path.basename(question_id) != question_id
    ):
        print(f"Invalid question id: {question_id!r}")
        return [None] * num_fields

    question_folder = os.path.join("./data", question_id)
    json_path = os.path.join(question_folder, "question.json")
    if not os.path.exists(json_path):
        print(f"Question file not found: {json_path}")
        return [None] * num_fields

    def load_image(image_path):
        # Stored paths are resolved by file name inside the question folder.
        if not image_path:
            return None
        full_path = os.path.join(question_folder, os.path.basename(image_path))
        return full_path if os.path.exists(full_path) else None

    def image_slots(stored, count):
        # A missing, null or non-list field counts as "no images" rather
        # than failing the whole record.
        if not isinstance(stored, list):
            stored = []
        padded = (stored + [None] * count)[:count]
        return [load_image(path) for path in padded]

    try:
        with open(json_path, "r", encoding="utf-8") as f:
            data = json.loads(f.read().strip())

        author_info = data["author_info"]

        # Convert authorship_interest to boolean if it's a string
        authorship = author_info.get("authorship_interest", False)
        if isinstance(authorship, str):
            authorship = authorship.lower() == "true"

        categories = data["question_categories"]
        if isinstance(categories, list):
            categories = ",".join(categories)

        subquestions = []
        for number in range(1, 6):
            subquestions.append(data.get(f"subquestions_{number}_text", "N/A"))
            subquestions.append(data.get(f"subquestions_{number}_answer", "N/A"))

        return [
            author_info["name"],
            author_info["email_address"],
            author_info["institution"],
            author_info.get("openreview_profile", ""),
            authorship,
            categories,
            *subquestions,
            data["question"],
            data["final_answer"],
            data.get("rationale_text", ""),
            data["image_attribution"],
            *image_slots(data.get("question_images", []), num_question_images),
            *image_slots(data.get("rationale_images", []), num_rationale_images),
        ]
    except Exception as e:
        # UI boundary: report the problem and hand back an empty form
        # instead of surfacing a traceback to the user.
        print(f"Error loading question {question_id}: {str(e)}")
        return [None] * num_fields
 def generate_json_files(
     name,
     email_address,
     image4,
     rationale_image1,
     rationale_image2,
+    existing_id=None,  # New parameter for updating existing questions
 ):
     """
     For each request:
+      1) Create a unique folder under ./data/ (or use existing if updating)
       2) Copy uploaded images (question + rationale) into that folder
+      3) Produce JSON file with question data
+      4) Return path to the JSON file
     """
+    # Use existing ID if updating, otherwise generate new one
+    request_id = existing_id if existing_id else str(uuid.uuid4())
+    # Create parent data folder if it doesn't exist
     parent_data_folder = "./data"
     os.makedirs(parent_data_folder, exist_ok=True)
+    # Create or clean request folder
     request_folder = os.path.join(parent_data_folder, request_id)
+    if os.path.exists(request_folder):
+        # If updating, remove old image files but only if new images are provided
+        for f in glob.glob(os.path.join(request_folder, "*.png")):
+            # Only remove if we have a new image to replace it
+            filename = os.path.basename(f)
+            if (
+                ("question_image_1" in filename and image1)
+                or ("question_image_2" in filename and image2)
+                or ("question_image_3" in filename and image3)
+                or ("question_image_4" in filename and image4)
+                or ("rationale_image_1" in filename and rationale_image1)
+                or ("rationale_image_2" in filename and rationale_image2)
+            ):
+                os.remove(f)
+    else:
+        os.makedirs(request_folder)
     # Convert None strings
     def safe_str(val):
         ("rationale_image_2", rationale_image2),
     ]
+    # If updating, load existing images that haven't been replaced
+    if existing_id:
+        json_path = os.path.join(parent_data_folder, existing_id, "question.json")
+        if os.path.exists(json_path):
+            try:
+                with open(json_path, "r", encoding="utf-8") as f:
+                    existing_data = json.loads(f.read().strip())
+                    existing_question_images = existing_data.get("question_images", [])
+                    existing_rationale_images = existing_data.get(
+                        "rationale_images", []
+                    )
+                    # Keep existing images if no new ones provided
+                    if not image1 and existing_question_images:
+                        all_images[0] = (
+                            "question_image_1",
+                            existing_question_images[0],
+                        )
+                    if not image2 and len(existing_question_images) > 1:
+                        all_images[1] = (
+                            "question_image_2",
+                            existing_question_images[1],
+                        )
+                    if not image3 and len(existing_question_images) > 2:
+                        all_images[2] = (
+                            "question_image_3",
+                            existing_question_images[2],
+                        )
+                    if not image4 and len(existing_question_images) > 3:
+                        all_images[3] = (
+                            "question_image_4",
+                            existing_question_images[3],
+                        )
+                    if not rationale_image1 and existing_rationale_images:
+                        all_images[4] = (
+                            "rationale_image_1",
+                            existing_rationale_images[0],
+                        )
+                    if not rationale_image2 and len(existing_rationale_images) > 1:
+                        all_images[5] = (
+                            "rationale_image_2",
+                            existing_rationale_images[1],
+                        )
+            except:
+                pass
     files_list = []
     for idx, (img_label, img_obj) in enumerate(all_images):
         if img_obj is not None:
             temp_path = os.path.join(request_folder, f"{img_label}.png")
             if isinstance(img_obj, str):
                 # If image is a file path
+                if os.path.exists(img_obj):
+                    if (
+                        img_obj != temp_path
+                    ):  # Only copy if source and destination are different
+                        shutil.copy2(img_obj, temp_path)
+                    files_list.append((img_label, temp_path))
             else:
                 # If image is a numpy array
                 gr.processing_utils.save_image(img_obj, temp_path)
+                files_list.append((img_label, temp_path))
     # Build user content in two flavors: local file paths vs base64
+    # We'll store text fields as simple dictionaries, and then images separately.
     content_list_urls = [
         {"type": "field", "label": "name", "value": name},
         {"type": "field", "label": "email_address", "value": email_address},
     # Convert each to JSON line format
     urls_json_line = json.dumps(item_urls, ensure_ascii=False)
+    # 3) Write out JSON file in request_folder
     urls_jsonl_path = os.path.join(request_folder, "question.json")
     with open(urls_jsonl_path, "w", encoding="utf-8") as f:
 # Build the Gradio app
 with gr.Blocks() as demo:
     gr.Markdown("# Dataset Builder")
     with gr.Accordion("Instructions", open=True):
         gr.HTML(
             """
             label="Download Base64 JSON", interactive=False, visible=False
         )
+    with gr.Accordion("Load Existing Question", open=False):
+        gr.Markdown("## Load Existing Question")
+        with gr.Row():
+            existing_questions = gr.Dropdown(
+                label="Load Existing Question",
+                choices=load_existing_questions(),
+                type="value",
+                allow_custom_value=False,
+            )
+            refresh_button = gr.Button("🔄 Refresh")
+            load_button = gr.Button("Load Selected Question")
+    def refresh_questions():
+        return gr.Dropdown(choices=load_existing_questions())
+    refresh_button.click(fn=refresh_questions, inputs=[], outputs=[existing_questions])
+    # Load button functionality
+    load_button.click(
+        fn=load_question_data,
+        inputs=[existing_questions],
+        outputs=[
+            name_input,
+            email_address_input,
+            institution_input,
+            openreview_profile_input,
+            authorship_input,
+            question_categories_input,
+            subquestion_1_text_input,
+            subquestion_1_answer_input,
+            subquestion_2_text_input,
+            subquestion_2_answer_input,
+            subquestion_3_text_input,
+            subquestion_3_answer_input,
+            subquestion_4_text_input,
+            subquestion_4_answer_input,
+            subquestion_5_text_input,
+            subquestion_5_answer_input,
+            question_input,
+            final_answer_input,
+            rationale_text_input,
+            image_attribution_input,
+            image1,
+            image2,
+            image3,
+            image4,
+            rationale_image1,
+            rationale_image2,
+        ],
+    )
+    # Modify validate_and_generate to handle updates
     def validate_and_generate(
         nm,
         em,
         i4,
         ri1,
         ri2,
+        selected_question_id,
     ):
+        # Validation code remains the same
         missing_fields = []
         if not nm or not nm.strip():
             missing_fields.append("Name")
         if not sq5t or not sq5t.strip() or not sq5a or not sq5a.strip():
             missing_fields.append("Fifth Sub-question and Answer")
         if missing_fields:
             warning_msg = f"Required fields missing: {', '.join(missing_fields)} ⛔️"
             gr.Warning(warning_msg, duration=5)
+            return gr.Button(interactive=True), gr.Dropdown(
+                choices=load_existing_questions()
+            )
+        # Extract question ID if updating existing question
+        existing_id = (
+            selected_question_id.split(":")[0].strip() if selected_question_id else None
+        )
         results = generate_json_files(
             nm,
             em,
             i4,
             ri1,
             ri2,
+            existing_id,
         )
+        action = "updated" if existing_id else "created"
         gr.Info(
+            f"Dataset item {action} successfully! 🎉 Clear the form to submit a new one"
         )
+        return gr.update(interactive=False), gr.Dropdown(
+            choices=load_existing_questions()
+        )
+    # Update submit button click handler to include selected question
     submit_button.click(
         fn=validate_and_generate,
         inputs=[
             image4,
             rationale_image1,
             rationale_image2,
+            existing_questions,  # Add selected question to inputs
         ],
+        outputs=[submit_button, existing_questions],  # Update dropdown after submit
     )
+    # Update clear button to also clear selected question
     def clear_form_fields(name, email, inst, openreview, authorship, *args):
+        outputs = [
             name,  # Preserve name
             email,  # Preserve email
             inst,  # Preserve institution
             None,  # Clear rationale image2
             None,  # Clear output file urls
             gr.Button(interactive=True),  # Re-enable submit button
+            gr.update(value=None),  # Clear selected question
         ]
+        gr.Info("Form cleared! Ready for new submission 🔄")
+        return outputs
     clear_button.click(
         fn=clear_form_fields,
             rationale_image2,
             output_file_urls,
             submit_button,
+            existing_questions,
         ],
     )