{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# GAIA validation set: quick look\n",
    "\n",
    "Downloads `2023/validation/metadata.jsonl` from the `gaia-benchmark/GAIA` dataset repo on the Hugging Face Hub, loads it into a pandas DataFrame, and inspects the Level 1 questions.\n",
    "\n",
    "- Stored outputs are intentionally cleared; use **Restart Kernel, then Run All** to regenerate them.\n",
    "- NOTE: the download may require Hub authentication and accepted dataset terms; check the dataset card before running."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "\n",
    "import pandas as pd\n",
    "from huggingface_hub import hf_hub_download"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Location of the metadata file inside the Hub dataset repo.\n",
    "REPO_ID = \"gaia-benchmark/GAIA\"\n",
    "METADATA_FILENAME = \"2023/validation/metadata.jsonl\"\n",
    "REPO_TYPE = \"dataset\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Download the metadata file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# hf_hub_download returns the local path of the downloaded file.\n",
    "metadata_file = hf_hub_download(\n",
    "    repo_id=REPO_ID,\n",
    "    filename=METADATA_FILENAME,\n",
    "    repo_type=REPO_TYPE,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the records\n",
    "\n",
    "The file is read as JSON Lines: each non-blank line is parsed as one JSON object."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The list is built in a single statement so that re-running this cell never\n",
    "# appends duplicate records to a list left over from an earlier run.\n",
    "# The encoding is explicit because open() otherwise falls back to the\n",
    "# platform default; blank lines are skipped so they cannot break json.loads.\n",
    "with open(metadata_file, \"r\", encoding=\"utf-8\") as f:\n",
    "    questions_and_answers = [json.loads(line) for line in f if line.strip()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.DataFrame(questions_and_answers)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Inspect the structure"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Display the structure\n",
    "print(f\"Number of validation questions: {len(df)}\")\n",
    "print(f\"Columns: {df.columns.tolist()}\")\n",
    "print(f\"Levels: {df['Level'].value_counts()}\")\n",
    "\n",
    "# Show a sample question (first row of the full frame)\n",
    "sample = df.iloc[0]\n",
    "print(\"\\nSample question:\")\n",
    "print(f\"Question: {sample['Question']}\")\n",
    "print(f\"Answer: {sample['Final answer']}\")\n",
    "print(f\"Level: {sample['Level']}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Level 1 questions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only rows whose Level equals the integer 1 and renumber them from 0.\n",
    "df_level1 = df[df['Level'] == 1].reset_index(drop=True)\n",
    "\n",
    "# Fail early with a clear message instead of a KeyError in the lookups below.\n",
    "assert not df_level1.empty, \"no rows with Level == 1; check the dtype of the 'Level' column\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_level1.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### First Level 1 question and its reference answer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_level1.loc[0, 'Question']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_level1.loc[0, 'Final answer']"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "nlp_course",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}