---
# Per-dataset settings, keyed by dataset name under `datasets`.
# Each entry carries some subset of: input locations (`image_dir`, `gt_path`),
# a `split` name, a chat-formatted `prompt` template, and decoding parameters
# (`beam_search`, `beam_size`, `top_k`, `top_p`, `temperature`,
# `output_max_len`).
#
# Notes:
# - Every "path/to/..." value is a placeholder and must be replaced with a
#   real location before use.
# - Prompts are double-quoted on purpose so that "\n" is read as a newline.
# - "{}" inside a prompt is a substitution slot; presumably it is filled with
#   the question text by the consuming code -- TODO confirm against the loader.
datasets:
  # Captioning prompts contain no "{}" slot: the instruction text is fixed.
  coco_caption:
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n<image>\nGive a brief description of this image in one sentence.<|im_end|><|im_start|>assistant\n"
    beam_search: true
    beam_size: 1
    output_max_len: 30
    top_k: 3
    temperature: 1.0

  flickr30k_caption:
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n<image>\nGive a brief description of this image in one sentence.<|im_end|><|im_start|>assistant\n"
    beam_search: true
    beam_size: 1
    output_max_len: 30
    top_k: 3
    temperature: 1.0

  vqav2:
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n<image>\n{}\nAnswer the question using a single word or phrase.<|im_end|><|im_start|>assistant\n"
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 8
    temperature: 1.0

  # This entry has no `prompt`, `image_dir` or `gt_path`; it supplies example
  # format strings, a `conv_format` name and an `answer_dict` path instead.
  mmmu:
    split: "validation"
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 1024
    temperature: 1.0
    apply_lemmatizer: false
    task_instructions: ""
    multi_choice_example_format: "{}\n{}\nAnswer with the option's letter from the given choices directly."
    short_ans_example_format: "{}\nAnswer the question using a single word or phrase."
    use_chat_format: true
    conv_format: "yi_nous_sft"
    default_image_token: "<image>"
    prompt_offset: 4
    answer_dict: "path/to/answer_dict_val.json"

  textvqa:
    split: "val"
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n<image>\n{}\nAnswer the question using a single word, phrase, or number.<|im_end|><|im_start|>assistant\n"
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 10
    temperature: 1.0

  # The only entry whose system message differs from the shared
  # "Follow the user's instruction and answer questions." text.
  mathvista:
    split: "testmini"
    prompt: "<|im_start|>system\nYou are math expert. Use your math knowledge to calculate the answer.<|im_end|><|im_start|>user\n<image>\n{}\nAnswer the question using a single word, phrase, or number.<|im_end|><|im_start|>assistant\n"
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 1024
    temperature: 1.0

  # NOTE(review): here "{}" is followed directly by "Answer ..." with no "\n"
  # in between, unlike the other prompts -- presumably the substituted text
  # already ends with a newline; confirm.
  mmbench:
    split: "dev"
    gt_path: "path/to/ground_truth"
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n<image>\n{}Answer with the option's letter from the given choices directly.<|im_end|><|im_start|>assistant\n"
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 10
    temperature: 1.0
    submission: false

  chartqa:
    split: "test"
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n<image>\n{}<|im_end|><|im_start|>assistant\n"
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 20
    temperature: 1.0

  docvqa:
    split: "val"
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n<image>\n{}<|im_end|><|im_start|>assistant\n"
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 20
    temperature: 1.0

  realworldqa:
    split: "test"
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n<image>\n{}<|im_end|><|im_start|>assistant\n"
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 20
    temperature: 1.0
    submission: false

  ocrbench:
    split: "test"
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n<image>\n{}<|im_end|><|im_start|>assistant\n"
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 70
    temperature: 1.0
    submission: false

  ai2diagram:
    split: "test"
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n<image>\n{}\nAnswer the question using a single word, phrase, or number.<|im_end|><|im_start|>assistant\n"
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 20
    temperature: 1.0

  # Every value in this entry is identical to `ai2diagram` as written here;
  # the placeholder paths are presumably meant to point at different data.
  ai2diagram_nomask:
    split: "test"
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n<image>\n{}\nAnswer the question using a single word, phrase, or number.<|im_end|><|im_start|>assistant\n"
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 20
    temperature: 1.0

  # NOTE(review): `output_max_len` is 10 here but 1024 for `mmmu`, although
  # the two entries otherwise share the same keys -- confirm this is intended.
  mmmu_pro:
    split: "validation"
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 10
    temperature: 1.0
    apply_lemmatizer: false
    task_instructions: ""
    multi_choice_example_format: "{}\n{}\nAnswer with the option's letter from the given choices directly."
    short_ans_example_format: "{}\nAnswer the question using a single word or phrase."
    use_chat_format: true
    conv_format: "yi_nous_sft"
    default_image_token: "<image>"
    prompt_offset: 4
    answer_dict: "path/to/answer_dict.json"

  # NOTE(review): this is the only prompt with "\n" after each "<|im_end|>";
  # every other prompt joins "<|im_end|><|im_start|>" directly. Left as
  # written -- confirm which form the model expects.
  docvqa_test:
    split: "test"
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|>\n<|im_start|>user\n<image>\n{}\nAnswer this question using the text in the image directly.<|im_end|>\n<|im_start|>assistant\n"
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 20
    temperature: 1.0