From 15d14ceee9398ce79d7c76e54eaf11a69b631dad Mon Sep 17 00:00:00 2001
From: Zhijian Liu
Date: Fri, 4 Oct 2024 11:11:37 -0700
Subject: [PATCH] Fix MMMU-Pro evaluation

---
Reviewer note (text in this position is ignored by git am):
  * Renames the "original" MMMU-Pro task configs to "standard": the two
    YAML files, their `task` names, their `dataset_name`, and the entry in
    the `mmmu_pro` group list.
  * Shortens the `doc_to_text` prompt of the vision task by dropping the
    "The last line of your response should be ..." sentence.

 lmms_eval/tasks/mmmu_pro/mmmu_pro.yaml                        | 2 +-
 .../{mmmu_pro_original.yaml => mmmu_pro_standard.yaml}        | 4 ++--
 ...{mmmu_pro_original_cot.yaml => mmmu_pro_standard_cot.yaml} | 4 ++--
 lmms_eval/tasks/mmmu_pro/mmmu_pro_vision.yaml                 | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)
 rename lmms_eval/tasks/mmmu_pro/{mmmu_pro_original.yaml => mmmu_pro_standard.yaml} (92%)
 rename lmms_eval/tasks/mmmu_pro/{mmmu_pro_original_cot.yaml => mmmu_pro_standard_cot.yaml} (93%)

diff --git a/lmms_eval/tasks/mmmu_pro/mmmu_pro.yaml b/lmms_eval/tasks/mmmu_pro/mmmu_pro.yaml
index b2878c7c..506a26dd 100755
--- a/lmms_eval/tasks/mmmu_pro/mmmu_pro.yaml
+++ b/lmms_eval/tasks/mmmu_pro/mmmu_pro.yaml
@@ -2,4 +2,4 @@ group: mmmu_pro
 task:
 - mmmu_pro_vision
 # - mmmu_pro_composite # removing composite task in formal MMMU-Pro evaluation
-- mmmu_pro_original
+- mmmu_pro_standard
diff --git a/lmms_eval/tasks/mmmu_pro/mmmu_pro_original.yaml b/lmms_eval/tasks/mmmu_pro/mmmu_pro_standard.yaml
similarity index 92%
rename from lmms_eval/tasks/mmmu_pro/mmmu_pro_original.yaml
rename to lmms_eval/tasks/mmmu_pro/mmmu_pro_standard.yaml
index 0a5b8a5c..523ff26f 100755
--- a/lmms_eval/tasks/mmmu_pro/mmmu_pro_original.yaml
+++ b/lmms_eval/tasks/mmmu_pro/mmmu_pro_standard.yaml
@@ -1,6 +1,6 @@
-task: "mmmu_pro_original"
+task: "mmmu_pro_standard"
 dataset_path: MMMU/MMMU_Pro
-dataset_name: original
+dataset_name: standard
 test_split: test
 output_type: generate_until
 doc_to_visual: !function utils.mmmu_pro_doc_to_visual
diff --git a/lmms_eval/tasks/mmmu_pro/mmmu_pro_original_cot.yaml b/lmms_eval/tasks/mmmu_pro/mmmu_pro_standard_cot.yaml
similarity index 93%
rename from lmms_eval/tasks/mmmu_pro/mmmu_pro_original_cot.yaml
rename to lmms_eval/tasks/mmmu_pro/mmmu_pro_standard_cot.yaml
index bad9f7d2..7979edf5 100755
--- a/lmms_eval/tasks/mmmu_pro/mmmu_pro_original_cot.yaml
+++ b/lmms_eval/tasks/mmmu_pro/mmmu_pro_standard_cot.yaml
@@ -1,6 +1,6 @@
-task: "mmmu_pro_original_cot"
+task: "mmmu_pro_standard_cot"
 dataset_path: MMMU/MMMU_Pro
-dataset_name: original
+dataset_name: standard
 test_split: test
 output_type: generate_until
 doc_to_visual: !function utils.mmmu_pro_doc_to_visual
diff --git a/lmms_eval/tasks/mmmu_pro/mmmu_pro_vision.yaml b/lmms_eval/tasks/mmmu_pro/mmmu_pro_vision.yaml
index 4ced2cd1..ee7973c9 100755
--- a/lmms_eval/tasks/mmmu_pro/mmmu_pro_vision.yaml
+++ b/lmms_eval/tasks/mmmu_pro/mmmu_pro_vision.yaml
@@ -4,7 +4,7 @@ dataset_name: vision
 test_split: test
 output_type: generate_until
 doc_to_visual: !function utils.mmmu_pro_doc_to_visual
-doc_to_text: "Answer with the option letter from the given choices directly. The last line of your response should be of the following format: 'Answer: $LETTER' (without quotes) where LETTER is one of options."
+doc_to_text: "Answer with the option letter from the given choices directly."
 doc_to_target: "{{answer}}"
 # The return value of process_results will be used by metrics
 process_results: !function utils.mmmu_pro_process_results