[Fix] Update mmb_eval_gradio.py

open-compass · Nov 22, 2024 · ea61517
1 parent 1f36d98
commit ea61517
Show file tree

Hide file tree

Showing 2 changed files with 9 additions and 4 deletions.
diff --git a/scripts/mmb_eval_gradio.py b/scripts/mmb_eval_gradio.py
@@ -1,5 +1,5 @@
 from vlmeval.smp import *
-from vlmeval.evaluate.multiple_choice import multiple_choice_eval
+from vlmeval.tools import EVAL
 import gradio as gr
 
 HEADER = """
@@ -47,10 +47,14 @@ def determine_dataset(eval_file):
     def cn_ratio(data):
         iscn = [cn_string(x) for x in data['question']]
         return np.mean(iscn)
-    if len(data) < 2500 and 'l2-category' not in data:
+    max_ind = np.max([int(x) for x in data['index'] if int(x) < 1e5])
+    if max_ind < 1000 and 'l2-category' not in data:
         return 'CCBench' if cn_ratio(data) > 0.5 else "Unknown" 
-    else:
+    elif max_ind < 3000 :
         return 'MMBench_CN' if cn_ratio(data) > 0.5 else "MMBench"
+    else:
+        return 'MMBench_CN_V11' if cn_ratio(data) > 0.5 else "MMBench_V11"
+
 
 def reformat_acc(acc):
     splits = set(acc['split'])
@@ -78,7 +82,7 @@ def evaluate(file):
     ret = f"Evaluation ID: {eval_id}\n"
     timestamp = datetime.datetime.now().strftime('%Y.%m.%d  %H:%M:%S')
     ret += f'Evaluation Timestamp: {timestamp}\n'
-    acc = multiple_choice_eval(eval_file, dataset=dataset, model='exact_matching')
+    acc = EVAL(dataset, eval_file)
     nacc = reformat_acc(acc).round(1)
     return ret, nacc
 

diff --git a/vlmeval/tools.py b/vlmeval/tools.py
@@ -371,6 +371,7 @@ def EVAL(dataset_name, data_file):
         if len(eval_results) < len(eval_results.columns):
             eval_results = eval_results.T
         logger.info('\n' + tabulate(eval_results))
+    return eval_results
 
 
 def cli():