Add run entries for HELM capabilities (#3226)

stanford-crfm · Dec 19, 2024 · 8164530 · 8164530
1 parent f14e139
commit 8164530
Show file tree

Hide file tree

Showing 2 changed files with 13 additions and 0 deletions.
diff --git a/src/helm/benchmark/presentation/run_entries_capabilities.conf b/src/helm/benchmark/presentation/run_entries_capabilities.conf
@@ -0,0 +1,10 @@
+# Run entries for the HELM Capabilities scenarios.
+
+entries: [
+  {description: "mmlu_pro:use_chain_of_thought=true,subject=business,model=text", priority: 1}
+  {description: "gpqa:subset=main,use_chain_of_thought=true,model=text", priority: 1}
+  {description: "ifeval:model=text", priority: 1}
+  {description: "wildbench:subset=v2,model=text", priority: 1}
+  {description: "bigcodebench:version=v0.1.2,model=text", priority: 1}
+  {description: "omni_math:model=text", priority: 1}
+]
diff --git a/src/helm/benchmark/run_specs/lite_run_specs.py b/src/helm/benchmark/run_specs/lite_run_specs.py
@@ -375,6 +375,9 @@ def get_gpqa_spec(subset: str, use_chain_of_thought: str = "False", use_few_shot
     del use_chain_of_thought
     del use_few_shot
 
+    if not subset.startswith("gpqa_"):
+        subset = "gpqa_" + subset
+
     scenario_spec = ScenarioSpec(
         class_name="helm.benchmark.scenarios.gpqa_scenario.GPQAScenario", args={"subset": subset}
     )