stanford-crfm · YianZhang · Jun 2, 2023 · Jun 2, 2023 · Jun 2, 2023
diff --git a/src/helm/benchmark/__init__.py b/src/helm/benchmark/__init__.py
@@ -50,6 +50,7 @@
 from .scenarios import opinions_qa_scenario  # noqa
 from .scenarios import vicuna_scenario  # noqa
 from .scenarios import open_assistant_scenario  # noqa
+from .scenarios import koala_scenario  # noqa
 
 # Biomedical
 from .scenarios import covid_dialog_scenario  # noqa

diff --git a/src/helm/benchmark/run_specs.py b/src/helm/benchmark/run_specs.py
@@ -2181,6 +2181,24 @@ def get_open_assistant_spec(language: str) -> RunSpec:
     )
 
 
+@run_spec_function("koala")
+def get_koala_spec() -> RunSpec:
+    """Assemble the "koala" run spec: Koala scenario, instruct adapter, open-ended generation + harms metrics."""
+    koala_scenario = ScenarioSpec(
+        class_name="helm.benchmark.scenarios.koala_scenario.KoalaScenario",
+        args={},
+    )
+    instruct_adapter = get_instruct_adapter_spec()
+    metrics = get_open_ended_generation_metric_specs() + get_generative_harms_metric_specs()
+    return RunSpec(
+        name="koala",
+        scenario_spec=koala_scenario,
+        adapter_spec=instruct_adapter,
+        metric_specs=metrics,
+        groups=["koala"],
+    )
+
+
 ############################################################
 
 

diff --git a/src/helm/benchmark/scenarios/koala_scenario.py b/src/helm/benchmark/scenarios/koala_scenario.py
@@ -0,0 +1,41 @@
+import json
+import os
+from typing import List
+
+from helm.common.general import ensure_file_downloaded
+from .scenario import Scenario, Instance, Input, TEST_SPLIT
+
+
+class KoalaScenario(Scenario):
+    """
+    This scenario is based on the prompts used by the Koala team to evaluate instruction-following models.
+    Each non-blank line of the downloaded JSONL file becomes one test-split instance with no references.
+
+    https://bair.berkeley.edu/blog/2023/04/03/koala/
+    """
+
+    name = "koala"
+    description = "Koala eval dataset"
+    tags = ["instructions"]
+
+    def get_instances(self) -> List[Instance]:
+        """Fetch the Koala test set under `self.output_path` and return one `Instance` per prompt."""
+        # Download the raw data
+        source_url = "https://raw.githubusercontent.com/arnav-gudibande/koala-test-set/main/koala_test_set.jsonl"
+        data_path: str = os.path.join(self.output_path, "Koala_prompts.jsonl")
+
+        ensure_file_downloaded(source_url=source_url, target_path=data_path)
+
+        # Read all the instances
+        instances: List[Instance] = []
+        # `with` closes the handle deterministically; explicit UTF-8 avoids locale-dependent decoding.
+        with open(data_path, encoding="utf-8") as f:
+            for line in f:
+                if not line.strip():
+                    continue  # json.loads would raise on an empty line
+                # Example: {"id": "koala_1", "prompt": "Take MLK speech \"I had a dream\" but turn it into a top 100 rap song"} # noqa: E501
+                raw = json.loads(line)
+                instances.append(
+                    Instance(input=Input(text=raw["prompt"]), references=[], split=TEST_SPLIT)
+                )
+        return instances