add opt 1.3b and 6.7b models #1468

Merged: 1 commit, Apr 10, 2023
2 changes: 2 additions & 0 deletions scripts/helm-run-all.sh
@@ -48,6 +48,8 @@ models=(
"together/glm"
"together/gpt-j-6b"
"together/gpt-neox-20b"
"together/opt-1.3b"
"together/opt-6.7b"
"together/opt-66b"
"together/opt-175b"
"together/t0pp"
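The two new engine names slot into the existing `models` array that helm-run-all.sh iterates over. A minimal sketch of driving the same list from Python; the loop and the `helm-run` flags below are assumptions for illustration, not the script's actual invocation:

```python
import subprocess

# Hypothetical driver: launch one helm-run per model in the list.
# The exact flags used by helm-run-all.sh are not shown in this diff;
# --suite and --max-eval-instances here are illustrative assumptions.
models = [
    "together/opt-1.3b",
    "together/opt-6.7b",
    "together/opt-66b",
    "together/opt-175b",
]

for model in models:
    subprocess.run(
        ["helm-run", "--models-to-run", model, "--suite", "v1", "--max-eval-instances", "10"],
        check=True,
    )
```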
16 changes: 16 additions & 0 deletions src/helm/benchmark/static/schema.yaml
@@ -285,6 +285,22 @@ models:
num_parameters: 66000000000
release_date: 2022-05-02

- name: together/opt-6.7b
display_name: OPT (6.7B)
description: Open Pre-trained Transformers (6.7B parameters) is a suite of decoder-only pre-trained transformers that are fully and responsibly shared with interested researchers ([paper](https://arxiv.org/pdf/2205.01068.pdf)).
creator_organization: Meta
access: open
num_parameters: 6700000000
release_date: 2022-05-02

- name: together/opt-1.3b
display_name: OPT (1.3B)
description: Open Pre-trained Transformers (1.3B parameters) is a suite of decoder-only pre-trained transformers that are fully and responsibly shared with interested researchers ([paper](https://arxiv.org/pdf/2205.01068.pdf)).
creator_organization: Meta
access: open
num_parameters: 1300000000
release_date: 2022-05-02

- name: together/galactica-120b
display_name: Galactica (120B)
description: Galactica (120B parameters) is trained on 48 million papers, textbooks, lectures notes, compounds and proteins, scientific websites, etc. ([paper](https://galactica.org/static/paper.pdf)).
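Each `models:` entry in schema.yaml is plain YAML with a fixed set of fields. A rough sketch of loading and sanity-checking one of the new entries; the `ModelEntry` dataclass is a simplified stand-in for illustration, not HELM's actual schema classes:

```python
from dataclasses import dataclass

import yaml  # PyYAML

@dataclass
class ModelEntry:
    # Illustrative stand-in mirroring the fields shown in the schema.yaml diff.
    name: str
    display_name: str
    description: str
    creator_organization: str
    access: str
    num_parameters: int
    release_date: str

raw = yaml.safe_load("""\
- name: together/opt-1.3b
  display_name: OPT (1.3B)
  description: Open Pre-trained Transformers (1.3B parameters).
  creator_organization: Meta
  access: open
  num_parameters: 1300000000
  release_date: "2022-05-02"
""")
entries = [ModelEntry(**e) for e in raw]
assert entries[0].num_parameters == 1_300_000_000
```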
@@ -87,7 +87,7 @@ def get_window_service(model_name: str, service: TokenizerService) -> WindowServ
window_service = GPTNeoXWindowService(service)
elif model_name == "together/h3-2.7b":
window_service = GPT2WindowService(service)
elif model_name in ["together/opt-66b", "together/opt-175b"]:
elif model_name in ["together/opt-1.3b", "together/opt-6.7b", "together/opt-66b", "together/opt-175b"]:
window_service = OPTWindowService(service)
elif model_name == "together/t0pp":
window_service = T0ppWindowService(service)
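This hunk (the file header is not shown in this excerpt) extends the existing membership check in `get_window_service` so the two new engines get the OPT window service. A condensed sketch of that dispatch pattern; the names are simplified and the fallback below is illustrative, not the factory's actual behavior:

```python
# Simplified illustration of the dispatch in get_window_service; the real factory
# takes a TokenizerService and covers many more model families.
OPT_MODELS = {
    "together/opt-1.3b",
    "together/opt-6.7b",
    "together/opt-66b",
    "together/opt-175b",
}

def pick_window_service(model_name: str) -> str:
    if model_name in OPT_MODELS:
        return "OPTWindowService"
    if model_name == "together/gpt-neox-20b":
        return "GPTNeoXWindowService"
    # Illustrative fallback only; unknown models are not necessarily handled this way.
    return "GPT2WindowService"

assert pick_window_service("together/opt-1.3b") == "OPTWindowService"
```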
2 changes: 2 additions & 0 deletions src/helm/proxy/clients/together_client.py
@@ -15,6 +15,8 @@
MODEL_ALIASES = {
"flan-t5-xxl": "flan-t5-xxl-hf",
"h3-2.7b": "h3-2.7b-h3",
"opt-1.3b": "opt-1.3b-ft-tp1",
"opt-6.7b": "opt-6.7b-ft-tp1",
}
"""Together model name aliases.

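MODEL_ALIASES maps the engine name HELM uses to the deployment name Together expects, so `opt-1.3b` and `opt-6.7b` are rewritten to their `-ft-tp1` deployments before a request is sent. A hedged sketch of applying such a lookup; the helper function is illustrative, not the client's actual code:

```python
MODEL_ALIASES = {
    "flan-t5-xxl": "flan-t5-xxl-hf",
    "h3-2.7b": "h3-2.7b-h3",
    "opt-1.3b": "opt-1.3b-ft-tp1",
    "opt-6.7b": "opt-6.7b-ft-tp1",
}

def resolve_together_model(engine: str) -> str:
    # Fall back to the engine name itself when no alias is registered.
    return MODEL_ALIASES.get(engine, engine)

assert resolve_together_model("opt-6.7b") == "opt-6.7b-ft-tp1"
assert resolve_together_model("opt-66b") == "opt-66b"
```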
20 changes: 20 additions & 0 deletions src/helm/proxy/models.py
@@ -431,6 +431,26 @@ def engine(self) -> str:
"transformers that are fully and responsibly shared with interested researchers.",
tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, OPT_TOKENIZER_TAG],
),
Model(
group="together",
creator_organization="Meta",
name="together/opt-6.7b",
display_name="OPT (6.7B)",
# From https://arxiv.org/pdf/2205.01068.pdf
description="Open Pre-trained Transformers (6.7B parameters) is a suite of decoder-only pre-trained "
"transformers that are fully and responsibly shared with interested researchers.",
tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, OPT_TOKENIZER_TAG],
),
Model(
group="together",
creator_organization="Meta",
name="together/opt-1.3b",
display_name="OPT (1.3B)",
# From https://arxiv.org/pdf/2205.01068.pdf
description="Open Pre-trained Transformers (1.3B parameters) is a suite of decoder-only pre-trained "
"transformers that are fully and responsibly shared with interested researchers.",
tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, OPT_TOKENIZER_TAG],
),
# Microsoft/NVIDIA
Model(
group="microsoft",
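Each `Model` entry registers an engine together with its tags; `OPT_TOKENIZER_TAG` is what lets downstream code (such as the window service dispatch above) treat all four OPT sizes uniformly. A minimal sketch of selecting entries by tag; the `Model` dataclass and tag value here are simplified stand-ins for the ones in models.py:

```python
from dataclasses import dataclass, field
from typing import List

# Simplified stand-ins for the tag constant and Model class in src/helm/proxy/models.py.
OPT_TOKENIZER_TAG = "OPT_TOKENIZER_TAG"

@dataclass
class Model:
    group: str
    name: str
    display_name: str
    tags: List[str] = field(default_factory=list)

ALL_MODELS = [
    Model("together", "together/opt-1.3b", "OPT (1.3B)", [OPT_TOKENIZER_TAG]),
    Model("together", "together/opt-6.7b", "OPT (6.7B)", [OPT_TOKENIZER_TAG]),
]

def models_with_tag(tag: str) -> List[Model]:
    return [m for m in ALL_MODELS if tag in m.tags]

assert {m.name for m in models_with_tag(OPT_TOKENIZER_TAG)} == {
    "together/opt-1.3b",
    "together/opt-6.7b",
}
```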