stanford-crfm · JosselinSomervilleRoberts · May 31, 2023 · May 24, 2023 · May 24, 2023 · May 24, 2023
diff --git a/requirements-freeze.txt b/requirements-freeze.txt
@@ -133,6 +133,7 @@ rsa==4.9
 s3transfer==0.6.0
 sacrebleu==2.2.1
 sacremoses==0.0.53
+scaleapi==2.13.0
 scikit-learn==1.1.2
 scipy==1.9.1
 selenium==4.8.0

diff --git a/requirements.txt b/requirements.txt
@@ -60,6 +60,7 @@ rouge-score~=0.1.2
 pyext~=0.7
 pytrec_eval==0.5
 sacrebleu~=2.2.1
+scaleapi~=2.13.0
 # Work around https://github.com/p-lambda/verified_calibration/issues/11
 # TODO: Remove after this issue is resolved
 scikit-learn~=1.1.2

diff --git a/scripts/scale/create_and_setup_project.py b/scripts/scale/create_and_setup_project.py
@@ -0,0 +1,148 @@
+import argparse
+from scale_utils import get_scale_client
+from scaleapi.tasks import TaskType
+from scaleapi.exceptions import ScaleDuplicateResource
+
+parser = argparse.ArgumentParser()  # CLI: which Scale project to set up, and where the credentials live
+parser.add_argument("--project_name", type=str, required=True, help="Name of the project to create")
+parser.add_argument(
+    "--credentials_path", type=str, default="prod_env/credentials.conf", help="Path to the credentials file"
+)
+args = parser.parse_args()
+project_name = args.project_name  # required: the batch names used later are built by concatenating onto it
+client = get_scale_client(args.credentials_path)
+
+print("\nGetting project...")  # Step 1: create the project, or fall back to the existing one
+try:
+    print(f"Trying to create project {project_name} ...")
+    project = client.create_project(
+        project_name=project_name,
+        task_type=TaskType.TextCollection,
+        rapid=True,
+        params={},
+    )
+    print("Project created.")
+except ScaleDuplicateResource as err:  # handled as "a project with this name already exists"
+    print(f"Project {project_name} already exists. Using existing project. Error: {err}")
+    project = client.get_project(project_name)  # NOTE: `project` is not read again later in this script
+
+
+# Create a calibration batch, fill it with 10 identical placeholder tasks, then finalize it.
+print("\nCreating calibration batch...")
+try:  # a ScaleDuplicateResource raised anywhere in this block skips the remaining calls of this step
+    calib_batch_name = project_name + "_calibration"
+    batch = client.create_batch(
+        project=project_name,
+        batch_name=calib_batch_name,
+        calibration_batch=True,
+    )
+    print("Calibration batch created.")
+    # Create 10 tasks in the calibration batch (the payload is the same for every task)
+    for i in range(10):
+        payload = dict(
+            project=project_name,
+            batch=calib_batch_name,
+            instruction="This is a fake calibration task to bypass the API. Please simply answer Yes.",
+            attachment_type="text",
+            attachments=[
+                {
+                    "type": "text",
+                    "content": "This is a fake calibration task to bypass the API. "
+                    "We do not need calibration but would like to be able to send actual task. "
+                    "In order to do this, we need to finish calibration. Please simply answer Yes.",
+                }
+            ],
+            fields=[  # a single Yes/No category question
+                {
+                    "type": "category",
+                    "field_id": "answer",
+                    "title": "Continue to the next task?",
+                    "choices": [{"label": "Yes", "value": "yes"}, {"label": "No", "value": "no"}],
+                }
+            ],
+        )
+        client.create_task(TaskType.TextCollection, **payload)
+        print(f"    Calibration task {i} created.")
+    print("Finalizing calibration batch...")
+    client.finalize_batch(calib_batch_name)
+    print("Calibration batch finalized.")
+except ScaleDuplicateResource as err:  # reported as "batch already exists"; the step is not retried
+    print(f"Calibration batch {calib_batch_name} already exists. It will not be recreated. Error: {err}")
+
+
+# Create evaluation tasks: 10 identical ones, created on every run (there is no duplicate handling here).
+expected_response = {  # NOTE(review): expects "no" although the attachment below asks to answer Yes - confirm
+    "annotations": {"answer_reasonable": {"type": "category", "field_id": "answer", "response": [["no"]]}}
+}
+initial_response = {
+    "annotations": {"answer_reasonable": {"type": "category", "field_id": "answer", "response": [["yes"]]}}
+}
+attachments = [
+    {
+        "type": "text",
+        "content": "Please Answer Yes to this question. This is simply a way to bypass the need for evaluation tasks.",
+    },
+]
+payload = dict(
+    project=project_name,
+    rapid=True,
+    attachments=attachments,
+    initial_response=initial_response,
+    expected_response=expected_response,
+    fields=[
+        {
+            "type": "category",
+            "field_id": "answer",
+            "title": "Continue to the next task?",
+            "choices": [{"label": "Yes", "value": "yes"}, {"label": "No", "value": "no"}],
+        }
+    ],
+)
+print("\nCreating evaluation tasks...")
+for i in range(10):
+    evaluation_task = client.create_evaluation_task(TaskType.TextCollection, **payload)  # result is not read
+    print(f"    Evaluation task {i} created.")
+print("Evaluation tasks created.")
+
+# Create a test batch (left unfinalized) holding a single task, to check that task creation works.
+print("\nCreating test batch...")
+try:
+    test_batch_name = project_name + "_test"  # set before create_batch so it exists even if that call raises
+    batch = client.create_batch(
+        project=project_name,
+        batch_name=test_batch_name,
+        calibration_batch=False,
+    )
+    print("Test batch created.")
+except ScaleDuplicateResource as err:
+    print(f"Test batch {test_batch_name} already exists. It will not be recreated. Error: {err}")
+# Try to create a single task in the test batch (runs whether the batch was just created or already existed)
+payload = dict(
+    project=project_name,
+    batch=test_batch_name,
+    instruction="This is a test task to check that we can create tasks. If you are a worker please simply answer Yes.",
+    attachment_type="text",
+    attachments=[
+        {
+            "type": "text",
+            "content": "This is a placeholder for the test task. If you are a worker please simply answer Yes.",
+        }
+    ],
+    fields=[
+        {
+            "type": "category",
+            "field_id": "answer",
+            "title": "Finish?",
+            "choices": [{"label": "Yes", "value": "yes"}, {"label": "No", "value": "no"}],
+        }
+    ],
+)
+print("Creating test task...")
+client.create_task(TaskType.TextCollection, **payload)
+print("Test task created.")
+print("The test batch is not going to be finalized so that it does not get sent to workers.")
+
+# Reaching this point means no step above raised an unhandled exception.
+# Print the project name, a success message, and where to verify the result.
+print(f"\n\nProject {project_name} is ready.")
+print("Please go to https://app.scale.com/projects to check that the project is ready.")
diff --git a/scripts/scale/finalize_batch.py b/scripts/scale/finalize_batch.py
@@ -0,0 +1,12 @@
+import argparse
+from scale_utils import get_scale_client
+
+parser = argparse.ArgumentParser()  # CLI: finalize one Scale batch, identified by its name
+parser.add_argument("--batch_name", type=str, required=True, help="Name of the batch to finalize")
+parser.add_argument(
+    "--credentials_path", type=str, default="prod_env/credentials.conf", help="Path to the credentials file"
+)
+args = parser.parse_args()
+
+client = get_scale_client(args.credentials_path)
+client.finalize_batch(args.batch_name)  # --batch_name is required, so this is never called with None
diff --git a/scripts/scale/scale_utils.py b/scripts/scale/scale_utils.py
@@ -0,0 +1,30 @@
+import os
+from typing import Dict
+from scaleapi import ScaleClient
+
+
+def get_credentials(path: str) -> Dict[str, str]:
+    """Parse lines of the form `key: "value"` from the file at `path`; lines without that shape are skipped."""
+    credentials: Dict[str, str] = {}
+    with open(path, "r") as f:
+        for line in f:
+            # Split on the FIRST ":" only, so that values which contain ":" themselves (e.g. URLs) are kept.
+            key, separator, rest = line.rstrip("\r\n").partition(":")
+            pieces = rest.split('"')  # pieces[1] is the text right after the first double quote
+            if separator and len(pieces) >= 2:  # needs a ":" and a quoted value; anything else is ignored
+                credentials["".join(key.split())] = pieces[1]  # keys carry no whitespace
+    return credentials
+
+
+def get_scale_client(relative_credentials_path: str) -> ScaleClient:
+    """Build a ScaleClient from the `scaleApiKey` entry of a credentials file.
+
+    The path is joined onto this module's directory; raises Exception if `scaleApiKey` is missing.
+    """
+    module_dir = os.path.dirname(os.path.realpath(__file__))
+    credentials_path = os.path.join(module_dir, relative_credentials_path)
+    print(f"Reading credentials from {credentials_path}")
+    api_key = get_credentials(credentials_path).get("scaleApiKey")
+    if api_key is None:
+        raise Exception("scaleApiKey not found in credentials.conf")
+    return ScaleClient(api_key)
diff --git a/setup.cfg b/setup.cfg
@@ -68,6 +68,7 @@ install_requires=
     pyext~=0.7
     pytrec_eval==0.5
     sacrebleu~=2.2.1
+    scaleapi~=2.13.0
     # Work around https://github.com/p-lambda/verified_calibration/issues/11
     # TODO: Remove after this issue is resolved
     scikit-learn~=1.1.2

diff --git a/src/helm/proxy/clients/auto_client.py b/src/helm/proxy/clients/auto_client.py
@@ -14,7 +14,7 @@
     DecodeRequestResult,
 )
 from helm.proxy.retry import retry_request
-from .critique_client import CritiqueClient, RandomCritiqueClient, SurgeAICritiqueClient
+from .critique_client import CritiqueClient, RandomCritiqueClient, SurgeAICritiqueClient, ScaleCritiqueClient
 from .mechanical_turk_critique_client import MechanicalTurkCritiqueClient
 from .client import Client
 from .ai21_client import AI21Client
@@ -263,8 +263,20 @@ def get_critique_client(self) -> CritiqueClient:
             if not surgeai_credentials:
                 raise ValueError("surgeaiApiKey credentials are required for SurgeAICritiqueClient")
             self.critique_client = SurgeAICritiqueClient(surgeai_credentials, self._build_cache_config("surgeai"))
+
+        elif critique_type == "scale":
+            scale_credentials = self.credentials.get("scaleApiKey")
+            scale_batch = self.credentials.get("scaleBatch", None)
+            if scale_batch is None:
+                raise ValueError("scaleBatch is required for ScaleCritiqueClient for now.")
+            if not scale_credentials:
+                raise ValueError("scaleApiKey credentials are required for ScaleCritiqueClient")
+            self.critique_client = ScaleCritiqueClient(
+                scale_credentials, self._build_cache_config("scale"), scale_batch
+            )
         else:
             raise ValueError(
-                "CritiqueClient is not configured; set critiqueType to 'mturk', 'mturk-sandbox', 'surgeai' or 'random'"
+                "CritiqueClient is not configured; set critiqueType to 'mturk', "
+                "'mturk-sandbox', 'surgeai', 'scale' or 'random'"
             )
         return self.critique_client