Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Scale AI integration #1609

Merged
merged 25 commits into from
May 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
f0ada77
First Draft of ScaleCritiqueClient
JosselinSomervilleRoberts May 24, 2023
8930ca6
Fixed a few issues
JosselinSomervilleRoberts May 24, 2023
80cf99a
Changed task to fields, now runs
JosselinSomervilleRoberts May 24, 2023
433286e
Added documentation and handled duplication
JosselinSomervilleRoberts May 24, 2023
e64dc28
Swapped instruction and attachments for a task
JosselinSomervilleRoberts May 24, 2023
dbb2600
Added response handling
JosselinSomervilleRoberts May 24, 2023
4a6095f
Change unique_id to sha512
JosselinSomervilleRoberts May 25, 2023
89546a0
Fixed interpolation of files (brackets not removed)
JosselinSomervilleRoberts May 25, 2023
801b9bf
Fix flake
JosselinSomervilleRoberts May 25, 2023
9056fcb
Fix mypy
JosselinSomervilleRoberts May 25, 2023
0efbd20
Add scaleapi to requirements
JosselinSomervilleRoberts May 25, 2023
8ecfdf0
Changes requested: mainly including switching to Rapid
JosselinSomervilleRoberts May 25, 2023
101966e
Remove finalizing and set to self labeling
JosselinSomervilleRoberts May 25, 2023
9eb9708
Add finalize script
JosselinSomervilleRoberts May 26, 2023
56bcc3f
Add script to create project
JosselinSomervilleRoberts May 26, 2023
8a6dbcd
Allow the user to specify the project and batch name in credentials.conf
JosselinSomervilleRoberts May 26, 2023
9c8ef9b
Fix flake
JosselinSomervilleRoberts May 26, 2023
0d0ce1f
Simple changes to project creation script
JosselinSomervilleRoberts May 26, 2023
eb3d36c
Set to self labeling for Tony
JosselinSomervilleRoberts May 26, 2023
08ae8d0
Add testing to the cache key for Tony
JosselinSomervilleRoberts May 26, 2023
bb1746a
Changes requested
JosselinSomervilleRoberts May 30, 2023
d385c23
Set credentials paths as a flag for scale scripts
JosselinSomervilleRoberts May 30, 2023
093e5d4
Fix flake
JosselinSomervilleRoberts May 30, 2023
8be04df
Make batch name mandatory
JosselinSomervilleRoberts May 31, 2023
72c79c5
Fix flake
JosselinSomervilleRoberts May 31, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions requirements-freeze.txt
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ rsa==4.9
s3transfer==0.6.0
sacrebleu==2.2.1
sacremoses==0.0.53
scaleapi==2.13.0
scikit-learn==1.1.2
scipy==1.9.1
selenium==4.8.0
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ rouge-score~=0.1.2
pyext~=0.7
pytrec_eval==0.5
sacrebleu~=2.2.1
scaleapi~=2.13.0
# Work around https://github.com/p-lambda/verified_calibration/issues/11
# TODO: Remove after this issue is resolved
scikit-learn~=1.1.2
Expand Down
148 changes: 148 additions & 0 deletions scripts/scale/create_and_setup_project.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
import argparse
from scale_utils import get_scale_client
from scaleapi.tasks import TaskType
from scaleapi.exceptions import ScaleDuplicateResource

# Command-line interface: which Scale project to create (or reuse) and where the credentials live.
parser = argparse.ArgumentParser()
# The project name is required: it is passed to every API call in this script and is used to
# derive the calibration/test batch names, so a missing value would only fail later and less clearly.
parser.add_argument("--project_name", type=str, required=True, help="Name of the project to create")
parser.add_argument(
    "--credentials_path", type=str, default="prod_env/credentials.conf", help="Path to the credentials file"
)
args = parser.parse_args()
project_name = args.project_name
# The credentials path is handed as-is to get_scale_client, which builds the Scale client from it.
client = get_scale_client(args.credentials_path)

# Get-or-create the project: attempt the creation first and fall back to fetching the
# existing project when the API reports a duplicate resource.
print("\nGetting project...")
print(f"Trying to create project {project_name} ...")
project_creation_kwargs = {
    "project_name": project_name,
    "task_type": TaskType.TextCollection,
    "rapid": True,
    "params": {},
}
try:
    project = client.create_project(**project_creation_kwargs)
except ScaleDuplicateResource as err:
    print(f"Project {project_name} already exists. Using existing project. Error: {err}")
    project = client.get_project(project_name)
else:
    print("Project created.")


# Calibration batch: create it, fill it with 10 identical placeholder tasks and finalize it.
# If the batch already exists (duplicate resource), nothing is recreated.
print("\nCreating calibration batch...")
calib_batch_name = project_name + "_calibration"
try:
    batch = client.create_batch(project=project_name, batch_name=calib_batch_name, calibration_batch=True)
    print("Calibration batch created.")

    # Create 10 tasks in the calibration batch
    for task_index in range(10):
        yes_no_field = {
            "type": "category",
            "field_id": "answer",
            "title": "Continue to the next task?",
            "choices": [{"label": "Yes", "value": "yes"}, {"label": "No", "value": "no"}],
        }
        text_attachment = {
            "type": "text",
            "content": "This is a fake calibration task to bypass the API. "
            "We do not need calibration but would like to be able to send actual task. "
            "In order to do this, we need to finish calibration. Please simply answer Yes.",
        }
        payload = {
            "project": project_name,
            "batch": calib_batch_name,
            "instruction": "This is a fake calibration task to bypass the API. Please simply answer Yes.",
            "attachment_type": "text",
            "attachments": [text_attachment],
            "fields": [yes_no_field],
        }
        client.create_task(TaskType.TextCollection, **payload)
        print(f" Calibration task {task_index} created.")

    print("Finalizing calibration batch...")
    client.finalize_batch(calib_batch_name)
    print("Calibration batch finalized.")
except ScaleDuplicateResource as err:
    print(f"Calibration batch {calib_batch_name} already exists. It will not be recreated. Error: {err}")


# Evaluation tasks: 10 identical placeholder tasks, each carrying an initial ("yes") and an
# expected ("no") response for the single "answer" category field.
def _answer_annotation(choice):
    # Wraps one category choice in the annotation structure sent with evaluation tasks.
    return {"annotations": {"answer_reasonable": {"type": "category", "field_id": "answer", "response": [[choice]]}}}


expected_response = _answer_annotation("no")
initial_response = _answer_annotation("yes")
attachments = [
    {
        "type": "text",
        "content": "Please Answer Yes to this question. This is simply a way to bypass the need for evaluation tasks.",
    },
]
payload = {
    "project": project_name,
    "rapid": True,
    "attachments": attachments,
    "initial_response": initial_response,
    "expected_response": expected_response,
    "fields": [
        {
            "type": "category",
            "field_id": "answer",
            "title": "Continue to the next task?",
            "choices": [{"label": "Yes", "value": "yes"}, {"label": "No", "value": "no"}],
        }
    ],
}
print("\nCreating evaluation tasks...")
for task_index in range(10):
    evaluation_task = client.create_evaluation_task(TaskType.TextCollection, **payload)
    print(f" Evaluation task {task_index} created.")
print("Evaluation tasks created.")

# Test batch: create it (or keep the existing one), then add a single task to check that
# task creation works. The batch is deliberately left un-finalized.
print("\nCreating test batch...")
test_batch_name = project_name + "_test"
try:
    batch = client.create_batch(project=project_name, batch_name=test_batch_name, calibration_batch=False)
except ScaleDuplicateResource as err:
    print(f"Test batch {test_batch_name} already exists. It will not be recreated. Error: {err}")
else:
    print("Test batch created.")

# Try to create a single task in the test batch
test_attachment = {
    "type": "text",
    "content": "This is a placeholder for the test task. If you are a worker please simply answer Yes.",
}
finish_field = {
    "type": "category",
    "field_id": "answer",
    "title": "Finish?",
    "choices": [{"label": "Yes", "value": "yes"}, {"label": "No", "value": "no"}],
}
payload = {
    "project": project_name,
    "batch": test_batch_name,
    "instruction": "This is a test task to check that we can create tasks. If you are a worker please simply answer Yes.",
    "attachment_type": "text",
    "attachments": [test_attachment],
    "fields": [finish_field],
}
print("Creating test task...")
client.create_task(TaskType.TextCollection, **payload)
print("Test task created.")
print("The test batch is not going to be finalized so that it does not get sent to workers.")

# Reaching this point means every step above completed: report success and
# point the user at the Scale dashboard for a manual check.
print(f"\n\nProject {project_name} is ready.")
print("Please go to https://app.scale.com/projects to check that the project is ready.")
12 changes: 12 additions & 0 deletions scripts/scale/finalize_batch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import argparse
from scale_utils import get_scale_client

# Command-line interface: which batch to finalize and where the Scale credentials live.
parser = argparse.ArgumentParser()
# The batch name is required: without it the script would ask the API to finalize `None`.
parser.add_argument("--batch_name", type=str, required=True, help="Name of the batch to finalize")
parser.add_argument(
    "--credentials_path", type=str, default="prod_env/credentials.conf", help="Path to the credentials file"
)
args = parser.parse_args()

# Build the client from the credentials file, then finalize the requested batch.
client = get_scale_client(args.credentials_path)
client.finalize_batch(args.batch_name)
30 changes: 30 additions & 0 deletions scripts/scale/scale_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import os
from typing import Dict
from scaleapi import ScaleClient


def get_credentials(path: str) -> Dict[str, str]:
    """Read `key: "value"` pairs from the credentials file at `path`.

    Each line is split on its FIRST ":" only, so values that themselves contain
    a colon (e.g. URLs) are kept intact. The key is the stripped text before the
    colon. The value is the text between the first pair of double quotes, with
    inner spaces preserved; an unquoted value is taken as the stripped remainder
    of the line.

    Lines without a colon, with an empty key or empty value, or starting with
    "#" are ignored.

    Args:
        path: Path of the credentials file to read.

    Returns:
        A dictionary mapping each credential name to its value.

    Raises:
        OSError: If the file cannot be opened.
    """
    credentials: Dict[str, str] = {}
    with open(path, "r") as f:
        for line in f:
            key, separator, raw_value = line.partition(":")
            key = key.strip()
            raw_value = raw_value.strip()
            # Skip blank lines, comments and anything that is not a "key: value" pair.
            if not separator or not key or key.startswith("#") or not raw_value:
                continue
            if raw_value.startswith('"'):
                # Keep what sits between the opening quote and the next quote;
                # tolerate a missing closing quote by taking the rest of the line.
                closing_quote = raw_value.find('"', 1)
                value = raw_value[1:] if closing_quote == -1 else raw_value[1:closing_quote]
            else:
                value = raw_value
            credentials[key] = value
    return credentials


def get_scale_client(relative_credentials_path: str) -> ScaleClient:
    """Build a ScaleClient from the API key stored in a credentials file.

    The given path is joined onto the directory that contains this script,
    and the resulting file is parsed with `get_credentials`.

    Args:
        relative_credentials_path: Credentials file location, joined onto this
            script's directory.

    Returns:
        A ScaleClient constructed with the "scaleApiKey" credential.

    Raises:
        Exception: If the credentials file has no "scaleApiKey" entry.
    """
    script_directory = os.path.dirname(os.path.realpath(__file__))
    credentials_path = os.path.join(script_directory, relative_credentials_path)
    print(f"Reading credentials from {credentials_path}")
    credentials = get_credentials(credentials_path)

    # The API key is the only credential needed to build the client.
    if "scaleApiKey" in credentials:
        return ScaleClient(credentials["scaleApiKey"])
    raise Exception("scaleApiKey not found in credentials.conf")
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ install_requires=
pyext~=0.7
pytrec_eval==0.5
sacrebleu~=2.2.1
scaleapi~=2.13.0
# Work around https://github.com/p-lambda/verified_calibration/issues/11
# TODO: Remove after this issue is resolved
scikit-learn~=1.1.2
Expand Down
16 changes: 14 additions & 2 deletions src/helm/proxy/clients/auto_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
DecodeRequestResult,
)
from helm.proxy.retry import retry_request
from .critique_client import CritiqueClient, RandomCritiqueClient, SurgeAICritiqueClient
from .critique_client import CritiqueClient, RandomCritiqueClient, SurgeAICritiqueClient, ScaleCritiqueClient
from .mechanical_turk_critique_client import MechanicalTurkCritiqueClient
from .client import Client
from .ai21_client import AI21Client
Expand Down Expand Up @@ -263,8 +263,20 @@ def get_critique_client(self) -> CritiqueClient:
if not surgeai_credentials:
raise ValueError("surgeaiApiKey credentials are required for SurgeAICritiqueClient")
self.critique_client = SurgeAICritiqueClient(surgeai_credentials, self._build_cache_config("surgeai"))

elif critique_type == "scale":
scale_credentials = self.credentials.get("scaleApiKey")
scale_batch = self.credentials.get("scaleBatch", None)
if scale_batch is None:
raise ValueError("scaleBatch is required for ScaleCritiqueClient for now.")
if not scale_credentials:
raise ValueError("scaleApiKey credentials are required for ScaleCritiqueClient")
self.critique_client = ScaleCritiqueClient(
scale_credentials, self._build_cache_config("scale"), scale_batch
)
else:
raise ValueError(
"CritiqueClient is not configured; set critiqueType to 'mturk', 'mturk-sandbox', 'surgeai' or 'random'"
"CritiqueClient is not configured; set critiqueType to 'mturk',"
"'mturk-sandbox', 'surgeai', 'scale' or 'random'"
)
return self.critique_client
Loading