Remove global OpenAI client initialization #1539

Open · wants to merge 1 commit into base: main
4 changes: 2 additions & 2 deletions evals/completion_fns/retrieval.py
@@ -14,7 +14,6 @@
from evals.record import record_sampling
from evals.registry import Registry

client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))


def load_embeddings(embeddings_and_text_path: str):
@@ -76,6 +75,7 @@ def __init__(
registry_path: The path to a registry file to add to default registry.
_kwargs: Additional arguments to pass to the completion function instantiation.
"""
self.client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
registry = Registry() if not registry else registry
if registry_path:
registry.add_registry_paths(registry_path)
@@ -96,7 +96,7 @@ def __call__(self, prompt: Union[str, list[dict]], **kwargs: Any) -> RetrievalCo
"""
# Embed the prompt
embedded_prompt = (
client.embeddings.create(
self.client.embeddings.create(
model=self.embedding_model, input=CompletionPrompt(prompt).to_formatted_prompt()
)
.data[0]
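The change here moves OpenAI client construction from module import time into the object that uses it. A minimal sketch of that pattern (illustrative class and method names, not the exact contents of retrieval.py):

```python
import os

from openai import OpenAI


class EmbeddingCompletionFn:
    """Illustrative sketch: the client is built per instance, not at import time."""

    def __init__(self, embedding_model: str = "text-embedding-ada-002"):
        # Importing the module no longer requires OPENAI_API_KEY to be set; it is
        # only needed once the completion function is actually instantiated.
        self.client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
        self.embedding_model = embedding_model

    def embed(self, text: str) -> list[float]:
        return (
            self.client.embeddings.create(model=self.embedding_model, input=text)
            .data[0]
            .embedding
        )
```
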
1 change: 0 additions & 1 deletion evals/elsuite/hr_ml_agent_bench/utils.py
@@ -13,7 +13,6 @@
from evals.solvers.solver import Solver
from evals.task_state import TaskState

client = OpenAI()
logger = logging.getLogger(__name__)


5 changes: 1 addition & 4 deletions evals/elsuite/make_me_pay/utils.py
@@ -4,9 +4,6 @@

from openai import OpenAI

client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))


def is_system_msg(m: dict) -> bool:
assert isinstance(m, dict), "Message must be a dict."
assert "role" in m, "Message must have a role."
@@ -72,4 +69,4 @@ def model_output_empty_tags(message: str) -> bool:


def openai_chatcompletion_create(*args, **kwargs):
return client.chat.completions.create(*args, **kwargs)
return OpenAI(api_key=os.environ.get("OPENAI_API_KEY")).chat.completions.create(*args, **kwargs)
Collaborator: Instead of creating the client fresh on every call of this function, can we pass it in from the caller? Ideally it would be instantiated in an __init__ in a class, like the changes you made in the other files.
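A minimal sketch of that suggestion, assuming a hypothetical wrapper class (the name and wiring are illustrative, not part of this PR):

```python
import os
from typing import Optional

from openai import OpenAI


class MakeMePayCompletions:
    """Hypothetical helper: the client is created once and reused for every call."""

    def __init__(self, client: Optional[OpenAI] = None):
        # Accept an injected client (e.g. a stub in tests); otherwise build one here.
        self.client = client or OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

    def chatcompletion_create(self, *args, **kwargs):
        return self.client.chat.completions.create(*args, **kwargs)
```

Callers would then hold one instance for the lifetime of the eval rather than constructing a client on every request.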

4 changes: 2 additions & 2 deletions evals/elsuite/self_prompting/eval.py
@@ -11,7 +11,7 @@
from evals.api import CompletionFn
from evals.elsuite.self_prompting.task_description import sample_in_token, task_description_template
from evals.eval import SolverEval
from evals.registry import registry
from evals.registry import Registry
from evals.solvers.solver import Solver
from evals.task_state import TaskState
from evals.utils.log_utils import extract_final_results, extract_spec
@@ -54,7 +54,7 @@ def __init__(

self.tasker_completion_fns = {}
for tasker_model in self.tasker_models:
self.tasker_completion_fns[tasker_model] = registry.make_completion_fn(tasker_model)
self.tasker_completion_fns[tasker_model] = Registry().make_completion_fn(tasker_model)

def eval_sample(self, solver: Solver, sample: Any, rng: random.Random):
if sample["stage"] == "prompting":
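For context, the change above replaces the imported module-level registry singleton with a locally constructed Registry. A sketch of that pattern (the helper function below is illustrative, not part of the PR):

```python
from evals.registry import Registry


def build_tasker_completion_fns(tasker_models: list[str]) -> dict:
    # Construct the registry once and reuse it for every model, instead of
    # relying on a module-level singleton (or rebuilding it inside the loop).
    registry = Registry()
    return {model: registry.make_completion_fn(model) for model in tasker_models}
```

As written, the diff calls Registry() on each loop iteration; one could hoist the construction out, as sketched here, to keep the behaviour while avoiding repeated instantiation.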
@@ -4,15 +4,15 @@
from eval_list import eval_list

import evals.data
from evals.registry import registry
from evals.registry import Registry

np.random.seed(42)
min_samples_per_dataset = 50
n_test_samples = 10

seen = set()
datarows = []
for eval in registry.get_evals("*"):
for eval in Registry().get_evals("*"):
if eval.key not in eval_list or eval.key in seen:
continue
seen.add(eval.key)
214 changes: 109 additions & 105 deletions evals/elsuite/skill_acquisition/test_skill_acquisition.py
@@ -11,108 +11,112 @@
)
from evals.registry import Registry

registry = Registry()

dummy_eval_spec = {
"eval_registry_path": Path("evals/registry"),
"completion_fns": [registry.make_completion_fn("gpt-4")],
"samples_jsonl": "skill_acquisition/miskito/miskito_test.jsonl",
"target_language": "miskito",
"n_samples": 5,
"knowledge_base_directory": "skill_acquisition/miskito/knowledge_base/",
"max_replies": 50,
}


def test_answer_detected():
assert answer_detected("[ANSWER foo]") is True
assert answer_detected("[ANSWER: foo]") is True
assert answer_detected("ANSWER foo") is False
assert answer_detected("[ANSWER foo") is False
assert answer_detected("ANSWER foo]") is False
assert answer_detected("[ANSWER foo][ANSWER bar]") is True


def test_view_instruction_detected():
SkillAcquisition(**dummy_eval_spec)
assert view_instruction_detected("[VIEW file1]") is True
assert view_instruction_detected("[VIEW: file1]") is True
assert view_instruction_detected("[VIEW file1 section1]") is True
assert view_instruction_detected("[VIEW: file1 section1]") is True
assert view_instruction_detected("VIEW file1") is False
assert view_instruction_detected("[VIEW file1") is False
assert view_instruction_detected("VIEW file1]") is False
assert view_instruction_detected("[VIEW file1][VIEW file2]") is True
assert view_instruction_detected("[VIEW: file1][VIEW: file2]") is True


def test_process_answer():
SkillAcquisition(**dummy_eval_spec)
assert process_answer("[ANSWER foo]") == "foo"
assert process_answer("[ANSWER: foo]") == "foo"
assert process_answer("[ANSWER foo bar baz]") == "foo bar baz"
assert process_answer("[ANSWER: foo bar baz]") == "foo bar baz"
assert process_answer("[ANSWER foo][ANSWER bar]") == "bar"
assert process_answer("[ANSWER foo][ANSWER bar") == "foo"


def test_process_view_instruction():
SkillAcquisition(**dummy_eval_spec)
assert process_view_instruction("[VIEW file1]") == ("file1", None)
assert process_view_instruction("[VIEW: file1]") == ("file1", None)
assert process_view_instruction("[VIEW file1 section1]") == (
"file1",
"section1",
)
assert process_view_instruction("[VIEW: file1 section1]") == (
"file1",
"section1",
)
assert process_view_instruction("[VIEW file1][VIEW file2]") == (
"file2",
None,
)
assert process_view_instruction("[VIEW: file1][VIEW: file2]") == (
"file2",
None,
)
assert process_view_instruction("[VIEW file1 section1][VIEW file2 section2]") == (
"file2",
"section2",
)


def test_process_view_instruction_spaces_and_quotes():
assert process_view_instruction("[VIEW file1 sectionpart1 sectionpart2]") == (
"file1",
"sectionpart1 sectionpart2",
)
assert process_view_instruction("[VIEW file1 sectionpart1 'sectionpart2']") == (
"file1",
"sectionpart1 'sectionpart2'",
)


def test_view_content():
skill_acquisition_eval = SkillAcquisition(**dummy_eval_spec)

# Create a file to view first.
filepath = skill_acquisition_eval.knowledge_base_directory / "test_file.jsonl"
with open(filepath, "w") as f:
f.write(json.dumps({"title": "foo", "content": "Test file contents."}) + "\n")

content, sections_visible_to_model, sections_viewed = skill_acquisition_eval._view_content(
"test_file.jsonl"
)
assert content == "Table of contents for test_file.jsonl: {'foo'}."
assert sections_visible_to_model == {"test_file.jsonl": {"foo"}}
assert sections_viewed == {"test_file.jsonl": {"Table of Contents"}}

content, sections_visible_to_model, sections_viewed = skill_acquisition_eval._view_content(
"test_file.jsonl", "foo"
)
assert content == "Test file contents."
assert sections_visible_to_model == {"test_file.jsonl": {"foo"}}
assert sections_viewed == {"test_file.jsonl": {"Table of Contents", "foo"}}

os.remove(filepath)



class TestSkillAcquisition():
def setup_class(self):
os.environ["OPENAI_API_KEY"] = "test"
self.registry = Registry()
self.dummy_eval_spec = {
"eval_registry_path": Path("evals/registry"),
"completion_fns": [self.registry.make_completion_fn("gpt-4")],
"samples_jsonl": "skill_acquisition/miskito/miskito_test.jsonl",
"target_language": "miskito",
"n_samples": 5,
"knowledge_base_directory": "skill_acquisition/miskito/knowledge_base/",
"max_replies": 50,
}

def test_answer_detected(self):
assert answer_detected("[ANSWER foo]") is True
assert answer_detected("[ANSWER: foo]") is True
assert answer_detected("ANSWER foo") is False
assert answer_detected("[ANSWER foo") is False
assert answer_detected("ANSWER foo]") is False
assert answer_detected("[ANSWER foo][ANSWER bar]") is True


def test_view_instruction_detected(self):
SkillAcquisition(**self.dummy_eval_spec)
assert view_instruction_detected("[VIEW file1]") is True
assert view_instruction_detected("[VIEW: file1]") is True
assert view_instruction_detected("[VIEW file1 section1]") is True
assert view_instruction_detected("[VIEW: file1 section1]") is True
assert view_instruction_detected("VIEW file1") is False
assert view_instruction_detected("[VIEW file1") is False
assert view_instruction_detected("VIEW file1]") is False
assert view_instruction_detected("[VIEW file1][VIEW file2]") is True
assert view_instruction_detected("[VIEW: file1][VIEW: file2]") is True


def test_process_answer(self):
SkillAcquisition(**self.dummy_eval_spec)
assert process_answer("[ANSWER foo]") == "foo"
assert process_answer("[ANSWER: foo]") == "foo"
assert process_answer("[ANSWER foo bar baz]") == "foo bar baz"
assert process_answer("[ANSWER: foo bar baz]") == "foo bar baz"
assert process_answer("[ANSWER foo][ANSWER bar]") == "bar"
assert process_answer("[ANSWER foo][ANSWER bar") == "foo"


def test_process_view_instruction(self):
SkillAcquisition(**self.dummy_eval_spec)
assert process_view_instruction("[VIEW file1]") == ("file1", None)
assert process_view_instruction("[VIEW: file1]") == ("file1", None)
assert process_view_instruction("[VIEW file1 section1]") == (
"file1",
"section1",
)
assert process_view_instruction("[VIEW: file1 section1]") == (
"file1",
"section1",
)
assert process_view_instruction("[VIEW file1][VIEW file2]") == (
"file2",
None,
)
assert process_view_instruction("[VIEW: file1][VIEW: file2]") == (
"file2",
None,
)
assert process_view_instruction("[VIEW file1 section1][VIEW file2 section2]") == (
"file2",
"section2",
)


def test_process_view_instruction_spaces_and_quotes(self):
assert process_view_instruction("[VIEW file1 sectionpart1 sectionpart2]") == (
"file1",
"sectionpart1 sectionpart2",
)
assert process_view_instruction("[VIEW file1 sectionpart1 'sectionpart2']") == (
"file1",
"sectionpart1 'sectionpart2'",
)


def test_view_content(self):
skill_acquisition_eval = SkillAcquisition(**self.dummy_eval_spec)

# Create a file to view first.
filepath = skill_acquisition_eval.knowledge_base_directory / "test_file.jsonl"
with open(filepath, "w") as f:
f.write(json.dumps({"title": "foo", "content": "Test file contents."}) + "\n")

content, sections_visible_to_model, sections_viewed = skill_acquisition_eval._view_content(
"test_file.jsonl"
)
assert content == "Table of contents for test_file.jsonl: {'foo'}."
assert sections_visible_to_model == {"test_file.jsonl": {"foo"}}
assert sections_viewed == {"test_file.jsonl": {"Table of Contents"}}

content, sections_visible_to_model, sections_viewed = skill_acquisition_eval._view_content(
"test_file.jsonl", "foo"
)
assert content == "Test file contents."
assert sections_visible_to_model == {"test_file.jsonl": {"foo"}}
assert sections_viewed == {"test_file.jsonl": {"Table of Contents", "foo"}}

os.remove(filepath)
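The rewritten tests use pytest's xunit-style class setup so that the dummy OPENAI_API_KEY is exported before the Registry and completion functions are constructed. A minimal sketch of that mechanism (names and values are illustrative):

```python
import os


class TestWithSharedSetup:
    def setup_class(self):
        # pytest invokes this once, before any test in the class runs, so shared
        # fixtures and required environment variables are prepared up front.
        os.environ["OPENAI_API_KEY"] = "test"
        self.n_samples = 5

    def test_setup_ran_first(self):
        assert os.environ["OPENAI_API_KEY"] == "test"
        assert self.n_samples == 5
```
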
26 changes: 0 additions & 26 deletions evals/record.py
@@ -263,14 +263,6 @@ def record_final_report(self, final_report: Any):
logging.info(f"Final report: {final_report}. Not writing anywhere.")


def _green(str):
[Collaborator comment on this line: I assume we don't want these deletions in this file?]

return f"\033[1;32m{str}\033[0m"


def _red(str):
return f"\033[1;31m{str}\033[0m"


class DummyRecorder(RecorderBase):
"""
A "recorder" which only logs certain events to the console.
@@ -282,33 +274,15 @@ def __init__(self, run_spec: RunSpec, log: bool = True):
self.log = log

def record_event(self, type, data, sample_id=None):
from evals.registry import registry

if self.run_spec is None:
return

base_eval_spec = registry.get_base_eval(self.run_spec.base_eval)
if base_eval_spec and len(base_eval_spec.metrics) >= 1:
primary_metric = base_eval_spec.metrics[0]
else:
primary_metric = "accuracy"

with self._event_lock:
event = self._create_event(type, data)
self._events.append(event)

msg = f"Not recording event: {event}"

if type == "match":
accuracy_good = (
primary_metric == "accuracy" or primary_metric.startswith("pass@")
) and (data.get("correct", False) or data.get("accuracy", 0) > 0.5)
f1_score_good = primary_metric == "f1_score" and data.get("f1_score", 0) > 0.5
if accuracy_good or f1_score_good:
msg = _green(msg)
else:
msg = _red(msg)

if self.log:
logging.info(msg)
