Skip to content

Commit

Permalink
formatting
Browse files: browse the repository at this point in the history
  • Loading branch information
liamjxu committed Nov 28, 2024
1 parent ff1a3fc commit dbdb4c0
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 10 deletions.
11 changes: 6 additions & 5 deletions src/helm/benchmark/annotation/bigcodebench_annotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def annotate_all(self, request_states: List[RequestState]) -> Any:
assert all(request_state.result for request_state in request_states)
assert all(len(request_state.result.completions) == 1 for request_state in request_states)
assert all(request_state.instance.extra_data for request_state in request_states)

with TemporaryDirectory() as tmpdir:
# with open(f"{tmpdir}/result.jsonl", "w") as file:
with open(f"tmp_result.jsonl", "w") as file:
Expand All @@ -73,8 +73,10 @@ def annotate_all(self, request_states: List[RequestState]) -> Any:
model_output_text = request_state.result.completions[0].text
solution = code_extract(model_output_text)
escaped_solution = json.dumps(solution)[1:-1]
idx = int(request_state.instance.extra_data["task_id"].split('/')[-1])
res[idx] = f'{{"task_id": "{request_state.instance.extra_data["task_id"]}", "solution": "{escaped_solution}"}}\n'
idx = int(request_state.instance.extra_data["task_id"].split("/")[-1])
res[idx] = (
f'{{"task_id": "{request_state.instance.extra_data["task_id"]}", "solution": "{escaped_solution}"}}\n'
)
for line in res:
file.write(line)

Expand All @@ -91,7 +93,7 @@ def annotate_all(self, request_states: List[RequestState]) -> Any:
# samples=handle_file(f"{tmpdir}/result.jsonl"),
samples=handle_file(f"tmp_result.jsonl"),
pass_k=self.pass_k,
api_name="/predict"
api_name="/predict",
)
success = True # Operation succeeded
pass_at_one = pass_at_k["pass@1"]
Expand All @@ -104,5 +106,4 @@ def annotate_all(self, request_states: List[RequestState]) -> Any:
hlog("Failed to complete the operation after 3 attempts.")
pass_at_one = 0.0


return {"pass_at_one": pass_at_one}
3 changes: 2 additions & 1 deletion src/helm/benchmark/annotation_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ def execute(self, scenario_state: ScenarioState) -> ScenarioState:

else:
hlog("!!!!Annotators are not all is_macro!.")

# Do it!
def do_it(request_state: RequestState) -> RequestState:
assert scenario_state.annotator_specs is not None
Expand Down Expand Up @@ -140,4 +141,4 @@ def process_all(self, annotator_specs: List[AnnotatorSpec], states: List[Request
annotations[annotator.name] = new_annotations
except Exception as e:
raise AnnotationExecutorError(f"{str(e)} Request: {states.request}") from e
return [replace(state, annotations=annotations) for state in states]
return [replace(state, annotations=annotations) for state in states]
15 changes: 11 additions & 4 deletions src/helm/benchmark/run_specs/lite_run_specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,10 +473,17 @@ def get_bigcodebench_spec(subset: str) -> RunSpec:

# Adapted from https://github.dev/bigcode-project/bigcodebench/blob/main/bigcodebench/evaluate.py
adapter_spec = AdapterSpec(
method=ADAPT_GENERATION, input_prefix="", output_prefix="", max_tokens=1000, num_outputs=1, temperature=0.0,
global_prefix="Please provide a self-contained Python script that solves the following problem in a markdown code block:"
method=ADAPT_GENERATION,
input_prefix="",
output_prefix="",
max_tokens=1000,
num_outputs=1,
temperature=0.0,
global_prefix="Please provide a self-contained Python script that solves the following problem in a markdown code block:",
)
annotator_specs = [AnnotatorSpec(class_name="helm.benchmark.annotation.bigcodebench_annotator.BigCodeBenchAnnotator")]
annotator_specs = [
AnnotatorSpec(class_name="helm.benchmark.annotation.bigcodebench_annotator.BigCodeBenchAnnotator")
]
metric_specs = [MetricSpec(class_name="helm.benchmark.metrics.bigcodebench_metrics.BigCodeBenchMetric")]

return RunSpec(
Expand All @@ -486,4 +493,4 @@ def get_bigcodebench_spec(subset: str) -> RunSpec:
annotators=annotator_specs,
metric_specs=metric_specs,
groups=["bigcodebench"],
)
)

0 comments on commit dbdb4c0

Please sign in to comment.