From c570504c28eed6c36709d9f441bd1fe738b46a33 Mon Sep 17 00:00:00 2001 From: John Yang Date: Mon, 8 Jul 2024 15:54:33 +0000 Subject: [PATCH] Add `max_pulls` arg to `get_tasks_pipeline` --- swebench/collect/get_tasks_pipeline.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/swebench/collect/get_tasks_pipeline.py b/swebench/collect/get_tasks_pipeline.py index 01b927f8..7177f101 100755 --- a/swebench/collect/get_tasks_pipeline.py +++ b/swebench/collect/get_tasks_pipeline.py @@ -48,10 +48,11 @@ def construct_data_files(data: dict): path_tasks (str): Path to save task instance data files to token (str): GitHub token to use for API requests """ - repos, path_prs, path_tasks, cutoff_date, token = ( + repos, path_prs, path_tasks, max_pulls, cutoff_date, token = ( data["repos"], data["path_prs"], data["path_tasks"], + data["max_pulls"], data["cutoff_date"], data["token"], ) @@ -64,7 +65,13 @@ def construct_data_files(data: dict): path_pr = path_pr.replace(".jsonl", f"-{cutoff_date}.jsonl") if not os.path.exists(path_pr): print(f"Pull request data for {repo} not found, creating...") - print_pulls(repo, path_pr, token, cutoff_date) + print_pulls( + repo, + path_pr, + token, + max_pulls=max_pulls, + cutoff_date=cutoff_date + ) print(f"Successfully saved PR data for {repo} to {path_pr}") else: print( @@ -94,6 +101,7 @@ def main( repos: list, path_prs: str, path_tasks: str, + max_pulls: int|None = None, cutoff_date: str = None, ): """ @@ -120,6 +128,7 @@ def main( "repos": repos, "path_prs": path_prs, "path_tasks": path_tasks, + "max_pulls": max_pulls, "cutoff_date": cutoff_date, "token": token } @@ -143,6 +152,12 @@ def main( type=str, help="Path to folder to save task instance data files to", ) + parser.add_argument( + "--max_pulls", + type=int, + help="Maximum number of pulls to log", + default=None + ) parser.add_argument( "--cutoff_date", type=str,