Skip to content

Commit

Permalink
Add max_pulls arg to get_tasks_pipeline
Browse files (browse the repository at this point in the history)
  • Loading branch information
john-b-yang committed Jul 8, 2024
1 parent 342bece commit c570504
Showing 1 changed file with 17 additions and 2 deletions.
19 changes: 17 additions & 2 deletions swebench/collect/get_tasks_pipeline.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -48,10 +48,11 @@ def construct_data_files(data: dict):
path_tasks (str): Path to save task instance data files to
token (str): GitHub token to use for API requests
"""
repos, path_prs, path_tasks, cutoff_date, token = (
repos, path_prs, path_tasks, max_pulls, cutoff_date, token = (
data["repos"],
data["path_prs"],
data["path_tasks"],
data["max_pulls"],
data["cutoff_date"],
data["token"],
)
Expand All @@ -64,7 +65,13 @@ def construct_data_files(data: dict):
path_pr = path_pr.replace(".jsonl", f"-{cutoff_date}.jsonl")
if not os.path.exists(path_pr):
print(f"Pull request data for {repo} not found, creating...")
print_pulls(repo, path_pr, token, cutoff_date)
print_pulls(
repo,
path_pr,
token,
max_pulls=max_pulls,
cutoff_date=cutoff_date
)
print(f"Successfully saved PR data for {repo} to {path_pr}")
else:
print(
Expand Down Expand Up @@ -94,6 +101,7 @@ def main(
repos: list,
path_prs: str,
path_tasks: str,
max_pulls: int|None = None,
cutoff_date: str = None,
):
"""
Expand All @@ -120,6 +128,7 @@ def main(
"repos": repos,
"path_prs": path_prs,
"path_tasks": path_tasks,
"max_pulls": max_pulls,
"cutoff_date": cutoff_date,
"token": token
}
Expand All @@ -143,6 +152,12 @@ def main(
type=str,
help="Path to folder to save task instance data files to",
)
parser.add_argument(
"--max_pulls",
type=int,
help="Maximum number of pulls to log",
default=None
)
parser.add_argument(
"--cutoff_date",
type=str,
Expand Down

0 comments on commit c570504

Please sign in to comment.