Merge pull request #1933 from chaoss/augur-new-remove-hard-coded
Augur new Remove Hard Coded Repo_id and Change platform_id to 1
ABrain7710 authored Jul 29, 2022
2 parents 3f64c89 + aebf5cb commit 81ed9c9
Showing 2 changed files with 41 additions and 42 deletions.
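In plain terms, the commit stops hard-coding repo_id (previously 1) and platform_id (previously 25150) inside the task functions: platform_id becomes a module-level constant set to 1 (GitHub), repo_id is looked up from the repo table by the repository's git URL and passed to the processing tasks, and the platform_id parameter is dropped from the contributor-extraction helpers. Below is a minimal, self-contained sketch of the lookup pattern the diff adopts; it uses a simplified Repo model and an in-memory SQLite database rather than Augur's real schema and GithubTaskSession, so the names are illustrative only.

from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()

class Repo(Base):
    # simplified stand-in for Augur's repo table
    __tablename__ = "repo"
    repo_id = Column(Integer, primary_key=True)
    repo_git = Column(String, unique=True)

platform_id = 1  # GitHub; a module-level constant instead of a per-task literal

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add(Repo(repo_id=42, repo_git="https://github.com/chaoss/augur"))
    session.commit()

    # resolve repo_id from the repository URL instead of hard-coding it,
    # then hand it to the processing task alongside the fetched page data
    repo_git = "https://github.com/chaoss/augur"
    repo_id = session.query(Repo).filter(Repo.repo_git == repo_git).one().repo_id
    print(repo_id)  # 42

The same query appears verbatim in the diff below, and each process_* task now receives repo_id as an explicit argument instead of assuming repo_id = 1.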
2 changes: 1 addition & 1 deletion augur/application/db/data_parse.py
@@ -459,7 +459,7 @@ def extract_need_pr_review_data(reviews, platform_id, repo_id, tool_version, dat

return review_data

-def extract_needed_contributor_data(contributor, platform_id, tool_source, tool_version, data_source):
+def extract_needed_contributor_data(contributor, tool_source, tool_version, data_source):

cntrb_id = GithubUUID()
cntrb_id["user"] = contributor["id"]
81 changes: 40 additions & 41 deletions augur/tasks/github/issue_tasks.py
@@ -14,6 +14,8 @@
#NOTICE: A pull request is a type of issue as per Github.
#So this file contains functionality for both prs and issues

+platform_id = 1


@celery.task
def collect_issues(repo_git: str) -> None:
@@ -67,7 +69,6 @@ def process_issues(issues, task_name, repo_id) -> None:
# get repo_id or have it passed
tool_source = "Issue Task"
tool_version = "2.0"
-platform_id = 25150
data_source = "Github API"

issue_dicts = []
@@ -83,7 +84,7 @@ def process_issues(issues, task_name, repo_id) -> None:
issue_total-=1
continue

-issue, contributor_data = process_issue_contributors(issue, platform_id, tool_source, tool_version, data_source)
+issue, contributor_data = process_issue_contributors(issue, tool_source, tool_version, data_source)

contributors += contributor_data

@@ -163,17 +164,17 @@ def process_issues(issues, task_name, repo_id) -> None:
session.insert_data(issue_assignee_dicts, IssueAssignee, issue_assignee_natural_keys)


-def process_issue_contributors(issue, platform_id, tool_source, tool_version, data_source):
+def process_issue_contributors(issue, tool_source, tool_version, data_source):

contributors = []

-issue_cntrb = extract_needed_contributor_data(issue["user"], platform_id, tool_source, tool_version, data_source)
+issue_cntrb = extract_needed_contributor_data(issue["user"], tool_source, tool_version, data_source)
issue["cntrb_id"] = issue_cntrb["cntrb_id"]
contributors.append(issue_cntrb)

for assignee in issue["assignees"]:

-issue_assignee_cntrb = extract_needed_contributor_data(issue["user"], platform_id, tool_source, tool_version, data_source)
+issue_assignee_cntrb = extract_needed_contributor_data(issue["user"], tool_source, tool_version, data_source)
assignee["cntrb_id"] = issue_assignee_cntrb["cntrb_id"]
contributors.append(issue_assignee_cntrb)

@@ -192,6 +193,8 @@ def collect_pull_requests(repo_git: str) -> None:
# define GithubTaskSession to handle insertions, and store oauth keys
session = GithubTaskSession(logger)

+repo_id = session.query(Repo).filter(Repo.repo_git == repo_git).one().repo_id

logger.info(f"Collecting pull requests for {owner}/{repo}")

url = f"https://api.github.com/repos/{owner}/{repo}/pulls?state=all&direction=desc"
@@ -213,22 +216,20 @@ def collect_pull_requests(repo_git: str) -> None:
return


-process_pull_requests.s(page_data, f"{repo.capitalize()} Pr Page {page} Task").apply_async()
+process_pull_requests.s(page_data, f"{repo.capitalize()} Pr Page {page} Task", repo_id).apply_async()


@celery.task
-def process_pull_requests(pull_requests, task_name):
+def process_pull_requests(pull_requests, task_name, repo_id):

logger = logging.getLogger(process_pull_requests.__name__)

# define GithubTaskSession to handle insertions, and store oauth keys
session = GithubTaskSession(logger)

# get repo_id or have it passed
-repo_id = 1
tool_source = "Pr Task"
tool_version = "2.0"
-platform_id = 25150
data_source = "Github API"

pr_dicts = []
@@ -240,7 +241,7 @@ def process_pull_requests(pull_requests, task_name):

# adds cntrb_id to reference the contributors table to the
# prs, assignees, reviewers, and metadata
-pr, contributor_data = process_pull_request_contributors(pr, platform_id, tool_source, tool_version, data_source)
+pr, contributor_data = process_pull_request_contributors(pr, tool_source, tool_version, data_source)

contributors += contributor_data

@@ -381,12 +382,12 @@ def process_pull_requests(pull_requests, task_name):
# TODO: Should we insert metadata without user relation?
# NOTE: For contributor related operations: extract_needed_contributor_data takes a piece of github contributor data
# and creates a cntrb_id (primary key for the contributors table) and gets the data needed for the table
-def process_pull_request_contributors(pr, platform_id, tool_source, tool_version, data_source):
+def process_pull_request_contributors(pr, tool_source, tool_version, data_source):

contributors = []

# get contributor data and set pr cntrb_id
-pr_cntrb = extract_needed_contributor_data(pr["user"], platform_id, tool_source, tool_version, data_source)
+pr_cntrb = extract_needed_contributor_data(pr["user"], tool_source, tool_version, data_source)
pr["cntrb_id"] = pr_cntrb["cntrb_id"]

contributors.append(pr_cntrb)
@@ -395,14 +396,14 @@ def process_pull_request_contributors(pr, platform_id, tool_source, tool_version
if pr["base"]["user"]:

# get contributor data and set pr metadat cntrb_id
-pr_meta_base_cntrb = extract_needed_contributor_data(pr["base"]["user"], platform_id, tool_source, tool_version, data_source)
+pr_meta_base_cntrb = extract_needed_contributor_data(pr["base"]["user"], tool_source, tool_version, data_source)
pr["base"]["cntrb_id"] = pr_meta_base_cntrb["cntrb_id"]

contributors.append(pr_meta_base_cntrb)

if pr["head"]["user"]:

-pr_meta_head_cntrb = extract_needed_contributor_data(pr["head"]["user"], platform_id, tool_source, tool_version, data_source)
+pr_meta_head_cntrb = extract_needed_contributor_data(pr["head"]["user"], tool_source, tool_version, data_source)
pr["head"]["cntrb_id"] = pr_meta_head_cntrb["cntrb_id"]

contributors.append(pr_meta_head_cntrb)
@@ -412,7 +413,7 @@ def process_pull_request_contributors(pr, platform_id, tool_source, tool_version
# set cntrb_id for assignees
for assignee in pr["assignees"]:

-pr_asignee_cntrb = extract_needed_contributor_data(assignee, platform_id, tool_source, tool_version, data_source)
+pr_asignee_cntrb = extract_needed_contributor_data(assignee, tool_source, tool_version, data_source)
assignee["cntrb_id"] = pr_asignee_cntrb["cntrb_id"]

contributors.append(pr_asignee_cntrb)
@@ -421,7 +422,7 @@ def process_pull_request_contributors(pr, platform_id, tool_source, tool_version
# set cntrb_id for reviewers
for reviewer in pr["requested_reviewers"]:

-pr_reviwer_cntrb = extract_needed_contributor_data(reviewer, platform_id, tool_source, tool_version, data_source)
+pr_reviwer_cntrb = extract_needed_contributor_data(reviewer, tool_source, tool_version, data_source)
reviewer["cntrb_id"] = pr_reviwer_cntrb["cntrb_id"]

contributors.append(pr_reviwer_cntrb)
@@ -436,10 +437,12 @@ def collect_events(repo_git: str):

logger = logging.getLogger(collect_events.__name__)

logger.info(f"Collecting pull request events for {owner}/{repo}")
logger.info(f"Collecting Github events for {owner}/{repo}")

# define GithubTaskSession to handle insertions, and store oauth keys
session = GithubTaskSession(logger)

+repo_id = session.query(Repo).filter(Repo.repo_git == repo_git).one().repo_id

url = f"https://api.github.com/repos/{owner}/{repo}/issues/events"

@@ -460,21 +463,18 @@ def collect_events(repo_git: str):
logger.info(f"Events Page {page} of {num_pages}")
return

-process_events.s(page_data, f"{repo.capitalize()} Events Page {page} Task").apply_async()
+process_events.s(page_data, f"{repo.capitalize()} Events Page {page} Task", repo_id).apply_async()

logger.info("Completed events")

@celery.task
-def process_events(events, task_name):
+def process_events(events, task_name, repo_id):

logger = logging.getLogger(process_events.__name__)
# define GithubTaskSession to handle insertions, and store oauth keys
session = GithubTaskSession(logger)

-# get repo_id
-repo_id = 1
-platform_id = 25150
-tool_source = "Pr event task"
+tool_source = "Github events task"
tool_version = "2.0"
data_source = "Github API"

@@ -485,7 +485,7 @@ def process_events(events, task_name):
event_len = len(events)
for index, event in enumerate(events):

-event, contributor = process_github_event_contributors(event, platform_id, tool_source, tool_version, data_source)
+event, contributor = process_github_event_contributors(event, tool_source, tool_version, data_source)

if 'pull_request' in list(event["issue"].keys()):
pr_url = event["issue"]["pull_request"]["url"]
@@ -549,11 +549,11 @@ def process_events(events, task_name):


# TODO: Should we skip an event if there is no contributor to resolve it o
-def process_github_event_contributors(event, platform_id, tool_source, tool_version, data_source):
+def process_github_event_contributors(event, tool_source, tool_version, data_source):

if event["actor"]:

-event_cntrb = extract_needed_contributor_data(event["actor"], platform_id, tool_source, tool_version, data_source)
+event_cntrb = extract_needed_contributor_data(event["actor"], tool_source, tool_version, data_source)
event["cntrb_id"] = event_cntrb["cntrb_id"]

else:
@@ -572,7 +572,9 @@ def collect_issue_and_pr_comments(repo_git: str) -> None:

# define database task session, that also holds autentication keys the GithubPaginator needs
session = GithubTaskSession(logger)


+repo_id = session.query(Repo).filter(Repo.repo_git == repo_git).one().repo_id

# url to get issue and pull request comments
url = f"https://api.github.com/repos/{owner}/{repo}/issues/comments"

@@ -591,21 +593,19 @@ def collect_issue_and_pr_comments(repo_git: str) -> None:

logger.info(f"Github Messages Page {page} of {num_pages}")

-process_messages.s(page_data, f"Github Messages Page {page} Task").apply_async()
+process_messages.s(page_data, f"Github Messages Page {page} Task", repo_id).apply_async()

logger.info("Completed messages")

@celery.task
-def process_messages(messages, task_name):
+def process_messages(messages, task_name, repo_id):

# define logger for task
logger = logging.getLogger(process_messages.__name__)

# define database task session, that also holds autentication keys the GithubPaginator needs
session = GithubTaskSession(logger)

-repo_id = 1
-platform_id = 25150
tool_source = "Pr comment task"
tool_version = "2.0"
data_source = "Github API"
@@ -628,7 +628,7 @@ def process_messages(messages, task_name):
# this adds the cntrb_id to the message data
# the returned contributor will be added to the contributors list later, if the related issue or pr are found
# this logic is used so we don't insert a contributor when the related message isn't inserted
-message, contributor = process_github_comment_contributors(message, platform_id, tool_source, tool_version, data_source)
+message, contributor = process_github_comment_contributors(message, tool_source, tool_version, data_source)

if is_issue_message(message["html_url"]):

@@ -737,9 +737,9 @@ def is_issue_message(html_url):
return 'pull' not in html_url


-def process_github_comment_contributors(message, platform_id, tool_source, tool_version, data_source):
+def process_github_comment_contributors(message, tool_source, tool_version, data_source):

-message_cntrb = extract_needed_contributor_data(message["user"], platform_id, tool_source, tool_version, data_source)
+message_cntrb = extract_needed_contributor_data(message["user"], tool_source, tool_version, data_source)
message["cntrb_id"] = message_cntrb["cntrb_id"]

return message, message_cntrb
@@ -763,9 +763,9 @@ def pull_request_review_comments(repo_git: str) -> None:
pr_review_comments = GithubPaginator(url, session.oauths, logger)

# get repo_id
-repo_id = 1
+repo_id = session.query(Repo).filter(Repo.repo_git == repo_git).one().repo_id


-platform_id = 25150
tool_source = "Pr review comment task"
tool_version = "2.0"
data_source = "Github API"
@@ -844,8 +844,8 @@ def pull_request_reviews(repo_git: str, pr_number_list: [int]) -> None:

pr_number_list = sorted(pr_number_list, reverse=False)

-repo_id = 1
-platform_id = 25150
+repo_id = session.query(Repo).filter(Repo.repo_git == repo_git).one().repo_id

tool_version = "2.0"
data_source = "Github API"

@@ -916,8 +916,7 @@ def process_contributors():
logger = logging.getLogger(process_contributors.__name__)
session = GithubTaskSession(logger)

-platform = 1
-tool_source = "Pr comment task"
+tool_source = "Contributors task"
tool_version = "2.0"
data_source = "Github API"

@@ -926,7 +925,7 @@ def process_contributors():
contributors_len = len(contributors)

if contributors_len == 0:
print("No contributors to enrich...returning...")
logger.info("No contributors to enrich...returning...")
return

print(f"Length of contributors to enrich: {contributors_len}")
