forked from internetarchive/openlibrary
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #178 from jimchamp/comment-digest-workflow
Comment digest workflow
- Loading branch information
Showing
4 changed files
with
334 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
name: new_comment_digest
on:
  schedule: # 08:30 daily (UTC)
    - cron: '30 8 * * *'
  workflow_dispatch: # Allows this job to also be run on-demand
permissions:
  contents: read # Read-only access is sufficient for checkout

jobs:
  # NOTE: job id fixed from `new_comment_digeste` (typo)
  new_comment_digest:
    runs-on: ubuntu-latest
    steps:
      # Action refs must be full tags (`v4`), not bare major numbers (`4`)
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v4
        with:
          python-version: '3.x'
      - run: pip install requests
      - run: scripts/gh_scripts/issue_comment_bot.py 24 "$SLACK_CHANNEL" "$SLACK_TOKEN"
        env:
          SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL_ABC_TEAM_PLUS }}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# GitHub Project Management Scripts | ||
|
||
This directory contains scripts that the Open Library team uses to interact with this GitHub repository. | ||
|
||
To quickly see a script's purpose and arguments, run the script with the `-h` or `--help` flag. | ||
|
||
## `issue_comment_bot.py` | ||
|
||
This script fetches issues that have received new comments from contributors within a given number of past hours, then posts a digest message to the team in our Slack channel.
|
||
### Usage: | ||
This script has three positional arguments: | ||
``` | ||
hours Fetch issues that have been updated since this many hours ago | ||
channel Issues will be published to this Slack channel | ||
slack-token Slack authentication token | ||
``` | ||
|
||
__Running the script locally:__ | ||
``` | ||
docker compose exec -e PYTHONPATH=. web bash | ||
# Publish digest of new comments from the past day to #openlibrary-g: | ||
./scripts/gh_scripts/issue_comment_bot.py 24 "#openlibrary-g" "replace-with-slack-token" | ||
``` | ||
|
||
__Note:__ When passing arguments, be sure to wrap any values that contain hyphens (such as `"#openlibrary-g"`) in double quotes, so the shell and argument parser do not mistake them for option flags.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,285 @@ | ||
#!/usr/bin/env python | ||
""" | ||
Fetches Open Library GitHub issues that have been commented on | ||
within some amount of time, in hours. | ||
Writes links to each issue to given Slack channel. | ||
""" | ||
import argparse | ||
import errno | ||
import sys | ||
import time | ||
|
||
from datetime import datetime, timedelta | ||
from typing import Any | ||
|
||
import requests | ||
|
||
# Maps a lead label's exact name (as it appears on GitHub issues) to the
# lead's GitHub username.  Used when publishing the digest to resolve an
# issue's lead label into a Slack mention.
lead_label_to_username = {
    'Lead: @mekarpeles': 'mekarpeles',
    'Lead: @cdrini': 'cdrini',
    'Lead: @scottbarnes': 'scottbarnes',
    'Lead: @seabelis': 'seabelis',
    'Lead: @jimchamp': 'jimchamp',
}

# Maps GitHub username to Slack ID, in Slack mention markup.
# This mapping also doubles as the staff roster: commenters whose GitHub
# username appears here are treated as staff, and their comments do not
# trigger Slack notifications (see `filter_issues`).
username_to_slack_id = {
    'mekarpeles': '<@mek>',
    'cdrini': '<@cdrini>',
    'scottbarnes': '<@U03MNR6T7FH>',
    'seabelis': '<@UAHQ39ACT>',
    'jimchamp': '<@U01ARTHG9EV>',
    'hornc': '<@U0EUS8DV0>',
}
|
||
|
||
def fetch_issues(updated_since: str):
    """
    Fetches all open GitHub issues that have been updated since the given
    date string and have at least one comment.

    GitHub search results are paginated.  This function follows the `next`
    links in the response headers and accumulates every page's items into a
    single list.  To keep API calls to a minimum, we request the maximum
    number of results per request (100 per page, as of writing).

    Important: updated issues need not have a recent *comment*.  Update
    events include many other things, such as adding a label to an issue or
    moving an issue to a milestone.  Issues returned by this function
    require additional processing (see `filter_issues`) to determine whether
    they have recent comments.

    :param updated_since: timestamp string, formatted '%Y-%m-%dT%H:%M:%S'
    :return: list of issue records as returned by the GitHub search API

    NOTE(review): requests are unauthenticated, so GitHub's lower
    anonymous rate limits apply — consider passing a token if this ever
    starts failing.
    """
    query = f'repo:internetarchive/openlibrary is:open is:issue comments:>0 updated:>{updated_since}'
    params: dict[str, str | int] = {
        'q': query,
        'per_page': 100,  # maximum page size, to minimize API calls
    }
    # Host fixed: the API lives at api.github.com (previous value was a
    # scrape-corrupted mirror domain).
    url = 'https://api.github.com/search/issues'
    results: list = []

    # Follow pagination links until there is no `next` page.  The `next`
    # URL already encodes the query parameters, so params are only sent
    # with the initial request.  (Also avoids shadowing the builtin `next`.)
    while url:
        response = requests.get(url, params=params)
        params = {}
        results += response.json()['items']
        url = response.links.get('next', {}).get('url', '')

    return results
|
||
|
||
def filter_issues(issues: list, since: datetime):
    """
    Returns the subset of issues whose most recent comment was left after
    `since` by somebody who is not a staff member.

    Requires fetching the most recent comments for each given issue (one or
    two additional API calls per issue).

    :param issues: issue records from `fetch_issues`
    :param since: naive datetime; comments at or before this moment are ignored
    :return: list of dicts with keys comment_url/commenter/issue_title/lead_label
    """
    results = []

    for issue in issues:
        # Fetch comments using the URL from the previous GitHub search results
        comments_url = issue.get('comments_url')
        resp = requests.get(comments_url, params={'per_page': 100})

        # Comments are paginated oldest-first; jump to the last page so the
        # final element is the most recent comment.
        last_url = resp.links.get('last', {}).get('url', '')
        if last_url:
            resp = requests.get(last_url)

        comments = resp.json()
        if not comments:
            # Defensive: the search query requires comments:>0, but a comment
            # may have been deleted between the search and this fetch.
            continue
        last_comment = comments[-1]

        # First step: ensure that the last comment was left after `since`.
        # GitHub returns ISO-8601 timestamps with a trailing 'Z', which
        # `fromisoformat` rejects on Python < 3.11 — normalize it first.
        created = datetime.fromisoformat(
            last_comment['created_at'].replace('Z', '+00:00')
        )
        # Removing timezone info to avoid TypeErrors, which occur when
        # comparing a timezone-aware datetime with a timezone-naive datetime.
        # NOTE(review): this compares a UTC wall-clock against the
        # local-time `since` from `time_since` — confirm the runner's
        # clock is UTC (true on GitHub-hosted runners).
        created = created.replace(tzinfo=None)
        if created > since:
            # Next step: skip the issue if the last commenter is staff
            # (i.e. appears in the Slack roster).
            last_commenter = last_comment['user']['login']
            if last_commenter not in username_to_slack_id:
                results.append({
                    'comment_url': last_comment['html_url'],
                    'commenter': last_commenter,
                    'issue_title': issue['title'],
                    'lead_label': find_lead_label(issue.get('labels', [])),
                })

    return results
|
||
|
||
def find_lead_label(labels: list[dict[str, Any]]) -> str:
    """
    Return the name of the first lead label (one starting with 'Lead:')
    found in the given list of GitHub label records.

    Returns an empty string if no lead label is present.
    """
    lead_names = (
        label['name'] for label in labels if label['name'].startswith('Lead:')
    )
    return next(lead_names, '')
|
||
|
||
def publish_digest(
    issues: list[dict[str, str]],
    slack_channel: str,
    slack_token: str,
    hours_passed: int,
):
    """
    Creates a threaded Slack message containing a digest of recently
    commented GitHub issues.

    The parent Slack message says how many comments were left and the
    timeframe.  Each threaded reply includes a link to the comment, the
    issue title, the lead (if any), and the commenter.

    :param issues: filtered issue dicts from `filter_issues`
    :param slack_channel: channel to post to (e.g. '#openlibrary-g')
    :param slack_token: Slack Bot token
    :param hours_passed: lookback window, echoed in the parent message

    Exits the process with a nonzero code if the parent message cannot be
    posted (replies have nowhere to go without it).
    """

    def _post(payload: dict):
        """POST one chat.postMessage call with the shared auth headers."""
        return requests.post(
            'https://slack.com/api/chat.postMessage',
            headers={
                'Authorization': f"Bearer {slack_token}",
                'Content-Type': 'application/json; charset=utf-8',
            },
            json=payload,
        )

    def _ok(response) -> bool:
        """
        True if the Slack call fully succeeded.  Slack reports API-level
        errors (bad token, unknown channel, ...) as HTTP 200 with
        `"ok": false`, so the status code alone is not sufficient.
        """
        return response.status_code == 200 and response.json().get('ok', False)

    # Create the parent message
    parent_thread_msg = (
        f'{len(issues)} new GitHub comment(s) since {hours_passed} hour(s) ago'
    )
    response = _post({
        'channel': slack_channel,
        'text': parent_thread_msg,
    })

    if not _ok(response):
        # XXX : Log this
        print(f'Failed to send message to Slack. Status code: {response.status_code}')
        # XXX : Add retry logic?
        # errno.ECOMM exists only on Linux; fall back to a generic failure code
        sys.exit(getattr(errno, 'ECOMM', 1))

    # Store timestamp, which, along with the channel, uniquely identifies
    # the parent thread
    ts = response.json().get('ts')

    def comment_on_thread(message: str):
        """Posts the given message as a reply to the parent message."""
        resp = _post({
            'channel': slack_channel,
            'text': message,
            'thread_ts': ts,
        })
        if not _ok(resp):
            # XXX : Log this
            print(f'Failed to POST slack message\n  Status code: {resp.status_code}\n  Message: {message}')
            # XXX : Retry logic?

    for i in issues:
        # Slack rate limit is roughly 1 request per second
        time.sleep(1)

        message = f'<{i["comment_url"]}|Latest comment for: *{i["issue_title"]}*>\n'

        # Prefer an @-mention of the lead's Slack ID; fall back to the raw
        # lead label, then to 'N/A' when the issue has no lead at all.
        username = lead_label_to_username.get(i['lead_label'], '')
        slack_id = username_to_slack_id.get(username, '')
        if slack_id:
            message += f'Lead: {slack_id}\n'
        elif i['lead_label']:
            message += f'{i["lead_label"]}\n'
        else:
            message += 'Lead: N/A\n'

        message += f'Commenter: *{i["commenter"]}*'
        comment_on_thread(message)
|
||
|
||
def time_since(hours):
    """
    Return the moment `hours` hours before now, both as a datetime and as
    a '%Y-%m-%dT%H:%M:%S'-formatted string.
    """
    # XXX : Add a minute or two to the delta (to avoid dropping issues)?
    cutoff = datetime.now() - timedelta(hours=hours)
    stamp = cutoff.strftime('%Y-%m-%dT%H:%M:%S')
    return cutoff, stamp
|
||
|
||
def start_job(args: argparse.Namespace):
    """
    Runs the new comment digest job once: fetch, filter, publish.
    """
    cutoff, cutoff_string = time_since(args.hours)
    needs_attention = filter_issues(fetch_issues(cutoff_string), cutoff)

    # XXX : If we are only running this script daily, we can remove this condition to
    # always post a message to Slack. If the digest is ever not published, we'll know
    # that something is wrong with our script runner.
    if not needs_attention:
        # XXX : Log this
        print('No issues needing attention found.')
        return

    publish_digest(needs_attention, args.channel, args.slack_token, args.hours)
    # XXX : Log this
    print('Digest posted to Slack.')
|
||
|
||
def _get_parser() -> argparse.ArgumentParser: | ||
""" | ||
Creates and returns an ArgumentParser containing default values which were | ||
read from the config file. | ||
""" | ||
parser = argparse.ArgumentParser(description=__doc__) | ||
parser.add_argument( | ||
'hours', | ||
help='Fetch issues that have been updated since this many hours ago', | ||
type=int, | ||
) | ||
parser.add_argument( | ||
'channel', | ||
help="Issues will be published to this Slack channel", | ||
type=str, | ||
) | ||
parser.add_argument( | ||
'slack_token', | ||
metavar='slack-token', | ||
help='Slack auth token', | ||
type=str, | ||
) | ||
|
||
return parser | ||
|
||
|
||
if __name__ == '__main__':
    # Parse command-line arguments and start the notification job
    cli_args = _get_parser().parse_args()
    start_job(cli_args)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
requests==2.31.0 |