Skip to content

Commit

Permalink
Merge pull request #178 from jimchamp/comment-digest-workflow
Browse files Browse the repository at this point in the history
Comment digest workflow
  • Loading branch information
jimchamp authored Jan 17, 2024
2 parents 3677dd2 + 9e76a7b commit 50cf50f
Show file tree
Hide file tree
Showing 4 changed files with 334 additions and 0 deletions.
21 changes: 21 additions & 0 deletions .github/workflows/new_comment_digest.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
name: new_comment_digest
on:
  schedule:  # 08:30 daily (GitHub evaluates cron schedules in UTC)
    - cron: '30 8 * * *'
  workflow_dispatch:  # Allows the digest to be triggered on-demand from the Actions tab
permissions:
  contents: read  # Least privilege: the job only needs to check out the repository

jobs:
  new_comment_digest:
    runs-on: ubuntu-latest
    steps:
      # Action refs must name an existing tag, branch, or SHA: the major-version tags are `v4`, not `4`
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v4
        with:
          python-version: 3.x
      - run: pip install requests
      # Arguments: look-back window in hours, Slack channel, Slack token
      - run: scripts/gh_scripts/issue_comment_bot.py 24 "$SLACK_CHANNEL" "$SLACK_TOKEN"
        env:
          SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL_ABC_TEAM_PLUS }}
27 changes: 27 additions & 0 deletions scripts/gh_scripts/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# GitHub Project Management Scripts

This directory contains scripts that the Open Library team uses to interact with this GitHub repository.

To quickly see a script's purpose and arguments, run the script with the `-h` or `--help` flag.

## `issue_comment_bot.py`

This script fetches issues that have received new comments from contributors within a given number of hours, then posts a digest of those comments to the team's Slack channel.

### Usage:
This script has three positional arguments:
```
hours Fetch issues that have been updated since this many hours ago
channel Issues will be published to this Slack channel
slack-token Slack authentication token
```

__Running the script locally:__
```
docker compose exec -e PYTHONPATH=. web bash
# Publish digest of new comments from the past day to #openlibrary-g:
./scripts/gh_scripts/issue_comment_bot.py 24 "#openlibrary-g" "replace-with-slack-token"
```

__Note:__ When adding arguments, be sure to place any hyphenated values within double quotes.
285 changes: 285 additions & 0 deletions scripts/gh_scripts/issue_comment_bot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,285 @@
#!/usr/bin/env python
"""
Fetches Open Library GitHub issues that have been commented on
within some amount of time, in hours.
Writes links to each issue to given Slack channel.
"""
import argparse
import errno
import sys
import time

from datetime import datetime, timedelta
from typing import Any

import requests

# Maps lead label to GitHub username.
# Every lead label has the form "Lead: @<username>", so the mapping is derived
# from the list of lead usernames.
lead_label_to_username = {
    f'Lead: @{username}': username
    for username in (
        'mekarpeles',
        'cdrini',
        'scottbarnes',
        'seabelis',
        'jimchamp',
    )
}

# Maps GitHub username to Slack ID.
# `filter_issues` also uses the keys of this mapping to decide whether a
# commenter is staff, and `publish_digest` uses the values to mention the lead.
username_to_slack_id = dict(
    mekarpeles='<@mek>',
    cdrini='<@cdrini>',
    scottbarnes='<@U03MNR6T7FH>',
    seabelis='<@UAHQ39ACT>',
    jimchamp='<@U01ARTHG9EV>',
    hornc='<@U0EUS8DV0>',
)


def fetch_issues(updated_since: str):
    """
    Fetches all open GitHub issues that have been updated since the given date string and have at least one comment.

    GitHub results are paginated. This function follows the `next` link of each response until no further
    page is advertised, collecting the issues of every page into a single list.
    To keep API calls to a minimum, we request the maximum number of results per request (100 per page, as of writing).

    Important: Updated issues need not have a recent comment. Update events include many other things, such as adding a
    label to an issue, or moving an issue to a milestone. Issues returned by this function will require additional
    processing in order to determine if they have recent comments.

    :param updated_since: Timestamp, formatted as `YYYY-MM-DDTHH:MM:SS`, used in the `updated:>` search qualifier
    :return: List of issue dicts, exactly as found in the `items` field of the search responses
    :raises requests.HTTPError: If GitHub answers any page with an error status (e.g. when rate limited)
    """
    query = f'repo:internetarchive/openlibrary is:open is:issue comments:>0 updated:>{updated_since}'
    # Query parameters are only needed for the first request: the `next` links
    # returned by GitHub are complete URLs.
    params: dict[str, str | int] | None = {
        'q': query,
        'per_page': 100,
    }
    url = 'https://api.github.com/search/issues'
    results = []

    while url:
        response = requests.get(url, params=params, timeout=30)
        # Fail loudly on an error response, instead of with a KeyError on the missing `items` field
        response.raise_for_status()
        results.extend(response.json()['items'])

        # Prepare url for next page; an empty string ends the loop
        url = response.links.get('next', {}).get('url', '')
        params = None

    return results


def filter_issues(issues: list, since: datetime):
    """
    Returns list of issues that were not last responded to by staff.
    Requires fetching the most recent comments for the given issues.

    An issue is kept when its most recent comment was created after `since` and was written by
    somebody who is not a key of `username_to_slack_id`.

    :param issues: Issue dicts from the GitHub search results; `comments_url`, `title`, and `labels` are read
    :param since: Timezone-naive cutoff. It is compared with GitHub's `created_at` timestamps after their
        timezone is dropped, so it should be expressed in the same timezone as those timestamps (UTC, as of writing)
    :return: One dict per retained issue, having `comment_url`, `commenter`, `issue_title`, and `lead_label` keys
    :raises requests.HTTPError: If GitHub answers a comment request with an error status
    """
    results = []

    for issue in issues:
        # Fetch comments using URL from previous GitHub search results
        comments_url = issue.get('comments_url')
        if not comments_url:
            continue
        resp = requests.get(comments_url, params={'per_page': 100}, timeout=30)
        resp.raise_for_status()

        # Ensure that we have the last page of comments.
        # A `last` link is only present when there is more than one page.
        last_url = resp.links.get('last', {}).get('url', '')
        if last_url:
            resp = requests.get(last_url, timeout=30)
            resp.raise_for_status()

        comments = resp.json()
        if not comments:
            # The search only returns issues with comments, but those could have
            # been deleted between the search and this request
            continue
        last_comment = comments[-1]

        # First step: Ensure that the last comment was left after the given `since` datetime.
        # `fromisoformat` only accepts a trailing "Z" from Python 3.11 onwards, so
        # the suffix is normalized to an explicit offset first.
        created = datetime.fromisoformat(last_comment['created_at'].replace('Z', '+00:00'))
        # Removing timezone info to avoid TypeErrors, which occur when
        # comparing a timezone-aware datetime with a timezone-naive datetime
        created = created.replace(tzinfo=None)
        if created <= since:
            continue

        # Next step: Determine if the last commenter is a staff member
        last_commenter = last_comment['user']['login']
        if last_commenter in username_to_slack_id:
            continue

        results.append({
            'comment_url': last_comment['html_url'],
            'commenter': last_commenter,
            'issue_title': issue['title'],
            'lead_label': find_lead_label(issue.get('labels', [])),
        })

    return results


def find_lead_label(labels: list[dict[str, Any]]) -> str:
    """
    Returns the name of the first label whose name starts with "Lead:".

    Labels are examined in the order given. An empty string is returned when
    none of the labels is a lead label.
    """
    lead_names = (
        label['name'] for label in labels if label['name'].startswith('Lead:')
    )
    # The generator is lazy, so labels after the first match are never examined
    return next(lead_names, '')


def publish_digest(issues: list[dict[str, str]], slack_channel: str, slack_token: str, hours_passed: int):
    """
    Creates a threaded Slack message containing a digest of recently commented GitHub issues.
    Parent Slack message will say how many comments were left, and the timeframe. Each reply
    will include a link to the comment, as well as the issue's lead and the commenter.

    If the parent message cannot be posted, the process exits with a non-zero status, as
    replies cannot be threaded without it. A reply that cannot be posted is reported and skipped.

    :param issues: Dicts having `comment_url`, `commenter`, `issue_title`, and `lead_label` keys
    :param slack_channel: Channel that the digest is posted to
    :param slack_token: Slack authentication token, sent as a bearer token
    :param hours_passed: Size of the look-back window in hours; only used in the parent message text
    """

    def post_message(text: str, thread_ts: str = '') -> tuple[str, dict]:
        """
        Posts the given text to the Slack channel, as a reply if `thread_ts` is given.
        Returns an error description (empty on success) and the decoded response body.
        """
        payload = {
            'channel': slack_channel,
            'text': text,
        }
        if thread_ts:
            payload['thread_ts'] = thread_ts

        response = requests.post(
            'https://slack.com/api/chat.postMessage',
            headers={
                'Authorization': f"Bearer {slack_token}",
                'Content-Type': 'application/json; charset=utf-8',
            },
            json=payload,
            timeout=30,
        )
        if response.status_code != 200:
            return f'Status code: {response.status_code}', {}

        try:
            body = response.json()
        except ValueError:
            return 'Response body was not valid JSON', {}

        # Slack reports most failures (bad token, unknown channel, ...) with a
        # 200 status code and `"ok": false` in the body
        if not body.get('ok'):
            return f'Slack error: {body.get("error", "unknown")}', body

        return '', body

    def escape(text: str) -> str:
        """Escapes the three characters that Slack treats as control characters in message text."""
        return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')

    # Create the parent message
    parent_thread_msg = f'{len(issues)} new GitHub comment(s) since {hours_passed} hour(s) ago'
    error, body = post_message(parent_thread_msg)
    if error:
        # XXX : Log this
        print(f'Failed to send message to Slack. {error}')
        # XXX : Add retry logic?
        # `errno.ECOMM` is not defined on every platform, so fall back to a generic failure code
        sys.exit(getattr(errno, 'ECOMM', 1))

    # Store timestamp, which, along with the channel, uniquely identifies the parent thread
    ts = body['ts']

    for i in issues:
        # Slack rate limit is roughly 1 request per second
        time.sleep(1)

        comment_url = i['comment_url']
        # Issue titles are free text: escape them so that they cannot break the link markup
        issue_title = escape(i['issue_title'])
        commenter = i['commenter']
        message = f'<{comment_url}|Latest comment for: *{issue_title}*>\n'

        # Mention the lead in Slack when their Slack ID is known, otherwise
        # fall back to the raw lead label (if any)
        username = lead_label_to_username.get(i['lead_label'], '')
        slack_id = username_to_slack_id.get(username, '')
        if slack_id:
            message += f'Lead: {slack_id}\n'
        elif i['lead_label']:
            message += f'{i["lead_label"]}\n'
        else:
            message += 'Lead: N/A\n'

        message += f'Commenter: *{commenter}*'

        error, _ = post_message(message, ts)
        if error:
            # XXX : Log this
            print(f'Failed to POST slack message\n {error}\n Message: {message}')
            # XXX : Retry logic?


def time_since(hours):
    """
    Returns datetime and string representations of the current UTC time, minus the given hours.

    The datetime is timezone-naive, because `filter_issues` compares it with comment
    timestamps whose timezone information has been removed. Those timestamps come from
    GitHub and are expressed in UTC, so the cutoff must be computed from UTC as well:
    using local time would shift the look-back window by the host's UTC offset.

    :param hours: Size of the look-back window, in hours
    :return: Tuple of the cutoff as a naive datetime, and as a `YYYY-MM-DDTHH:MM:SS` string
    """
    # Imported here as the module only imports `datetime` and `timedelta`
    from datetime import timezone

    now = datetime.now(timezone.utc).replace(tzinfo=None)
    # XXX : Add a minute or two to the delta (to avoid dropping issues)?
    since = now - timedelta(hours=hours)
    return since, since.strftime('%Y-%m-%dT%H:%M:%S')


def start_job(args: argparse.Namespace):
    """
    Runs the new comment digest job from start to finish.

    Issues updated within the past `args.hours` hours are fetched, then reduced to those
    needing attention. If any remain, a digest is published to `args.channel` using
    `args.slack_token`. The outcome is printed in either case.
    """
    cutoff, cutoff_string = time_since(args.hours)
    needing_attention = filter_issues(fetch_issues(cutoff_string), cutoff)

    # XXX : If we are only running this script daily, we can remove this early exit to
    # always post a message to Slack. If the digest is ever not published, we'll know
    # that something is wrong with our script runner.
    if not needing_attention:
        # XXX : Log this
        print('No issues needing attention found.')
        return

    publish_digest(needing_attention, args.channel, args.slack_token, args.hours)
    # XXX : Log this
    print('Digest posted to Slack.')


def _get_parser() -> argparse.ArgumentParser:
"""
Creates and returns an ArgumentParser containing default values which were
read from the config file.
"""
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
'hours',
help='Fetch issues that have been updated since this many hours ago',
type=int,
)
parser.add_argument(
'channel',
help="Issues will be published to this Slack channel",
type=str,
)
parser.add_argument(
'slack_token',
metavar='slack-token',
help='Slack auth token',
type=str,
)

return parser


if __name__ == '__main__':
    # Script entry point: parse the command-line arguments, then start the notification job
    start_job(_get_parser().parse_args())
1 change: 1 addition & 0 deletions scripts/gh_scripts/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
requests==2.31.0

0 comments on commit 50cf50f

Please sign in to comment.