Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

register events to avoid processing duplicate events #7

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,17 @@ name: Run tests
on: [push, pull_request]
jobs:
test:
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
strategy:
matrix:
python: [3.6, 3.7, 3.8, 3.9, '3.10']
fail-fast: false
steps:
- name: Check out repository code
uses: actions/checkout@v2
uses: actions/checkout@v4

- name: Setup Python
uses: actions/setup-python@v2
uses: actions/setup-python@v5
with:
python-version: ${{matrix.python}}

Expand Down
32 changes: 28 additions & 4 deletions pyghee/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,26 @@ def __init__(self, *args, **kwargs):
else:
del os.environ['GITHUB_APP_SECRET_TOKEN']

self.registered_events = []

def register_event(self, event_id, max_size=10000):
    """
    Register event by ID, so duplicate deliveries of the same event can be detected.

    :param event_id: ID of the event to register
    :param max_size: maximum number of event IDs to remember (must be at least 2);
                     once this size is reached, the oldest IDs are forgotten

    Returns True if event is new, False if event is not new (ID was registered before).
    Raises ValueError if max_size is smaller than 2.
    """
    # with a smaller maximum size, max_size // 2 is 0 and the trimming below
    # would never remove anything, so the list would grow without bound
    if max_size < 2:
        raise ValueError("max_size must be at least 2, found %s" % max_size)

    # known event ID => duplicate event, nothing to register
    if event_id in self.registered_events:
        return False

    self.registered_events.append(event_id)

    # keep size of list of registered events under control:
    # when maximum size has been reached, drop the oldest entries
    # (the first max_size // 2 IDs), only the most recent ones are retained
    if len(self.registered_events) >= max_size:
        self.registered_events = self.registered_events[max_size // 2:]

    return True

def handle_event(self, event_info, log_file=None):
"""
Handle event
Expand Down Expand Up @@ -164,10 +184,14 @@ def process_event(self, request, abort_function,
"""
try:
event_info = get_event_info(request)
self.log_event(event_info, events_log_dir=events_log_dir, log_file=log_file)
if verify:
self.verify_request(event_info, abort_function, log_file=log_file)
self.handle_event(event_info, log_file=log_file)
event_id = event_info['id']
if self.register_event(event_id):
self.log_event(event_info, events_log_dir=events_log_dir, log_file=log_file)
if verify:
self.verify_request(event_info, abort_function, log_file=log_file)
self.handle_event(event_info, log_file=log_file)
else:
log("Duplicate event received, id: %s" % event_id)
except Exception as err:
if raise_error:
raise
Expand Down
3 changes: 2 additions & 1 deletion pyghee/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,5 @@ def handle_issue_comment_event(self, event_info, log_file=None):
if __name__ == '__main__':
app = create_app(klass=ExamplePyGHee)
log("App started!")
waitress.serve(app, listen='*:3000')
# stick to single thread so we can avoid processing duplicate event deliveries multiple times
waitress.serve(app, listen='*:3000', threads=1)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wouldn’t do that. It could result in the bot becoming unresponsive if the processing of an event takes a long time.

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Short-term, this is a necessary evil: otherwise the detection of duplicate events can't work, since it relies on events being processed serially...

By default, waitress uses 4 threads, and to be honest I don't expect this to have a big impact, since processing a single event shouldn't take much longer than a couple of seconds.

2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="PyGHee",
version="0.0.3",
version="0.0.4",
author="Kenneth Hoste",
author_email="kenneth.hoste@ugent.be",
description="PyGHee (pronounced as 'piggy') is the GitHub Event Executor, a Python library to facilitate creating a GitHub App implemented in Python to process [events from GitHub",
Expand Down
3 changes: 2 additions & 1 deletion tests/event_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
EVENT_TYPE_ISSUE_COMMENT = 'issue_comment'

REQUEST_ID_001 = 'd3ed7694-8a6c-4008-a93f-b92aa86a95a8'
REQUEST_ID_002 = 'e4fe8783-7b7d-5119-b84e-c83bb97b86b9'
TIMESTAMP_001 = '1645367007403' # 2022-02-20T15:23:27

CREATE_BRANCH_REQUEST = Request(
Expand All @@ -41,7 +42,7 @@
'Timestamp': TIMESTAMP_001,
'X-GitHub-Event': EVENT_TYPE_ISSUE_COMMENT,
'X-Hub-Signature': 'sha1=0123456789abcedf0123456789abcedf01234567', # fake signature!
'X-Github-Delivery': REQUEST_ID_001,
'X-Github-Delivery': REQUEST_ID_002,
}, {
# request.json
'action': ACTION_CREATED,
Expand Down
16 changes: 9 additions & 7 deletions tests/test_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@
from pyghee.main import ExamplePyGHee
from requests.structures import CaseInsensitiveDict

from tests.event_data import REQUEST_ID_001, TIMESTAMP_001
from tests.event_data import REQUEST_ID_001, REQUEST_ID_002, TIMESTAMP_001
from tests.event_data import CREATE_BRANCH_REQUEST, ISSUE_COMMENT_CREATED_REQUEST

TEST_REQUESTS = (CREATE_BRANCH_REQUEST, ISSUE_COMMENT_CREATED_REQUEST)
REQUEST_IDS = (REQUEST_ID_001, REQUEST_ID_002)

TEST_SECRET_TOKEN = '0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef'

Expand All @@ -28,11 +29,11 @@ def dummy_abort_function(abort_code):


def test_get_event_info():
for request in TEST_REQUESTS:
for idx, request in enumerate(TEST_REQUESTS):
res = get_event_info(request)
event_action = request.json.get('action', 'UNKNOWN')
assert res['action'] == event_action
assert res['id'] == REQUEST_ID_001
assert res['id'] == REQUEST_IDS[idx]
event_type = request.headers['X-GitHub-Event']
assert res['type'] == event_type

Expand Down Expand Up @@ -68,7 +69,7 @@ def test_process_event(tmpdir):
os.environ['GITHUB_APP_SECRET_TOKEN'] = 'fake_app_secret_token'
pyghee = ExamplePyGHee()

for request in TEST_REQUESTS:
for idx, request in enumerate(TEST_REQUESTS):
event_info = get_event_info(request)
event_action, event_type = event_info['action'], event_info['type']

Expand All @@ -79,15 +80,16 @@ def test_process_event(tmpdir):
event_data_dir = os.path.join(events_log_dir, event_type, event_action, '2022-02-20')
assert os.path.isdir(event_data_dir)

header_fp = '2022-02-20T14-23-27_%s_headers.json' % REQUEST_ID_001
body_fp = '2022-02-20T14-23-27_%s_body.json' % REQUEST_ID_001
request_id = REQUEST_IDS[idx]
header_fp = '2022-02-20T14-23-27_%s_headers.json' % request_id
body_fp = '2022-02-20T14-23-27_%s_body.json' % request_id
assert sorted(os.listdir(event_data_dir)) == [body_fp, header_fp]

# verify saved event header
expected_header = {
'Timestamp': TIMESTAMP_001,
'X-GitHub-Event': event_type,
'X-Github-Delivery': REQUEST_ID_001,
'X-Github-Delivery': request_id,
}
with open(os.path.join(event_data_dir, header_fp), 'r') as fp:
header_data = json.load(fp)
Expand Down