From a675cd0ad2a73a5f0ebef5924f5a5b0dd4cd3fd6 Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Thu, 4 Jul 2024 19:22:40 +0200 Subject: [PATCH 1/2] register events to avoid processing duplicate events --- pyghee/lib.py | 32 ++++++++++++++++++++++++++++---- pyghee/main.py | 3 ++- setup.py | 2 +- tests/event_data.py | 3 ++- tests/test_events.py | 16 +++++++++------- 5 files changed, 42 insertions(+), 14 deletions(-) diff --git a/pyghee/lib.py b/pyghee/lib.py index 996ddc5..b4c4c67 100644 --- a/pyghee/lib.py +++ b/pyghee/lib.py @@ -84,6 +84,26 @@ def __init__(self, *args, **kwargs): else: del os.environ['GITHUB_APP_SECRET_TOKEN'] + self.registered_events = [] + + def register_event(self, event_id): + """ + Register event by ID. + Returns True if event is new, False is event is not new. + """ + if event_id in self.registered_events: + return False + else: + self.registered_events.append(event_id) + + # keep size of list of registered events under control, + # trim in half if maximum size has been reached + max_size = 10000 + if len(self.registered_events) >= max_size: + self.registered_events = self.registered_events[max_size//2:] + + return True + def handle_event(self, event_info, log_file=None): """ Handle event @@ -164,10 +184,14 @@ def process_event(self, request, abort_function, """ try: event_info = get_event_info(request) - self.log_event(event_info, events_log_dir=events_log_dir, log_file=log_file) - if verify: - self.verify_request(event_info, abort_function, log_file=log_file) - self.handle_event(event_info, log_file=log_file) + event_id = event_info['id'] + if self.register_event(event_id): + self.log_event(event_info, events_log_dir=events_log_dir, log_file=log_file) + if verify: + self.verify_request(event_info, abort_function, log_file=log_file) + self.handle_event(event_info, log_file=log_file) + else: + log("Duplicate event received, id: %s" % event_id) except Exception as err: if raise_error: raise diff --git a/pyghee/main.py b/pyghee/main.py index c4d1676..a5eb537 100644 --- a/pyghee/main.py +++ b/pyghee/main.py @@ -37,4 +37,5 @@ def handle_issue_comment_event(self, event_info, log_file=None): if __name__ == '__main__': app = create_app(klass=ExamplePyGHee) log("App started!") - waitress.serve(app, listen='*:3000') + # stick to single thread so we can avoid processing duplicate event deliveries multiple times + waitress.serve(app, listen='*:3000', threads=1) diff --git a/setup.py b/setup.py index f67bacd..3f00e7c 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="PyGHee", - version="0.0.3", + version="0.0.4", author="Kenneth Hoste", author_email="kenneth.hoste@ugent.be", description="PyGHee (pronounced as 'piggy') is the GitHub Event Executor, a Python library to facilitate creating a GitHub App implemented in Python to process [events from GitHub", diff --git a/tests/event_data.py b/tests/event_data.py index 9cdc466..11cf8b9 100644 --- a/tests/event_data.py +++ b/tests/event_data.py @@ -16,6 +16,7 @@ EVENT_TYPE_ISSUE_COMMENT = 'issue_comment' REQUEST_ID_001 = 'd3ed7694-8a6c-4008-a93f-b92aa86a95a8' +REQUEST_ID_002 = 'e4fe8783-7b7d-5119-b84e-c83bb97b86b9' TIMESTAMP_001 = '1645367007403' # 2022-02-20T15:23:27 CREATE_BRANCH_REQUEST = Request( @@ -41,7 +42,7 @@ 'Timestamp': TIMESTAMP_001, 'X-GitHub-Event': EVENT_TYPE_ISSUE_COMMENT, 'X-Hub-Signature': 'sha1=0123456789abcedf0123456789abcedf01234567', # fake signature! - 'X-Github-Delivery': REQUEST_ID_001, + 'X-Github-Delivery': REQUEST_ID_002, }, { # request.json 'action': ACTION_CREATED, diff --git a/tests/test_events.py b/tests/test_events.py index 6985820..8b6b3a9 100644 --- a/tests/test_events.py +++ b/tests/test_events.py @@ -15,10 +15,11 @@ from pyghee.main import ExamplePyGHee from requests.structures import CaseInsensitiveDict -from tests.event_data import REQUEST_ID_001, TIMESTAMP_001 +from tests.event_data import REQUEST_ID_001, REQUEST_ID_002, TIMESTAMP_001 from tests.event_data import CREATE_BRANCH_REQUEST, ISSUE_COMMENT_CREATED_REQUEST TEST_REQUESTS = (CREATE_BRANCH_REQUEST, ISSUE_COMMENT_CREATED_REQUEST) +REQUEST_IDS = (REQUEST_ID_001, REQUEST_ID_002) TEST_SECRET_TOKEN = '0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef' @@ -28,11 +29,11 @@ def dummy_abort_function(abort_code): def test_get_event_info(): - for request in TEST_REQUESTS: + for idx, request in enumerate(TEST_REQUESTS): res = get_event_info(request) event_action = request.json.get('action', 'UNKNOWN') assert res['action'] == event_action - assert res['id'] == REQUEST_ID_001 + assert res['id'] == REQUEST_IDS[idx] event_type = request.headers['X-GitHub-Event'] assert res['type'] == event_type @@ -68,7 +69,7 @@ def test_process_event(tmpdir): os.environ['GITHUB_APP_SECRET_TOKEN'] = 'fake_app_secret_token' pyghee = ExamplePyGHee() - for request in TEST_REQUESTS: + for idx, request in enumerate(TEST_REQUESTS): event_info = get_event_info(request) event_action, event_type = event_info['action'], event_info['type'] @@ -79,15 +80,16 @@ def test_process_event(tmpdir): event_data_dir = os.path.join(events_log_dir, event_type, event_action, '2022-02-20') assert os.path.isdir(event_data_dir) - header_fp = '2022-02-20T14-23-27_%s_headers.json' % REQUEST_ID_001 - body_fp = '2022-02-20T14-23-27_%s_body.json' % REQUEST_ID_001 + request_id = REQUEST_IDS[idx] + header_fp = '2022-02-20T14-23-27_%s_headers.json' % request_id + body_fp = '2022-02-20T14-23-27_%s_body.json' % request_id assert sorted(os.listdir(event_data_dir)) == [body_fp, header_fp] # verify saved event header expected_header = { 'Timestamp': TIMESTAMP_001, 'X-GitHub-Event': event_type, - 'X-Github-Delivery': REQUEST_ID_001, + 'X-Github-Delivery': request_id, } with open(os.path.join(event_data_dir, header_fp), 'r') as fp: header_data = json.load(fp) From cd8bfe64897cb80474be303189005d745ca7789c Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Thu, 4 Jul 2024 19:28:09 +0200 Subject: [PATCH 2/2] stick to ubuntu 20.04 for test CI workflow, so we can keep testing with Python 3.6 --- .github/workflows/tests.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 29766d0..9683597 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -2,17 +2,17 @@ name: Run tests on: [push, pull_request] jobs: test: - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 strategy: matrix: python: [3.6, 3.7, 3.8, 3.9, '3.10'] fail-fast: false steps: - name: Check out repository code - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Setup Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: ${{matrix.python}}