From 43e20d6866edc029a6a57a9379db11a6d30ec989 Mon Sep 17 00:00:00 2001 From: Parth Sharma Date: Sat, 13 Jul 2019 18:33:26 +0530 Subject: [PATCH 1/2] fix gh_repo_info_worker to send task completion confirmation to broker Signed-off-by: Parth Sharma --- .../gh_repo_info_worker/worker.py | 37 ++++++++++++++----- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/workers/gh_repo_info_worker/gh_repo_info_worker/worker.py b/workers/gh_repo_info_worker/gh_repo_info_worker/worker.py index e46c65df21..c3e64829b0 100644 --- a/workers/gh_repo_info_worker/gh_repo_info_worker/worker.py +++ b/workers/gh_repo_info_worker/gh_repo_info_worker/worker.py @@ -173,16 +173,16 @@ def run(self): def collect(self, repos=None): while True: + time.sleep(0.5) if not self._queue.empty(): message = self._queue.get() self.working_on = 'UPDATE' + elif not self._maintain_queue.empty(): + message = self._maintain_queue.get() + logging.info("Popped off message: {}".format(str(message.entry_info))) + self.working_on = "MAINTAIN" else: - if not self._maintain_queue.empty(): - message = self._queue.get() - logging.info("Popped off message: {}".format(str(message.entry_info))) - self.working_on = "MAINTAIN" - else: - break + break if message.type == 'EXIT': break @@ -191,10 +191,8 @@ def collect(self, repos=None): raise ValueError(f'{message.type} is not a recognized task type') if message.type == 'TASK': - owner, repo = self.get_owner_repo(message.entry_info['git_url']) - logging.info(f'Querying: {owner}/{repo}') self.query_repo_info(message.entry_info['repo_id'], - owner, repo) + message.entry_info['git_url']) # if repos == None: @@ -223,10 +221,12 @@ def get_owner_repo(self, git_url): return owner, repo - def query_repo_info(self, repo_id, owner, repo): + def query_repo_info(self, repo_id, git_url): # url = f'https://api.github.com/repos/{owner}/{repo}' url = 'https://api.github.com/graphql' + owner, repo = self.get_owner_repo(git_url) + query = """ { repository(owner:"%s", name:"%s"){ @@ -309,6 +309,8 @@ def query_repo_info(self, repo_id, owner, repo): self.info_id_inc += 1 + self.register_task_completion(repo_id, git_url) + def update_rate_limit(self, response): @@ -325,3 +327,18 @@ def update_rate_limit(self, response): logging.info("Rate limit exceeded, waiting " + str(time_diff.total_seconds()) + " seconds.\n") time.sleep(time_diff.total_seconds()) self.rate_limit = int(response.headers['X-RateLimit-Limit']) + + def register_task_completion(self, repo_id, git_url): + task_completed = { + 'worker_id': self.config['id'], + 'job_type': self.working_on, + 'repo_id': repo_id, + 'git_url': git_url + } + + logging.info("Telling broker we completed task: " + str(task_completed) + "\n" + + "This task inserted: " + str(self.results_counter) + " tuples.\n\n") + + requests.post('http://localhost:{}/api/unstable/completed_task'.format( + self.config['broker_port']), json=task_completed) + self.results_counter = 0 From 2820b975700ef6baf24b7b36fb38bc0d7872104b Mon Sep 17 00:00:00 2001 From: Parth Sharma Date: Mon, 15 Jul 2019 15:21:19 +0530 Subject: [PATCH 2/2] Update gh_repo_worker readme Signed-off-by: Parth Sharma --- workers/gh_repo_info_worker/README.rst | 97 +++++++++++++++++++------- 1 file changed, 71 insertions(+), 26 deletions(-) diff --git a/workers/gh_repo_info_worker/README.rst b/workers/gh_repo_info_worker/README.rst index b8d31b3fef..dd3a5f2e4d 100644 --- a/workers/gh_repo_info_worker/README.rst +++ b/workers/gh_repo_info_worker/README.rst @@ -9,37 +9,82 @@ GitHub Repo Info Worker :target: False :alt: Latest Travis CI build status -Augur Worker that collects GitHub Repo Info data -**Note:** -This is a work in progress Worker. -Currently it is not integrated as a Augur Worker but can be used independently. -This version gets the repo info of all repos stored in the ``repo`` table. +Augur Worker that collects GitHub Repo Info data. + +This worker is integrated into Augur's worker architecture and can receieve tasks through the broker. Usage ----- -1. Activate Augur's virtualenv. -2. Open your python shell. -3. In your python shell: - -.. code:: python - - # Create a config dict - config = {'connection_string': 'sqlite:///:memory:', - 'host': '', - 'name': '', - 'password': '', - 'port': '', - 'schema': '', - 'user': '', - 'key': '' + +Running this Worker +******** + +To run this worker execute the following command + +.. code:: bash + + python -m gh_repo_info_worker.runtime + + +**Note:** Make sure the broker is running before running the worker + +Sending Tasks +******** + +To send a task to this worker manually, send a POST request to the endpoint ``/task`` +with the following json. Change ``git_url`` to the url of the GitHub repository you wish +to run the worker against. + +.. code:: javascript + + { + 'job_type': 'UPDATE', + 'models': ['repo_info'], + 'given': { + 'git_url': 'https://github.com/openssl/openssl' + } + } + +Scheduling Tasks +******** +To make this worker run periodically add a Housekeeper job in ``augur.config.json``. +To do so, in your ``augur.config.json``, in the Housekeeper section add the following: + +.. code:: javascript + + { + "model": "repo_info", + "delay": 60, + "repo_group_id": 0 } - # Import Worker - from gh_repo_info_worker.worker import GHRepoInfoWorker +Set ``delay`` to specify the interval (in seconds) the worker waits before running again. + +Set ``repo_group_id`` to the repo_group_id of the Repo Group you wish to run this worker against. +If you wish to run the worker for all repositories specify ``repo_group_id`` to ``0`` + +Successful Log File +----- +Here is an example of ``worker.log`` + +.. code-block:: + + INFO:root:Making database connections... + INFO:root:Getting max repo_info_id... + INFO:root:Starting Flask App with pid: 10950... + INFO:werkzeug: * Running on http://localhost:51237/ (Press CTRL+C to quit) + INFO:root:Sending to work on task: {'job_type': 'MAINTAIN', 'models': ['repo_info'], 'given': {'git_url': 'https://github.com/openssl/openssl'}, 'focused_task': 1} + INFO:root:Running... + INFO:werkzeug:127.0.0.1 - - [15/Jul/2019 15:09:05] "POST /AUGWOP/task HTTP/1.1" 200 - + INFO:root:Popped off message: {'git_url': 'https://github.com/openssl/openssl', 'repo_id': 25151} + INFO:root:Hitting endpoint https://api.github.com/graphql + INFO:root:Recieved rate limit from headers - # Create a worker instance - worker = GHRepoInfoWorker(config) + INFO:root:Updated rate limit, you have: 4999 requests remaining. - # Begin collecting data - worker.collect() + INFO:root:Inserting repo info for repo with id:25151, owner:openssl, name:openssl + INFO:root:Primary Key inserted into repo_info table: [16] + INFO:root:Inserted info for openssl/openssl + INFO:root:Telling broker we completed task: {'worker_id': 'com.augurlabs.core.gh_repo_info_worker', 'job_type': 'MAINTAIN', 'repo_id': 25151, 'git_url': 'https://github.com/openssl/openssl'} + This task inserted: 1 tuples.