diff --git a/scraper/doecode/__init__.py b/scraper/doecode/__init__.py index 251a94b..4a0dae9 100644 --- a/scraper/doecode/__init__.py +++ b/scraper/doecode/__init__.py @@ -3,6 +3,8 @@ import requests +from scraper.util import DEFAULT_REQUESTS_TIMEOUTS + logger = logging.getLogger(__name__) @@ -32,7 +34,11 @@ def process_url(url, key): if key is None: raise ValueError("DOE CODE API Key value is missing!") - response = requests.get(url, headers={"Authorization": "Basic " + key}) + response = requests.get( + url, + headers={"Authorization": "Basic " + key}, + timeout=DEFAULT_REQUESTS_TIMEOUTS, + ) doecode_json = response.json() for record in doecode_json["records"]: diff --git a/scraper/github/__init__.py b/scraper/github/__init__.py index cbc2d6b..6d21c87 100644 --- a/scraper/github/__init__.py +++ b/scraper/github/__init__.py @@ -8,6 +8,8 @@ import github3 import requests +from scraper.util import DEFAULT_REQUESTS_TIMEOUTS + logger = logging.getLogger(__name__) @@ -23,7 +25,8 @@ def gov_orgs(): us_gov_github_orgs = set() gov_orgs_json = requests.get( - "https://government.github.com/organizations.json" + "https://government.github.com/organizations.json", + timeout=DEFAULT_REQUESTS_TIMEOUTS, ).json() us_gov_github_orgs.update(gov_orgs_json["governments"]["U.S. Federal"]) diff --git a/scraper/github/queryManager.py b/scraper/github/queryManager.py index af18260..dd5ded8 100644 --- a/scraper/github/queryManager.py +++ b/scraper/github/queryManager.py @@ -14,6 +14,8 @@ import pytz import requests +from scraper.util import DEFAULT_REQUESTS_TIMEOUTS + def _vPrint(verbose, *args, **kwargs): """Easy verbosity-control print method. 
@@ -490,10 +492,13 @@ def _submitQuery( "https://api.github.com/graphql", data=gitqueryJSON, headers={**authhead, **headers}, + timeout=DEFAULT_REQUESTS_TIMEOUTS, ) else: fullResponse = requests.get( - "https://api.github.com" + gitquery, headers={**authhead, **headers} + "https://api.github.com" + gitquery, + headers={**authhead, **headers}, + timeout=DEFAULT_REQUESTS_TIMEOUTS, ) _vPrint( verbose, diff --git a/scraper/util.py b/scraper/util.py index bba3403..e74875e 100644 --- a/scraper/util.py +++ b/scraper/util.py @@ -8,6 +8,10 @@ logger = logging.getLogger(__name__) +# (connect, read) timeouts in seconds; these mirror the defaults in github3.py sessions per: +# https://github.com/sigmavirus24/github3.py/blob/ce43e6e5fdef6555f5a6b6602e2cc4b66c428aef/src/github3/session.py#L98 +DEFAULT_REQUESTS_TIMEOUTS = (4, 10) + def execute(command, cwd=None): logger.debug("Forking command: %s", command)