From 2e79b73e94670d322ddcfba507ce94b70a7a0f60 Mon Sep 17 00:00:00 2001 From: Nicholas McDonnell <50747025+mcdonnnj@users.noreply.github.com> Date: Mon, 13 Mar 2023 01:47:40 -0400 Subject: [PATCH] Create and use default timeouts for the requests package With the release of version 1.7.5 the bandit package now flags requests calls (get(), post(), etc.) as medium severity security risks if they do not have a timeout value provided. The default values we create are the same as the defaults used by the github3.py package. This provides as much parity as possible with other uses of the requests package in this project. --- scraper/doecode/__init__.py | 8 +++++++- scraper/github/__init__.py | 5 ++++- scraper/github/queryManager.py | 7 ++++++- scraper/util.py | 2 ++ 4 files changed, 19 insertions(+), 3 deletions(-) diff --git a/scraper/doecode/__init__.py b/scraper/doecode/__init__.py index 251a94b..4a0dae9 100644 --- a/scraper/doecode/__init__.py +++ b/scraper/doecode/__init__.py @@ -3,6 +3,8 @@ import requests +from scraper.util import DEFAULT_REQUESTS_TIMEOUTS + logger = logging.getLogger(__name__) @@ -32,7 +34,11 @@ def process_url(url, key): if key is None: raise ValueError("DOE CODE API Key value is missing!") - response = requests.get(url, headers={"Authorization": "Basic " + key}) + response = requests.get( + url, + headers={"Authorization": "Basic " + key}, + timeout=DEFAULT_REQUESTS_TIMEOUTS, + ) doecode_json = response.json() for record in doecode_json["records"]: diff --git a/scraper/github/__init__.py b/scraper/github/__init__.py index fecd6b1..7b96ab8 100644 --- a/scraper/github/__init__.py +++ b/scraper/github/__init__.py @@ -8,6 +8,8 @@ import github3 import requests +from scraper.util import DEFAULT_REQUESTS_TIMEOUTS + logger = logging.getLogger(__name__) @@ -23,7 +25,8 @@ def gov_orgs(): us_gov_github_orgs = set() gov_orgs_json = requests.get( - "https://government.github.com/organizations.json" + "https://government.github.com/organizations.json", + timeout=DEFAULT_REQUESTS_TIMEOUTS, ).json() us_gov_github_orgs.update(gov_orgs_json["governments"]["U.S. Federal"]) diff --git a/scraper/github/queryManager.py b/scraper/github/queryManager.py index af18260..dd5ded8 100644 --- a/scraper/github/queryManager.py +++ b/scraper/github/queryManager.py @@ -14,6 +14,8 @@ import pytz import requests +from scraper.util import DEFAULT_REQUESTS_TIMEOUTS + def _vPrint(verbose, *args, **kwargs): """Easy verbosity-control print method. @@ -490,10 +492,13 @@ def _submitQuery( "https://api.github.com/graphql", data=gitqueryJSON, headers={**authhead, **headers}, + timeout=DEFAULT_REQUESTS_TIMEOUTS, ) else: fullResponse = requests.get( - "https://api.github.com" + gitquery, headers={**authhead, **headers} + "https://api.github.com" + gitquery, + headers={**authhead, **headers}, + timeout=DEFAULT_REQUESTS_TIMEOUTS, ) _vPrint( verbose, diff --git a/scraper/util.py b/scraper/util.py index bba3403..2432efc 100644 --- a/scraper/util.py +++ b/scraper/util.py @@ -8,6 +8,8 @@ logger = logging.getLogger(__name__) +DEFAULT_REQUESTS_TIMEOUTS = (4, 10) + def execute(command, cwd=None): logger.debug("Forking command: %s", command)