Skip to content

Commit

Permalink
Create and use default timeouts for the requests package
Browse files Browse the repository at this point in the history
With the release of version 1.7.5, the bandit package now flags requests
calls (get(), post(), etc.) as medium-severity security risks if they
do not have a timeout value provided. The default values we create are
the same as the defaults used by the github3.py package. This provides
as much parity as possible with other uses of the requests package in
this project.
  • Loading branch information
mcdonnnj committed Mar 22, 2023
1 parent a52c21b commit 77b5047
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 3 deletions.
8 changes: 7 additions & 1 deletion scraper/doecode/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

import requests

from scraper.util import DEFAULT_REQUESTS_TIMEOUTS

logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -32,7 +34,11 @@ def process_url(url, key):
if key is None:
raise ValueError("DOE CODE API Key value is missing!")

response = requests.get(url, headers={"Authorization": "Basic " + key})
response = requests.get(
url,
headers={"Authorization": "Basic " + key},
timeout=DEFAULT_REQUESTS_TIMEOUTS,
)
doecode_json = response.json()

for record in doecode_json["records"]:
Expand Down
5 changes: 4 additions & 1 deletion scraper/github/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import github3
import requests

from scraper.util import DEFAULT_REQUESTS_TIMEOUTS

logger = logging.getLogger(__name__)


Expand All @@ -23,7 +25,8 @@ def gov_orgs():
us_gov_github_orgs = set()

gov_orgs_json = requests.get(
"https://government.github.com/organizations.json"
"https://government.github.com/organizations.json",
timeout=DEFAULT_REQUESTS_TIMEOUTS,
).json()

us_gov_github_orgs.update(gov_orgs_json["governments"]["U.S. Federal"])
Expand Down
7 changes: 6 additions & 1 deletion scraper/github/queryManager.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
import pytz
import requests

from scraper.util import DEFAULT_REQUESTS_TIMEOUTS


def _vPrint(verbose, *args, **kwargs):
"""Easy verbosity-control print method.
Expand Down Expand Up @@ -490,10 +492,13 @@ def _submitQuery(
"https://api.github.com/graphql",
data=gitqueryJSON,
headers={**authhead, **headers},
timeout=DEFAULT_REQUESTS_TIMEOUTS,
)
else:
fullResponse = requests.get(
"https://api.github.com" + gitquery, headers={**authhead, **headers}
"https://api.github.com" + gitquery,
headers={**authhead, **headers},
timeout=DEFAULT_REQUESTS_TIMEOUTS,
)
_vPrint(
verbose,
Expand Down
4 changes: 4 additions & 0 deletions scraper/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@

logger = logging.getLogger(__name__)

# Default (connect, read) timeouts, in seconds, passed to requests calls.
# These mirror the defaults in github3.py sessions per:
# https://github.com/sigmavirus24/github3.py/blob/ce43e6e5fdef6555f5a6b6602e2cc4b66c428aef/src/github3/session.py#L98
DEFAULT_REQUESTS_TIMEOUTS = (4, 10)


def execute(command, cwd=None):
logger.debug("Forking command: %s", command)
Expand Down

0 comments on commit 77b5047

Please sign in to comment.