From 0ca092f78e297198255e1681624937060f211bf7 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Thu, 15 Feb 2024 10:39:47 +0100 Subject: [PATCH 1/2] Enable timeouts when getting soup --- src/bioversions/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bioversions/utils.py b/src/bioversions/utils.py index 8c0b1225..3192e929 100644 --- a/src/bioversions/utils.py +++ b/src/bioversions/utils.py @@ -47,9 +47,9 @@ def norm(s: str) -> str: return s.lower().replace(" ", "").replace("-", "").replace(".", "") -def get_soup(url: str, verify: bool = True) -> BeautifulSoup: +def get_soup(url: str, verify: bool = True, timeout: Optional[int] = None) -> BeautifulSoup: """Get a beautiful soup parsed version of the given web page.""" - res = requests.get(url, verify=verify) + res = requests.get(url, verify=verify, timeout=timeout or 3) soup = BeautifulSoup(res.text, features="html.parser") return soup From b971a90a6ef830d99f3b8e373d87a2c0540b23ba Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Thu, 15 Feb 2024 10:49:51 +0100 Subject: [PATCH 2/2] Update utils.py --- src/bioversions/utils.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/bioversions/utils.py b/src/bioversions/utils.py index 3192e929..7968058c 100644 --- a/src/bioversions/utils.py +++ b/src/bioversions/utils.py @@ -48,8 +48,16 @@ def norm(s: str) -> str: def get_soup(url: str, verify: bool = True, timeout: Optional[int] = None) -> BeautifulSoup: - """Get a beautiful soup parsed version of the given web page.""" - res = requests.get(url, verify=verify, timeout=timeout or 3) + """Get a beautiful soup parsed version of the given web page. + + :param url: The URL to download and parse with BeautifulSoup + :param verify: Should the SSL certificate be verified? This is almost always true, + except for Ensembl, which is a big pain + :param timeout: How many integer seconds to wait for a response? + Defaults to 15 if none given. 
+ :returns: A BeautifulSoup object + """ + res = requests.get(url, verify=verify, timeout=timeout or 15) soup = BeautifulSoup(res.text, features="html.parser") return soup