From 78acd211ab61a26b8cbceba0efec5a2cd324e87d Mon Sep 17 00:00:00 2001 From: Leah Antkiewicz Date: Wed, 11 Aug 2021 11:50:47 -0400 Subject: [PATCH 1/6] Retry GitHub download failures --- core/dbt/clients/registry.py | 26 ++------------------------ core/dbt/clients/system.py | 3 ++- core/dbt/utils.py | 23 +++++++++++++++++++++++ 3 files changed, 27 insertions(+), 25 deletions(-) diff --git a/core/dbt/clients/registry.py b/core/dbt/clients/registry.py index 30b2c9020e6..960b762d7f9 100644 --- a/core/dbt/clients/registry.py +++ b/core/dbt/clients/registry.py @@ -1,10 +1,7 @@ -from functools import wraps import requests -from dbt.exceptions import RegistryException -from dbt.utils import memoized +from dbt.utils import memoized, _exception_retry as exception_retry from dbt.logger import GLOBAL_LOGGER as logger import os -import time if os.getenv('DBT_PACKAGE_HUB_URL'): DEFAULT_REGISTRY_BASE_URL = os.getenv('DBT_PACKAGE_HUB_URL') @@ -19,26 +16,7 @@ def _get_url(url, registry_base_url=None): return '{}{}'.format(registry_base_url, url) -def _wrap_exceptions(fn): - @wraps(fn) - def wrapper(*args, **kwargs): - max_attempts = 5 - attempt = 0 - while True: - attempt += 1 - try: - return fn(*args, **kwargs) - except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as exc: - if attempt < max_attempts: - time.sleep(1) - continue - raise RegistryException( - 'Unable to connect to registry hub' - ) from exc - return wrapper - - -@_wrap_exceptions +@exception_retry def _get(path, registry_base_url=None): url = _get_url(path, registry_base_url) logger.debug('Making package registry request: GET {}'.format(url)) diff --git a/core/dbt/clients/system.py b/core/dbt/clients/system.py index 7d4de07108c..e4e09e859ca 100644 --- a/core/dbt/clients/system.py +++ b/core/dbt/clients/system.py @@ -15,9 +15,9 @@ ) import dbt.exceptions -import dbt.utils from dbt.logger import GLOBAL_LOGGER as logger +from dbt.utils import _exception_retry as exception_retry if sys.platform == 'win32': from ctypes import WinDLL, c_bool @@ -441,6 +441,7 @@ def run_cmd( return out, err +@exception_retry def download( url: str, path: str, timeout: Optional[Union[float, tuple]] = None ) -> None: diff --git a/core/dbt/utils.py b/core/dbt/utils.py index 0fec7cf4850..cb92fe51f2d 100644 --- a/core/dbt/utils.py +++ b/core/dbt/utils.py @@ -8,7 +8,11 @@ import itertools import json import os +import requests +import time + from contextlib import contextmanager +from dbt.exceptions import RegistryException from enum import Enum from typing_extensions import Protocol from typing import ( @@ -599,3 +603,22 @@ def __getitem__(self, name: str) -> Any: def __contains__(self, name) -> bool: return any((name in entry for entry in self._itersource())) + + +def _exception_retry(fn): + @functools.wraps(fn) + def wrapper(*args, **kwargs): + max_attempts = 5 + attempt = 0 + while True: + attempt += 1 + try: + return fn(*args, **kwargs) + except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as exc: + if attempt < max_attempts: + time.sleep(1) + continue + raise RegistryException( + 'Unable to connect to registry hub' + ) from exc + return wrapper From 6ded253f4816625c253cbc2674ede9a653eb9d0d Mon Sep 17 00:00:00 2001 From: Leah Antkiewicz Date: Mon, 16 Aug 2021 14:18:44 -0400 Subject: [PATCH 2/6] Refactor and add tests --- core/dbt/clients/registry.py | 15 ++++--- core/dbt/clients/system.py | 10 +++-- core/dbt/deps/registry.py | 2 +- core/dbt/exceptions.py | 2 +- core/dbt/utils.py | 30 ++++++-------- test/unit/test_core_dbt_utils.py | 41 +++++++++++++++++++ .../test_registry_get_request_exception.py | 8 ++-- 7 files changed, 75 insertions(+), 33 deletions(-) create mode 100644 test/unit/test_core_dbt_utils.py diff --git a/core/dbt/clients/registry.py b/core/dbt/clients/registry.py index 960b762d7f9..78e569a8a3a 100644 --- a/core/dbt/clients/registry.py +++ b/core/dbt/clients/registry.py @@ -1,5 +1,5 @@ import requests -from dbt.utils import memoized, _exception_retry as exception_retry +from dbt.utils import memoized, _connection_exception_retry as connection_exception_retry from dbt.logger import GLOBAL_LOGGER as logger import os @@ -16,7 +16,10 @@ def _get_url(url, registry_base_url=None): return '{}{}'.format(registry_base_url, url) -@exception_retry +def _get_with_retries(path, registry_base_url=None): + return connection_exception_retry(lambda: _get(path, registry_base_url), 5) + + def _get(path, registry_base_url=None): url = _get_url(path, registry_base_url) logger.debug('Making package registry request: GET {}'.format(url)) @@ -28,22 +31,22 @@ def _get(path, registry_base_url=None): def index(registry_base_url=None): - return _get('api/v1/index.json', registry_base_url) + return _get_with_retries('api/v1/index.json', registry_base_url) index_cached = memoized(index) def packages(registry_base_url=None): - return _get('api/v1/packages.json', registry_base_url) + return _get_with_retries('api/v1/packages.json', registry_base_url) def package(name, registry_base_url=None): - return _get('api/v1/{}.json'.format(name), registry_base_url) + return _get_with_retries('api/v1/{}.json'.format(name), registry_base_url) def package_version(name, version, registry_base_url=None): - return _get('api/v1/{}/{}.json'.format(name, version), registry_base_url) + return _get_with_retries('api/v1/{}/{}.json'.format(name, version), registry_base_url) def get_available_versions(name): diff --git a/core/dbt/clients/system.py b/core/dbt/clients/system.py index e4e09e859ca..d3abc38876a 100644 --- a/core/dbt/clients/system.py +++ b/core/dbt/clients/system.py @@ -15,9 +15,8 @@ ) import dbt.exceptions - from dbt.logger import GLOBAL_LOGGER as logger -from dbt.utils import _exception_retry as exception_retry +from dbt.utils import _connection_exception_retry as connection_exception_retry if sys.platform == 'win32': from ctypes import WinDLL, c_bool @@ -441,7 +440,12 @@ def run_cmd( return out, err -@exception_retry +def download_with_retries( + url: str, path: str, timeout: Optional[Union[float, tuple]] = None +) -> None: + connection_exception_retry(lambda: download(url, path, timeout), 5) + + def download( url: str, path: str, timeout: Optional[Union[float, tuple]] = None ) -> None: diff --git a/core/dbt/deps/registry.py b/core/dbt/deps/registry.py index 59593dcce60..43033138fd7 100644 --- a/core/dbt/deps/registry.py +++ b/core/dbt/deps/registry.py @@ -61,7 +61,7 @@ def install(self, project, renderer): system.make_directory(os.path.dirname(tar_path)) download_url = metadata.downloads.tarball - system.download(download_url, tar_path) + system.download_with_retries(download_url, tar_path) deps_path = project.modules_path package_name = self.get_project_name(project, renderer) system.untar_package(tar_path, deps_path, package_name) diff --git a/core/dbt/exceptions.py b/core/dbt/exceptions.py index 38f39580d47..674207c4153 100644 --- a/core/dbt/exceptions.py +++ b/core/dbt/exceptions.py @@ -714,7 +714,7 @@ def system_error(operation_name): .format(operation_name)) -class RegistryException(Exception): +class ConnectionException(Exception): pass diff --git a/core/dbt/utils.py b/core/dbt/utils.py index cb92fe51f2d..05354a15533 100644 --- a/core/dbt/utils.py +++ b/core/dbt/utils.py @@ -12,7 +12,8 @@ import time from contextlib import contextmanager -from dbt.exceptions import RegistryException +from dbt.exceptions import ConnectionException +from dbt.logger import GLOBAL_LOGGER as logger from enum import Enum from typing_extensions import Protocol from typing import ( @@ -605,20 +606,13 @@ def __contains__(self, name) -> bool: return any((name in entry for entry in self._itersource())) -def _exception_retry(fn): - @functools.wraps(fn) - def wrapper(*args, **kwargs): - max_attempts = 5 - attempt = 0 - while True: - attempt += 1 - try: - return fn(*args, **kwargs) - except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as exc: - if attempt < max_attempts: - time.sleep(1) - continue - raise RegistryException( - 'Unable to connect to registry hub' - ) from exc - return wrapper +def _connection_exception_retry(fn, max_attempts: int, attempt: int = 0): + try: + return fn() + except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as exc: + if attempt <= max_attempts - 1: + logger.debug("Retrying external call. Attempt: ", attempt, " Max attempts: " , max_attempts) + time.sleep(1) + _connection_exception_retry(fn, max_attempts, attempt + 1) + else: + raise ConnectionException('External connection exception occurred: ' + str(exc)) diff --git a/test/unit/test_core_dbt_utils.py b/test/unit/test_core_dbt_utils.py new file mode 100644 index 00000000000..d3841071d8f --- /dev/null +++ b/test/unit/test_core_dbt_utils.py @@ -0,0 +1,41 @@ +import requests +import unittest + +from dbt.exceptions import ConnectionException +from dbt.utils import _connection_exception_retry as connection_exception_retry + + +class testCoreDbtUtils(unittest.TestCase): + def test_connection_exception_retry_none(self): + Counter._reset() + connection_exception_retry(lambda: Counter._add(), 5) + self.assertEqual(1, counter) + + def test_connection_exception_retry_max(self): + Counter._reset() + self.assertRaises(ConnectionException, connection_exception_retry(lambda: Counter._add_with_exception(), 5)) + self.assertEqual(6, counter) # 6 = original attempt plus 5 retries + + def test_connection_exception_retry_success(self): + Counter._reset() + connection_exception_retry(lambda: Counter._add_with_limited_exception(), 5) + self.assertEqual(2, counter) # 2 = original attempt plus 1 retry + + +counter:int = 0 +class Counter(): + def _add(): + global counter + counter+=1 + def _add_with_exception(): + global counter + counter+=1 + raise requests.exceptions.ConnectionError + def _add_with_limited_exception(): + global counter + counter+=1 + if counter < 2: + raise requests.exceptions.ConnectionError + def _reset(): + global counter + counter = 0 diff --git a/test/unit/test_registry_get_request_exception.py b/test/unit/test_registry_get_request_exception.py index 254169d9894..44033fe0546 100644 --- a/test/unit/test_registry_get_request_exception.py +++ b/test/unit/test_registry_get_request_exception.py @@ -1,9 +1,9 @@ import unittest -from dbt.exceptions import RegistryException -from dbt.clients.registry import _get +from dbt.exceptions import ConnectionException +from dbt.clients.registry import _get_with_retries class testRegistryGetRequestException(unittest.TestCase): def test_registry_request_error_catching(self): - # using non routable IP to test connection error logic in the _get function - self.assertRaises(RegistryException, _get, '', 'http://0.0.0.0') + # using non routable IP to test connection error logic in the _get_with_retries function + self.assertRaises(ConnectionException, _get_with_retries, '', 'http://0.0.0.0') From 5c6a5f6cc167aad2708d1db4315a05cbf20a494e Mon Sep 17 00:00:00 2001 From: Leah Antkiewicz Date: Mon, 16 Aug 2021 14:41:13 -0400 Subject: [PATCH 3/6] Fixed linting and added comment --- core/dbt/utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/core/dbt/utils.py b/core/dbt/utils.py index 05354a15533..0e97ee2483f 100644 --- a/core/dbt/utils.py +++ b/core/dbt/utils.py @@ -607,11 +607,15 @@ def __contains__(self, name) -> bool: def _connection_exception_retry(fn, max_attempts: int, attempt: int = 0): + """Attempts to run a function that makes an external call, if the call fails + on a connection error or timeout, it will be tried up to 5 more times. + """ try: return fn() except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as exc: if attempt <= max_attempts - 1: - logger.debug("Retrying external call. Attempt: ", attempt, " Max attempts: " , max_attempts) + logger.debug('Retrying external call. Attempt: ' + + f'{attempt} Max attempts: {max_attempts}') time.sleep(1) _connection_exception_retry(fn, max_attempts, attempt + 1) else: From 9e5374b0f0b40a8f3baef294fa58d113d299b390 Mon Sep 17 00:00:00 2001 From: leahwicz <60146280+leahwicz@users.noreply.github.com> Date: Thu, 19 Aug 2021 20:22:49 -0400 Subject: [PATCH 4/6] Fixing unit test assertRaises Co-authored-by: Kyle Wigley --- test/unit/test_core_dbt_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/unit/test_core_dbt_utils.py b/test/unit/test_core_dbt_utils.py index d3841071d8f..ab65647387c 100644 --- a/test/unit/test_core_dbt_utils.py +++ b/test/unit/test_core_dbt_utils.py @@ -13,7 +13,8 @@ def test_connection_exception_retry_none(self): def test_connection_exception_retry_max(self): Counter._reset() - self.assertRaises(ConnectionException, connection_exception_retry(lambda: Counter._add_with_exception(), 5)) + with self.assertRaises(ConnectionException): + connection_exception_retry(lambda: Counter._add_with_exception(), 5) self.assertEqual(6, counter) # 6 = original attempt plus 5 retries def test_connection_exception_retry_success(self): From d7fef88a8cf8a1c66ad5b995f5d875bb8af5d44b Mon Sep 17 00:00:00 2001 From: leahwicz <60146280+leahwicz@users.noreply.github.com> Date: Thu, 19 Aug 2021 20:27:26 -0400 Subject: [PATCH 5/6] Fixing casing Co-authored-by: Kyle Wigley --- test/unit/test_core_dbt_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/test_core_dbt_utils.py b/test/unit/test_core_dbt_utils.py index ab65647387c..4c91fb70c44 100644 --- a/test/unit/test_core_dbt_utils.py +++ b/test/unit/test_core_dbt_utils.py @@ -5,7 +5,7 @@ from dbt.utils import _connection_exception_retry as connection_exception_retry -class testCoreDbtUtils(unittest.TestCase): +class TestCoreDbtUtils(unittest.TestCase): def test_connection_exception_retry_none(self): Counter._reset() connection_exception_retry(lambda: Counter._add(), 5) From efbf59f256e19b2da8a52b11b7aede0b4bca5207 Mon Sep 17 00:00:00 2001 From: Leah Antkiewicz Date: Thu, 19 Aug 2021 20:39:51 -0400 Subject: [PATCH 6/6] Changing to use partial for function calls --- core/dbt/clients/registry.py | 4 +++- core/dbt/clients/system.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/core/dbt/clients/registry.py b/core/dbt/clients/registry.py index 78e569a8a3a..1d6ac5c7675 100644 --- a/core/dbt/clients/registry.py +++ b/core/dbt/clients/registry.py @@ -1,3 +1,4 @@ +import functools import requests from dbt.utils import memoized, _connection_exception_retry as connection_exception_retry from dbt.logger import GLOBAL_LOGGER as logger @@ -17,7 +18,8 @@ def _get_url(url, registry_base_url=None): def _get_with_retries(path, registry_base_url=None): - return connection_exception_retry(lambda: _get(path, registry_base_url), 5) + get_fn = functools.partial(_get, path, registry_base_url) + return connection_exception_retry(get_fn, 5) def _get(path, registry_base_url=None): diff --git a/core/dbt/clients/system.py b/core/dbt/clients/system.py index d3abc38876a..9fe4f4a3d48 100644 --- a/core/dbt/clients/system.py +++ b/core/dbt/clients/system.py @@ -1,4 +1,5 @@ import errno +import functools import fnmatch import json import os @@ -443,7 +444,8 @@ def run_cmd( def download_with_retries( url: str, path: str, timeout: Optional[Union[float, tuple]] = None ) -> None: - connection_exception_retry(lambda: download(url, path, timeout), 5) + download_fn = functools.partial(download, url, path, timeout) + connection_exception_retry(download_fn, 5) def download(