From 07c0212ea174cbdf43a58463f4ad8b019446aef8 Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Tue, 8 Aug 2023 21:01:25 +0200 Subject: [PATCH 1/4] Added `connection_pool_size` configuration property (preview) We have three things that are configurable from the `requests` connection pooling perspective: * `pool_connections` - Number of urllib3 connection pools to cache before discarding the least recently used pool. Python requests default value is 10. This PR increases it to 20. * `pool_maxsize` - The maximum number of connections to save in the pool. Improves performance in multithreaded situations. For now, we're setting it to the same value as connection_pool_size. * `pool_block` - If pool_block is False, then more connections are created when the pool is exhausted, but they are not saved after the first use. If pool_block is True, the caller blocks when no free connections are available, and urllib3 ensures that no more than pool_maxsize connections are used at a time. This prevents flooding the platform. By default, the requests library doesn't block. We start with blocking. This PR introduces the `connection_pool_size` configuration property, which sets `pool_connections` and `pool_maxsize` to the same value. If not set, it uses 20 instead of the default 10. 
--- databricks/sdk/core.py | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/databricks/sdk/core.py b/databricks/sdk/core.py index 386ad325..729a58e3 100644 --- a/databricks/sdk/core.py +++ b/databricks/sdk/core.py @@ -18,7 +18,7 @@ import requests import requests.auth -from requests.adapters import HTTPAdapter +from requests.adapters import HTTPAdapter, DEFAULT_POOLSIZE from urllib3.util.retry import Retry from .azure import ARM_DATABRICKS_RESOURCE_ID, ENVIRONMENTS, AzureEnvironment @@ -491,6 +491,7 @@ class Config: metadata_service_url = ConfigAttribute(env='DATABRICKS_METADATA_SERVICE_URL', auth='metadata-service', sensitive=True) + connection_pool_size: int = ConfigAttribute() def __init__(self, *, @@ -893,7 +894,29 @@ def __init__(self, cfg: Config = None): self._session = requests.Session() self._session.auth = self._authenticate - self._session.mount("https://", HTTPAdapter(max_retries=retry_strategy)) + + # Number of urllib3 connection pools to cache before discarding the least + # recently used pool. Python requests default value is 10. + pool_connections = cfg.connection_pool_size + if pool_connections is None: + pool_connections = 20 + + # The maximum number of connections to save in the pool. Improves performance + # in multithreaded situations. For now, we're setting it to the same value + # as connection_pool_size. + pool_maxsize = pool_connections + + # If pool_block is False, then more connections will are created, + # but not saved after the first use. Blocks when no free connections are available. + # urllib3 ensures that no more than pool_maxsize connections are used at a time. + # Prevents platform from flooding. By default, requests library doesn't block. 
+ pool_block = True + + http_adapter = HTTPAdapter(max_retries=retry_strategy, + pool_connections=pool_connections, + pool_maxsize=pool_maxsize, + pool_block=pool_block) + self._session.mount("https://", http_adapter) @property def account_id(self) -> str: From d3fa59ff03e38fc1a3c5c99f4a23d9213ce30be8 Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Tue, 8 Aug 2023 21:02:06 +0200 Subject: [PATCH 2/4] fmt --- databricks/sdk/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/databricks/sdk/core.py b/databricks/sdk/core.py index 729a58e3..ea502ff4 100644 --- a/databricks/sdk/core.py +++ b/databricks/sdk/core.py @@ -18,7 +18,7 @@ import requests import requests.auth -from requests.adapters import HTTPAdapter, DEFAULT_POOLSIZE +from requests.adapters import HTTPAdapter from urllib3.util.retry import Retry from .azure import ARM_DATABRICKS_RESOURCE_ID, ENVIRONMENTS, AzureEnvironment From 565dc6884751f9ae45759eea1e5f52749019f66f Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Wed, 9 Aug 2023 17:02:37 +0200 Subject: [PATCH 3/4] added `connection_pool_max_size` configurable --- databricks/sdk/core.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/databricks/sdk/core.py b/databricks/sdk/core.py index ea502ff4..8c59844e 100644 --- a/databricks/sdk/core.py +++ b/databricks/sdk/core.py @@ -492,6 +492,7 @@ class Config: auth='metadata-service', sensitive=True) connection_pool_size: int = ConfigAttribute() + connection_pool_max_size: int = ConfigAttribute() def __init__(self, *, @@ -904,7 +905,9 @@ def __init__(self, cfg: Config = None): # The maximum number of connections to save in the pool. Improves performance # in multithreaded situations. For now, we're setting it to the same value # as connection_pool_size. 
- pool_maxsize = pool_connections + pool_maxsize = cfg.connection_pool_max_size + if cfg.connection_pool_max_size is None: + pool_maxsize = pool_connections # If pool_block is False, then more connections will are created, # but not saved after the first use. Blocks when no free connections are available. From 6ed5b70455307e17414af6052127e085d16ee430 Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Thu, 10 Aug 2023 13:38:13 +0200 Subject: [PATCH 4/4] rename --- databricks/sdk/core.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/databricks/sdk/core.py b/databricks/sdk/core.py index 8c59844e..4127a985 100644 --- a/databricks/sdk/core.py +++ b/databricks/sdk/core.py @@ -491,8 +491,8 @@ class Config: metadata_service_url = ConfigAttribute(env='DATABRICKS_METADATA_SERVICE_URL', auth='metadata-service', sensitive=True) - connection_pool_size: int = ConfigAttribute() - connection_pool_max_size: int = ConfigAttribute() + max_connection_pools: int = ConfigAttribute() + max_connections_per_pool: int = ConfigAttribute() def __init__(self, *, @@ -898,15 +898,15 @@ def __init__(self, cfg: Config = None): # Number of urllib3 connection pools to cache before discarding the least # recently used pool. Python requests default value is 10. - pool_connections = cfg.connection_pool_size + pool_connections = cfg.max_connection_pools if pool_connections is None: pool_connections = 20 # The maximum number of connections to save in the pool. Improves performance # in multithreaded situations. For now, we're setting it to the same value # as connection_pool_size. - pool_maxsize = cfg.connection_pool_max_size - if cfg.connection_pool_max_size is None: + pool_maxsize = cfg.max_connections_per_pool + if cfg.max_connections_per_pool is None: pool_maxsize = pool_connections # If pool_block is False, then more connections will are created,