diff --git a/airbyte-cdk/python/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py b/airbyte-cdk/python/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py index 0dd450413dd4..63915f71d651 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py @@ -14,6 +14,7 @@ from airbyte_cdk.sources.http_logger import format_http_message from airbyte_cdk.sources.message import MessageRepository, NoopMessageRepository from airbyte_cdk.utils import AirbyteTracedException +from airbyte_cdk.utils.airbyte_secrets_utils import add_to_secrets from requests.auth import AuthBase from ..exceptions import DefaultBackoffException @@ -115,9 +116,20 @@ def _wrap_refresh_token_exception(self, exception: requests.exceptions.RequestEx def _get_refresh_access_token_response(self) -> Any: try: response = requests.request(method="POST", url=self.get_token_refresh_endpoint(), data=self.build_refresh_request_body()) - self._log_response(response) - response.raise_for_status() - return response.json() + if response.ok: + response_json = response.json() + # Add the access token to the list of secrets so it is replaced before logging the response + # An argument could be made to remove the prevous access key from the list of secrets, but unmasking values seems like a security incident waiting to happen... + access_key = response_json.get(self.get_access_token_name()) + if not access_key: + raise Exception("Token refresh API response was missing access token {self.get_access_token_name()}") + add_to_secrets(access_key) + self._log_response(response) + return response_json + else: + # log the response even if the request failed for troubleshooting purposes + self._log_response(response) + response.raise_for_status() except requests.exceptions.RequestException as e: if e.response is not None: if e.response.status_code == 429 or e.response.status_code >= 500: diff --git a/airbyte-cdk/python/airbyte_cdk/utils/airbyte_secrets_utils.py b/airbyte-cdk/python/airbyte_cdk/utils/airbyte_secrets_utils.py index eb04a6cf891f..e690a556606b 100644 --- a/airbyte-cdk/python/airbyte_cdk/utils/airbyte_secrets_utils.py +++ b/airbyte-cdk/python/airbyte_cdk/utils/airbyte_secrets_utils.py @@ -10,7 +10,7 @@ def get_secret_paths(spec: Mapping[str, Any]) -> List[List[str]]: paths = [] - def traverse_schema(schema_item: Any, path: List[str]): + def traverse_schema(schema_item: Any, path: List[str]) -> None: """ schema_item can be any property or value in the originally input jsonschema, depending on how far down the recursion stack we go path is the path to that schema item in the original input @@ -56,12 +56,18 @@ def get_secrets(connection_specification: Mapping[str, Any], config: Mapping[str __SECRETS_FROM_CONFIG: List[str] = [] -def update_secrets(secrets: List[str]): +def update_secrets(secrets: List[str]) -> None: """Update the list of secrets to be replaced""" global __SECRETS_FROM_CONFIG __SECRETS_FROM_CONFIG = secrets +def add_to_secrets(secret: str) -> None: + """Add to the list of secrets to be replaced""" + global __SECRETS_FROM_CONFIG + __SECRETS_FROM_CONFIG.append(secret) + + def filter_secrets(string: str) -> str: """Filter secrets from a string by replacing them with ****""" # TODO this should perform a maximal match for each secret. if "x" and "xk" are both secret values, and this method is called twice on diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/auth/test_oauth.py b/airbyte-cdk/python/unit_tests/sources/declarative/auth/test_oauth.py index bd019d374987..78dd0b591ec3 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/auth/test_oauth.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/auth/test_oauth.py @@ -10,6 +10,7 @@ import pytest import requests from airbyte_cdk.sources.declarative.auth import DeclarativeOauth2Authenticator +from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets from requests import Response LOGGER = logging.getLogger(__name__) @@ -165,6 +166,32 @@ def test_refresh_access_token(self, mocker): assert ("access_token", 1000) == token + filtered = filter_secrets("access_token") + assert filtered == "****" + + def test_refresh_access_token_missing_access_token(self, mocker): + oauth = DeclarativeOauth2Authenticator( + token_refresh_endpoint="{{ config['refresh_endpoint'] }}", + client_id="{{ config['client_id'] }}", + client_secret="{{ config['client_secret'] }}", + refresh_token="{{ config['refresh_token'] }}", + config=config, + scopes=["scope1", "scope2"], + token_expiry_date="{{ config['token_expiry_date'] }}", + refresh_request_body={ + "custom_field": "{{ config['custom_field'] }}", + "another_field": "{{ config['another_field'] }}", + "scopes": ["no_override"], + }, + parameters={}, + ) + + resp.status_code = 200 + mocker.patch.object(resp, "json", return_value={"expires_in": 1000}) + mocker.patch.object(requests, "request", side_effect=mock_request, autospec=True) + with pytest.raises(Exception): + oauth.refresh_access_token() + @pytest.mark.parametrize( "timestamp, expected_date", [ diff --git a/airbyte-cdk/python/unit_tests/utils/test_secret_utils.py b/airbyte-cdk/python/unit_tests/utils/test_secret_utils.py index 08cc81778da0..39c6ff735da0 100644 --- a/airbyte-cdk/python/unit_tests/utils/test_secret_utils.py +++ b/airbyte-cdk/python/unit_tests/utils/test_secret_utils.py @@ -3,7 +3,7 @@ # import pytest -from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets, get_secret_paths, get_secrets, update_secrets +from airbyte_cdk.utils.airbyte_secrets_utils import add_to_secrets, filter_secrets, get_secret_paths, get_secrets, update_secrets SECRET_STRING_KEY = "secret_key1" SECRET_STRING_VALUE = "secret_value" @@ -121,3 +121,15 @@ def test_secret_filtering(): update_secrets([SECRET_STRING_VALUE, SECRET_STRING_2_VALUE]) filtered = filter_secrets(sensitive_str) assert filtered == f"**** {NOT_SECRET_VALUE} **** ****" + + +def test_secrets_added_are_filtered(): + ADDED_SECRET = "only_a_secret_if_added" + sensitive_str = f"{ADDED_SECRET} {NOT_SECRET_VALUE}" + + filtered = filter_secrets(sensitive_str) + assert filtered == sensitive_str + + add_to_secrets(ADDED_SECRET) + filtered = filter_secrets(sensitive_str) + assert filtered == f"**** {NOT_SECRET_VALUE}"