Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🐛 python cdk: mask oauth access key #34931

Merged
merged 16 commits into from
Feb 15, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from airbyte_cdk.sources.http_logger import format_http_message
from airbyte_cdk.sources.message import MessageRepository, NoopMessageRepository
from airbyte_cdk.utils import AirbyteTracedException
from airbyte_cdk.utils.airbyte_secrets_utils import add_to_secrets
from requests.auth import AuthBase

from ..exceptions import DefaultBackoffException
Expand Down Expand Up @@ -115,9 +116,20 @@ def _wrap_refresh_token_exception(self, exception: requests.exceptions.RequestEx
def _get_refresh_access_token_response(self) -> Any:
try:
response = requests.request(method="POST", url=self.get_token_refresh_endpoint(), data=self.build_refresh_request_body())
self._log_response(response)
response.raise_for_status()
return response.json()
if response.ok:
response_json = response.json()
# Add the access token to the list of secrets so it is replaced before logging the response
# An argument could be made to remove the prevous access key from the list of secrets, but unmasking values seems like a security incident waiting to happen...
access_key = response_json.get(self.get_access_token_name())
if not access_key:
raise Exception("Token refresh API response was missing access token {self.get_access_token_name()}")
add_to_secrets(access_key)
self._log_response(response)
return response_json
else:
# log the response even if the request failed for troubleshooting purposes
self._log_response(response)
Comment on lines +129 to +131
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this mean that if the request is not successful, the raw unmasked access token will be logged? Seems like we would want to add it to secrets in both cases right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there won't be an access token if the request is not successful

response.raise_for_status()
except requests.exceptions.RequestException as e:
if e.response is not None:
if e.response.status_code == 429 or e.response.status_code >= 500:
Expand Down
10 changes: 8 additions & 2 deletions airbyte-cdk/python/airbyte_cdk/utils/airbyte_secrets_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
def get_secret_paths(spec: Mapping[str, Any]) -> List[List[str]]:
paths = []

def traverse_schema(schema_item: Any, path: List[str]):
def traverse_schema(schema_item: Any, path: List[str]) -> None:
"""
schema_item can be any property or value in the originally input jsonschema, depending on how far down the recursion stack we go
path is the path to that schema item in the original input
Expand Down Expand Up @@ -56,12 +56,18 @@ def get_secrets(connection_specification: Mapping[str, Any], config: Mapping[str
__SECRETS_FROM_CONFIG: List[str] = []


def update_secrets(secrets: List[str]):
def update_secrets(secrets: List[str]) -> None:
"""Update the list of secrets to be replaced"""
global __SECRETS_FROM_CONFIG
__SECRETS_FROM_CONFIG = secrets


def add_to_secrets(secret: str) -> None:
"""Add to the list of secrets to be replaced"""
global __SECRETS_FROM_CONFIG
__SECRETS_FROM_CONFIG.append(secret)


def filter_secrets(string: str) -> str:
"""Filter secrets from a string by replacing them with ****"""
# TODO this should perform a maximal match for each secret. if "x" and "xk" are both secret values, and this method is called twice on
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import pytest
import requests
from airbyte_cdk.sources.declarative.auth import DeclarativeOauth2Authenticator
from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
from requests import Response

LOGGER = logging.getLogger(__name__)
Expand Down Expand Up @@ -165,6 +166,32 @@ def test_refresh_access_token(self, mocker):

assert ("access_token", 1000) == token

filtered = filter_secrets("access_token")
assert filtered == "****"

def test_refresh_access_token_missing_access_token(self, mocker):
oauth = DeclarativeOauth2Authenticator(
token_refresh_endpoint="{{ config['refresh_endpoint'] }}",
client_id="{{ config['client_id'] }}",
client_secret="{{ config['client_secret'] }}",
refresh_token="{{ config['refresh_token'] }}",
config=config,
scopes=["scope1", "scope2"],
token_expiry_date="{{ config['token_expiry_date'] }}",
refresh_request_body={
"custom_field": "{{ config['custom_field'] }}",
"another_field": "{{ config['another_field'] }}",
"scopes": ["no_override"],
},
parameters={},
)

resp.status_code = 200
mocker.patch.object(resp, "json", return_value={"expires_in": 1000})
mocker.patch.object(requests, "request", side_effect=mock_request, autospec=True)
with pytest.raises(Exception):
oauth.refresh_access_token()

@pytest.mark.parametrize(
"timestamp, expected_date",
[
Expand Down
14 changes: 13 additions & 1 deletion airbyte-cdk/python/unit_tests/utils/test_secret_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#

import pytest
from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets, get_secret_paths, get_secrets, update_secrets
from airbyte_cdk.utils.airbyte_secrets_utils import add_to_secrets, filter_secrets, get_secret_paths, get_secrets, update_secrets

SECRET_STRING_KEY = "secret_key1"
SECRET_STRING_VALUE = "secret_value"
Expand Down Expand Up @@ -121,3 +121,15 @@ def test_secret_filtering():
update_secrets([SECRET_STRING_VALUE, SECRET_STRING_2_VALUE])
filtered = filter_secrets(sensitive_str)
assert filtered == f"**** {NOT_SECRET_VALUE} **** ****"


def test_secrets_added_are_filtered():
ADDED_SECRET = "only_a_secret_if_added"
sensitive_str = f"{ADDED_SECRET} {NOT_SECRET_VALUE}"

filtered = filter_secrets(sensitive_str)
assert filtered == sensitive_str

add_to_secrets(ADDED_SECRET)
filtered = filter_secrets(sensitive_str)
assert filtered == f"**** {NOT_SECRET_VALUE}"
Loading