Skip to content

Commit

Permalink
#2576 oncall. SAT: test spec against exposed secrets (#19124)
Browse files Browse the repository at this point in the history
* #2576 oncall. SAT: test spec against exposed secrets

* #2576 upd changelog

* #2576 review fixes

* #2576 more test fixes
  • Loading branch information
davydov-d authored Nov 10, 2022
1 parent bfdba6c commit 459856b
Show file tree
Hide file tree
Showing 5 changed files with 239 additions and 2 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog


## 0.2.18
Test connector specification against exposed secret fields. [#19124](https://github.com/airbytehq/airbyte/pull/19124).

## 0.2.17
Make `incremental.future_state` mandatory in `high` `test_strictness_level`. [#19085](https://github.com/airbytehq/airbyte/pull/19085/).

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ COPY pytest.ini setup.py ./
COPY source_acceptance_test ./source_acceptance_test
RUN pip install .

LABEL io.airbyte.version=0.2.17
LABEL io.airbyte.version=0.2.18
LABEL io.airbyte.name=airbyte/source-acceptance-test

ENTRYPOINT ["python", "-m", "pytest", "-p", "source_acceptance_test.plugin", "-r", "fEsx"]
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from collections import Counter, defaultdict
from functools import reduce
from logging import Logger
from typing import Any, Dict, List, Mapping, MutableMapping, Optional, Set
from typing import Any, Dict, List, Mapping, MutableMapping, Optional, Set, Tuple
from xmlrpc.client import Boolean

import dpath.util
Expand Down Expand Up @@ -53,6 +53,29 @@ def connector_spec_dict_fixture(actual_connector_spec):
return json.loads(actual_connector_spec.json())


@pytest.fixture(name="secret_property_names")
def secret_property_names_fixture():
return (
"client_token",
"access_token",
"api_token",
"token",
"secret",
"client_secret",
"password",
"key",
"service_account_info",
"service_account",
"tenant_id",
"certificate",
"jwt",
"credentials",
"app_id",
"appid",
"refresh_token",
)


@pytest.mark.default_timeout(10)
class TestSpec(BaseTest):

Expand Down Expand Up @@ -164,6 +187,84 @@ def test_has_secret(self):
def test_secret_never_in_the_output(self):
"""This test should be injected into any docker command it needs to know current config and spec"""

@staticmethod
def _is_spec_property_name_secret(path: str, secret_property_names) -> Tuple[Optional[str], bool]:
"""
Given a path to a type field, extract a field name and decide whether it is a name of secret or not
based on a provided list of secret names.
Split the path by `/`, drop the last item and make list reversed.
Then iterate over it and find the first item that's not a reserved keyword or an index.
Example:
properties/credentials/oneOf/1/properties/api_key/type -> [api_key, properties, 1, oneOf, credentials, properties] -> api_key
"""
reserved_keywords = ("anyOf", "oneOf", "allOf", "not", "properties", "items", "type", "prefixItems")
for part in reversed(path.split("/")[:-1]):
if part.isdigit() or part in reserved_keywords:
continue
return part, part.lower() in secret_property_names
return None, False

@staticmethod
def _property_can_store_secret(prop: dict) -> bool:
"""
Some fields can not hold a secret by design, others can.
Null type as well as boolean can not hold a secret value.
A string, a number or an integer type can always store secrets.
Objects and arrays can hold a secret in case they are generic,
meaning their inner structure is not described in details with properties/items.
"""
unsecure_types = {"string", "integer", "number"}
type_ = prop["type"]
is_property_generic_object = type_ == "object" and not any(
[prop.get("properties", {}), prop.get("anyOf", []), prop.get("oneOf", []), prop.get("allOf", [])]
)
is_property_generic_array = type_ == "array" and not any([prop.get("items", []), prop.get("prefixItems", [])])
return any(
[
isinstance(type_, str) and type_ in unsecure_types,
is_property_generic_object,
is_property_generic_array,
isinstance(type_, list) and (set(type_) & unsecure_types),
]
)

def test_secret_is_properly_marked(self, connector_spec_dict: dict, detailed_logger, secret_property_names):
"""
Each field has a type, therefore we can make a flat list of fields from the returned specification.
Iterate over the list, check if a field name is a secret name, can potentially hold a secret value
and make sure it is marked as `airbyte_secret`.
"""
secrets_exposed = []
non_secrets_hidden = []
spec_properties = connector_spec_dict["connectionSpecification"]["properties"]
for type_path, value in dpath.util.search(spec_properties, "**/type", yielded=True):
_, is_property_name_secret = self._is_spec_property_name_secret(type_path, secret_property_names)
if not is_property_name_secret:
continue
absolute_path = f"/{type_path}"
property_path, _ = absolute_path.rsplit(sep="/", maxsplit=1)
property_definition = dpath.util.get(spec_properties, property_path)
marked_as_secret = property_definition.get("airbyte_secret", False)
possibly_a_secret = self._property_can_store_secret(property_definition)
if marked_as_secret and not possibly_a_secret:
non_secrets_hidden.append(property_path)
if not marked_as_secret and possibly_a_secret:
secrets_exposed.append(property_path)

if non_secrets_hidden:
properties = "\n".join(non_secrets_hidden)
detailed_logger.warning(
f"""Some properties are marked with `airbyte_secret` although they probably should not be.
Please double check them. If they're okay, please fix this test.
{properties}"""
)
if secrets_exposed:
properties = "\n".join(secrets_exposed)
pytest.fail(
f"""The following properties should be marked with `airbyte_secret!`
{properties}"""
)

def test_defined_refs_exist_in_json_spec_file(self, connector_spec_dict: dict):
"""Checking for the presence of unresolved `$ref`s values within each json spec file"""
check_result = list(find_all_values_for_key_in_schema(connector_spec_dict, "$ref"))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import pytest
from airbyte_cdk.models import ConnectorSpecification
from source_acceptance_test import conftest
from source_acceptance_test.tests.test_core import TestSpec as _TestSpec

from .conftest import does_not_raise
Expand Down Expand Up @@ -648,3 +649,133 @@ def test_additional_properties_is_true(connector_spec, expectation):
t = _TestSpec()
with expectation:
t.test_additional_properties_is_true(connector_spec)


@pytest.mark.parametrize(
"connector_spec, should_fail, is_warning_logged",
(
(
{
"connectionSpecification": {"type": "object", "properties": {"api_token": {"type": "string", "airbyte_secret": True}}}
},
False,
False
),
(
{
"connectionSpecification": {"type": "object", "properties": {"api_token": {"type": "null"}}}
},
False,
False
),
(
{
"connectionSpecification": {"type": "object", "properties": {"refresh_token": {"type": "boolean", "airbyte_secret": True}}}
},
False,
True
),
(
{
"connectionSpecification": {"type": "object", "properties": {"jwt": {"type": "object"}}}
},
True,
False
),
(
{
"connectionSpecification": {"type": "object", "properties": {"refresh_token": {"type": ["null", "string"]}}}
},
True,
False
),
(
{
"connectionSpecification": {"type": "object", "properties": {"credentials": {"type": "array"}}}
},
True,
False
),
(
{
"connectionSpecification": {"type": "object", "properties": {"credentials": {"type": "array", "items": {"type": "string"}}}}
},
True,
False
),
(
{
"connectionSpecification": {"type": "object", "properties": {"auth": {"oneOf": [{"api_token": {"type": "string"}}]}}}
},
True,
False
),
(
{
"connectionSpecification": {"type": "object", "properties": {"credentials": {"oneOf": [{"type": "object", "properties": {"api_key": {"type": "string"}}}]}}}
},
True,
False
),
(
{
"connectionSpecification": {"type": "object", "properties": {"start_date": {"type": ["null", "string"]}}}
},
False,
False
)
),
)
def test_airbyte_secret(mocker, connector_spec, should_fail, is_warning_logged):
mocker.patch.object(conftest.pytest, "fail")
t = _TestSpec()
logger = mocker.Mock()
t.test_secret_is_properly_marked(connector_spec, logger, ("api_key", "api_token", "refresh_token", "jwt", "credentials"))
if should_fail:
conftest.pytest.fail.assert_called_once()
else:
conftest.pytest.fail.assert_not_called()
if is_warning_logged:
_, args, _ = logger.warning.mock_calls[0]
msg, *_ = args
assert "Some properties are marked with `airbyte_secret` although they probably should not be" in msg
else:
logger.warning.assert_not_called()


@pytest.mark.parametrize(
"path, expected_name, expected_result",
(
("properties/api_key/type", "api_key", True),
("properties/start_date/type", "start_date", False),
("properties/credentials/oneOf/1/properties/api_token/type", "api_token", True),
("properties/type", None, False), # root element
("properties/accounts/items/2/properties/jwt/type", "jwt", True)
)
)
def test_is_spec_property_name_secret(path, expected_name, expected_result):
t = _TestSpec()
assert t._is_spec_property_name_secret(path, ("api_key", "api_token", "refresh_token", "jwt", "credentials")) == (expected_name, expected_result)


@pytest.mark.parametrize(
"property_def, can_store_secret",
(
({"type": "boolean"}, False),
({"type": "null"}, False),
({"type": "string"}, True),
({"type": "integer"}, True),
({"type": "number"}, True),
({"type": ["null", "string"]}, True),
({"type": ["null", "boolean"]}, False),
({"type": "object"}, True),
# the object itself cannot hold a secret but the inner items can and will be processed separately
({"type": "object", "properties": {"api_key": {}}}, False),
({"type": "array"}, True),
# same as object
({"type": "array", "items": {"type": "string"}}, False)
)
)
def test_property_can_store_secret(property_def, can_store_secret):
t = _TestSpec()
assert t._property_can_store_secret(property_def) is can_store_secret
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ acceptance_tests: # Tests configuration
Verify that a `spec` operation issued to the connector returns a valid connector specification.
Additional tests are validating the backward compatibility of the current specification compared to the specification of the previous connector version. If no previous connector version is found (by default the test looks for a docker image with the same name but with the `latest` tag), this test is skipped.
These backward compatibility tests can be bypassed by changing the value of the `backward_compatibility_tests_config.disable_for_version` input in `acceptance-test-config.yml` (see below).
One more test validates the specification against containing exposed secrets. This means fields that potentially could hold a secret value should be explicitly marked with `"airbyte_secret": true`. If an input field like `api_key` / `password` / `client_secret` / etc. is exposed, the test will fail.

| Input | Type | Default | Note |
| :--------------------------------------------------------------- | :----- | :------------------ | :-------------------------------------------------------------------------------------------------------------------- |
Expand Down

0 comments on commit 459856b

Please sign in to comment.