Skip to content

Commit

Permalink
make sure that all the secrets are remove (#8881)
Browse files Browse the repository at this point in the history
explore all the nodes to mask secrets, even if they are arrays or nested at a given deep level.

It makes what has been made in #8859 more generic
  • Loading branch information
benmoriceau authored Dec 18, 2021
1 parent 20c4889 commit ec305a5
Show file tree
Hide file tree
Showing 10 changed files with 214 additions and 177 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,19 @@

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.JsonNodeFactory;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import io.airbyte.commons.json.Jsons;
import io.airbyte.validation.json.JsonSchemaValidator;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Optional;
import java.util.Queue;
import java.util.Set;
import lombok.Value;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand All @@ -22,6 +28,9 @@ public class JsonSecretsProcessor {

public static String AIRBYTE_SECRET_FIELD = "airbyte_secret";
public static final String PROPERTIES_FIELD = "properties";
public static String TYPE_FIELD = "type";
public static String ARRAY_TYPE_FIELD = "array";
public static String ITEMS_FIELD = "items";

private static final JsonSchemaValidator VALIDATOR = new JsonSchemaValidator();

Expand All @@ -46,36 +55,80 @@ public JsonNode maskSecrets(final JsonNode obj, final JsonNode schema) {
return obj;
}
Preconditions.checkArgument(schema.isObject());
// get the properties field
final ObjectNode properties = (ObjectNode) schema.get(PROPERTIES_FIELD);
final JsonNode copy = obj.deepCopy();
// for the property keys
for (final String key : Jsons.keys(properties)) {
final JsonNode fieldSchema = properties.get(key);
// if the json schema field is an obj and has the airbyte secret field
if (isSecret(fieldSchema) && copy.has(key)) {
// mask and set it
if (copy.has(key)) {
((ObjectNode) copy).put(key, SECRETS_MASK);

final SecretKeys secretKeys = getAllSecretKeys(schema);
return maskAllSecrets(obj, secretKeys);
}

private JsonNode maskAllSecrets(final JsonNode obj, final SecretKeys secretKeys) {
final JsonNode copiedObj = obj.deepCopy();
final Queue<JsonNode> toProcess = new LinkedList<>();
toProcess.add(copiedObj);

while (!toProcess.isEmpty()) {
final JsonNode currentNode = toProcess.remove();
for (final String key : Jsons.keys(currentNode)) {
if (secretKeys.fieldSecretKey.contains(key)) {
((ObjectNode) currentNode).put(key, SECRETS_MASK);
} else if (currentNode.get(key).isObject()) {
toProcess.add(currentNode.get(key));
} else if (currentNode.get(key).isArray()) {
if (secretKeys.arraySecretKey.contains(key)) {
final ArrayNode sanitizedArrayNode = new ArrayNode(JsonNodeFactory.instance);
currentNode.get(key).forEach((secret) -> sanitizedArrayNode.add(SECRETS_MASK));
((ObjectNode) currentNode).put(key, sanitizedArrayNode);
} else {
final ArrayNode arrayNode = (ArrayNode) currentNode.get(key);
arrayNode.forEach((node) -> {
toProcess.add(node);
});
}
}
} else if (canBeProcessed(fieldSchema) && copy.has(key)) {
((ObjectNode) copy).set(key, maskSecrets(copy.get(key), fieldSchema));
}
}

final var combinationKey = findJsonCombinationNode(fieldSchema);
return copiedObj;
}

@Value
private class SecretKeys {

private final Set<String> fieldSecretKey;
private final Set<String> arraySecretKey;

}

if (combinationKey.isPresent() && copy.has(key)) {
var combinationCopy = copy.get(key);
final var arrayNode = (ArrayNode) fieldSchema.get(combinationKey.get());
for (int i = 0; i < arrayNode.size(); i++) {
// Mask field values if any of the combination option is declaring it as secrets
combinationCopy = maskSecrets(combinationCopy, arrayNode.get(i));
private SecretKeys getAllSecretKeys(final JsonNode schema) {
final Set<String> fieldSecretKeys = new HashSet<>();
final Set<String> arraySecretKeys = new HashSet<>();

final Queue<JsonNode> toProcess = new LinkedList<>();
toProcess.add(schema);

while (!toProcess.isEmpty()) {
final JsonNode currentNode = toProcess.remove();
for (final String key : Jsons.keys(currentNode)) {
if (isArrayDefinition(currentNode.get(key))) {
final JsonNode arrayItems = currentNode.get(key).get(ITEMS_FIELD);
if (arrayItems.has(AIRBYTE_SECRET_FIELD) && arrayItems.get(AIRBYTE_SECRET_FIELD).asBoolean()) {
arraySecretKeys.add(key);
} else {
toProcess.add(arrayItems);
}
} else if (isSecret(currentNode.get(key))) {
fieldSecretKeys.add(key);
} else if (currentNode.get(key).isObject()) {
toProcess.add(currentNode.get(key));
} else if (currentNode.get(key).isArray()) {
final ArrayNode arrayNode = (ArrayNode) currentNode.get(key);
arrayNode.forEach((node) -> {
toProcess.add(node);
});
}
((ObjectNode) copy).set(key, combinationCopy);
}
}

return copy;
return new SecretKeys(fieldSecretKeys, arraySecretKeys);
}

public static Optional<String> findJsonCombinationNode(final JsonNode node) {
Expand Down Expand Up @@ -152,4 +205,11 @@ public static boolean canBeProcessed(final JsonNode schema) {
return schema.isObject() && schema.has(PROPERTIES_FIELD) && schema.get(PROPERTIES_FIELD).isObject();
}

public static boolean isArrayDefinition(final JsonNode obj) {
return obj.isObject()
&& obj.has(TYPE_FIELD)
&& obj.get(TYPE_FIELD).asText().equals(ARRAY_TYPE_FIELD)
&& obj.has(ITEMS_FIELD);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,16 @@
import static org.junit.jupiter.api.Assertions.assertEquals;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableMap;
import io.airbyte.commons.json.Jsons;
import java.io.IOException;
import java.io.InputStream;
import java.util.stream.Stream;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

public class JsonSecretsProcessorTest {

Expand Down Expand Up @@ -153,163 +160,8 @@ public class JsonSecretsProcessorTest {
+ " }\n"
+ " }");

private final static String test = """
{
"provider": {
"bucket": "bucket",
"endpoint": "",
"path_prefix": "",
"aws_access_key_id": "nothingtosee",
"aws_secret_access_key": "same"
}
}
""";

private final static String testSpecs =
"""
{
"type": "object",
"title": "S3 Source Spec",
"required": [
"dataset",
"path_pattern",
"provider"
],
"properties": {
"provider": {
"type": "object",
"title": "S3: Amazon Web Services",
"required": [
"bucket"
],
"properties": {
"bucket": {
"type": "string",
"title": "Bucket",
"description": "Name of the S3 bucket where the file(s) exist."
},
"use_ssl": {
"type": "boolean",
"title": "Use Ssl",
"description": "Is remote server using secure SSL/TLS connection"
},
"endpoint": {
"type": "string",
"title": "Endpoint",
"default": "",
"description": "Endpoint to an S3 compatible service. Leave empty to use AWS."
},
"path_prefix": {
"type": "string",
"title": "Path Prefix",
"default": "",
"description": "By providing a path-like prefix (e.g. myFolder/thisTable/) under which all the relevant files sit, we can optimise finding these in S3. This is optional but recommended if your bucket contains many folders/files."
},
"verify_ssl_cert": {
"type": "boolean",
"title": "Verify Ssl Cert",
"description": "Allow self signed certificates"
},
"aws_access_key_id": {
"type": "string",
"title": "Aws Access Key Id",
"description": "In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.",
"airbyte_secret": true
},
"aws_secret_access_key": {
"type": "string",
"title": "Aws Secret Access Key",
"description": "In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.",
"airbyte_secret": true
}
}
}
}
}
""";

JsonSecretsProcessor processor = new JsonSecretsProcessor();

@Test
public void testNestedSecrets() {
final JsonNode obj = Jsons.deserialize(test);
final JsonNode specObj = Jsons.deserialize(testSpecs);
final JsonNode sanitized = processor.maskSecrets(obj, specObj);

final JsonNode expected = Jsons.jsonNode(ImmutableMap.builder()
.put("bucket", "bucket")
.put("endpoint", "")
.put("path_prefix", "")
.put("aws_access_key_id", JsonSecretsProcessor.SECRETS_MASK)
.put("aws_secret_access_key", JsonSecretsProcessor.SECRETS_MASK).build());
assertEquals(expected, sanitized.get("provider"));
}

@Test
public void testMaskSecrets() {
final JsonNode obj = Jsons.jsonNode(ImmutableMap.builder()
.put("field1", "value1")
.put("field2", 2)
.put("secret1", "donttellanyone")
.put("secret2", "verysecret").build());

final JsonNode sanitized = processor.maskSecrets(obj, SCHEMA_ONE_LAYER);

final JsonNode expected = Jsons.jsonNode(ImmutableMap.builder()
.put("field1", "value1")
.put("field2", 2)
.put("secret1", JsonSecretsProcessor.SECRETS_MASK)
.put("secret2", JsonSecretsProcessor.SECRETS_MASK).build());
assertEquals(expected, sanitized);
}

@Test
public void testMaskSecretsNotInObj() {
final JsonNode obj = Jsons.jsonNode(ImmutableMap.builder()
.put("field1", "value1")
.put("field2", 2).build());

final JsonNode actual = processor.maskSecrets(obj, SCHEMA_ONE_LAYER);

// Didn't have secrets, no fields should have been impacted.
assertEquals(obj, actual);
}

@Test
public void testMaskSecretInnerObject() {
final JsonNode oneOf = Jsons.jsonNode(ImmutableMap.builder()
.put("s3_bucket_name", "name")
.put("secret_access_key", "secret").build());
final JsonNode base = Jsons.jsonNode(ImmutableMap.builder()
.put("warehouse", "house")
.put("loading_method", oneOf).build());

final JsonNode actual = processor.maskSecrets(base, SCHEMA_INNER_OBJECT);

final JsonNode expectedOneOf = Jsons.jsonNode(ImmutableMap.builder()
.put("s3_bucket_name", "name")
.put("secret_access_key", JsonSecretsProcessor.SECRETS_MASK)
.build());
final JsonNode expected = Jsons.jsonNode(ImmutableMap.builder()
.put("warehouse", "house")
.put("loading_method", expectedOneOf).build());

assertEquals(expected, actual);
}

@Test
public void testMaskSecretNotInInnerObject() {
final JsonNode base = Jsons.jsonNode(ImmutableMap.builder()
.put("warehouse", "house").build());

final JsonNode actual = processor.maskSecrets(base, SCHEMA_INNER_OBJECT);

final JsonNode expected = Jsons.jsonNode(ImmutableMap.builder()
.put("warehouse", "house").build());

assertEquals(expected, actual);
}

@Test
public void testCopySecrets() {
final JsonNode src = Jsons.jsonNode(ImmutableMap.builder()
Expand Down Expand Up @@ -431,4 +283,45 @@ void testHandlesSameKeyInOneOf() {
final JsonNode actual = new JsonSecretsProcessor().copySecrets(src, dst, ONE_OF_WITH_SAME_KEY_IN_SUB_SCHEMAS);
}

private static Stream<Arguments> scenarioProvider() {
return Stream.of(
Arguments.of("array", true),
Arguments.of("array", false),
Arguments.of("array_of_oneof", true),
Arguments.of("array_of_oneof", false),
Arguments.of("nested_object", true),
Arguments.of("nested_object", false),
Arguments.of("nested_oneof", true),
Arguments.of("nested_oneof", false),
Arguments.of("oneof", true),
Arguments.of("oneof", false),
Arguments.of("optional_password", true),
Arguments.of("optional_password", false),
Arguments.of("postgres_ssh_key", true),
Arguments.of("postgres_ssh_key", false),
Arguments.of("simple", true),
Arguments.of("simple", false));
}

@ParameterizedTest
@MethodSource("scenarioProvider")
void testSecretScenario(final String folder, final boolean partial) throws IOException {
final ObjectMapper objectMapper = new ObjectMapper();

final InputStream specIs = getClass().getClassLoader().getResourceAsStream(folder + "/spec.json");
final JsonNode specs = objectMapper.readTree(specIs);

final String inputFilePath = folder + (partial ? "/partial_config.json" : "/full_config.json");
final InputStream inputIs = getClass().getClassLoader().getResourceAsStream(inputFilePath);
final JsonNode input = objectMapper.readTree(inputIs);

final String expectedFilePath = folder + "/expected.json";
final InputStream expectedIs = getClass().getClassLoader().getResourceAsStream(expectedFilePath);
final JsonNode expected = objectMapper.readTree(expectedIs);

final JsonNode actual = processor.maskSecrets(input, specs);

assertEquals(expected, actual);
}

}
15 changes: 15 additions & 0 deletions airbyte-config/persistence/src/test/resources/array/expected.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"username": "charles",
"rotating_key_strings": ["**********", "**********", "**********"],
"rotating_key_objects": [
{
"a": "**********"
},
{
"a": "**********"
},
{
"a": "**********"
}
]
}
Loading

0 comments on commit ec305a5

Please sign in to comment.