From f7853de9fff4de04b30373a4c642a1efd63ff432 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Tue, 15 Nov 2022 13:48:50 -0800 Subject: [PATCH 01/12] put WellKnownTypes.json in worker --- airbyte-workers/Dockerfile | 8 +++++++- airbyte-workers/build.gradle | 6 ++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/airbyte-workers/Dockerfile b/airbyte-workers/Dockerfile index 926fad98f31c..2a346d79368b 100644 --- a/airbyte-workers/Dockerfile +++ b/airbyte-workers/Dockerfile @@ -4,7 +4,7 @@ FROM ${JDK_IMAGE} AS worker ARG DOCKER_BUILD_ARCH=amd64 RUN amazon-linux-extras install -y docker -RUN yum install -y jq tar && yum clean all +RUN yum install -y jq tar wget gzip && yum clean all RUN curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/${DOCKER_BUILD_ARCH}/kubectl" \ && chmod +x kubectl && mv kubectl /usr/local/bin/ @@ -20,5 +20,11 @@ WORKDIR /app # Move worker app ADD bin/${APPLICATION}-${VERSION}.tar /app +# Grab well-known types file +RUN wget https://github.com/mikefarah/yq/releases/download/v4.30.4/yq_linux_${DOCKER_BUILD_ARCH}.tar.gz -O - | tar xz \ + && mv yq_linux_${DOCKER_BUILD_ARCH} /usr/bin/yq +COPY well_known_types.yaml /app +RUN yq -o yaml /app/well_known_types.yaml > /app/WellKnownTypes.json + # wait for upstream dependencies to become available before starting server ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-${VERSION}/bin/${APPLICATION}"] diff --git a/airbyte-workers/build.gradle b/airbyte-workers/build.gradle index 9bab5ce0fe7d..a966543ec8af 100644 --- a/airbyte-workers/build.gradle +++ b/airbyte-workers/build.gradle @@ -116,8 +116,14 @@ task cloudStorageIntegrationTest(type: Test) { } } +task copyWellKnownTypes(type: Copy) { + from project(':airbyte-protocol').file('protocol-models/src/main/resources/airbyte_protocol/well_known_types.yaml') + into file('build/docker') +} + tasks.named("buildDockerImage") { dependsOn copyGeneratedTar + dependsOn copyWellKnownTypes } Task publishArtifactsTask = getPublishArtifactsTask("$rootProject.ext.version", project) From 1184c87decc09902c77741b6b45ee2782ae1a846 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Wed, 16 Nov 2022 11:43:19 -0800 Subject: [PATCH 02/12] misc random experimentation --- .../validation/json/JsonSchemaValidator.java | 25 +++++++ airbyte-workers/Dockerfile | 2 +- airbyte-workers/build.gradle | 9 ++- .../controller/HeartbeatController.java | 16 ++++ .../src/resources/WellKnownTypes.json | 73 +++++++++++++++++++ 5 files changed, 122 insertions(+), 3 deletions(-) create mode 100644 airbyte-workers/src/resources/WellKnownTypes.json diff --git a/airbyte-json-validation/src/main/java/io/airbyte/validation/json/JsonSchemaValidator.java b/airbyte-json-validation/src/main/java/io/airbyte/validation/json/JsonSchemaValidator.java index 04fb601221d9..e098b70415b1 100644 --- a/airbyte-json-validation/src/main/java/io/airbyte/validation/json/JsonSchemaValidator.java +++ b/airbyte-json-validation/src/main/java/io/airbyte/validation/json/JsonSchemaValidator.java @@ -6,13 +6,20 @@ import com.fasterxml.jackson.databind.JsonNode; import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableMap; +import com.networknt.schema.JsonSchema; import com.networknt.schema.JsonSchemaFactory; import com.networknt.schema.SchemaValidatorsConfig; import com.networknt.schema.SpecVersion; +import com.networknt.schema.SpecVersion.VersionFlag; +import com.networknt.schema.ValidationContext; import com.networknt.schema.ValidationMessage; +import io.airbyte.commons.json.Jsons; import io.airbyte.commons.string.Strings; import java.io.File; import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; import java.util.List; import java.util.Set; import java.util.stream.Collectors; @@ -31,6 +38,7 @@ public class JsonSchemaValidator { public JsonSchemaValidator() { this.schemaValidatorsConfig = new SchemaValidatorsConfig(); + schemaValidatorsConfig.setUriMappings(ImmutableMap.of("file:foo.json", "file:///Users/edgao/Desktop/t.json")); this.jsonSchemaFactory = JsonSchemaFactory.getInstance(SpecVersion.VersionFlag.V7); } @@ -59,6 +67,11 @@ public List getValidationMessagePaths(final JsonNode schemaJson, final J private Set validateInternal(final JsonNode schemaJson, final JsonNode objectJson) { Preconditions.checkNotNull(schemaJson); Preconditions.checkNotNull(objectJson); +// JsonSchema schema = jsonSchemaFactory.getSchema(schemaJson, schemaValidatorsConfig); +// +// new JsonSchema( +// new ValidationContext(jsonSchemaFactory.getUriFactory(), jsonSchemaFactory.getUriFactory(), jsonMetaSchema, this, config), +// mappedUri, schemaNode, true /* retrieved via id, resolving will not change anything */);; return jsonSchemaFactory.getSchema(schemaJson, schemaValidatorsConfig) .validate(objectJson); @@ -136,4 +149,16 @@ public static JsonNode getSchema(final File schemaFile, final String definitionS } } + + public static void main(String[] args) throws URISyntaxException { + SchemaValidatorsConfig c = new SchemaValidatorsConfig(); + c.setUriMappings(ImmutableMap.of("file:foo.json", "file:///Users/edgao/Desktop/t.json")); + + JsonSchemaFactory f = JsonSchemaFactory.getInstance(VersionFlag.V7); + JsonSchema schema = f.getSchema(new URI("file:///Users/edgao/Desktop/inp.json"), c); + Set res = schema.validate(Jsons.deserialize(""" + "arst" + """)); + System.out.println("results were " + res); + } } diff --git a/airbyte-workers/Dockerfile b/airbyte-workers/Dockerfile index 2a346d79368b..1b47475936e5 100644 --- a/airbyte-workers/Dockerfile +++ b/airbyte-workers/Dockerfile @@ -24,7 +24,7 @@ ADD bin/${APPLICATION}-${VERSION}.tar /app RUN wget https://github.com/mikefarah/yq/releases/download/v4.30.4/yq_linux_${DOCKER_BUILD_ARCH}.tar.gz -O - | tar xz \ && mv yq_linux_${DOCKER_BUILD_ARCH} /usr/bin/yq COPY well_known_types.yaml /app -RUN yq -o yaml /app/well_known_types.yaml > /app/WellKnownTypes.json +RUN yq -o json /app/well_known_types.yaml > /app/WellKnownTypes.json # wait for upstream dependencies to become available before starting server ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-${VERSION}/bin/${APPLICATION}"] diff --git a/airbyte-workers/build.gradle b/airbyte-workers/build.gradle index a966543ec8af..666f5060ca69 100644 --- a/airbyte-workers/build.gradle +++ b/airbyte-workers/build.gradle @@ -116,14 +116,19 @@ task cloudStorageIntegrationTest(type: Test) { } } -task copyWellKnownTypes(type: Copy) { +task copyWellKnownTypesResource(type: Copy) { + from project(':airbyte-protocol').file('protocol-models/src/main/resources/airbyte_protocol/well_known_types.yaml') + into file('build/docker') +} + +task copyWellKnownTypesDocker(type: Copy) { from project(':airbyte-protocol').file('protocol-models/src/main/resources/airbyte_protocol/well_known_types.yaml') into file('build/docker') } tasks.named("buildDockerImage") { dependsOn copyGeneratedTar - dependsOn copyWellKnownTypes + dependsOn copyWellKnownTypesResource } Task publishArtifactsTask = getPublishArtifactsTask("$rootProject.ext.version", project) diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/controller/HeartbeatController.java b/airbyte-workers/src/main/java/io/airbyte/workers/controller/HeartbeatController.java index 5dd650554f5d..4bcdfdc66cb6 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/controller/HeartbeatController.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/controller/HeartbeatController.java @@ -4,6 +4,8 @@ package io.airbyte.workers.controller; +import io.airbyte.commons.json.Jsons; +import io.airbyte.validation.json.JsonSchemaValidator; import io.micronaut.http.HttpHeaders; import io.micronaut.http.HttpResponse; import io.micronaut.http.MediaType; @@ -13,6 +15,7 @@ import io.micronaut.http.annotation.Options; import io.micronaut.http.annotation.Post; import java.util.Map; +import java.util.Set; /** * Heartbeat controller @@ -48,4 +51,17 @@ private void addCorsHeaders(final MutableHttpResponse response) { } } + public static void main(String[] args) { +// Set res = new JsonSchemaValidator().validate( +// Jsons.deserialize(""" +// { +// "$ref": "foo.json#/definitions/String" +// } +// """), +// Jsons.deserialize(""" +// "arst" +// """) +// ); +// System.out.println("Validation result: " + res); + } } diff --git a/airbyte-workers/src/resources/WellKnownTypes.json b/airbyte-workers/src/resources/WellKnownTypes.json new file mode 100644 index 000000000000..ec7c2b3cb466 --- /dev/null +++ b/airbyte-workers/src/resources/WellKnownTypes.json @@ -0,0 +1,73 @@ +{ + "definitions": { + "String": { + "type": "string", + "description": "Arbitrary text" + }, + "BinaryData": { + "type": "string", + "description": "Arbitrary binary data. Represented as base64-encoded strings in the JSON transport. In the future, if we support other transports, may be encoded differently.\n", + "pattern": "^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$" + }, + "Date": { + "type": "string", + "pattern": "^\\d{4}-\\d{2}-\\d{2}( BC)?$", + "description": "RFC 3339§5.6's full-date format, extended with BC era support" + }, + "TimestampWithTimezone": { + "type": "string", + "pattern": "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(\\.\\d+)?(Z|[+\\-]\\d{1,2}:\\d{2})( BC)?$", + "description": "An instant in time. Frequently simply referred to as just a timestamp, or timestamptz. Uses RFC 3339§5.6's date-time format, requiring a \"T\" separator, and extended with BC era support. Note that we do _not_ accept Unix epochs here.\n" + }, + "TimestampWithoutTimezone": { + "type": "string", + "pattern": "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(\\.\\d+)?( BC)?$", + "description": "Also known as a localdatetime, or just datetime. Under RFC 3339§5.6, this would be represented as `full-date \"T\" partial-time`, extended with BC era support.\n" + }, + "TimeWithTimezone": { + "type": "string", + "pattern": "^\\d{2}:\\d{2}:\\d{2}(\\.\\d+)?(Z|[+\\-]\\d{1,2}:\\d{2})$", + "description": "An RFC 3339§5.6 full-time" + }, + "TimeWithoutTimezone": { + "type": "string", + "pattern": "^\\d{2}:\\d{2}:\\d{2}(\\.\\d+)?$", + "description": "An RFC 3339§5.6 partial-time" + }, + "Number": { + "type": "string", + "oneOf": [ + { + "pattern": "-?(0|[0-9]\\d*)(\\.\\d+)?" + }, + { + "enum": [ + "Infinity", + "-Infinity", + "NaN" + ] + } + ], + "description": "Note the mix of regex validation for normal numbers, and enum validation for special values." + }, + "Integer": { + "type": "string", + "oneOf": [ + { + "pattern": "-?(0|[0-9]\\d*)" + }, + { + "enum": [ + "Infinity", + "-Infinity", + "NaN" + ] + } + ] + }, + "Boolean": { + "type": "boolean", + "description": "Note the direct usage of a primitive boolean rather than string. Unlike Numbers and Integers, we don't expect unusual values here." + } + } +} From ee48cf8ccf61ef9f005f6e0a1f6958b4b64f1c88 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Fri, 18 Nov 2022 14:27:20 -0800 Subject: [PATCH 03/12] finalize --- .../validation/json/JsonSchemaValidator.java | 70 +++++++++++------- airbyte-workers/build.gradle | 9 +-- .../controller/HeartbeatController.java | 16 ---- .../src/resources/WellKnownTypes.json | 73 ------------------- 4 files changed, 45 insertions(+), 123 deletions(-) delete mode 100644 airbyte-workers/src/resources/WellKnownTypes.json diff --git a/airbyte-json-validation/src/main/java/io/airbyte/validation/json/JsonSchemaValidator.java b/airbyte-json-validation/src/main/java/io/airbyte/validation/json/JsonSchemaValidator.java index e098b70415b1..182e08db9e8f 100644 --- a/airbyte-json-validation/src/main/java/io/airbyte/validation/json/JsonSchemaValidator.java +++ b/airbyte-json-validation/src/main/java/io/airbyte/validation/json/JsonSchemaValidator.java @@ -6,15 +6,12 @@ import com.fasterxml.jackson.databind.JsonNode; import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableMap; +import com.networknt.schema.JsonMetaSchema; import com.networknt.schema.JsonSchema; import com.networknt.schema.JsonSchemaFactory; -import com.networknt.schema.SchemaValidatorsConfig; import com.networknt.schema.SpecVersion; -import com.networknt.schema.SpecVersion.VersionFlag; import com.networknt.schema.ValidationContext; import com.networknt.schema.ValidationMessage; -import io.airbyte.commons.json.Jsons; import io.airbyte.commons.string.Strings; import java.io.File; import java.io.IOException; @@ -33,13 +30,19 @@ public class JsonSchemaValidator { private static final Logger LOGGER = LoggerFactory.getLogger(JsonSchemaValidator.class); - private final SchemaValidatorsConfig schemaValidatorsConfig; private final JsonSchemaFactory jsonSchemaFactory; + private final URI BASE_URI; public JsonSchemaValidator() { - this.schemaValidatorsConfig = new SchemaValidatorsConfig(); - schemaValidatorsConfig.setUriMappings(ImmutableMap.of("file:foo.json", "file:///Users/edgao/Desktop/t.json")); this.jsonSchemaFactory = JsonSchemaFactory.getInstance(SpecVersion.VersionFlag.V7); + + // This URI just needs to point at any path in the same directory as /app/WellKnownTypes.json + // It's required for the JsonSchema#validate method to resolve $ref correctly. + try { + this.BASE_URI = new URI("file:///app/nonexistent_file.json"); + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } } public Set validate(final JsonNode schemaJson, final JsonNode objectJson) { @@ -67,14 +70,39 @@ public List getValidationMessagePaths(final JsonNode schemaJson, final J private Set validateInternal(final JsonNode schemaJson, final JsonNode objectJson) { Preconditions.checkNotNull(schemaJson); Preconditions.checkNotNull(objectJson); -// JsonSchema schema = jsonSchemaFactory.getSchema(schemaJson, schemaValidatorsConfig); -// -// new JsonSchema( -// new ValidationContext(jsonSchemaFactory.getUriFactory(), jsonSchemaFactory.getUriFactory(), jsonMetaSchema, this, config), -// mappedUri, schemaNode, true /* retrieved via id, resolving will not change anything */);; - - return jsonSchemaFactory.getSchema(schemaJson, schemaValidatorsConfig) - .validate(objectJson); + + // Default to draft-07, but have handling for the other metaschemas that networknt supports + JsonMetaSchema metaschema; + JsonNode metaschemaNode = schemaJson.get("$schema"); + if (metaschemaNode == null || "".equals(metaschemaNode.asText())) { + metaschema = JsonMetaSchema.getV7(); + } else { + String metaschemaString = metaschemaNode.asText(); + // We're not using "http://....".equals(), because we want to avoid weirdness with https, etc. + if (metaschemaString.contains("json-schema.org/draft-04")) { + metaschema = JsonMetaSchema.getV4(); + } else if (metaschemaString.contains("json-schema.org/draft-06")) { + metaschema = JsonMetaSchema.getV6(); + } else if (metaschemaString.contains("json-schema.org/draft/2019-09")) { + metaschema = JsonMetaSchema.getV201909(); + } else if (metaschemaString.contains("json-schema.org/draft/2020-12")) { + metaschema = JsonMetaSchema.getV202012(); + } else { + metaschema = JsonMetaSchema.getV7(); + } + } + + ValidationContext context = new ValidationContext( + jsonSchemaFactory.getUriFactory(), + null, + metaschema, + jsonSchemaFactory, + null); + JsonSchema schema = new JsonSchema( + context, + BASE_URI, + schemaJson); + return schema.validate(objectJson); } public boolean test(final JsonNode schemaJson, final JsonNode objectJson) { @@ -149,16 +177,4 @@ public static JsonNode getSchema(final File schemaFile, final String definitionS } } - - public static void main(String[] args) throws URISyntaxException { - SchemaValidatorsConfig c = new SchemaValidatorsConfig(); - c.setUriMappings(ImmutableMap.of("file:foo.json", "file:///Users/edgao/Desktop/t.json")); - - JsonSchemaFactory f = JsonSchemaFactory.getInstance(VersionFlag.V7); - JsonSchema schema = f.getSchema(new URI("file:///Users/edgao/Desktop/inp.json"), c); - Set res = schema.validate(Jsons.deserialize(""" - "arst" - """)); - System.out.println("results were " + res); - } } diff --git a/airbyte-workers/build.gradle b/airbyte-workers/build.gradle index 666f5060ca69..a966543ec8af 100644 --- a/airbyte-workers/build.gradle +++ b/airbyte-workers/build.gradle @@ -116,19 +116,14 @@ task cloudStorageIntegrationTest(type: Test) { } } -task copyWellKnownTypesResource(type: Copy) { - from project(':airbyte-protocol').file('protocol-models/src/main/resources/airbyte_protocol/well_known_types.yaml') - into file('build/docker') -} - -task copyWellKnownTypesDocker(type: Copy) { +task copyWellKnownTypes(type: Copy) { from project(':airbyte-protocol').file('protocol-models/src/main/resources/airbyte_protocol/well_known_types.yaml') into file('build/docker') } tasks.named("buildDockerImage") { dependsOn copyGeneratedTar - dependsOn copyWellKnownTypesResource + dependsOn copyWellKnownTypes } Task publishArtifactsTask = getPublishArtifactsTask("$rootProject.ext.version", project) diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/controller/HeartbeatController.java b/airbyte-workers/src/main/java/io/airbyte/workers/controller/HeartbeatController.java index 4bcdfdc66cb6..5dd650554f5d 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/controller/HeartbeatController.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/controller/HeartbeatController.java @@ -4,8 +4,6 @@ package io.airbyte.workers.controller; -import io.airbyte.commons.json.Jsons; -import io.airbyte.validation.json.JsonSchemaValidator; import io.micronaut.http.HttpHeaders; import io.micronaut.http.HttpResponse; import io.micronaut.http.MediaType; @@ -15,7 +13,6 @@ import io.micronaut.http.annotation.Options; import io.micronaut.http.annotation.Post; import java.util.Map; -import java.util.Set; /** * Heartbeat controller @@ -51,17 +48,4 @@ private void addCorsHeaders(final MutableHttpResponse response) { } } - public static void main(String[] args) { -// Set res = new JsonSchemaValidator().validate( -// Jsons.deserialize(""" -// { -// "$ref": "foo.json#/definitions/String" -// } -// """), -// Jsons.deserialize(""" -// "arst" -// """) -// ); -// System.out.println("Validation result: " + res); - } } diff --git a/airbyte-workers/src/resources/WellKnownTypes.json b/airbyte-workers/src/resources/WellKnownTypes.json deleted file mode 100644 index ec7c2b3cb466..000000000000 --- a/airbyte-workers/src/resources/WellKnownTypes.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "definitions": { - "String": { - "type": "string", - "description": "Arbitrary text" - }, - "BinaryData": { - "type": "string", - "description": "Arbitrary binary data. Represented as base64-encoded strings in the JSON transport. In the future, if we support other transports, may be encoded differently.\n", - "pattern": "^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$" - }, - "Date": { - "type": "string", - "pattern": "^\\d{4}-\\d{2}-\\d{2}( BC)?$", - "description": "RFC 3339§5.6's full-date format, extended with BC era support" - }, - "TimestampWithTimezone": { - "type": "string", - "pattern": "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(\\.\\d+)?(Z|[+\\-]\\d{1,2}:\\d{2})( BC)?$", - "description": "An instant in time. Frequently simply referred to as just a timestamp, or timestamptz. Uses RFC 3339§5.6's date-time format, requiring a \"T\" separator, and extended with BC era support. Note that we do _not_ accept Unix epochs here.\n" - }, - "TimestampWithoutTimezone": { - "type": "string", - "pattern": "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(\\.\\d+)?( BC)?$", - "description": "Also known as a localdatetime, or just datetime. Under RFC 3339§5.6, this would be represented as `full-date \"T\" partial-time`, extended with BC era support.\n" - }, - "TimeWithTimezone": { - "type": "string", - "pattern": "^\\d{2}:\\d{2}:\\d{2}(\\.\\d+)?(Z|[+\\-]\\d{1,2}:\\d{2})$", - "description": "An RFC 3339§5.6 full-time" - }, - "TimeWithoutTimezone": { - "type": "string", - "pattern": "^\\d{2}:\\d{2}:\\d{2}(\\.\\d+)?$", - "description": "An RFC 3339§5.6 partial-time" - }, - "Number": { - "type": "string", - "oneOf": [ - { - "pattern": "-?(0|[0-9]\\d*)(\\.\\d+)?" - }, - { - "enum": [ - "Infinity", - "-Infinity", - "NaN" - ] - } - ], - "description": "Note the mix of regex validation for normal numbers, and enum validation for special values." - }, - "Integer": { - "type": "string", - "oneOf": [ - { - "pattern": "-?(0|[0-9]\\d*)" - }, - { - "enum": [ - "Infinity", - "-Infinity", - "NaN" - ] - } - ] - }, - "Boolean": { - "type": "boolean", - "description": "Note the direct usage of a primitive boolean rather than string. Unlike Numbers and Integers, we don't expect unusual values here." - } - } -} From 31abca79a4ac5403264a77ffec6e128ea12be9a7 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Fri, 18 Nov 2022 14:45:03 -0800 Subject: [PATCH 04/12] add test --- .../validation/json/JsonSchemaValidator.java | 10 +++++- .../json/JsonSchemaValidatorTest.java | 36 +++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/airbyte-json-validation/src/main/java/io/airbyte/validation/json/JsonSchemaValidator.java b/airbyte-json-validation/src/main/java/io/airbyte/validation/json/JsonSchemaValidator.java index 182e08db9e8f..4876ece7c654 100644 --- a/airbyte-json-validation/src/main/java/io/airbyte/validation/json/JsonSchemaValidator.java +++ b/airbyte-json-validation/src/main/java/io/airbyte/validation/json/JsonSchemaValidator.java @@ -5,6 +5,7 @@ package io.airbyte.validation.json; import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.networknt.schema.JsonMetaSchema; import com.networknt.schema.JsonSchema; @@ -34,12 +35,19 @@ public class JsonSchemaValidator { private final URI BASE_URI; public JsonSchemaValidator() { + // This URI just needs to point at any path in the same directory as /app/WellKnownTypes.json + // It's required for the JsonSchema#validate method to resolve $ref correctly. + this("file:///app/nonexistent_file.json"); + } + + @VisibleForTesting + protected JsonSchemaValidator(String baseUri) { this.jsonSchemaFactory = JsonSchemaFactory.getInstance(SpecVersion.VersionFlag.V7); // This URI just needs to point at any path in the same directory as /app/WellKnownTypes.json // It's required for the JsonSchema#validate method to resolve $ref correctly. try { - this.BASE_URI = new URI("file:///app/nonexistent_file.json"); + this.BASE_URI = new URI(baseUri); } catch (URISyntaxException e) { throw new RuntimeException(e); } diff --git a/airbyte-json-validation/src/test/java/io/airbyte/validation/json/JsonSchemaValidatorTest.java b/airbyte-json-validation/src/test/java/io/airbyte/validation/json/JsonSchemaValidatorTest.java index 6dcf3c554b29..a1e4f05dad80 100644 --- a/airbyte-json-validation/src/test/java/io/airbyte/validation/json/JsonSchemaValidatorTest.java +++ b/airbyte-json-validation/src/test/java/io/airbyte/validation/json/JsonSchemaValidatorTest.java @@ -5,6 +5,7 @@ package io.airbyte.validation.json; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; @@ -16,6 +17,8 @@ import java.io.File; import java.io.IOException; import java.nio.file.Files; +import java.util.Collections; +import java.util.Set; import org.junit.jupiter.api.Test; class JsonSchemaValidatorTest { @@ -102,4 +105,37 @@ void test() throws IOException { assertNull(JsonSchemaValidator.getSchema(schemaFile, "NonExistentObject")); } + @Test + public void testResolveReferences() throws IOException { + String referencableSchemas = """ + { + "definitions": { + "ref1": {"type": "string"}, + "ref2": {"type": "boolean"} + } + } + """; + final File schemaFile = IOs.writeFile(Files.createTempDirectory("test"), "WellKnownTypes.json", referencableSchemas).toFile(); + JsonSchemaValidator jsonSchemaValidator = new JsonSchemaValidator("file://" + schemaFile.getParentFile().getAbsolutePath() + "/foo.json"); + + Set validationResult = jsonSchemaValidator.validate( + Jsons.deserialize(""" + { + "type": "object", + "properties": { + "prop1": {"$ref": "WellKnownTypes.json#/definitions/ref1"}, + "prop2": {"$ref": "WellKnownTypes.json#/definitions/ref2"} + } + } + """), + Jsons.deserialize(""" + { + "prop1": "foo", + "prop2": "false" + } + """)); + + assertEquals(Set.of("$.prop2: string found, boolean expected"), validationResult); + } + } From ab696a47498d9b84b8e4220e58f2b31aeac5610f Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Fri, 18 Nov 2022 14:49:34 -0800 Subject: [PATCH 05/12] copypasta error --- .../java/io/airbyte/validation/json/JsonSchemaValidator.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/airbyte-json-validation/src/main/java/io/airbyte/validation/json/JsonSchemaValidator.java b/airbyte-json-validation/src/main/java/io/airbyte/validation/json/JsonSchemaValidator.java index 4876ece7c654..eff86f2fb70e 100644 --- a/airbyte-json-validation/src/main/java/io/airbyte/validation/json/JsonSchemaValidator.java +++ b/airbyte-json-validation/src/main/java/io/airbyte/validation/json/JsonSchemaValidator.java @@ -44,8 +44,6 @@ public JsonSchemaValidator() { protected JsonSchemaValidator(String baseUri) { this.jsonSchemaFactory = JsonSchemaFactory.getInstance(SpecVersion.VersionFlag.V7); - // This URI just needs to point at any path in the same directory as /app/WellKnownTypes.json - // It's required for the JsonSchema#validate method to resolve $ref correctly. try { this.BASE_URI = new URI(baseUri); } catch (URISyntaxException e) { From 27f0d1e853e55e6b34c8e3f580d802614ec22504 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Fri, 18 Nov 2022 14:59:44 -0800 Subject: [PATCH 06/12] formatting --- .../json/JsonSchemaValidatorTest.java | 41 +++++++++---------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/airbyte-json-validation/src/test/java/io/airbyte/validation/json/JsonSchemaValidatorTest.java b/airbyte-json-validation/src/test/java/io/airbyte/validation/json/JsonSchemaValidatorTest.java index a1e4f05dad80..a6a82c5afad8 100644 --- a/airbyte-json-validation/src/test/java/io/airbyte/validation/json/JsonSchemaValidatorTest.java +++ b/airbyte-json-validation/src/test/java/io/airbyte/validation/json/JsonSchemaValidatorTest.java @@ -17,7 +17,6 @@ import java.io.File; import java.io.IOException; import java.nio.file.Files; -import java.util.Collections; import java.util.Set; import org.junit.jupiter.api.Test; @@ -108,32 +107,32 @@ void test() throws IOException { @Test public void testResolveReferences() throws IOException { String referencableSchemas = """ - { - "definitions": { - "ref1": {"type": "string"}, - "ref2": {"type": "boolean"} - } - } - """; + { + "definitions": { + "ref1": {"type": "string"}, + "ref2": {"type": "boolean"} + } + } + """; final File schemaFile = IOs.writeFile(Files.createTempDirectory("test"), "WellKnownTypes.json", referencableSchemas).toFile(); JsonSchemaValidator jsonSchemaValidator = new JsonSchemaValidator("file://" + schemaFile.getParentFile().getAbsolutePath() + "/foo.json"); Set validationResult = jsonSchemaValidator.validate( Jsons.deserialize(""" - { - "type": "object", - "properties": { - "prop1": {"$ref": "WellKnownTypes.json#/definitions/ref1"}, - "prop2": {"$ref": "WellKnownTypes.json#/definitions/ref2"} - } - } - """), + { + "type": "object", + "properties": { + "prop1": {"$ref": "WellKnownTypes.json#/definitions/ref1"}, + "prop2": {"$ref": "WellKnownTypes.json#/definitions/ref2"} + } + } + """), Jsons.deserialize(""" - { - "prop1": "foo", - "prop2": "false" - } - """)); + { + "prop1": "foo", + "prop2": "false" + } + """)); assertEquals(Set.of("$.prop2: string found, boolean expected"), validationResult); } From bd0817e1cda3c2626357836387b7302460b859a0 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Fri, 18 Nov 2022 15:16:46 -0800 Subject: [PATCH 07/12] pmd --- .../java/io/airbyte/validation/json/JsonSchemaValidator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-json-validation/src/main/java/io/airbyte/validation/json/JsonSchemaValidator.java b/airbyte-json-validation/src/main/java/io/airbyte/validation/json/JsonSchemaValidator.java index eff86f2fb70e..3ced1086e654 100644 --- a/airbyte-json-validation/src/main/java/io/airbyte/validation/json/JsonSchemaValidator.java +++ b/airbyte-json-validation/src/main/java/io/airbyte/validation/json/JsonSchemaValidator.java @@ -80,7 +80,7 @@ private Set validateInternal(final JsonNode schemaJson, final // Default to draft-07, but have handling for the other metaschemas that networknt supports JsonMetaSchema metaschema; JsonNode metaschemaNode = schemaJson.get("$schema"); - if (metaschemaNode == null || "".equals(metaschemaNode.asText())) { + if (metaschemaNode == null || metaschemaNode.asText() == null || metaschemaNode.asText().isEmpty()) { metaschema = JsonMetaSchema.getV7(); } else { String metaschemaString = metaschemaNode.asText(); From 95c11f010fc6e9ee1a16d60b3bfaf28cbef7a0d8 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Fri, 18 Nov 2022 15:23:58 -0800 Subject: [PATCH 08/12] other pmd >.> --- .../io/airbyte/validation/json/JsonSchemaValidatorTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-json-validation/src/test/java/io/airbyte/validation/json/JsonSchemaValidatorTest.java b/airbyte-json-validation/src/test/java/io/airbyte/validation/json/JsonSchemaValidatorTest.java index a6a82c5afad8..086b46c19d6f 100644 --- a/airbyte-json-validation/src/test/java/io/airbyte/validation/json/JsonSchemaValidatorTest.java +++ b/airbyte-json-validation/src/test/java/io/airbyte/validation/json/JsonSchemaValidatorTest.java @@ -105,7 +105,7 @@ void test() throws IOException { } @Test - public void testResolveReferences() throws IOException { + void testResolveReferences() throws IOException { String referencableSchemas = """ { "definitions": { From 9c78f67e36d5cf675c03e9bb97b65e142a449c00 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Tue, 22 Nov 2022 16:54:54 -0800 Subject: [PATCH 09/12] generate in gradle --- airbyte-workers/Dockerfile | 11 ++++------- airbyte-workers/build.gradle | 17 +++++++++++++---- build.gradle | 2 ++ 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/airbyte-workers/Dockerfile b/airbyte-workers/Dockerfile index 1b47475936e5..672a4e7ff9af 100644 --- a/airbyte-workers/Dockerfile +++ b/airbyte-workers/Dockerfile @@ -3,8 +3,11 @@ FROM ${JDK_IMAGE} AS worker ARG DOCKER_BUILD_ARCH=amd64 +# Grab well-known types file +COPY WellKnownTypes.json /app + RUN amazon-linux-extras install -y docker -RUN yum install -y jq tar wget gzip && yum clean all +RUN yum install -y jq tar && yum clean all RUN curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/${DOCKER_BUILD_ARCH}/kubectl" \ && chmod +x kubectl && mv kubectl /usr/local/bin/ @@ -20,11 +23,5 @@ WORKDIR /app # Move worker app ADD bin/${APPLICATION}-${VERSION}.tar /app -# Grab well-known types file -RUN wget https://github.com/mikefarah/yq/releases/download/v4.30.4/yq_linux_${DOCKER_BUILD_ARCH}.tar.gz -O - | tar xz \ - && mv yq_linux_${DOCKER_BUILD_ARCH} /usr/bin/yq -COPY well_known_types.yaml /app -RUN yq -o json /app/well_known_types.yaml > /app/WellKnownTypes.json - # wait for upstream dependencies to become available before starting server ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-${VERSION}/bin/${APPLICATION}"] diff --git a/airbyte-workers/build.gradle b/airbyte-workers/build.gradle index a966543ec8af..c336a8aab620 100644 --- a/airbyte-workers/build.gradle +++ b/airbyte-workers/build.gradle @@ -1,3 +1,6 @@ +import groovy.yaml.YamlSlurper +import groovy.json.JsonBuilder + plugins { id 'application' id 'airbyte-integration-test-java' @@ -116,14 +119,20 @@ task cloudStorageIntegrationTest(type: Test) { } } -task copyWellKnownTypes(type: Copy) { - from project(':airbyte-protocol').file('protocol-models/src/main/resources/airbyte_protocol/well_known_types.yaml') - into file('build/docker') +task generateWellKnownTypes() { + doLast { + def wellKnownTypesYaml = project(':airbyte-protocol').file('protocol-models/src/main/resources/airbyte_protocol/well_known_types.yaml').text + def parsedYaml = new YamlSlurper().parseText(wellKnownTypesYaml) + def wellKnownTypesJson = new JsonBuilder(parsedYaml).toPrettyString() + def targetFile = project.file("${buildDir}/docker/WellKnownTypes.json") + targetFile.getParentFile().mkdirs() + targetFile.text = wellKnownTypesJson + } } tasks.named("buildDockerImage") { dependsOn copyGeneratedTar - dependsOn copyWellKnownTypes + dependsOn generateWellKnownTypes } Task publishArtifactsTask = getPublishArtifactsTask("$rootProject.ext.version", project) diff --git a/build.gradle b/build.gradle index d9304dfd9389..3f5301b5c580 100644 --- a/build.gradle +++ b/build.gradle @@ -20,6 +20,8 @@ buildscript { // The alternative is to import the openapi plugin for all modules. // This might need to be updated when we change openapi plugin versions. classpath 'com.fasterxml.jackson.core:jackson-core:2.13.0' + + classpath 'org.codehaus.groovy:groovy-yaml:3.0.3' } } From 55b3854a74cd4c0c4f9d6268be9212c98d2ba4f4 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Tue, 22 Nov 2022 17:07:31 -0800 Subject: [PATCH 10/12] better interface + comment --- .../validation/json/JsonSchemaValidator.java | 36 ++++++++++++------- .../json/JsonSchemaValidatorTest.java | 5 ++- 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/airbyte-json-validation/src/main/java/io/airbyte/validation/json/JsonSchemaValidator.java b/airbyte-json-validation/src/main/java/io/airbyte/validation/json/JsonSchemaValidator.java index 3ced1086e654..5c9507bee977 100644 --- a/airbyte-json-validation/src/main/java/io/airbyte/validation/json/JsonSchemaValidator.java +++ b/airbyte-json-validation/src/main/java/io/airbyte/validation/json/JsonSchemaValidator.java @@ -30,25 +30,37 @@ public class JsonSchemaValidator { private static final Logger LOGGER = LoggerFactory.getLogger(JsonSchemaValidator.class); + // This URI just needs to point at any path in the same directory as /app/WellKnownTypes.json + // It's required for the JsonSchema#validate method to resolve $ref correctly. + private static final URI DEFAULT_BASE_URI; + + static { + try { + DEFAULT_BASE_URI = new URI("file:///app/nonexistent_file.json"); + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } + } private final JsonSchemaFactory jsonSchemaFactory; - private final URI BASE_URI; + private final URI baseUri; public JsonSchemaValidator() { - // This URI just needs to point at any path in the same directory as /app/WellKnownTypes.json - // It's required for the JsonSchema#validate method to resolve $ref correctly. - this("file:///app/nonexistent_file.json"); + this(DEFAULT_BASE_URI); } + /** + * The public constructor hardcodes a URL with access to WellKnownTypes.json. This method allows + * tests to override that URI + * + * Required to resolve $ref schemas using WellKnownTypes.json + * + * @param baseUri The base URI for schema resolution + */ @VisibleForTesting - protected JsonSchemaValidator(String baseUri) { + protected JsonSchemaValidator(URI baseUri) { this.jsonSchemaFactory = JsonSchemaFactory.getInstance(SpecVersion.VersionFlag.V7); - - try { - this.BASE_URI = new URI(baseUri); - } catch (URISyntaxException e) { - throw new RuntimeException(e); - } + this.baseUri = baseUri; } public Set validate(final JsonNode schemaJson, final JsonNode objectJson) { @@ -106,7 +118,7 @@ private Set validateInternal(final JsonNode schemaJson, final null); JsonSchema schema = new JsonSchema( context, - BASE_URI, + baseUri, schemaJson); return schema.validate(objectJson); } diff --git a/airbyte-json-validation/src/test/java/io/airbyte/validation/json/JsonSchemaValidatorTest.java b/airbyte-json-validation/src/test/java/io/airbyte/validation/json/JsonSchemaValidatorTest.java index 086b46c19d6f..d3c65be6de57 100644 --- a/airbyte-json-validation/src/test/java/io/airbyte/validation/json/JsonSchemaValidatorTest.java +++ b/airbyte-json-validation/src/test/java/io/airbyte/validation/json/JsonSchemaValidatorTest.java @@ -16,8 +16,10 @@ import io.airbyte.commons.json.Jsons; import java.io.File; import java.io.IOException; +import java.net.URI; import java.nio.file.Files; import java.util.Set; +import lombok.SneakyThrows; import org.junit.jupiter.api.Test; class JsonSchemaValidatorTest { @@ -104,6 +106,7 @@ void test() throws IOException { assertNull(JsonSchemaValidator.getSchema(schemaFile, "NonExistentObject")); } + @SneakyThrows @Test void testResolveReferences() throws IOException { String referencableSchemas = """ @@ -115,7 +118,7 @@ void testResolveReferences() throws IOException { } """; final File schemaFile = IOs.writeFile(Files.createTempDirectory("test"), "WellKnownTypes.json", referencableSchemas).toFile(); - JsonSchemaValidator jsonSchemaValidator = new JsonSchemaValidator("file://" + schemaFile.getParentFile().getAbsolutePath() + "/foo.json"); + JsonSchemaValidator jsonSchemaValidator = new JsonSchemaValidator(new URI("file://" + schemaFile.getParentFile().getAbsolutePath() + "/foo.json")); Set validationResult = jsonSchemaValidator.validate( Jsons.deserialize(""" From 22aef15ade292ca8424e30c5c50ebab0e2891da3 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Wed, 23 Nov 2022 07:46:57 -0800 Subject: [PATCH 11/12] formatting --- .../io/airbyte/validation/json/JsonSchemaValidatorTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/airbyte-json-validation/src/test/java/io/airbyte/validation/json/JsonSchemaValidatorTest.java b/airbyte-json-validation/src/test/java/io/airbyte/validation/json/JsonSchemaValidatorTest.java index d3c65be6de57..a1a0b511e40c 100644 --- a/airbyte-json-validation/src/test/java/io/airbyte/validation/json/JsonSchemaValidatorTest.java +++ b/airbyte-json-validation/src/test/java/io/airbyte/validation/json/JsonSchemaValidatorTest.java @@ -118,7 +118,8 @@ void testResolveReferences() throws IOException { } """; final File schemaFile = IOs.writeFile(Files.createTempDirectory("test"), "WellKnownTypes.json", referencableSchemas).toFile(); - JsonSchemaValidator jsonSchemaValidator = new JsonSchemaValidator(new URI("file://" + schemaFile.getParentFile().getAbsolutePath() + "/foo.json")); + JsonSchemaValidator jsonSchemaValidator = + new JsonSchemaValidator(new URI("file://" + schemaFile.getParentFile().getAbsolutePath() + "/foo.json")); Set validationResult = jsonSchemaValidator.validate( Jsons.deserialize(""" From 1068283c71f12711306bd657da4cc7a95f62f419 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Wed, 23 Nov 2022 07:46:59 -0800 Subject: [PATCH 12/12] better dockerfile caching --- airbyte-workers/Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/airbyte-workers/Dockerfile b/airbyte-workers/Dockerfile index 672a4e7ff9af..d1eb65b71434 100644 --- a/airbyte-workers/Dockerfile +++ b/airbyte-workers/Dockerfile @@ -3,9 +3,6 @@ FROM ${JDK_IMAGE} AS worker ARG DOCKER_BUILD_ARCH=amd64 -# Grab well-known types file -COPY WellKnownTypes.json /app - RUN amazon-linux-extras install -y docker RUN yum install -y jq tar && yum clean all @@ -23,5 +20,8 @@ WORKDIR /app # Move worker app ADD bin/${APPLICATION}-${VERSION}.tar /app +# Grab well-known types file +COPY WellKnownTypes.json /app + # wait for upstream dependencies to become available before starting server ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-${VERSION}/bin/${APPLICATION}"]