Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RecordSchemaValidator can resolve $ref schemas #19625

Merged
merged 12 commits into from
Nov 30, 2022
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,19 @@
package io.airbyte.validation.json;

import com.fasterxml.jackson.databind.JsonNode;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.networknt.schema.JsonMetaSchema;
import com.networknt.schema.JsonSchema;
import com.networknt.schema.JsonSchemaFactory;
import com.networknt.schema.SchemaValidatorsConfig;
import com.networknt.schema.SpecVersion;
import com.networknt.schema.ValidationContext;
import com.networknt.schema.ValidationMessage;
import io.airbyte.commons.string.Strings;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
Expand All @@ -25,13 +30,37 @@
public class JsonSchemaValidator {

private static final Logger LOGGER = LoggerFactory.getLogger(JsonSchemaValidator.class);
// This URI just needs to point at any path in the same directory as /app/WellKnownTypes.json
// It's required for the JsonSchema#validate method to resolve $ref correctly.
private static final URI DEFAULT_BASE_URI;

static {
try {
DEFAULT_BASE_URI = new URI("file:///app/nonexistent_file.json");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not use file:///app/WellKnownTypes.json instead?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just to make it clear that it doesn't need to be the exact file being referenced. E.g. if we later added a second file, we wouldn't need to update this URI.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the part that always feels a bit unusual is referencing a non existing file.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hm. what if we appended the nonexistent_file.json in the constructor? so callers would just need new JsonSchemaValidator(new URI("file:///app/"))

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

bump @gosusnp

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure that appending this in the constructor helps understanding that much. It probably boils down to why do we actually need to pass a full URI rather than just the actual base URI.
I'd say that it doesn't feel ideal, at the same time, it doesn't seem important enough to block this PR. The comments helps with the understanding.

} catch (URISyntaxException e) {
throw new RuntimeException(e);
}
}

private final SchemaValidatorsConfig schemaValidatorsConfig;
private final JsonSchemaFactory jsonSchemaFactory;
private final URI baseUri;

public JsonSchemaValidator() {
this.schemaValidatorsConfig = new SchemaValidatorsConfig();
this(DEFAULT_BASE_URI);
}

/**
* The public constructor hardcodes a URL with access to WellKnownTypes.json. This method allows
* tests to override that URI
*
* Required to resolve $ref schemas using WellKnownTypes.json
*
* @param baseUri The base URI for schema resolution
*/
@VisibleForTesting
protected JsonSchemaValidator(URI baseUri) {
this.jsonSchemaFactory = JsonSchemaFactory.getInstance(SpecVersion.VersionFlag.V7);
this.baseUri = baseUri;
}

public Set<String> validate(final JsonNode schemaJson, final JsonNode objectJson) {
Expand Down Expand Up @@ -60,8 +89,38 @@ private Set<ValidationMessage> validateInternal(final JsonNode schemaJson, final
Preconditions.checkNotNull(schemaJson);
Preconditions.checkNotNull(objectJson);

return jsonSchemaFactory.getSchema(schemaJson, schemaValidatorsConfig)
.validate(objectJson);
// Default to draft-07, but have handling for the other metaschemas that networknt supports
JsonMetaSchema metaschema;
JsonNode metaschemaNode = schemaJson.get("$schema");
if (metaschemaNode == null || metaschemaNode.asText() == null || metaschemaNode.asText().isEmpty()) {
metaschema = JsonMetaSchema.getV7();
} else {
String metaschemaString = metaschemaNode.asText();
// We're not using "http://....".equals(), because we want to avoid weirdness with https, etc.
if (metaschemaString.contains("json-schema.org/draft-04")) {
metaschema = JsonMetaSchema.getV4();
} else if (metaschemaString.contains("json-schema.org/draft-06")) {
metaschema = JsonMetaSchema.getV6();
} else if (metaschemaString.contains("json-schema.org/draft/2019-09")) {
metaschema = JsonMetaSchema.getV201909();
} else if (metaschemaString.contains("json-schema.org/draft/2020-12")) {
metaschema = JsonMetaSchema.getV202012();
} else {
metaschema = JsonMetaSchema.getV7();
}
}

ValidationContext context = new ValidationContext(
jsonSchemaFactory.getUriFactory(),
null,
metaschema,
jsonSchemaFactory,
null);
JsonSchema schema = new JsonSchema(
context,
baseUri,
schemaJson);
return schema.validate(objectJson);
}

public boolean test(final JsonNode schemaJson, final JsonNode objectJson) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
package io.airbyte.validation.json;

import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
Expand All @@ -15,7 +16,10 @@
import io.airbyte.commons.json.Jsons;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.nio.file.Files;
import java.util.Set;
import lombok.SneakyThrows;
import org.junit.jupiter.api.Test;

class JsonSchemaValidatorTest {
Expand Down Expand Up @@ -102,4 +106,38 @@ void test() throws IOException {
assertNull(JsonSchemaValidator.getSchema(schemaFile, "NonExistentObject"));
}

@SneakyThrows
@Test
void testResolveReferences() throws IOException {
String referencableSchemas = """
{
"definitions": {
"ref1": {"type": "string"},
"ref2": {"type": "boolean"}
}
}
""";
final File schemaFile = IOs.writeFile(Files.createTempDirectory("test"), "WellKnownTypes.json", referencableSchemas).toFile();
JsonSchemaValidator jsonSchemaValidator = new JsonSchemaValidator(new URI("file://" + schemaFile.getParentFile().getAbsolutePath() + "/foo.json"));

Set<String> validationResult = jsonSchemaValidator.validate(
Jsons.deserialize("""
{
"type": "object",
"properties": {
"prop1": {"$ref": "WellKnownTypes.json#/definitions/ref1"},
"prop2": {"$ref": "WellKnownTypes.json#/definitions/ref2"}
}
}
"""),
Jsons.deserialize("""
{
"prop1": "foo",
"prop2": "false"
}
"""));

assertEquals(Set.of("$.prop2: string found, boolean expected"), validationResult);
}

}
3 changes: 3 additions & 0 deletions airbyte-workers/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ FROM ${JDK_IMAGE} AS worker

ARG DOCKER_BUILD_ARCH=amd64

# Grab well-known types file
COPY WellKnownTypes.json /app
edgao marked this conversation as resolved.
Show resolved Hide resolved

RUN amazon-linux-extras install -y docker
RUN yum install -y jq tar && yum clean all

Expand Down
15 changes: 15 additions & 0 deletions airbyte-workers/build.gradle
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import groovy.yaml.YamlSlurper
import groovy.json.JsonBuilder

plugins {
id 'application'
id 'airbyte-integration-test-java'
Expand Down Expand Up @@ -116,8 +119,20 @@ task cloudStorageIntegrationTest(type: Test) {
}
}

task generateWellKnownTypes() {
doLast {
def wellKnownTypesYaml = project(':airbyte-protocol').file('protocol-models/src/main/resources/airbyte_protocol/well_known_types.yaml').text
def parsedYaml = new YamlSlurper().parseText(wellKnownTypesYaml)
def wellKnownTypesJson = new JsonBuilder(parsedYaml).toPrettyString()
def targetFile = project.file("${buildDir}/docker/WellKnownTypes.json")
targetFile.getParentFile().mkdirs()
targetFile.text = wellKnownTypesJson
}
}

tasks.named("buildDockerImage") {
dependsOn copyGeneratedTar
dependsOn generateWellKnownTypes
}

Task publishArtifactsTask = getPublishArtifactsTask("$rootProject.ext.version", project)
2 changes: 2 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ buildscript {
// The alternative is to import the openapi plugin for all modules.
// This might need to be updated when we change openapi plugin versions.
classpath 'com.fasterxml.jackson.core:jackson-core:2.13.0'

classpath 'org.codehaus.groovy:groovy-yaml:3.0.3'
}
}

Expand Down