Skip to content

Commit

Permalink
addressing comments
Browse files Browse the repository at this point in the history
  • Loading branch information
pabloem committed Nov 22, 2022
1 parent 2d77e7a commit a14c8c4
Show file tree
Hide file tree
Showing 9 changed files with 228 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -484,7 +484,6 @@ class BeamModulePlugin implements Plugin<Project> {
def jackson_version = "2.13.3"
def jaxb_api_version = "2.3.3"
def jsr305_version = "3.0.2"
def json_org_version = "20200518"
def everit_json_version = "1.14.1"
def kafka_version = "2.4.1"
def nemo_version = "0.1"
Expand Down Expand Up @@ -680,7 +679,7 @@ class BeamModulePlugin implements Plugin<Project> {
joda_time : "joda-time:joda-time:2.10.10",
jsonassert : "org.skyscreamer:jsonassert:1.5.0",
jsr305 : "com.google.code.findbugs:jsr305:$jsr305_version",
json_org : "org.json:json:${json_org_version}",
json_org : "org.json:json:20200518", // Try to keep in sync with google_cloud_platform_libraries_bom transitive deps.
everit_json_schema : "com.github.erosb:everit-json-schema:${everit_json_version}",
junit : "junit:junit:4.13.1",
kafka : "org.apache.kafka:kafka_2.11:$kafka_version",
Expand Down
7 changes: 6 additions & 1 deletion sdks/java/core/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,12 @@ dependencies {
shadow library.java.snappy_java
shadow library.java.joda_time
shadow library.java.json_org
shadow library.java.everit_json_schema
// The everit-json-schema library (com.github.erosb) is used for json-schema.org validation.
// To avoid forcing the library onto users, we ask users to provide it rather than including
// it by default.
// It is only used for optional functionality in JsonUtils schema parsing and conversion.
provided library.java.everit_json_schema
testImplementation library.java.everit_json_schema
provided library.java.junit
provided library.java.hamcrest
provided 'io.airlift:aircompressor:0.18'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,15 @@ private static Schema beamSchemaFromJsonSchema(org.everit.json.schema.ObjectSche
if (propertySchema == null) {
throw new IllegalArgumentException("Unable to parse schema " + jsonSchema.toString());
}
if (propertySchema.getClass().equals(org.everit.json.schema.ObjectSchema.class)) {
if (propertySchema instanceof org.everit.json.schema.ObjectSchema) {
beamSchemaBuilder =
beamSchemaBuilder.addField(
Schema.Field.of(propertyName, beamTypeFromJsonSchemaType(propertySchema)));
} else if (propertySchema.getClass().equals(org.everit.json.schema.ArraySchema.class)) {
} else if (propertySchema instanceof org.everit.json.schema.ArraySchema) {
if (((ArraySchema) propertySchema).getAllItemSchema() == null) {
throw new IllegalArgumentException(
"Array schema is not properly formatted or unsupported: " + propertyName);
}
beamSchemaBuilder =
beamSchemaBuilder.addField(
Schema.Field.of(
Expand All @@ -118,33 +122,39 @@ private static Schema beamSchemaFromJsonSchema(org.everit.json.schema.ObjectSche

/**
 * Maps a single everit JSON schema node to the corresponding Beam {@link Schema.FieldType}.
 *
 * <p>Object schemas become row types (converted recursively), boolean schemas become BOOLEAN,
 * number schemas become INT64 or DOUBLE, string schemas become STRING, reference schemas are
 * resolved and converted recursively, and array schemas become arrays of their item type.
 *
 * @param propertySchema the JSON schema node to convert
 * @return the Beam field type for the given schema node
 * @throws IllegalArgumentException if an array schema has no "all items" schema, or if the
 *     schema type is not one of the types handled here
 */
private static Schema.FieldType beamTypeFromJsonSchemaType(
org.everit.json.schema.Schema propertySchema) {
if (propertySchema.getClass().equals(org.everit.json.schema.ObjectSchema.class)) {
if (propertySchema instanceof org.everit.json.schema.ObjectSchema) {
// Nested objects are converted to a nested Beam row schema.
return Schema.FieldType.row(beamSchemaFromJsonSchema((ObjectSchema) propertySchema));
} else if (propertySchema.getClass().equals(org.everit.json.schema.BooleanSchema.class)) {
} else if (propertySchema instanceof org.everit.json.schema.BooleanSchema) {
return Schema.FieldType.BOOLEAN;
} else if (propertySchema.getClass().equals(org.everit.json.schema.NumberSchema.class)) {
} else if (propertySchema instanceof org.everit.json.schema.NumberSchema) {
// Numbers that require an integer map to INT64; every other number maps to DOUBLE.
return ((NumberSchema) propertySchema).requiresInteger()
? Schema.FieldType.INT64
: Schema.FieldType.DOUBLE;
}
if (propertySchema.getClass().equals(org.everit.json.schema.StringSchema.class)) {
if (propertySchema instanceof org.everit.json.schema.StringSchema) {
return Schema.FieldType.STRING;
} else if (propertySchema.getClass().equals(org.everit.json.schema.ReferenceSchema.class)) {
} else if (propertySchema instanceof org.everit.json.schema.ReferenceSchema) {
// Follow the reference and convert the schema it points to.
org.everit.json.schema.Schema sch = ((ReferenceSchema) propertySchema).getReferredSchema();
return beamTypeFromJsonSchemaType(sch);
} else if (propertySchema instanceof org.everit.json.schema.ArraySchema) {
// Only arrays exposing a single "all items" schema can be converted; reject the rest.
if (((ArraySchema) propertySchema).getAllItemSchema() == null) {
throw new IllegalArgumentException(
"Array schema is not properly formatted or unsupported: " + propertySchema);
}
// The element type is derived recursively from the item schema.
return Schema.FieldType.array(
beamTypeFromJsonSchemaType(((ArraySchema) propertySchema).getAllItemSchema()));
} else {
throw new IllegalArgumentException(
"Unsupported schema type: " + propertySchema.getClass().toString());
throw new IllegalArgumentException("Unsupported schema type: " + propertySchema.getClass());
}
}

/**
 * Parses a JSON schema string into an everit {@code ObjectSchema}.
 *
 * <p>The string is parsed into a {@code JSONObject} and loaded through the everit
 * {@code SchemaLoader}; the loaded schema must be an object schema.
 *
 * @param jsonSchema the JSON schema document as a string
 * @return the loaded schema, cast to {@code ObjectSchema}
 * @throws IllegalArgumentException if the loaded schema is not an object schema
 */
private static org.everit.json.schema.ObjectSchema jsonSchemaFromString(String jsonSchema) {
JSONObject parsedSchema = new JSONObject(jsonSchema);
org.everit.json.schema.Schema schemaValidator =
org.everit.json.schema.loader.SchemaLoader.load(parsedSchema);
// Reject anything that is not an object schema before the cast below.
if (!schemaValidator.getClass().equals(ObjectSchema.class)) {
if (!(schemaValidator instanceof ObjectSchema)) {
throw new IllegalArgumentException(
String.format("The schema is not a valid object schema:\n%s", jsonSchema));
String.format("The schema is not a valid object schema:%n %s", jsonSchema));
}
return (org.everit.json.schema.ObjectSchema) schemaValidator;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,14 @@

import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.containsInAnyOrder;
import static org.hamcrest.Matchers.containsString;
import static org.junit.Assert.assertThrows;

import com.google.common.io.ByteStreams;
import java.io.IOException;
import java.io.InputStream;
import java.util.stream.Collectors;
import org.apache.beam.sdk.schemas.utils.JsonUtils;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.io.ByteStreams;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
Expand Down Expand Up @@ -71,4 +73,91 @@ public void testNestedStructsJsonSchemaToBeamSchema() throws IOException {
Schema.Field.of("veggieLike", Schema.FieldType.BOOLEAN))))));
}
}

/**
 * Checks that a JSON schema whose only property is an array of arrays of objects is converted
 * to a Beam schema with a single field typed as an array of arrays of rows.
 */
@Test
public void testArrayNestedArrayObjectJsonSchemaToBeamSchema() throws IOException {
  String resourcePath = "/schemas/json/array_nested_array_json_schema.json";
  try (InputStream inputStream = getClass().getResourceAsStream(resourcePath)) {
    // getResourceAsStream returns null (it does not throw) when the resource is missing;
    // fail with the resource path instead of an unexplained NullPointerException further down.
    java.util.Objects.requireNonNull(inputStream, "Missing test resource: " + resourcePath);
    // Use the Charset constant rather than a charset name resolved at runtime.
    String stringJsonSchema =
        new String(ByteStreams.toByteArray(inputStream), java.nio.charset.StandardCharsets.UTF_8);
    Schema parsedSchema = JsonUtils.beamSchemaFromJsonSchema(stringJsonSchema);

    assertThat(parsedSchema.getFieldNames(), containsInAnyOrder("complexMatrix"));
    assertThat(
        parsedSchema.getFields().stream().map(Schema.Field::getType).collect(Collectors.toList()),
        containsInAnyOrder(
            Schema.FieldType.array(
                Schema.FieldType.array(
                    Schema.FieldType.row(
                        Schema.of(
                            Schema.Field.of("imaginary", Schema.FieldType.DOUBLE),
                            Schema.Field.of("real", Schema.FieldType.DOUBLE)))))));
  }
}

/**
 * Checks that a JSON schema with objects nested inside objects, one of which holds an array of
 * referenced objects, is converted to the matching nested Beam row/array schema.
 */
@Test
public void testObjectNestedObjectArrayJsonSchemaToBeamSchema() throws IOException {
  String resourcePath = "/schemas/json/object_nested_object_and_array_json_schema.json";
  try (InputStream inputStream = getClass().getResourceAsStream(resourcePath)) {
    // getResourceAsStream returns null (it does not throw) when the resource is missing;
    // fail with the resource path instead of an unexplained NullPointerException further down.
    java.util.Objects.requireNonNull(inputStream, "Missing test resource: " + resourcePath);
    // Use the Charset constant rather than a charset name resolved at runtime.
    String stringJsonSchema =
        new String(ByteStreams.toByteArray(inputStream), java.nio.charset.StandardCharsets.UTF_8);
    Schema parsedSchema = JsonUtils.beamSchemaFromJsonSchema(stringJsonSchema);

    assertThat(parsedSchema.getFieldNames(), containsInAnyOrder("classroom"));
    assertThat(
        parsedSchema.getFields().stream().map(Schema.Field::getType).collect(Collectors.toList()),
        containsInAnyOrder(
            Schema.FieldType.row(
                Schema.of(
                    Schema.Field.of("teacher", Schema.FieldType.STRING),
                    Schema.Field.of(
                        "classroom",
                        Schema.FieldType.row(
                            Schema.of(
                                Schema.Field.of(
                                    "students",
                                    Schema.FieldType.array(
                                        Schema.FieldType.row(
                                            Schema.of(
                                                Schema.Field.of("name", Schema.FieldType.STRING),
                                                Schema.Field.of(
                                                    "age", Schema.FieldType.INT64))))),
                                Schema.Field.of("building", Schema.FieldType.STRING))))))));
  }
}

/**
 * Checks that converting the {@code unsupported_tuple_arrays.json} fixture fails with an
 * {@link IllegalArgumentException} whose message reports an improperly formatted array schema.
 */
@Test
public void testUnsupportedTupleArrays() throws IOException {
  String resourcePath = "/schemas/json/unsupported_tuple_arrays.json";
  try (InputStream inputStream = getClass().getResourceAsStream(resourcePath)) {
    // getResourceAsStream returns null (it does not throw) when the resource is missing;
    // fail with the resource path instead of an unexplained NullPointerException further down.
    java.util.Objects.requireNonNull(inputStream, "Missing test resource: " + resourcePath);
    // Use the Charset constant rather than a charset name resolved at runtime.
    String stringJsonSchema =
        new String(ByteStreams.toByteArray(inputStream), java.nio.charset.StandardCharsets.UTF_8);

    IllegalArgumentException thrownException =
        assertThrows(
            IllegalArgumentException.class,
            () -> JsonUtils.beamSchemaFromJsonSchema(stringJsonSchema));

    assertThat(
        thrownException.getMessage(), containsString("Array schema is not properly formatted"));
  }
}

/**
 * Checks that converting the {@code unsupported_nested_tuple_array.json} fixture fails with an
 * {@link IllegalArgumentException} whose message reports an improperly formatted array schema.
 */
@Test
public void testUnsupportedNestedTupleArrays() throws IOException {
  String resourcePath = "/schemas/json/unsupported_nested_tuple_array.json";
  try (InputStream inputStream = getClass().getResourceAsStream(resourcePath)) {
    // getResourceAsStream returns null (it does not throw) when the resource is missing;
    // fail with the resource path instead of an unexplained NullPointerException further down.
    java.util.Objects.requireNonNull(inputStream, "Missing test resource: " + resourcePath);
    // Use the Charset constant rather than a charset name resolved at runtime.
    String stringJsonSchema =
        new String(ByteStreams.toByteArray(inputStream), java.nio.charset.StandardCharsets.UTF_8);

    IllegalArgumentException thrownException =
        assertThrows(
            IllegalArgumentException.class,
            () -> JsonUtils.beamSchemaFromJsonSchema(stringJsonSchema));

    assertThat(
        thrownException.getMessage(), containsString("Array schema is not properly formatted"));
  }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"type": "object",
"properties": {
"complexMatrix": {
"type": "array",
"items": {
"type": "array",
"items": {
"type": "object",
"properties": {
"real": {"type": "number"},
"imaginary": {"type": "number"}
}
}
}
}
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
{
"$id": "https://example.com/arrays.schema.json",
"description": "A representation of a person, company, organization, or place",
"type": "object",
"properties": {
"fruits": {
Expand Down Expand Up @@ -30,4 +29,4 @@
}
}
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
{
"type": "object",
"properties": {
"classroom": {
"type": "object",
"properties": {
"teacher": {
"type": "string"
},
"classroom": {
"type": "object",
"properties": {
"building": {
"type": "string"
},
"students": {
"type": "array",
"items": {
"$ref": "#/$defs/student"
}
}
}
}
}
}
},
"$defs": {
"student": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"age": {
"type": "integer"
}
}
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
{
"properties": {
"objectWithTuple": {
"type": "object",
"properties": {
"someString": {
"type": "string"
},
"tupleArray": {
"type": "array",
"prefixItems": [
{
"type": "number"
},
{
"type": "string"
},
{
"enum": [
"Street",
"Avenue",
"Boulevard"
]
},
{
"enum": [
"NW",
"NE",
"SW",
"SE"
]
}
]
}
}
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"properties": {
"tupleArray": {
"type": "array",
"prefixItems": [
{ "type": "number" },
{ "type": "string" },
{ "enum": ["Street", "Avenue", "Boulevard"] },
{ "enum": ["NW", "NE", "SW", "SE"] }
]
}
}
}

0 comments on commit a14c8c4

Please sign in to comment.