From 6b6c1d31b1a2c7c07110412e60d08437e2313c9b Mon Sep 17 00:00:00 2001 From: William Lo Date: Thu, 27 Apr 2023 22:53:00 -0400 Subject: [PATCH 1/2] Add check that if nested field is optional and has a non-null default, then it should order the types with its default type first instead of null --- .../apache/gobblin/util/AvroFlattener.java | 6 ++- .../gobblin/util/AvroFlattenerTest.java | 22 +++++++++++ ...ultWithinOptionWithinRecord_flattened.json | 37 +++++++++++++++++++ ...aultWithinOptionWithinRecord_original.json | 33 +++++++++++++++++ 4 files changed, 97 insertions(+), 1 deletion(-) create mode 100644 gobblin-utility/src/test/resources/flattenAvro/nonNullDefaultWithinOptionWithinRecord_flattened.json create mode 100644 gobblin-utility/src/test/resources/flattenAvro/nonNullDefaultWithinOptionWithinRecord_original.json diff --git a/gobblin-utility/src/main/java/org/apache/gobblin/util/AvroFlattener.java b/gobblin-utility/src/main/java/org/apache/gobblin/util/AvroFlattener.java index f81ce38780e..38c2a9aa69b 100644 --- a/gobblin-utility/src/main/java/org/apache/gobblin/util/AvroFlattener.java +++ b/gobblin-utility/src/main/java/org/apache/gobblin/util/AvroFlattener.java @@ -23,13 +23,13 @@ import org.apache.avro.AvroRuntimeException; import org.apache.avro.Schema; -import com.linkedin.avroutil1.compatibility.AvroCompatibilityHelper; import org.apache.commons.lang3.StringUtils; import org.apache.log4j.Logger; import com.google.common.base.Optional; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; +import com.linkedin.avroutil1.compatibility.AvroCompatibilityHelper; /*** * This class provides methods to flatten an Avro Schema to make it more optimal for ORC @@ -402,6 +402,10 @@ private List flattenField(Schema.Field f, ImmutableList pa } // Wrap the Union, since parent Union is an option else { + // If the field within the parent Union has a non-null default value, then null should not be the first member + if (f.hasDefaultValue() && f.defaultVal() != null) { + isNullFirstMember = false; + } if (isNullFirstMember) { flattenedFieldSchema = Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.NULL), flattenedFieldSchema)); diff --git a/gobblin-utility/src/test/java/org/apache/gobblin/util/AvroFlattenerTest.java b/gobblin-utility/src/test/java/org/apache/gobblin/util/AvroFlattenerTest.java index c6889da19d7..fa7fb74e2b7 100644 --- a/gobblin-utility/src/test/java/org/apache/gobblin/util/AvroFlattenerTest.java +++ b/gobblin-utility/src/test/java/org/apache/gobblin/util/AvroFlattenerTest.java @@ -188,4 +188,26 @@ public void testRecordWithinMapWithinMap () throws IOException { } + /** + * Test flattening for non-null default within an Option within another Record + * Record R1 { + * fields: { + * Union [ null, + * Record 2 { + * field: type + * default: type + * } + * ] + * } + * } + */ + @Test + public void testNonNullDefaultWithinOptionWithinRecord () throws IOException { + + Schema originalSchema = readSchemaFromJsonFile("nonNullDefaultWithinOptionWithinRecord_original.json"); + Schema expectedSchema = readSchemaFromJsonFile("nonNullDefaultWithinOptionWithinRecord_flattened.json"); + Assert.assertEquals(new AvroFlattener().flatten(originalSchema, false).toString(), expectedSchema.toString()); + } + + } diff --git a/gobblin-utility/src/test/resources/flattenAvro/nonNullDefaultWithinOptionWithinRecord_flattened.json b/gobblin-utility/src/test/resources/flattenAvro/nonNullDefaultWithinOptionWithinRecord_flattened.json new file mode 100644 index 00000000000..1ef5899093a --- /dev/null +++ b/gobblin-utility/src/test/resources/flattenAvro/nonNullDefaultWithinOptionWithinRecord_flattened.json @@ -0,0 +1,37 @@ +{ + "type":"record", + "name":"parentRecordName", + "fields":[ + { + "name":"parentFieldUnion__unionRecordMemberFieldUnion__superNestedFieldString1", + "type":[ + "string", + "null" + ], + "default":"defaultString1", + "flatten_source":"parentFieldUnion.unionRecordMemberFieldUnion.superNestedFieldString1" + }, + { + "name":"parentFieldUnion__unionRecordMemberFieldUnion__superNestedFieldString2", + "type":[ + "string", + "null" + ], + "default":"defaultString2", + "flatten_source":"parentFieldUnion.unionRecordMemberFieldUnion.superNestedFieldString2" + }, + { + "name":"parentFieldUnion__unionRecordMemberFieldString", + "type":[ + "null", + "string" + ], + "default":null, + "flatten_source":"parentFieldUnion.unionRecordMemberFieldString" + }, + { + "name":"parentFieldInt", + "type":"int" + } + ] +} \ No newline at end of file diff --git a/gobblin-utility/src/test/resources/flattenAvro/nonNullDefaultWithinOptionWithinRecord_original.json b/gobblin-utility/src/test/resources/flattenAvro/nonNullDefaultWithinOptionWithinRecord_original.json new file mode 100644 index 00000000000..ae66543f13a --- /dev/null +++ b/gobblin-utility/src/test/resources/flattenAvro/nonNullDefaultWithinOptionWithinRecord_original.json @@ -0,0 +1,33 @@ +{ + "type" : "record", + "name" : "parentRecordName", + "fields" : [ { + "name" : "parentFieldUnion", + "type" : [ "null", { + "type" : "record", + "name" : "unionRecordMember", + "fields" : [ { + "name" : "unionRecordMemberFieldUnion", + "type" : [ "null", { + "type" : "record", + "name" : "superNestedRecord", + "fields" : [ { + "name" : "superNestedFieldString1", + "type" : "string", + "default": "defaultString1" + }, { + "name" : "superNestedFieldString2", + "type" : "string", + "default": "defaultString2" + } ] + } ] + }, { + "name" : "unionRecordMemberFieldString", + "type" : "string" + } ] + } ] + }, { + "name" : "parentFieldInt", + "type" : "int" + } ] +} \ No newline at end of file From eae0712a65d7e72f022d194069beb9c94e85e519 Mon Sep 17 00:00:00 2001 From: William Lo Date: Thu, 27 Apr 2023 23:01:35 -0400 Subject: [PATCH 2/2] add default type check in test --- .../java/org/apache/gobblin/util/AvroFlattenerTest.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/gobblin-utility/src/test/java/org/apache/gobblin/util/AvroFlattenerTest.java b/gobblin-utility/src/test/java/org/apache/gobblin/util/AvroFlattenerTest.java index fa7fb74e2b7..a4c9477b5c9 100644 --- a/gobblin-utility/src/test/java/org/apache/gobblin/util/AvroFlattenerTest.java +++ b/gobblin-utility/src/test/java/org/apache/gobblin/util/AvroFlattenerTest.java @@ -22,6 +22,8 @@ import org.testng.Assert; import org.testng.annotations.Test; +import com.linkedin.avroutil1.compatibility.AvroCompatibilityHelper; + public class AvroFlattenerTest { @@ -206,7 +208,11 @@ public void testNonNullDefaultWithinOptionWithinRecord () throws IOException { Schema originalSchema = readSchemaFromJsonFile("nonNullDefaultWithinOptionWithinRecord_original.json"); Schema expectedSchema = readSchemaFromJsonFile("nonNullDefaultWithinOptionWithinRecord_flattened.json"); - Assert.assertEquals(new AvroFlattener().flatten(originalSchema, false).toString(), expectedSchema.toString()); + Schema flattenedSchema = new AvroFlattener().flatten(originalSchema, false); + Assert.assertEquals(AvroCompatibilityHelper.getSpecificDefaultValue( + flattenedSchema.getField("parentFieldUnion__unionRecordMemberFieldUnion__superNestedFieldString1")).toString(), + "defaultString1"); + Assert.assertEquals(flattenedSchema.toString(), expectedSchema.toString()); }