From 82349c0d84042f56f42353ea8a8fa71205b22ffd Mon Sep 17 00:00:00 2001 From: zouxxyy Date: Thu, 10 Nov 2022 17:58:29 +0800 Subject: [PATCH 1/5] [5191] Fix avro compatibility with spark3.2+ --- .../org/apache/hudi/avro/HoodieAvroUtils.java | 47 +++++++++---------- .../debezium/AbstractDebeziumAvroPayload.java | 8 +++- .../apache/hudi/avro/TestHoodieAvroUtils.java | 9 +++- ...writeNonDefaultsWithLatestAvroPayload.java | 8 ++-- .../model/TestPartialUpdateAvroPayload.java | 2 +- 5 files changed, 43 insertions(+), 31 deletions(-) diff --git a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java index 5288f7fa0cf6..3f46c943e14f 100644 --- a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java @@ -493,32 +493,31 @@ public static Object getNestedFieldVal(GenericRecord record, String fieldName, b String[] parts = fieldName.split("\\."); GenericRecord valueNode = record; int i = 0; - try { - for (; i < parts.length; i++) { - String part = parts[i]; - Object val = valueNode.get(part); - if (val == null) { - break; - } - // return, if last part of name - if (i == parts.length - 1) { - Schema fieldSchema = valueNode.getSchema().getField(part).schema(); - return convertValueForSpecificDataTypes(fieldSchema, val, consistentLogicalTimestampEnabled); - } else { - // VC: Need a test here - if (!(val instanceof GenericRecord)) { - throw new HoodieException("Cannot find a record at part value :" + part); - } - valueNode = (GenericRecord) val; - } + for (; i < parts.length; i++) { + String part = parts[i]; + Object val; + try { + val = valueNode.get(part); + } catch (AvroRuntimeException e) { + // Since avro 1.10, arvo will throw AvroRuntimeException("Not a valid schema field: " + key) + // rather than return null like the previous version if record doesn't contain this key. + val = null; + } + if (val == null) { + break; } - } catch (AvroRuntimeException e) { - // Since avro 1.10, arvo will throw AvroRuntimeException("Not a valid schema field: " + key) - // rather than return null like the previous version if if record doesn't contain this key. - // So when returnNullIfNotFound is true, catch this exception. - if (!returnNullIfNotFound) { - throw e; + + // return, if last part of name + if (i == parts.length - 1) { + Schema fieldSchema = valueNode.getSchema().getField(part).schema(); + return convertValueForSpecificDataTypes(fieldSchema, val, consistentLogicalTimestampEnabled); + } else { + // VC: Need a test here + if (!(val instanceof GenericRecord)) { + throw new HoodieException("Cannot find a record at part value :" + part); + } + valueNode = (GenericRecord) val; } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/debezium/AbstractDebeziumAvroPayload.java b/hudi-common/src/main/java/org/apache/hudi/common/model/debezium/AbstractDebeziumAvroPayload.java index 33f1d9f0025b..5ab7dabb2439 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/debezium/AbstractDebeziumAvroPayload.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/debezium/AbstractDebeziumAvroPayload.java @@ -18,6 +18,7 @@ package org.apache.hudi.common.model.debezium; +import org.apache.avro.AvroRuntimeException; import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload; import org.apache.hudi.common.util.Option; @@ -76,7 +77,12 @@ private Option handleDeleteOperation(IndexedRecord insertRecord) boolean delete = false; if (insertRecord instanceof GenericRecord) { GenericRecord record = (GenericRecord) insertRecord; - Object value = record.get(DebeziumConstants.FLATTENED_OP_COL_NAME); + Object value; + try { + value = record.get(DebeziumConstants.FLATTENED_OP_COL_NAME); + } catch (AvroRuntimeException e) { + value = null; + } delete = value != null && value.toString().equalsIgnoreCase(DebeziumConstants.DELETE_OP); } diff --git a/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java b/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java index 3371085d2130..694ae68eded2 100644 --- a/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java @@ -18,6 +18,7 @@ package org.apache.hudi.avro; +import org.apache.avro.AvroRuntimeException; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.testutils.SchemaTestUtil; import org.apache.hudi.exception.SchemaCompatibilityException; @@ -253,7 +254,13 @@ public void testRemoveFields() { assertEquals("key1", rec1.get("_row_key")); assertEquals("val1", rec1.get("non_pii_col")); assertEquals(3.5, rec1.get("timestamp")); - assertNull(rec1.get("pii_col")); + Object removed; + try { + removed = rec1.get("pii_col"); + } catch (AvroRuntimeException e) { + removed = null; + } + assertNull(removed); assertEquals(expectedSchema, rec1.getSchema()); // non-partitioned table test with empty list of fields. diff --git a/hudi-common/src/test/java/org/apache/hudi/common/model/TestOverwriteNonDefaultsWithLatestAvroPayload.java b/hudi-common/src/test/java/org/apache/hudi/common/model/TestOverwriteNonDefaultsWithLatestAvroPayload.java index 0807b41f610c..4b7e4bda0b36 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/model/TestOverwriteNonDefaultsWithLatestAvroPayload.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/model/TestOverwriteNonDefaultsWithLatestAvroPayload.java @@ -181,10 +181,10 @@ public void testDeletedRecord() throws IOException { @Test public void testNullColumn() throws IOException { Schema avroSchema = Schema.createRecord(Arrays.asList( - new Schema.Field("id", Schema.createUnion(Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.NULL)), "", JsonProperties.NULL_VALUE), - new Schema.Field("name", Schema.createUnion(Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.NULL)), "", JsonProperties.NULL_VALUE), - new Schema.Field("age", Schema.createUnion(Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.NULL)), "", JsonProperties.NULL_VALUE), - new Schema.Field("job", Schema.createUnion(Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.NULL)), "", JsonProperties.NULL_VALUE) + new Schema.Field("id", Schema.createUnion(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.STRING)), "", JsonProperties.NULL_VALUE), + new Schema.Field("name", Schema.createUnion(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.STRING)), "", JsonProperties.NULL_VALUE), + new Schema.Field("age", Schema.createUnion(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.STRING)), "", JsonProperties.NULL_VALUE), + new Schema.Field("job", Schema.createUnion(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.STRING)), "", JsonProperties.NULL_VALUE) )); GenericRecord record1 = new GenericData.Record(avroSchema); record1.put("id", "1"); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/model/TestPartialUpdateAvroPayload.java b/hudi-common/src/test/java/org/apache/hudi/common/model/TestPartialUpdateAvroPayload.java index d7e1a6146ad3..8f463fbfa018 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/model/TestPartialUpdateAvroPayload.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/model/TestPartialUpdateAvroPayload.java @@ -55,7 +55,7 @@ public class TestPartialUpdateAvroPayload { + " {\"name\": \"id\", \"type\": [\"null\", \"string\"]},\n" + " {\"name\": \"partition\", \"type\": [\"null\", \"string\"]},\n" + " {\"name\": \"ts\", \"type\": [\"null\", \"long\"]},\n" - + " {\"name\": \"_hoodie_is_deleted\", \"type\": [\"null\", \"boolean\"], \"default\":false},\n" + + " {\"name\": \"_hoodie_is_deleted\", \"type\": [\"boolean\", \"null\"], \"default\":false},\n" + " {\"name\": \"city\", \"type\": [\"null\", \"string\"]},\n" + " {\"name\": \"child\", \"type\": [\"null\", {\"type\": \"array\", \"items\": \"string\"}]}\n" + " ]\n" From fe4f220520b2b989bde89589296675f63c760e2a Mon Sep 17 00:00:00 2001 From: zouxxyy Date: Thu, 10 Nov 2022 23:24:05 +0800 Subject: [PATCH 2/5] update --- .../main/java/org/apache/hudi/avro/HoodieAvroUtils.java | 7 +++++++ .../apache/hudi/common/functional/TestHoodieLogFormat.java | 2 +- .../apache/hudi/common/util/TestObjectSizeCalculator.java | 4 +++- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java index 3f46c943e14f..ca86c059cef8 100644 --- a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java @@ -54,6 +54,8 @@ import org.apache.avro.io.JsonEncoder; import org.apache.avro.specific.SpecificRecordBase; +import org.apache.hadoop.util.VersionUtil; + import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; @@ -89,6 +91,7 @@ */ public class HoodieAvroUtils { + public static final String AVRO_VERSION = Schema.class.getPackage().getImplementationVersion(); private static final ThreadLocal BINARY_ENCODER = ThreadLocal.withInitial(() -> null); private static final ThreadLocal BINARY_DECODER = ThreadLocal.withInitial(() -> null); @@ -1032,4 +1035,8 @@ public GenericRecord next() { public static GenericRecord rewriteRecordDeep(GenericRecord oldRecord, Schema newSchema) { return rewriteRecordWithNewSchema(oldRecord, newSchema, Collections.EMPTY_MAP); } + + public static boolean gteqAvro1_10() { + return VersionUtil.compareVersions(AVRO_VERSION, "1.10") >= 0; + } } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java b/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java index 1c58727b3995..41443c9c0622 100755 --- a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java @@ -2347,7 +2347,7 @@ public void testDataBlockFormatAppendAndReadWithProjectedSchema( new HashMap() {{ put(HoodieLogBlockType.AVRO_DATA_BLOCK, 0); // not supported put(HoodieLogBlockType.HFILE_DATA_BLOCK, 0); // not supported - put(HoodieLogBlockType.PARQUET_DATA_BLOCK, 2605); + put(HoodieLogBlockType.PARQUET_DATA_BLOCK, HoodieAvroUtils.gteqAvro1_10() ? 2593 : 2605); }}; List recordsRead = getRecords(dataBlockRead); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestObjectSizeCalculator.java b/hudi-common/src/test/java/org/apache/hudi/common/util/TestObjectSizeCalculator.java index 712f4b85f84a..70f8e43533cd 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/TestObjectSizeCalculator.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/util/TestObjectSizeCalculator.java @@ -19,6 +19,7 @@ package org.apache.hudi.common.util; +import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.common.model.HoodieRecord; import org.apache.avro.Schema; @@ -73,7 +74,8 @@ public void testGetObjectSize() { assertEquals(32, getObjectSize(emptyClass)); assertEquals(40, getObjectSize(stringClass)); assertEquals(40, getObjectSize(payloadClass)); - assertEquals(1240, getObjectSize(Schema.create(Schema.Type.STRING))); + assertEquals(HoodieAvroUtils.gteqAvro1_10() ? 1320 : 1240, + getObjectSize(Schema.create(Schema.Type.STRING))); assertEquals(104, getObjectSize(person)); } From 05ddfadc13c1e07ec8e76068c0baab8a0c0c064d Mon Sep 17 00:00:00 2001 From: zouxxyy Date: Fri, 11 Nov 2022 15:57:52 +0800 Subject: [PATCH 3/5] fix comments --- .../org/apache/hudi/avro/HoodieAvroUtils.java | 69 ++++++++++++++----- .../debezium/AbstractDebeziumAvroPayload.java | 9 +-- .../apache/hudi/avro/TestHoodieAvroUtils.java | 61 ++++++++++++++-- .../functional/TestHoodieLogFormat.java | 2 +- .../common/util/TestObjectSizeCalculator.java | 4 +- 5 files changed, 111 insertions(+), 34 deletions(-) diff --git a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java index ca86c059cef8..53a114e3e576 100644 --- a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java @@ -481,6 +481,32 @@ public static String getRootLevelFieldName(String fieldName) { return fieldName.split("\\.")[0]; } + /** + * Obtain value of the provided key, which is consistent with avro before 1.10 + */ + public static Object getFieldVal(GenericRecord record, String key) { + return getFieldVal(record, key, true); + } + + /** + * Obtain value of the provided key, when set returnNullIfNotFound false, + * it is consistent with avro after 1.10 + */ + public static Object getFieldVal(GenericRecord record, String key, boolean returnNullIfNotFound) { + if (record.getSchema().getField(key) == null) { + if (returnNullIfNotFound) { + return null; + } else { + // Since avro 1.10, arvo will throw AvroRuntimeException("Not a valid schema field: " + key) + // rather than return null like the previous version if record doesn't contain this key. + // Here we simulate this behavior. + throw new AvroRuntimeException("Not a valid schema field: " + key); + } + } else { + return record.get(key); + } + } + /** * Obtain value of the provided field as string, denoted by dot notation. e.g: a.b.c */ @@ -495,43 +521,46 @@ public static String getNestedFieldValAsString(GenericRecord record, String fiel public static Object getNestedFieldVal(GenericRecord record, String fieldName, boolean returnNullIfNotFound, boolean consistentLogicalTimestampEnabled) { String[] parts = fieldName.split("\\."); GenericRecord valueNode = record; - int i = 0; - for (; i < parts.length; i++) { + for (int i = 0; i < parts.length; i++) { String part = parts[i]; Object val; try { - val = valueNode.get(part); + val = HoodieAvroUtils.getFieldVal(valueNode, part, returnNullIfNotFound); } catch (AvroRuntimeException e) { - // Since avro 1.10, arvo will throw AvroRuntimeException("Not a valid schema field: " + key) - // rather than return null like the previous version if record doesn't contain this key. - val = null; - } - if (val == null) { - break; + if (returnNullIfNotFound) { + return null; + } else { + throw new HoodieException( + fieldName + "(Part -" + parts[i] + ") field not found in record. Acceptable fields were :" + + valueNode.getSchema().getFields().stream().map(Field::name).collect(Collectors.toList())); + } } - // return, if last part of name if (i == parts.length - 1) { - Schema fieldSchema = valueNode.getSchema().getField(part).schema(); - return convertValueForSpecificDataTypes(fieldSchema, val, consistentLogicalTimestampEnabled); + // return, if last part of name + if (val == null) { + return null; + } else { + Schema fieldSchema = valueNode.getSchema().getField(part).schema(); + return convertValueForSpecificDataTypes(fieldSchema, val, consistentLogicalTimestampEnabled); + } } else { - // VC: Need a test here if (!(val instanceof GenericRecord)) { throw new HoodieException("Cannot find a record at part value :" + part); + } else { + valueNode = (GenericRecord) val; } - valueNode = (GenericRecord) val; } } + // This can only be reached if the length of parts is 0 if (returnNullIfNotFound) { return null; - } else if (valueNode.getSchema().getField(parts[i]) == null) { + } else { throw new HoodieException( - fieldName + "(Part -" + parts[i] + ") field not found in record. Acceptable fields were :" + fieldName + " field not found in record. Acceptable fields were :" + valueNode.getSchema().getFields().stream().map(Field::name).collect(Collectors.toList())); - } else { - throw new HoodieException("The value of " + parts[i] + " can not be null"); } } @@ -1036,6 +1065,10 @@ public static GenericRecord rewriteRecordDeep(GenericRecord oldRecord, Schema ne return rewriteRecordWithNewSchema(oldRecord, newSchema, Collections.EMPTY_MAP); } + public static boolean gteqAvro1_9() { + return VersionUtil.compareVersions(AVRO_VERSION, "1.9") >= 0; + } + public static boolean gteqAvro1_10() { return VersionUtil.compareVersions(AVRO_VERSION, "1.10") >= 0; } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/debezium/AbstractDebeziumAvroPayload.java b/hudi-common/src/main/java/org/apache/hudi/common/model/debezium/AbstractDebeziumAvroPayload.java index 5ab7dabb2439..9082d572a4bd 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/debezium/AbstractDebeziumAvroPayload.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/debezium/AbstractDebeziumAvroPayload.java @@ -18,7 +18,7 @@ package org.apache.hudi.common.model.debezium; -import org.apache.avro.AvroRuntimeException; +import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload; import org.apache.hudi.common.util.Option; @@ -77,12 +77,7 @@ private Option handleDeleteOperation(IndexedRecord insertRecord) boolean delete = false; if (insertRecord instanceof GenericRecord) { GenericRecord record = (GenericRecord) insertRecord; - Object value; - try { - value = record.get(DebeziumConstants.FLATTENED_OP_COL_NAME); - } catch (AvroRuntimeException e) { - value = null; - } + Object value = HoodieAvroUtils.getFieldVal(record, DebeziumConstants.FLATTENED_OP_COL_NAME); delete = value != null && value.toString().equalsIgnoreCase(DebeziumConstants.DELETE_OP); } diff --git a/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java b/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java index 694ae68eded2..3c8d43d49472 100644 --- a/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java @@ -18,15 +18,17 @@ package org.apache.hudi.avro; -import org.apache.avro.AvroRuntimeException; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.testutils.SchemaTestUtil; +import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.SchemaCompatibilityException; +import org.apache.avro.AvroRuntimeException; import org.apache.avro.JsonProperties; import org.apache.avro.Schema; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericRecord; + import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Assertions; @@ -254,13 +256,12 @@ public void testRemoveFields() { assertEquals("key1", rec1.get("_row_key")); assertEquals("val1", rec1.get("non_pii_col")); assertEquals(3.5, rec1.get("timestamp")); - Object removed; - try { - removed = rec1.get("pii_col"); - } catch (AvroRuntimeException e) { - removed = null; + if (HoodieAvroUtils.gteqAvro1_10()) { + GenericRecord finalRec1 = rec1; + assertThrows(AvroRuntimeException.class, () -> finalRec1.get("pii_col")); + } else { + assertNull(rec1.get("pii_col")); } - assertNull(removed); assertEquals(expectedSchema, rec1.getSchema()); // non-partitioned table test with empty list of fields. @@ -309,6 +310,52 @@ public void testGetNestedFieldVal() { } } + @Test + public void testGetNestedFieldValWithNestedField() { + Schema nestedSchema = new Schema.Parser().parse(SCHEMA_WITH_NESTED_FIELD); + GenericRecord rec = new GenericData.Record(nestedSchema); + + // test get . + assertEquals(". field not found in record. Acceptable fields were :[firstname, lastname, student]", + assertThrows(HoodieException.class, () -> + HoodieAvroUtils.getNestedFieldVal(rec, ".", false, false)).getMessage()); + + // test get fake_key + assertEquals("fake_key(Part -fake_key) field not found in record. Acceptable fields were :[firstname, lastname, student]", + assertThrows(HoodieException.class, () -> + HoodieAvroUtils.getNestedFieldVal(rec, "fake_key", false, false)).getMessage()); + + // test get student(null) + assertNull(HoodieAvroUtils.getNestedFieldVal(rec, "student", false, false)); + + // test get student + GenericRecord studentRecord = new GenericData.Record(rec.getSchema().getField("student").schema()); + studentRecord.put("firstname", "person"); + rec.put("student", studentRecord); + assertEquals(studentRecord, HoodieAvroUtils.getNestedFieldVal(rec, "student", false, false)); + + // test get student.fake_key + assertEquals("student.fake_key(Part -fake_key) field not found in record. Acceptable fields were :[firstname, lastname]", + assertThrows(HoodieException.class, () -> + HoodieAvroUtils.getNestedFieldVal(rec, "student.fake_key", false, false)).getMessage()); + + // test get student.firstname + assertEquals("person", HoodieAvroUtils.getNestedFieldVal(rec, "student.firstname", false, false)); + + // test get student.lastname(null) + assertNull(HoodieAvroUtils.getNestedFieldVal(rec, "student.lastname", false, false)); + + // test get student.firstname.fake_key + assertEquals("Cannot find a record at part value :firstname", + assertThrows(HoodieException.class, () -> + HoodieAvroUtils.getNestedFieldVal(rec, "student.firstname.fake_key", false, false)).getMessage()); + + // test get student.lastname(null).fake_key + assertEquals("Cannot find a record at part value :lastname", + assertThrows(HoodieException.class, () -> + HoodieAvroUtils.getNestedFieldVal(rec, "student.lastname.fake_key", false, false)).getMessage()); + } + @Test public void testGetNestedFieldValWithDecimalField() { GenericRecord rec = new GenericData.Record(new Schema.Parser().parse(SCHEMA_WITH_DECIMAL_FIELD)); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java b/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java index 41443c9c0622..f16eb52d3686 100755 --- a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java @@ -2347,7 +2347,7 @@ public void testDataBlockFormatAppendAndReadWithProjectedSchema( new HashMap() {{ put(HoodieLogBlockType.AVRO_DATA_BLOCK, 0); // not supported put(HoodieLogBlockType.HFILE_DATA_BLOCK, 0); // not supported - put(HoodieLogBlockType.PARQUET_DATA_BLOCK, HoodieAvroUtils.gteqAvro1_10() ? 2593 : 2605); + put(HoodieLogBlockType.PARQUET_DATA_BLOCK, HoodieAvroUtils.gteqAvro1_9() ? 2593 : 2605); }}; List recordsRead = getRecords(dataBlockRead); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestObjectSizeCalculator.java b/hudi-common/src/test/java/org/apache/hudi/common/util/TestObjectSizeCalculator.java index 70f8e43533cd..625e30198404 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/TestObjectSizeCalculator.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/util/TestObjectSizeCalculator.java @@ -74,7 +74,9 @@ public void testGetObjectSize() { assertEquals(32, getObjectSize(emptyClass)); assertEquals(40, getObjectSize(stringClass)); assertEquals(40, getObjectSize(payloadClass)); - assertEquals(HoodieAvroUtils.gteqAvro1_10() ? 1320 : 1240, + // Since avro 1.9, Schema use ConcurrentHashMap instead of LinkedHashMap to + // implement props, which will change the size of the object. + assertEquals(HoodieAvroUtils.gteqAvro1_9() ? 1320 : 1240, getObjectSize(Schema.create(Schema.Type.STRING))); assertEquals(104, getObjectSize(person)); } From dccac34068e8042fe88ffc8db29860a8676d262e Mon Sep 17 00:00:00 2001 From: zouxxyy Date: Fri, 11 Nov 2022 22:05:51 +0800 Subject: [PATCH 4/5] fix --- .../org/apache/hudi/avro/HoodieAvroUtils.java | 6 +++++- .../apache/hudi/avro/TestHoodieAvroUtils.java | 17 +++++------------ 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java index 53a114e3e576..2f226b2d4609 100644 --- a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java @@ -547,7 +547,11 @@ public static Object getNestedFieldVal(GenericRecord record, String fieldName, b } } else { if (!(val instanceof GenericRecord)) { - throw new HoodieException("Cannot find a record at part value :" + part); + if (returnNullIfNotFound) { + return null; + } else { + throw new HoodieException("Cannot find a record at part value :" + part); + } } else { valueNode = (GenericRecord) val; } diff --git a/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java b/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java index 3c8d43d49472..483c49b1f50b 100644 --- a/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java @@ -295,19 +295,12 @@ public void testGetNestedFieldVal() { assertNull(rowKeyNotExist); // Field does not exist - try { - HoodieAvroUtils.getNestedFieldVal(rec, "fake_key", false, false); - } catch (Exception e) { - assertEquals("fake_key(Part -fake_key) field not found in record. Acceptable fields were :[timestamp, _row_key, non_pii_col, pii_col]", - e.getMessage()); - } + assertEquals("fake_key(Part -fake_key) field not found in record. Acceptable fields were :[timestamp, _row_key, non_pii_col, pii_col]", + assertThrows(HoodieException.class, () -> + HoodieAvroUtils.getNestedFieldVal(rec, "fake_key", false, false)).getMessage()); - // Field exist while value not - try { - HoodieAvroUtils.getNestedFieldVal(rec, "timestamp", false, false); - } catch (Exception e) { - assertEquals("The value of timestamp can not be null", e.getMessage()); - } + // Field exists while value not + assertNull(HoodieAvroUtils.getNestedFieldVal(rec, "timestamp", false, false)); } @Test From 700eaef0003fab1ef86fa93e730426e2d4880aaa Mon Sep 17 00:00:00 2001 From: zouxxyy Date: Sat, 12 Nov 2022 09:01:08 +0800 Subject: [PATCH 5/5] fix for comments --- .github/workflows/bot.yml | 8 ++++++++ .../hudi/common/model/TestPartialUpdateAvroPayload.java | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/workflows/bot.yml b/.github/workflows/bot.yml index 0a61fa2544ba..fca33bb700a8 100644 --- a/.github/workflows/bot.yml +++ b/.github/workflows/bot.yml @@ -73,6 +73,14 @@ jobs: run: | HUDI_VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout) ./packaging/bundle-validation/ci_run.sh $HUDI_VERSION + - name: Common Test + env: + SCALA_PROFILE: ${{ matrix.scalaProfile }} + SPARK_PROFILE: ${{ matrix.sparkProfile }} + FLINK_PROFILE: ${{ matrix.flinkProfile }} + if: ${{ !endsWith(env.SPARK_PROFILE, '2.4') }} # skip test spark 2.4 as it's covered by Azure CI + run: + mvn test -Punit-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"$FLINK_PROFILE" '-Dtest=Test*' -pl hudi-common $MVN_ARGS - name: Spark SQL Test env: SCALA_PROFILE: ${{ matrix.scalaProfile }} diff --git a/hudi-common/src/test/java/org/apache/hudi/common/model/TestPartialUpdateAvroPayload.java b/hudi-common/src/test/java/org/apache/hudi/common/model/TestPartialUpdateAvroPayload.java index 8f463fbfa018..b64b289abfb6 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/model/TestPartialUpdateAvroPayload.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/model/TestPartialUpdateAvroPayload.java @@ -55,7 +55,7 @@ public class TestPartialUpdateAvroPayload { + " {\"name\": \"id\", \"type\": [\"null\", \"string\"]},\n" + " {\"name\": \"partition\", \"type\": [\"null\", \"string\"]},\n" + " {\"name\": \"ts\", \"type\": [\"null\", \"long\"]},\n" - + " {\"name\": \"_hoodie_is_deleted\", \"type\": [\"boolean\", \"null\"], \"default\":false},\n" + + " {\"name\": \"_hoodie_is_deleted\", \"type\": \"boolean\", \"default\": false},\n" + " {\"name\": \"city\", \"type\": [\"null\", \"string\"]},\n" + " {\"name\": \"child\", \"type\": [\"null\", {\"type\": \"array\", \"items\": \"string\"}]}\n" + " ]\n"