From 2ee320b36793750c531f9801a03b1fea32dceaba Mon Sep 17 00:00:00 2001
From: Y Ethan Guo
Date: Mon, 12 Sep 2022 16:50:18 -0700
Subject: [PATCH] [HUDI-4825] Remove redundant fields in serialized commit metadata in JSON (#6646)

---
Review notes (everything between the "---" line and the first "diff --git"
line is commentary and is not applied):
- JsonUtils: the shared ObjectMapper keeps FIELD visibility at ANY and sets
  GETTER, IS_GETTER, SETTER and CREATOR visibility to NONE, so only member
  fields are auto-detected for (de)serialization.
- Tests: the serialized commit metadata is parsed back into a JSON tree and
  the sorted field names of its root object are compared with the expected
  list, for both HoodieCommitMetadata and HoodieReplaceCommitMetadata.

 .../apache/hudi/common/util/JsonUtils.java    |  6 +++
 .../model/TestHoodieCommitMetadata.java       | 29 ++++++++++++
 .../TestHoodieReplaceCommitMetadata.java      | 47 +++++++++++++++++++
 3 files changed, 82 insertions(+)
 create mode 100644 hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodieReplaceCommitMetadata.java

diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/JsonUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/JsonUtils.java
index d820bde178e1..7c41fe4f29d9 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/util/JsonUtils.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/util/JsonUtils.java
@@ -29,7 +29,13 @@ public class JsonUtils {
   private static final ObjectMapper MAPPER = new ObjectMapper();
   static {
     MAPPER.disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES);
+    // We need to exclude custom getters, setters and creators which can use member fields
+    // to derive new fields, so that they are not included in the serialization
     MAPPER.setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.ANY);
+    MAPPER.setVisibility(PropertyAccessor.GETTER, JsonAutoDetect.Visibility.NONE);
+    MAPPER.setVisibility(PropertyAccessor.IS_GETTER, JsonAutoDetect.Visibility.NONE);
+    MAPPER.setVisibility(PropertyAccessor.SETTER, JsonAutoDetect.Visibility.NONE);
+    MAPPER.setVisibility(PropertyAccessor.CREATOR, JsonAutoDetect.Visibility.NONE);
   }
 
   public static ObjectMapper getObjectMapper() {
diff --git a/hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodieCommitMetadata.java b/hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodieCommitMetadata.java
index 0eaaff1267d4..b0b59f8cbcb8 100644
--- a/hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodieCommitMetadata.java
+++ b/hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodieCommitMetadata.java
@@ -19,11 +19,16 @@
 package org.apache.hudi.common.model;
 
 import org.apache.hudi.common.testutils.HoodieTestUtils;
+import org.apache.hudi.common.util.CollectionUtils;
 import org.apache.hudi.common.util.FileIOUtils;
+import org.apache.hudi.common.util.JsonUtils;
 
 import org.junit.jupiter.api.Test;
 
+import java.io.IOException;
+import java.util.Arrays;
 import java.util.List;
+import java.util.stream.Collectors;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertSame;
@@ -34,6 +39,30 @@
  */
 public class TestHoodieCommitMetadata {
 
+  private static final List<String> EXPECTED_FIELD_NAMES = Arrays.asList(
+      "partitionToWriteStats", "compacted", "extraMetadata", "operationType");
+
+  public static void verifyMetadataFieldNames(
+      HoodieCommitMetadata commitMetadata, List<String> expectedFieldNameList)
+      throws IOException {
+    String serializedCommitMetadata = commitMetadata.toJsonString();
+    List<String> actualFieldNameList = CollectionUtils.toStream(
+            JsonUtils.getObjectMapper().readTree(serializedCommitMetadata).fieldNames())
+        .collect(Collectors.toList());
+    assertEquals(
+        expectedFieldNameList.stream().sorted().collect(Collectors.toList()),
+        actualFieldNameList.stream().sorted().collect(Collectors.toList())
+    );
+  }
+
+  @Test
+  public void verifyFieldNamesInCommitMetadata() throws IOException {
+    List<HoodieWriteStat> fakeHoodieWriteStats = HoodieTestUtils.generateFakeHoodieWriteStat(10);
+    HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata();
+    fakeHoodieWriteStats.forEach(stat -> commitMetadata.addWriteStat(stat.getPartitionPath(), stat));
+    verifyMetadataFieldNames(commitMetadata, EXPECTED_FIELD_NAMES);
+  }
+
   @Test
   public void testPerfStatPresenceInHoodieMetadata() throws Exception {
 
diff --git a/hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodieReplaceCommitMetadata.java b/hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodieReplaceCommitMetadata.java
new file mode 100644
index 000000000000..f2c0c1c043bf
--- /dev/null
+++ b/hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodieReplaceCommitMetadata.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.common.model;
+
+import org.apache.hudi.common.testutils.HoodieTestUtils;
+
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+
+import static org.apache.hudi.common.model.TestHoodieCommitMetadata.verifyMetadataFieldNames;
+
+public class TestHoodieReplaceCommitMetadata {
+
+  private static final List<String> EXPECTED_FIELD_NAMES = Arrays.asList(
+      "partitionToWriteStats", "partitionToReplaceFileIds", "compacted", "extraMetadata", "operationType");
+
+  @Test
+  public void verifyFieldNamesInReplaceCommitMetadata() throws IOException {
+    List<HoodieWriteStat> fakeHoodieWriteStats = HoodieTestUtils.generateFakeHoodieWriteStat(10);
+    HoodieReplaceCommitMetadata commitMetadata = new HoodieReplaceCommitMetadata();
+    fakeHoodieWriteStats.forEach(stat -> {
+      commitMetadata.addWriteStat(stat.getPartitionPath(), stat);
+      commitMetadata.addReplaceFileId(stat.getPartitionPath(), stat.getFileId());
+    });
+    verifyMetadataFieldNames(commitMetadata, EXPECTED_FIELD_NAMES);
+  }
+}