Skip to content

Commit

Permalink
[HUDI-4825] Remove redundant fields in serialized commit metadata in JSON (apache#6646)
Browse files Browse the repository at this point in the history
  • Loading branch information
yihua authored Sep 12, 2022
1 parent dc5ec0c commit 3cfe485
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,13 @@ public class JsonUtils {
private static final ObjectMapper MAPPER = new ObjectMapper();
static {
  // Do not fail deserialization when the JSON carries properties unknown to the target class.
  MAPPER.disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES);

  // Serialize strictly from member fields. Custom getters, setters and creators may derive
  // additional values from those fields, and such derived values must not show up as extra
  // properties in the serialized output, so auto-detection is switched off for all of them.
  MAPPER.setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.ANY);
  for (PropertyAccessor hiddenAccessor : new PropertyAccessor[] {
      PropertyAccessor.GETTER,
      PropertyAccessor.IS_GETTER,
      PropertyAccessor.SETTER,
      PropertyAccessor.CREATOR}) {
    MAPPER.setVisibility(hiddenAccessor, JsonAutoDetect.Visibility.NONE);
  }
}

public static ObjectMapper getObjectMapper() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,16 @@
package org.apache.hudi.common.model;

import org.apache.hudi.common.testutils.HoodieTestUtils;
import org.apache.hudi.common.util.CollectionUtils;
import org.apache.hudi.common.util.FileIOUtils;
import org.apache.hudi.common.util.JsonUtils;

import org.junit.jupiter.api.Test;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertSame;
Expand All @@ -34,6 +39,30 @@
*/
public class TestHoodieCommitMetadata {

// Top-level field names expected in the JSON string produced by
// HoodieCommitMetadata#toJsonString. Element order does not matter here:
// verifyMetadataFieldNames sorts both the expected and the actual names before comparing.
private static final List<String> EXPECTED_FIELD_NAMES = Arrays.asList(
"partitionToWriteStats", "compacted", "extraMetadata", "operationType");

/**
 * Asserts that the top-level field names of the serialized commit metadata are exactly
 * the expected ones, ignoring order.
 *
 * @param commitMetadata        commit metadata to serialize via {@code toJsonString()}
 * @param expectedFieldNameList field names the serialized JSON is expected to contain
 * @throws IOException declared because serializing or re-parsing the JSON may fail
 */
public static void verifyMetadataFieldNames(
    HoodieCommitMetadata commitMetadata, List<String> expectedFieldNameList)
    throws IOException {
  String json = commitMetadata.toJsonString();

  // Parse the JSON back into a tree and collect its top-level field names, already sorted.
  List<String> sortedActualNames = CollectionUtils.toStream(
          JsonUtils.getObjectMapper().readTree(json).fieldNames())
      .sorted()
      .collect(Collectors.toList());

  // Sort a copy of the expected names so the comparison is order-insensitive.
  List<String> sortedExpectedNames = expectedFieldNameList.stream()
      .sorted()
      .collect(Collectors.toList());

  assertEquals(sortedExpectedNames, sortedActualNames);
}

/**
 * Builds a commit metadata from ten fake write stats and checks that its JSON form
 * contains exactly the field names listed in {@code EXPECTED_FIELD_NAMES}.
 */
@Test
public void verifyFieldNamesInCommitMetadata() throws IOException {
  List<HoodieWriteStat> writeStats = HoodieTestUtils.generateFakeHoodieWriteStat(10);
  HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata();
  // Register every fake stat under the partition path it reports.
  for (HoodieWriteStat writeStat : writeStats) {
    commitMetadata.addWriteStat(writeStat.getPartitionPath(), writeStat);
  }
  verifyMetadataFieldNames(commitMetadata, EXPECTED_FIELD_NAMES);
}

@Test
public void testPerfStatPresenceInHoodieMetadata() throws Exception {

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.hudi.common.model;

import org.apache.hudi.common.testutils.HoodieTestUtils;

import org.junit.jupiter.api.Test;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;

import static org.apache.hudi.common.model.TestHoodieCommitMetadata.verifyMetadataFieldNames;

/**
 * Tests the JSON serialization of {@link HoodieReplaceCommitMetadata}.
 */
public class TestHoodieReplaceCommitMetadata {

  // Top-level field names expected in the serialized replace commit metadata; the shared
  // verifier compares names irrespective of order.
  private static final List<String> EXPECTED_FIELD_NAMES = Arrays.asList(
      "partitionToWriteStats", "partitionToReplaceFileIds", "compacted", "extraMetadata", "operationType");

  /**
   * Builds a replace commit metadata from ten fake write stats (each also registered as a
   * replaced file id) and checks that its JSON form has exactly the expected field names.
   */
  @Test
  public void verifyFieldNamesInReplaceCommitMetadata() throws IOException {
    List<HoodieWriteStat> writeStats = HoodieTestUtils.generateFakeHoodieWriteStat(10);
    HoodieReplaceCommitMetadata commitMetadata = new HoodieReplaceCommitMetadata();
    for (HoodieWriteStat writeStat : writeStats) {
      commitMetadata.addWriteStat(writeStat.getPartitionPath(), writeStat);
      commitMetadata.addReplaceFileId(writeStat.getPartitionPath(), writeStat.getFileId());
    }
    verifyMetadataFieldNames(commitMetadata, EXPECTED_FIELD_NAMES);
  }
}

0 comments on commit 3cfe485

Please sign in to comment.