diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index c2d5b510de2a..397ff9800c4b 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -57,7 +57,7 @@ stages:
       inputs:
         mavenPomFile: 'pom.xml'
         goals: 'test'
-        options: -Punit-tests -pl hudi-common,hudi-flink,hudi-client/hudi-spark-client
+        options: -Punit-tests -pl hudi-common,hudi-flink-datasource/hudi-flink,hudi-client/hudi-spark-client
         publishJUnitResults: false
         jdkVersionOption: '1.8'
         mavenOptions: '-Xmx2g $(MAVEN_OPTS)'
@@ -66,7 +66,7 @@
       inputs:
         mavenPomFile: 'pom.xml'
         goals: 'test'
-        options: -Pfunctional-tests -pl hudi-common,hudi-flink
+        options: -Pfunctional-tests -pl hudi-common,hudi-flink-datasource/hudi-flink
         publishJUnitResults: false
         jdkVersionOption: '1.8'
         mavenOptions: '-Xmx2g $(MAVEN_OPTS)'
@@ -165,7 +165,7 @@
      inputs:
         mavenPomFile: 'pom.xml'
         goals: 'test'
-        options: -Punit-tests -pl !hudi-common,!hudi-flink,!hudi-client/hudi-spark-client,!hudi-client/hudi-client-common,!hudi-client/hudi-flink-client,!hudi-client/hudi-java-client,!hudi-cli,!hudi-utilities,!hudi-sync/hudi-hive-sync
+        options: -Punit-tests -pl !hudi-common,!hudi-flink-datasource/hudi-flink,!hudi-client/hudi-spark-client,!hudi-client/hudi-client-common,!hudi-client/hudi-flink-client,!hudi-client/hudi-java-client,!hudi-cli,!hudi-utilities,!hudi-sync/hudi-hive-sync
         publishJUnitResults: false
         jdkVersionOption: '1.8'
         mavenOptions: '-Xmx2g $(MAVEN_OPTS)'
@@ -174,7 +174,7 @@
       inputs:
         mavenPomFile: 'pom.xml'
         goals: 'test'
-        options: -Pfunctional-tests -pl !hudi-common,!hudi-flink,!hudi-client/hudi-spark-client,!hudi-client/hudi-client-common,!hudi-client/hudi-flink-client,!hudi-client/hudi-java-client,!hudi-cli,!hudi-utilities,!hudi-sync/hudi-hive-sync
+        options: -Pfunctional-tests -pl !hudi-common,!hudi-flink-datasource/hudi-flink,!hudi-client/hudi-spark-client,!hudi-client/hudi-client-common,!hudi-client/hudi-flink-client,!hudi-client/hudi-java-client,!hudi-cli,!hudi-utilities,!hudi-sync/hudi-hive-sync
         publishJUnitResults: false
         jdkVersionOption: '1.8'
         mavenOptions: '-Xmx2g $(MAVEN_OPTS)'
diff --git a/hudi-client/hudi-flink-client/pom.xml b/hudi-client/hudi-flink-client/pom.xml
index b6f1f3d372d2..ffe82a0c96b5 100644
--- a/hudi-client/hudi-flink-client/pom.xml
+++ b/hudi-client/hudi-flink-client/pom.xml
@@ -60,7 +60,7 @@
    <dependency>
      <groupId>org.apache.flink</groupId>
-     <artifactId>flink-table-runtime_${scala.binary.version}</artifactId>
+     <artifactId>${flink.table.runtime.artifactId}</artifactId>
      <version>${flink.version}</version>
      <scope>provided</scope>
    </dependency>
@@ -159,7 +159,7 @@
    <dependency>
      <groupId>org.apache.flink</groupId>
-     <artifactId>flink-runtime</artifactId>
+     <artifactId>${flink.runtime.artifactId}</artifactId>
      <version>${flink.version}</version>
      <scope>test</scope>
      <classifier>tests</classifier>
diff --git a/hudi-flink-datasource/hudi-flink/pom.xml b/hudi-flink-datasource/hudi-flink/pom.xml
new file mode 100644
index 000000000000..bd05e7c10a4d
--- /dev/null
+++ b/hudi-flink-datasource/hudi-flink/pom.xml
@@ -0,0 +1,364 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- ASF license header -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <parent>
+    <artifactId>hudi-flink-datasource</artifactId>
+    <groupId>org.apache.hudi</groupId>
+    <version>0.11.0-SNAPSHOT</version>
+  </parent>
+  <modelVersion>4.0.0</modelVersion>
+
+  <artifactId>hudi-flink</artifactId>
+  <version>0.11.0-SNAPSHOT</version>
+  <packaging>jar</packaging>
+
+  <properties>
+    <main.basedir>${project.parent.parent.basedir}</main.basedir>
+    <flink.format.parquet.version>1.11.1</flink.format.parquet.version>
+  </properties>
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.jacoco</groupId>
+        <artifactId>jacoco-maven-plugin</artifactId>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-compiler-plugin</artifactId>
+        <configuration>
+          <source>1.8</source>
+          <target>1.8</target>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-jar-plugin</artifactId>
+        <version>3.1.2</version>
+        <executions>
+          <execution>
+            <goals>
+              <goal>test-jar</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.rat</groupId>
+        <artifactId>apache-rat-plugin</artifactId>
+      </plugin>
+    </plugins>
+    <resources>
+      <resource>
+        <directory>src/main/resources</directory>
+      </resource>
+      <resource>
+        <directory>src/test/resources</directory>
+      </resource>
+    </resources>
+  </build>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.hudi</groupId>
+      <artifactId>hudi-common</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hudi</groupId>
+      <artifactId>hudi-client-common</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hudi</groupId>
+      <artifactId>hudi-flink-client</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hudi</groupId>
+      <artifactId>hudi-hadoop-mr</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hudi</groupId>
+      <artifactId>hudi-hive-sync</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hudi</groupId>
+      <artifactId>hudi-sync-common</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hudi</groupId>
+      <artifactId>${hudi.flink.module}</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.flink</groupId>
+      <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
+      <scope>compile</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.flink</groupId>
+      <artifactId>flink-clients_${scala.binary.version}</artifactId>
+      <scope>compile</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>com.esotericsoftware.kryo</groupId>
+          <artifactId>kryo</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.esotericsoftware.minlog</groupId>
+          <artifactId>minlog</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.flink</groupId>
+      <artifactId>flink-connector-kafka_${scala.binary.version}</artifactId>
+      <scope>compile</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.kafka</groupId>
+      <artifactId>kafka-clients</artifactId>
+      <version>${kafka.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.flink</groupId>
+      <artifactId>flink-hadoop-compatibility_${scala.binary.version}</artifactId>
+      <version>${flink.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.flink</groupId>
+      <artifactId>flink-parquet_${scala.binary.version}</artifactId>
+      <version>${flink.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.flink</groupId>
+      <artifactId>flink-json</artifactId>
+      <version>${flink.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.flink</groupId>
+      <artifactId>flink-table-common</artifactId>
+      <version>${flink.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.flink</groupId>
+      <artifactId>${flink.table.runtime.artifactId}</artifactId>
+      <version>${flink.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.flink</groupId>
+      <artifactId>${flink.table.planner.artifactId}</artifactId>
+      <version>${flink.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.flink</groupId>
+      <artifactId>flink-statebackend-rocksdb_${scala.binary.version}</artifactId>
+      <version>${flink.version}</version>
+      <scope>provided</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.parquet</groupId>
+      <artifactId>parquet-hadoop</artifactId>
+      <version>${parquet.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.xerial.snappy</groupId>
+          <artifactId>snappy-java</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.parquet</groupId>
+      <artifactId>parquet-avro</artifactId>
+      <version>${parquet.version}</version>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.avro</groupId>
+      <artifactId>avro</artifactId>
+      <version>1.10.0</version>
+      <scope>compile</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-mapreduce-client-core</artifactId>
+      <scope>compile</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>org.slf4j</groupId>
+          <artifactId>slf4j-log4j12</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <dependency>
+      <groupId>com.beust</groupId>
+      <artifactId>jcommander</artifactId>
+      <scope>compile</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.twitter</groupId>
+      <artifactId>bijection-avro_${scala.binary.version}</artifactId>
+      <version>0.9.7</version>
+    </dependency>
+    <dependency>
+      <groupId>joda-time</groupId>
+      <artifactId>joda-time</artifactId>
+      <version>2.5</version>
+    </dependency>
+
+    <dependency>
+      <groupId>${hive.groupid}</groupId>
+      <artifactId>hive-exec</artifactId>
+      <version>${hive.version}</version>
+      <classifier>${hive.exec.classifier}</classifier>
+      <exclusions>
+        <exclusion>
+          <groupId>javax.mail</groupId>
+          <artifactId>mail</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.eclipse.jetty.aggregate</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <dependency>
+      <groupId>org.junit.jupiter</groupId>
+      <artifactId>junit-jupiter-api</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.junit.jupiter</groupId>
+      <artifactId>junit-jupiter-engine</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.junit.vintage</groupId>
+      <artifactId>junit-vintage-engine</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.junit.jupiter</groupId>
+      <artifactId>junit-jupiter-params</artifactId>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.hudi</groupId>
+      <artifactId>hudi-common</artifactId>
+      <version>${project.version}</version>
+      <classifier>tests</classifier>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hudi</groupId>
+      <artifactId>hudi-client-common</artifactId>
+      <version>${project.version}</version>
+      <classifier>tests</classifier>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hudi</groupId>
+      <artifactId>hudi-flink-client</artifactId>
+      <version>${project.version}</version>
+      <classifier>tests</classifier>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hudi</groupId>
+      <artifactId>${hudi.flink.module}</artifactId>
+      <version>${project.version}</version>
+      <classifier>tests</classifier>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.flink</groupId>
+      <artifactId>flink-test-utils_${scala.binary.version}</artifactId>
+      <version>${flink.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.flink</groupId>
+      <artifactId>${flink.runtime.artifactId}</artifactId>
+      <version>${flink.version}</version>
+      <scope>test</scope>
+      <type>test-jar</type>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.flink</groupId>
+      <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
+      <version>${flink.version}</version>
+      <scope>test</scope>
+      <type>test-jar</type>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.flink</groupId>
+      <artifactId>${flink.table.runtime.artifactId}</artifactId>
+      <version>${flink.version}</version>
+      <scope>test</scope>
+      <type>test-jar</type>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.flink</groupId>
+      <artifactId>flink-json</artifactId>
+      <version>${flink.version}</version>
+      <scope>test</scope>
+      <type>test-jar</type>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.flink</groupId>
+      <artifactId>flink-csv</artifactId>
+      <version>${flink.version}</version>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+</project>
.defaultValue("") .withDescription("Index key field. Value to be used as hashing to find the bucket ID. Should be a subset of or equal to the recordKey fields.\n" - + "Actual value will be obtained by invoking .toString() on the field value. Nested fields can be specified using " - + "the dot notation eg: `a.b.c`"); + + "Actual value will be obtained by invoking .toString() on the field value. Nested fields can be specified using " + + "the dot notation eg: `a.b.c`"); public static final ConfigOption BUCKET_INDEX_NUM_BUCKETS = ConfigOptions .key(HoodieIndexConfig.BUCKET_INDEX_NUM_BUCKETS.key()) diff --git a/hudi-flink/src/main/java/org/apache/hudi/configuration/OptionsResolver.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/OptionsResolver.java similarity index 100% rename from hudi-flink/src/main/java/org/apache/hudi/configuration/OptionsResolver.java rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/OptionsResolver.java diff --git a/hudi-flink/src/main/java/org/apache/hudi/schema/FilebasedSchemaProvider.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/schema/FilebasedSchemaProvider.java similarity index 100% rename from hudi-flink/src/main/java/org/apache/hudi/schema/FilebasedSchemaProvider.java rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/schema/FilebasedSchemaProvider.java diff --git a/hudi-flink/src/main/java/org/apache/hudi/schema/SchemaProvider.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/schema/SchemaProvider.java similarity index 100% rename from hudi-flink/src/main/java/org/apache/hudi/schema/SchemaProvider.java rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/schema/SchemaProvider.java diff --git a/hudi-flink/src/main/java/org/apache/hudi/schema/SchemaRegistryProvider.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/schema/SchemaRegistryProvider.java similarity index 100% rename from hudi-flink/src/main/java/org/apache/hudi/schema/SchemaRegistryProvider.java rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/schema/SchemaRegistryProvider.java diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/BucketStreamWriteFunction.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/BucketStreamWriteFunction.java similarity index 97% rename from hudi-flink/src/main/java/org/apache/hudi/sink/BucketStreamWriteFunction.java rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/BucketStreamWriteFunction.java index 128358096cde..057c794331d6 100644 --- a/hudi-flink/src/main/java/org/apache/hudi/sink/BucketStreamWriteFunction.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/BucketStreamWriteFunction.java @@ -18,10 +18,6 @@ package org.apache.hudi.sink; -import org.apache.flink.configuration.Configuration; -import org.apache.flink.runtime.state.FunctionInitializationContext; -import org.apache.flink.streaming.api.functions.ProcessFunction; -import org.apache.flink.util.Collector; import org.apache.hudi.common.model.FileSlice; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; @@ -31,6 +27,11 @@ import org.apache.hudi.configuration.FlinkOptions; import org.apache.hudi.index.bucket.BucketIdentifier; import org.apache.hudi.table.HoodieFlinkTable; + +import org.apache.flink.configuration.Configuration; +import 
diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/BucketStreamWriteFunction.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/BucketStreamWriteFunction.java
similarity index 97%
rename from hudi-flink/src/main/java/org/apache/hudi/sink/BucketStreamWriteFunction.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/BucketStreamWriteFunction.java
index 128358096cde..057c794331d6 100644
--- a/hudi-flink/src/main/java/org/apache/hudi/sink/BucketStreamWriteFunction.java
+++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/BucketStreamWriteFunction.java
@@ -18,10 +18,6 @@
 
 package org.apache.hudi.sink;
 
-import org.apache.flink.configuration.Configuration;
-import org.apache.flink.runtime.state.FunctionInitializationContext;
-import org.apache.flink.streaming.api.functions.ProcessFunction;
-import org.apache.flink.util.Collector;
 import org.apache.hudi.common.model.FileSlice;
 import org.apache.hudi.common.model.HoodieKey;
 import org.apache.hudi.common.model.HoodieRecord;
@@ -31,6 +27,11 @@
 import org.apache.hudi.configuration.FlinkOptions;
 import org.apache.hudi.index.bucket.BucketIdentifier;
 import org.apache.hudi.table.HoodieFlinkTable;
+
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.runtime.state.FunctionInitializationContext;
+import org.apache.flink.streaming.api.functions.ProcessFunction;
+import org.apache.flink.util.Collector;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -131,7 +132,7 @@ private void bootstrapIndex() throws IOException {
       int partitionOfBucket = BucketIdentifier.mod(i, parallelism);
       if (partitionOfBucket == taskID) {
         LOG.info(String.format("Bootstrapping index. Adding bucket %s , "
-                + "Current parallelism: %s , Max parallelism: %s , Current task id: %s",
+            + "Current parallelism: %s , Max parallelism: %s , Current task id: %s",
             i, parallelism, maxParallelism, taskID));
         bucketToLoad.add(i);
       }
@@ -155,7 +156,7 @@ private void bootstrapIndex() throws IOException {
         LOG.info(String.format("Should load this partition bucket %s with fileID %s", partitionBucketId, fileID));
         if (bucketToFileIDMap.containsKey(partitionBucketId)) {
           throw new RuntimeException(String.format("Duplicate fileID %s from partitionBucket %s found "
-                  + "during the BucketStreamWriteFunction index bootstrap.", fileID, partitionBucketId));
+              + "during the BucketStreamWriteFunction index bootstrap.", fileID, partitionBucketId));
         } else {
           LOG.info(String.format("Adding fileID %s to the partition bucket %s.", fileID, partitionBucketId));
           bucketToFileIDMap.put(partitionBucketId, fileID);
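The bootstrapIndex hunk above keys bucket ownership on BucketIdentifier.mod(i, parallelism). A tiny standalone sketch of that assignment rule, assuming the call means a plain non-negative modulo:

    public class BucketToTaskSketch {
      public static void main(String[] args) {
        int parallelism = 4;
        int taskID = 1;
        for (int bucket = 0; bucket < 8; bucket++) {
          // A bucket is loaded by the task whose index equals bucket % parallelism.
          boolean owned = Math.floorMod(bucket, parallelism) == taskID;
          System.out.printf("bucket %d owned by task %d: %b%n", bucket, taskID, owned);
        }
      }
    }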
diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/BucketStreamWriteOperator.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/BucketStreamWriteOperator.java
similarity index 99%
rename from hudi-flink/src/main/java/org/apache/hudi/sink/BucketStreamWriteOperator.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/BucketStreamWriteOperator.java
index 209fe59e4b8c..cf740cc2ccc5 100644
--- a/hudi-flink/src/main/java/org/apache/hudi/sink/BucketStreamWriteOperator.java
+++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/BucketStreamWriteOperator.java
@@ -18,10 +18,11 @@
 
 package org.apache.hudi.sink;
 
-import org.apache.flink.configuration.Configuration;
 import org.apache.hudi.sink.common.AbstractWriteOperator;
 import org.apache.hudi.sink.common.WriteOperatorFactory;
 
+import org.apache.flink.configuration.Configuration;
+
 /**
  * Operator for {@link BucketStreamWriteFunction}.
  *
diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/CleanFunction.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/CleanFunction.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/sink/CleanFunction.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/CleanFunction.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/StreamWriteFunction.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/StreamWriteFunction.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/sink/StreamWriteFunction.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/StreamWriteFunction.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/StreamWriteOperator.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/StreamWriteOperator.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/sink/StreamWriteOperator.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/StreamWriteOperator.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/StreamWriteOperatorCoordinator.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/StreamWriteOperatorCoordinator.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/sink/StreamWriteOperatorCoordinator.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/StreamWriteOperatorCoordinator.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/append/AppendWriteFunction.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/append/AppendWriteFunction.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/sink/append/AppendWriteFunction.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/append/AppendWriteFunction.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/append/AppendWriteOperator.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/append/AppendWriteOperator.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/sink/append/AppendWriteOperator.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/append/AppendWriteOperator.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/BootstrapOperator.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/BootstrapOperator.java
similarity index 98%
rename from hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/BootstrapOperator.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/BootstrapOperator.java
index cdb378f88885..1fc8d393be6a 100644
--- a/hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/BootstrapOperator.java
+++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/BootstrapOperator.java
@@ -256,9 +256,9 @@ public static HoodieRecord generateHoodieRecord(HoodieKey hoodieKey, FileSlice f
   }
 
   protected boolean shouldLoadFile(String fileId,
-                                   int maxParallelism,
-                                   int parallelism,
-                                   int taskID) {
+      int maxParallelism,
+      int parallelism,
+      int taskID) {
     return KeyGroupRangeAssignment.assignKeyToParallelOperator(
         fileId, maxParallelism, parallelism) == taskID;
   }
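shouldLoadFile in the BootstrapOperator hunk above delegates to Flink's key-group assignment. A small runnable sketch (the fileId value is made up) showing which subtask a given fileId lands on:

    import org.apache.flink.runtime.state.KeyGroupRangeAssignment;

    public class FileOwnershipSketch {
      public static void main(String[] args) {
        int maxParallelism = 128;
        int parallelism = 4;
        String fileId = "demo-file-id-0001";
        // Same call the operator uses: returns the owning subtask index for the key.
        int owner = KeyGroupRangeAssignment.assignKeyToParallelOperator(fileId, maxParallelism, parallelism);
        System.out.printf("fileId %s is loaded by subtask %d of %d%n", fileId, owner, parallelism);
      }
    }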
diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/IndexRecord.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/IndexRecord.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/IndexRecord.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/IndexRecord.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/aggregate/BootstrapAccumulator.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/aggregate/BootstrapAccumulator.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/aggregate/BootstrapAccumulator.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/aggregate/BootstrapAccumulator.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/aggregate/BootstrapAggFunction.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/aggregate/BootstrapAggFunction.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/aggregate/BootstrapAggFunction.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/aggregate/BootstrapAggFunction.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/batch/BatchBootstrapOperator.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/batch/BatchBootstrapOperator.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/batch/BatchBootstrapOperator.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/batch/BatchBootstrapOperator.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/BulkInsertWriteFunction.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/BulkInsertWriteFunction.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/sink/bulk/BulkInsertWriteFunction.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/BulkInsertWriteFunction.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/BulkInsertWriteOperator.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/BulkInsertWriteOperator.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/sink/bulk/BulkInsertWriteOperator.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/BulkInsertWriteOperator.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/BulkInsertWriterHelper.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/BulkInsertWriterHelper.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/sink/bulk/BulkInsertWriterHelper.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/BulkInsertWriterHelper.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/RowDataKeyGen.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/RowDataKeyGen.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/sink/bulk/RowDataKeyGen.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/RowDataKeyGen.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/sort/SortOperator.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/sort/SortOperator.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/sink/bulk/sort/SortOperator.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/sort/SortOperator.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/sort/SortOperatorGen.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/sort/SortOperatorGen.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/sink/bulk/sort/SortOperatorGen.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/sort/SortOperatorGen.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/common/AbstractStreamWriteFunction.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/common/AbstractStreamWriteFunction.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/sink/common/AbstractStreamWriteFunction.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/common/AbstractStreamWriteFunction.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/common/AbstractWriteFunction.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/common/AbstractWriteFunction.java
similarity index 99%
rename from hudi-flink/src/main/java/org/apache/hudi/sink/common/AbstractWriteFunction.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/common/AbstractWriteFunction.java
index 8e776006f3b9..9e131ff91e1e 100644
--- a/hudi-flink/src/main/java/org/apache/hudi/sink/common/AbstractWriteFunction.java
+++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/common/AbstractWriteFunction.java
@@ -41,6 +41,7 @@ public abstract class AbstractWriteFunction extends ProcessFunction
     Map<String, FileStatus> uniqueIdToFileStatus = new HashMap<>();
     metadataList.forEach(metadata ->
-            uniqueIdToFileStatus.putAll(getFilesToReadOfInstant(basePath, metadata, fs, tableType)));
+        uniqueIdToFileStatus.putAll(getFilesToReadOfInstant(basePath, metadata, fs, tableType)));
     return uniqueIdToFileStatus.values().toArray(new FileStatus[0]);
   }
diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/transform/ChainedTransformer.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/transform/ChainedTransformer.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/sink/transform/ChainedTransformer.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/transform/ChainedTransformer.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/transform/RowDataToHoodieFunction.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/transform/RowDataToHoodieFunction.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/sink/transform/RowDataToHoodieFunction.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/transform/RowDataToHoodieFunction.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/transform/RowDataToHoodieFunctionWithRateLimit.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/transform/RowDataToHoodieFunctionWithRateLimit.java
similarity index 89%
rename from hudi-flink/src/main/java/org/apache/hudi/sink/transform/RowDataToHoodieFunctionWithRateLimit.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/transform/RowDataToHoodieFunctionWithRateLimit.java
index 3d42ad87d908..fc9c2177e7c0 100644
--- a/hudi-flink/src/main/java/org/apache/hudi/sink/transform/RowDataToHoodieFunctionWithRateLimit.java
+++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/transform/RowDataToHoodieFunctionWithRateLimit.java
@@ -18,11 +18,11 @@
 
 package org.apache.hudi.sink.transform;
 
+import org.apache.hudi.adapter.RateLimiterAdapter;
 import org.apache.hudi.common.model.HoodieRecord;
 import org.apache.hudi.configuration.FlinkOptions;
 
 import org.apache.flink.configuration.Configuration;
-import org.apache.flink.shaded.guava30.com.google.common.util.concurrent.RateLimiter;
 import org.apache.flink.table.data.RowData;
 import org.apache.flink.table.types.logical.RowType;
 
@@ -39,7 +39,7 @@ public class RowDataToHoodieFunctionWithRateLimit
diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/utils/Pipelines.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/utils/Pipelines.java
@@ public static DataStream<HoodieRecord> bootstrap(
  * The bootstrap operator loads the existing data index (primary key to file id mapping),
  * then send the indexing data set to subsequent operator(usually the bucket assign operator).
  *
- * @param conf The configuration
- * @param rowType The row type
+ * @param conf               The configuration
+ * @param rowType            The row type
  * @param defaultParallelism The default parallelism
- * @param dataStream The data stream
- * @param bounded Whether the source is bounded
- * @param overwrite Whether it is insert overwrite
+ * @param dataStream         The data stream
+ * @param bounded            Whether the source is bounded
+ * @param overwrite          Whether it is insert overwrite
  */
 public static DataStream<HoodieRecord> bootstrap(
     Configuration conf,
@@ -268,9 +268,9 @@ public static DataStream rowDataToHoodieRecord(Configuration conf,
   /**
    * The bucket assigner assigns the inputs to suitable file groups, the write task caches
    * and flushes the data set to disk.
    *
-   * @param conf The configuration
+   * @param conf               The configuration
    * @param defaultParallelism The default parallelism
-   * @param dataStream The input data stream
+   * @param dataStream         The input data stream
    * @return the stream write data stream pipeline
    */
   public static DataStream<Object> hoodieStreamWrite(Configuration conf, int defaultParallelism, DataStream dataStream) {
@@ -280,25 +280,25 @@ public static DataStream hoodieStreamWrite(Configuration conf, int defau
       String indexKeyFields = conf.getString(FlinkOptions.INDEX_KEY_FIELD);
       BucketIndexPartitioner partitioner = new BucketIndexPartitioner<>(bucketNum, indexKeyFields);
       return dataStream.partitionCustom(partitioner, HoodieRecord::getKey)
-              .transform("bucket_write", TypeInformation.of(Object.class), operatorFactory)
-              .uid("uid_bucket_write" + conf.getString(FlinkOptions.TABLE_NAME))
-              .setParallelism(conf.getInteger(FlinkOptions.WRITE_TASKS));
+          .transform("bucket_write", TypeInformation.of(Object.class), operatorFactory)
+          .uid("uid_bucket_write" + conf.getString(FlinkOptions.TABLE_NAME))
+          .setParallelism(conf.getInteger(FlinkOptions.WRITE_TASKS));
     } else {
       WriteOperatorFactory operatorFactory = StreamWriteOperator.getFactory(conf);
       return dataStream
-              // Key-by record key, to avoid multiple subtasks write to a bucket at the same time
-              .keyBy(HoodieRecord::getRecordKey)
-              .transform(
-                  "bucket_assigner",
-                  TypeInformation.of(HoodieRecord.class),
-                  new KeyedProcessOperator<>(new BucketAssignFunction<>(conf)))
-              .uid("uid_bucket_assigner_" + conf.getString(FlinkOptions.TABLE_NAME))
-              .setParallelism(conf.getOptional(FlinkOptions.BUCKET_ASSIGN_TASKS).orElse(defaultParallelism))
-              // shuffle by fileId(bucket id)
-              .keyBy(record -> record.getCurrentLocation().getFileId())
-              .transform("stream_write", TypeInformation.of(Object.class), operatorFactory)
-              .uid("uid_stream_write" + conf.getString(FlinkOptions.TABLE_NAME))
-              .setParallelism(conf.getInteger(FlinkOptions.WRITE_TASKS));
+          // Key-by record key, to avoid multiple subtasks write to a bucket at the same time
+          .keyBy(HoodieRecord::getRecordKey)
+          .transform(
+              "bucket_assigner",
+              TypeInformation.of(HoodieRecord.class),
+              new KeyedProcessOperator<>(new BucketAssignFunction<>(conf)))
+          .uid("uid_bucket_assigner_" + conf.getString(FlinkOptions.TABLE_NAME))
+          .setParallelism(conf.getOptional(FlinkOptions.BUCKET_ASSIGN_TASKS).orElse(defaultParallelism))
+          // shuffle by fileId(bucket id)
+          .keyBy(record -> record.getCurrentLocation().getFileId())
+          .transform("stream_write", TypeInformation.of(Object.class), operatorFactory)
+          .uid("uid_stream_write" + conf.getString(FlinkOptions.TABLE_NAME))
+          .setParallelism(conf.getInteger(FlinkOptions.WRITE_TASKS));
     }
   }
@@ -324,8 +324,8 @@ public static DataStream hoodieStreamWrite(Configuration conf, int defau
    */
   public static DataStreamSink compact(Configuration conf, DataStream dataStream) {
     return dataStream.transform("compact_plan_generate",
-            TypeInformation.of(CompactionPlanEvent.class),
-            new CompactionPlanOperator(conf))
+        TypeInformation.of(CompactionPlanEvent.class),
+        new CompactionPlanOperator(conf))
         .setParallelism(1) // plan generate must be singleton
         .rebalance()
         .transform("compact_task",
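For readers unfamiliar with the hoodieStreamWrite shape re-indented above, here is a hypothetical, self-contained Flink job with the same keyBy/uid/setParallelism structure, using plain strings instead of HoodieRecord:

    import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

    public class KeyedWriteShapeSketch {
      public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.fromElements("k1:v1", "k2:v2", "k1:v3")
            // Key by record key so only one subtask handles a given key at a time.
            .keyBy(record -> record.split(":")[0])
            .map(record -> "written " + record)
            // Stable uid, as in the diff, so operator state survives job upgrades.
            .uid("uid_stream_write_demo")
            .setParallelism(2)
            .print();
        env.execute("keyed-write-shape-sketch");
      }
    }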
diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/utils/TimeWait.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/utils/TimeWait.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/sink/utils/TimeWait.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/utils/TimeWait.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/source/FileIndex.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/FileIndex.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/source/FileIndex.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/FileIndex.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/source/IncrementalInputSplits.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/IncrementalInputSplits.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/source/IncrementalInputSplits.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/IncrementalInputSplits.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/source/StreamReadMonitoringFunction.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/StreamReadMonitoringFunction.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/source/StreamReadMonitoringFunction.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/StreamReadMonitoringFunction.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/source/StreamReadOperator.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/StreamReadOperator.java
similarity index 89%
rename from hudi-flink/src/main/java/org/apache/hudi/source/StreamReadOperator.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/StreamReadOperator.java
index c3f43422f1d1..bf6dc98f4de0 100644
--- a/hudi-flink/src/main/java/org/apache/hudi/source/StreamReadOperator.java
+++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/StreamReadOperator.java
@@ -18,24 +18,23 @@
 
 package org.apache.hudi.source;
 
+import org.apache.hudi.adapter.AbstractStreamOperatorAdapter;
+import org.apache.hudi.adapter.AbstractStreamOperatorFactoryAdapter;
+import org.apache.hudi.adapter.MailboxExecutorAdapter;
+import org.apache.hudi.adapter.Utils;
 import org.apache.hudi.table.format.mor.MergeOnReadInputFormat;
 import org.apache.hudi.table.format.mor.MergeOnReadInputSplit;
 
-import org.apache.flink.api.common.operators.MailboxExecutor;
 import org.apache.flink.api.common.state.ListState;
 import org.apache.flink.api.common.state.ListStateDescriptor;
 import org.apache.flink.runtime.state.JavaSerializer;
 import org.apache.flink.runtime.state.StateInitializationContext;
 import org.apache.flink.runtime.state.StateSnapshotContext;
 import org.apache.flink.streaming.api.functions.source.SourceFunction;
-import org.apache.flink.streaming.api.operators.AbstractStreamOperator;
-import org.apache.flink.streaming.api.operators.AbstractStreamOperatorFactory;
 import org.apache.flink.streaming.api.operators.OneInputStreamOperator;
 import org.apache.flink.streaming.api.operators.OneInputStreamOperatorFactory;
 import org.apache.flink.streaming.api.operators.StreamOperator;
 import org.apache.flink.streaming.api.operators.StreamOperatorParameters;
-import org.apache.flink.streaming.api.operators.StreamSourceContexts;
-import org.apache.flink.streaming.api.operators.YieldingOperatorFactory;
 import org.apache.flink.streaming.api.watermark.Watermark;
 import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
 import org.apache.flink.streaming.runtime.tasks.ProcessingTimeService;
@@ -55,11 +54,11 @@
  * this operator can have multiple parallelism.
  *
  * As soon as an input split {@link MergeOnReadInputSplit} is received, it is put into a queue,
- * the {@link MailboxExecutor} read the actual data of the split.
+ * the {@code MailboxExecutor} read the actual data of the split.
  * This architecture allows the separation of split reading from processing the checkpoint barriers,
  * thus removing any potential back-pressure.
  */
-public class StreamReadOperator extends AbstractStreamOperator<RowData>
+public class StreamReadOperator extends AbstractStreamOperatorAdapter<RowData>
     implements OneInputStreamOperator<MergeOnReadInputSplit, RowData> {
 
   private static final Logger LOG = LoggerFactory.getLogger(StreamReadOperator.class);
@@ -69,7 +68,7 @@ public class StreamReadOperator extends AbstractStreamOperator
   // It's the same thread that runs this operator and checkpoint actions. Use this executor to schedule only
   // splits for subsequent reading, so that a new checkpoint could be triggered without blocking a long time
   // for exhausting all scheduled split reading tasks.
-  private final MailboxExecutor executor;
+  private final MailboxExecutorAdapter executor;
 
   private MergeOnReadInputFormat format;
@@ -86,7 +85,7 @@ public class StreamReadOperator extends AbstractStreamOperator
   private transient volatile SplitState currentSplitState;
 
   private StreamReadOperator(MergeOnReadInputFormat format, ProcessingTimeService timeService,
-                             MailboxExecutor mailboxExecutor) {
+                             MailboxExecutorAdapter mailboxExecutor) {
     this.format = Preconditions.checkNotNull(format, "The InputFormat should not be null.");
     this.processingTimeService = timeService;
     this.executor = Preconditions.checkNotNull(mailboxExecutor, "The mailboxExecutor should not be null.");
@@ -114,14 +113,12 @@ public void initializeState(StateInitializationContext context) throws Exception
       }
     }
 
-    this.sourceContext = StreamSourceContexts.getSourceContext(
+    this.sourceContext = Utils.getSourceContext(
         getOperatorConfig().getTimeCharacteristic(),
         getProcessingTimeService(),
-        new Object(), // no actual locking needed
+        getContainingTask(),
         output,
-        getRuntimeContext().getExecutionConfig().getAutoWatermarkInterval(),
-        -1,
-        true);
+        getRuntimeContext().getExecutionConfig().getAutoWatermarkInterval());
 
     // Enqueue to process the recovered input splits.
     enqueueProcessSplits();
@@ -236,26 +233,19 @@
   private enum SplitState {
     IDLE, RUNNING
   }
 
-  private static class OperatorFactory extends AbstractStreamOperatorFactory
-      implements YieldingOperatorFactory, OneInputStreamOperatorFactory {
+  private static class OperatorFactory extends AbstractStreamOperatorFactoryAdapter
+      implements OneInputStreamOperatorFactory {
 
     private final MergeOnReadInputFormat format;
 
-    private transient MailboxExecutor mailboxExecutor;
-
     private OperatorFactory(MergeOnReadInputFormat format) {
       this.format = format;
     }
 
-    @Override
-    public void setMailboxExecutor(MailboxExecutor mailboxExecutor) {
-      this.mailboxExecutor = mailboxExecutor;
-    }
-
     @SuppressWarnings("unchecked")
     @Override
     public <O extends StreamOperator<RowData>> O createStreamOperator(StreamOperatorParameters<RowData> parameters) {
-      StreamReadOperator operator = new StreamReadOperator(format, processingTimeService, mailboxExecutor);
+      StreamReadOperator operator = new StreamReadOperator(format, processingTimeService, getMailboxExecutorAdapter());
       operator.setup(parameters.getContainingTask(), parameters.getStreamConfig(), parameters.getOutput());
       return (O) operator;
     }
diff --git a/hudi-flink/src/main/java/org/apache/hudi/streamer/FlinkStreamerConfig.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/streamer/FlinkStreamerConfig.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/streamer/FlinkStreamerConfig.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/streamer/FlinkStreamerConfig.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/streamer/HoodieFlinkStreamer.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/streamer/HoodieFlinkStreamer.java
similarity index 94%
rename from hudi-flink/src/main/java/org/apache/hudi/streamer/HoodieFlinkStreamer.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/streamer/HoodieFlinkStreamer.java
index 192de91d238a..a12ec23dcb03 100644
--- a/hudi-flink/src/main/java/org/apache/hudi/streamer/HoodieFlinkStreamer.java
+++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/streamer/HoodieFlinkStreamer.java
@@ -79,14 +79,14 @@ public static void main(String[] args) throws Exception {
     conf.setLong(FlinkOptions.WRITE_COMMIT_ACK_TIMEOUT, ckpTimeout);
 
     DataStream<RowData> dataStream = env.addSource(new FlinkKafkaConsumer<>(
-            cfg.kafkaTopic,
-            new JsonRowDataDeserializationSchema(
-                rowType,
-                InternalTypeInfo.of(rowType),
-                false,
-                true,
-                TimestampFormat.ISO_8601
-            ), kafkaProps))
+        cfg.kafkaTopic,
+        new JsonRowDataDeserializationSchema(
+            rowType,
+            InternalTypeInfo.of(rowType),
+            false,
+            true,
+            TimestampFormat.ISO_8601
+        ), kafkaProps))
         .name("kafka_source")
         .uid("uid_kafka_source");
diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableFactory.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableFactory.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableFactory.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableFactory.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableSink.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableSink.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableSink.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableSink.java
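A note on the StreamReadOperator change above: the operator now codes against project-owned adapter types so each Flink-version module can supply the binding. The sketch below is hypothetical and standalone; Hudi's real MailboxExecutorAdapter wraps Flink's MailboxExecutor rather than a plain Executor.

    import java.util.concurrent.Executor;

    interface MailboxExecutorAdapterSketch {
      // The one capability the operator needs: run a task on the mailbox thread
      // so split reading never races with checkpoint actions.
      void execute(Runnable task, String description);
    }

    class ExecutorBackedAdapter implements MailboxExecutorAdapterSketch {
      private final Executor delegate;

      ExecutorBackedAdapter(Executor delegate) {
        this.delegate = delegate;
      }

      @Override
      public void execute(Runnable task, String description) {
        delegate.execute(task); // description would feed logging in a real binding
      }
    }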
diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableSource.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableSource.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableSource.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableSource.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/catalog/CatalogOptions.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/CatalogOptions.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/table/catalog/CatalogOptions.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/CatalogOptions.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/catalog/HoodieCatalog.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/HoodieCatalog.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/table/catalog/HoodieCatalog.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/HoodieCatalog.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/catalog/HoodieCatalogFactory.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/HoodieCatalogFactory.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/table/catalog/HoodieCatalogFactory.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/HoodieCatalogFactory.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/catalog/TableOptionProperties.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/TableOptionProperties.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/table/catalog/TableOptionProperties.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/TableOptionProperties.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/format/FilePathUtils.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/FilePathUtils.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/table/format/FilePathUtils.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/FilePathUtils.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/format/FormatUtils.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/FormatUtils.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/table/format/FormatUtils.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/FormatUtils.java
index 666dc3a73fb9..fce9b75f764e 100644
--- a/hudi-flink/src/main/java/org/apache/hudi/table/format/FormatUtils.java
+++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/FormatUtils.java
@@ -33,6 +33,7 @@
 import org.apache.hudi.configuration.FlinkOptions;
 import org.apache.hudi.hadoop.config.HoodieRealtimeConfig;
 import org.apache.hudi.table.format.mor.MergeOnReadInputSplit;
+import org.apache.hudi.util.StreamerUtil;
 
 import org.apache.avro.Schema;
 import org.apache.avro.generic.GenericRecord;
@@ -42,7 +43,6 @@
 import org.apache.flink.types.RowKind;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
-import org.apache.hudi.util.StreamerUtil;
 
 import java.util.ArrayList;
 import java.util.Arrays;
diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/CopyOnWriteInputFormat.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/CopyOnWriteInputFormat.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/table/format/cow/CopyOnWriteInputFormat.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/CopyOnWriteInputFormat.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java
similarity index 99%
rename from hudi-flink/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java
index e112bcf24e50..c636b36100fe 100644
--- a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java
+++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java
@@ -334,7 +334,7 @@ private static ColumnReader createColumnReader(
       case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
         switch (descriptor.getPrimitiveType().getPrimitiveTypeName()) {
           case INT64:
-            return new Int64TimestampColumnReader(utcTimestamp, descriptor, pageReader, ((TimestampType)fieldType).getPrecision());
+            return new Int64TimestampColumnReader(utcTimestamp, descriptor, pageReader, ((TimestampType) fieldType).getPrecision());
           case INT96:
             return new TimestampColumnReader(utcTimestamp, descriptor, pageReader);
           default:
diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/HeapArrayVector.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/HeapArrayVector.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/HeapArrayVector.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/HeapArrayVector.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/HeapMapColumnVector.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/HeapMapColumnVector.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/HeapMapColumnVector.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/HeapMapColumnVector.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/HeapRowColumnVector.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/HeapRowColumnVector.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/HeapRowColumnVector.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/HeapRowColumnVector.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/ParquetDecimalVector.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/ParquetDecimalVector.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/ParquetDecimalVector.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/ParquetDecimalVector.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/AbstractColumnReader.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/AbstractColumnReader.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/AbstractColumnReader.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/AbstractColumnReader.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ArrayColumnReader.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ArrayColumnReader.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ArrayColumnReader.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ArrayColumnReader.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/BaseVectorizedColumnReader.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/BaseVectorizedColumnReader.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/BaseVectorizedColumnReader.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/BaseVectorizedColumnReader.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/FixedLenBytesColumnReader.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/FixedLenBytesColumnReader.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/FixedLenBytesColumnReader.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/FixedLenBytesColumnReader.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/Int64TimestampColumnReader.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/Int64TimestampColumnReader.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/Int64TimestampColumnReader.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/Int64TimestampColumnReader.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/MapColumnReader.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/MapColumnReader.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/MapColumnReader.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/MapColumnReader.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetDataColumnReader.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetDataColumnReader.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetDataColumnReader.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetDataColumnReader.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetDataColumnReaderFactory.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetDataColumnReaderFactory.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetDataColumnReaderFactory.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetDataColumnReaderFactory.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/RowColumnReader.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/RowColumnReader.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/RowColumnReader.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/RowColumnReader.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/RunLengthDecoder.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/RunLengthDecoder.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/RunLengthDecoder.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/RunLengthDecoder.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/format/mor/MergeOnReadInputFormat.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/mor/MergeOnReadInputFormat.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/table/format/mor/MergeOnReadInputFormat.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/mor/MergeOnReadInputFormat.java
index 8283b5c3cd5b..202b14404aa3 100644
--- a/hudi-flink/src/main/java/org/apache/hudi/table/format/mor/MergeOnReadInputFormat.java
+++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/mor/MergeOnReadInputFormat.java
@@ -31,8 +31,8 @@
 import org.apache.hudi.keygen.KeyGenUtils;
 import org.apache.hudi.table.format.FilePathUtils;
 import org.apache.hudi.table.format.FormatUtils;
-import org.apache.hudi.table.format.cow.vector.reader.ParquetColumnarRowSplitReader;
 import org.apache.hudi.table.format.cow.ParquetSplitReaderUtil;
+import org.apache.hudi.table.format.cow.vector.reader.ParquetColumnarRowSplitReader;
 import org.apache.hudi.util.AvroToRowDataConverters;
 import org.apache.hudi.util.RowDataProjection;
 import org.apache.hudi.util.RowDataToAvroConverters;
diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/format/mor/MergeOnReadInputSplit.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/mor/MergeOnReadInputSplit.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/table/format/mor/MergeOnReadInputSplit.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/mor/MergeOnReadInputSplit.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/format/mor/MergeOnReadTableState.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/mor/MergeOnReadTableState.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/table/format/mor/MergeOnReadTableState.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/mor/MergeOnReadTableState.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/util/AvroSchemaConverter.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/AvroSchemaConverter.java
similarity index 99%
rename from hudi-flink/src/main/java/org/apache/hudi/util/AvroSchemaConverter.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/AvroSchemaConverter.java
index 3328233e63e8..6325c2bcceb4 100644
--- a/hudi-flink/src/main/java/org/apache/hudi/util/AvroSchemaConverter.java
+++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/AvroSchemaConverter.java
@@ -76,8 +76,8 @@ public static DataType convertToDataType(Schema schema) {
         return DataTypes.ARRAY(convertToDataType(schema.getElementType())).notNull();
       case MAP:
         return DataTypes.MAP(
-                DataTypes.STRING().notNull(),
-                convertToDataType(schema.getValueType()))
+            DataTypes.STRING().notNull(),
+            convertToDataType(schema.getValueType()))
             .notNull();
       case UNION:
         final Schema actualSchema;
diff --git a/hudi-flink/src/main/java/org/apache/hudi/util/AvroToRowDataConverters.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/AvroToRowDataConverters.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/util/AvroToRowDataConverters.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/AvroToRowDataConverters.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/util/ChangelogModes.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ChangelogModes.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/util/ChangelogModes.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ChangelogModes.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/util/CompactionUtil.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/CompactionUtil.java
similarity index 97%
rename from hudi-flink/src/main/java/org/apache/hudi/util/CompactionUtil.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/CompactionUtil.java
index 74629f9b0942..3d386cf8cc17 100644
--- a/hudi-flink/src/main/java/org/apache/hudi/util/CompactionUtil.java
+++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/CompactionUtil.java
@@ -51,10 +51,10 @@ public class CompactionUtil {
 
   /**
    * Schedules a new compaction instant.
    *
-   * @param metaClient The metadata client
-   * @param writeClient The write client
+   * @param metaClient          The metadata client
+   * @param writeClient         The write client
    * @param deltaTimeCompaction Whether the compaction is trigger by elapsed delta time
-   * @param committed Whether the last instant was committed successfully
+   * @param committed           Whether the last instant was committed successfully
    */
   public static void scheduleCompaction(
       HoodieTableMetaClient metaClient,
diff --git a/hudi-flink/src/main/java/org/apache/hudi/util/DataTypeUtils.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/DataTypeUtils.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/util/DataTypeUtils.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/DataTypeUtils.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/util/FlinkStateBackendConverter.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/FlinkStateBackendConverter.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/util/FlinkStateBackendConverter.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/FlinkStateBackendConverter.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/util/FlinkTables.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/FlinkTables.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/util/FlinkTables.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/FlinkTables.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/util/InputFormats.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/InputFormats.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/util/InputFormats.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/InputFormats.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/util/RowDataProjection.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/RowDataProjection.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/util/RowDataProjection.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/RowDataProjection.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/util/RowDataToAvroConverters.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/RowDataToAvroConverters.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/util/RowDataToAvroConverters.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/RowDataToAvroConverters.java
diff --git a/hudi-flink/src/main/java/org/apache/hudi/util/StreamerUtil.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/StreamerUtil.java
similarity index 99%
rename from hudi-flink/src/main/java/org/apache/hudi/util/StreamerUtil.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/StreamerUtil.java
index 55d403dc4db8..3138d5d98616 100644
--- a/hudi-flink/src/main/java/org/apache/hudi/util/StreamerUtil.java
+++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/StreamerUtil.java
@@ -503,6 +503,7 @@ public static String getLastCompletedInstant(HoodieTableMetaClient metaClient) {
 
   /**
    * Returns whether there are successful commits on the timeline.
+   *
    * @param metaClient The meta client
    * @return true if there is any successful commit
    */
diff --git a/hudi-flink/src/main/java/org/apache/hudi/util/StringToRowDataConverter.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/StringToRowDataConverter.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/util/StringToRowDataConverter.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/StringToRowDataConverter.java
index 47aed1ed31e7..216fa3f0f336 100644
--- a/hudi-flink/src/main/java/org/apache/hudi/util/StringToRowDataConverter.java
+++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/StringToRowDataConverter.java
@@ -18,7 +18,6 @@
 
 package org.apache.hudi.util;
 
-import org.apache.flink.table.types.logical.TimestampType;
 import org.apache.hudi.common.util.ValidationUtils;
 
 import org.apache.flink.annotation.Internal;
@@ -27,6 +26,7 @@
 import org.apache.flink.table.data.TimestampData;
 import org.apache.flink.table.types.logical.DecimalType;
 import org.apache.flink.table.types.logical.LogicalType;
+import org.apache.flink.table.types.logical.TimestampType;
 
 import java.math.BigDecimal;
 import java.nio.charset.StandardCharsets;
diff --git a/hudi-flink/src/main/java/org/apache/hudi/util/ViewStorageProperties.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ViewStorageProperties.java
similarity index 100%
rename from hudi-flink/src/main/java/org/apache/hudi/util/ViewStorageProperties.java
rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ViewStorageProperties.java
diff --git a/hudi-flink/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory b/hudi-flink-datasource/hudi-flink/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory
similarity index 100%
rename from hudi-flink/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory
rename to hudi-flink-datasource/hudi-flink/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory
diff --git a/hudi-flink/src/test/java/org/apache/hudi/sink/ITTestDataStreamWrite.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/ITTestDataStreamWrite.java
similarity index 99%
rename from hudi-flink/src/test/java/org/apache/hudi/sink/ITTestDataStreamWrite.java
rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/ITTestDataStreamWrite.java
index 4864696da144..bbf9009fd5b7 100644
--- a/hudi-flink/src/test/java/org/apache/hudi/sink/ITTestDataStreamWrite.java
+++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/ITTestDataStreamWrite.java
@@ -178,7 +178,7 @@ public void testMergeOnReadWriteWithCompaction(String indexType) throws Exceptio
     DataStream<Object> pipeline = Pipelines.hoodieStreamWrite(conf, parallelism, hoodieRecordDataStream);
     Pipelines.clean(conf, pipeline);
     Pipelines.compact(conf, pipeline);
-    JobClient client = execEnv.executeAsync(execEnv.getStreamGraph());
+    JobClient client = execEnv.executeAsync("mor-write-with-compact");
     if (client.getJobStatus().get() != JobStatus.FAILED) {
       try {
         TimeUnit.SECONDS.sleep(20); // wait long enough for the compaction to finish
hudi-flink/src/test/java/org/apache/hudi/sink/TestStreamWriteOperatorCoordinator.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/TestStreamWriteOperatorCoordinator.java index 6266c3052309..814a8f19e159 100644 --- a/hudi-flink/src/test/java/org/apache/hudi/sink/TestStreamWriteOperatorCoordinator.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/TestStreamWriteOperatorCoordinator.java @@ -18,14 +18,6 @@ package org.apache.hudi.sink; -import org.apache.flink.configuration.Configuration; -import org.apache.flink.runtime.jobgraph.OperatorID; -import org.apache.flink.runtime.operators.coordination.MockOperatorCoordinatorContext; -import org.apache.flink.runtime.operators.coordination.OperatorCoordinator; -import org.apache.flink.runtime.operators.coordination.OperatorEvent; -import org.apache.flink.util.FileUtils; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.hudi.client.WriteStatus; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieWriteStat; @@ -38,6 +30,15 @@ import org.apache.hudi.util.StreamerUtil; import org.apache.hudi.utils.TestConfigurations; import org.apache.hudi.utils.TestUtils; + +import org.apache.flink.configuration.Configuration; +import org.apache.flink.runtime.jobgraph.OperatorID; +import org.apache.flink.runtime.operators.coordination.MockOperatorCoordinatorContext; +import org.apache.flink.runtime.operators.coordination.OperatorCoordinator; +import org.apache.flink.runtime.operators.coordination.OperatorEvent; +import org.apache.flink.util.FileUtils; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; diff --git a/hudi-flink/src/test/java/org/apache/hudi/sink/TestWriteCopyOnWrite.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/TestWriteCopyOnWrite.java similarity index 99% rename from hudi-flink/src/test/java/org/apache/hudi/sink/TestWriteCopyOnWrite.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/TestWriteCopyOnWrite.java index 08035aff5a16..4771a7a3455b 100644 --- a/hudi-flink/src/test/java/org/apache/hudi/sink/TestWriteCopyOnWrite.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/TestWriteCopyOnWrite.java @@ -119,7 +119,7 @@ public void testInsert() throws Exception { // open the function and ingest data preparePipeline() .consume(TestData.DATA_SET_INSERT) - .assertEmptyDataFiles() + .assertEmptyDataFiles() .checkpoint(1) .assertNextEvent() .checkpointComplete(1) diff --git a/hudi-flink/src/test/java/org/apache/hudi/sink/TestWriteMergeOnRead.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/TestWriteMergeOnRead.java similarity index 100% rename from hudi-flink/src/test/java/org/apache/hudi/sink/TestWriteMergeOnRead.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/TestWriteMergeOnRead.java diff --git a/hudi-flink/src/test/java/org/apache/hudi/sink/TestWriteMergeOnReadWithCompact.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/TestWriteMergeOnReadWithCompact.java similarity index 100% rename from hudi-flink/src/test/java/org/apache/hudi/sink/TestWriteMergeOnReadWithCompact.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/TestWriteMergeOnReadWithCompact.java diff --git 
a/hudi-flink/src/test/java/org/apache/hudi/sink/bulk/TestRowDataKeyGen.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/bulk/TestRowDataKeyGen.java similarity index 100% rename from hudi-flink/src/test/java/org/apache/hudi/sink/bulk/TestRowDataKeyGen.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/bulk/TestRowDataKeyGen.java diff --git a/hudi-flink/src/test/java/org/apache/hudi/sink/compact/ITTestHoodieFlinkCompactor.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/compact/ITTestHoodieFlinkCompactor.java similarity index 100% rename from hudi-flink/src/test/java/org/apache/hudi/sink/compact/ITTestHoodieFlinkCompactor.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/compact/ITTestHoodieFlinkCompactor.java diff --git a/hudi-flink/src/test/java/org/apache/hudi/sink/meta/TestCkpMetadata.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/meta/TestCkpMetadata.java similarity index 100% rename from hudi-flink/src/test/java/org/apache/hudi/sink/meta/TestCkpMetadata.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/meta/TestCkpMetadata.java diff --git a/hudi-flink/src/test/java/org/apache/hudi/sink/partitioner/TestBucketAssigner.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/partitioner/TestBucketAssigner.java similarity index 100% rename from hudi-flink/src/test/java/org/apache/hudi/sink/partitioner/TestBucketAssigner.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/partitioner/TestBucketAssigner.java diff --git a/hudi-flink/src/test/java/org/apache/hudi/sink/utils/CollectorOutput.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/CollectorOutput.java similarity index 62% rename from hudi-flink/src/test/java/org/apache/hudi/sink/utils/CollectorOutput.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/CollectorOutput.java index c386e6287b8c..b18cfac51b44 100644 --- a/hudi-flink/src/test/java/org/apache/hudi/sink/utils/CollectorOutput.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/CollectorOutput.java @@ -18,59 +18,45 @@ package org.apache.hudi.sink.utils; +import org.apache.hudi.adapter.OutputAdapter; + import org.apache.flink.streaming.api.operators.Output; import org.apache.flink.streaming.api.watermark.Watermark; import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker; -import org.apache.flink.streaming.runtime.streamrecord.StreamElement; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; -import org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus; -import org.apache.flink.util.InstantiationUtil; import org.apache.flink.util.OutputTag; -import java.io.IOException; +import java.util.ArrayList; import java.util.List; /** * Collecting {@link Output} for {@link StreamRecord}. 
*/ -public class CollectorOutput implements Output> { +public class CollectorOutput implements OutputAdapter> { - private final List list; + private final List records; - public CollectorOutput(List list) { - this.list = list; + public CollectorOutput() { + this.records = new ArrayList<>(); } - public List getList() { - return list; + public List getRecords() { + return this.records; } @Override public void emitWatermark(Watermark mark) { - list.add(mark); - } - - @Override - public void emitWatermarkStatus(WatermarkStatus watermarkStatus) { - + // no operation } @Override public void emitLatencyMarker(LatencyMarker latencyMarker) { - list.add(latencyMarker); + // no operation } @Override public void collect(StreamRecord record) { - try { - ClassLoader cl = record.getClass().getClassLoader(); - T copied = - InstantiationUtil.deserializeObject( - InstantiationUtil.serializeObject(record.getValue()), cl); - list.add(record.copy(copied)); - } catch (IOException | ClassNotFoundException ex) { - throw new RuntimeException("Unable to deserialize record: " + record, ex); - } + records.add(record.getValue()); } @Override @@ -80,5 +66,6 @@ public void collect(OutputTag outputTag, StreamRecord record) { @Override public void close() { + this.records.clear(); } } diff --git a/hudi-flink/src/test/java/org/apache/hudi/sink/utils/CompactFunctionWrapper.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/CompactFunctionWrapper.java similarity index 79% rename from hudi-flink/src/test/java/org/apache/hudi/sink/utils/CompactFunctionWrapper.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/CompactFunctionWrapper.java index e703515de3b7..1dba81ce2b7b 100644 --- a/hudi-flink/src/test/java/org/apache/hudi/sink/utils/CompactFunctionWrapper.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/CompactFunctionWrapper.java @@ -33,14 +33,8 @@ import org.apache.flink.runtime.operators.coordination.MockOperatorCoordinatorContext; import org.apache.flink.runtime.operators.testutils.MockEnvironment; import org.apache.flink.runtime.operators.testutils.MockEnvironmentBuilder; -import org.apache.flink.streaming.api.operators.Output; import org.apache.flink.streaming.api.operators.StreamingRuntimeContext; -import org.apache.flink.streaming.api.watermark.Watermark; -import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker; -import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; -import org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus; import org.apache.flink.util.Collector; -import org.apache.flink.util.OutputTag; import java.util.ArrayList; import java.util.List; @@ -95,44 +89,13 @@ public void openFunction() throws Exception { } public void compact(long checkpointID) throws Exception { - List events = new ArrayList<>(); // collect the CompactEvents. 
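The CollectorOutput rewrite above buffers emitted values directly instead of round-tripping each one through serialization. That is cheaper, but if an operator under test reuses its output objects, the buffered entries would all alias the same instance; a hedged helper that preserves the removed copying behavior for that case (the class name is illustrative):

```java
import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
import org.apache.flink.util.InstantiationUtil;

final class RecordCopies {
  private RecordCopies() {
  }

  // Serialize/deserialize round-trip, as the removed collect(...) above did;
  // the record value must be serializable for this to work.
  static <T> T defensiveCopy(StreamRecord<T> record) {
    try {
      ClassLoader cl = record.getClass().getClassLoader();
      return InstantiationUtil.deserializeObject(
          InstantiationUtil.serializeObject(record.getValue()), cl);
    } catch (Exception ex) {
      throw new RuntimeException("Unable to copy record: " + record, ex);
    }
  }
}
```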
- Output> output = new Output>() { - @Override - public void emitWatermark(Watermark watermark) { - - } - - @Override - public void emitWatermarkStatus(WatermarkStatus watermarkStatus) { - - } - - @Override - public void collect(OutputTag outputTag, StreamRecord streamRecord) { - - } - - @Override - public void emitLatencyMarker(LatencyMarker latencyMarker) { - - } - - @Override - public void collect(StreamRecord record) { - events.add(record.getValue()); - } - - @Override - public void close() { - - } - }; + CollectorOutput output = new CollectorOutput<>(); compactionPlanOperator.setOutput(output); compactionPlanOperator.notifyCheckpointComplete(checkpointID); // collect the CompactCommitEvents List compactCommitEvents = new ArrayList<>(); - for (CompactionPlanEvent event : events) { + for (CompactionPlanEvent event : output.getRecords()) { compactFunction.processElement(event, null, new Collector() { @Override public void collect(CompactionCommitEvent event) { diff --git a/hudi-flink/src/test/java/org/apache/hudi/sink/utils/InsertFunctionWrapper.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/InsertFunctionWrapper.java similarity index 100% rename from hudi-flink/src/test/java/org/apache/hudi/sink/utils/InsertFunctionWrapper.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/InsertFunctionWrapper.java diff --git a/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockCoordinatorExecutor.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockCoordinatorExecutor.java similarity index 100% rename from hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockCoordinatorExecutor.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockCoordinatorExecutor.java diff --git a/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockMapState.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockMapState.java similarity index 100% rename from hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockMapState.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockMapState.java diff --git a/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockOperatorStateStore.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockOperatorStateStore.java similarity index 100% rename from hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockOperatorStateStore.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockOperatorStateStore.java diff --git a/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockStateInitializationContext.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockStateInitializationContext.java similarity index 89% rename from hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockStateInitializationContext.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockStateInitializationContext.java index c582e9553b30..945d1bbbe75f 100644 --- a/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockStateInitializationContext.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockStateInitializationContext.java @@ -17,18 +17,17 @@ package org.apache.hudi.sink.utils; +import org.apache.hudi.adapter.StateInitializationContextAdapter; + import org.apache.flink.api.common.state.KeyedStateStore; import 
org.apache.flink.runtime.state.FunctionInitializationContext; import org.apache.flink.runtime.state.KeyGroupStatePartitionStreamProvider; -import org.apache.flink.runtime.state.StateInitializationContext; import org.apache.flink.runtime.state.StatePartitionStreamProvider; -import java.util.OptionalLong; - /** * A {@link FunctionInitializationContext} for testing purpose. */ -public class MockStateInitializationContext implements StateInitializationContext { +public class MockStateInitializationContext implements StateInitializationContextAdapter { private final MockOperatorStateStore operatorStateStore; @@ -41,11 +40,6 @@ public boolean isRestored() { return false; } - @Override - public OptionalLong getRestoredCheckpointId() { - return OptionalLong.empty(); - } - @Override public MockOperatorStateStore getOperatorStateStore() { return operatorStateStore; diff --git a/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockStreamingRuntimeContext.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockStreamingRuntimeContext.java similarity index 93% rename from hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockStreamingRuntimeContext.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockStreamingRuntimeContext.java index 8a66f1dce011..7c5b79700e43 100644 --- a/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockStreamingRuntimeContext.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockStreamingRuntimeContext.java @@ -17,10 +17,10 @@ package org.apache.hudi.sink.utils; +import org.apache.hudi.adapter.StreamingRuntimeContextAdapter; + import org.apache.flink.api.common.ExecutionConfig; import org.apache.flink.api.common.state.KeyedStateStore; -import org.apache.flink.metrics.groups.OperatorMetricGroup; -import org.apache.flink.metrics.groups.UnregisteredMetricsGroup; import org.apache.flink.runtime.jobgraph.OperatorID; import org.apache.flink.runtime.memory.MemoryManager; import org.apache.flink.runtime.operators.testutils.MockEnvironment; @@ -37,7 +37,7 @@ * *
<p>
NOTE: Adapted from Apache Flink, the MockStreamOperator is modified to support MapState. */ -public class MockStreamingRuntimeContext extends StreamingRuntimeContext { +public class MockStreamingRuntimeContext extends StreamingRuntimeContextAdapter { private final boolean isCheckpointingEnabled; @@ -68,11 +68,6 @@ public MockStreamingRuntimeContext( this.subtaskIndex = subtaskIndex; } - @Override - public OperatorMetricGroup getMetricGroup() { - return UnregisteredMetricsGroup.createOperatorMetricGroup(); - } - @Override public boolean isCheckpointingEnabled() { return isCheckpointingEnabled; diff --git a/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockValueState.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockValueState.java similarity index 100% rename from hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockValueState.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockValueState.java diff --git a/hudi-flink/src/test/java/org/apache/hudi/sink/utils/StreamWriteFunctionWrapper.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/StreamWriteFunctionWrapper.java similarity index 88% rename from hudi-flink/src/test/java/org/apache/hudi/sink/utils/StreamWriteFunctionWrapper.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/StreamWriteFunctionWrapper.java index 2c8bac057668..2bb0f69d18ca 100644 --- a/hudi-flink/src/test/java/org/apache/hudi/sink/utils/StreamWriteFunctionWrapper.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/StreamWriteFunctionWrapper.java @@ -42,18 +42,14 @@ import org.apache.flink.runtime.operators.testutils.MockEnvironment; import org.apache.flink.runtime.operators.testutils.MockEnvironmentBuilder; import org.apache.flink.streaming.api.graph.StreamConfig; -import org.apache.flink.streaming.api.operators.Output; import org.apache.flink.streaming.api.operators.StreamingRuntimeContext; import org.apache.flink.streaming.api.operators.collect.utils.MockFunctionSnapshotContext; import org.apache.flink.streaming.api.operators.collect.utils.MockOperatorEventGateway; -import org.apache.flink.streaming.runtime.streamrecord.StreamElement; -import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; import org.apache.flink.streaming.util.MockStreamTask; import org.apache.flink.streaming.util.MockStreamTaskBuilder; import org.apache.flink.table.data.RowData; import org.apache.flink.util.Collector; -import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -98,8 +94,6 @@ public class StreamWriteFunctionWrapper implements TestFunctionWrapper { private CompactFunctionWrapper compactFunctionWrapper; - private final Output>> output; - private final MockStreamTask streamTask; private final StreamConfig streamConfig; @@ -128,7 +122,6 @@ public StreamWriteFunctionWrapper(String tablePath, Configuration conf) throws E this.stateInitializationContext = new MockStateInitializationContext(); this.compactFunctionWrapper = new CompactFunctionWrapper(this.conf); this.asyncCompaction = StreamerUtil.needsAsyncCompaction(conf); - this.output = new CollectorOutput<>(new ArrayList<>()); this.streamConfig = new StreamConfig(conf); streamConfig.setOperatorID(new OperatorID()); this.streamTask = new MockStreamTaskBuilder(environment) @@ -144,16 +137,23 @@ public void openFunction() throws Exception { toHoodieFunction.setRuntimeContext(runtimeContext); 
toHoodieFunction.open(conf); + bucketAssignerFunction = new BucketAssignFunction<>(conf); + bucketAssignerFunction.setRuntimeContext(runtimeContext); + bucketAssignerFunction.open(conf); + bucketAssignerFunction.initializeState(this.stateInitializationContext); + if (conf.getBoolean(FlinkOptions.INDEX_BOOTSTRAP_ENABLED)) { bootstrapOperator = new BootstrapOperator<>(conf); + CollectorOutput> output = new CollectorOutput<>(); bootstrapOperator.setup(streamTask, streamConfig, output); bootstrapOperator.initializeState(this.stateInitializationContext); - } - bucketAssignerFunction = new BucketAssignFunction<>(conf); - bucketAssignerFunction.setRuntimeContext(runtimeContext); - bucketAssignerFunction.open(conf); - bucketAssignerFunction.initializeState(this.stateInitializationContext); + Collector> collector = ScalaCollector.getInstance(); + for (HoodieRecord bootstrapRecord : output.getRecords()) { + bucketAssignerFunction.processElement(bootstrapRecord, null, collector); + bucketAssignFunctionContext.setCurrentKey(bootstrapRecord.getRecordKey()); + } + } setupWriteFunction(); @@ -162,39 +162,12 @@ public void openFunction() throws Exception { } } - @SuppressWarnings("unchecked") public void invoke(I record) throws Exception { HoodieRecord hoodieRecord = toHoodieFunction.map((RowData) record); - HoodieRecord[] hoodieRecords = new HoodieRecord[1]; - Collector> collector = new Collector>() { - @Override - public void collect(HoodieRecord record) { - hoodieRecords[0] = record; - } - - @Override - public void close() { - - } - }; - - if (conf.getBoolean(FlinkOptions.INDEX_BOOTSTRAP_ENABLED)) { - List list = ((CollectorOutput) output).getList(); - for (StreamElement streamElement : list) { - if (streamElement.isRecord()) { - HoodieRecord bootstrapRecord = (HoodieRecord) streamElement.asRecord().getValue(); - bucketAssignerFunction.processElement(bootstrapRecord, null, collector); - bucketAssignFunctionContext.setCurrentKey(bootstrapRecord.getRecordKey()); - } - } - - bootstrapOperator.processElement(new StreamRecord<>(hoodieRecord)); - list.clear(); - } - + ScalaCollector> collector = ScalaCollector.getInstance(); bucketAssignerFunction.processElement(hoodieRecord, null, collector); bucketAssignFunctionContext.setCurrentKey(hoodieRecord.getRecordKey()); - writeFunction.processElement(hoodieRecords[0], null, null); + writeFunction.processElement(collector.getVal(), null, null); } public WriteMetadataEvent[] getEventBuffer() { @@ -307,4 +280,26 @@ public boolean isKeyInState(String key) { return this.updateKeys.contains(key); } } + + private static class ScalaCollector implements Collector { + private T val; + + public static ScalaCollector getInstance() { + return new ScalaCollector<>(); + } + + @Override + public void collect(T t) { + this.val = t; + } + + @Override + public void close() { + this.val = null; + } + + public T getVal() { + return val; + } + } } diff --git a/hudi-flink/src/test/java/org/apache/hudi/sink/utils/TestFunctionWrapper.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/TestFunctionWrapper.java similarity index 100% rename from hudi-flink/src/test/java/org/apache/hudi/sink/utils/TestFunctionWrapper.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/TestFunctionWrapper.java diff --git a/hudi-flink/src/test/java/org/apache/hudi/sink/utils/TestWriteBase.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/TestWriteBase.java similarity index 100% rename from 
hudi-flink/src/test/java/org/apache/hudi/sink/utils/TestWriteBase.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/TestWriteBase.java diff --git a/hudi-flink/src/test/java/org/apache/hudi/source/TestFileIndex.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/TestFileIndex.java similarity index 100% rename from hudi-flink/src/test/java/org/apache/hudi/source/TestFileIndex.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/TestFileIndex.java diff --git a/hudi-flink/src/test/java/org/apache/hudi/source/TestStreamReadMonitoringFunction.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/TestStreamReadMonitoringFunction.java similarity index 100% rename from hudi-flink/src/test/java/org/apache/hudi/source/TestStreamReadMonitoringFunction.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/TestStreamReadMonitoringFunction.java diff --git a/hudi-flink/src/test/java/org/apache/hudi/source/TestStreamReadOperator.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/TestStreamReadOperator.java similarity index 100% rename from hudi-flink/src/test/java/org/apache/hudi/source/TestStreamReadOperator.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/TestStreamReadOperator.java diff --git a/hudi-flink/src/test/java/org/apache/hudi/table/ITTestHoodieDataSource.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/ITTestHoodieDataSource.java similarity index 97% rename from hudi-flink/src/test/java/org/apache/hudi/table/ITTestHoodieDataSource.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/ITTestHoodieDataSource.java index 903be90b964b..72c0890bbf64 100644 --- a/hudi-flink/src/test/java/org/apache/hudi/table/ITTestHoodieDataSource.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/ITTestHoodieDataSource.java @@ -18,6 +18,7 @@ package org.apache.hudi.table; +import org.apache.hudi.adapter.TestTableEnvs; import org.apache.hudi.common.model.DefaultHoodieRecordPayload; import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.table.timeline.HoodieTimeline; @@ -31,12 +32,10 @@ import org.apache.flink.configuration.Configuration; import org.apache.flink.core.execution.JobClient; -import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.table.api.EnvironmentSettings; import org.apache.flink.table.api.TableEnvironment; import org.apache.flink.table.api.TableResult; import org.apache.flink.table.api.TableSchema; -import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; import org.apache.flink.table.api.config.ExecutionConfigOptions; import org.apache.flink.table.api.internal.TableEnvironmentImpl; import org.apache.flink.table.catalog.ObjectPath; @@ -89,24 +88,7 @@ void beforeEach() { execConf.setString("restart-strategy", "fixed-delay"); execConf.setString("restart-strategy.fixed-delay.attempts", "0"); - Configuration conf = new Configuration(); - // for batch upsert use cases: current suggestion is to disable these 2 options, - // from 1.14, flink runtime execution mode has switched from streaming - // to batch for batch execution mode(before that, both streaming and batch use streaming execution mode), - // current batch execution mode has these limitations: - // - // 1. 
the keyed stream default to always sort the inputs by key; - // 2. the batch state-backend requires the inputs sort by state key - // - // For our hudi batch pipeline upsert case, we rely on the consuming sequence for index records and data records, - // the index records must be loaded first before data records for BucketAssignFunction to keep upsert semantics correct, - // so we suggest disabling these 2 options to use streaming state-backend for batch execution mode - // to keep the strategy before 1.14. - conf.setBoolean("execution.sorted-inputs.enabled", false); - conf.setBoolean("execution.batch-state-backend.enabled", false); - StreamExecutionEnvironment execEnv = StreamExecutionEnvironment.getExecutionEnvironment(conf); - settings = EnvironmentSettings.newInstance().inBatchMode().build(); - batchTableEnv = StreamTableEnvironment.create(execEnv, settings); + batchTableEnv = TestTableEnvs.getBatchTableEnv(); batchTableEnv.getConfig().getConfiguration() .setInteger(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM, 1); } diff --git a/hudi-flink/src/test/java/org/apache/hudi/table/TestHoodieTableFactory.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/TestHoodieTableFactory.java similarity index 100% rename from hudi-flink/src/test/java/org/apache/hudi/table/TestHoodieTableFactory.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/TestHoodieTableFactory.java diff --git a/hudi-flink/src/test/java/org/apache/hudi/table/TestHoodieTableSource.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/TestHoodieTableSource.java similarity index 100% rename from hudi-flink/src/test/java/org/apache/hudi/table/TestHoodieTableSource.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/TestHoodieTableSource.java diff --git a/hudi-flink/src/test/java/org/apache/hudi/table/catalog/TestHoodieCatalog.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/catalog/TestHoodieCatalog.java similarity index 98% rename from hudi-flink/src/test/java/org/apache/hudi/table/catalog/TestHoodieCatalog.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/catalog/TestHoodieCatalog.java index 67d3f1899832..3930e763fbaa 100644 --- a/hudi-flink/src/test/java/org/apache/hudi/table/catalog/TestHoodieCatalog.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/catalog/TestHoodieCatalog.java @@ -101,6 +101,7 @@ public class TestHoodieCatalog { new ResolvedSchema(EXPECTED_TABLE_COLUMNS, Collections.emptyList(), CONSTRAINTS); private static final Map EXPECTED_OPTIONS = new HashMap<>(); + static { EXPECTED_OPTIONS.put(FlinkOptions.TABLE_TYPE.key(), FlinkOptions.TABLE_TYPE_MERGE_ON_READ); EXPECTED_OPTIONS.put(FlinkOptions.INDEX_GLOBAL_ENABLED.key(), "false"); @@ -243,7 +244,7 @@ public void testGetTable() throws Exception { // validate comment assertEquals(EXPECTED_CATALOG_TABLE.getComment(), actualTable.getComment()); // validate partition key - assertEquals(EXPECTED_CATALOG_TABLE.getPartitionKeys(),((CatalogTable) actualTable).getPartitionKeys()); + assertEquals(EXPECTED_CATALOG_TABLE.getPartitionKeys(), ((CatalogTable) actualTable).getPartitionKeys()); } @Test diff --git a/hudi-flink/src/test/java/org/apache/hudi/table/format/TestInputFormat.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/format/TestInputFormat.java similarity index 100% rename from 
hudi-flink/src/test/java/org/apache/hudi/table/format/TestInputFormat.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/format/TestInputFormat.java diff --git a/hudi-flink/src/test/java/org/apache/hudi/utils/SchemaBuilder.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/SchemaBuilder.java similarity index 100% rename from hudi-flink/src/test/java/org/apache/hudi/utils/SchemaBuilder.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/SchemaBuilder.java diff --git a/hudi-flink/src/test/java/org/apache/hudi/utils/TestCompactionUtil.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestCompactionUtil.java similarity index 100% rename from hudi-flink/src/test/java/org/apache/hudi/utils/TestCompactionUtil.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestCompactionUtil.java diff --git a/hudi-flink/src/test/java/org/apache/hudi/utils/TestConfigurations.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestConfigurations.java similarity index 93% rename from hudi-flink/src/test/java/org/apache/hudi/utils/TestConfigurations.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestConfigurations.java index d1b6e04a1835..e1106671799b 100644 --- a/hudi-flink/src/test/java/org/apache/hudi/utils/TestConfigurations.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestConfigurations.java @@ -47,11 +47,11 @@ private TestConfigurations() { } public static final DataType ROW_DATA_TYPE = DataTypes.ROW( - DataTypes.FIELD("uuid", DataTypes.VARCHAR(20)),// record key - DataTypes.FIELD("name", DataTypes.VARCHAR(10)), - DataTypes.FIELD("age", DataTypes.INT()), - DataTypes.FIELD("ts", DataTypes.TIMESTAMP(3)), // precombine field - DataTypes.FIELD("partition", DataTypes.VARCHAR(10))) + DataTypes.FIELD("uuid", DataTypes.VARCHAR(20)),// record key + DataTypes.FIELD("name", DataTypes.VARCHAR(10)), + DataTypes.FIELD("age", DataTypes.INT()), + DataTypes.FIELD("ts", DataTypes.TIMESTAMP(3)), // precombine field + DataTypes.FIELD("partition", DataTypes.VARCHAR(10))) .notNull(); public static final RowType ROW_TYPE = (RowType) ROW_DATA_TYPE.getLogicalType(); @@ -64,12 +64,12 @@ private TestConfigurations() { .map(RowType.RowField::asSummaryString).collect(Collectors.toList()); public static final DataType ROW_DATA_TYPE_WIDER = DataTypes.ROW( - DataTypes.FIELD("uuid", DataTypes.VARCHAR(20)),// record key - DataTypes.FIELD("name", DataTypes.VARCHAR(10)), - DataTypes.FIELD("age", DataTypes.INT()), - DataTypes.FIELD("salary", DataTypes.DOUBLE()), - DataTypes.FIELD("ts", DataTypes.TIMESTAMP(3)), // precombine field - DataTypes.FIELD("partition", DataTypes.VARCHAR(10))) + DataTypes.FIELD("uuid", DataTypes.VARCHAR(20)),// record key + DataTypes.FIELD("name", DataTypes.VARCHAR(10)), + DataTypes.FIELD("age", DataTypes.INT()), + DataTypes.FIELD("salary", DataTypes.DOUBLE()), + DataTypes.FIELD("ts", DataTypes.TIMESTAMP(3)), // precombine field + DataTypes.FIELD("partition", DataTypes.VARCHAR(10))) .notNull(); public static final RowType ROW_TYPE_WIDER = (RowType) ROW_DATA_TYPE_WIDER.getLogicalType(); diff --git a/hudi-flink/src/test/java/org/apache/hudi/utils/TestData.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestData.java similarity index 100% rename from hudi-flink/src/test/java/org/apache/hudi/utils/TestData.java rename to 
hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestData.java diff --git a/hudi-flink/src/test/java/org/apache/hudi/utils/TestHoodieRowData.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestHoodieRowData.java similarity index 90% rename from hudi-flink/src/test/java/org/apache/hudi/utils/TestHoodieRowData.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestHoodieRowData.java index 7729042837a3..10e7ca1b2c29 100644 --- a/hudi-flink/src/test/java/org/apache/hudi/utils/TestHoodieRowData.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestHoodieRowData.java @@ -54,17 +54,17 @@ public class TestHoodieRowData { private static final int ROW_INDEX = 10; private static final DataType BASIC_DATA_TYPE = DataTypes.ROW( - DataTypes.FIELD("integer", DataTypes.INT()), - DataTypes.FIELD("string", DataTypes.STRING()), - DataTypes.FIELD("boolean", DataTypes.BOOLEAN()), - DataTypes.FIELD("short", DataTypes.SMALLINT()), - DataTypes.FIELD("byte", DataTypes.TINYINT()), - DataTypes.FIELD("long", DataTypes.BIGINT()), - DataTypes.FIELD("float", DataTypes.FLOAT()), - DataTypes.FIELD("double", DataTypes.DOUBLE()), - DataTypes.FIELD("decimal", DataTypes.DECIMAL(10, 4)), - DataTypes.FIELD("binary", DataTypes.BYTES()), - DataTypes.FIELD("row", DataTypes.ROW())) + DataTypes.FIELD("integer", DataTypes.INT()), + DataTypes.FIELD("string", DataTypes.STRING()), + DataTypes.FIELD("boolean", DataTypes.BOOLEAN()), + DataTypes.FIELD("short", DataTypes.SMALLINT()), + DataTypes.FIELD("byte", DataTypes.TINYINT()), + DataTypes.FIELD("long", DataTypes.BIGINT()), + DataTypes.FIELD("float", DataTypes.FLOAT()), + DataTypes.FIELD("double", DataTypes.DOUBLE()), + DataTypes.FIELD("decimal", DataTypes.DECIMAL(10, 4)), + DataTypes.FIELD("binary", DataTypes.BYTES()), + DataTypes.FIELD("row", DataTypes.ROW())) .notNull(); private static final RowType ROW_TYPE = (RowType) BASIC_DATA_TYPE.getLogicalType(); diff --git a/hudi-flink/src/test/java/org/apache/hudi/utils/TestSQL.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestSQL.java similarity index 100% rename from hudi-flink/src/test/java/org/apache/hudi/utils/TestSQL.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestSQL.java diff --git a/hudi-flink/src/test/java/org/apache/hudi/utils/TestStreamerUtil.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestStreamerUtil.java similarity index 100% rename from hudi-flink/src/test/java/org/apache/hudi/utils/TestStreamerUtil.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestStreamerUtil.java diff --git a/hudi-flink/src/test/java/org/apache/hudi/utils/TestStringToRowDataConverter.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestStringToRowDataConverter.java similarity index 100% rename from hudi-flink/src/test/java/org/apache/hudi/utils/TestStringToRowDataConverter.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestStringToRowDataConverter.java diff --git a/hudi-flink/src/test/java/org/apache/hudi/utils/TestUtils.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestUtils.java similarity index 100% rename from hudi-flink/src/test/java/org/apache/hudi/utils/TestUtils.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestUtils.java diff --git 
a/hudi-flink/src/test/java/org/apache/hudi/utils/TestViewStorageProperties.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestViewStorageProperties.java similarity index 100% rename from hudi-flink/src/test/java/org/apache/hudi/utils/TestViewStorageProperties.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestViewStorageProperties.java diff --git a/hudi-flink/src/test/java/org/apache/hudi/utils/factory/CollectSinkTableFactory.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/factory/CollectSinkTableFactory.java similarity index 100% rename from hudi-flink/src/test/java/org/apache/hudi/utils/factory/CollectSinkTableFactory.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/factory/CollectSinkTableFactory.java diff --git a/hudi-flink/src/test/java/org/apache/hudi/utils/factory/ContinuousFileSourceFactory.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/factory/ContinuousFileSourceFactory.java similarity index 100% rename from hudi-flink/src/test/java/org/apache/hudi/utils/factory/ContinuousFileSourceFactory.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/factory/ContinuousFileSourceFactory.java diff --git a/hudi-flink/src/test/java/org/apache/hudi/utils/source/ContinuousFileSource.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/source/ContinuousFileSource.java similarity index 100% rename from hudi-flink/src/test/java/org/apache/hudi/utils/source/ContinuousFileSource.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/source/ContinuousFileSource.java diff --git a/hudi-flink/src/test/resources/META-INF/services/org.apache.flink.table.factories.Factory b/hudi-flink-datasource/hudi-flink/src/test/resources/META-INF/services/org.apache.flink.table.factories.Factory similarity index 100% rename from hudi-flink/src/test/resources/META-INF/services/org.apache.flink.table.factories.Factory rename to hudi-flink-datasource/hudi-flink/src/test/resources/META-INF/services/org.apache.flink.table.factories.Factory diff --git a/hudi-flink/src/test/resources/debezium_json.data b/hudi-flink-datasource/hudi-flink/src/test/resources/debezium_json.data similarity index 100% rename from hudi-flink/src/test/resources/debezium_json.data rename to hudi-flink-datasource/hudi-flink/src/test/resources/debezium_json.data diff --git a/hudi-flink/src/test/resources/log4j-surefire-quiet.properties b/hudi-flink-datasource/hudi-flink/src/test/resources/log4j-surefire-quiet.properties similarity index 99% rename from hudi-flink/src/test/resources/log4j-surefire-quiet.properties rename to hudi-flink-datasource/hudi-flink/src/test/resources/log4j-surefire-quiet.properties index 2b94ea290306..40171af5f408 100644 --- a/hudi-flink/src/test/resources/log4j-surefire-quiet.properties +++ b/hudi-flink-datasource/hudi-flink/src/test/resources/log4j-surefire-quiet.properties @@ -18,7 +18,6 @@ log4j.rootLogger=WARN, CONSOLE log4j.logger.org.apache.hudi=DEBUG log4j.logger.org.apache.hadoop.hbase=ERROR - # CONSOLE is set to be a ConsoleAppender. log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender # CONSOLE uses PatternLayout. 
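ITTestHoodieDataSource above now obtains its batch environment from a version adapter via `TestTableEnvs.getBatchTableEnv()`. A hedged sketch of what such an adapter presumably does for Flink 1.14, reconstructed from the inline setup and explanatory comment that the hunk removes; the two option keys only matter from 1.14 onward:

```java
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

public class TestTableEnvsSketch {
  public static TableEnvironment getBatchTableEnv() {
    Configuration conf = new Configuration();
    // Keep the pre-1.14 streaming state-backend behavior for batch upsert
    // tests, so index records are still consumed before data records.
    conf.setBoolean("execution.sorted-inputs.enabled", false);
    conf.setBoolean("execution.batch-state-backend.enabled", false);
    StreamExecutionEnvironment execEnv =
        StreamExecutionEnvironment.getExecutionEnvironment(conf);
    EnvironmentSettings settings = EnvironmentSettings.newInstance().inBatchMode().build();
    return StreamTableEnvironment.create(execEnv, settings);
  }
}
```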
diff --git a/hudi-flink/src/test/resources/log4j-surefire.properties b/hudi-flink-datasource/hudi-flink/src/test/resources/log4j-surefire.properties similarity index 99% rename from hudi-flink/src/test/resources/log4j-surefire.properties rename to hudi-flink-datasource/hudi-flink/src/test/resources/log4j-surefire.properties index 8dcd17f303f6..5806188cb5c5 100644 --- a/hudi-flink/src/test/resources/log4j-surefire.properties +++ b/hudi-flink-datasource/hudi-flink/src/test/resources/log4j-surefire.properties @@ -19,7 +19,6 @@ log4j.rootLogger=INFO, CONSOLE log4j.logger.org.apache=INFO log4j.logger.org.apache.hudi=DEBUG log4j.logger.org.apache.hadoop.hbase=ERROR - # A1 is set to be a ConsoleAppender. log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender # A1 uses PatternLayout. diff --git a/hudi-flink/src/test/resources/test_read_schema.avsc b/hudi-flink-datasource/hudi-flink/src/test/resources/test_read_schema.avsc similarity index 100% rename from hudi-flink/src/test/resources/test_read_schema.avsc rename to hudi-flink-datasource/hudi-flink/src/test/resources/test_read_schema.avsc diff --git a/hudi-flink/src/test/resources/test_source.data b/hudi-flink-datasource/hudi-flink/src/test/resources/test_source.data similarity index 100% rename from hudi-flink/src/test/resources/test_source.data rename to hudi-flink-datasource/hudi-flink/src/test/resources/test_source.data diff --git a/hudi-flink/src/test/resources/test_source_2.data b/hudi-flink-datasource/hudi-flink/src/test/resources/test_source_2.data similarity index 100% rename from hudi-flink/src/test/resources/test_source_2.data rename to hudi-flink-datasource/hudi-flink/src/test/resources/test_source_2.data diff --git a/hudi-flink/src/test/resources/test_source_3.data b/hudi-flink-datasource/hudi-flink/src/test/resources/test_source_3.data similarity index 100% rename from hudi-flink/src/test/resources/test_source_3.data rename to hudi-flink-datasource/hudi-flink/src/test/resources/test_source_3.data diff --git a/hudi-flink/src/test/resources/test_source_4.data b/hudi-flink-datasource/hudi-flink/src/test/resources/test_source_4.data similarity index 100% rename from hudi-flink/src/test/resources/test_source_4.data rename to hudi-flink-datasource/hudi-flink/src/test/resources/test_source_4.data diff --git a/hudi-flink/src/test/resources/test_source_5.data b/hudi-flink-datasource/hudi-flink/src/test/resources/test_source_5.data similarity index 100% rename from hudi-flink/src/test/resources/test_source_5.data rename to hudi-flink-datasource/hudi-flink/src/test/resources/test_source_5.data diff --git a/hudi-flink-datasource/hudi-flink1.13.x/pom.xml b/hudi-flink-datasource/hudi-flink1.13.x/pom.xml new file mode 100644 index 000000000000..f6e4f5bc88df --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.13.x/pom.xml @@ -0,0 +1,90 @@ + + + + + hudi-flink-datasource + org.apache.hudi + 0.11.0-SNAPSHOT + + 4.0.0 + + hudi-flink1.13.x + 0.11.0-SNAPSHOT + jar + + + ${project.parent.parent.basedir} + + + + + org.apache.flink + flink-table-runtime-blink_${scala.binary.version} + ${flink1.13.version} + provided + + + org.apache.flink + flink-streaming-java_${scala.binary.version} + ${flink1.13.version} + provided + + + org.apache.flink + flink-core + ${flink1.13.version} + provided + + + org.apache.flink + flink-runtime_${scala.binary.version} + ${flink1.13.version} + test + test-jar + + + + + + + org.jacoco + jacoco-maven-plugin + + + org.apache.maven.plugins + maven-jar-plugin + + + + test-jar + + test-compile + + + + false + + + + org.apache.rat 
+ apache-rat-plugin + + + + \ No newline at end of file diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/flink/table/data/ColumnarArrayData.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/flink/table/data/ColumnarArrayData.java new file mode 100644 index 000000000000..09da6180d9c6 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/flink/table/data/ColumnarArrayData.java @@ -0,0 +1,262 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.table.data; + +import org.apache.flink.table.data.binary.TypedSetters; +import org.apache.flink.table.data.vector.ArrayColumnVector; +import org.apache.flink.table.data.vector.BooleanColumnVector; +import org.apache.flink.table.data.vector.ByteColumnVector; +import org.apache.flink.table.data.vector.BytesColumnVector; +import org.apache.flink.table.data.vector.ColumnVector; +import org.apache.flink.table.data.vector.DecimalColumnVector; +import org.apache.flink.table.data.vector.DoubleColumnVector; +import org.apache.flink.table.data.vector.FloatColumnVector; +import org.apache.flink.table.data.vector.IntColumnVector; +import org.apache.flink.table.data.vector.LongColumnVector; +import org.apache.flink.table.data.vector.MapColumnVector; +import org.apache.flink.table.data.vector.RowColumnVector; +import org.apache.flink.table.data.vector.ShortColumnVector; +import org.apache.flink.table.data.vector.TimestampColumnVector; + +import java.util.Arrays; + +/** + * Columnar array to support access to vector column data. + * + *
<p>
References {@code org.apache.flink.table.data.ColumnarArrayData} to include FLINK-15390. + */ +public final class ColumnarArrayData implements ArrayData, TypedSetters { + + private final ColumnVector data; + private final int offset; + private final int numElements; + + public ColumnarArrayData(ColumnVector data, int offset, int numElements) { + this.data = data; + this.offset = offset; + this.numElements = numElements; + } + + @Override + public int size() { + return numElements; + } + + @Override + public boolean isNullAt(int pos) { + return data.isNullAt(offset + pos); + } + + @Override + public void setNullAt(int pos) { + throw new UnsupportedOperationException("Not support the operation!"); + } + + @Override + public boolean getBoolean(int pos) { + return ((BooleanColumnVector) data).getBoolean(offset + pos); + } + + @Override + public byte getByte(int pos) { + return ((ByteColumnVector) data).getByte(offset + pos); + } + + @Override + public short getShort(int pos) { + return ((ShortColumnVector) data).getShort(offset + pos); + } + + @Override + public int getInt(int pos) { + return ((IntColumnVector) data).getInt(offset + pos); + } + + @Override + public long getLong(int pos) { + return ((LongColumnVector) data).getLong(offset + pos); + } + + @Override + public float getFloat(int pos) { + return ((FloatColumnVector) data).getFloat(offset + pos); + } + + @Override + public double getDouble(int pos) { + return ((DoubleColumnVector) data).getDouble(offset + pos); + } + + @Override + public StringData getString(int pos) { + BytesColumnVector.Bytes byteArray = getByteArray(pos); + return StringData.fromBytes(byteArray.data, byteArray.offset, byteArray.len); + } + + @Override + public DecimalData getDecimal(int pos, int precision, int scale) { + return ((DecimalColumnVector) data).getDecimal(offset + pos, precision, scale); + } + + @Override + public TimestampData getTimestamp(int pos, int precision) { + return ((TimestampColumnVector) data).getTimestamp(offset + pos, precision); + } + + @Override + public RawValueData getRawValue(int pos) { + throw new UnsupportedOperationException("RawValueData is not supported."); + } + + @Override + public byte[] getBinary(int pos) { + BytesColumnVector.Bytes byteArray = getByteArray(pos); + if (byteArray.len == byteArray.data.length) { + return byteArray.data; + } else { + return Arrays.copyOfRange(byteArray.data, byteArray.offset, byteArray.len); + } + } + + @Override + public ArrayData getArray(int pos) { + return ((ArrayColumnVector) data).getArray(offset + pos); + } + + @Override + public MapData getMap(int pos) { + return ((MapColumnVector) data).getMap(offset + pos); + } + + @Override + public RowData getRow(int pos, int numFields) { + return ((RowColumnVector) data).getRow(offset + pos); + } + + @Override + public void setBoolean(int pos, boolean value) { + throw new UnsupportedOperationException("Not support the operation!"); + } + + @Override + public void setByte(int pos, byte value) { + throw new UnsupportedOperationException("Not support the operation!"); + } + + @Override + public void setShort(int pos, short value) { + throw new UnsupportedOperationException("Not support the operation!"); + } + + @Override + public void setInt(int pos, int value) { + throw new UnsupportedOperationException("Not support the operation!"); + } + + @Override + public void setLong(int pos, long value) { + throw new UnsupportedOperationException("Not support the operation!"); + } + + @Override + public void setFloat(int pos, float value) { + throw new 
UnsupportedOperationException("Not support the operation!"); + } + + @Override + public void setDouble(int pos, double value) { + throw new UnsupportedOperationException("Not support the operation!"); + } + + @Override + public void setDecimal(int pos, DecimalData value, int precision) { + throw new UnsupportedOperationException("Not support the operation!"); + } + + @Override + public void setTimestamp(int pos, TimestampData value, int precision) { + throw new UnsupportedOperationException("Not support the operation!"); + } + + @Override + public boolean[] toBooleanArray() { + boolean[] res = new boolean[numElements]; + for (int i = 0; i < numElements; i++) { + res[i] = getBoolean(i); + } + return res; + } + + @Override + public byte[] toByteArray() { + byte[] res = new byte[numElements]; + for (int i = 0; i < numElements; i++) { + res[i] = getByte(i); + } + return res; + } + + @Override + public short[] toShortArray() { + short[] res = new short[numElements]; + for (int i = 0; i < numElements; i++) { + res[i] = getShort(i); + } + return res; + } + + @Override + public int[] toIntArray() { + int[] res = new int[numElements]; + for (int i = 0; i < numElements; i++) { + res[i] = getInt(i); + } + return res; + } + + @Override + public long[] toLongArray() { + long[] res = new long[numElements]; + for (int i = 0; i < numElements; i++) { + res[i] = getLong(i); + } + return res; + } + + @Override + public float[] toFloatArray() { + float[] res = new float[numElements]; + for (int i = 0; i < numElements; i++) { + res[i] = getFloat(i); + } + return res; + } + + @Override + public double[] toDoubleArray() { + double[] res = new double[numElements]; + for (int i = 0; i < numElements; i++) { + res[i] = getDouble(i); + } + return res; + } + + private BytesColumnVector.Bytes getByteArray(int pos) { + return ((BytesColumnVector) data).getBytes(offset + pos); + } +} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/flink/table/data/ColumnarMapData.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/flink/table/data/ColumnarMapData.java new file mode 100644 index 000000000000..86f4db3954a2 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/flink/table/data/ColumnarMapData.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.table.data; + +import org.apache.flink.table.data.vector.ColumnVector; + +/** + * Columnar map to support access to vector column data. + * + *
<p>
Referenced from flink 1.14.0 {@code org.apache.flink.table.data.ColumnarMapData}. + */ +public final class ColumnarMapData implements MapData { + + private final ColumnVector keyColumnVector; + private final ColumnVector valueColumnVector; + private final int offset; + private final int numElements; + + public ColumnarMapData( + ColumnVector keyColumnVector, + ColumnVector valueColumnVector, + int offset, + int numElements) { + this.keyColumnVector = keyColumnVector; + this.valueColumnVector = valueColumnVector; + this.offset = offset; + this.numElements = numElements; + } + + @Override + public int size() { + return numElements; + } + + @Override + public ArrayData keyArray() { + return new ColumnarArrayData(keyColumnVector, offset, numElements); + } + + @Override + public ArrayData valueArray() { + return new ColumnarArrayData(valueColumnVector, offset, numElements); + } + + @Override + public boolean equals(Object o) { + throw new UnsupportedOperationException( + "ColumnarMapData do not support equals, please compare fields one by one!"); + } + + @Override + public int hashCode() { + throw new UnsupportedOperationException( + "ColumnarMapData do not support hashCode, please hash fields one by one!"); + } +} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/flink/table/data/ColumnarRowData.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/flink/table/data/ColumnarRowData.java new file mode 100644 index 000000000000..fbe225878dfc --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/flink/table/data/ColumnarRowData.java @@ -0,0 +1,223 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.table.data; + +import org.apache.flink.table.data.binary.TypedSetters; +import org.apache.flink.table.data.vector.BytesColumnVector.Bytes; +import org.apache.flink.table.data.vector.VectorizedColumnBatch; +import org.apache.flink.types.RowKind; + +/** + * Columnar row to support access to vector column data. + * It is a row view in {@link VectorizedColumnBatch}. + * + *

+ * <p>References {@code org.apache.flink.table.data.ColumnarRowData} to include FLINK-15390.
+ */
+public final class ColumnarRowData implements RowData, TypedSetters {
+
+  private RowKind rowKind = RowKind.INSERT;
+  private VectorizedColumnBatch vectorizedColumnBatch;
+  private int rowId;
+
+  public ColumnarRowData() {
+  }
+
+  public ColumnarRowData(VectorizedColumnBatch vectorizedColumnBatch) {
+    this(vectorizedColumnBatch, 0);
+  }
+
+  public ColumnarRowData(VectorizedColumnBatch vectorizedColumnBatch, int rowId) {
+    this.vectorizedColumnBatch = vectorizedColumnBatch;
+    this.rowId = rowId;
+  }
+
+  public void setVectorizedColumnBatch(VectorizedColumnBatch vectorizedColumnBatch) {
+    this.vectorizedColumnBatch = vectorizedColumnBatch;
+    this.rowId = 0;
+  }
+
+  public void setRowId(int rowId) {
+    this.rowId = rowId;
+  }
+
+  @Override
+  public RowKind getRowKind() {
+    return rowKind;
+  }
+
+  @Override
+  public void setRowKind(RowKind kind) {
+    this.rowKind = kind;
+  }
+
+  @Override
+  public int getArity() {
+    return vectorizedColumnBatch.getArity();
+  }
+
+  @Override
+  public boolean isNullAt(int pos) {
+    return vectorizedColumnBatch.isNullAt(rowId, pos);
+  }
+
+  @Override
+  public boolean getBoolean(int pos) {
+    return vectorizedColumnBatch.getBoolean(rowId, pos);
+  }
+
+  @Override
+  public byte getByte(int pos) {
+    return vectorizedColumnBatch.getByte(rowId, pos);
+  }
+
+  @Override
+  public short getShort(int pos) {
+    return vectorizedColumnBatch.getShort(rowId, pos);
+  }
+
+  @Override
+  public int getInt(int pos) {
+    return vectorizedColumnBatch.getInt(rowId, pos);
+  }
+
+  @Override
+  public long getLong(int pos) {
+    return vectorizedColumnBatch.getLong(rowId, pos);
+  }
+
+  @Override
+  public float getFloat(int pos) {
+    return vectorizedColumnBatch.getFloat(rowId, pos);
+  }
+
+  @Override
+  public double getDouble(int pos) {
+    return vectorizedColumnBatch.getDouble(rowId, pos);
+  }
+
+  @Override
+  public StringData getString(int pos) {
+    Bytes byteArray = vectorizedColumnBatch.getByteArray(rowId, pos);
+    return StringData.fromBytes(byteArray.data, byteArray.offset, byteArray.len);
+  }
+
+  @Override
+  public DecimalData getDecimal(int pos, int precision, int scale) {
+    return vectorizedColumnBatch.getDecimal(rowId, pos, precision, scale);
+  }
+
+  @Override
+  public TimestampData getTimestamp(int pos, int precision) {
+    return vectorizedColumnBatch.getTimestamp(rowId, pos, precision);
+  }
+
+  @Override
+  public <T> RawValueData<T> getRawValue(int pos) {
+    throw new UnsupportedOperationException("RawValueData is not supported.");
+  }
+
+  @Override
+  public byte[] getBinary(int pos) {
+    Bytes byteArray = vectorizedColumnBatch.getByteArray(rowId, pos);
+    if (byteArray.len == byteArray.data.length) {
+      return byteArray.data;
+    } else {
+      byte[] ret = new byte[byteArray.len];
+      System.arraycopy(byteArray.data, byteArray.offset, ret, 0, byteArray.len);
+      return ret;
+    }
+  }
+
+  @Override
+  public RowData getRow(int pos, int numFields) {
+    return vectorizedColumnBatch.getRow(rowId, pos);
+  }
+
+  @Override
+  public ArrayData getArray(int pos) {
+    return vectorizedColumnBatch.getArray(rowId, pos);
+  }
+
+  @Override
+  public MapData getMap(int pos) {
+    return vectorizedColumnBatch.getMap(rowId, pos);
+  }
+
+  @Override
+  public void setNullAt(int pos) {
+    throw new UnsupportedOperationException("Operation is not supported.");
+  }
+
+  @Override
+  public void setBoolean(int pos, boolean value) {
+    throw new UnsupportedOperationException("Operation is not supported.");
+  }
+
+  @Override
+  public void setByte(int pos, byte value) {
+    throw new UnsupportedOperationException("Operation is not supported.");
+  }
+
+  @Override
+  public void setShort(int pos, short value) {
+    throw new UnsupportedOperationException("Operation is not supported.");
+  }
+
+  @Override
+  public void setInt(int pos, int value) {
+    throw new UnsupportedOperationException("Operation is not supported.");
+  }
+
+  @Override
+  public void setLong(int pos, long value) {
+    throw new UnsupportedOperationException("Operation is not supported.");
+  }
+
+  @Override
+  public void setFloat(int pos, float value) {
+    throw new UnsupportedOperationException("Operation is not supported.");
+  }
+
+  @Override
+  public void setDouble(int pos, double value) {
+    throw new UnsupportedOperationException("Operation is not supported.");
+  }
+
+  @Override
+  public void setDecimal(int pos, DecimalData value, int precision) {
+    throw new UnsupportedOperationException("Operation is not supported.");
+  }
+
+  @Override
+  public void setTimestamp(int pos, TimestampData value, int precision) {
+    throw new UnsupportedOperationException("Operation is not supported.");
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    throw new UnsupportedOperationException(
+        "ColumnarRowData does not support equals(); compare the fields one by one instead.");
+  }
+
+  @Override
+  public int hashCode() {
+    throw new UnsupportedOperationException(
+        "ColumnarRowData does not support hashCode(); hash the fields one by one instead.");
+  }
+}
diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/flink/table/data/vector/MapColumnVector.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/flink/table/data/vector/MapColumnVector.java
new file mode 100644
index 000000000000..9cb28b8610a1
--- /dev/null
+++ b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/flink/table/data/vector/MapColumnVector.java
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.data.vector;
+
+import org.apache.flink.table.data.MapData;
+
+/**
+ * Map column vector.
+ */
+public interface MapColumnVector extends ColumnVector {
+  MapData getMap(int i);
+}
diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/flink/table/data/vector/RowColumnVector.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/flink/table/data/vector/RowColumnVector.java
new file mode 100644
index 000000000000..5fa3031e5174
--- /dev/null
+++ b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/flink/table/data/vector/RowColumnVector.java
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.table.data.vector; + +import org.apache.flink.table.data.ColumnarRowData; + +/** + * Row column vector. + */ +public interface RowColumnVector extends ColumnVector { + ColumnarRowData getRow(int i); +} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/flink/table/data/vector/VectorizedColumnBatch.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/flink/table/data/vector/VectorizedColumnBatch.java new file mode 100644 index 000000000000..a0224705bd9f --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/flink/table/data/vector/VectorizedColumnBatch.java @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.table.data.vector; + +import org.apache.flink.table.data.ArrayData; +import org.apache.flink.table.data.DecimalData; +import org.apache.flink.table.data.MapData; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.TimestampData; +import org.apache.flink.table.data.vector.BytesColumnVector.Bytes; + +import java.io.Serializable; +import java.nio.charset.StandardCharsets; + +/** + * A VectorizedColumnBatch is a set of rows, organized with each column as a vector. It is the unit + * of query execution, organized to minimize the cost per row. + * + *

{@code VectorizedColumnBatch}s are influenced by Apache Hive VectorizedRowBatch. + * + *
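+ * <p>Typed read sketch; the vector array and the column types below are assumptions:
+ * <pre>{@code
+ *   VectorizedColumnBatch batch = new VectorizedColumnBatch(vectors);
+ *   batch.setNumRows(1024);
+ *   long v = batch.getLong(rowId, 0); // column 0 assumed to be a LongColumnVector
+ * }</pre>
+ *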

References {@code org.apache.flink.table.data.vector.VectorizedColumnBatch} to include FLINK-15390. + */ +public class VectorizedColumnBatch implements Serializable { + private static final long serialVersionUID = 8180323238728166155L; + + /** + * This number is carefully chosen to minimize overhead and typically allows one + * VectorizedColumnBatch to fit in cache. + */ + public static final int DEFAULT_SIZE = 2048; + + private int numRows; + public final ColumnVector[] columns; + + public VectorizedColumnBatch(ColumnVector[] vectors) { + this.columns = vectors; + } + + public void setNumRows(int numRows) { + this.numRows = numRows; + } + + public int getNumRows() { + return numRows; + } + + public int getArity() { + return columns.length; + } + + public boolean isNullAt(int rowId, int colId) { + return columns[colId].isNullAt(rowId); + } + + public boolean getBoolean(int rowId, int colId) { + return ((BooleanColumnVector) columns[colId]).getBoolean(rowId); + } + + public byte getByte(int rowId, int colId) { + return ((ByteColumnVector) columns[colId]).getByte(rowId); + } + + public short getShort(int rowId, int colId) { + return ((ShortColumnVector) columns[colId]).getShort(rowId); + } + + public int getInt(int rowId, int colId) { + return ((IntColumnVector) columns[colId]).getInt(rowId); + } + + public long getLong(int rowId, int colId) { + return ((LongColumnVector) columns[colId]).getLong(rowId); + } + + public float getFloat(int rowId, int colId) { + return ((FloatColumnVector) columns[colId]).getFloat(rowId); + } + + public double getDouble(int rowId, int colId) { + return ((DoubleColumnVector) columns[colId]).getDouble(rowId); + } + + public Bytes getByteArray(int rowId, int colId) { + return ((BytesColumnVector) columns[colId]).getBytes(rowId); + } + + private byte[] getBytes(int rowId, int colId) { + Bytes byteArray = getByteArray(rowId, colId); + if (byteArray.len == byteArray.data.length) { + return byteArray.data; + } else { + return byteArray.getBytes(); + } + } + + public String getString(int rowId, int colId) { + Bytes byteArray = getByteArray(rowId, colId); + return new String(byteArray.data, byteArray.offset, byteArray.len, StandardCharsets.UTF_8); + } + + public DecimalData getDecimal(int rowId, int colId, int precision, int scale) { + return ((DecimalColumnVector) (columns[colId])).getDecimal(rowId, precision, scale); + } + + public TimestampData getTimestamp(int rowId, int colId, int precision) { + return ((TimestampColumnVector) (columns[colId])).getTimestamp(rowId, precision); + } + + public ArrayData getArray(int rowId, int colId) { + return ((ArrayColumnVector) columns[colId]).getArray(rowId); + } + + public RowData getRow(int rowId, int colId) { + return ((RowColumnVector) columns[colId]).getRow(rowId); + } + + public MapData getMap(int rowId, int colId) { + return ((MapColumnVector) columns[colId]).getMap(rowId); + } +} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorAdapter.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorAdapter.java new file mode 100644 index 000000000000..51c53f368fb9 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorAdapter.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.streaming.api.operators.AbstractStreamOperator; + +/** + * Adapter clazz for {@code AbstractStreamOperator}. + */ +public abstract class AbstractStreamOperatorAdapter extends AbstractStreamOperator { + @Override + public void close() throws Exception { + super.dispose(); + } + + public void finish() throws Exception { + super.close(); + } +} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorFactoryAdapter.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorFactoryAdapter.java new file mode 100644 index 000000000000..0ea0968f1758 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorFactoryAdapter.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.streaming.api.operators.AbstractStreamOperatorFactory; +import org.apache.flink.streaming.api.operators.MailboxExecutor; +import org.apache.flink.streaming.api.operators.YieldingOperatorFactory; + +import static org.apache.flink.util.Preconditions.checkNotNull; + +/** + * Adapter clazz for {@link AbstractStreamOperatorFactory}. + */ +public abstract class AbstractStreamOperatorFactoryAdapter + extends AbstractStreamOperatorFactory implements YieldingOperatorFactory { + private transient MailboxExecutor mailboxExecutor; + + @Override + public void setMailboxExecutor(MailboxExecutor mailboxExecutor) { + this.mailboxExecutor = mailboxExecutor; + } + + public MailboxExecutorAdapter getMailboxExecutorAdapter() { + return new MailboxExecutorAdapter(getMailboxExecutor()); + } + + /** + * Provides the mailbox executor iff this factory implements {@link YieldingOperatorFactory}. 
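+   *
+   * <p>Illustrative call pattern from a subclass; the operator method and the action
+   * name are assumptions, not part of this patch:
+   * <pre>{@code
+   *   getMailboxExecutorAdapter().execute(this::flushRemaining, "flush remaining buffer");
+   * }</pre>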
+ */ + protected MailboxExecutor getMailboxExecutor() { + return checkNotNull( + mailboxExecutor, "Factory does not implement %s", YieldingOperatorFactory.class); + } +} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/MailboxExecutorAdapter.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/MailboxExecutorAdapter.java new file mode 100644 index 000000000000..9ae3ca6912f6 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/MailboxExecutorAdapter.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.streaming.api.operators.MailboxExecutor; +import org.apache.flink.util.function.ThrowingRunnable; + +/** + * Adapter clazz for {@link MailboxExecutor}. + */ +public class MailboxExecutorAdapter { + private final MailboxExecutor executor; + + public MailboxExecutorAdapter(MailboxExecutor executor) { + this.executor = executor; + } + + public void execute(ThrowingRunnable command, String description) { + this.executor.execute(command, description); + } +} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/RateLimiterAdapter.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/RateLimiterAdapter.java new file mode 100644 index 000000000000..6d058de89bc5 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/RateLimiterAdapter.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.shaded.guava18.com.google.common.util.concurrent.RateLimiter; + +/** + * Bridge class for shaded guava clazz {@code RateLimiter}. 
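+ *
+ * <p>Usage sketch; the permit rate is an assumed value:
+ * <pre>{@code
+ *   RateLimiterAdapter limiter = RateLimiterAdapter.create(200.0); // 200 permits/sec
+ *   limiter.acquire(); // blocks until the next permit is available
+ * }</pre>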
+ */ +public class RateLimiterAdapter { + private final RateLimiter rateLimiter; + + private RateLimiterAdapter(double permitsPerSecond) { + this.rateLimiter = RateLimiter.create(permitsPerSecond); + } + + public static RateLimiterAdapter create(double permitsPerSecond) { + return new RateLimiterAdapter(permitsPerSecond); + } + + public void acquire() { + this.rateLimiter.acquire(); + } +} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/Utils.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/Utils.java new file mode 100644 index 000000000000..9eb52c676589 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/Utils.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.streaming.api.TimeCharacteristic; +import org.apache.flink.streaming.api.functions.source.SourceFunction; +import org.apache.flink.streaming.api.operators.Output; +import org.apache.flink.streaming.api.operators.StreamSourceContexts; +import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; +import org.apache.flink.streaming.runtime.tasks.ProcessingTimeService; +import org.apache.flink.streaming.runtime.tasks.StreamTask; + +/** + * Adapter utils. + */ +public class Utils { + public static SourceFunction.SourceContext getSourceContext( + TimeCharacteristic timeCharacteristic, + ProcessingTimeService processingTimeService, + StreamTask streamTask, + Output> output, + long watermarkInterval) { + return StreamSourceContexts.getSourceContext( + timeCharacteristic, + processingTimeService, + new Object(), // no actual locking needed + streamTask.getStreamStatusMaintainer(), + output, + watermarkInterval, + -1); + } +} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/test/java/org/apache/hudi/adapter/OutputAdapter.java b/hudi-flink-datasource/hudi-flink1.13.x/src/test/java/org/apache/hudi/adapter/OutputAdapter.java new file mode 100644 index 000000000000..18686b811c40 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.13.x/src/test/java/org/apache/hudi/adapter/OutputAdapter.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.streaming.api.operators.Output; + +/** + * Adapter clazz for {@link Output}. + */ +public interface OutputAdapter extends Output { +} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/test/java/org/apache/hudi/adapter/StateInitializationContextAdapter.java b/hudi-flink-datasource/hudi-flink1.13.x/src/test/java/org/apache/hudi/adapter/StateInitializationContextAdapter.java new file mode 100644 index 000000000000..8563d2422b64 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.13.x/src/test/java/org/apache/hudi/adapter/StateInitializationContextAdapter.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.runtime.state.StateInitializationContext; + +/** + * Adapter clazz for {@link StateInitializationContext}. + */ +public interface StateInitializationContextAdapter extends StateInitializationContext { +} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/test/java/org/apache/hudi/adapter/StreamingRuntimeContextAdapter.java b/hudi-flink-datasource/hudi-flink1.13.x/src/test/java/org/apache/hudi/adapter/StreamingRuntimeContextAdapter.java new file mode 100644 index 000000000000..176783e8108c --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.13.x/src/test/java/org/apache/hudi/adapter/StreamingRuntimeContextAdapter.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*/
+
+package org.apache.hudi.adapter;
+
+import org.apache.flink.api.common.accumulators.Accumulator;
+import org.apache.flink.metrics.MetricGroup;
+import org.apache.flink.metrics.groups.UnregisteredMetricsGroup;
+import org.apache.flink.runtime.execution.Environment;
+import org.apache.flink.streaming.api.operators.AbstractStreamOperator;
+import org.apache.flink.streaming.api.operators.StreamingRuntimeContext;
+
+import java.util.Map;
+
+/**
+ * Adapter clazz for {@link StreamingRuntimeContext}.
+ */
+public class StreamingRuntimeContextAdapter extends StreamingRuntimeContext {
+
+  public StreamingRuntimeContextAdapter(AbstractStreamOperator<?> operator, Environment env,
+                                        Map<String, Accumulator<?, ?>> accumulators) {
+    super(operator, env, accumulators);
+  }
+
+  @Override
+  public MetricGroup getMetricGroup() {
+    return new UnregisteredMetricsGroup();
+  }
+}
diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/test/java/org/apache/hudi/adapter/TestTableEnvs.java b/hudi-flink-datasource/hudi-flink1.13.x/src/test/java/org/apache/hudi/adapter/TestTableEnvs.java
new file mode 100644
index 000000000000..e3088356709f
--- /dev/null
+++ b/hudi-flink-datasource/hudi-flink1.13.x/src/test/java/org/apache/hudi/adapter/TestTableEnvs.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.adapter;
+
+import org.apache.flink.table.api.EnvironmentSettings;
+import org.apache.flink.table.api.TableEnvironment;
+import org.apache.flink.table.api.internal.TableEnvironmentImpl;
+
+/**
+ * TableEnv for test goals.
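+ *
+ * <p>Usage sketch; the query is illustrative only:
+ * <pre>{@code
+ *   TableEnvironment tEnv = TestTableEnvs.getBatchTableEnv();
+ *   tEnv.executeSql("SELECT 1 AS id").print(); // runs on the batch planner
+ * }</pre>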
+ */ +public class TestTableEnvs { + + public static TableEnvironment getBatchTableEnv() { + EnvironmentSettings settings = EnvironmentSettings.newInstance().inBatchMode().build(); + return TableEnvironmentImpl.create(settings); + } +} diff --git a/hudi-flink-datasource/hudi-flink1.14.x/pom.xml b/hudi-flink-datasource/hudi-flink1.14.x/pom.xml new file mode 100644 index 000000000000..0e5df91b49c9 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.14.x/pom.xml @@ -0,0 +1,102 @@ + + + + + hudi-flink-datasource + org.apache.hudi + 0.11.0-SNAPSHOT + + 4.0.0 + + hudi-flink1.14.x + 0.11.0-SNAPSHOT + jar + + + ${project.parent.parent.basedir} + + + + + org.apache.flink + flink-table-api-java + ${flink1.14.version} + provided + + + org.apache.flink + flink-table-api-java-bridge_${scala.binary.version} + ${flink1.14.version} + provided + + + org.apache.flink + flink-shaded-guava + 30.1.1-jre-14.0 + provided + + + org.apache.flink + flink-core + ${flink1.14.version} + provided + + + org.apache.flink + flink-streaming-java_${scala.binary.version} + ${flink1.14.version} + provided + + + org.apache.flink + flink-runtime + ${flink1.14.version} + test + test-jar + + + + + + + org.jacoco + jacoco-maven-plugin + + + org.apache.maven.plugins + maven-jar-plugin + + + + test-jar + + test-compile + + + + false + + + + org.apache.rat + apache-rat-plugin + + + + \ No newline at end of file diff --git a/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorAdapter.java b/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorAdapter.java new file mode 100644 index 000000000000..d4c6bc3a8f4d --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorAdapter.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.streaming.api.operators.AbstractStreamOperator; + +/** + * Adapter clazz for {@code AbstractStreamOperator}. + */ +public abstract class AbstractStreamOperatorAdapter extends AbstractStreamOperator { +} diff --git a/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorFactoryAdapter.java b/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorFactoryAdapter.java new file mode 100644 index 000000000000..6dcfe71ccfd9 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorFactoryAdapter.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.streaming.api.operators.AbstractStreamOperatorFactory; +import org.apache.flink.streaming.api.operators.YieldingOperatorFactory; + +/** + * Adapter clazz for {@link AbstractStreamOperatorFactory}. + */ +public abstract class AbstractStreamOperatorFactoryAdapter + extends AbstractStreamOperatorFactory implements YieldingOperatorFactory { + + public MailboxExecutorAdapter getMailboxExecutorAdapter() { + return new MailboxExecutorAdapter(getMailboxExecutor()); + } +} diff --git a/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/MailboxExecutorAdapter.java b/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/MailboxExecutorAdapter.java new file mode 100644 index 000000000000..0c836f3db391 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/MailboxExecutorAdapter.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.api.common.operators.MailboxExecutor; +import org.apache.flink.util.function.ThrowingRunnable; + +/** + * Adapter clazz for {@link MailboxExecutor}. + */ +public class MailboxExecutorAdapter { + private final MailboxExecutor executor; + + public MailboxExecutorAdapter(MailboxExecutor executor) { + this.executor = executor; + } + + public void execute(ThrowingRunnable command, String description) { + this.executor.execute(command, description); + } +} diff --git a/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/RateLimiterAdapter.java b/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/RateLimiterAdapter.java new file mode 100644 index 000000000000..865c0c81d4d9 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/RateLimiterAdapter.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.shaded.guava30.com.google.common.util.concurrent.RateLimiter; + +/** + * Bridge class for shaded guava clazz {@code RateLimiter}. + */ +public class RateLimiterAdapter { + private final RateLimiter rateLimiter; + + private RateLimiterAdapter(double permitsPerSecond) { + this.rateLimiter = RateLimiter.create(permitsPerSecond); + } + + public static RateLimiterAdapter create(double permitsPerSecond) { + return new RateLimiterAdapter(permitsPerSecond); + } + + public void acquire() { + this.rateLimiter.acquire(); + } +} diff --git a/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/Utils.java b/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/Utils.java new file mode 100644 index 000000000000..41ac0ffcee57 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/Utils.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.streaming.api.TimeCharacteristic; +import org.apache.flink.streaming.api.functions.source.SourceFunction; +import org.apache.flink.streaming.api.operators.Output; +import org.apache.flink.streaming.api.operators.StreamSourceContexts; +import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; +import org.apache.flink.streaming.runtime.tasks.ProcessingTimeService; +import org.apache.flink.streaming.runtime.tasks.StreamTask; + +/** + * Adapter utils. 
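+ *
+ * <p>Call-site sketch; the arguments are assumed to come from the enclosing stream
+ * operator, and the {@code RowData} element type is illustrative:
+ * <pre>{@code
+ *   SourceFunction.SourceContext<RowData> ctx = Utils.getSourceContext(
+ *       TimeCharacteristic.ProcessingTime,
+ *       getProcessingTimeService(),
+ *       getContainingTask(),
+ *       output,
+ *       watermarkInterval);
+ * }</pre>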
+ */ +public class Utils { + public static SourceFunction.SourceContext getSourceContext( + TimeCharacteristic timeCharacteristic, + ProcessingTimeService processingTimeService, + StreamTask streamTask, + Output> output, + long watermarkInterval) { + return StreamSourceContexts.getSourceContext( + timeCharacteristic, + processingTimeService, + new Object(), // no actual locking needed + output, + watermarkInterval, + -1, + true); + } +} diff --git a/hudi-flink-datasource/hudi-flink1.14.x/src/test/java/org/apache/hudi/adapter/OutputAdapter.java b/hudi-flink-datasource/hudi-flink1.14.x/src/test/java/org/apache/hudi/adapter/OutputAdapter.java new file mode 100644 index 000000000000..c0d83e6096e3 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.14.x/src/test/java/org/apache/hudi/adapter/OutputAdapter.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.streaming.api.operators.Output; +import org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus; + +/** + * Adapter clazz for {@link Output}. + */ +public interface OutputAdapter extends Output { + @Override + default void emitWatermarkStatus(WatermarkStatus watermarkStatus) { + // no operation + } +} diff --git a/hudi-flink-datasource/hudi-flink1.14.x/src/test/java/org/apache/hudi/adapter/StateInitializationContextAdapter.java b/hudi-flink-datasource/hudi-flink1.14.x/src/test/java/org/apache/hudi/adapter/StateInitializationContextAdapter.java new file mode 100644 index 000000000000..1f76ad692f33 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.14.x/src/test/java/org/apache/hudi/adapter/StateInitializationContextAdapter.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.runtime.state.StateInitializationContext; + +import java.util.OptionalLong; + +/** + * Adapter clazz for {@link StateInitializationContext}. 
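+ *
+ * <p>The default below stubs {@code getRestoredCheckpointId()}, which is new in the
+ * Flink 1.14 API, so shared test fixtures compile against both bridged versions.
+ * Sketch, assuming a test fixture implements this adapter:
+ * <pre>{@code
+ *   // within the fixture:
+ *   OptionalLong restored = getRestoredCheckpointId(); // OptionalLong.empty() by default
+ * }</pre>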
+ */ +public interface StateInitializationContextAdapter extends StateInitializationContext { + @Override + default OptionalLong getRestoredCheckpointId() { + return OptionalLong.empty(); + } +} diff --git a/hudi-flink-datasource/hudi-flink1.14.x/src/test/java/org/apache/hudi/adapter/StreamingRuntimeContextAdapter.java b/hudi-flink-datasource/hudi-flink1.14.x/src/test/java/org/apache/hudi/adapter/StreamingRuntimeContextAdapter.java new file mode 100644 index 000000000000..4461c28943d3 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.14.x/src/test/java/org/apache/hudi/adapter/StreamingRuntimeContextAdapter.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.api.common.accumulators.Accumulator; +import org.apache.flink.metrics.groups.OperatorMetricGroup; +import org.apache.flink.metrics.groups.UnregisteredMetricsGroup; +import org.apache.flink.runtime.execution.Environment; +import org.apache.flink.streaming.api.operators.AbstractStreamOperator; +import org.apache.flink.streaming.api.operators.StreamingRuntimeContext; + +import java.util.Map; + +/** + * Adapter clazz for {@link StreamingRuntimeContext}. + */ +public class StreamingRuntimeContextAdapter extends StreamingRuntimeContext { + + public StreamingRuntimeContextAdapter(AbstractStreamOperator operator, Environment env, + Map> accumulators) { + super(operator, env, accumulators); + } + + @Override + public OperatorMetricGroup getMetricGroup() { + return UnregisteredMetricsGroup.createOperatorMetricGroup(); + } +} diff --git a/hudi-flink-datasource/hudi-flink1.14.x/src/test/java/org/apache/hudi/adapter/TestTableEnvs.java b/hudi-flink-datasource/hudi-flink1.14.x/src/test/java/org/apache/hudi/adapter/TestTableEnvs.java new file mode 100644 index 000000000000..e65437609a21 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.14.x/src/test/java/org/apache/hudi/adapter/TestTableEnvs.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.adapter; + +import org.apache.flink.configuration.Configuration; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.api.EnvironmentSettings; +import org.apache.flink.table.api.TableEnvironment; +import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; + +/** + * TableEnv for test goals. + */ +public class TestTableEnvs { + + public static TableEnvironment getBatchTableEnv() { + Configuration conf = new Configuration(); + // for batch upsert use cases: current suggestion is to disable these 2 options, + // from 1.14, flink runtime execution mode has switched from streaming + // to batch for batch execution mode(before that, both streaming and batch use streaming execution mode), + // current batch execution mode has these limitations: + // + // 1. the keyed stream default to always sort the inputs by key; + // 2. the batch state-backend requires the inputs sort by state key + // + // For our hudi batch pipeline upsert case, we rely on the consuming sequence for index records and data records, + // the index records must be loaded first before data records for BucketAssignFunction to keep upsert semantics correct, + // so we suggest disabling these 2 options to use streaming state-backend for batch execution mode + // to keep the strategy before 1.14. + conf.setBoolean("execution.sorted-inputs.enabled", false); + conf.setBoolean("execution.batch-state-backend.enabled", false); + StreamExecutionEnvironment execEnv = StreamExecutionEnvironment.getExecutionEnvironment(conf); + EnvironmentSettings settings = EnvironmentSettings.newInstance().inBatchMode().build(); + return StreamTableEnvironment.create(execEnv, settings); + } +} diff --git a/hudi-flink-datasource/pom.xml b/hudi-flink-datasource/pom.xml new file mode 100644 index 000000000000..1ce7735b9844 --- /dev/null +++ b/hudi-flink-datasource/pom.xml @@ -0,0 +1,41 @@ + + + + + hudi + org.apache.hudi + 0.11.0-SNAPSHOT + + 4.0.0 + + hudi-flink-datasource + 0.11.0-SNAPSHOT + pom + + + ${project.parent.basedir} + + + + hudi-flink1.13.x + hudi-flink1.14.x + hudi-flink + + + diff --git a/hudi-flink/pom.xml b/hudi-flink/pom.xml deleted file mode 100644 index 7338b23bf4b2..000000000000 --- a/hudi-flink/pom.xml +++ /dev/null @@ -1,349 +0,0 @@ - - - - - - - hudi - org.apache.hudi - 0.11.0-SNAPSHOT - - 4.0.0 - - hudi-flink_${scala.binary.version} - jar - - - ${project.parent.basedir} - 1.11.1 - - - - - - org.jacoco - jacoco-maven-plugin - - - org.apache.maven.plugins - maven-compiler-plugin - - 1.8 - 1.8 - - - - org.apache.maven.plugins - maven-jar-plugin - 3.1.2 - - - - test-jar - - - - - - org.apache.rat - apache-rat-plugin - - - - - - src/main/resources - - - src/test/resources - - - - - - - - org.apache.hudi - hudi-common - ${project.version} - - - org.apache.hudi - hudi-client-common - ${project.version} - - - org.apache.hudi - hudi-flink-client - ${project.version} - - - org.apache.hudi - hudi-hadoop-mr - ${project.version} - - - org.apache.hudi - hudi-hive-sync - ${project.version} - - - org.apache.hudi - hudi-sync-common - ${project.version} - - - - - org.apache.flink - flink-streaming-java_${scala.binary.version} - compile - - - org.apache.flink - flink-clients_${scala.binary.version} - compile - - - com.esotericsoftware.kryo - kryo - - - com.esotericsoftware.minlog - minlog - - - - - org.apache.flink - flink-connector-kafka_${scala.binary.version} - compile - - - org.apache.kafka - kafka-clients - ${kafka.version} - - - org.apache.flink - 
flink-hadoop-compatibility_${scala.binary.version} - ${flink.version} - - - org.apache.flink - flink-parquet_${scala.binary.version} - ${flink.version} - provided - - - org.apache.flink - flink-json - ${flink.version} - provided - - - org.apache.flink - flink-table-common - ${flink.version} - provided - - - org.apache.flink - flink-table-runtime_${scala.binary.version} - ${flink.version} - provided - - - org.apache.flink - flink-table-planner_${scala.binary.version} - ${flink.version} - provided - - - org.apache.flink - flink-statebackend-rocksdb_${scala.binary.version} - ${flink.version} - provided - - - - org.apache.parquet - parquet-hadoop - ${parquet.version} - - - org.xerial.snappy - snappy-java - - - - - - - org.apache.parquet - parquet-avro - ${parquet.version} - test - - - - - org.apache.avro - avro - - 1.10.0 - compile - - - - - org.apache.hadoop - hadoop-mapreduce-client-core - compile - - - org.slf4j - slf4j-log4j12 - - - - - - com.beust - jcommander - compile - - - com.twitter - bijection-avro_${scala.binary.version} - 0.9.7 - - - joda-time - joda-time - 2.5 - - - - ${hive.groupid} - hive-exec - ${hive.version} - ${hive.exec.classifier} - - - javax.mail - mail - - - org.eclipse.jetty.aggregate - * - - - - - - - - - org.junit.jupiter - junit-jupiter-api - test - - - org.junit.jupiter - junit-jupiter-engine - test - - - org.junit.vintage - junit-vintage-engine - test - - - org.junit.jupiter - junit-jupiter-params - test - - - - org.apache.hudi - hudi-common - ${project.version} - tests - test-jar - test - - - org.apache.hudi - hudi-client-common - ${project.version} - tests - test-jar - test - - - org.apache.hudi - hudi-flink-client - ${project.version} - tests - test-jar - test - - - - org.apache.flink - flink-test-utils_${scala.binary.version} - ${flink.version} - test - - - org.apache.flink - flink-runtime - ${flink.version} - test - test-jar - - - org.apache.flink - flink-streaming-java_${scala.binary.version} - ${flink.version} - test - test-jar - - - org.apache.flink - flink-table-runtime_${scala.binary.version} - ${flink.version} - test - test-jar - - - org.apache.flink - flink-json - ${flink.version} - test - test-jar - - - org.apache.flink - flink-csv - ${flink.version} - test - - - diff --git a/hudi-kafka-connect/pom.xml b/hudi-kafka-connect/pom.xml index 8845bfb801ae..961855b03486 100644 --- a/hudi-kafka-connect/pom.xml +++ b/hudi-kafka-connect/pom.xml @@ -142,7 +142,7 @@ org.apache.hudi - hudi-flink_${scala.binary.version} + hudi-flink ${project.version} diff --git a/packaging/hudi-flink-bundle/pom.xml b/packaging/hudi-flink-bundle/pom.xml index 222478090b4b..30ee37a4ecf2 100644 --- a/packaging/hudi-flink-bundle/pom.xml +++ b/packaging/hudi-flink-bundle/pom.xml @@ -25,7 +25,7 @@ ../../pom.xml 4.0.0 - hudi-flink-bundle_${scala.binary.version} + hudi-flink${flink.bundle.version}-bundle_${scala.binary.version} jar @@ -76,7 +76,8 @@ org.apache.hudi:hudi-common org.apache.hudi:hudi-client-common org.apache.hudi:hudi-flink-client - org.apache.hudi:hudi-flink_${scala.binary.version} + org.apache.hudi:hudi-flink + org.apache.hudi:${hudi.flink.module} org.apache.hudi:hudi-hive-sync org.apache.hudi:hudi-sync-common org.apache.hudi:hudi-hadoop-mr @@ -254,7 +255,12 @@ org.apache.hudi - hudi-flink_${scala.binary.version} + ${hudi.flink.module} + ${project.version} + + + org.apache.hudi + hudi-flink ${project.version} diff --git a/pom.xml b/pom.xml index 5be49eb04c18..c61d5ef8f3a7 100644 --- a/pom.xml +++ b/pom.xml @@ -52,7 +52,7 @@ packaging/hudi-timeline-server-bundle 
packaging/hudi-trino-bundle hudi-examples - hudi-flink + hudi-flink-datasource hudi-kafka-connect packaging/hudi-flink-bundle packaging/hudi-kafka-connect-bundle @@ -113,10 +113,17 @@ 0.8.0 4.4.1 ${spark2.version} - - 1.14.3 2.4.4 3.2.1 + + 1.14.4 + 1.13.6 + ${flink1.14.version} + hudi-flink1.14.x + 1.14 + flink-runtime + flink-table-runtime_${scala.binary.version} + flink-table-planner_${scala.binary.version} hudi-spark2 hudi-spark2-common 1.8.2 @@ -1696,6 +1703,35 @@ + + flink1.14 + + true + + flink1.14 + + !disabled + + + + + flink1.13 + + ${flink1.13.version} + flink-runtime_${scala.binary.version} + flink-table-runtime-blink_${scala.binary.version} + flink-table-planner-blink_${scala.binary.version} + hudi-flink1.13.x + 1.13 + true + + + + flink1.13 + + + + skipShadeSources