Skip to content

Commit

Permalink
[HUDI-3193] Decouple hudi-aws from hudi-client-common
Browse files Browse the repository at this point in the history
  • Loading branch information
codope committed May 23, 2022
1 parent 3ef137d commit 145542b
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ public static Builder forRegistry(MetricRegistry registry) {
}

public static class Builder {
private MetricRegistry registry;
private final MetricRegistry registry;
private Clock clock;
private String prefix;
private TimeUnit rateUnit;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,13 @@
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hive.conf.HiveConf;

import static org.apache.hudi.common.util.ValidationUtils.checkArgument;

/**
* Currently Experimental. Utility class that implements syncing a Hudi Table with the
* AWS Glue Data Catalog (https://docs.aws.amazon.com/glue/latest/dg/populate-data-catalog.html)
* to enable querying via Glue ETLs, Athena etc.
*
* <p>
* Extends HiveSyncTool since most logic is similar to Hive syncing,
* except that it uses a different client {@link AWSGlueCatalogSyncClient} that implements
* the necessary functionality using Glue APIs.
Expand All @@ -41,11 +43,7 @@
*/
public class AwsGlueCatalogSyncTool extends HiveSyncTool {

/**
 * Creates the sync tool from raw properties, wrapping the given Hadoop configuration
 * in a {@link HiveConf} before delegating to the {@link HiveSyncTool} constructor.
 *
 * @param props properties passed through unchanged to the parent constructor
 * @param conf  Hadoop configuration used to construct the {@link HiveConf}
 * @param fs    file system passed through unchanged to the parent constructor
 */
public AwsGlueCatalogSyncTool(TypedProperties props, Configuration conf, FileSystem fs) {
super(props, new HiveConf(conf, HiveConf.class), fs);
}

public AwsGlueCatalogSyncTool(HiveSyncConfig hiveSyncConfig, HiveConf hiveConf, FileSystem fs) {
private AwsGlueCatalogSyncTool(HiveSyncConfig hiveSyncConfig, HiveConf hiveConf, FileSystem fs) {
super(hiveSyncConfig, hiveConf, fs);
}

Expand All @@ -65,6 +63,56 @@ public static void main(String[] args) {
FileSystem fs = FSUtils.getFs(cfg.basePath, new Configuration());
HiveConf hiveConf = new HiveConf();
hiveConf.addResource(fs.getConf());
new AwsGlueCatalogSyncTool(cfg, hiveConf, fs).syncHoodieTable();
AwsGlueCatalogSyncTool.newBuilder()
.withHiveSyncConfig(cfg)
.withHiveConf(hiveConf)
.withFileSystem(fs)
.build()
.syncHoodieTable();
}

/**
 * Entry point for assembling an {@link AwsGlueCatalogSyncTool} step by step.
 *
 * @return a freshly created {@link Builder}
 */
public static Builder newBuilder() {
  final Builder builder = new Builder();
  return builder;
}

/**
 * Fluent builder for {@link AwsGlueCatalogSyncTool}.
 *
 * <p>{@link #build()} needs a {@link FileSystem}, a {@link HiveSyncConfig} and a
 * {@link HiveConf}. If no {@link HiveConf} was supplied but a Hadoop
 * {@link Configuration} was, the {@link HiveConf} is derived from that configuration.
 */
public static class Builder {

  private Configuration conf;
  private FileSystem fs;
  private HiveConf hiveConf;
  private HiveSyncConfig hiveSyncConfig;
  // Stored for callers that set it; build() does not read this value.
  private TypedProperties props;

  /**
   * Sets the Hadoop configuration used to derive a {@link HiveConf} when none is
   * supplied through {@link #withHiveConf(HiveConf)}.
   *
   * @param conf Hadoop configuration
   * @return this builder
   */
  public Builder withConfiguration(Configuration conf) {
    this.conf = conf;
    return this;
  }

  /**
   * Sets the file system handed to the sync tool. Required by {@link #build()}.
   *
   * @param fs file system
   * @return this builder
   */
  public Builder withFileSystem(FileSystem fs) {
    this.fs = fs;
    return this;
  }

  /**
   * Sets the Hive configuration handed to the sync tool. Takes precedence over a
   * configuration supplied through {@link #withConfiguration(Configuration)}.
   *
   * @param hiveConf Hive configuration
   * @return this builder
   */
  public Builder withHiveConf(HiveConf hiveConf) {
    this.hiveConf = hiveConf;
    return this;
  }

  /**
   * Sets the sync configuration handed to the sync tool. Required by {@link #build()}.
   *
   * @param hiveSyncConfig sync configuration
   * @return this builder
   */
  public Builder withHiveSyncConfig(HiveSyncConfig hiveSyncConfig) {
    this.hiveSyncConfig = hiveSyncConfig;
    return this;
  }

  /**
   * Stores the given properties on the builder.
   *
   * @param props properties
   * @return this builder
   */
  public Builder withProperties(TypedProperties props) {
    this.props = props;
    return this;
  }

  /**
   * Validates the collected inputs and creates the sync tool.
   *
   * <p>The call fails the {@code checkArgument} validation when the file system or the
   * sync configuration is missing, or when neither a {@link HiveConf} nor a Hadoop
   * {@link Configuration} was provided.
   *
   * @return a new {@link AwsGlueCatalogSyncTool}
   */
  public AwsGlueCatalogSyncTool build() {
    checkArgument(fs != null, "FileSystem is null");
    // A configuration passed to withConfiguration() used to be ignored here; use it as
    // the fallback source for the HiveConf. A local keeps build() free of side effects
    // on the builder's own fields.
    HiveConf effectiveHiveConf = hiveConf;
    if (effectiveHiveConf == null && conf != null) {
      effectiveHiveConf = new HiveConf(conf, HiveConf.class);
    }
    checkArgument(effectiveHiveConf != null, "HiveConf is null");
    checkArgument(hiveSyncConfig != null, "HiveSyncConfig is null");
    return new AwsGlueCatalogSyncTool(hiveSyncConfig, effectiveHiveConf, fs);
  }
}
}
1 change: 1 addition & 0 deletions hudi-client/hudi-client-common/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-aws</artifactId>
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
Expand Down
6 changes: 6 additions & 0 deletions hudi-flink-datasource/hudi-flink/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,12 @@
<artifactId>hudi-client-common</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-aws</artifactId>
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-flink-client</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

package org.apache.hudi.sink.utils;

import org.apache.flink.annotation.VisibleForTesting;
import org.apache.hudi.aws.sync.AwsGlueCatalogSyncTool;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.configuration.FlinkOptions;
Expand All @@ -28,6 +27,7 @@
import org.apache.hudi.hive.ddl.HiveSyncMode;
import org.apache.hudi.table.format.FilePathUtils;

import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.configuration.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hive.conf.HiveConf;
Expand All @@ -53,7 +53,11 @@ private HiveSyncContext(HiveSyncConfig syncConfig, HiveConf hiveConf, FileSystem
/**
 * Creates the sync tool that matches the configured sync mode.
 *
 * @return an {@link AwsGlueCatalogSyncTool} when the sync mode is
 *         {@link HiveSyncMode#GLUE}, otherwise a plain {@link HiveSyncTool}
 */
public HiveSyncTool hiveSyncTool() {
// Resolve the sync mode from the sync configuration's syncMode field.
HiveSyncMode syncMode = HiveSyncMode.of(syncConfig.syncMode);
if (syncMode == HiveSyncMode.GLUE) {
// NOTE(review): this looks like a rendered diff, where the direct constructor call is
// the pre-change line and the builder chain below is its replacement; confirm
// against the actual file before relying on either form.
return new AwsGlueCatalogSyncTool(this.syncConfig, this.hiveConf, this.fs);
return AwsGlueCatalogSyncTool.newBuilder()
.withHiveSyncConfig(this.syncConfig)
.withHiveConf(this.hiveConf)
.withFileSystem(this.fs)
.build();
}
// Any other sync mode uses the regular Hive sync tool.
return new HiveSyncTool(this.syncConfig, this.hiveConf, this.fs);
}
Expand Down

0 comments on commit 145542b

Please sign in to comment.