Skip to content

Commit

Permalink
[HUDI-5182] Create Hudi CLI Bundle (#7224)
Browse files Browse the repository at this point in the history
Creates a hudi-cli-bundle module that packages all the CLI-related dependencies, so that it can be used together with hudi-spark-bundle to start the Hudi CLI easily.

Co-authored-by: Rahil Chertara <rchertar@amazon.com>
Co-authored-by: Y Ethan Guo <ethan.guoyihua@gmail.com>
  • Loading branch information
3 people authored Dec 20, 2022
1 parent a0fb9b2 commit 4fd3ff3
Show file tree
Hide file tree
Showing 6 changed files with 346 additions and 0 deletions.
22 changes: 22 additions & 0 deletions packaging/hudi-cli-bundle/conf/hudi-env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Environment consumed by the Hudi CLI launcher script.
# The two conf dirs fall back to the defaults below when not already set;
# CLIENT_JAR is re-exported with whatever value the caller provided (possibly empty).
export HADOOP_CONF_DIR="${HADOOP_CONF_DIR:-/etc/hadoop/conf}"
export SPARK_CONF_DIR="${SPARK_CONF_DIR:-/etc/spark/conf}"
export CLIENT_JAR="${CLIENT_JAR}"
44 changes: 44 additions & 0 deletions packaging/hudi-cli-bundle/hudi-cli-with-bundle.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Launches the Hudi CLI (org.apache.hudi.cli.Main) using the hudi-cli-bundle and
# hudi-spark-bundle jars found relative to this script. Any arguments given to
# this script are forwarded unchanged to the CLI.

# Resolve the directory that contains this script so it can be run from anywhere.
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
echo "DIR is ${DIR}"

# Locate the built bundle jars. Only the "-sources.jar"/"-javadoc.jar" suffixes are
# filtered out, so a checkout path that merely contains the word "source" or
# "javadoc" no longer hides every jar.
CLI_BUNDLE_JAR=$(ls "${DIR}"/target/hudi-cli-bundle*.jar | grep -v -E -- '-(sources|javadoc)\.jar$')
SPARK_BUNDLE_JAR=$(ls "${DIR}"/../hudi-spark-bundle/target/hudi-spark*-bundle*.jar | grep -v -E -- '-(sources|javadoc)\.jar$')
HUDI_CONF_DIR="${DIR}"/conf
# hudi aux lib contains jakarta.el jars, which need to be put directly on class path
HUDI_AUX_LIB="${DIR}"/auxlib

# Load HADOOP_CONF_DIR, SPARK_CONF_DIR and CLIENT_JAR.
. "${DIR}"/conf/hudi-env.sh

if [ -z "$CLI_BUNDLE_JAR" ] || [ -z "$SPARK_BUNDLE_JAR" ]; then
  echo "Make sure to generate both the hudi-cli-bundle.jar and hudi-spark-bundle.jar before running this script." >&2
  # Exit non-zero so callers and CI can detect that the CLI was never started.
  exit 1
fi

if [ -z "$SPARK_HOME" ]; then
  echo "SPARK_HOME not set, setting to /usr/local/spark"
  export SPARK_HOME="/usr/local/spark"
fi

# A missing client jar is only a warning; the CLI is still started without it.
if [ -z "$CLIENT_JAR" ]; then
  echo "Client jar location not set, please set it in conf/hudi-env.sh"
fi

# Build the classpath once so the echoed command and the executed command cannot drift apart.
# The "*" wildcards are expanded by the JVM, not the shell, hence the quoting below.
HUDI_CLI_CLASSPATH="${HUDI_AUX_LIB}/*:${SPARK_HOME}/*:${SPARK_HOME}/jars/*:${HUDI_CONF_DIR}:${HADOOP_CONF_DIR}:${SPARK_CONF_DIR}:${CLI_BUNDLE_JAR}:${SPARK_BUNDLE_JAR}:${CLIENT_JAR}"

echo "Running : java -cp ${HUDI_CLI_CLASSPATH} -DSPARK_CONF_DIR=${SPARK_CONF_DIR} -DHADOOP_CONF_DIR=${HADOOP_CONF_DIR} org.apache.hudi.cli.Main $*"
# "$@" keeps each user-supplied argument intact, including ones containing spaces.
java -cp "${HUDI_CLI_CLASSPATH}" -DSPARK_CONF_DIR="${SPARK_CONF_DIR}" -DHADOOP_CONF_DIR="${HADOOP_CONF_DIR}" org.apache.hudi.cli.Main "$@"
243 changes: 243 additions & 0 deletions packaging/hudi-cli-bundle/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>hudi</artifactId>
<groupId>org.apache.hudi</groupId>
<version>0.13.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>hudi-cli-bundle_${scala.binary.version}</artifactId>
<packaging>jar</packaging>

<properties>
<!-- Checkstyle and tests are both switched off for this module. -->
<checkstyle.skip>true</checkstyle.skip>
<main.basedir>${project.parent.basedir}</main.basedir>
<skipTests>true</skipTests>
<!-- jakarta and jline version chosen based on hibernate validator https://mvnrepository.com/artifact/org.hibernate.validator/hibernate-validator/6.2.4.Final -->
<jakarta.el.version>3.0.3</jakarta.el.version>
<jakarta.validation.version>2.0.2</jakarta.validation.version>
<jline.version>3.21.0</jline.version>
<!-- Version of the com.google.code.gson:gson dependency declared by this bundle. -->
<hudi.cli.gson.version>2.6.2</hudi.cli.gson.version>
</properties>

<build>
<plugins>
<plugin>
<groupId>org.apache.rat</groupId>
<artifactId>apache-rat-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>${maven-shade-plugin.version}</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<createSourcesJar>${shadeSources}</createSourcesJar>
<dependencyReducedPomLocation>${project.build.directory}/dependency-reduced-pom.xml
</dependencyReducedPomLocation>
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ApacheLicenseResourceTransformer"/>
<transformer implementation="org.apache.maven.plugins.shade.resource.ApacheNoticeResourceTransformer">
<addHeader>true</addHeader>
</transformer>
<transformer implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
<resource>META-INF/LICENSE</resource>
<file>target/classes/META-INF/LICENSE</file>
</transformer>
<transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
<resource>META-INF/spring.handlers</resource>
</transformer>
<transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
<resource>META-INF/spring.schemas</resource>
</transformer>
<transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
<resource>META-INF/spring.factories</resource>
</transformer>
<transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>org.apache.hudi.cli.Main</mainClass>
</transformer>
<transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
<resource>META-INF/spring/org.springframework.boot.actuate.autoconfigure.web.ManagementContextConfiguration.imports</resource>
</transformer>
<transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
<resource>META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports</resource>
</transformer>
</transformers>
<!-- Artifacts to copy into the shaded jar. combine.children="append" adds these entries to any
includes list inherited from a parent plugin configuration instead of replacing it. -->
<artifactSet>
<includes combine.children="append">
<!-- hudi -->
<include>org.apache.hudi:hudi-cli</include>
<include>org.apache.hudi:hudi-utilities_${scala.binary.version}</include>
<!-- cli related dependencies -->
<include>com.fasterxml:classmate</include>
<include>com.fasterxml.woodstox:woodstox-core</include>
<include>com.google.code.gson:gson</include>
<include>com.google.re2j:re2j</include>
<include>com.jakewharton.fliptables:fliptables</include>

<include>jakarta.el:jakarta.el-api</include>
<include>jakarta.validation:jakarta.validation-api</include>
<include>net.java.dev.jna:jna</include>

<include>org.apache.httpcomponents:httpclient</include>
<include>org.apache.httpcomponents:httpcore</include>
<include>org.apache.httpcomponents:fluent-hc</include>
<include>org.codehaus.woodstox:stax2-api</include>
<include>org.fusesource.jansi:jansi</include>
<include>org.glassfish:jakarta.el</include>
<include>org.hibernate.validator:hibernate-validator</include>
<include>org.jboss.logging:jboss-logging</include>
<include>org.jline:*</include>
<!-- NOTE(review): the org.springframework*:* wildcard appears to already match the
org.springframework.boot group, which would make the following entry redundant; confirm before removing. -->
<include>org.springframework*:*</include>
<include>org.springframework.boot:*</include>
<include>org.yaml:*</include>
</includes>
</artifactSet>
<relocations>
<!-- Gson classes live in the Java package com.google.gson; com.google.code.gson is only the Maven
groupId. The pattern must name the Java package, otherwise the relocation matches no classes and
Gson ends up unshaded in the bundle. -->
<relocation>
<pattern>com.google.gson.</pattern>
<shadedPattern>org.apache.hudi.com.google.gson.</shadedPattern>
</relocation>
</relocations>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
<exclude>**/*.proto</exclude>
<exclude>**/Log4j2Plugins.dat</exclude>
</excludes>
</filter>
</filters>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
<resources>
<!-- NOTE(review): a relative resource directory is resolved against this module's basedir, not the
repository root. Confirm that hudi-cli/src/main/resources exists under this module; if it does not,
this entry contributes no resources to the jar. -->
<resource>
<directory>hudi-cli/src/main/resources</directory>
</resource>
</resources>
</build>

<dependencies>
<!-- Hoodie -->
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-cli</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-utilities_${scala.binary.version}</artifactId>
<version>${project.version}</version>
</dependency>
<!-- Hadoop: declared as a dependency here but not listed in the shade plugin's artifactSet includes. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
<scope>compile</scope>
</dependency>
<!-- Spring Boot validation starter; its logging starter, guava and logback-classic are excluded. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-validation</artifactId>
<version>${springboot.version}</version>
<scope>compile</scope>
<exclusions>
<exclusion>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-logging</artifactId>
</exclusion>
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
<exclusion>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- Spring Shell starter; guava and logback-classic are excluded. -->
<dependency>
<groupId>org.springframework.shell</groupId>
<artifactId>spring-shell-starter</artifactId>
<version>${spring.shell.version}</version>
<scope>compile</scope>
<exclusions>
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
<exclusion>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- Gson, pinned via the hudi.cli.gson.version property of this module. -->
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>${hudi.cli.gson.version}</version>
<scope>compile</scope>
</dependency>
<!-- Jakarta EL (implementation and API) and Jakarta Validation API, pinned via this module's properties. -->
<dependency>
<groupId>org.glassfish</groupId>
<artifactId>jakarta.el</artifactId>
<version>${jakarta.el.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>jakarta.el</groupId>
<artifactId>jakarta.el-api</artifactId>
<version>${jakarta.el.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>jakarta.validation</groupId>
<artifactId>jakarta.validation-api</artifactId>
<version>${jakarta.validation.version}</version>
<scope>compile</scope>
</dependency>
<!-- Apache HttpComponents, all on the shared http.version property. -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>fluent-hc</artifactId>
<version>${http.version}</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
<version>${http.version}</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>${http.version}</version>
</dependency>
</dependencies>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.hudi.cli.bundle;

import org.apache.hudi.common.util.ReflectionUtils;

/**
 * Placeholder entry point that prints the top-level classes found on the current classpath.
 *
 * <p>Packaging modules have no source files of their own, and the maven sources and javadoc
 * plugins do not produce their jars when there is nothing to process. This class exists so that
 * those jars are still generated for the bundle.
 *
 * <p>It is otherwise unrelated to Hudi functionality.
 */
public class Main {
  public static void main(String[] args) {
    ReflectionUtils.getTopLevelClassesInClasspath(Main.class)
        .forEach(className -> System.out.println(className));
  }
}
1 change: 1 addition & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
<module>hudi-kafka-connect</module>
<module>packaging/hudi-flink-bundle</module>
<module>packaging/hudi-kafka-connect-bundle</module>
<module>packaging/hudi-cli-bundle</module>
<module>hudi-tests-common</module>
</modules>

Expand Down

0 comments on commit 4fd3ff3

Please sign in to comment.