From 4fd3ff3e0c7eca01d7058936f86cbc3bd5b99a3a Mon Sep 17 00:00:00 2001 From: Rahil C <32500120+rahil-c@users.noreply.github.com> Date: Mon, 19 Dec 2022 16:17:41 -0800 Subject: [PATCH] [HUDI-5182] Create Hudi CLI Bundle (#7224) Creates a hudi-cli-bundle bundle which contains all the cli related dependencies, so that the hudi-cli-bundle can be used with hudi-spark-bundle to start Hudi CLI easily. Co-authored-by: Rahil Chertara Co-authored-by: Y Ethan Guo --- packaging/hudi-cli-bundle/conf/hudi-env.sh | 22 ++ .../hudi-cli-bundle/conf}/log4j2.properties | 0 .../hudi-cli-bundle/hudi-cli-with-bundle.sh | 44 ++++ packaging/hudi-cli-bundle/pom.xml | 243 ++++++++++++++++++ .../java/org/apache/hudi/cli/bundle/Main.java | 36 +++ pom.xml | 1 + 6 files changed, 346 insertions(+) create mode 100644 packaging/hudi-cli-bundle/conf/hudi-env.sh rename {hudi-cli/src/main/resources => packaging/hudi-cli-bundle/conf}/log4j2.properties (100%) create mode 100755 packaging/hudi-cli-bundle/hudi-cli-with-bundle.sh create mode 100644 packaging/hudi-cli-bundle/pom.xml create mode 100644 packaging/hudi-cli-bundle/src/main/java/org/apache/hudi/cli/bundle/Main.java diff --git a/packaging/hudi-cli-bundle/conf/hudi-env.sh b/packaging/hudi-cli-bundle/conf/hudi-env.sh new file mode 100644 index 000000000000..5d2cc36c2bbd --- /dev/null +++ b/packaging/hudi-cli-bundle/conf/hudi-env.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Resolve the configuration directories: a value already present in the
+# environment wins, otherwise (unset or empty) the stock location is used.
+HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop/conf"}
+SPARK_CONF_DIR=${SPARK_CONF_DIR:-"/etc/spark/conf"}
+# CLIENT_JAR has no default here; whatever the caller provided is passed through.
+CLIENT_JAR=${CLIENT_JAR}
+# Export all three so that processes started by the sourcing script see them.
+export HADOOP_CONF_DIR SPARK_CONF_DIR CLIENT_JAR
diff --git a/hudi-cli/src/main/resources/log4j2.properties b/packaging/hudi-cli-bundle/conf/log4j2.properties
similarity index 100%
rename from hudi-cli/src/main/resources/log4j2.properties
rename to packaging/hudi-cli-bundle/conf/log4j2.properties
diff --git a/packaging/hudi-cli-bundle/hudi-cli-with-bundle.sh b/packaging/hudi-cli-bundle/hudi-cli-with-bundle.sh
new file mode 100755
index 000000000000..361e5962fab2
--- /dev/null
+++ b/packaging/hudi-cli-bundle/hudi-cli-with-bundle.sh
@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +echo "DIR is ${DIR}" +CLI_BUNDLE_JAR=`ls $DIR/target/hudi-cli-bundle*.jar | grep -v source | grep -v javadoc` +SPARK_BUNDLE_JAR=`ls $DIR/../hudi-spark-bundle/target/hudi-spark*-bundle*.jar | grep -v source | grep -v javadoc` +HUDI_CONF_DIR="${DIR}"/conf +# hudi aux lib contains jakarta.el jars, which need to be put directly on class path +HUDI_AUX_LIB="${DIR}"/auxlib + +. "${DIR}"/conf/hudi-env.sh + +if [ -z "$CLI_BUNDLE_JAR" ] || [ -z "$SPARK_BUNDLE_JAR" ]; then + echo "Make sure to generate both the hudi-cli-bundle.jar and hudi-spark-bundle.jar before running this script." + exit +fi + +if [ -z "$SPARK_HOME" ]; then + echo "SPARK_HOME not set, setting to /usr/local/spark" + export SPARK_HOME="/usr/local/spark" +fi + +if [ -z "$CLIENT_JAR" ]; then + echo "Client jar location not set, please set it in conf/hudi-env.sh" +fi + +echo "Running : java -cp ${HUDI_AUX_LIB}/*:${SPARK_HOME}/*:${SPARK_HOME}/jars/*:${HUDI_CONF_DIR}:${HUDI_AUX_LIB}/*:${HADOOP_CONF_DIR}:${SPARK_CONF_DIR}:${CLI_BUNDLE_JAR}:${SPARK_BUNDLE_JAR}:${CLIENT_JAR} -DSPARK_CONF_DIR=${SPARK_CONF_DIR} -DHADOOP_CONF_DIR=${HADOOP_CONF_DIR} org.apache.hudi.cli.Main $@" +java -cp ${HUDI_AUX_LIB}/*:${SPARK_HOME}/*:${SPARK_HOME}/jars/*:${HUDI_CONF_DIR}:${HADOOP_CONF_DIR}:${SPARK_CONF_DIR}:${CLI_BUNDLE_JAR}:${SPARK_BUNDLE_JAR}:${CLIENT_JAR} -DSPARK_CONF_DIR=${SPARK_CONF_DIR} -DHADOOP_CONF_DIR=${HADOOP_CONF_DIR} org.apache.hudi.cli.Main $@ diff --git a/packaging/hudi-cli-bundle/pom.xml b/packaging/hudi-cli-bundle/pom.xml new file mode 100644 index 000000000000..981ec7405026 --- /dev/null +++ b/packaging/hudi-cli-bundle/pom.xml @@ -0,0 +1,243 @@ + + + + + hudi + org.apache.hudi + 0.13.0-SNAPSHOT + ../../pom.xml + + 4.0.0 + hudi-cli-bundle_${scala.binary.version} + jar + + + true + ${project.parent.basedir} + true + + 3.0.3 + 2.0.2 + 3.21.0 + 2.6.2 + + + + + + org.apache.rat + apache-rat-plugin + + + org.apache.maven.plugins + maven-shade-plugin + 
${maven-shade-plugin.version} + + + package + + shade + + + ${shadeSources} + ${project.build.directory}/dependency-reduced-pom.xml + + + + + true + + + META-INF/LICENSE + target/classes/META-INF/LICENSE + + + META-INF/spring.handlers + + + META-INF/spring.schemas + + + META-INF/spring.factories + + + + org.apache.hudi.cli.Main + + + META-INF/spring/org.springframework.boot.actuate.autoconfigure.web.ManagementContextConfiguration.imports + + + META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports + + + + + + org.apache.hudi:hudi-cli + org.apache.hudi:hudi-utilities_${scala.binary.version} + + com.fasterxml:classmate + com.fasterxml.woodstox:woodstox-core + com.google.code.gson:gson + com.google.re2j:re2j + com.jakewharton.fliptables:fliptables + + jakarta.el:jakarta.el-api + jakarta.validation:jakarta.validation-api + net.java.dev.jna:jna + + org.apache.httpcomponents:httpclient + org.apache.httpcomponents:httpcore + org.apache.httpcomponents:fluent-hc + org.codehaus.woodstox:stax2-api + org.fusesource.jansi:jansi + org.glassfish:jakarta.el + org.hibernate.validator:hibernate-validator + org.jboss.logging:jboss-logging + org.jline:* + org.springframework*:* + org.springframework.boot:* + org.yaml:* + + + + + com.google.code.gson. + org.apache.hudi.com.google.code.gson. 
+ + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + **/*.proto + **/Log4j2Plugins.dat + + + + + + + + + + + hudi-cli/src/main/resources + + + + + + + + org.apache.hudi + hudi-cli + ${project.version} + + + org.apache.hudi + hudi-utilities_${scala.binary.version} + ${project.version} + + + org.apache.hadoop + hadoop-common + ${hadoop.version} + compile + + + org.springframework.boot + spring-boot-starter-validation + ${springboot.version} + compile + + + org.springframework.boot + spring-boot-starter-logging + + + com.google.guava + guava + + + ch.qos.logback + logback-classic + + + + + org.springframework.shell + spring-shell-starter + ${spring.shell.version} + compile + + + com.google.guava + guava + + + ch.qos.logback + logback-classic + + + + + com.google.code.gson + gson + ${hudi.cli.gson.version} + compile + + + org.glassfish + jakarta.el + ${jakarta.el.version} + compile + + + jakarta.el + jakarta.el-api + ${jakarta.el.version} + compile + + + jakarta.validation + jakarta.validation-api + ${jakarta.validation.version} + compile + + + org.apache.httpcomponents + fluent-hc + ${http.version} + + + org.apache.httpcomponents + httpcore + ${http.version} + + + org.apache.httpcomponents + httpclient + ${http.version} + + + diff --git a/packaging/hudi-cli-bundle/src/main/java/org/apache/hudi/cli/bundle/Main.java b/packaging/hudi-cli-bundle/src/main/java/org/apache/hudi/cli/bundle/Main.java new file mode 100644 index 000000000000..71b6c41784a2 --- /dev/null +++ b/packaging/hudi-cli-bundle/src/main/java/org/apache/hudi/cli/bundle/Main.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.cli.bundle; + +import org.apache.hudi.common.util.ReflectionUtils; + +/** + * A simple main class to dump all classes loaded in current classpath. + * + * This is a workaround for generating sources and javadoc jars for packaging modules. The maven plugins for generating + * javadoc and sources plugins do not generate corresponding jars if there are no source files. + * + * This class does not have anything to do with Hudi but is there to keep mvn javadocs/source plugin happy. + */ +public class Main { + public static void main(String[] args) { + ReflectionUtils.getTopLevelClassesInClasspath(Main.class).forEach(System.out::println); + } +} diff --git a/pom.xml b/pom.xml index e64c1e7e8499..16e2108ce543 100644 --- a/pom.xml +++ b/pom.xml @@ -61,6 +61,7 @@ hudi-kafka-connect packaging/hudi-flink-bundle packaging/hudi-kafka-connect-bundle + packaging/hudi-cli-bundle hudi-tests-common