From 1c10e28caa54f32aa44a5406046894b8fcb335a4 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Tue, 25 Apr 2023 08:53:58 -0500 Subject: [PATCH] ODP-1302 [SPARK-43225][BUILD][SQL] Remove jackson-core-asl and jackson-mapper-asl from pre-built distribution - Remove `jackson-core-asl` from maven dependency. - Change the scope of `jackson-mapper-asl` from compile to test. - Replace all `Hive.get(conf)` with `Hive.getWithoutRegisterFns(conf)`. To fix CVE issue: https://github.com/apache/spark/security/dependabot/50. No. manual test. Closes #40893 from wangyum/SPARK-43225. Lead-authored-by: Yuming Wang Co-authored-by: Yuming Wang Signed-off-by: Sean Owen (cherry picked from commit 9c237d7bc7ba201c8c7e728f3488dab83995d9cb) [SPARK-43868][SQL][TESTS] Remove `originalUDFs` from `TestHive` to ensure `ObjectHashAggregateExecBenchmark` can run successfully on Github Action This PR removes `originalUDFs` from `TestHive` to ensure `ObjectHashAggregateExecBenchmark` can run successfully on Github Action. After SPARK-43225, `org.codehaus.jackson:jackson-mapper-asl` becomes a test scope dependency, so when using GA to run the benchmark, it is not in the classpath because GA uses https://github.com/apache/spark/blob/d61c77cac17029ee27319e6b766b48d314a4dd31/.github/workflows/benchmark.yml#L179-L183 instead of the sbt `Test/runMain`.
`ObjectHashAggregateExecBenchmark` used `TestHive`, and `TestHive` will always call `org.apache.hadoop.hive.ql.exec.FunctionRegistry#getFunctionNames` to init `originalUDFs` before this pr, so when we run `ObjectHashAggregateExecBenchmark` on GitHub Actions, there will be the following exceptions: --- core/pom.xml | 8 ------ dev/deps/spark-deps-hadoop-3-hive-2.3 | 18 ++++++------ pom.xml | 28 +------------------ resource-managers/yarn/pom.xml | 10 +++++++ sql/core/pom.xml | 10 ------- .../apache/hive/service/cli/CLIService.java | 2 +- .../service/cli/session/HiveSessionImpl.java | 4 +-- .../cli/session/HiveSessionImplwithUGI.java | 4 +-- sql/hive/pom.xml | 4 --- .../apache/spark/sql/hive/test/TestHive.scala | 8 ------ 10 files changed, 24 insertions(+), 72 deletions(-) diff --git a/core/pom.xml b/core/pom.xml index 37a604b6fb24f..ffa84cbbae66a 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -480,14 +480,6 @@ commons-logging commons-logging - - org.codehaus.jackson - jackson-mapper-asl - - - org.codehaus.jackson - jackson-core-asl - com.fasterxml.jackson.core jackson-core diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 56c24679deb74..8794f1f72bf72 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -97,16 +97,14 @@ httpclient/4.5.13//httpclient-4.5.13.jar httpcore/4.4.14//httpcore-4.4.14.jar ini4j/0.5.4//ini4j-0.5.4.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar -ivy/2.5.0//ivy-2.5.0.jar -jackson-annotations/2.13.4//jackson-annotations-2.13.4.jar -jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar -jackson-core/2.13.4//jackson-core-2.13.4.jar -jackson-databind/2.13.4.1//jackson-databind-2.13.4.1.jar -jackson-dataformat-cbor/2.13.4//jackson-dataformat-cbor-2.13.4.jar -jackson-dataformat-yaml/2.13.4//jackson-dataformat-yaml-2.13.4.jar -jackson-datatype-jsr310/2.13.4//jackson-datatype-jsr310-2.13.4.jar 
-jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar -jackson-module-scala_2.12/2.13.4//jackson-module-scala_2.12-2.13.4.jar +ivy/2.5.1//ivy-2.5.1.jar +jackson-annotations/2.14.2//jackson-annotations-2.14.2.jar +jackson-core/2.14.2//jackson-core-2.14.2.jar +jackson-databind/2.14.2//jackson-databind-2.14.2.jar +jackson-dataformat-cbor/2.14.2//jackson-dataformat-cbor-2.14.2.jar +jackson-dataformat-yaml/2.14.2//jackson-dataformat-yaml-2.14.2.jar +jackson-datatype-jsr310/2.14.2//jackson-datatype-jsr310-2.14.2.jar +jackson-module-scala_2.12/2.14.2//jackson-module-scala_2.12-2.14.2.jar jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar jakarta.inject/2.6.1//jakarta.inject-2.6.1.jar jakarta.servlet-api/4.0.3//jakarta.servlet-api-4.0.3.jar diff --git a/pom.xml b/pom.xml index 897f3549d5c57..08bf6e81c7287 100644 --- a/pom.xml +++ b/pom.xml @@ -179,7 +179,6 @@ --test true - 1.9.13 2.12.3 1.1.8.4 1.1.2 @@ -1154,10 +1153,6 @@ asm asm - - org.codehaus.jackson - jackson-mapper-asl - org.ow2.asm asm @@ -1602,28 +1597,7 @@ - - org.codehaus.jackson - jackson-core-asl - ${codehaus.jackson.version} - ${hadoop.deps.scope} - - - org.codehaus.jackson - jackson-mapper-asl - ${codehaus.jackson.version} - ${hadoop.deps.scope} - - - org.codehaus.jackson - jackson-xc - ${codehaus.jackson.version} - - - org.codehaus.jackson - jackson-jaxrs - ${codehaus.jackson.version} - + ${hive.group} hive-beeline diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index 268832ba8ac33..fec3aac5d3fda 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -201,6 +201,16 @@ jersey-json test ${jersey-1.version} + + + org.codehaus.jackson + jackson-core-asl + + + org.codehaus.jackson + jackson-mapper-asl + + com.sun.jersey diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 13c8306d6fd6c..d3b8e573c4a01 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -255,16 +255,6 @@ hadoop-2.7 - - org.codehaus.jackson - jackson-core-asl - test - - 
- org.codehaus.jackson - jackson-mapper-asl - test - diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/CLIService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/CLIService.java index f4d07d10a4357..3345a9ae6a5fa 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/CLIService.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/CLIService.java @@ -534,7 +534,7 @@ public synchronized String getDelegationTokenFromMetaStore(String owner) try { Hive.closeCurrent(); - return Hive.get(hiveConf).getDelegationToken(owner, owner); + return Hive.getWithoutRegisterFns(hiveConf).getDelegationToken(owner, owner); } catch (HiveException e) { if (e.getCause() instanceof UnsupportedOperationException) { throw (UnsupportedOperationException)e.getCause(); diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java index 8e1e500ff78b4..53de7c673b43f 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java @@ -252,7 +252,7 @@ public static int setVariable(String varname, String varvalue) throws Exception ss.getHiveVariables().put(propName, substitution.substitute(ss.getConf(),varvalue)); } else if (varname.startsWith(METACONF_PREFIX)) { String propName = varname.substring(METACONF_PREFIX.length()); - Hive hive = Hive.get(ss.getConf()); + Hive hive = Hive.getWithoutRegisterFns(ss.getConf()); hive.setMetaConf(propName, substitution.substitute(ss.getConf(), varvalue)); } else { setConf(varname, varname, varvalue, true); @@ -413,7 +413,7 @@ public HiveConf getHiveConf() { @Override public IMetaStoreClient getMetaStoreClient() throws HiveSQLException { try { - return Hive.get(getHiveConf()).getMSC(); + return 
Hive.getWithoutRegisterFns(getHiveConf()).getMSC(); } catch (HiveException e) { throw new HiveSQLException("Failed to get metastore connection", e); } catch (MetaException e) { diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java index 6e153db6751d3..514b19eb7111a 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java @@ -54,7 +54,7 @@ public HiveSessionImplwithUGI(TProtocolVersion protocol, String username, String // create a new metastore connection for this particular user session Hive.set(null); try { - sessionHive = Hive.get(getHiveConf()); + sessionHive = Hive.getWithoutRegisterFns(getHiveConf()); } catch (HiveException e) { throw new HiveSQLException("Failed to setup metastore connection", e); } @@ -140,7 +140,7 @@ private void setDelegationToken(String delegationTokenStr) throws HiveSQLExcepti private void cancelDelegationToken() throws HiveSQLException { if (delegationTokenStr != null) { try { - Hive.get(getHiveConf()).cancelDelegationToken(delegationTokenStr); + Hive.getWithoutRegisterFns(getHiveConf()).cancelDelegationToken(delegationTokenStr); } catch (HiveException e) { throw new HiveSQLException("Couldn't cancel delegation token", e); } diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 43cfc0844a94e..f843761350e27 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -143,10 +143,6 @@ org.apache.httpcomponents httpclient - - org.codehaus.jackson - jackson-mapper-asl - commons-codec diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala index 3769de07d8a37..09fdb1cc2ce04 100644 --- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.hive.test import java.io.File import java.net.URI -import java.util.{Set => JavaSet} import scala.collection.JavaConverters._ import scala.collection.mutable @@ -27,7 +26,6 @@ import scala.collection.mutable import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.hadoop.hive.conf.HiveConf.ConfVars -import org.apache.hadoop.hive.ql.exec.FunctionRegistry import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe import org.apache.spark.{SparkConf, SparkContext} @@ -523,12 +521,6 @@ private[hive] class TestHiveSparkSession( } } - /** - * Records the UDFs present when the server starts, so we can delete ones that are created by - * tests. - */ - protected val originalUDFs: JavaSet[String] = FunctionRegistry.getFunctionNames - /** * Resets the test instance by deleting any table, view, temp view, and UDF that have been created */