From 08cd075f18ce0690e2d24e5613645f280b42a6df Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Fri, 26 Jul 2024 07:46:26 -0700 Subject: [PATCH] [SPARK-49012][SQL][BUILD] Add bouncycastle-related test dependencies to the `hive-thriftserver` module to fix the Maven daily test ### What changes were proposed in this pull request? This pr add bouncycastle-related test dependencies to the `hive-thrift` module to fix the Maven daily test. ### Why are the changes needed? `sql-on-files.sql` added the following statement in https://github.com/apache/spark/pull/47480, which caused the Maven daily test to fail https://github.com/apache/spark/blob/2363aec0c14ead24ade2bfa23478a4914f179c00/sql/core/src/test/resources/sql-tests/inputs/sql-on-files.sql#L10 - https://github.com/apache/spark/actions/runs/10094638521/job/27943309504 - https://github.com/apache/spark/actions/runs/10095571472/job/27943298802 ``` - sql-on-files.sql *** FAILED *** "" did not contain "Exception" Exception did not match for query #6 CREATE TABLE sql_on_files.test_orc USING ORC AS SELECT 1, expected: , but got: java.sql.SQLException org.apache.hive.service.cli.HiveSQLException: Error running query: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 8542.0 failed 1 times, most recent failure: Lost task 0.0 in stage 8542.0 (TID 8594) (localhost executor driver): java.lang.NoClassDefFoundError: org/bouncycastle/jce/provider/BouncyCastleProvider at test.org.apache.spark.sql.execution.datasources.orc.FakeKeyProvider$Factory.createProvider(FakeKeyProvider.java:127) at org.apache.hadoop.crypto.key.KeyProviderFactory.get(KeyProviderFactory.java:96) at org.apache.hadoop.crypto.key.KeyProviderFactory.getProviders(KeyProviderFactory.java:68) at org.apache.orc.impl.HadoopShimsCurrent.createKeyProvider(HadoopShimsCurrent.java:97) at org.apache.orc.impl.HadoopShimsCurrent.getHadoopKeyProvider(HadoopShimsCurrent.java:131) at org.apache.orc.impl.CryptoUtils$HadoopKeyProviderFactory.create(CryptoUtils.java:158) at org.apache.orc.impl.CryptoUtils.getKeyProvider(CryptoUtils.java:141) at org.apache.orc.impl.WriterImpl.setupEncryption(WriterImpl.java:1015) at org.apache.orc.impl.WriterImpl.(WriterImpl.java:164) at org.apache.orc.OrcFile.createWriter(OrcFile.java:1078) at org.apache.spark.sql.execution.datasources.orc.OrcOutputWriter.(OrcOutputWriter.scala:49) at org.apache.spark.sql.execution.datasources.orc.OrcFileFormat$$anon$1.newInstance(OrcFileFormat.scala:89) at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:180) at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.(FileFormatDataWriter.scala:165) at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:391) at org.apache.spark.sql.execution.datasources.WriteFilesExec.$anonfun$doExecuteWrite$1(WriteFiles.scala:107) at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:901) at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:901) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:374) at org.apache.spark.rdd.RDD.iterator(RDD.scala:338) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93) at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:171) at org.apache.spark.scheduler.Task.run(Task.scala:146) at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$5(Executor.scala:644) at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64) at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:99) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:647) at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136) at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635) at java.base/java.lang.Thread.run(Thread.java:840) Caused by: java.lang.ClassNotFoundException: org.bouncycastle.jce.provider.BouncyCastleProvider at java.base/jdk.internal.loader.BuiltinClassLoader.loadClass(BuiltinClassLoader.java:641) at java.base/jdk.internal.loader.ClassLoaders$AppClassLoader.loadClass(ClassLoaders.java:188) at java.base/java.lang.ClassLoader.loadClass(ClassLoader.java:525) ... 32 more ``` Because we have configured `hadoop.security.key.provider.path` as `test:///` in the parent `pom.xml`, https://github.com/apache/spark/blob/5ccf9ba958f492c1eb4dde22a647ba75aba63d8e/pom.xml#L3165-L3166 `KeyProviderFactory#getProviders` will use `FakeKeyProvider$Factory` to create instances of `FakeKeyProvider`. https://github.com/apache/spark/blob/5ccf9ba958f492c1eb4dde22a647ba75aba63d8e/sql/core/src/test/resources/META-INF/services/org.apache.hadoop.crypto.key.KeyProviderFactory#L18 During the initialization of `FakeKeyProvider`, it first initializes its superclass `org.apache.hadoop.crypto.key.KeyProvider`, which leads to the loading of the `BouncyCastleProvider` class. Therefore, we need to add bouncycastle-related test dependencies in the `hive-thrift` module. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manual Test with this pr. ``` build/mvn -Phive -Phive-thriftserver clean install -DskipTests build/mvn -Phive -Phive-thriftserver clean install -Dtest=none -DwildcardSuites=org.apache.spark.sql.hive.thriftserver.ThriftServerQueryTestSuite -pl sql/hive-thriftserver ``` ``` Run completed in 6 minutes, 52 seconds. Total number of tests run: 243 Suites: completed 2, aborted 0 Tests: succeeded 243, failed 0, canceled 0, ignored 20, pending 0 All tests passed. ``` ### Was this patch authored or co-authored using generative AI tooling? No Closes #47496 from LuciferYang/thrift-bouncycastle. Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- sql/hive-thriftserver/pom.xml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 6a352f8a530d7..d50c78bd1f9b8 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -156,6 +156,16 @@ org.apache.httpcomponents httpcore + + org.bouncycastle + bcprov-jdk18on + test + + + org.bouncycastle + bcpkix-jdk18on + test + target/scala-${scala.binary.version}/classes