diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml
index 797c5cfc7..f8c2c0e67 100644
--- a/.github/workflows/unittests.yml
+++ b/.github/workflows/unittests.yml
@@ -45,7 +45,7 @@ jobs:
       run: |
         cd /tmp
         git clone https://github.com/oap-project/arrow.git
-        cd arrow && git checkout arrow-3.0.0-oap && cd cpp
+        cd arrow && git checkout arrow-4.0.0-oap && cd cpp
         mkdir build && cd build
         cmake .. -DARROW_JNI=ON -DARROW_GANDIVA_JAVA=ON -DARROW_GANDIVA=ON -DARROW_PARQUET=ON -DARROW_CSV=ON -DARROW_HDFS=ON -DARROW_FILESYSTEM=ON -DARROW_WITH_SNAPPY=ON -DARROW_JSON=ON -DARROW_DATASET=ON -DARROW_WITH_LZ4=ON -DGTEST_ROOT=/usr/src/gtest && make -j2
         sudo make install
@@ -89,7 +89,7 @@ jobs:
       run: |
         cd /tmp
         git clone https://github.com/oap-project/arrow.git
-        cd arrow && git checkout arrow-3.0.0-oap && cd cpp
+        cd arrow && git checkout arrow-4.0.0-oap && cd cpp
         mkdir build && cd build
         cmake .. -DARROW_JNI=ON -DARROW_GANDIVA_JAVA=ON -DARROW_GANDIVA=ON -DARROW_PARQUET=ON -DARROW_CSV=ON -DARROW_HDFS=ON -DARROW_FILESYSTEM=ON -DARROW_WITH_SNAPPY=ON -DARROW_JSON=ON -DARROW_DATASET=ON -DARROW_WITH_LZ4=ON -DGTEST_ROOT=/usr/src/gtest && make -j2
         sudo make install
diff --git a/arrow-data-source/README.md b/arrow-data-source/README.md
index 4d097f896..1b4f3413d 100644
--- a/arrow-data-source/README.md
+++ b/arrow-data-source/README.md
@@ -117,7 +117,7 @@ You have to use a customized Arrow to support for our datasets Java API.
 ```
 // build arrow-cpp
-git clone -b arrow-3.0.0-oap-1.1 https://github.com/oap-project/arrow.git
+git clone -b arrow-4.0.0-oap https://github.com/oap-project/arrow.git
 cd arrow/cpp
 mkdir build
 cd build
diff --git a/arrow-data-source/common/src/main/java/com/intel/oap/spark/sql/execution/datasources/v2/arrow/SparkManagedReservationListener.java b/arrow-data-source/common/src/main/java/com/intel/oap/spark/sql/execution/datasources/v2/arrow/SparkManagedReservationListener.java
index 47c1a9890..92b140bc9 100644
--- a/arrow-data-source/common/src/main/java/com/intel/oap/spark/sql/execution/datasources/v2/arrow/SparkManagedReservationListener.java
+++ b/arrow-data-source/common/src/main/java/com/intel/oap/spark/sql/execution/datasources/v2/arrow/SparkManagedReservationListener.java
@@ -17,7 +17,7 @@
 package com.intel.oap.spark.sql.execution.datasources.v2.arrow;
 
-import org.apache.arrow.memory.ReservationListener;
+import org.apache.arrow.dataset.jni.ReservationListener;
 
 /**
  * Reserve Spark managed memory.
diff --git a/arrow-data-source/pom.xml b/arrow-data-source/pom.xml
index c41a0d4dd..3da20f7c1 100644
--- a/arrow-data-source/pom.xml
+++ b/arrow-data-source/pom.xml
@@ -49,30 +49,8 @@
     </dependency>
     <dependency>
      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-common</artifactId>
-      <version>${hadoop.version}</version>
-      <exclusions>
-        <exclusion>
-          <groupId>com.fasterxml.jackson.core</groupId>
-          <artifactId>jackson-core</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>com.fasterxml.jackson.core</groupId>
-          <artifactId>jackson-annotations</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>com.fasterxml.jackson.core</groupId>
-          <artifactId>jackson-databind</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>org.slf4j</groupId>
-          <artifactId>slf4j-log4j12</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>log4j</groupId>
-          <artifactId>log4j</artifactId>
-        </exclusion>
-      </exclusions>
+      <artifactId>hadoop-client</artifactId>
+      <scope>provided</scope>
     </dependency>
     <dependency>
       <groupId>org.apache.hadoop</groupId>
diff --git a/arrow-data-source/script/build_arrow.sh b/arrow-data-source/script/build_arrow.sh
index 3ede7686b..410e31070 100755
--- a/arrow-data-source/script/build_arrow.sh
+++ b/arrow-data-source/script/build_arrow.sh
@@ -62,7 +62,7 @@
 echo "ARROW_SOURCE_DIR=${ARROW_SOURCE_DIR}"
 echo "ARROW_INSTALL_DIR=${ARROW_INSTALL_DIR}"
 mkdir -p $ARROW_SOURCE_DIR
 mkdir -p $ARROW_INSTALL_DIR
-git clone https://github.com/oap-project/arrow.git --branch arrow-3.0.0-oap $ARROW_SOURCE_DIR
+git clone https://github.com/oap-project/arrow.git --branch arrow-4.0.0-oap $ARROW_SOURCE_DIR
 pushd $ARROW_SOURCE_DIR
 cmake ./cpp \
diff --git a/arrow-data-source/standard/src/main/scala/com/intel/oap/spark/sql/execution/datasources/arrow/ArrowFileFormat.scala b/arrow-data-source/standard/src/main/scala/com/intel/oap/spark/sql/execution/datasources/arrow/ArrowFileFormat.scala
index 61122592d..f13b9b612 100644
--- a/arrow-data-source/standard/src/main/scala/com/intel/oap/spark/sql/execution/datasources/arrow/ArrowFileFormat.scala
+++ b/arrow-data-source/standard/src/main/scala/com/intel/oap/spark/sql/execution/datasources/arrow/ArrowFileFormat.scala
@@ -107,7 +107,7 @@ class ArrowFileFormat extends FileFormat with DataSourceRegister with Serializab
         .asScala
         .toList
       val itrList = taskList
-        .map(task => task.scan())
+        .map(task => task.execute())
 
       Option(TaskContext.get()).foreach(_.addTaskCompletionListener[Unit](_ => {
         itrList.foreach(_.close())
@@ -120,7 +120,7 @@ class ArrowFileFormat extends FileFormat with DataSourceRegister with Serializab
       val itr = itrList
         .toIterator
         .flatMap(itr => itr.asScala)
-        .map(vsr => ArrowUtils.loadVectors(vsr, file.partitionValues, partitionSchema,
+        .map(batch => ArrowUtils.loadBatch(batch, file.partitionValues, partitionSchema,
           requiredSchema))
       new UnsafeItr(itr).asInstanceOf[Iterator[InternalRow]]
     }
diff --git a/arrow-data-source/standard/src/main/scala/com/intel/oap/spark/sql/execution/datasources/v2/arrow/ArrowPartitionReaderFactory.scala b/arrow-data-source/standard/src/main/scala/com/intel/oap/spark/sql/execution/datasources/v2/arrow/ArrowPartitionReaderFactory.scala
index 1f9145fb6..99ccd781a 100644
--- a/arrow-data-source/standard/src/main/scala/com/intel/oap/spark/sql/execution/datasources/v2/arrow/ArrowPartitionReaderFactory.scala
+++ b/arrow-data-source/standard/src/main/scala/com/intel/oap/spark/sql/execution/datasources/v2/arrow/ArrowPartitionReaderFactory.scala
@@ -76,12 +76,12 @@ case class ArrowPartitionReaderFactory(
        .toList
 
    val vsrItrList = taskList
-      .map(task => task.scan())
+      .map(task => task.execute())
 
    val batchItr = vsrItrList
      .toIterator
      .flatMap(itr => itr.asScala)
-      .map(bundledVectors => ArrowUtils.loadVectors(bundledVectors, partitionedFile.partitionValues,
+      .map(batch => ArrowUtils.loadBatch(batch, partitionedFile.partitionValues,
        readPartitionSchema, readDataSchema))
 
    new PartitionReader[ColumnarBatch] {
diff --git a/arrow-data-source/standard/src/main/scala/com/intel/oap/spark/sql/execution/datasources/v2/arrow/ArrowUtils.scala b/arrow-data-source/standard/src/main/scala/com/intel/oap/spark/sql/execution/datasources/v2/arrow/ArrowUtils.scala
index 4af788d0e..a99c3ed5d 100644
--- a/arrow-data-source/standard/src/main/scala/com/intel/oap/spark/sql/execution/datasources/v2/arrow/ArrowUtils.scala
+++ b/arrow-data-source/standard/src/main/scala/com/intel/oap/spark/sql/execution/datasources/v2/arrow/ArrowUtils.scala
@@ -18,29 +18,28 @@
 package com.intel.oap.spark.sql.execution.datasources.v2.arrow
 
 import java.net.URI
-import java.util.TimeZone
 
 import scala.collection.JavaConverters._
 
 import com.intel.oap.vectorized.ArrowWritableColumnVector
-import org.apache.arrow.dataset.file.SingleFileDatasetFactory
-import org.apache.arrow.dataset.scanner.ScanTask
-import org.apache.arrow.vector.FieldVector
-import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID
+import org.apache.arrow.dataset.file.FileSystemDatasetFactory
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch
 import org.apache.arrow.vector.types.pojo.Schema
 import org.apache.hadoop.fs.FileStatus
 
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.execution.datasources.v2.arrow.{SparkMemoryUtils, SparkSchemaUtils}
 import org.apache.spark.sql.execution.vectorized.ColumnVectorUtils
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types.StructType
 import org.apache.spark.sql.util.CaseInsensitiveStringMap
+import org.apache.spark.sql.vectorized.ColumnVector
 import org.apache.spark.sql.vectorized.ColumnarBatch
 
 object ArrowUtils {
 
   def readSchema(file: FileStatus, options: CaseInsensitiveStringMap): Option[StructType] = {
-    val factory: SingleFileDatasetFactory =
+    val factory: FileSystemDatasetFactory =
       makeArrowDiscovery(file.getPath.toString, -1L, -1L,
         new ArrowOptions(options.asScala.toMap))
     val schema = factory.inspect()
@@ -65,11 +64,11 @@ object ArrowUtils {
   }
 
   def makeArrowDiscovery(file: String, startOffset: Long, length: Long,
-      options: ArrowOptions): SingleFileDatasetFactory = {
+      options: ArrowOptions): FileSystemDatasetFactory = {
     val format = getFormat(options).getOrElse(throw new IllegalStateException)
     val allocator = SparkMemoryUtils.contextAllocator()
-    val factory = new SingleFileDatasetFactory(allocator,
+    val factory = new FileSystemDatasetFactory(allocator,
       SparkMemoryUtils.contextMemoryPool(),
       format,
       rewriteUri(file),
@@ -80,17 +79,18 @@ object ArrowUtils {
 
   def toArrowSchema(t: StructType): Schema = {
     // fixme this might be platform dependent
-    SparkSchemaUtils.toArrowSchema(t, TimeZone.getDefault.getID)
+    SparkSchemaUtils.toArrowSchema(t, SQLConf.get.sessionLocalTimeZone)
   }
 
-  def loadVectors(bundledVectors: ScanTask.ArrowBundledVectors, partitionValues: InternalRow,
+  def loadBatch(input: ArrowRecordBatch, partitionValues: InternalRow,
       partitionSchema: StructType, dataSchema: StructType): ColumnarBatch = {
-    val rowCount: Int = getRowCount(bundledVectors)
-    val dataVectors = getDataVectors(bundledVectors, dataSchema)
-    val dictionaryVectors = getDictionaryVectors(bundledVectors, dataSchema)
+    val rowCount: Int = input.getLength
 
-    val vectors = ArrowWritableColumnVector.loadColumns(rowCount, dataVectors.asJava,
-      dictionaryVectors.asJava)
+    val vectors = try {
+      ArrowWritableColumnVector.loadColumns(rowCount, toArrowSchema(dataSchema), input)
+    } finally {
+      input.close()
+    }
     val partitionColumns = ArrowWritableColumnVector.allocateColumns(rowCount, partitionSchema)
     (0 until partitionColumns.length).foreach(i => {
       ColumnVectorUtils.populate(partitionColumns(i), partitionValues, i)
@@ -98,54 +98,13 @@ object ArrowUtils {
       partitionColumns(i).setIsConstant()
     })
 
-    val batch = new ColumnarBatch(vectors ++ partitionColumns, rowCount)
+    val batch = new ColumnarBatch(
+      vectors.map(_.asInstanceOf[ColumnVector]) ++
+        partitionColumns.map(_.asInstanceOf[ColumnVector]),
+      rowCount)
     batch
   }
 
-  private def getRowCount(bundledVectors: ScanTask.ArrowBundledVectors) = {
-    val valueVectors = bundledVectors.valueVectors
-    val rowCount = valueVectors.getRowCount
-    rowCount
-  }
-
-  private def getDataVectors(bundledVectors: ScanTask.ArrowBundledVectors,
-      dataSchema: StructType): List[FieldVector] = {
-    // TODO Deprecate following (bad performance maybe brought).
-    // TODO Assert vsr strictly matches dataSchema instead.
-    val valueVectors = bundledVectors.valueVectors
-    dataSchema.map(f => {
-      val vector = valueVectors.getVector(f.name)
-      if (vector == null) {
-        throw new IllegalStateException("Error: no vector named " + f.name + " in record bach")
-      }
-      vector
-    }).toList
-  }
-
-  private def getDictionaryVectors(bundledVectors: ScanTask.ArrowBundledVectors,
-      dataSchema: StructType): List[FieldVector] = {
-    val valueVectors = bundledVectors.valueVectors
-    val dictionaryVectorMap = bundledVectors.dictionaryVectors
-
-    val fieldNameToDictionaryEncoding = valueVectors.getSchema.getFields.asScala.map(f => {
-      f.getName -> f.getDictionary
-    }).toMap
-
-    val dictionaryVectorsWithNulls = dataSchema.map(f => {
-      val de = fieldNameToDictionaryEncoding(f.name)
-
-      Option(de) match {
-        case None => null
-        case _ =>
-          if (de.getIndexType.getTypeID != ArrowTypeID.Int) {
-            throw new IllegalArgumentException("Wrong index type: " + de.getIndexType)
-          }
-          dictionaryVectorMap.get(de.getId).getVector
-      }
-    }).toList
-    dictionaryVectorsWithNulls
-  }
-
   private def getFormat(
       options: ArrowOptions): Option[org.apache.arrow.dataset.file.FileFormat] = {
     Option(options.originalFormat match {
diff --git a/docs/ApacheArrowInstallation.md b/docs/ApacheArrowInstallation.md
index 9b0ae3b32..f26120e9d 100644
--- a/docs/ApacheArrowInstallation.md
+++ b/docs/ApacheArrowInstallation.md
@@ -30,7 +30,7 @@ Please make sure your cmake version is qualified based on the prerequisite.
 # Arrow
 ``` shell
 git clone https://github.com/oap-project/arrow.git
-cd arrow && git checkout arrow-3.0.0-oap-1.1
+cd arrow && git checkout arrow-4.0.0-oap
 mkdir -p arrow/cpp/release-build
 cd arrow/cpp/release-build
 cmake -DARROW_DEPENDENCY_SOURCE=BUNDLED -DARROW_GANDIVA_JAVA=ON -DARROW_GANDIVA=ON -DARROW_PARQUET=ON -DARROW_CSV=ON -DARROW_HDFS=ON -DARROW_BOOST_USE_SHARED=ON -DARROW_JNI=ON -DARROW_DATASET=ON -DARROW_WITH_PROTOBUF=ON -DARROW_WITH_SNAPPY=ON -DARROW_WITH_LZ4=ON -DARROW_FILESYSTEM=ON -DARROW_JSON=ON ..
diff --git a/docs/OAP-Developer-Guide.md b/docs/OAP-Developer-Guide.md
index e3ee02ad5..f941a5587 100644
--- a/docs/OAP-Developer-Guide.md
+++ b/docs/OAP-Developer-Guide.md
@@ -33,7 +33,7 @@ Then the dependencies below will be installed:
 * [HPNL](https://github.com/Intel-bigdata/HPNL)
 * [PMDK](https://github.com/pmem/pmdk)
 * [OneAPI](https://software.intel.com/content/www/us/en/develop/tools/oneapi.html)
-* [Arrow](https://github.com/oap-project/arrow/tree/arrow-3.0.0-oap-1.1)
+* [Arrow](https://github.com/oap-project/arrow/tree/arrow-4.0.0-oap)
 * [LLVM](https://llvm.org/)
 
 Run the following command to learn more.
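Note on the scan-side change in the two Scala files above: the Arrow 3.0.0 `ScanTask.scan()` call, which returned bundled vectors, is replaced by the Arrow 4.0.0 `execute()` call, which yields `ArrowRecordBatch` instances that are turned into Spark columnar batches through the new `ArrowUtils.loadBatch`. A minimal sketch of the resulting caller-side pattern, restating the wiring from `ArrowFileFormat` above (`taskList`, `file`, `partitionSchema` and `requiredSchema` are the values already in scope there, nothing new is introduced):

```scala
val itrList = taskList
  .map(task => task.execute())          // Arrow 4.0.0: was task.scan()

val rows = itrList
  .toIterator
  .flatMap(itr => itr.asScala)          // each element is now an ArrowRecordBatch
  .map(batch => ArrowUtils.loadBatch(   // was ArrowUtils.loadVectors(...)
    batch, file.partitionValues, partitionSchema, requiredSchema))
```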
diff --git a/docs/OAP-Installation-Guide.md b/docs/OAP-Installation-Guide.md
index 7b7b17f68..ce2311e0a 100644
--- a/docs/OAP-Installation-Guide.md
+++ b/docs/OAP-Installation-Guide.md
@@ -36,7 +36,7 @@ Once finished steps above, you have completed OAP dependencies installation and
 
 Dependencies below are required by OAP and all of them are included in OAP Conda package, they will be automatically installed in your cluster when you Conda install OAP. Ensure you have activated environment which you created in the previous steps.
 
-- [Arrow](https://github.com/oap-project/arrow/tree/arrow-3.0.0-oap-1.1)
+- [Arrow](https://github.com/oap-project/arrow/tree/arrow-4.0.0-oap)
 - [Plasma](http://arrow.apache.org/blog/2017/08/08/plasma-in-memory-object-store/)
 - [Memkind](https://anaconda.org/intel/memkind)
 - [Vmemcache](https://anaconda.org/intel/vmemcache)
diff --git a/native-sql-engine/core/pom.xml b/native-sql-engine/core/pom.xml
index ca162abee..3c92d6bf7 100644
--- a/native-sql-engine/core/pom.xml
+++ b/native-sql-engine/core/pom.xml
@@ -78,7 +78,7 @@
    <dependency>
      <groupId>org.apache.arrow</groupId>
      <artifactId>arrow-memory-netty</artifactId>
-      <version>3.0.0</version>
+      <version>${arrow.version}</version>
      <scope>runtime</scope>
    </dependency>
    <dependency>
@@ -134,11 +134,6 @@
      <groupId>com.intel.oap</groupId>
      <version>${project.version}</version>
    </dependency>
-    <dependency>
-      <groupId>com.google.flatbuffers</groupId>
-      <artifactId>flatbuffers-java</artifactId>
-      <version>1.9.0</version>
-    </dependency>
    <dependency>
      <groupId>org.scalacheck</groupId>
      <artifactId>scalacheck_${scala.binary.version}</artifactId>
@@ -259,8 +254,14 @@
    <dependency>
      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-auth</artifactId>
-      <version>${hadoop.version}</version>
+      <artifactId>hadoop-client</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.intel.oap</groupId>
+      <artifactId>spark-arrow-datasource-standard</artifactId>
+      <version>${project.version}</version>
+      <scope>test</scope>
    </dependency>
diff --git a/native-sql-engine/core/src/main/java/com/intel/oap/vectorized/JniUtils.java b/native-sql-engine/core/src/main/java/com/intel/oap/vectorized/JniUtils.java
index ebca1bff7..65f3f887f 100644
--- a/native-sql-engine/core/src/main/java/com/intel/oap/vectorized/JniUtils.java
+++ b/native-sql-engine/core/src/main/java/com/intel/oap/vectorized/JniUtils.java
@@ -39,8 +39,8 @@
 /** Helper class for JNI related operations. */
 public class JniUtils {
   private static final String LIBRARY_NAME = "spark_columnar_jni";
-  private static final String ARROW_LIBRARY_NAME = "libarrow.so.300";
-  private static final String GANDIVA_LIBRARY_NAME = "libgandiva.so.300";
+  private static final String ARROW_LIBRARY_NAME = "libarrow.so.400";
+  private static final String GANDIVA_LIBRARY_NAME = "libgandiva.so.400";
   private static boolean isLoaded = false;
   private static boolean isCodegenDependencyLoaded = false;
   private static List<String> codegenJarsLoadedCache = new ArrayList<>();
diff --git a/native-sql-engine/cpp/src/CMakeLists.txt b/native-sql-engine/cpp/src/CMakeLists.txt
index b11a06fb6..bf0294830 100644
--- a/native-sql-engine/cpp/src/CMakeLists.txt
+++ b/native-sql-engine/cpp/src/CMakeLists.txt
@@ -128,7 +128,7 @@ macro(build_arrow STATIC_ARROW)
   ExternalProject_Add(arrow_ep
                       GIT_REPOSITORY https://github.com/oap-project/arrow.git
                       SOURCE_DIR ${ARROW_SOURCE_DIR}
-                      GIT_TAG arrow-3.0.0-oap
+                      GIT_TAG arrow-4.0.0-oap
                       BUILD_IN_SOURCE 1
                       INSTALL_DIR ${ARROW_PREFIX}
                       INSTALL_COMMAND make install
@@ -216,15 +216,15 @@ macro(build_arrow STATIC_ARROW)
   )
 
   ExternalProject_Add_Step(arrow_ep copy_arrow_binary_300
-    COMMAND cp -a ${ARROW_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}${ARROW_LIB_NAME}${ARROW_SHARED_LIBRARY_SUFFIX}.300 ${root_directory}/releases/
-    COMMENT "Copy libarrow.so.300 to releases/"
+    COMMAND cp -a ${ARROW_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}${ARROW_LIB_NAME}${ARROW_SHARED_LIBRARY_SUFFIX}.400 ${root_directory}/releases/
+    COMMENT "Copy libarrow.so.400 to releases/"
     DEPENDEES mkdir download update patch configure build install java_install
     WORKING_DIRECTORY "${ARROW_PREFIX}/"
   )
 
   ExternalProject_Add_Step(arrow_ep copy_arrow_binary_300_0_0
-    COMMAND cp -a ${ARROW_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}${ARROW_LIB_NAME}${ARROW_SHARED_LIBRARY_SUFFIX}.300.0.0 ${root_directory}/releases/
-    COMMENT "Copy libarrow.so.300.0.0 to releases/"
+    COMMAND cp -a ${ARROW_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}${ARROW_LIB_NAME}${ARROW_SHARED_LIBRARY_SUFFIX}.400.0.0 ${root_directory}/releases/
+    COMMENT "Copy libarrow.so.400.0.0 to releases/"
     DEPENDEES mkdir download update patch configure build install java_install
     WORKING_DIRECTORY "${ARROW_PREFIX}/"
   )
@@ -239,15 +239,15 @@ macro(build_arrow STATIC_ARROW)
   )
 
   ExternalProject_Add_Step(arrow_ep copy_gandiva_binary_300
-    COMMAND cp -a ${ARROW_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}${GANDIVA_LIB_NAME}${ARROW_SHARED_LIBRARY_SUFFIX}.300 ${root_directory}/releases/
-    COMMENT "Copy libgandiva.so.300 to releases/"
+    COMMAND cp -a ${ARROW_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}${GANDIVA_LIB_NAME}${ARROW_SHARED_LIBRARY_SUFFIX}.400 ${root_directory}/releases/
+    COMMENT "Copy libgandiva.so.400 to releases/"
     DEPENDEES mkdir download update patch configure build install java_install
     WORKING_DIRECTORY "${ARROW_PREFIX}/"
   )
 
   ExternalProject_Add_Step(arrow_ep copy_gandiva_binary_300_0_0
-    COMMAND cp -a ${ARROW_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}${GANDIVA_LIB_NAME}${ARROW_SHARED_LIBRARY_SUFFIX}.300.0.0 ${root_directory}/releases/
-    COMMENT "Copy libgandiva.so.300.0.0 to releases/"
+    COMMAND cp -a ${ARROW_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}${GANDIVA_LIB_NAME}${ARROW_SHARED_LIBRARY_SUFFIX}.400.0.0 ${root_directory}/releases/
+    COMMENT "Copy libgandiva.so.400.0.0 to releases/"
     DEPENDEES mkdir download update patch configure build install java_install
     WORKING_DIRECTORY "${ARROW_PREFIX}/"
   )
@@ -321,13 +321,11 @@ macro(find_arrow)
     message(STATUS "COPY and Set Arrow Header to: ${ARROW_BFS_INCLUDE_DIR}")
     file(COPY ${ARROW_BFS_INCLUDE_DIR}/arrow DESTINATION ${root_directory}/releases/include)
     file(COPY ${ARROW_BFS_INCLUDE_DIR}/gandiva DESTINATION ${root_directory}/releases/include)
-    file(COPY ${ARROW_BFS_INCLUDE_DIR}/jni DESTINATION ${root_directory}/releases/include)
     file(COPY ${ARROW_BFS_INCLUDE_DIR}/parquet DESTINATION ${root_directory}/releases/include)
   else()
     message(STATUS "COPY and Set Arrow Header to: ${ARROW_INCLUDE_DIR}")
     file(COPY ${ARROW_INCLUDE_DIR}/arrow DESTINATION ${root_directory}/releases/include)
     file(COPY ${ARROW_INCLUDE_DIR}/gandiva DESTINATION ${root_directory}/releases/include)
-    file(COPY ${ARROW_INCLUDE_DIR}/jni DESTINATION ${root_directory}/releases/include)
     file(COPY ${ARROW_INCLUDE_DIR}/parquet DESTINATION ${root_directory}/releases/include)
   endif()
diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/kernels_ext.h b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/kernels_ext.h
index 734d8c727..ee01b5dbf 100644
--- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/kernels_ext.h
+++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/kernels_ext.h
@@ -112,8 +112,9 @@ class EncodeArrayKernel : public KernalBase {
   arrow::Status Evaluate(const std::shared_ptr<arrow::Array>& in,
                          std::shared_ptr<arrow::Array>* out) override;
 
- private:
   class Impl;
+
+ private:
   std::unique_ptr<Impl> impl_;
   arrow::compute::ExecContext* ctx_ = nullptr;
 };
diff --git a/native-sql-engine/cpp/src/codegen/common/visitor_base.h b/native-sql-engine/cpp/src/codegen/common/visitor_base.h
index 75b494f43..0b056f1a4 100644
--- a/native-sql-engine/cpp/src/codegen/common/visitor_base.h
+++ b/native-sql-engine/cpp/src/codegen/common/visitor_base.h
@@ -48,6 +48,10 @@ class VisitorBase : public gandiva::NodeVisitor {
   arrow::Status Visit(const gandiva::InExpressionNode& node) override {
     return arrow::Status::OK();
   }
+  arrow::Status Visit(
+      const gandiva::InExpressionNode& node) override {
+    return arrow::Status::OK();
+  }
 };
 }  // namespace codegen
 }  // namespace sparkcolumnarplugin
diff --git a/pom.xml b/pom.xml
index 03d8554b5..da385c41d 100644
--- a/pom.xml
+++ b/pom.xml
@@ -33,7 +33,7 @@
    <scala.version>2.12.10</scala.version>
    <scala.binary.version>2.12</scala.binary.version>
    <spark.version>3.1.1</spark.version>
-    <arrow.version>3.0.0</arrow.version>
+    <arrow.version>4.0.0</arrow.version>
    <hadoop.version>2.7.4</hadoop.version>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
@@ -71,6 +71,14 @@
          <groupId>log4j</groupId>
          <artifactId>log4j</artifactId>
        </exclusion>
+        <exclusion>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-client</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.curator</groupId>
+          <artifactId>curator-recipes</artifactId>
+        </exclusion>
      </exclusions>
    </dependency>
    <dependency>
@@ -92,6 +100,16 @@
      <version>${spark.version}</version>
      <type>test-jar</type>
      <scope>test</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-client</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.curator</groupId>
+          <artifactId>curator-recipes</artifactId>
+        </exclusion>
+      </exclusions>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
@@ -119,6 +137,38 @@
      <type>test-jar</type>
      <scope>test</scope>
    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-client</artifactId>
+      <version>${hadoop.version}</version>
+      <scope>provided</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>com.fasterxml.jackson.core</groupId>
+          <artifactId>jackson-core</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.fasterxml.jackson.core</groupId>
+          <artifactId>jackson-annotations</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.fasterxml.jackson.core</groupId>
+          <artifactId>jackson-databind</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.slf4j</groupId>
+          <artifactId>slf4j-log4j12</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>log4j</groupId>
+          <artifactId>log4j</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>io.netty</groupId>
+          <artifactId>netty</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
    <dependency>
      <groupId>org.scalatest</groupId>
      <artifactId>scalatest_${scala.binary.version}</artifactId>