diff --git a/native-sql-engine/cpp/src/CMakeLists.txt b/native-sql-engine/cpp/src/CMakeLists.txt index d6aa0258e..9aeac55e1 100644 --- a/native-sql-engine/cpp/src/CMakeLists.txt +++ b/native-sql-engine/cpp/src/CMakeLists.txt @@ -477,9 +477,12 @@ file(COPY codegen/common/result_iterator.h DESTINATION ${root_directory}/release file(COPY codegen/common/relation_column.h DESTINATION ${root_directory}/releases/include/codegen/common/) file(COPY codegen/common/hash_relation.h DESTINATION ${root_directory}/releases/include/codegen/common/) file(COPY codegen/common/sort_relation.h DESTINATION ${root_directory}/releases/include/codegen/common/) +file(CREATE_LINK ${root_directory}/releases/include ${root_directory}/releases/nativesql_include SYMBOLIC) file(COPY codegen/common/hash_relation_string.h DESTINATION ${root_directory}/releases/include/codegen/common/) file(COPY codegen/common/hash_relation_number.h DESTINATION ${root_directory}/releases/include/codegen/common/) + + add_definitions(-DNATIVESQL_SRC_PATH="${root_directory}/releases") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations -Wno-attributes") set(SPARK_COLUMNAR_PLUGIN_SRCS diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/codegen_common.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/codegen_common.cc index da64be0e7..0ec53a11e 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/codegen_common.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/codegen_common.cc @@ -39,12 +39,7 @@ namespace extra { std::string BaseCodes() { return R"( -#include -#include -#include "codegen/arrow_compute/ext/code_generator_base.h" -#include "precompile/array.h" -using namespace sparkcolumnarplugin::codegen::arrowcompute::extra; )"; } @@ -601,6 +596,7 @@ arrow::Status CompileCodes(std::string codes, std::string signature) { mkdir(outpath.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); std::string prefix = "/spark-columnar-plugin-codegen-"; std::string cppfile = 
outpath + prefix + signature + ".cc"; + std::string objfile = outpath + prefix + signature + ".o"; std::string libfile = outpath + prefix + signature + ".so"; std::string jarfile = outpath + prefix + signature + ".jar"; std::string logfile = outpath + prefix + signature + ".log"; @@ -626,13 +622,16 @@ arrow::Status CompileCodes(std::string codes, std::string signature) { } std::string env_gcc = std::string(env_gcc_); + std::string env_codegen_option = " -O3 -march=native "; char* env_codegen_option_ = std::getenv("CODEGEN_OPTION"); - if (env_codegen_option_ == nullptr) { - env_codegen_option_ = " -O3 -march=native "; + if (env_codegen_option_ != nullptr) { + env_codegen_option = std::string(env_codegen_option_); } - std::string env_codegen_option = std::string(env_codegen_option_); + std::string libwscgfile = GetTempPath() + "/nativesql_include/precompile/wscgapi.hpp"; + std::string libwscg_pch = + GetTempPath() + "/nativesql_include/precompile/wscgapi.hpp.gch"; const char* env_arrow_dir = std::getenv("LIBARROW_DIR"); std::string arrow_header; std::string arrow_lib, arrow_lib2; @@ -646,14 +645,33 @@ arrow::Status CompileCodes(std::string codes, std::string signature) { arrow_lib2 = " -L" + std::string(env_arrow_dir) + "/lib "; } // compile the code - std::string cmd = env_gcc + " -std=c++14 -Wno-deprecated-declarations " + arrow_header + - arrow_lib + arrow_lib2 + nativesql_header + nativesql_header_2 + - nativesql_lib + cppfile + " -o " + libfile + env_codegen_option + - " -shared -fPIC -lspark_columnar_jni 2> " + logfile; + std::string base_dir = GetTempPath(); + chdir(base_dir.c_str()); + std::string cmd = "("; /* open a subshell: the single trailing 2> then captures stderr of every stage, not only jar */ + struct stat pch_stat; + auto ret = stat(libwscg_pch.c_str(), &pch_stat); + if (ret == -1) { + cmd += env_gcc + " -std=c++14 -Wno-deprecated-declarations " + arrow_header + + arrow_lib + arrow_lib2 + nativesql_header + nativesql_header_2 + " -c " + + libwscgfile + env_codegen_option + " -fPIC && "; + } + + cmd += env_gcc + " -std=c++14 
-Wno-deprecated-declarations " + arrow_header + + nativesql_header + nativesql_header_2 + " -c " + cppfile + " -o " + objfile + + env_codegen_option + " -fPIC && "; + // linking + cmd += env_gcc + arrow_lib + arrow_lib2 + nativesql_lib + objfile + " -o " + libfile + + " -lspark_columnar_jni -shared && "; + + // package + cmd += "cd " + outpath + " && jar -cf spark-columnar-plugin-codegen-precompile-" + + signature + ".jar spark-columnar-plugin-codegen-" + signature + ".so) 2> " + + logfile; + #ifdef DEBUG std::cout << cmd << std::endl; #endif - int ret; + int elapse_time = 0; TIME_MICRO(elapse_time, ret, system(cmd.c_str())); #ifdef DEBUG @@ -664,15 +682,6 @@ arrow::Status CompileCodes(std::string codes, std::string signature) { std::cout << cmd << std::endl; return arrow::Status::Invalid("compilation failed, see ", logfile); } - cmd = "cd " + outpath + "; jar -cf spark-columnar-plugin-codegen-precompile-" + - signature + ".jar spark-columnar-plugin-codegen-" + signature + ".so"; -#ifdef DEBUG - std::cout << cmd << std::endl; -#endif - ret = system(cmd.c_str()); - if (WEXITSTATUS(ret) != EXIT_SUCCESS) { - return arrow::Status::Invalid("package jar failed"); - } #ifdef DEBUG struct stat tstat; diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_merge_join_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_merge_join_kernel.cc index 273ce6313..0bf504f82 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_merge_join_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_merge_join_kernel.cc @@ -108,9 +108,6 @@ class ConditionedMergeJoinKernel::Impl { auto codegen_ctx = std::make_shared(); bool use_relation_for_stream = input.empty(); - codegen_ctx->header_codes.push_back( - R"(#include "codegen/arrow_compute/ext/array_item_index.h")"); - std::vector prepare_list; bool cond_check = false; if (condition_) cond_check = true; @@ -119,7 +116,6 @@ class 
ConditionedMergeJoinKernel::Impl { std::stringstream sort_define_ss; std::vector field_list = {left_field_list_, right_field_list_}; - codegen_ctx->header_codes.push_back(R"(#include "codegen/common/sort_relation.h")"); int idx = 0; for (auto relation_id : relation_id_) { auto relation_list_name = "sort_relation_" + std::to_string(relation_id) + "_"; diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_probe_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_probe_kernel.cc index a09eb30ef..f6fb38450 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_probe_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_probe_kernel.cc @@ -173,9 +173,6 @@ class ConditionedProbeKernel::Impl { std::shared_ptr* codegen_ctx_out, int* var_id) { auto codegen_ctx = std::make_shared(); - codegen_ctx->header_codes.push_back( - R"(#include "codegen/arrow_compute/ext/array_item_index.h")"); - std::vector prepare_list; bool cond_check = false; if (condition_) cond_check = true; @@ -192,7 +189,6 @@ class ConditionedProbeKernel::Impl { hash_prepare_ss << "RETURN_NOT_OK(typed_dependent_iter_list_" << hash_relation_id_ << "->Next(" << "&" << relation_list_name << "));" << std::endl; - codegen_ctx->header_codes.push_back(R"(#include "codegen/common/hash_relation.h")"); hash_define_ss << "std::shared_ptr " << relation_list_name << ";" << std::endl; diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/hash_aggregate_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/hash_aggregate_kernel.cc index 182cf16a2..588ed9001 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/hash_aggregate_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/hash_aggregate_kernel.cc @@ -215,11 +215,6 @@ class HashAggregateKernel::Impl { std::shared_ptr* codegen_ctx_out, int* var_id) { auto codegen_ctx = std::make_shared(); - codegen_ctx->header_codes.push_back( - 
R"(#include "codegen/arrow_compute/ext/array_item_index.h")"); - codegen_ctx->header_codes.push_back( - R"(#include "codegen/arrow_compute/ext/actions_impl.h")"); - std::vector prepare_list; // 1.0 prepare aggregate input expressions std::stringstream prepare_ss; @@ -243,7 +238,6 @@ class HashAggregateKernel::Impl { // 1. Get action list and action_prepare_project_list if (key_node_list.size() > 0 && key_node_list[0]->return_type()->id() == arrow::Type::DECIMAL128) { - codegen_ctx->header_codes.push_back(R"(#include "precompile/hash_map.h")"); aggr_prepare_ss << "aggr_hash_table_" << level << " = std::make_shared<" << GetTypeString(key_node_list[0]->return_type(), "") << "HashMap>(ctx_->memory_pool());" << std::endl; @@ -254,7 +248,6 @@ class HashAggregateKernel::Impl { } else if (key_node_list.size() > 1 || (key_node_list.size() > 0 && key_node_list[0]->return_type()->id() == arrow::Type::STRING)) { - codegen_ctx->header_codes.push_back(R"(#include "precompile/hash_map.h")"); aggr_prepare_ss << "aggr_hash_table_" << level << " = std::make_shared<" << GetTypeString(arrow::utf8(), "") << "HashMap>(ctx_->memory_pool());" << std::endl; @@ -263,7 +256,7 @@ class HashAggregateKernel::Impl { } else if (key_node_list.size() > 0) { auto type = key_node_list[0]->return_type(); - codegen_ctx->header_codes.push_back(R"(#include "precompile/sparse_hash_map.h")"); + aggr_prepare_ss << "aggr_hash_table_" << level << " = std::make_shared<" << "SparseHashMap<" << GetCTypeString(type) << ">>(ctx_->memory_pool());" << std::endl; @@ -308,8 +301,6 @@ class HashAggregateKernel::Impl { prepare_ss << "auto " << unsafe_row_name_validity << " = " << project_output_list[i].first.first << "_validity;" << std::endl; } else { - codegen_ctx->header_codes.push_back( - R"(#include "third_party/row_wise_memory/unsafe_row.h")"); std::stringstream unsafe_row_define_ss; unsafe_row_define_ss << "std::shared_ptr " << unsafe_row_name << "_unsafe_row = std::make_shared(" @@ -562,7 +553,7 @@ class 
HashAggregateKernel::Impl { if (!result_expr_list_.empty()) { codegen_ctx->gandiva_projector = std::make_shared( ctx_, arrow::schema(result_field_list_), GetGandivaKernel(result_expr_list_)); - codegen_ctx->header_codes.push_back(R"(#include "precompile/gandiva_projector.h")"); + finish_ss << "RETURN_NOT_OK(gandiva_projector_list_[gp_idx++]->Evaluate(&" "do_hash_" "aggr_finish_" diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/merge_join_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/merge_join_kernel.cc index cb93dc994..105a2b48d 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/merge_join_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/merge_join_kernel.cc @@ -1312,12 +1312,7 @@ typedef )" + item_content_str + GetListContentStr(multiple_cols, left_key_index_list.size()); return BaseCodes() + R"( -#include - -#include "codegen/arrow_compute/ext/array_item_index.h" -#include "precompile/builder.h" -#include "precompile/gandiva.h" -using namespace sparkcolumnarplugin::precompile; +#include "precompile/wscgapi.hpp" )" + hash_map_include_str + R"( class FVector { diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc index 49a1ab48b..8d7292e76 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc @@ -1987,18 +1987,9 @@ class SortArraysCodegenKernel : public SortArraysToIndicesKernel::Impl { GetCachedVariablesClear(key_typed_codegen_list); return BaseCodes() + R"( -#include -#include -#include +#include "precompile/wscgapi.hpp" -#include "codegen/arrow_compute/ext/array_item_index.h" -#include "codegen/common/sort_relation.h" -#include "precompile/builder.h" -#include "precompile/type.h" -#include "third_party/ska_sort.hpp" -#include "third_party/timsort.hpp" -using namespace sparkcolumnarplugin::precompile; 
class TypedSorterImpl : public CodeGenBase { public: diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc index f5c4fc973..b0d575c10 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc @@ -306,8 +306,7 @@ class WholeStageCodeGenKernel::Impl { std::string out_list; std::stringstream define_ss; codes_ss << BaseCodes() << std::endl; - codes_ss << R"(#include "precompile/builder.h")" << std::endl; - codes_ss << R"(#include "utils/macros.h")" << std::endl; + codes_ss << R"(#include "precompile/wscgapi.hpp")" << std::endl; std::vector headers; for (auto codegen_ctx : codegen_ctx_list) { for (auto header : codegen_ctx->header_codes) { diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/window_sort_kernel.h b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/window_sort_kernel.h index cb54882e0..63b635525 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/window_sort_kernel.h +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/window_sort_kernel.h @@ -215,18 +215,7 @@ class WindowSortKernel::Impl { std::string typed_res_array_str = GetTypedResArray(shuffle_typed_codegen_list.size()); return BaseCodes() + R"( -#include -#include -#include - -#include - -#include "codegen/arrow_compute/ext/array_item_index.h" -#include "precompile/builder.h" -#include "precompile/type.h" -#include "third_party/ska_sort.hpp" -#include "third_party/timsort.hpp" -using namespace sparkcolumnarplugin::precompile; +#include "precompile/wscgapi.hpp" class TypedSorterImpl : public CodeGenBase { public: diff --git a/native-sql-engine/cpp/src/precompile/wscgapi.hpp b/native-sql-engine/cpp/src/precompile/wscgapi.hpp new file mode 100644 index 000000000..49bd18cf3 --- /dev/null +++ 
b/native-sql-engine/cpp/src/precompile/wscgapi.hpp @@ -0,0 +1,30 @@ +#pragma once + +#include +#include +#include + +#include +#include +#include +#include + +#include "codegen/arrow_compute/ext/actions_impl.h" +#include "codegen/arrow_compute/ext/array_item_index.h" +#include "codegen/arrow_compute/ext/code_generator_base.h" +#include "codegen/common/hash_relation.h" +#include "codegen/common/sort_relation.h" +#include "precompile/array.h" +#include "precompile/builder.h" +#include "precompile/gandiva.h" +#include "precompile/gandiva_projector.h" +#include "precompile/hash_map.h" +#include "precompile/sparse_hash_map.h" +#include "precompile/type.h" +#include "third_party/row_wise_memory/unsafe_row.h" +#include "third_party/ska_sort.hpp" +#include "third_party/timsort.hpp" +#include "utils/macros.h" +using namespace sparkcolumnarplugin::precompile; + +using namespace sparkcolumnarplugin::codegen::arrowcompute::extra;