Skip to content
This repository has been archived by the owner on Sep 18, 2023. It is now read-only.

[NSE-126] improve codegen with pre-compiled header #639

Merged
merged 4 commits into from
Dec 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions native-sql-engine/cpp/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -477,9 +477,12 @@ file(COPY codegen/common/result_iterator.h DESTINATION ${root_directory}/release
file(COPY codegen/common/relation_column.h DESTINATION ${root_directory}/releases/include/codegen/common/)
file(COPY codegen/common/hash_relation.h DESTINATION ${root_directory}/releases/include/codegen/common/)
file(COPY codegen/common/sort_relation.h DESTINATION ${root_directory}/releases/include/codegen/common/)
file(CREATE_LINK ${root_directory}/releases/include ${root_directory}/releases/nativesql_include SYMBOLIC)
file(COPY codegen/common/hash_relation_string.h DESTINATION ${root_directory}/releases/include/codegen/common/)
file(COPY codegen/common/hash_relation_number.h DESTINATION ${root_directory}/releases/include/codegen/common/)



add_definitions(-DNATIVESQL_SRC_PATH="${root_directory}/releases")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations -Wno-attributes")
set(SPARK_COLUMNAR_PLUGIN_SRCS
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,7 @@ namespace extra {

// Returns the shared preamble for generated source text: a raw string literal
// holding #include directives and a using-directive for
// sparkcolumnarplugin::codegen::arrowcompute::extra. Call sites visible in this
// change concatenate it ahead of the generated class text.
// NOTE(review): this listing appears to come from a diff rendering without +/-
// markers, so some lines inside the literal may be ones the change removes —
// confirm against the merged file before relying on the exact contents.
std::string BaseCodes() {
return R"(
#include <arrow/compute/api.h>
#include <arrow/record_batch.h>

#include "codegen/arrow_compute/ext/code_generator_base.h"
#include "precompile/array.h"
using namespace sparkcolumnarplugin::codegen::arrowcompute::extra;
)";
}

Expand Down Expand Up @@ -601,6 +596,7 @@ arrow::Status CompileCodes(std::string codes, std::string signature) {
mkdir(outpath.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
std::string prefix = "/spark-columnar-plugin-codegen-";
std::string cppfile = outpath + prefix + signature + ".cc";
std::string objfile = outpath + prefix + signature + ".o";
std::string libfile = outpath + prefix + signature + ".so";
std::string jarfile = outpath + prefix + signature + ".jar";
std::string logfile = outpath + prefix + signature + ".log";
Expand All @@ -626,13 +622,16 @@ arrow::Status CompileCodes(std::string codes, std::string signature) {
}
std::string env_gcc = std::string(env_gcc_);

std::string env_codegen_option = " -O3 -march=native ";
char* env_codegen_option_ = std::getenv("CODEGEN_OPTION");

if (env_codegen_option_ == nullptr) {
env_codegen_option_ = " -O3 -march=native ";
if (env_codegen_option_ != nullptr) {
env_codegen_option = std::string(env_codegen_option_);
}
std::string env_codegen_option = std::string(env_codegen_option_);

std::string libwscgfile = GetTempPath() + "/nativesql_include/precompile/wscgapi.hpp";
std::string libwscg_pch =
GetTempPath() + "/nativesql_include/precompile/wscgapi.hpp.gch";
const char* env_arrow_dir = std::getenv("LIBARROW_DIR");
std::string arrow_header;
std::string arrow_lib, arrow_lib2;
Expand All @@ -646,14 +645,33 @@ arrow::Status CompileCodes(std::string codes, std::string signature) {
arrow_lib2 = " -L" + std::string(env_arrow_dir) + "/lib ";
}
// compile the code
std::string cmd = env_gcc + " -std=c++14 -Wno-deprecated-declarations " + arrow_header +
arrow_lib + arrow_lib2 + nativesql_header + nativesql_header_2 +
nativesql_lib + cppfile + " -o " + libfile + env_codegen_option +
" -shared -fPIC -lspark_columnar_jni 2> " + logfile;
std::string base_dir = GetTempPath();
chdir(base_dir.c_str());
std::string cmd = "";
struct stat pch_stat;
auto ret = stat(libwscg_pch.c_str(), &pch_stat);
if (ret == -1) {
cmd += env_gcc + " -std=c++14 -Wno-deprecated-declarations " + arrow_header +
arrow_lib + arrow_lib2 + nativesql_header + nativesql_header_2 + " -c " +
libwscgfile + env_codegen_option + " -fPIC && ";
}

cmd += env_gcc + " -std=c++14 -Wno-deprecated-declarations " + arrow_header +
nativesql_header + nativesql_header_2 + " -c " + cppfile + " -o " + objfile +
env_codegen_option + "-fPIC && ";
// linking
cmd += env_gcc + arrow_lib + arrow_lib2 + nativesql_lib + objfile + " -o " + libfile +
" -lspark_columnar_jni -shared && ";

// package
cmd += "cd " + outpath + " && jar -cf spark-columnar-plugin-codegen-precompile-" +
signature + ".jar spark-columnar-plugin-codegen-" + signature + ".so 2>" +
logfile;

#ifdef DEBUG
std::cout << cmd << std::endl;
#endif
int ret;

int elapse_time = 0;
TIME_MICRO(elapse_time, ret, system(cmd.c_str()));
#ifdef DEBUG
Expand All @@ -664,15 +682,6 @@ arrow::Status CompileCodes(std::string codes, std::string signature) {
std::cout << cmd << std::endl;
return arrow::Status::Invalid("compilation failed, see ", logfile);
}
cmd = "cd " + outpath + "; jar -cf spark-columnar-plugin-codegen-precompile-" +
signature + ".jar spark-columnar-plugin-codegen-" + signature + ".so";
#ifdef DEBUG
std::cout << cmd << std::endl;
#endif
ret = system(cmd.c_str());
if (WEXITSTATUS(ret) != EXIT_SUCCESS) {
return arrow::Status::Invalid("package jar failed");
}

#ifdef DEBUG
struct stat tstat;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,6 @@ class ConditionedMergeJoinKernel::Impl {
auto codegen_ctx = std::make_shared<CodeGenContext>();
bool use_relation_for_stream = input.empty();

codegen_ctx->header_codes.push_back(
R"(#include "codegen/arrow_compute/ext/array_item_index.h")");

std::vector<std::string> prepare_list;
bool cond_check = false;
if (condition_) cond_check = true;
Expand All @@ -119,7 +116,6 @@ class ConditionedMergeJoinKernel::Impl {
std::stringstream sort_define_ss;
std::vector<gandiva::FieldVector> field_list = {left_field_list_, right_field_list_};

codegen_ctx->header_codes.push_back(R"(#include "codegen/common/sort_relation.h")");
int idx = 0;
for (auto relation_id : relation_id_) {
auto relation_list_name = "sort_relation_" + std::to_string(relation_id) + "_";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -173,9 +173,6 @@ class ConditionedProbeKernel::Impl {
std::shared_ptr<CodeGenContext>* codegen_ctx_out, int* var_id) {
auto codegen_ctx = std::make_shared<CodeGenContext>();

codegen_ctx->header_codes.push_back(
R"(#include "codegen/arrow_compute/ext/array_item_index.h")");

std::vector<std::string> prepare_list;
bool cond_check = false;
if (condition_) cond_check = true;
Expand All @@ -192,7 +189,6 @@ class ConditionedProbeKernel::Impl {
hash_prepare_ss << "RETURN_NOT_OK(typed_dependent_iter_list_" << hash_relation_id_
<< "->Next("
<< "&" << relation_list_name << "));" << std::endl;
codegen_ctx->header_codes.push_back(R"(#include "codegen/common/hash_relation.h")");

hash_define_ss << "std::shared_ptr<HashRelation> " << relation_list_name << ";"
<< std::endl;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -215,11 +215,6 @@ class HashAggregateKernel::Impl {
std::shared_ptr<CodeGenContext>* codegen_ctx_out, int* var_id) {
auto codegen_ctx = std::make_shared<CodeGenContext>();

codegen_ctx->header_codes.push_back(
R"(#include "codegen/arrow_compute/ext/array_item_index.h")");
codegen_ctx->header_codes.push_back(
R"(#include "codegen/arrow_compute/ext/actions_impl.h")");

std::vector<std::string> prepare_list;
// 1.0 prepare aggregate input expressions
std::stringstream prepare_ss;
Expand All @@ -243,7 +238,6 @@ class HashAggregateKernel::Impl {
// 1. Get action list and action_prepare_project_list
if (key_node_list.size() > 0 &&
key_node_list[0]->return_type()->id() == arrow::Type::DECIMAL128) {
codegen_ctx->header_codes.push_back(R"(#include "precompile/hash_map.h")");
aggr_prepare_ss << "aggr_hash_table_" << level << " = std::make_shared<"
<< GetTypeString(key_node_list[0]->return_type(), "")
<< "HashMap>(ctx_->memory_pool());" << std::endl;
Expand All @@ -254,7 +248,6 @@ class HashAggregateKernel::Impl {
} else if (key_node_list.size() > 1 ||
(key_node_list.size() > 0 &&
key_node_list[0]->return_type()->id() == arrow::Type::STRING)) {
codegen_ctx->header_codes.push_back(R"(#include "precompile/hash_map.h")");
aggr_prepare_ss << "aggr_hash_table_" << level << " = std::make_shared<"
<< GetTypeString(arrow::utf8(), "")
<< "HashMap>(ctx_->memory_pool());" << std::endl;
Expand All @@ -263,7 +256,7 @@ class HashAggregateKernel::Impl {

} else if (key_node_list.size() > 0) {
auto type = key_node_list[0]->return_type();
codegen_ctx->header_codes.push_back(R"(#include "precompile/sparse_hash_map.h")");

aggr_prepare_ss << "aggr_hash_table_" << level << " = std::make_shared<"
<< "SparseHashMap<" << GetCTypeString(type)
<< ">>(ctx_->memory_pool());" << std::endl;
Expand Down Expand Up @@ -308,8 +301,6 @@ class HashAggregateKernel::Impl {
prepare_ss << "auto " << unsafe_row_name_validity << " = "
<< project_output_list[i].first.first << "_validity;" << std::endl;
} else {
codegen_ctx->header_codes.push_back(
R"(#include "third_party/row_wise_memory/unsafe_row.h")");
std::stringstream unsafe_row_define_ss;
unsafe_row_define_ss << "std::shared_ptr<UnsafeRow> " << unsafe_row_name
<< "_unsafe_row = std::make_shared<UnsafeRow>("
Expand Down Expand Up @@ -562,7 +553,7 @@ class HashAggregateKernel::Impl {
if (!result_expr_list_.empty()) {
codegen_ctx->gandiva_projector = std::make_shared<GandivaProjector>(
ctx_, arrow::schema(result_field_list_), GetGandivaKernel(result_expr_list_));
codegen_ctx->header_codes.push_back(R"(#include "precompile/gandiva_projector.h")");

finish_ss << "RETURN_NOT_OK(gandiva_projector_list_[gp_idx++]->Evaluate(&"
"do_hash_"
"aggr_finish_"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1312,12 +1312,7 @@ typedef )" + item_content_str +
GetListContentStr(multiple_cols, left_key_index_list.size());

return BaseCodes() + R"(
#include <numeric>

#include "codegen/arrow_compute/ext/array_item_index.h"
#include "precompile/builder.h"
#include "precompile/gandiva.h"
using namespace sparkcolumnarplugin::precompile;
#include "precompile/wscgapi.hpp"
)" + hash_map_include_str +
R"(
class FVector {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1987,18 +1987,9 @@ class SortArraysCodegenKernel : public SortArraysToIndicesKernel::Impl {
GetCachedVariablesClear(key_typed_codegen_list);

return BaseCodes() + R"(
#include <arrow/buffer.h>

#include <algorithm>
#include <cmath>
#include "precompile/wscgapi.hpp"

#include "codegen/arrow_compute/ext/array_item_index.h"
#include "codegen/common/sort_relation.h"
#include "precompile/builder.h"
#include "precompile/type.h"
#include "third_party/ska_sort.hpp"
#include "third_party/timsort.hpp"
using namespace sparkcolumnarplugin::precompile;

class TypedSorterImpl : public CodeGenBase {
public:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -306,8 +306,7 @@ class WholeStageCodeGenKernel::Impl {
std::string out_list;
std::stringstream define_ss;
codes_ss << BaseCodes() << std::endl;
codes_ss << R"(#include "precompile/builder.h")" << std::endl;
codes_ss << R"(#include "utils/macros.h")" << std::endl;
codes_ss << R"(#include "precompile/wscgapi.hpp")" << std::endl;
std::vector<std::string> headers;
for (auto codegen_ctx : codegen_ctx_list) {
for (auto header : codegen_ctx->header_codes) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -215,18 +215,7 @@ class WindowSortKernel::Impl {
std::string typed_res_array_str = GetTypedResArray(shuffle_typed_codegen_list.size());

return BaseCodes() + R"(
#include <arrow/array.h>
#include <arrow/buffer.h>
#include <arrow/builder.h>

#include <algorithm>

#include "codegen/arrow_compute/ext/array_item_index.h"
#include "precompile/builder.h"
#include "precompile/type.h"
#include "third_party/ska_sort.hpp"
#include "third_party/timsort.hpp"
using namespace sparkcolumnarplugin::precompile;
#include "precompile/wscgapi.hpp"

class TypedSorterImpl : public CodeGenBase {
public:
Expand Down
30 changes: 30 additions & 0 deletions native-sql-engine/cpp/src/precompile/wscgapi.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#pragma once

// Umbrella header for generated code: it gathers the Arrow, standard-library,
// codegen, precompile and third-party headers behind a single include
// ("precompile/wscgapi.hpp").
// NOTE(review): CompileCodes appears to compile this header into
// wscgapi.hpp.gch when that file is missing and reuse it afterwards — confirm
// that every generated translation unit is built with the same compiler flags.

#include <arrow/buffer.h>
#include <arrow/compute/api.h>
#include <arrow/record_batch.h>

#include <algorithm>
#include <cmath>
#include <numeric>
#include <tuple>

#include "codegen/arrow_compute/ext/actions_impl.h"
#include "codegen/arrow_compute/ext/array_item_index.h"
#include "codegen/arrow_compute/ext/code_generator_base.h"
#include "codegen/common/hash_relation.h"
#include "codegen/common/sort_relation.h"
#include "precompile/array.h"
#include "precompile/builder.h"
#include "precompile/gandiva.h"
#include "precompile/gandiva_projector.h"
#include "precompile/hash_map.h"
#include "precompile/sparse_hash_map.h"
#include "precompile/type.h"
#include "third_party/row_wise_memory/unsafe_row.h"
#include "third_party/ska_sort.hpp"
#include "third_party/timsort.hpp"
#include "utils/macros.h"
// File-scope using-directives: every file that includes this header gets both
// namespaces injected at global scope.
// NOTE(review): presumably intentional so generated sources no longer need to
// spell these directives themselves — avoid including this header from
// hand-written project code.
using namespace sparkcolumnarplugin::precompile;

using namespace sparkcolumnarplugin::codegen::arrowcompute::extra;