From d9bf39a8de4136d25b50dcbfbf9ca73a6bed6b2f Mon Sep 17 00:00:00 2001 From: Yuan Zhou Date: Fri, 2 Apr 2021 11:09:36 +0800 Subject: [PATCH 1/7] fix code scan Signed-off-by: Yuan Zhou --- .../cpp/src/codegen/arrow_compute/code_generator.h | 2 +- .../codegen/arrow_compute/ext/basic_physical_kernels.cc | 3 ++- .../cpp/src/codegen/arrow_compute/ext/codegen_common.cc | 4 ++-- .../arrow_compute/ext/conditioned_probe_kernel.cc | 3 ++- .../arrow_compute/ext/expression_codegen_visitor.cc | 4 ++-- .../codegen/arrow_compute/ext/hash_aggregate_kernel.cc | 1 + .../codegen/arrow_compute/ext/hash_relation_kernel.cc | 1 + .../cpp/src/codegen/arrow_compute/ext/kernels_ext.cc | 6 ++++-- .../src/codegen/arrow_compute/ext/merge_join_kernel.cc | 1 + .../arrow_compute/ext/whole_stage_codegen_kernel.cc | 8 +++++--- .../cpp/src/codegen/arrow_compute/ext/window_kernel.cc | 4 ++-- .../src/codegen/arrow_compute/ext/window_sort_kernel.h | 9 +++++++-- native-sql-engine/cpp/src/precompile/array.cc | 6 +++++- native-sql-engine/cpp/src/precompile/gandiva.h | 1 + .../cpp/src/precompile/hash_arrays_kernel.cc | 3 ++- native-sql-engine/cpp/src/shuffle/splitter.cc | 2 ++ 16 files changed, 40 insertions(+), 18 deletions(-) diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/code_generator.h b/native-sql-engine/cpp/src/codegen/arrow_compute/code_generator.h index 2015499c2..0f06d445c 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/code_generator.h +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/code_generator.h @@ -67,7 +67,7 @@ class ArrowComputeCodeGenerator : public CodeGenerator { #endif } - ~ArrowComputeCodeGenerator() { + virtual ~ArrowComputeCodeGenerator() { expr_visitor_cache_.clear(); visitor_list_.clear(); } diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/basic_physical_kernels.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/basic_physical_kernels.cc index ba4b430cc..c9844a8bd 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/basic_physical_kernels.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/basic_physical_kernels.cc @@ -178,7 +178,8 @@ class FilterKernel::Impl { &condition_node_visitor)); codegen_ctx->process_codes += condition_node_visitor->GetPrepare(); for (auto header : condition_node_visitor->GetHeaders()) { - if (std::find(codegen_ctx->header_codes.begin(), codegen_ctx->header_codes.end(), + if (!header.empty() && + std::find(codegen_ctx->header_codes.begin(), codegen_ctx->header_codes.end(), header) == codegen_ctx->header_codes.end()) { codegen_ctx->header_codes.push_back(header); } diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/codegen_common.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/codegen_common.cc index ea2f5f1f7..f7de0b66e 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/codegen_common.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/codegen_common.cc @@ -318,7 +318,7 @@ gandiva::ExpressionPtr GetHash32Kernel(std::vector key_list, std::vector key_index_list) { // This Project should be do upon GetGandivaKernel // So we need to treat inside functionNode as fieldNode. - std::vector> func_node_list = {}; + std::vector> func_node_list = {nullptr}; std::shared_ptr ret_type; auto seed = gandiva::TreeExprBuilder::MakeLiteral((int32_t)0); gandiva::NodePtr func_node; @@ -339,7 +339,7 @@ gandiva::ExpressionPtr GetHash32Kernel(std::vector key_list, gandiva::ExpressionPtr GetHash32Kernel(std::vector key_list) { // This Project should be do upon GetGandivaKernel // So we need to treat inside functionNode as fieldNode. - std::vector> func_node_list = {}; + std::vector> func_node_list = {nullptr}; std::shared_ptr ret_type; auto seed = gandiva::TreeExprBuilder::MakeLiteral((int32_t)0); gandiva::NodePtr func_node; diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_probe_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_probe_kernel.cc index ef35c102b..c812a5dda 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_probe_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_probe_kernel.cc @@ -1001,6 +1001,7 @@ class ConditionedProbeKernel::Impl { uint64_t Evaluate(std::shared_ptr key_array, const arrow::ArrayVector& key_payloads) override { auto typed_key_array = std::dynamic_pointer_cast(key_array); + assert(typed_key_array != nullptr); std::vector> payloads; int i = 0; bool do_unsafe_row = true; @@ -1071,7 +1072,7 @@ class ConditionedProbeKernel::Impl { if (!do_unsafe_row) { index = fast_probe(i); } else { - unsafe_key_row->reset(); + if(unsafe_key_row) {unsafe_key_row->reset();} for (auto payload_arr : payloads) { payload_arr->Append(i, &unsafe_key_row); } diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/expression_codegen_visitor.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/expression_codegen_visitor.cc index 7b0483203..4502ea474 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/expression_codegen_visitor.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/expression_codegen_visitor.cc @@ -1026,8 +1026,8 @@ arrow::Status ExpressionCodegenVisitor::Visit(const gandiva::BooleanNode& node) field_type_ = mixed; } for (auto header : child_visitor->GetHeaders()) { - if (std::find(header_list_.begin(), header_list_.end(), header) == - header_list_.end()) { + if (!header.empty() && std::find(header_list_.begin(), header_list_.end(), + header) == header_list_.end()) { header_list_.push_back(header); } } diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/hash_aggregate_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/hash_aggregate_kernel.cc index 8e19adfeb..885dd58ad 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/hash_aggregate_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/hash_aggregate_kernel.cc @@ -1113,6 +1113,7 @@ HashAggregateKernel::HashAggregateKernel( impl_.reset(new Impl(ctx, input_field_list, action_list, result_field_node_list, result_expr_node_list)); kernel_name_ = "HashAggregateKernelKernel"; + ctx_ = ctx; } #undef PROCESS_SUPPORTED_TYPES diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/hash_relation_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/hash_relation_kernel.cc index 00276e57e..08db906ca 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/hash_relation_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/hash_relation_kernel.cc @@ -54,6 +54,7 @@ class HashRelationKernel::Impl { std::shared_ptr root_node, const std::vector>& output_field_list) : ctx_(ctx), input_field_list_(input_field_list) { + pool_ = ctx_->memory_pool(); std::vector> hash_relation_list; for (auto field : input_field_list) { std::shared_ptr hash_relation_column; diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/kernels_ext.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/kernels_ext.cc index 296b204bb..fce886869 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/kernels_ext.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/kernels_ext.cc @@ -171,8 +171,8 @@ class HashArrayKernel::Impl { std::vector> type_list) : ctx_(ctx) { // create a new result array type here - std::vector> func_node_list = {}; - std::vector> field_list = {}; + std::vector> func_node_list = {nullptr}; + std::vector> field_list = {nullptr}; gandiva::ExpressionPtr expr; int index = 0; @@ -195,6 +195,7 @@ class HashArrayKernel::Impl { } index++; } + assert(func_node_list.size() > 0); expr = gandiva::TreeExprBuilder::MakeExpression(func_node_list[0], arrow::field("res", arrow::int64())); #ifdef DEBUG @@ -323,6 +324,7 @@ class CachedRelationKernel::Impl { result_schema_(result_schema), key_field_list_(key_field_list), result_type_(result_type) { + pool_ = ctx_->memory_pool(); for (auto field : key_field_list) { auto indices = result_schema->GetAllFieldIndices(field->name()); if (indices.size() != 1) { diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/merge_join_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/merge_join_kernel.cc index 01e19397c..9416e1ecc 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/merge_join_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/merge_join_kernel.cc @@ -1506,6 +1506,7 @@ ConditionedJoinArraysKernel::ConditionedJoinArraysKernel( const std::shared_ptr& result_schema) { impl_.reset(new Impl(ctx, left_key_list, right_key_list, func_node, join_type, left_field_list, right_field_list, result_schema)); + ctx_ = ctx; kernel_name_ = "ConditionedJoinArraysKernel"; } diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc index e036405b5..a0b452a07 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc @@ -277,8 +277,8 @@ class WholeStageCodeGenKernel::Impl { // process try { // compile codes - RETURN_NOT_OK(CompileCodes(codes, signature_)); - RETURN_NOT_OK(LoadLibrary(signature_, ctx_, out)); + arrow::Status s = CompileCodes(codes, signature_); + s = LoadLibrary(signature_, ctx_, out); } catch (const std::runtime_error& error) { FileSpinUnLock(file_lock); throw error; @@ -310,7 +310,9 @@ class WholeStageCodeGenKernel::Impl { gandiva_projector_list_.push_back(codegen_ctx->gandiva_projector); } for (auto header : headers) { - codes_ss << header << std::endl; + if (!header.empty()) { + codes_ss << header << std::endl; + } } if (is_aggr_) { diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/window_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/window_kernel.cc index fd09864f0..294eefda3 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/window_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/window_kernel.cc @@ -388,7 +388,7 @@ arrow::Status WindowRankKernel::Finish(ArrayList* out) { std::shared_ptr last_index = sorted_partition.at(j - 1); bool same = true; for (int column_id = 0; column_id < type_list_.size(); column_id++) { - bool s; + bool s = false; std::shared_ptr type = type_list_.at(column_id); switch (type->id()) { #define PROCESS(InType, BUILDER_TYPE, ARRAY_TYPE) \ @@ -408,7 +408,7 @@ arrow::Status WindowRankKernel::Finish(ArrayList* out) { break; } } - if (same) { + if (same && rank_array[index->array_id] && rank_array[last_index->array_id]) { rank_array[index->array_id][index->id] = rank_array[last_index->array_id][last_index->id]; continue; diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/window_sort_kernel.h b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/window_sort_kernel.h index e1ecbed4e..741d1e1c3 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/window_sort_kernel.h +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/window_sort_kernel.h @@ -93,11 +93,16 @@ class WindowSortKernel::Impl { auto file_lock = FileSpinLock(); auto status = LoadLibrary(signature_, ctx_, &sorter); if (!status.ok()) { + try { // process auto codes = ProduceCodes(result_schema); // compile codes - RETURN_NOT_OK(CompileCodes(codes, signature_)); - RETURN_NOT_OK(LoadLibrary(signature_, ctx_, &sorter)); + auto s = CompileCodes(codes, signature_); + s = LoadLibrary(signature_, ctx_, &sorter); + } catch (const std::runtime_error& error) { + FileSpinUnLock(file_lock); + throw error; + } } FileSpinUnLock(file_lock); return arrow::Status::OK(); diff --git a/native-sql-engine/cpp/src/precompile/array.cc b/native-sql-engine/cpp/src/precompile/array.cc index eb7072216..75864a940 100644 --- a/native-sql-engine/cpp/src/precompile/array.cc +++ b/native-sql-engine/cpp/src/precompile/array.cc @@ -44,7 +44,11 @@ BooleanArray::BooleanArray(const std::shared_ptr& in) : cache_(in) null_count_ = in->null_count(); \ null_bitmap_data_ = null_count_ == 0 ? NULLPTR : in->null_bitmap_data(); \ auto typed_in = std::dynamic_pointer_cast(in); \ - raw_value_ = typed_in->raw_values(); \ + if (typed_in) { \ + raw_value_ = typed_in->raw_values(); \ + } else { \ + raw_value_ = NULLPTR; \ + } \ } TYPED_NUMERIC_ARRAY_IMPL(Int8Array, int8_t) diff --git a/native-sql-engine/cpp/src/precompile/gandiva.h b/native-sql-engine/cpp/src/precompile/gandiva.h index a97b654b6..9eb1f71b6 100644 --- a/native-sql-engine/cpp/src/precompile/gandiva.h +++ b/native-sql-engine/cpp/src/precompile/gandiva.h @@ -41,6 +41,7 @@ T round2(T val, int precision = 2) { arrow::Decimal128 castDECIMAL(double val, int32_t precision, int32_t scale) { int charsNeeded = 1 + snprintf(NULL, 0, "%.*f", (int)scale, val); char* buffer = reinterpret_cast(malloc(charsNeeded)); + assert(buffer != nullptr); snprintf(buffer, charsNeeded, "%.*f", (int)scale, nextafter(val, val + 0.5)); auto decimal_str = std::string(buffer); free(buffer); diff --git a/native-sql-engine/cpp/src/precompile/hash_arrays_kernel.cc b/native-sql-engine/cpp/src/precompile/hash_arrays_kernel.cc index 6f41ca337..2706905ed 100644 --- a/native-sql-engine/cpp/src/precompile/hash_arrays_kernel.cc +++ b/native-sql-engine/cpp/src/precompile/hash_arrays_kernel.cc @@ -29,7 +29,7 @@ class HashArraysKernel::Impl { const std::vector>& field_list) : pool_(pool) { int index = 0; - std::vector> func_node_list = {}; + std::vector> func_node_list = {nullptr}; for (auto field : field_list) { auto field_node = gandiva::TreeExprBuilder::MakeField(field); auto func_node = @@ -47,6 +47,7 @@ class HashArraysKernel::Impl { } index++; } + assert(func_node_list.size() > 0); auto expr = gandiva::TreeExprBuilder::MakeExpression( func_node_list[0], arrow::field("projection_key", arrow::int64())); schema_ = arrow::schema(field_list); diff --git a/native-sql-engine/cpp/src/shuffle/splitter.cc b/native-sql-engine/cpp/src/shuffle/splitter.cc index b8bc0eba7..8febc365e 100644 --- a/native-sql-engine/cpp/src/shuffle/splitter.cc +++ b/native-sql-engine/cpp/src/shuffle/splitter.cc @@ -501,6 +501,7 @@ arrow::Status Splitter::AllocatePartitionBuffers(int32_t partition_id, int32_t n switch (column_type_id_[i]) { case Type::SHUFFLE_BINARY: { auto builder = std::make_shared(options_.memory_pool); + assert(builder != nullptr); RETURN_NOT_OK(builder->Reserve(new_size)); RETURN_NOT_OK( builder->ReserveData(binary_array_empirical_size_[binary_idx] * new_size)); @@ -510,6 +511,7 @@ arrow::Status Splitter::AllocatePartitionBuffers(int32_t partition_id, int32_t n } case Type::SHUFFLE_LARGE_BINARY: { auto builder = std::make_shared(options_.memory_pool); + assert(builder != nullptr); RETURN_NOT_OK(builder->Reserve(new_size)); RETURN_NOT_OK(builder->ReserveData( large_binary_array_empirical_size_[large_binary_idx] * new_size)); From 9fbc76364c5b544900ad41466a769161348b0463 Mon Sep 17 00:00:00 2001 From: Rui Mo Date: Fri, 2 Apr 2021 02:20:02 +0000 Subject: [PATCH 2/7] fix code scan --- .../arrow_compute/ext/codegen_node_visitor.cc | 3 +- .../ext/conditioned_probe_kernel.cc | 56 ++++++++++--------- .../arrow_compute/ext/hash_relation_kernel.cc | 6 +- .../codegen/arrow_compute/ext/kernels_ext.h | 32 +++++------ .../codegen/arrow_compute/ext/probe_kernel.cc | 6 +- .../ext/whole_stage_codegen_kernel.cc | 4 +- .../arrow_compute/ext/window_kernel.cc | 32 ++++++----- .../cpp/src/precompile/gandiva.h | 3 +- 8 files changed, 79 insertions(+), 63 deletions(-) diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/codegen_node_visitor.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/codegen_node_visitor.cc index 78cf992ad..e4fbfcfc3 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/codegen_node_visitor.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/codegen_node_visitor.cc @@ -805,7 +805,8 @@ std::string CodeGenNodeVisitor::GetNaNCheckStr(std::string left, std::string rig std::string CodeGenNodeVisitor::CombineValidity(std::vector validity_list) { bool first = true; std::stringstream out; - for (auto validity : validity_list) { + for (int i = 0; i < validity_list.size(); i++) { + auto validity = validity_list[i]; if (first) { if (validity.compare("true") != 0) { out << validity; diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_probe_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_probe_kernel.cc index c812a5dda..5dc1fb20a 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_probe_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_probe_kernel.cc @@ -768,40 +768,44 @@ class ConditionedProbeKernel::Impl { case TypeTraits::type_id: { \ using ArrayType_ = precompile::TypeTraits::ArrayType; \ auto typed_first_key_arr = std::make_shared(key_payloads[0]); \ - if (typed_first_key_arr->null_count() == 0) { \ - fast_probe = [this, typed_key_array, typed_first_key_arr](int i) { \ - return hash_relation_->Get(typed_key_array->GetView(i), \ - typed_first_key_arr->GetView(i)); \ - }; \ - } else { \ - fast_probe = [this, typed_key_array, typed_first_key_arr](int i) { \ - if (typed_first_key_arr->IsNull(i)) { \ - return hash_relation_->GetNull(); \ - } else { \ + if (typed_first_key_arr) { \ + if (typed_first_key_arr->null_count() == 0) { \ + fast_probe = [this, typed_key_array, typed_first_key_arr](int i) { \ return hash_relation_->Get(typed_key_array->GetView(i), \ - typed_first_key_arr->GetView(i)); \ - } \ - }; \ + typed_first_key_arr->GetView(i)); \ + }; \ + } else { \ + fast_probe = [this, typed_key_array, typed_first_key_arr](int i) { \ + if (typed_first_key_arr->IsNull(i)) { \ + return hash_relation_->GetNull(); \ + } else { \ + return hash_relation_->Get(typed_key_array->GetView(i), \ + typed_first_key_arr->GetView(i)); \ + } \ + }; \ + } \ } \ } break; PROCESS_SUPPORTED_TYPES(PROCESS) #undef PROCESS case TypeTraits::type_id: { auto typed_first_key_arr = std::make_shared(key_payloads[0]); - if (typed_first_key_arr->null_count() == 0) { - fast_probe = [this, typed_key_array, typed_first_key_arr](int i) { - return hash_relation_->Get(typed_key_array->GetView(i), - typed_first_key_arr->GetString(i)); - }; - } else { - fast_probe = [this, typed_key_array, typed_first_key_arr](int i) { - if (typed_first_key_arr->IsNull(i)) { - return hash_relation_->GetNull(); - } else { + if (typed_first_key_arr) { + if (typed_first_key_arr->null_count() == 0) { + fast_probe = [this, typed_key_array, typed_first_key_arr](int i) { return hash_relation_->Get(typed_key_array->GetView(i), - typed_first_key_arr->GetString(i)); - } - }; + typed_first_key_arr->GetString(i)); + }; + } else { + fast_probe = [this, typed_key_array, typed_first_key_arr](int i) { + if (typed_first_key_arr->IsNull(i)) { + return hash_relation_->GetNull(); + } else { + return hash_relation_->Get(typed_key_array->GetView(i), + typed_first_key_arr->GetString(i)); + } + }; + } } } break; default: { diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/hash_relation_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/hash_relation_kernel.cc index 08db906ca..57c1d57d3 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/hash_relation_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/hash_relation_kernel.cc @@ -130,7 +130,11 @@ class HashRelationKernel::Impl { // If single key case, we can put key in KeyArray auto key_type = std::dynamic_pointer_cast( key_hash_field_list[0]->type()); - key_size_ = key_type->bit_width() / 8; + if (key_type) { + key_size_ = key_type->bit_width() / 8; + } else { + key_size_ = 0; + } hash_relation_ = std::make_shared(ctx_, hash_relation_list, key_size_); } else { diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/kernels_ext.h b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/kernels_ext.h index e2b6dd972..fabea173c 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/kernels_ext.h +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/kernels_ext.h @@ -115,7 +115,7 @@ class EncodeArrayKernel : public KernalBase { private: class Impl; std::unique_ptr impl_; - arrow::compute::ExecContext* ctx_; + arrow::compute::ExecContext* ctx_ = nullptr; }; class WindowAggregateFunctionKernel : public KernalBase { @@ -147,7 +147,7 @@ class WindowAggregateFunctionKernel : public KernalBase { typename arrow::enable_if_number>> createBuilder(std::shared_ptr data_type); - arrow::compute::ExecContext* ctx_; + arrow::compute::ExecContext* ctx_ = nullptr; std::shared_ptr action_; std::vector> accumulated_group_ids_; std::vector> type_list_; @@ -167,7 +167,7 @@ class HashArrayKernel : public KernalBase { private: class Impl; std::unique_ptr impl_; - arrow::compute::ExecContext* ctx_; + arrow::compute::ExecContext* ctx_ = nullptr; }; class SortArraysToIndicesKernel : public KernalBase { @@ -200,7 +200,7 @@ class SortArraysToIndicesKernel : public KernalBase { private: std::unique_ptr impl_; - arrow::compute::ExecContext* ctx_; + arrow::compute::ExecContext* ctx_ = nullptr; }; class CachedRelationKernel : public KernalBase { @@ -223,7 +223,7 @@ class CachedRelationKernel : public KernalBase { private: std::unique_ptr impl_; - arrow::compute::ExecContext* ctx_; + arrow::compute::ExecContext* ctx_ = nullptr; }; class WindowSortKernel : public KernalBase { @@ -243,7 +243,7 @@ class WindowSortKernel : public KernalBase { private: std::unique_ptr impl_; - arrow::compute::ExecContext* ctx_; + arrow::compute::ExecContext* ctx_ = nullptr; }; class HashAggregateKernel : public KernalBase { @@ -299,7 +299,7 @@ class WindowRankKernel : public KernalBase { private: std::shared_ptr sorter_; - arrow::compute::ExecContext* ctx_; + arrow::compute::ExecContext* ctx_ = nullptr; std::vector input_cache_; std::vector> type_list_; bool desc_; @@ -347,7 +347,7 @@ class ConditionedProbeArraysKernel : public KernalBase { private: std::unique_ptr impl_; - arrow::compute::ExecContext* ctx_; + arrow::compute::ExecContext* ctx_ = nullptr; }; class ConditionedJoinArraysKernel : public KernalBase { public: @@ -377,7 +377,7 @@ class ConditionedJoinArraysKernel : public KernalBase { private: std::unique_ptr impl_; - arrow::compute::ExecContext* ctx_; + arrow::compute::ExecContext* ctx_ = nullptr; }; class WholeStageCodeGenKernel : public KernalBase { public: @@ -401,7 +401,7 @@ class WholeStageCodeGenKernel : public KernalBase { private: std::unique_ptr impl_; - arrow::compute::ExecContext* ctx_; + arrow::compute::ExecContext* ctx_ = nullptr; }; class HashRelationKernel : public KernalBase { public: @@ -425,7 +425,7 @@ class HashRelationKernel : public KernalBase { private: std::unique_ptr impl_; - arrow::compute::ExecContext* ctx_; + arrow::compute::ExecContext* ctx_ = nullptr; }; class ConcatArrayListKernel : public KernalBase { public: @@ -484,7 +484,7 @@ class ConditionedProbeKernel : public KernalBase { private: std::unique_ptr impl_; - arrow::compute::ExecContext* ctx_; + arrow::compute::ExecContext* ctx_ = nullptr; }; class ConditionedMergeJoinKernel : public KernalBase { public: @@ -518,7 +518,7 @@ class ConditionedMergeJoinKernel : public KernalBase { private: std::unique_ptr impl_; - arrow::compute::ExecContext* ctx_; + arrow::compute::ExecContext* ctx_ = nullptr; }; class ProjectKernel : public KernalBase { public: @@ -542,7 +542,7 @@ class ProjectKernel : public KernalBase { private: std::unique_ptr impl_; - arrow::compute::ExecContext* ctx_; + arrow::compute::ExecContext* ctx_ = nullptr; }; class FilterKernel : public KernalBase { public: @@ -566,7 +566,7 @@ class FilterKernel : public KernalBase { private: std::unique_ptr impl_; - arrow::compute::ExecContext* ctx_; + arrow::compute::ExecContext* ctx_ = nullptr; }; class ConcatArrayKernel : public KernalBase { public: @@ -581,7 +581,7 @@ class ConcatArrayKernel : public KernalBase { private: std::unique_ptr impl_; - arrow::compute::ExecContext* ctx_; + arrow::compute::ExecContext* ctx_ = nullptr; }; } // namespace extra } // namespace arrowcompute diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/probe_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/probe_kernel.cc index 2bb7b5600..407d4ac42 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/probe_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/probe_kernel.cc @@ -477,7 +477,8 @@ class ConditionedProbeArraysKernel::Impl { std::vector, std::string>> name_list) { std::stringstream ss; int i = 0; - for (auto name : name_list) { + for (int idx = 0; idx < name_list.size(); idx++) { + auto name = name_list[idx]; ss << name.second << " = std::make_shared<" << GetTypeString(name.first, "Array") << ">(projected_batch[" << i++ << "]);" << std::endl; } @@ -497,7 +498,8 @@ class ConditionedProbeArraysKernel::Impl { std::string GetResultIteratorProjectedSet( std::vector, std::string>> name_list) { std::stringstream ss; - for (auto name : name_list) { + for (int i = 0; i < name_list.size(); i++) { + auto name = name_list[i]; ss << name.second << " = " << name.second.substr(0, name.second.size() - 1) << ";" << std::endl; } diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc index a0b452a07..cfbb8f5be 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc @@ -563,7 +563,9 @@ extern "C" void MakeCodeGen(arrow::compute::ExecContext *ctx, std::string GetProcessMaterializeCodes(std::shared_ptr codegen_ctx) { std::stringstream codes_ss; int i = 0; - for (auto pair : codegen_ctx->output_list) { + auto out_list = codegen_ctx->output_list; + for (int i = 0; i < out_list.size(); i++) { + auto pair = out_list[i]; auto name = pair.first.first; auto type = pair.second; auto validity = name + "_validity"; diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/window_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/window_kernel.cc index 294eefda3..003c22f63 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/window_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/window_kernel.cc @@ -111,16 +111,17 @@ arrow::Status WindowAggregateFunctionKernel::Evaluate(const ArrayList& in) { int32_t max_group_id = 0; std::shared_ptr group_id_array = in[1]; auto group_ids = std::dynamic_pointer_cast(group_id_array); - accumulated_group_ids_.push_back(group_ids); - for (int i = 0; i < group_ids->length(); i++) { - if (group_ids->IsNull(i)) { - continue; - } - if (group_ids->GetView(i) > max_group_id) { - max_group_id = group_ids->GetView(i); + if (group_ids) { + accumulated_group_ids_.push_back(group_ids); + for (int i = 0; i < group_ids->length(); i++) { + if (group_ids->IsNull(i)) { + continue; + } + if (group_ids->GetView(i) > max_group_id) { + max_group_id = group_ids->GetView(i); + } } } - ArrayList action_input_data; action_input_data.push_back(in[0]); std::function func; @@ -128,15 +129,16 @@ arrow::Status WindowAggregateFunctionKernel::Evaluate(const ArrayList& in) { RETURN_NOT_OK( action_->Get()->Submit(action_input_data, max_group_id, &func, &null_func)); - for (int row_id = 0; row_id < group_id_array->length(); row_id++) { - if (group_ids->IsNull(row_id)) { - RETURN_NOT_OK(null_func()); - continue; + if (group_ids) { + for (int row_id = 0; row_id < group_id_array->length(); row_id++) { + if (group_ids->IsNull(row_id)) { + RETURN_NOT_OK(null_func()); + continue; + } + auto group_id = group_ids->GetView(row_id); + RETURN_NOT_OK(func(group_id)); } - auto group_id = group_ids->GetView(row_id); - RETURN_NOT_OK(func(group_id)); } - return arrow::Status::OK(); } diff --git a/native-sql-engine/cpp/src/precompile/gandiva.h b/native-sql-engine/cpp/src/precompile/gandiva.h index 9eb1f71b6..73426a3f6 100644 --- a/native-sql-engine/cpp/src/precompile/gandiva.h +++ b/native-sql-engine/cpp/src/precompile/gandiva.h @@ -30,7 +30,8 @@ int64_t castDATE64(int32_t in) { return castDATE_date32(in); } int64_t extractYear(int64_t millis) { return extractYear_timestamp(millis); } template T round2(T val, int precision = 2) { - int charsNeeded = 1 + snprintf(NULL, 0, "%.*f", (int)precision, val); + double dVal = (double)val; + int charsNeeded = 1 + snprintf(NULL, 0, "%.*f", (int)precision, dVal); char* buffer = reinterpret_cast(malloc(charsNeeded)); snprintf(buffer, charsNeeded, "%.*f", (int)precision, nextafter(val, val + 0.5)); double result = atof(buffer); From f5460ef848d6afa79b4af73c1dcc3f54d35a2732 Mon Sep 17 00:00:00 2001 From: Yuan Zhou Date: Fri, 2 Apr 2021 11:37:20 +0800 Subject: [PATCH 3/7] fix format Signed-off-by: Yuan Zhou --- .../arrow_compute/ext/conditioned_probe_kernel.cc | 12 +++++++----- .../codegen/arrow_compute/ext/window_sort_kernel.h | 12 ++++++------ native-sql-engine/cpp/src/precompile/array.cc | 2 +- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_probe_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_probe_kernel.cc index 5dc1fb20a..1028b59a8 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_probe_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_probe_kernel.cc @@ -772,7 +772,7 @@ class ConditionedProbeKernel::Impl { if (typed_first_key_arr->null_count() == 0) { \ fast_probe = [this, typed_key_array, typed_first_key_arr](int i) { \ return hash_relation_->Get(typed_key_array->GetView(i), \ - typed_first_key_arr->GetView(i)); \ + typed_first_key_arr->GetView(i)); \ }; \ } else { \ fast_probe = [this, typed_key_array, typed_first_key_arr](int i) { \ @@ -780,7 +780,7 @@ class ConditionedProbeKernel::Impl { return hash_relation_->GetNull(); \ } else { \ return hash_relation_->Get(typed_key_array->GetView(i), \ - typed_first_key_arr->GetView(i)); \ + typed_first_key_arr->GetView(i)); \ } \ }; \ } \ @@ -794,7 +794,7 @@ class ConditionedProbeKernel::Impl { if (typed_first_key_arr->null_count() == 0) { fast_probe = [this, typed_key_array, typed_first_key_arr](int i) { return hash_relation_->Get(typed_key_array->GetView(i), - typed_first_key_arr->GetString(i)); + typed_first_key_arr->GetString(i)); }; } else { fast_probe = [this, typed_key_array, typed_first_key_arr](int i) { @@ -802,7 +802,7 @@ class ConditionedProbeKernel::Impl { return hash_relation_->GetNull(); } else { return hash_relation_->Get(typed_key_array->GetView(i), - typed_first_key_arr->GetString(i)); + typed_first_key_arr->GetString(i)); } }; } @@ -1076,7 +1076,9 @@ class ConditionedProbeKernel::Impl { if (!do_unsafe_row) { index = fast_probe(i); } else { - if(unsafe_key_row) {unsafe_key_row->reset();} + if (unsafe_key_row) { + unsafe_key_row->reset(); + } for (auto payload_arr : payloads) { payload_arr->Append(i, &unsafe_key_row); } diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/window_sort_kernel.h b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/window_sort_kernel.h index 741d1e1c3..8b654fbfd 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/window_sort_kernel.h +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/window_sort_kernel.h @@ -94,12 +94,12 @@ class WindowSortKernel::Impl { auto status = LoadLibrary(signature_, ctx_, &sorter); if (!status.ok()) { try { - // process - auto codes = ProduceCodes(result_schema); - // compile codes - auto s = CompileCodes(codes, signature_); - s = LoadLibrary(signature_, ctx_, &sorter); - } catch (const std::runtime_error& error) { + // process + auto codes = ProduceCodes(result_schema); + // compile codes + auto s = CompileCodes(codes, signature_); + s = LoadLibrary(signature_, ctx_, &sorter); + } catch (const std::runtime_error& error) { FileSpinUnLock(file_lock); throw error; } diff --git a/native-sql-engine/cpp/src/precompile/array.cc b/native-sql-engine/cpp/src/precompile/array.cc index 75864a940..2da5f0b95 100644 --- a/native-sql-engine/cpp/src/precompile/array.cc +++ b/native-sql-engine/cpp/src/precompile/array.cc @@ -47,7 +47,7 @@ BooleanArray::BooleanArray(const std::shared_ptr& in) : cache_(in) if (typed_in) { \ raw_value_ = typed_in->raw_values(); \ } else { \ - raw_value_ = NULLPTR; \ + raw_value_ = NULLPTR; \ } \ } From 3ae2540beb7cf7a72305adef5ee9f6bb16532f65 Mon Sep 17 00:00:00 2001 From: Yuan Zhou Date: Fri, 2 Apr 2021 13:39:38 +0800 Subject: [PATCH 4/7] fix wrong patch Signed-off-by: Yuan Zhou --- .../cpp/src/codegen/arrow_compute/ext/codegen_common.cc | 4 ++-- .../cpp/src/codegen/arrow_compute/ext/kernels_ext.cc | 4 ++-- .../codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc | 4 ++-- native-sql-engine/cpp/src/precompile/hash_arrays_kernel.cc | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/codegen_common.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/codegen_common.cc index f7de0b66e..ea2f5f1f7 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/codegen_common.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/codegen_common.cc @@ -318,7 +318,7 @@ gandiva::ExpressionPtr GetHash32Kernel(std::vector key_list, std::vector key_index_list) { // This Project should be do upon GetGandivaKernel // So we need to treat inside functionNode as fieldNode. - std::vector> func_node_list = {nullptr}; + std::vector> func_node_list = {}; std::shared_ptr ret_type; auto seed = gandiva::TreeExprBuilder::MakeLiteral((int32_t)0); gandiva::NodePtr func_node; @@ -339,7 +339,7 @@ gandiva::ExpressionPtr GetHash32Kernel(std::vector key_list, gandiva::ExpressionPtr GetHash32Kernel(std::vector key_list) { // This Project should be do upon GetGandivaKernel // So we need to treat inside functionNode as fieldNode. - std::vector> func_node_list = {nullptr}; + std::vector> func_node_list = {}; std::shared_ptr ret_type; auto seed = gandiva::TreeExprBuilder::MakeLiteral((int32_t)0); gandiva::NodePtr func_node; diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/kernels_ext.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/kernels_ext.cc index fce886869..c8b0394e4 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/kernels_ext.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/kernels_ext.cc @@ -171,8 +171,8 @@ class HashArrayKernel::Impl { std::vector> type_list) : ctx_(ctx) { // create a new result array type here - std::vector> func_node_list = {nullptr}; - std::vector> field_list = {nullptr}; + std::vector> func_node_list = {}; + std::vector> field_list = {}; gandiva::ExpressionPtr expr; int index = 0; diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc index cfbb8f5be..581b2865d 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc @@ -564,8 +564,8 @@ extern "C" void MakeCodeGen(arrow::compute::ExecContext *ctx, std::stringstream codes_ss; int i = 0; auto out_list = codegen_ctx->output_list; - for (int i = 0; i < out_list.size(); i++) { - auto pair = out_list[i]; + for (int j = 0; j < out_list.size(); j++) { + auto pair = out_list[j]; auto name = pair.first.first; auto type = pair.second; auto validity = name + "_validity"; diff --git a/native-sql-engine/cpp/src/precompile/hash_arrays_kernel.cc b/native-sql-engine/cpp/src/precompile/hash_arrays_kernel.cc index 2706905ed..5f05c4a29 100644 --- a/native-sql-engine/cpp/src/precompile/hash_arrays_kernel.cc +++ b/native-sql-engine/cpp/src/precompile/hash_arrays_kernel.cc @@ -29,7 +29,7 @@ class HashArraysKernel::Impl { const std::vector>& field_list) : pool_(pool) { int index = 0; - std::vector> func_node_list = {nullptr}; + std::vector> func_node_list = {}; for (auto field : field_list) { auto field_node = gandiva::TreeExprBuilder::MakeField(field); auto func_node = From 424ad613360fd91dfc2b07c8a2b055ea41023055 Mon Sep 17 00:00:00 2001 From: Hongze Zhang Date: Fri, 2 Apr 2021 13:10:03 +0800 Subject: [PATCH 5/7] Klockwork issues fix --- .../oap/datasource/parquet/ParquetReader.java | 3 +++ .../com/intel/oap/vectorized/JniUtils.java | 9 ++++---- .../ext/basic_physical_kernels.cc | 3 +++ .../arrow_compute/ext/codegen_common.cc | 15 +++++++++---- .../ext/conditioned_merge_join_kernel.cc | 2 ++ .../ext/conditioned_probe_kernel.cc | 6 ++++++ .../ext/hash_aggregate_kernel.cc | 1 + .../codegen/arrow_compute/ext/kernels_ext.cc | 1 + .../codegen/arrow_compute/ext/probe_kernel.cc | 3 +++ .../codegen/arrow_compute/ext/sort_kernel.cc | 21 +++++++++++++++---- .../ext/whole_stage_codegen_kernel.cc | 1 + native-sql-engine/cpp/src/jni/jni_wrapper.cc | 5 +++++ .../cpp/src/precompile/gandiva.h | 3 +-- native-sql-engine/cpp/src/shuffle/splitter.cc | 3 +++ 14 files changed, 61 insertions(+), 15 deletions(-) diff --git a/native-sql-engine/core/src/main/java/com/intel/oap/datasource/parquet/ParquetReader.java b/native-sql-engine/core/src/main/java/com/intel/oap/datasource/parquet/ParquetReader.java index 0b0ef0fd8..fcbad8090 100644 --- a/native-sql-engine/core/src/main/java/com/intel/oap/datasource/parquet/ParquetReader.java +++ b/native-sql-engine/core/src/main/java/com/intel/oap/datasource/parquet/ParquetReader.java @@ -119,6 +119,9 @@ public ArrowRecordBatch readNext() throws IOException { ArrowRecordBatchBuilderImpl recordBatchBuilderImpl = new ArrowRecordBatchBuilderImpl(recordBatchBuilder); ArrowRecordBatch batch = recordBatchBuilderImpl.build(); + if (batch == null) { + throw new IllegalArgumentException("failed to build record batch"); + } this.lastReadLength = batch.getLength(); return batch; } diff --git a/native-sql-engine/core/src/main/java/com/intel/oap/vectorized/JniUtils.java b/native-sql-engine/core/src/main/java/com/intel/oap/vectorized/JniUtils.java index cd5f774dc..ebca1bff7 100644 --- a/native-sql-engine/core/src/main/java/com/intel/oap/vectorized/JniUtils.java +++ b/native-sql-engine/core/src/main/java/com/intel/oap/vectorized/JniUtils.java @@ -183,11 +183,10 @@ private static File moveFileFromJarToTemp(String tmpDir, String libraryToLoad) t try (final InputStream is = JniUtils.class.getClassLoader().getResourceAsStream(libraryToLoad)) { if (is == null) { throw new FileNotFoundException(libraryToLoad); - } else { - try { - Files.copy(is, temp.toPath()); - } catch (Exception e) { - } + } + try { + Files.copy(is, temp.toPath()); + } catch (Exception e) { } } return temp; diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/basic_physical_kernels.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/basic_physical_kernels.cc index c9844a8bd..5fdf019e8 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/basic_physical_kernels.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/basic_physical_kernels.cc @@ -54,6 +54,7 @@ class ProjectKernel::Impl { auto field_node = std::dynamic_pointer_cast(node); input_field_list_.push_back(field_node->field()); } + pool_ = nullptr; } arrow::Status MakeResultIterator( @@ -125,6 +126,7 @@ ProjectKernel::ProjectKernel(arrow::compute::ExecContext* ctx, const gandiva::NodeVector& project_list) { impl_.reset(new Impl(ctx, input_field_node_list, project_list)); kernel_name_ = "ProjectKernel"; + ctx_ = nullptr; } arrow::Status ProjectKernel::MakeResultIterator( @@ -153,6 +155,7 @@ class FilterKernel::Impl { auto field_node = std::dynamic_pointer_cast(node); input_field_list_.push_back(field_node->field()); } + pool_ = nullptr; } arrow::Status MakeResultIterator( diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/codegen_common.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/codegen_common.cc index ea2f5f1f7..ea144723f 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/codegen_common.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/codegen_common.cc @@ -332,7 +332,7 @@ gandiva::ExpressionPtr GetHash32Kernel(std::vector key_list, seed = func_node; } func_node_list.push_back(func_node); - return gandiva::TreeExprBuilder::MakeExpression(func_node_list[0], + return gandiva::TreeExprBuilder::MakeExpression(func_node_list.at(0), arrow::field("hash_key", ret_type)); } @@ -605,12 +605,19 @@ arrow::Status CompileCodes(std::string codes, std::string signature) { std::string exec(const char* cmd) { std::array buffer; std::string result; - std::unique_ptr pipe(popen(cmd, "r"), pclose); + FILE *file = popen(cmd, "r"); + std::unique_ptr pipe(file, pclose); if (!pipe) { + pclose(file); throw std::runtime_error("popen() failed!"); } - while (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) { - result += buffer.data(); + try { + while (fgets(buffer.data(), sizeof buffer, pipe.get()) != nullptr) { + result += buffer.data(); + } + } catch (...) { + pclose(file); + throw; } return result; } diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_merge_join_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_merge_join_kernel.cc index 7285edc3c..73cedd271 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_merge_join_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_merge_join_kernel.cc @@ -88,6 +88,8 @@ class ConditionedMergeJoinKernel::Impl { THROW_NOT_OK(GetIndexList(result_schema_, left_field_list_, right_field_list_, true, &exist_index_, &result_schema_index_list_)); } + + pool_ = nullptr; } arrow::Status MakeResultIterator( diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_probe_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_probe_kernel.cc index 1028b59a8..d6b4a7dd0 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_probe_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_probe_kernel.cc @@ -131,6 +131,8 @@ class ConditionedProbeKernel::Impl { THROW_NOT_OK(GetIndexList(result_schema_, left_field_list_, right_field_list_, true, &exist_index_, &result_schema_index_list_)); } + + pool_ = nullptr; } arrow::Status MakeResultIterator( @@ -412,6 +414,9 @@ class ConditionedProbeKernel::Impl { auto iter = dependent_iter_list[0]; auto typed_dependent = std::dynamic_pointer_cast>(iter); + if (typed_dependent == nullptr) { + throw std::runtime_error("casting on hash relation iterator failed"); + } RETURN_NOT_OK(typed_dependent->Next(&hash_relation_)); // chendi: previous result_schema_index_list design is little tricky, it @@ -1860,6 +1865,7 @@ ConditionedProbeKernel::ConditionedProbeKernel( right_schema_list, condition, join_type, result_schema, hash_configuration_list, hash_relation_idx)); kernel_name_ = "ConditionedProbeKernel"; + ctx_ = nullptr; } arrow::Status ConditionedProbeKernel::MakeResultIterator( diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/hash_aggregate_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/hash_aggregate_kernel.cc index 885dd58ad..1752856f8 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/hash_aggregate_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/hash_aggregate_kernel.cc @@ -129,6 +129,7 @@ class HashAggregateKernel::Impl { if (no_result_project) return; } result_expr_list_ = result_expr_node_list; + pool_ = nullptr; } virtual arrow::Status MakeResultIterator( diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/kernels_ext.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/kernels_ext.cc index c8b0394e4..c0ac168f7 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/kernels_ext.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/kernels_ext.cc @@ -308,6 +308,7 @@ ConcatArrayKernel::ConcatArrayKernel( std::vector> type_list) { impl_.reset(new Impl(ctx, type_list)); kernel_name_ = "ConcatArrayKernel"; + ctx_ = nullptr; } arrow::Status ConcatArrayKernel::Evaluate(const ArrayList& in, diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/probe_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/probe_kernel.cc index 407d4ac42..283444d6e 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/probe_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/probe_kernel.cc @@ -402,6 +402,9 @@ class ConditionedProbeArraysKernel::Impl { left_projected_batch_list) { std::stringstream ss; for (auto name : left_projected_batch_list) { + if (name.first == nullptr || name.second.empty()) { + throw std::runtime_error("uninitialized value found"); + } ss << "std::vector> " << name.second << ";" << std::endl; } diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc index 98f89ab2b..deef210d9 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc @@ -84,7 +84,12 @@ using is_number_bool_date = /////////////// SortArraysToIndices //////////////// class SortArraysToIndicesKernel::Impl { public: - Impl() {} + Impl() { + ctx_ = nullptr; + col_num_ = 0; + NaN_check_ = false; + } + Impl(arrow::compute::ExecContext* ctx, std::shared_ptr result_schema, std::shared_ptr key_projector, std::vector> projected_types, @@ -160,8 +165,16 @@ class SortArraysToIndicesKernel::Impl { // process auto codes = ProduceCodes(); // compile codes - RETURN_NOT_OK(CompileCodes(codes, signature_)); - RETURN_NOT_OK(LoadLibrary(signature_, ctx_, &sorter_)); + const arrow::Status &status1 = CompileCodes(codes, signature_); + if (!status1.ok()) { + FileSpinUnLock(file_lock); + return status1; + } + const arrow::Status &status2 = LoadLibrary(signature_, ctx_, &sorter_); + if (!status2.ok()) { + FileSpinUnLock(file_lock); + return status1; + } } FileSpinUnLock(file_lock); return arrow::Status::OK(); @@ -1607,7 +1620,7 @@ class SortMultiplekeyKernel : public SortArraysToIndicesKernel::Impl { return false; } - auto Sort(ArrayItemIndexS* indices_begin, ArrayItemIndexS* indices_end) { + void Sort(ArrayItemIndexS* indices_begin, ArrayItemIndexS* indices_end) { int keys_num = sort_directions_.size(); auto comp = [this, &keys_num](const ArrayItemIndexS& x, const ArrayItemIndexS& y) { return compareRow(x.array_id, x.id, y.array_id, y.id, keys_num); diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc index 581b2865d..34bac57ff 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc @@ -655,6 +655,7 @@ WholeStageCodeGenKernel::WholeStageCodeGenKernel( const std::vector>& output_field_list) { impl_.reset(new Impl(ctx, input_field_list, root_node, output_field_list)); kernel_name_ = "WholeStageCodeGenKernel"; + ctx_ = nullptr; } arrow::Status WholeStageCodeGenKernel::MakeResultIterator( diff --git a/native-sql-engine/cpp/src/jni/jni_wrapper.cc b/native-sql-engine/cpp/src/jni/jni_wrapper.cc index 1a4dbbdc4..c7afb395c 100644 --- a/native-sql-engine/cpp/src/jni/jni_wrapper.cc +++ b/native-sql-engine/cpp/src/jni/jni_wrapper.cc @@ -636,6 +636,11 @@ Java_com_intel_oap_vectorized_ExpressionEvaluatorJniWrapper_nativeSetDependency( JNIEXPORT jboolean JNICALL Java_com_intel_oap_vectorized_BatchIterator_nativeHasNext( JNIEnv* env, jobject obj, jlong id) { auto iter = GetBatchIterator(env, id); + if (iter == nullptr) { + std::string error_message = + "faked to get batch iterator"; + env->ThrowNew(io_exception_class, error_message.c_str()); + } return iter->HasNext(); } diff --git a/native-sql-engine/cpp/src/precompile/gandiva.h b/native-sql-engine/cpp/src/precompile/gandiva.h index 73426a3f6..97a7b124a 100644 --- a/native-sql-engine/cpp/src/precompile/gandiva.h +++ b/native-sql-engine/cpp/src/precompile/gandiva.h @@ -42,8 +42,7 @@ T round2(T val, int precision = 2) { arrow::Decimal128 castDECIMAL(double val, int32_t precision, int32_t scale) { int charsNeeded = 1 + snprintf(NULL, 0, "%.*f", (int)scale, val); char* buffer = reinterpret_cast(malloc(charsNeeded)); - assert(buffer != nullptr); - snprintf(buffer, charsNeeded, "%.*f", (int)scale, nextafter(val, val + 0.5)); + snprintf(buffer, sizeof(buffer), "%.*f", (int)scale, nextafter(val, val + 0.5)); auto decimal_str = std::string(buffer); free(buffer); return arrow::Decimal128::FromString(decimal_str).ValueOrDie(); diff --git a/native-sql-engine/cpp/src/shuffle/splitter.cc b/native-sql-engine/cpp/src/shuffle/splitter.cc index 8febc365e..2eebb9ae4 100644 --- a/native-sql-engine/cpp/src/shuffle/splitter.cc +++ b/native-sql-engine/cpp/src/shuffle/splitter.cc @@ -1173,6 +1173,9 @@ arrow::Status HashSplitter::ComputeAndCountPartitionId(const arrow::RecordBatch& std::to_string(outputs.size())); } auto pid_arr = std::dynamic_pointer_cast(outputs.at(0)); + if (pid_arr == nullptr) { + return arrow::Status::Invalid("failed to cast outputs.at(0)"); + } for (auto i = 0; i < num_rows; ++i) { // positive mod auto pid = pid_arr->Value(i) % num_partitions_; From 3ced51d43ebece13045ca2d2e24f11dfb95ccba8 Mon Sep 17 00:00:00 2001 From: Yuan Zhou Date: Fri, 2 Apr 2021 14:11:39 +0800 Subject: [PATCH 6/7] fix format Signed-off-by: Yuan Zhou --- .../cpp/src/codegen/arrow_compute/ext/codegen_common.cc | 2 +- .../cpp/src/codegen/arrow_compute/ext/sort_kernel.cc | 4 ++-- native-sql-engine/cpp/src/jni/jni_wrapper.cc | 3 +-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/codegen_common.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/codegen_common.cc index ea144723f..702628509 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/codegen_common.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/codegen_common.cc @@ -605,7 +605,7 @@ arrow::Status CompileCodes(std::string codes, std::string signature) { std::string exec(const char* cmd) { std::array buffer; std::string result; - FILE *file = popen(cmd, "r"); + FILE* file = popen(cmd, "r"); std::unique_ptr pipe(file, pclose); if (!pipe) { pclose(file); diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc index deef210d9..88d3bc9ca 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc @@ -165,12 +165,12 @@ class SortArraysToIndicesKernel::Impl { // process auto codes = ProduceCodes(); // compile codes - const arrow::Status &status1 = CompileCodes(codes, signature_); + const arrow::Status& status1 = CompileCodes(codes, signature_); if (!status1.ok()) { FileSpinUnLock(file_lock); return status1; } - const arrow::Status &status2 = LoadLibrary(signature_, ctx_, &sorter_); + const arrow::Status& status2 = LoadLibrary(signature_, ctx_, &sorter_); if (!status2.ok()) { FileSpinUnLock(file_lock); return status1; diff --git a/native-sql-engine/cpp/src/jni/jni_wrapper.cc b/native-sql-engine/cpp/src/jni/jni_wrapper.cc index c7afb395c..73e74cfc1 100644 --- a/native-sql-engine/cpp/src/jni/jni_wrapper.cc +++ b/native-sql-engine/cpp/src/jni/jni_wrapper.cc @@ -637,8 +637,7 @@ JNIEXPORT jboolean JNICALL Java_com_intel_oap_vectorized_BatchIterator_nativeHas JNIEnv* env, jobject obj, jlong id) { auto iter = GetBatchIterator(env, id); if (iter == nullptr) { - std::string error_message = - "faked to get batch iterator"; + std::string error_message = "faked to get batch iterator"; env->ThrowNew(io_exception_class, error_message.c_str()); } return iter->HasNext(); From a8e4c3a4336682ea1dcc61752b4ba79ba624a5c2 Mon Sep 17 00:00:00 2001 From: Yuan Zhou Date: Fri, 2 Apr 2021 15:28:30 +0800 Subject: [PATCH 7/7] more fixes Signed-off-by: Yuan Zhou --- .../cpp/src/codegen/arrow_compute/ext/kernels_ext.h | 4 ++-- .../cpp/src/codegen/arrow_compute/ext/sort_kernel.cc | 2 +- .../codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc | 4 ++-- .../cpp/src/codegen/arrow_compute/ext/window_sort_kernel.h | 4 ++-- native-sql-engine/cpp/src/codegen/expr_visitor.h | 2 +- native-sql-engine/cpp/src/jni/jni_wrapper.cc | 4 +++- native-sql-engine/cpp/src/precompile/gandiva.h | 3 ++- 7 files changed, 13 insertions(+), 10 deletions(-) diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/kernels_ext.h b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/kernels_ext.h index fabea173c..4921141c6 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/kernels_ext.h +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/kernels_ext.h @@ -273,7 +273,7 @@ class HashAggregateKernel : public KernalBase { private: std::unique_ptr impl_; - arrow::compute::ExecContext* ctx_; + arrow::compute::ExecContext* ctx_ = nullptr; }; class WindowRankKernel : public KernalBase { @@ -449,7 +449,7 @@ class ConcatArrayListKernel : public KernalBase { private: std::unique_ptr impl_; - arrow::compute::ExecContext* ctx_; + arrow::compute::ExecContext* ctx_ = nullptr; }; class ConditionedProbeKernel : public KernalBase { public: diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc index 88d3bc9ca..d9752aec4 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc @@ -248,7 +248,7 @@ class SortArraysToIndicesKernel::Impl { // true for nulls_first, false for nulls_last std::vector nulls_order_; bool NaN_check_; - int col_num_; + int col_num_ = 0; class TypedSorterCodeGenImpl { public: diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc index 34bac57ff..b334236ec 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc @@ -95,7 +95,7 @@ class WholeStageCodeGenKernel::Impl { auto function_node = std::dynamic_pointer_cast(node); auto func_name = function_node->descriptor()->name(); if (func_name.compare(0, 22, "conditionedProbeArrays") == 0) { - int join_type; + int join_type = 0; gandiva::NodeVector left_schema_list; RETURN_NOT_OK(GetArguments(function_node, 0, &left_schema_list)); gandiva::NodeVector right_schema_list; @@ -132,7 +132,7 @@ class WholeStageCodeGenKernel::Impl { out)); } else if (func_name.compare(0, 20, "conditionedMergeJoin") == 0) { - int join_type; + int join_type = 0; gandiva::NodeVector left_schema_list; RETURN_NOT_OK(GetArguments(function_node, 0, &left_schema_list)); gandiva::NodeVector right_schema_list; diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/window_sort_kernel.h b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/window_sort_kernel.h index 8b654fbfd..aa6d0ed87 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/window_sort_kernel.h +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/window_sort_kernel.h @@ -126,7 +126,7 @@ class WindowSortKernel::Impl { protected: std::shared_ptr sorter; - arrow::compute::ExecContext* ctx_; + arrow::compute::ExecContext* ctx_ = nullptr; std::string signature_; bool nulls_first_; bool asc_; @@ -641,7 +641,7 @@ class WindowSortOnekeyKernel : public WindowSortKernel::Impl { // using ArrayType_key = arrow::UInt32Array; std::vector> cached_key_; std::vector cached_; - arrow::compute::ExecContext* ctx_; + arrow::compute::ExecContext* ctx_ = nullptr; std::shared_ptr result_schema_; bool nulls_first_; bool asc_; diff --git a/native-sql-engine/cpp/src/codegen/expr_visitor.h b/native-sql-engine/cpp/src/codegen/expr_visitor.h index 8cd6d4ed3..568404ceb 100644 --- a/native-sql-engine/cpp/src/codegen/expr_visitor.h +++ b/native-sql-engine/cpp/src/codegen/expr_visitor.h @@ -51,7 +51,7 @@ class ExprVisitor : public VisitorBase { // "encodeArray"}; std::vector gdv{"add", "substract", "multiply", "divide"}; std::vector ce{}; - int codegen_type; + int codegen_type = 0; arrow::Status Visit(const gandiva::FunctionNode& node); }; } // namespace codegen diff --git a/native-sql-engine/cpp/src/jni/jni_wrapper.cc b/native-sql-engine/cpp/src/jni/jni_wrapper.cc index 73e74cfc1..6638ab183 100644 --- a/native-sql-engine/cpp/src/jni/jni_wrapper.cc +++ b/native-sql-engine/cpp/src/jni/jni_wrapper.cc @@ -858,7 +858,9 @@ Java_com_intel_oap_vectorized_BatchIterator_nativeProcessAndCacheOne( } auto iter = GetBatchIterator(env, id); - status = iter->ProcessAndCacheOne(in); + if (iter) { + status = iter->ProcessAndCacheOne(in); + } if (!status.ok()) { std::string error_message = diff --git a/native-sql-engine/cpp/src/precompile/gandiva.h b/native-sql-engine/cpp/src/precompile/gandiva.h index 97a7b124a..f2e6eedaf 100644 --- a/native-sql-engine/cpp/src/precompile/gandiva.h +++ b/native-sql-engine/cpp/src/precompile/gandiva.h @@ -40,7 +40,8 @@ T round2(T val, int precision = 2) { } arrow::Decimal128 castDECIMAL(double val, int32_t precision, int32_t scale) { - int charsNeeded = 1 + snprintf(NULL, 0, "%.*f", (int)scale, val); + double dVal = (double)val; + int charsNeeded = 1 + snprintf(NULL, 0, "%.*f", (int)scale, dVal); char* buffer = reinterpret_cast(malloc(charsNeeded)); snprintf(buffer, sizeof(buffer), "%.*f", (int)scale, nextafter(val, val + 0.5)); auto decimal_str = std::string(buffer);