Skip to content
This repository has been archived by the owner on Sep 18, 2023. It is now read-only.

Commit

Permalink
[NSE-161] adding format check (#165)
Browse files Browse the repository at this point in the history
* adding format check

Signed-off-by: Yuan Zhou <yuan.zhou@intel.com>

* formating code

Signed-off-by: Yuan Zhou <yuan.zhou@intel.com>

* adding google format

Signed-off-by: Yuan Zhou <yuan.zhou@intel.com>

* reformat with new style

Signed-off-by: Yuan Zhou <yuan.zhou@intel.com>

* lower clang version to 10

Signed-off-by: Yuan Zhou <yuan.zhou@intel.com>

* adding script to format code

Signed-off-by: Yuan Zhou <yuan.zhou@intel.com>
  • Loading branch information
zhouyuan authored Mar 17, 2021
1 parent 623f43b commit b3ab08c
Show file tree
Hide file tree
Showing 76 changed files with 10,348 additions and 12,126 deletions.
20 changes: 20 additions & 0 deletions .clang-format
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
---
BasedOnStyle: Google
DerivePointerAlignment: false
ColumnLimit: 90
11 changes: 11 additions & 0 deletions .github/workflows/unittests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,14 @@ jobs:
cd src
ctest -R
formatting-check:
name: Formatting Check
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Run clang-format style check for C/C++ programs.
uses: jidicula/clang-format-action@v3.2.0
with:
clang-format-version: '10'
check-path: 'native-sql-engine/cpp/src'
fallback-style: 'Google' # optional
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@
#include <parquet/arrow/reader.h>
#include <parquet/file_reader.h>
#include <shuffle/splitter.h>

#include <chrono>

#include "codegen/code_generator.h"
#include "codegen/code_generator_factory.h"
#include "tests/test_utils.h"
Expand Down
16 changes: 8 additions & 8 deletions native-sql-engine/cpp/src/codegen/arrow_compute/code_generator.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,7 @@ namespace arrowcompute {
class ArrowComputeCodeGenerator : public CodeGenerator {
public:
ArrowComputeCodeGenerator(
arrow::MemoryPool* memory_pool,
std::shared_ptr<arrow::Schema> schema_ptr,
arrow::MemoryPool* memory_pool, std::shared_ptr<arrow::Schema> schema_ptr,
std::vector<std::shared_ptr<gandiva::Expression>> expr_vector,
std::vector<std::shared_ptr<arrow::Field>> ret_types, bool return_when_finish,
std::vector<std::shared_ptr<::gandiva::Expression>> finish_exprs_vector)
Expand All @@ -50,13 +49,13 @@ class ArrowComputeCodeGenerator : public CodeGenerator {
for (auto expr : expr_vector) {
std::shared_ptr<ExprVisitor> root_visitor;
if (finish_exprs_vector.empty()) {
auto visitor = MakeExprVisitor(memory_pool, schema_ptr, expr, ret_types_, &expr_visitor_cache_,
&root_visitor);
auto visitor = MakeExprVisitor(memory_pool, schema_ptr, expr, ret_types_,
&expr_visitor_cache_, &root_visitor);
auto status = DistinctInsert(root_visitor, &visitor_list_);
} else {
auto visitor =
MakeExprVisitor(memory_pool, schema_ptr, expr, ret_types_, finish_exprs_vector[i++],
&expr_visitor_cache_, &root_visitor);
auto visitor = MakeExprVisitor(memory_pool, schema_ptr, expr, ret_types_,
finish_exprs_vector[i++], &expr_visitor_cache_,
&root_visitor);
auto status = DistinctInsert(root_visitor, &visitor_list_);
}
}
Expand Down Expand Up @@ -309,7 +308,8 @@ class ArrowComputeCodeGenerator : public CodeGenerator {
for (auto column : batch) {
if (length != 0 && length != column->length()) {
return arrow::Status::Invalid(
"ArrowCompute MakeBatchFromBatch found batch contains columns with different "
"ArrowCompute MakeBatchFromBatch found batch contains columns with "
"different "
"lengths, expect ",
length, " while got ", column->length(), " from ", i, "th column.");
}
Expand Down
16 changes: 10 additions & 6 deletions native-sql-engine/cpp/src/codegen/arrow_compute/expr_visitor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,8 @@ arrow::Status BuilderVisitor::Visit(const gandiva::FunctionNode& node) {
case BuilderVisitorNodeType::FunctionNode: {
if (dependency) {
return arrow::Status::Invalid(
"BuilderVisitor build ExprVisitor failed, got two depency while only "
"BuilderVisitor build ExprVisitor failed, got two depency "
"while only "
"support one.");
}
RETURN_NOT_OK(child_visitor->GetResult(&dependency));
Expand All @@ -117,8 +118,8 @@ arrow::Status BuilderVisitor::Visit(const gandiva::FunctionNode& node) {
}
}

// Add a new type of Function "Action", which will not create a new expr_visitor,
// instead, it will register itself to its dependency
// Add a new type of Function "Action", which will not create a new
// expr_visitor, instead, it will register itself to its dependency
if (func_name.compare(0, 7, "action_") == 0) {
if (dependency) {
RETURN_NOT_OK(dependency->AppendAction(func_name, param_names));
Expand All @@ -130,7 +131,8 @@ arrow::Status BuilderVisitor::Visit(const gandiva::FunctionNode& node) {
return arrow::Status::OK();
} else {
return arrow::Status::Invalid(
"BuilderVisitor is processing an action without dependency, this is "
"BuilderVisitor is processing an action without dependency, this "
"is "
"invalid.");
}
}
Expand Down Expand Up @@ -671,7 +673,8 @@ arrow::Status ExprVisitor::MakeResultIterator(std::shared_ptr<arrow::Schema> sch
RETURN_NOT_OK(impl_->MakeResultIterator(schema, out));
} else {
return arrow::Status::NotImplemented(
"FinishVsitor MakeResultIterator is not tested, so mark as not implemented "
"FinishVsitor MakeResultIterator is not tested, so mark as not "
"implemented "
"here, "
"codes are commented.");
}
Expand All @@ -697,7 +700,8 @@ arrow::Status ExprVisitor::GetResult(
std::vector<std::shared_ptr<arrow::Field>>* out_fields) {
if (result_batch_list_.empty()) {
return arrow::Status::Invalid(
"ArrowComputeExprVisitor::GetResult result_batch_list was not generated ",
"ArrowComputeExprVisitor::GetResult result_batch_list was not "
"generated ",
func_name_);
}
*out = result_batch_list_;
Expand Down
16 changes: 7 additions & 9 deletions native-sql-engine/cpp/src/codegen/arrow_compute/expr_visitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,16 +113,15 @@ class ExprVisitor : public std::enable_shared_from_this<ExprVisitor> {
const gandiva::FunctionNode& node,
std::shared_ptr<ExprVisitor>* out);

ExprVisitor(arrow::compute::ExecContext ctx,
std::shared_ptr<arrow::Schema> schema_ptr, std::string func_name,
std::vector<std::string> param_field_names,
ExprVisitor(arrow::compute::ExecContext ctx, std::shared_ptr<arrow::Schema> schema_ptr,
std::string func_name, std::vector<std::string> param_field_names,
std::shared_ptr<ExprVisitor> dependency,
std::shared_ptr<gandiva::Node> finish_func);

ExprVisitor(arrow::compute::ExecContext ctx, std::string func_name);

ExprVisitor(arrow::compute::ExecContext ctx,
std::shared_ptr<arrow::Schema> schema_ptr, std::string func_name);
ExprVisitor(arrow::compute::ExecContext ctx, std::shared_ptr<arrow::Schema> schema_ptr,
std::string func_name);

~ExprVisitor() {
#ifdef DEBUG
Expand All @@ -147,8 +146,7 @@ class ExprVisitor : public std::enable_shared_from_this<ExprVisitor> {
std::shared_ptr<gandiva::FunctionNode> partition_spec,
std::shared_ptr<gandiva::FunctionNode> order_spec,
std::shared_ptr<gandiva::FunctionNode> frame_spec,
std::vector<std::shared_ptr<arrow::Field>> ret_fields,
ExprVisitor* p);
std::vector<std::shared_ptr<arrow::Field>> ret_fields, ExprVisitor* p);
arrow::Status AppendAction(const std::string& func_name,
std::vector<std::string> param_name);
arrow::Status Init();
Expand Down Expand Up @@ -209,8 +207,8 @@ class ExprVisitor : public std::enable_shared_from_this<ExprVisitor> {
std::vector<int> in_batch_size_array_;
ArrayList in_batch_;
std::shared_ptr<arrow::Array> in_array_;
// group_indices is used to tell item in array_list_ and batch_list_ belong to which
// group
// group_indices is used to tell item in array_list_ and batch_list_ belong to
// which group
std::vector<int> group_indices_;

// Output data types.
Expand Down
45 changes: 31 additions & 14 deletions native-sql-engine/cpp/src/codegen/arrow_compute/expr_visitor_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -205,14 +205,17 @@ class WindowVisitorImpl : public ExprVisitorImpl {
for (auto col_id : partition_field_ids_) {
if (col_id >= p_->in_record_batch_->num_columns()) {
return arrow::Status::Invalid(
"WindowVisitorImpl: Partition field number overflows defined column "
"WindowVisitorImpl: Partition field number overflows defined "
"column "
"count");
}
auto col = p_->in_record_batch_->column(col_id);
in1.push_back(col);
}
#ifdef DEBUG
std::cout << "[window kernel] Calling concat_kernel_->Evaluate(in1, &out1) on batch... " << std::endl;
std::cout << "[window kernel] Calling concat_kernel_->Evaluate(in1, "
"&out1) on batch... "
<< std::endl;
#endif
RETURN_NOT_OK(concat_kernel_->Evaluate(in1, &out1));
#ifdef DEBUG
Expand All @@ -222,7 +225,9 @@ class WindowVisitorImpl : public ExprVisitorImpl {

std::shared_ptr<arrow::Array> in2 = out1;
#ifdef DEBUG
std::cout << "[window kernel] Calling partition_kernel_->Evaluate(in2, &out2) on batch... " << std::endl;
std::cout << "[window kernel] Calling partition_kernel_->Evaluate(in2, "
"&out2) on batch... "
<< std::endl;
#endif
RETURN_NOT_OK(partition_kernel_->Evaluate(in2, &out2));
#ifdef DEBUG
Expand All @@ -235,15 +240,18 @@ class WindowVisitorImpl : public ExprVisitorImpl {
for (auto col_id : function_param_field_ids_.at(func_id)) {
if (col_id >= p_->in_record_batch_->num_columns()) {
return arrow::Status::Invalid(
"WindowVisitorImpl: Function parameter number overflows defined column "
"WindowVisitorImpl: Function parameter number overflows defined "
"column "
"count");
}
auto col = p_->in_record_batch_->column(col_id);
in3.push_back(col);
}
in3.push_back(out2);
#ifdef DEBUG
std::cout << "[window kernel] Calling function_kernels_.at(func_id)->Evaluate(in3) on batch... " << std::endl;
std::cout << "[window kernel] Calling "
"function_kernels_.at(func_id)->Evaluate(in3) on batch... "
<< std::endl;
#endif
RETURN_NOT_OK(function_kernels_.at(func_id)->Evaluate(in3));
#ifdef DEBUG
Expand Down Expand Up @@ -283,15 +291,17 @@ class WindowVisitorImpl : public ExprVisitorImpl {
length = arr->length();
} else if (length != arr->length()) {
return arrow::Status::Invalid(
"WindowVisitorImpl: Return array length in the same batch are not the same "
"WindowVisitorImpl: Return array length in the same batch are "
"not the same "
"for "
"different window functions");
}
temp.push_back(arr);
}
if (length == -1) {
return arrow::Status::Invalid(
"WindowVisitorImpl: No valid batch length returned for window functions");
"WindowVisitorImpl: No valid batch length returned for window "
"functions");
}
out.push_back(temp);
out_sizes.push_back(length);
Expand Down Expand Up @@ -392,7 +402,8 @@ class EncodeVisitorImpl : public ExprVisitorImpl {
int hash_table_type_;
};

////////////////////////// SortArraysToIndicesVisitorImpl ///////////////////////
////////////////////////// SortArraysToIndicesVisitorImpl
//////////////////////////
class SortArraysToIndicesVisitorImpl : public ExprVisitorImpl {
public:
SortArraysToIndicesVisitorImpl(std::vector<std::shared_ptr<arrow::Field>> field_list,
Expand Down Expand Up @@ -481,7 +492,8 @@ class SortArraysToIndicesVisitorImpl : public ExprVisitorImpl {
} break;
default:
return arrow::Status::NotImplemented(
"SortArraysToIndicesVisitorImpl: Does not support this type of input.");
"SortArraysToIndicesVisitorImpl: Does not support this type of "
"input.");
}
return arrow::Status::OK();
}
Expand All @@ -500,7 +512,8 @@ class SortArraysToIndicesVisitorImpl : public ExprVisitorImpl {
} break;
default:
return arrow::Status::Invalid(
"SortArraysToIndicesVisitorImpl MakeResultIterator does not support "
"SortArraysToIndicesVisitorImpl MakeResultIterator does not "
"support "
"dependency type other than Batch.");
}
return arrow::Status::OK();
Expand Down Expand Up @@ -610,7 +623,8 @@ class CachedRelationVisitorImpl : public ExprVisitorImpl {
std::shared_ptr<arrow::Schema> result_schema_;
};

////////////////////////// ConditionedProbeArraysVisitorImpl ///////////////////////
////////////////////////// ConditionedProbeArraysVisitorImpl
//////////////////////////
class ConditionedProbeArraysVisitorImpl : public ExprVisitorImpl {
public:
ConditionedProbeArraysVisitorImpl(std::vector<std::shared_ptr<arrow::Field>> field_list,
Expand Down Expand Up @@ -704,7 +718,8 @@ class ConditionedProbeArraysVisitorImpl : public ExprVisitorImpl {
} break;
default:
return arrow::Status::Invalid(
"ConditionedProbeArraysVisitorImpl MakeResultIterator does not support "
"ConditionedProbeArraysVisitorImpl MakeResultIterator does not "
"support "
"dependency type other than Batch.");
}
return arrow::Status::OK();
Expand All @@ -724,7 +739,8 @@ class ConditionedProbeArraysVisitorImpl : public ExprVisitorImpl {
gandiva::NodeVector hash_configuration_list_;
};

////////////////////////// ConditionedJoinArraysVisitorImpl ///////////////////////
////////////////////////// ConditionedJoinArraysVisitorImpl
//////////////////////////
class ConditionedJoinArraysVisitorImpl : public ExprVisitorImpl {
public:
ConditionedJoinArraysVisitorImpl(
Expand Down Expand Up @@ -798,7 +814,8 @@ class ConditionedJoinArraysVisitorImpl : public ExprVisitorImpl {
} break;
default:
return arrow::Status::Invalid(
"ConditionedJoinArraysVisitorImpl MakeResultIterator does not support "
"ConditionedJoinArraysVisitorImpl MakeResultIterator does not "
"support "
"dependency type other than Batch.");
}
return arrow::Status::OK();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -280,10 +280,12 @@ class GroupByActionCodeGen : public ActionCodeGen {
validity_name = "action_groupby_" + name + "_validity_";
GetTypedArrayCastString(data_type, input_list[0]);
input_expr_list_.push_back(gandiva::TreeExprBuilder::MakeField(
input_fields_list[0])); // this line is used to gen hash for multiple keys
input_fields_list[0])); // this line is used to gen hash for
// multiple keys
}
typed_input_and_prepare_list_.push_back(std::make_pair(
"", "")); // when there is two name in sig list, we need to make others aligned
typed_input_and_prepare_list_.push_back(
std::make_pair("", "")); // when there is two name in sig list, we
// need to make others aligned

if (keep == false) {
return;
Expand Down Expand Up @@ -411,8 +413,9 @@ class SumActionCodeGen : public ActionCodeGen {
validity_name = "action_sum_" + name + "_validity_";
GetTypedArrayCastString(data_type, input_list[0]);
}
typed_input_and_prepare_list_.push_back(std::make_pair(
"", "")); // when there is two name in sig list, we need to make others aligned
typed_input_and_prepare_list_.push_back(
std::make_pair("", "")); // when there is two name in sig list, we
// need to make others aligned
func_sig_list_.push_back(sig_name);
func_sig_list_.push_back(validity_name);
auto tmp_name = typed_input_and_prepare_list_[0].first + "_tmp";
Expand Down Expand Up @@ -1769,12 +1772,12 @@ class StddevSampFinalActionCodeGen : public ActionCodeGen {
on_new_codes_list_.push_back("");
on_finish_codes_list_.push_back(
"if (" + count_name + "[i] - 1 < 0.00001) {\n" + validity_name +
".push_back(true);\n" +
sig_name + ".push_back(std::numeric_limits<double>::quiet_NaN());}\n" +
"else if (" + count_name + "[i] < 0.00001) {\n" + validity_name +
".push_back(false);\n" + sig_name + ".push_back(0);}\n" + "else {\n" +
validity_name + ".push_back(true);\n" + sig_name + ".push_back(" + "sqrt(" +
m2_name + "[i] / (" + count_name + "[i] - 1)));}\n");
".push_back(true);\n" + sig_name +
".push_back(std::numeric_limits<double>::quiet_NaN());}\n" + "else if (" +
count_name + "[i] < 0.00001) {\n" + validity_name + ".push_back(false);\n" +
sig_name + ".push_back(0);}\n" + "else {\n" + validity_name +
".push_back(true);\n" + sig_name + ".push_back(" + "sqrt(" + m2_name + "[i] / (" +
count_name + "[i] - 1)));}\n");
on_finish_codes_list_.push_back("");

finish_variable_list_.push_back(sig_name);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3943,10 +3943,9 @@ arrow::Status MakeStddevSampFinalAction(
/*case arrow::Decimal128Type::type_id: {
auto action_ptr = std::make_shared<
StddevSampFinalAction<arrow::Decimal128Type, arrow::Decimal128,
arrow::Decimal128Type, arrow::Decimal128>>(ctx, type,
type);
*out = std::dynamic_pointer_cast<ActionBase>(action_ptr);
} break;*/
arrow::Decimal128Type, arrow::Decimal128>>(ctx,
type, type); *out = std::dynamic_pointer_cast<ActionBase>(action_ptr); }
break;*/
#undef PROCESS
default:
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
* limitations under the License.
*/

#pragma once
#include <arrow/builder.h>
#include <arrow/compute/api.h>
#include <arrow/status.h>
Expand Down Expand Up @@ -128,4 +129,4 @@ arrow::Status MakeStddevSampFinalAction(
} // namespace extra
} // namespace arrowcompute
} // namespace codegen
} // namespace sparkcolumnarplugin
} // namespace sparkcolumnarplugin
Loading

0 comments on commit b3ab08c

Please sign in to comment.