Skip to content

Commit

Permalink
GDV-55: [C++] Added validation to projector build. (apache#33)
Browse files Browse the repository at this point in the history
Validating the input schema and expressions during the projector build.
  • Loading branch information
praveenbingo authored Jun 16, 2018
1 parent 8a266c1 commit ebb73ad
Show file tree
Hide file tree
Showing 16 changed files with 561 additions and 31 deletions.
25 changes: 22 additions & 3 deletions src/gandiva/src/cpp/include/gandiva/status.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,16 @@
#define GANDIVA_STATUS_H

#include <string>
#include <sstream>
#include <utility>

// Evaluates `status` exactly once. When the result is not ok(), returns from
// the enclosing function with a Status that keeps the original code and whose
// message is prefixed with the call site (file:line) and the code's string
// form. The location/message text is streamed a single time.
#define GANDIVA_RETURN_NOT_OK(status)                                 \
  do {                                                                \
    Status _status = (status);                                        \
    if (!_status.ok()) {                                              \
      std::stringstream ss;                                           \
      ss << __FILE__ << ":" << __LINE__ << " code: "                  \
         << _status.CodeAsString() << " \n " << _status.message();    \
      return Status(_status.code(), ss.str());                        \
    }                                                                 \
  } while (0)
Expand All @@ -36,7 +38,8 @@ do {
if (!condition) { \
Status _status = (status); \
std::stringstream ss; \
ss << __FILE__ << ":" << __LINE__ << " code: " << #status << "\n" << _status.message(); \
ss << __FILE__ << ":" << __LINE__ << " code: " << _status.CodeAsString() \
<< " \n " << _status.message(); \
return Status(_status.code(), ss.str()); \
} \
} while (0)
Expand All @@ -56,6 +59,7 @@ enum class StatusCode : char {
Invalid = 1,
CodeGenError = 2,
ArrowError = 3,
ExpressionValidationError = 4,
};

class Status {
Expand Down Expand Up @@ -92,11 +96,26 @@ class Status {
return Status(StatusCode::Invalid, msg);
}

// Builds a Status whose code is StatusCode::ArrowError and whose message is
// `msg`.
static Status ArrowError(const std::string& msg) {
  const StatusCode arrow_code = StatusCode::ArrowError;
  return Status(arrow_code, msg);
}

// Builds a Status whose code is StatusCode::ExpressionValidationError and
// whose message is `msg`.
static Status ExpressionValidationError(const std::string& msg) {
  const StatusCode validation_code = StatusCode::ExpressionValidationError;
  return Status(validation_code, msg);
}


// Returns true if the status indicates success, i.e. no state is attached
// (state_ is null).
bool ok() const { return state_ == nullptr; }

// True when code() equals StatusCode::CodeGenError.
bool IsCodeGenError() const {
  const bool is_codegen = (code() == StatusCode::CodeGenError);
  return is_codegen;
}

// True when code() equals StatusCode::Invalid.
bool IsInvalid() const {
  const bool is_invalid = (code() == StatusCode::Invalid);
  return is_invalid;
}

// True when code() equals StatusCode::ArrowError.
bool IsArrowError() const {
  const bool is_arrow_error = (code() == StatusCode::ArrowError);
  return is_arrow_error;
}

// True when code() equals StatusCode::ExpressionValidationError.
bool IsExpressionValidationError() const {
  const bool is_validation_error =
      (code() == StatusCode::ExpressionValidationError);
  return is_validation_error;
}

// Return a string representation of this status suitable for printing.
// Returns the string "OK" for success.
std::string ToString() const;
Expand Down Expand Up @@ -177,4 +196,4 @@ inline Status& Status::operator&=(Status&& s) {
}

} // namespace gandiva
#endif // GANDIVA_STATUS_H
#endif // GANDIVA_STATUS_H
7 changes: 6 additions & 1 deletion src/gandiva/src/cpp/include/gandiva/tree_expr_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,25 +34,30 @@ class TreeExprBuilder {
static NodePtr MakeLiteral(double value);

/// \brief Create a node that refers to an arrow field.
/// \return the field node, or null if the input field is null.
static NodePtr MakeField(FieldPtr field);

/// \brief Create a node that applies the function `name` to `children`.
/// \return the function node, or null if return_type is null.
static NodePtr MakeFunction(const std::string &name,
const NodeVector &children,
DataTypePtr return_type);

/// \brief Create a node with an if-else expression: `this_node` is the
/// "then" branch and `else_node` the "else" branch of `condition`.
/// \return the if node, or null if any of the inputs is null.
static NodePtr MakeIf(NodePtr condition,
NodePtr this_node,
NodePtr else_node,
DataTypePtr result_type);

/// \brief Create an expression with the specified root_node, and the
/// result written to result_field.
/// \return the expression, or null if result_field is null.
static ExpressionPtr MakeExpression(NodePtr root_node,
FieldPtr result_field);

/// \brief Convenience function for simple function expressions: takes the
/// function name, the input fields and the output field directly.
/// \return the expression, or null if out_field is null.
static ExpressionPtr MakeExpression(const std::string &function,
const FieldVector &in_fields,
FieldPtr out_field);
Expand Down
1 change: 1 addition & 0 deletions src/gandiva/src/cpp/integ/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@ project(gandiva)
add_gandiva_integ_test(projector_test.cc)
add_gandiva_integ_test(if_expr_test.cc)
add_gandiva_integ_test(literal_test.cc)
add_gandiva_integ_test(projector_build_validation_test.cc)
260 changes: 260 additions & 0 deletions src/gandiva/src/cpp/integ/projector_build_validation_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,260 @@
// Copyright (C) 2017-2018 Dremio Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <gtest/gtest.h>
#include "arrow/memory_pool.h"
#include "integ/test_util.h"
#include "gandiva/projector.h"
#include "gandiva/tree_expr_builder.h"

namespace gandiva {

using arrow::int32;
using arrow::float32;
using arrow::boolean;

// Fixture shared by the projector build-validation tests; it supplies the
// memory pool that each test hands to Projector::Make.
class TestProjector : public ::testing::Test {
 public:
  // Runs before every test: points pool_ at arrow's default memory pool.
  // Marked override so a signature drift from ::testing::Test is a compile
  // error instead of a silently skipped setup.
  void SetUp() override { pool_ = arrow::default_memory_pool(); }

 protected:
  // Assigned in SetUp(); null until then.
  arrow::MemoryPool* pool_ = nullptr;
};

// An expression calling a function that is not supported must be rejected
// with an expression-validation error when the projector is built.
TEST_F(TestProjector, TestNonExistentFunction) {
  // Input schema: two float32 columns.
  auto in_a = field("f0", float32());
  auto in_b = field("f2", float32());
  auto in_schema = arrow::schema({in_a, in_b});

  // Boolean output column.
  auto out_field = field("res", boolean());

  // res = non_existent_function(f0, f2)
  auto bad_expr = TreeExprBuilder::MakeExpression("non_existent_function",
                                                  {in_a, in_b}, out_field);

  // The build is expected to fail validation.
  std::shared_ptr<Projector> projector;
  Status status = Projector::Make(in_schema, {bad_expr}, pool_, &projector);
  EXPECT_TRUE(status.IsExpressionValidationError());

  const std::string wanted =
      "Function bool non_existent_function(float, float) not supported yet.";
  const std::string reported = status.message();
  EXPECT_TRUE(reported.find(wanted) != std::string::npos);
}

// The root node yields float while the output field is bool; the projector
// build must report the mismatch as an expression-validation error.
TEST_F(TestProjector, TestNotMatchingDataType) {
  // Input schema: a single float32 column.
  auto in_field = field("f0", float32());
  auto in_schema = arrow::schema({in_field});

  // Boolean output column.
  auto out_field = field("res", boolean());

  // res = f0 (float32 value written to a boolean field)
  auto root = TreeExprBuilder::MakeField(in_field);
  auto mismatched_expr = TreeExprBuilder::MakeExpression(root, out_field);

  // The build is expected to fail validation.
  std::shared_ptr<Projector> projector;
  Status status =
      Projector::Make(in_schema, {mismatched_expr}, pool_, &projector);
  EXPECT_TRUE(status.IsExpressionValidationError());

  const std::string wanted =
      "Return type of root node float does not match that of expression bool";
  const std::string reported = status.message();
  EXPECT_TRUE(reported.find(wanted) != std::string::npos);
}

// A field of list type is reported as an unsupported data type when the
// projector is built.
TEST_F(TestProjector, TestNotSupportedDataType) {
  // Input schema: a single list<int32> column.
  auto list_field = field("f0", list(int32()));
  auto in_schema = arrow::schema({list_field});

  // Output column of the same list type.
  auto out_field = field("res", list(int32()));

  // res = f0
  auto root = TreeExprBuilder::MakeField(list_field);
  auto list_expr = TreeExprBuilder::MakeExpression(root, out_field);

  // The build is expected to fail validation.
  std::shared_ptr<Projector> projector;
  Status status = Projector::Make(in_schema, {list_expr}, pool_, &projector);
  EXPECT_TRUE(status.IsExpressionValidationError());

  const std::string wanted = "Field f0 has unsupported data type list";
  const std::string reported = status.message();
  EXPECT_TRUE(reported.find(wanted) != std::string::npos);
}

// The expression reads field f2, which the schema does not contain; the
// projector build must flag the missing field.
TEST_F(TestProjector, TestIncorrectSchemaMissingField) {
  // Two float32 fields, but the schema is built from f0 only (listed twice).
  auto present = field("f0", float32());
  auto absent = field("f2", float32());
  auto in_schema = arrow::schema({present, present});

  // Boolean output column.
  auto out_field = field("res", boolean());

  // res = less_than(f0, f2)
  auto lt_expr = TreeExprBuilder::MakeExpression("less_than",
                                                 {present, absent}, out_field);

  // The build is expected to fail validation.
  std::shared_ptr<Projector> projector;
  Status status = Projector::Make(in_schema, {lt_expr}, pool_, &projector);
  EXPECT_TRUE(status.IsExpressionValidationError());

  const std::string wanted = "Field f2 not in schema";
  const std::string reported = status.message();
  EXPECT_TRUE(reported.find(wanted) != std::string::npos);
}

// The schema declares f2 as int32 while the expression uses f2 as float32;
// the projector build must report the conflicting field definitions.
TEST_F(TestProjector, TestIncorrectSchemaTypeNotMatching) {
  // schema for input fields: field1 and field2 share the name "f2" but
  // differ in type; only field2 (int32) is placed in the schema.
  auto field0 = field("f0", float32());
  auto field1 = field("f2", float32());
  auto field2 = field("f2", int32());
  auto schema = arrow::schema({field0, field2});

  // output fields
  auto field_result = field("res", boolean());

  // Build expression against the float32 flavour of f2.
  auto lt_expr = TreeExprBuilder::MakeExpression("less_than",
                                                 {field0, field1}, field_result);

  // Build a projector for the expressions; validation is expected to fail.
  // (The leftover debug print of the status message was removed: it only
  // cluttered test output and relied on <iostream> not being included here.)
  std::shared_ptr<Projector> projector;
  Status status = Projector::Make(schema, {lt_expr}, pool_, &projector);
  EXPECT_TRUE(status.IsExpressionValidationError());
  std::string expected_error =
      "Field definition in schema f2: int32 different from field in expression f2: float";
  EXPECT_TRUE(status.message().find(expected_error) != std::string::npos);
}

// An if-else expression whose condition calls an unsupported function must
// fail validation when the projector is built.
TEST_F(TestProjector, TestIfNotSupportedFunction) {
  // Input schema: two int32 columns.
  auto in_a = field("a", int32());
  auto in_b = field("b", int32());
  auto in_schema = arrow::schema({in_a, in_b});

  // int32 output column.
  auto out_field = field("res", int32());

  // Expression under test:
  //   if (non_existent_function(a, b))
  //     a
  //   else
  //     b
  auto a_node = TreeExprBuilder::MakeField(in_a);
  auto b_node = TreeExprBuilder::MakeField(in_b);
  auto cond_node = TreeExprBuilder::MakeFunction("non_existent_function",
                                                 {a_node, b_node},
                                                 boolean());
  auto branch_node = TreeExprBuilder::MakeIf(cond_node, a_node, b_node, int32());

  auto if_expr = TreeExprBuilder::MakeExpression(branch_node, out_field);

  // The build is expected to fail validation.
  std::shared_ptr<Projector> projector;
  Status status = Projector::Make(in_schema, {if_expr}, pool_, &projector);
  EXPECT_TRUE(status.IsExpressionValidationError());
}

// The if node is declared to return bool while its "then" branch yields
// int32; the projector build must report the mismatch.
TEST_F(TestProjector, TestIfNotMatchingReturnType) {
  // Input schema: two int32 columns.
  auto in_a = field("a", int32());
  auto in_b = field("b", int32());
  auto in_schema = arrow::schema({in_a, in_b});

  // int32 output column.
  auto out_field = field("res", int32());

  // if (less_than(a, b)) a else b, with the if node typed as boolean.
  auto a_node = TreeExprBuilder::MakeField(in_a);
  auto b_node = TreeExprBuilder::MakeField(in_b);
  auto cond_node = TreeExprBuilder::MakeFunction("less_than",
                                                 {a_node, b_node},
                                                 boolean());
  auto branch_node =
      TreeExprBuilder::MakeIf(cond_node, a_node, b_node, boolean());

  auto if_expr = TreeExprBuilder::MakeExpression(branch_node, out_field);

  // The build is expected to fail validation.
  std::shared_ptr<Projector> projector;
  Status status = Projector::Make(in_schema, {if_expr}, pool_, &projector);
  EXPECT_TRUE(status.IsExpressionValidationError());

  const std::string wanted =
      "Return type of if bool and then int32 not matching.";
  const std::string reported = status.message();
  EXPECT_TRUE(reported.find(wanted) != std::string::npos);
}

// The if node is declared to return int32 while its "else" branch yields
// bool; the projector build must report the mismatch.
TEST_F(TestProjector, TestElseNotMatchingReturnType) {
  // Input schema: two int32 columns and one boolean column.
  auto in_a = field("a", int32());
  auto in_b = field("b", int32());
  auto in_c = field("c", boolean());
  auto in_schema = arrow::schema({in_a, in_b, in_c});

  // int32 output column.
  auto out_field = field("res", int32());

  // if (less_than(a, b)) a else c, with the if node typed as int32.
  auto a_node = TreeExprBuilder::MakeField(in_a);
  auto b_node = TreeExprBuilder::MakeField(in_b);
  auto c_node = TreeExprBuilder::MakeField(in_c);
  auto cond_node = TreeExprBuilder::MakeFunction("less_than",
                                                 {a_node, b_node},
                                                 boolean());
  auto branch_node =
      TreeExprBuilder::MakeIf(cond_node, a_node, c_node, int32());

  auto if_expr = TreeExprBuilder::MakeExpression(branch_node, out_field);

  // The build is expected to fail validation.
  std::shared_ptr<Projector> projector;
  Status status = Projector::Make(in_schema, {if_expr}, pool_, &projector);
  EXPECT_TRUE(status.IsExpressionValidationError());

  const std::string wanted =
      "Return type of if int32 and else bool not matching.";
  const std::string reported = status.message();
  EXPECT_TRUE(reported.find(wanted) != std::string::npos);
}

// The "else" branch of an if expression refers to a list-typed field; the
// projector build must report the unsupported data type.
TEST_F(TestProjector, TestElseNotSupportedType) {
  // schema for input fields. fieldc is part of the schema so that its
  // unsupported list type is the only problem with the field; previously it
  // was also missing from the schema, which made the asserted message depend
  // on the order in which the validator runs its checks.
  auto fielda = field("a", int32());
  auto fieldb = field("b", int32());
  auto fieldc = field("c", list(int32()));
  auto schema = arrow::schema({fielda, fieldb, fieldc});

  // output fields
  auto field_result = field("res", int32());

  // if (less_than(a, b)) a else c
  auto node_a = TreeExprBuilder::MakeField(fielda);
  auto node_b = TreeExprBuilder::MakeField(fieldb);
  auto node_c = TreeExprBuilder::MakeField(fieldc);
  auto condition = TreeExprBuilder::MakeFunction("less_than",
                                                 {node_a, node_b},
                                                 boolean());
  auto if_node = TreeExprBuilder::MakeIf(condition, node_a, node_c, int32());

  auto expr = TreeExprBuilder::MakeExpression(if_node, field_result);

  // Build a projector for the expressions; validation is expected to fail.
  std::shared_ptr<Projector> projector;
  Status status = Projector::Make(schema, {expr}, pool_, &projector);
  EXPECT_TRUE(status.IsExpressionValidationError());
  std::string expected_error =
      "Field c has unsupported data type list";
  EXPECT_TRUE(status.message().find(expected_error) != std::string::npos);
}

} // namespace gandiva
1 change: 0 additions & 1 deletion src/gandiva/src/cpp/integ/projector_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -433,5 +433,4 @@ TEST_F(TestProjector, TestZeroCopyNegative) {
status = projector->Evaluate(*in_batch, {bad_array_data3});
EXPECT_EQ(status.code(), StatusCode::Invalid);
}

} // namespace gandiva
1 change: 1 addition & 0 deletions src/gandiva/src/cpp/src/codegen/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ add_library(gandiva SHARED
projector.cc
status.cc
tree_expr_builder.cc
expr_validator.cc
${BC_FILE_PATH_CC})

# For users of gandiva library (including integ tests), include-dir is :
Expand Down
Loading

0 comments on commit ebb73ad

Please sign in to comment.