forked from apache/arrow
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
GDV-55: [C++] Added validation to projector build. (apache#33)
Validating the input schema and expressions during the projector build.
- Loading branch information
1 parent
ae10571
commit 30eab61
Showing
16 changed files
with
561 additions
and
31 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
// Copyright (C) 2017-2018 Dremio Corporation | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
#include <string> | ||
#include <sstream> | ||
#include <vector> | ||
|
||
#include "codegen/expr_validator.h" | ||
|
||
namespace gandiva { | ||
|
||
/// \brief Validates an expression: its node tree first, then its result type.
///
/// \param expr expression to validate; a null expression is rejected.
/// \return Status::OK() when the tree validates and the root node's return
///         type equals the type of the expression's result field; otherwise
///         the first validation error encountered.
Status ExprValidator::Validate(const ExpressionPtr &expr) {
  if (expr == nullptr) {
    return Status::ExpressionValidationError("Expression cannot be null.");
  }

  // Walk the whole tree through the visitor interface, starting at the root.
  Node &root_node = *expr->root();
  Status walk_status = root_node.Accept(*this);
  if (!walk_status.ok()) {
    return walk_status;
  }

  // Only the match between the root type and the declared result type is
  // checked here; whether the root type is supported is left to the
  // traversal above.
  if (root_node.return_type()->Equals(*expr->result()->type())) {
    return Status::OK();
  }

  std::stringstream message;
  message << "Return type of root node " << root_node.return_type()->name()
          << " does not match that of expression " << *expr->result()->type();
  return Status::ExpressionValidationError(message.str());
}
|
||
/// \brief Validates a field reference.
///
/// Checks, in this order:
///   1. the field's data type maps to an IR type,
///   2. a field with the same name is present in the schema,
///   3. the schema's definition of that field equals the one in the node.
///
/// \param node field node to validate.
/// \return Status::OK() if all checks pass, otherwise an
///         ExpressionValidationError describing the first failed check.
Status ExprValidator::Visit(const FieldNode &node) {
  auto expr_field = node.field();

  if (types_->IRType(node.return_type()->id()) == nullptr) {
    std::stringstream message;
    message << "Field "<< expr_field->name() << " has unsupported data type "
            << node.return_type()->name();
    return Status::ExpressionValidationError(message.str());
  }

  // Look the field up by name in the map built from the schema.
  auto schema_entry = field_map_.find(expr_field->name());
  if (schema_entry == field_map_.end()) {
    std::stringstream message;
    message << "Field " << expr_field->name() << " not in schema.";
    return Status::ExpressionValidationError(message.str());
  }

  // A name match is not enough: the full field definition must agree.
  FieldPtr schema_field = schema_entry->second;
  if (schema_field->Equals(expr_field)) {
    return Status::OK();
  }

  std::stringstream message;
  message << "Field definition in schema " << schema_field->ToString()
          << " different from field in expression " << expr_field->ToString();
  return Status::ExpressionValidationError(message.str());
}
|
||
/// \brief Validates a function call node.
///
/// The signature built from the node's descriptor (name, parameter types and
/// return type) must be known to the function registry. Every child node is
/// then validated in order, stopping at the first failure.
///
/// \param node function node to validate.
/// \return Status::OK() if the signature is registered and all children
///         validate; otherwise the first error.
Status ExprValidator::Visit(const FunctionNode &node) {
  auto descriptor = node.descriptor();
  FunctionSignature wanted(descriptor->name(),
                           descriptor->params(),
                           descriptor->return_type());

  const NativeFunction *match = registry_.LookupSignature(wanted);
  if (match == nullptr) {
    std::stringstream message;
    message << "Function "<< wanted.ToString() << " not supported yet. ";
    return Status::ExpressionValidationError(message.str());
  }

  // The signature is known; now validate each argument sub-tree.
  for (auto &argument : node.children()) {
    Status argument_status = argument->Accept(*this);
    GANDIVA_RETURN_NOT_OK(argument_status);
  }
  return Status::OK();
}
|
||
/// \brief Validates an if/then/else node.
///
/// The condition, then and else sub-trees are validated in that order and the
/// first failure is returned. Afterwards the return type declared on the if
/// node must equal the return type of both the then and the else branch.
///
/// \param node if node to validate.
/// \return Status::OK() if all sub-trees validate and the types agree,
///         otherwise an ExpressionValidationError.
Status ExprValidator::Visit(const IfNode &node) {
  Status status = node.condition()->Accept(*this);
  GANDIVA_RETURN_NOT_OK(status);
  status = node.then_node()->Accept(*this);
  GANDIVA_RETURN_NOT_OK(status);
  status = node.else_node()->Accept(*this);
  GANDIVA_RETURN_NOT_OK(status);

  auto if_node_ret_type = node.return_type();
  auto then_node_ret_type = node.then_node()->return_type();
  auto else_node_ret_type = node.else_node()->return_type();

  // Compare the types themselves, not the handles that point to them.
  // Comparing the handles with != only tests object identity, so two equal
  // types held in distinct objects would be reported as a mismatch. This
  // mirrors the Equals() based comparison used in Validate().
  if (!if_node_ret_type->Equals(*then_node_ret_type)) {
    std::stringstream ss;
    ss << "Return type of if "<< *if_node_ret_type << " and then "
       << then_node_ret_type->name() << " not matching.";
    return Status::ExpressionValidationError(ss.str());
  }

  if (!if_node_ret_type->Equals(*else_node_ret_type)) {
    std::stringstream ss;
    ss << "Return type of if "<< *if_node_ret_type << " and else "
       << else_node_ret_type->name() << " not matching.";
    return Status::ExpressionValidationError(ss.str());
  }

  return Status::OK();
}
|
||
/// \brief Validates a literal node.
///
/// A literal is valid when its data type maps to an IR type.
///
/// \param node literal node to validate.
/// \return Status::OK() for a supported type, otherwise an
///         ExpressionValidationError naming the value and its type.
Status ExprValidator::Visit(const LiteralNode &node) {
  const bool type_supported =
      types_->IRType(node.return_type()->id()) != nullptr;
  if (type_supported) {
    return Status::OK();
  }

  std::stringstream message;
  message << "Value "<< node.holder() << " has unsupported data type "
          << node.return_type()->name();
  return Status::ExpressionValidationError(message.str());
}
|
||
} // namespace gandiva |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
// Copyright (C) 2017-2018 Dremio Corporation | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
#ifndef GANDIVA_EXPR_VALIDATOR_H | ||
#define GANDIVA_EXPR_VALIDATOR_H | ||
|
||
#include <string> | ||
#include <unordered_map> | ||
|
||
#include "boost/functional/hash.hpp" | ||
#include "codegen/function_registry.h" | ||
#include "codegen/node_visitor.h" | ||
#include "codegen/node.h" | ||
#include "codegen/llvm_types.h" | ||
#include "gandiva/arrow.h" | ||
#include "gandiva/expression.h" | ||
#include "gandiva/status.h" | ||
|
||
namespace gandiva { | ||
|
||
class FunctionRegistry; | ||
|
||
/// \brief Validates the entire expression tree including | ||
/// data types, signatures and return types | ||
class ExprValidator : public NodeVisitor { | ||
public: | ||
explicit ExprValidator(LLVMTypes * types, SchemaPtr schema) | ||
: types_(types), | ||
schema_(schema) { | ||
for (auto &field : schema_->fields()) { | ||
field_map_[field->name()] = field; | ||
} | ||
} | ||
|
||
/// \brief Validates the root node | ||
/// of an expression. | ||
/// 1. Data type of fields and literals. | ||
/// 2. Function signature is supported. | ||
/// 3. For if nodes that return types match | ||
/// for if, then and else nodes. | ||
Status Validate(const ExpressionPtr &expr); | ||
|
||
private: | ||
Status Visit(const FieldNode &node) override; | ||
Status Visit(const FunctionNode &node) override; | ||
Status Visit(const IfNode &node) override; | ||
Status Visit(const LiteralNode &node) override; | ||
|
||
FunctionRegistry registry_; | ||
|
||
LLVMTypes *types_; | ||
|
||
SchemaPtr schema_; | ||
|
||
using FieldMap = std::unordered_map<std::string, | ||
FieldPtr, | ||
boost::hash<std::string>>; | ||
FieldMap field_map_; | ||
}; | ||
|
||
} // namespace gandiva | ||
|
||
#endif //GANDIVA_EXPR_VALIDATOR_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.