Skip to content

Commit

Permalink
GDV-82:[Java][CPP]Export supported types from Gandiva. (apache#66)
Browse files Browse the repository at this point in the history
Exporting supported data types and functions from Gandiva.
Added a JNI bridge to access this from the Java layer.
  • Loading branch information
praveenbingo authored Jul 17, 2018
1 parent eb270f2 commit 274e315
Show file tree
Hide file tree
Showing 17 changed files with 559 additions and 52 deletions.
3 changes: 2 additions & 1 deletion src/gandiva/src/cpp/cmake/BuildUtils.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ function(add_gandiva_unit_test REL_TEST_NAME)
get_filename_component(TEST_NAME ${REL_TEST_NAME} NAME_WE)

add_executable(${TEST_NAME} ${REL_TEST_NAME} ${ARGN})
if(${REL_TEST_NAME} MATCHES "llvm")
if(${REL_TEST_NAME} MATCHES "llvm" OR
${REL_TEST_NAME} MATCHES "expression_registry")
# If the unit test has llvm or expression_registry in its name, include llvm.
add_dependencies(${TEST_NAME} LLVM::LLVM_INTERFACE)
target_link_libraries(${TEST_NAME} PRIVATE LLVM::LLVM_INTERFACE)
Expand Down
64 changes: 64 additions & 0 deletions src/gandiva/src/cpp/include/gandiva/expression_registry.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* Copyright (C) 2017-2018 Dremio Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef GANDIVA_TYPES_H
#define GANDIVA_TYPES_H

#include <memory>
#include <vector>

#include "gandiva/arrow.h"
#include "gandiva/function_signature.h"
#include "gandiva/gandiva_aliases.h"

namespace gandiva {

class NativeFunction;
class FunctionRegistry;
/// \brief Exports types supported by Gandiva for processing.
///
/// Has helper methods for clients to programmatically discover
/// data types and functions supported by Gandiva.
class ExpressionRegistry {
public:
/// Position type used to walk the function registry: a raw, non-owning
/// pointer to a const NativeFunction.
using iterator = const NativeFunction *;
ExpressionRegistry();
// Declared here and defined out of line; FunctionRegistry is only
// forward-declared in this header.
~ExpressionRegistry();
/// \brief Returns the list of supported data types.
///
/// Returns by value, so every call copies the static vector.
static DataTypeVector supported_types() { return supported_types_; }
/// \brief Minimal iterator-like helper that yields a FunctionSignature per
/// registry entry.
///
/// Only operator!=, operator* and post-increment are declared.
class FunctionSignatureIterator {
public:
/// \param begin position this iterator starts at
/// \param end position stored alongside the current one
FunctionSignatureIterator(iterator begin, iterator end) : it(begin), end(end) {}

/// \brief Inequality test against another iterator.
// NOTE(review): not const-qualified, so it cannot be called on a const
// iterator object.
bool operator!=(const FunctionSignatureIterator &func_sign_it);

/// \brief Returns the signature at the current position, by value.
FunctionSignature operator*();

/// \brief Post-increment.
// NOTE(review): returns the raw `iterator` pointer rather than a
// FunctionSignatureIterator, unlike a conventional post-increment.
iterator operator++(int);

private:
// Current position.
iterator it;
// Position passed as `end` at construction.
iterator end;
};
/// \brief Iterator positioned at the first function signature.
// NOTE(review): non-const, whereas function_signature_end() is const.
const FunctionSignatureIterator function_signature_begin();
/// \brief Iterator positioned one past the last function signature.
const FunctionSignatureIterator function_signature_end() const;

private:
// Backing storage for supported_types(); initialised via InitSupportedTypes().
static DataTypeVector supported_types_;
// Builds the value stored in supported_types_.
static DataTypeVector InitSupportedTypes();
// Appends the arrow data type(s) corresponding to `type` to `vector`.
static void AddArrowTypesToVector(arrow::Type::type &type, DataTypeVector &vector);
// Owned function registry that the signature iterators walk over.
std::unique_ptr<FunctionRegistry> function_registry_;
};
} // namespace gandiva
#endif // GANDIVA_TYPES_H
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
#include <string>
#include <vector>

#include "boost/functional/hash.hpp"
#include "gandiva/arrow.h"
#include "gandiva/logging.h"

Expand All @@ -40,51 +39,19 @@ class FunctionSignature {
DCHECK(ret_type);
}

bool operator==(const FunctionSignature &other) const {
if (param_types_.size() != other.param_types_.size() ||
!DataTypeEquals(ret_type_, other.ret_type_) || base_name_ != other.base_name_) {
return false;
}

for (size_t idx = 0; idx < param_types_.size(); idx++) {
if (!DataTypeEquals(param_types_[idx], other.param_types_[idx])) {
return false;
}
}
return true;
}
bool operator==(const FunctionSignature &other) const;

/// Calculated based on base_name, the datatype id of each parameter and the
/// datatype id of the return type.
std::size_t Hash() const {
static const size_t kSeedValue = 17;
size_t result = kSeedValue;
boost::hash_combine(result, base_name_);
boost::hash_combine(result, ret_type_->id());
// not using hash_range since we only want to include the id from the data type
for (auto &param_type : param_types_) {
boost::hash_combine(result, param_type->id());
}
return result;
}
std::size_t Hash() const;

DataTypePtr ret_type() const { return ret_type_; }

std::string ToString() const {
std::stringstream s;

s << ret_type_->ToString() << " " << base_name_ << "(";
for (uint32_t i = 0; i < param_types_.size(); i++) {
if (i > 0) {
s << ", ";
}
const std::string &base_name() const { return base_name_; }

s << param_types_[i]->ToString();
}
DataTypeVector param_types() const { return param_types_; }

s << ")";
return s.str();
}
std::string ToString() const;

private:
// TODO : for some of the types, this shouldn't match type specific data. eg. for
Expand Down
4 changes: 4 additions & 0 deletions src/gandiva/src/cpp/include/gandiva/gandiva_aliases.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ using NodeVector = std::vector<std::shared_ptr<Node>>;
class EvalBatch;
using EvalBatchPtr = std::shared_ptr<EvalBatch>;

// Forward declaration so the aliases below do not need the full definition.
class FunctionSignature;
// Shared-ownership handle to a FunctionSignature.
using FuncSignaturePtr = std::shared_ptr<FunctionSignature>;
// Sequence of FunctionSignature handles.
using FuncSignatureVector = std::vector<FuncSignaturePtr>;

} // namespace gandiva

#endif // GANDIVA_ALIASES_H
20 changes: 12 additions & 8 deletions src/gandiva/src/cpp/src/codegen/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,19 @@ set(BC_FILE_PATH_CC "${CMAKE_CURRENT_BINARY_DIR}/bc_file_path.cc")
configure_file(bc_file_path.cc.in ${BC_FILE_PATH_CC})

set(SRC_FILES annotator.cc
engine.cc
bitmap_accumulator.cc
configuration.cc
engine.cc
expr_decomposer.cc
expr_validator.cc
expression.cc
expression_registry.cc
function_registry.cc
function_signature.cc
llvm_generator.cc
llvm_types.cc
projector.cc
status.cc
expression.cc
tree_expr_builder.cc
${BC_FILE_PATH_CC})

Expand Down Expand Up @@ -81,11 +83,13 @@ install(
add_gandiva_unit_test(bitmap_accumulator_test.cc bitmap_accumulator.cc)
add_gandiva_unit_test(dex_llvm_test.cc)
add_gandiva_unit_test(engine_llvm_test.cc engine.cc llvm_types.cc status.cc configuration.cc ${BC_FILE_PATH_CC})
add_gandiva_unit_test(function_signature_test.cc)
add_gandiva_unit_test(function_registry_test.cc function_registry.cc)
add_gandiva_unit_test(function_signature_test.cc function_signature.cc)
add_gandiva_unit_test(function_registry_test.cc function_registry.cc function_signature.cc)
add_gandiva_unit_test(llvm_types_test.cc llvm_types.cc)
add_gandiva_unit_test(llvm_generator_test.cc llvm_generator.cc engine.cc llvm_types.cc expr_decomposer.cc function_registry.cc annotator.cc status.cc bitmap_accumulator.cc configuration.cc ${BC_FILE_PATH_CC})
add_gandiva_unit_test(annotator_test.cc annotator.cc)
add_gandiva_unit_test(tree_expr_test.cc tree_expr_builder.cc expr_decomposer.cc annotator.cc function_registry.cc)
add_gandiva_unit_test(expr_decomposer_test.cc expr_decomposer.cc tree_expr_builder.cc annotator.cc function_registry.cc)
add_gandiva_unit_test(llvm_generator_test.cc llvm_generator.cc engine.cc llvm_types.cc expr_decomposer.cc function_registry.cc annotator.cc status.cc bitmap_accumulator.cc configuration.cc function_signature.cc ${BC_FILE_PATH_CC})
add_gandiva_unit_test(annotator_test.cc annotator.cc function_signature.cc)
add_gandiva_unit_test(tree_expr_test.cc tree_expr_builder.cc expr_decomposer.cc annotator.cc function_registry.cc function_signature.cc)
add_gandiva_unit_test(expr_decomposer_test.cc expr_decomposer.cc tree_expr_builder.cc annotator.cc function_registry.cc function_signature.cc)
add_gandiva_unit_test(status_test.cc status.cc)
add_gandiva_unit_test(expression_registry_test.cc llvm_types.cc expression_registry.cc function_signature.cc function_registry.cc)

2 changes: 1 addition & 1 deletion src/gandiva/src/cpp/src/codegen/expr_decomposer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
#include "codegen/annotator.h"
#include "codegen/dex.h"
#include "codegen/function_registry.h"
#include "codegen/function_signature.h"
#include "codegen/node.h"
#include "gandiva/function_signature.h"

namespace gandiva {

Expand Down
2 changes: 1 addition & 1 deletion src/gandiva/src/cpp/src/codegen/expr_decomposer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
#include "codegen/annotator.h"
#include "codegen/dex.h"
#include "codegen/function_registry.h"
#include "codegen/function_signature.h"
#include "codegen/node.h"
#include "gandiva/function_signature.h"
#include "gandiva/gandiva_aliases.h"
#include "gandiva/tree_expr_builder.h"

Expand Down
151 changes: 151 additions & 0 deletions src/gandiva/src/cpp/src/codegen/expression_registry.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
// Copyright (C) 2017-2018 Dremio Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "gandiva/expression_registry.h"

#include "boost/iterator/transform_iterator.hpp"

#include "codegen/function_registry.h"
#include "codegen/llvm_types.h"

namespace gandiva {

// Creates the registry with its own, freshly constructed FunctionRegistry.
ExpressionRegistry::ExpressionRegistry() : function_registry_(new FunctionRegistry()) {}

// Defined in this translation unit, where FunctionRegistry is a complete type,
// so the std::unique_ptr member can be destroyed.
ExpressionRegistry::~ExpressionRegistry() = default;

// Returns an iterator positioned at the first entry of the function registry.
const ExpressionRegistry::FunctionSignatureIterator
ExpressionRegistry::function_signature_begin() {
  const auto first = function_registry_->begin();
  const auto last = function_registry_->end();
  return FunctionSignatureIterator(first, last);
}

// Returns the past-the-end iterator: both the current and the end position
// are the registry's end().
const ExpressionRegistry::FunctionSignatureIterator
ExpressionRegistry::function_signature_end() const {
  const auto last = function_registry_->end();
  return FunctionSignatureIterator(last, last);
}

// Two iterators differ when their current positions differ; the stored end
// position does not take part in the comparison.
bool ExpressionRegistry::FunctionSignatureIterator::operator!=(
    const FunctionSignatureIterator &func_sign_it) {
  return !(it == func_sign_it.it);
}

// Returns, by value, the signature of the NativeFunction at the current
// position. No check against the end position is made here.
FunctionSignature ExpressionRegistry::FunctionSignatureIterator::operator*() {
  return it->signature();
}

// Post-increment: advances to the next entry and returns the position held
// before the advance (as a raw `iterator` pointer).
ExpressionRegistry::iterator ExpressionRegistry::FunctionSignatureIterator::operator++(
    int /*unused*/) {
  const auto previous = it;
  ++it;
  return previous;
}

// Definition of the static member; its value is computed once by
// InitSupportedTypes() when this translation unit's statics are initialised.
DataTypeVector ExpressionRegistry::supported_types_ = InitSupportedTypes();

// Builds the list of arrow data types exported by supported_types().
//
// Asks LLVMTypes for the arrow type ids it supports and expands each id into
// the concrete arrow data type(s) via AddArrowTypesToVector().
DataTypeVector ExpressionRegistry::InitSupportedTypes() {
  // LLVMTypes is constructed from an LLVM context; both live only for the
  // duration of this call.
  llvm::LLVMContext context;
  LLVMTypes types_helper(context);

  DataTypeVector result;
  auto arrow_type_ids = types_helper.GetSupportedArrowTypes();
  for (auto &id : arrow_type_ids) {
    AddArrowTypesToVector(id, result);
  }
  return result;
}

// Appends to `vector` the arrow data type(s) that correspond to the type id
// `type`.
//
// Most ids map to exactly one data type. The time-related ids are expanded to
// one entry per time unit listed below, in the order shown. Ids in the final
// group append nothing.
void ExpressionRegistry::AddArrowTypesToVector(arrow::Type::type &type,
                                               DataTypeVector &vector) {
  switch (type) {
    // Boolean and integer types.
    case arrow::Type::type::BOOL:
      vector.emplace_back(arrow::boolean());
      break;
    case arrow::Type::type::UINT8:
      vector.emplace_back(arrow::uint8());
      break;
    case arrow::Type::type::INT8:
      vector.emplace_back(arrow::int8());
      break;
    case arrow::Type::type::UINT16:
      vector.emplace_back(arrow::uint16());
      break;
    case arrow::Type::type::INT16:
      vector.emplace_back(arrow::int16());
      break;
    case arrow::Type::type::UINT32:
      vector.emplace_back(arrow::uint32());
      break;
    case arrow::Type::type::INT32:
      vector.emplace_back(arrow::int32());
      break;
    case arrow::Type::type::UINT64:
      vector.emplace_back(arrow::uint64());
      break;
    case arrow::Type::type::INT64:
      vector.emplace_back(arrow::int64());
      break;

    // Floating point types.
    case arrow::Type::type::HALF_FLOAT:
      vector.emplace_back(arrow::float16());
      break;
    case arrow::Type::type::FLOAT:
      vector.emplace_back(arrow::float32());
      break;
    case arrow::Type::type::DOUBLE:
      vector.emplace_back(arrow::float64());
      break;

    // Variable-length types.
    case arrow::Type::type::STRING:
      vector.emplace_back(arrow::utf8());
      break;
    case arrow::Type::type::BINARY:
      vector.emplace_back(arrow::binary());
      break;

    // Date types.
    case arrow::Type::type::DATE32:
      vector.emplace_back(arrow::date32());
      break;
    case arrow::Type::type::DATE64:
      vector.emplace_back(arrow::date64());
      break;

    // Unit-parameterised types: one entry per unit, order kept as-is.
    case arrow::Type::type::TIMESTAMP:
      vector.emplace_back(arrow::timestamp(arrow::TimeUnit::SECOND));
      vector.emplace_back(arrow::timestamp(arrow::TimeUnit::MILLI));
      vector.emplace_back(arrow::timestamp(arrow::TimeUnit::NANO));
      vector.emplace_back(arrow::timestamp(arrow::TimeUnit::MICRO));
      break;
    case arrow::Type::type::TIME32:
      vector.emplace_back(arrow::time32(arrow::TimeUnit::SECOND));
      vector.emplace_back(arrow::time32(arrow::TimeUnit::MILLI));
      break;
    case arrow::Type::type::TIME64:
      vector.emplace_back(arrow::time64(arrow::TimeUnit::MICRO));
      vector.emplace_back(arrow::time64(arrow::TimeUnit::NANO));
      break;

    case arrow::Type::type::NA:
      vector.emplace_back(arrow::null());
      break;

    // Unsupported types: nothing is appended. There is intentionally no
    // `default` label, so every type id has to be listed explicitly; the
    // original note says a test ensures the build breaks when a new id is
    // added without a case here. Reaching this point trips the DCHECK.
    case arrow::Type::type::FIXED_SIZE_BINARY:
    case arrow::Type::type::MAP:
    case arrow::Type::type::INTERVAL:
    case arrow::Type::type::DECIMAL:
    case arrow::Type::type::LIST:
    case arrow::Type::type::STRUCT:
    case arrow::Type::type::UNION:
    case arrow::Type::type::DICTIONARY:
      DCHECK(false);
  }
}

} // namespace gandiva
Loading

0 comments on commit 274e315

Please sign in to comment.