Skip to content

Commit

Permalink
Add translate expression support (apache#68)
Browse files Browse the repository at this point in the history
* Initial commit

* Introduce TranslateHolder

* Remove unused header
  • Loading branch information
PHILO-HE authored and zhztheplayer committed Feb 8, 2022
1 parent 7e5ae02 commit 7f40757
Show file tree
Hide file tree
Showing 8 changed files with 192 additions and 0 deletions.
2 changes: 2 additions & 0 deletions cpp/src/gandiva/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ set(SRC_FILES
llvm_types.cc
like_holder.cc
json_holder.cc
translate_holder.cc
literal_holder.cc
projector.cc
regex_util.cc
Expand Down Expand Up @@ -235,6 +236,7 @@ add_gandiva_test(internals-test
to_date_holder_test.cc
simple_arena_test.cc
json_holder_test.cc
translate_holder_test.cc
like_holder_test.cc
replace_holder_test.cc
decimal_type_util_test.cc
Expand Down
2 changes: 2 additions & 0 deletions cpp/src/gandiva/function_holder_registry.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "gandiva/random_generator_holder.h"
#include "gandiva/replace_holder.h"
#include "gandiva/to_date_holder.h"
#include "gandiva/translate_holder.h"

namespace gandiva {

Expand Down Expand Up @@ -70,6 +71,7 @@ class FunctionHolderRegistry {
{"random", LAMBDA_MAKER(RandomGeneratorHolder)},
{"rand", LAMBDA_MAKER(RandomGeneratorHolder)},
{"regexp_replace", LAMBDA_MAKER(ReplaceHolder)},
{"translate", LAMBDA_MAKER(TranslateHolder)}
};
return maker_map;
}
Expand Down
5 changes: 5 additions & 0 deletions cpp/src/gandiva/function_registry_string.cc
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,11 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
kResultNullInternal, "gdv_fn_get_json_object_utf8_utf8",
NativeFunction::kNeedsContext | NativeFunction::kNeedsFunctionHolder |
NativeFunction::kCanReturnErrors),

NativeFunction("translate", {}, DataTypeVector{utf8(), utf8(), utf8()}, utf8(),
kResultNullIfNull, "gdv_fn_translate_utf8_utf8_utf8",
NativeFunction::kNeedsContext | NativeFunction::kNeedsFunctionHolder |
NativeFunction::kCanReturnErrors),

NativeFunction("ltrim", {}, DataTypeVector{utf8(), utf8()}, utf8(),
kResultNullIfNull, "ltrim_utf8_utf8", NativeFunction::kNeedsContext),
Expand Down
26 changes: 26 additions & 0 deletions cpp/src/gandiva/gdv_function_stubs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#include "gandiva/random_generator_holder.h"
#include "gandiva/replace_holder.h"
#include "gandiva/to_date_holder.h"
#include "gandiva/translate_holder.h"

/// Stub functions that can be accessed from LLVM or the pre-compiled library.

Expand Down Expand Up @@ -92,6 +93,17 @@ const uint8_t* gdv_fn_get_json_object_utf8_utf8(int64_t ptr, int64_t holder_ptr,
return res;
}

const uint8_t* gdv_fn_translate_utf8_utf8_utf8(int64_t ptr, int64_t holder_ptr, const char* text,
int text_len, const char* matching_str,
int matching_str_len, const char* replace_str,
int replace_str_len, int32_t* out_len) {
gandiva::ExecutionContext* context = reinterpret_cast<gandiva::ExecutionContext*>(ptr);
gandiva::TranslateHolder* holder = reinterpret_cast<gandiva::TranslateHolder*>(holder_ptr);
auto res = (*holder)(context, std::string(text, text_len), std::string(matching_str, matching_str_len),
std::string(replace_str, replace_str_len), out_len);
return res;
}

bool gdv_fn_like_utf8_utf8(int64_t ptr, const char* data, int data_len,
const char* pattern, int pattern_len) {
gandiva::LikeHolder* holder = reinterpret_cast<gandiva::LikeHolder*>(ptr);
Expand Down Expand Up @@ -1327,6 +1339,20 @@ void ExportedStubFunctions::AddMappings(Engine* engine) const {
engine->AddGlobalMappingForFunc("gdv_fn_get_json_object_utf8_utf8",
types->i8_ptr_type() /*return_type*/, args,
reinterpret_cast<void*>(gdv_fn_get_json_object_utf8_utf8));

// gdv_fn_translate_utf8_utf8_utf8
args = {types->i64_type(), // int64_t ptr
types->i64_type(), // int64_t holder_ptr
types->i8_ptr_type(), // const char* text
types->i32_type(), // int text_len
types->i8_ptr_type(), // const char* matching_str
types->i32_type(), // int matching_str_len
types->i8_ptr_type(), // const char* replace_str
types->i32_type(), // int replace_str_len
types->i32_ptr_type()}; // int* out_len
engine->AddGlobalMappingForFunc("gdv_fn_translate_utf8_utf8_utf8",
types->i8_ptr_type() /*return types*/, args,
reinterpret_cast<void*>(gdv_fn_translate_utf8_utf8_utf8));

// gdv_fn_like_utf8_utf8
args = {types->i64_type(), // int64_t ptr
Expand Down
5 changes: 5 additions & 0 deletions cpp/src/gandiva/gdv_function_stubs.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,11 @@ bool gdv_fn_ilike_utf8_utf8(int64_t ptr, const char* data, int data_len,
const uint8_t* gdv_fn_get_json_object_utf8_utf8(int64_t ptr, int64_t holder_ptr, const char* data, int data_len, bool in1_valid,
const char* pattern, int pattern_len, bool in2_valid, bool* out_valid, int32_t* out_len);

/// Stub for translate(utf8, utf8, utf8).
///
/// `ptr` carries the address of a gandiva::ExecutionContext and `holder_ptr` the
/// address of a gandiva::TranslateHolder, both as 64-bit integers. The three
/// (pointer, length) pairs are the input text, the characters to match and their
/// replacements. The holder's result pointer is returned and the result length is
/// stored in `out_len`.
const uint8_t* gdv_fn_translate_utf8_utf8_utf8(int64_t ptr, int64_t holder_ptr, const char* text,
int text_len, const char* matching_str,
int matching_str_len, const char* replace_str,
int replace_str_len, int32_t* out_len);

int64_t gdv_fn_to_date_utf8_utf8_int32(int64_t context, int64_t ptr, const char* data,
int data_len, bool in1_validity,
const char* pattern, int pattern_len,
Expand Down
62 changes: 62 additions & 0 deletions cpp/src/gandiva/translate_holder.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "gandiva/translate_holder.h"

#include <array>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <unordered_map>

#include "gandiva/node.h"

namespace gandiva {

/// Factory overload taking the function node of the expression.
///
/// \param node the function node; it is not inspected by this overload
/// \param[out] holder receives the newly created holder
/// \return the status of the node-less Make overload
Status TranslateHolder::Make(const FunctionNode& node,
                             std::shared_ptr<TranslateHolder>* holder) {
  // Mark the parameter as intentionally unused.
  static_cast<void>(node);

  Status status = Make(holder);
  return status;
}

/// Creates a TranslateHolder.
///
/// \param[out] holder receives the newly created holder
/// \return Status::OK()
Status TranslateHolder::Make(std::shared_ptr<TranslateHolder>* holder) {
  // make_shared allocates the object and its control block together and keeps
  // a naked `new` out of the code.
  *holder = std::make_shared<TranslateHolder>();
  return Status::OK();
}

/// Translates `text` byte by byte.
///
/// Every byte of `text` that occurs in `matching_str` is replaced by the byte at
/// the same position in `replace_str`; when `replace_str` is too short to have a
/// byte at that position, the byte is removed from the output. Bytes that do not
/// occur in `matching_str` are copied unchanged. When a byte occurs more than
/// once in `matching_str`, the mapping of its first occurrence is used.
///
/// Matching is done on single bytes, so multi-byte UTF-8 sequences are not
/// treated as one character.
///
/// \param ctx execution context whose arena provides the output buffer
/// \param text input string
/// \param matching_str bytes to search for
/// \param replace_str positional replacements for `matching_str`
/// \param[out] out_len number of bytes in the result
/// \return pointer to `*out_len` result bytes (not NUL-terminated). For an empty
///         result, or when the arena cannot provide memory, a pointer to an
///         empty string is returned and `*out_len` is 0.
const uint8_t* TranslateHolder::operator()(gandiva::ExecutionContext* ctx, std::string text,
                                           std::string matching_str,
                                           std::string replace_str, int32_t* out_len) {
  enum class Action : uint8_t { kKeep, kReplace, kDelete };
  struct Rule {
    Action action = Action::kKeep;
    char replacement = '\0';
  };

  // One rule per possible byte value. A table (instead of a hash map with '\0'
  // as the "delete" marker) needs a single lookup per input byte and lets a
  // real NUL byte be used as a replacement.
  std::array<Rule, 256> rules{};
  for (std::size_t i = 0; i < matching_str.size(); ++i) {
    Rule& rule = rules[static_cast<unsigned char>(matching_str[i])];
    if (rule.action != Action::kKeep) {
      continue;  // the first occurrence in matching_str wins
    }
    if (i < replace_str.size()) {
      rule.action = Action::kReplace;
      rule.replacement = replace_str[i];
    } else {
      rule.action = Action::kDelete;
    }
  }

  // `text` is our own copy, so translate it in place. The write position never
  // passes the read position, hence no unread byte is overwritten. This replaces
  // the former variable-length stack array, which is not standard C++ and could
  // overflow the stack on long input.
  std::size_t kept = 0;
  for (std::size_t i = 0; i < text.size(); ++i) {
    const char current = text[i];
    const Rule& rule = rules[static_cast<unsigned char>(current)];
    if (rule.action == Action::kDelete) {
      continue;
    }
    text[kept++] = (rule.action == Action::kReplace) ? rule.replacement : current;
  }

  *out_len = static_cast<int32_t>(kept);
  if (kept == 0) {
    // Nothing to copy; do not ask the arena for a zero-sized buffer.
    return reinterpret_cast<const uint8_t*>("");
  }

  auto* result_buffer = reinterpret_cast<uint8_t*>(ctx->arena()->Allocate(*out_len));
  if (result_buffer == nullptr) {
    ctx->set_error_msg("Could not allocate memory for output string");
    *out_len = 0;
    return reinterpret_cast<const uint8_t*>("");
  }
  std::memcpy(result_buffer, text.data(), kept);
  return result_buffer;
}

} // namespace gandiva
44 changes: 44 additions & 0 deletions cpp/src/gandiva/translate_holder.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <memory>
#include <string>

#include "arrow/status.h"
#include "gandiva/execution_context.h"
#include "gandiva/function_holder.h"
#include "gandiva/node.h"
#include "gandiva/visibility.h"

namespace gandiva {

/// Function holder backing the SQL 'translate' function.
///
/// The holder has no data members; instances are obtained through Make().
class GANDIVA_EXPORT TranslateHolder : public FunctionHolder {
 public:
  TranslateHolder() = default;
  ~TranslateHolder() override = default;

  /// Creates a holder and stores it in `holder`.
  static Status Make(std::shared_ptr<TranslateHolder>* holder);

  /// Overload that additionally receives the function node of the expression.
  static Status Make(const FunctionNode& node, std::shared_ptr<TranslateHolder>* holder);

  /// Translates `text`: characters found in `matching_str` are replaced by the
  /// character at the same position in `replace_str`, or removed when
  /// `replace_str` has no character at that position.
  ///
  /// \param ctx execution context used to allocate the result buffer
  /// \param[out] out_len length of the returned buffer in bytes
  /// \return pointer to the translated bytes
  const uint8_t* operator()(gandiva::ExecutionContext* ctx, std::string text,
                            std::string matching_str, std::string replace_str,
                            int32_t* out_len);
};

} // namespace gandiva
46 changes: 46 additions & 0 deletions cpp/src/gandiva/translate_holder_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "gandiva/translate_holder.h"

#include <gtest/gtest.h>

namespace gandiva {
/// Test fixture for TranslateHolder; owns the ExecutionContext that is passed
/// to the holder in each test.
class TestTranslateHolder : public ::testing::Test {
protected:
// Execution context handed to the holder's call operator.
ExecutionContext ctx_;
};

// Checks removal of matched characters (empty replacement string) and positional
// replacement where the replacement string is shorter than the matching string.
TEST_F(TestTranslateHolder, TestTranslate) {
  std::shared_ptr<TranslateHolder> holder_ptr;

  auto make_status = TranslateHolder::Make(&holder_ptr);
  EXPECT_EQ(make_status.ok(), true) << make_status.message();

  auto translate = *holder_ptr;

  // Runs the holder once and returns the result as a std::string.
  auto run = [&](const char* text, const char* from, const char* to) {
    int32_t result_len = 0;
    const uint8_t* result = translate(&ctx_, text, from, to, &result_len);
    return std::string(reinterpret_cast<const char*>(result), result_len);
  };

  // Both brackets are dropped because there is nothing to replace them with.
  EXPECT_EQ(run("ab[cd]", "[]", ""), "abcd");

  // '[' maps to '#'; ']' has no counterpart and is dropped.
  EXPECT_EQ(run("ab[cd]", "[]", "#"), "ab#cd");
}

} // namespace gandiva

0 comments on commit 7f40757

Please sign in to comment.