Skip to content

Commit

Permalink
Add from_base32 presto function
Browse files Browse the repository at this point in the history
  • Loading branch information
Joe-Abraham committed Oct 5, 2024
1 parent 123e464 commit 656920c
Show file tree
Hide file tree
Showing 4 changed files with 167 additions and 1 deletion.
122 changes: 122 additions & 0 deletions velox/common/encode/tests/Base32Test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <gtest/gtest.h>
#include "velox/common/encode/Base32.h"
#include <cstring>

namespace facebook::velox::encoding{

// Reverse lookup table handed to the Base32 helpers under test. It has 256
// entries and is indexed by the unsigned byte value of an input character:
//   - '2'..'7' (bytes 50..55) map to 26..31,
//   - 'A'..'Z' (bytes 65..90) map to 0..25,
//   - every other byte, including '=' and the lowercase letters, holds 255.
// NOTE(review): 255 appears to be the "not a Base32 character" sentinel that
// Base32::base32ReverseLookup rejects -- confirm against Base32.cpp.
constexpr Base32::ReverseIndex kBase32ReverseIndexTable = {
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 26, 27, 28, 29, 30, 31, 255, 255, 255, 255,
255, 255, 255, 255, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255};

// Test cases for Base32::calculateDecodedSize
TEST(Base32Test, CalculateDecodedSizeEmptyInput) {
  // An empty encoded string must be accepted and report zero decoded bytes.
  std::string_view encoded = "";
  size_t encodedLength = 0;
  size_t decodedLength = 0;

  EXPECT_TRUE(
      Base32::calculateDecodedSize(encoded, encodedLength, decodedLength)
          .ok());
  EXPECT_EQ(decodedLength, 0);
}

TEST(Base32Test, CalculateDecodedSizePaddedInput) {
  // "MY======" is the fully padded Base32 encoding of the single byte "f".
  std::string_view encoded = "MY======";
  size_t encodedLength = encoded.size();
  size_t decodedLength = 0;

  EXPECT_TRUE(
      Base32::calculateDecodedSize(encoded, encodedLength, decodedLength)
          .ok());
  // One payload byte is expected once the padding is accounted for.
  EXPECT_EQ(decodedLength, 1);
}

TEST(Base32Test, CalculateDecodedSizeUnpaddedInput) {
  // Base32 encoding of "foobar" with the trailing "======" padding stripped.
  // The previous input still carried the padding, so this test never
  // exercised the unpadded path its name refers to.
  std::string_view input = "MZXW6YTBOI";
  size_t inputSize = input.size();
  size_t decodedSize = 0;

  auto status = Base32::calculateDecodedSize(input, inputSize, decodedSize);
  EXPECT_TRUE(status.ok());
  EXPECT_EQ(decodedSize, 6); // "foobar" is 6 bytes
}

// Test cases for Base32::base32ReverseLookup
TEST(Base32Test, Base32ReverseLookupValidChar) {
  // 'M' sits at index 12 of the A-Z portion of the Base32 alphabet.
  Status lookupStatus;
  uint8_t value =
      Base32::base32ReverseLookup('M', kBase32ReverseIndexTable, lookupStatus);

  EXPECT_TRUE(lookupStatus.ok());
  EXPECT_EQ(value, 12);
}

TEST(Base32Test, Base32ReverseLookupInvalidChar) {
  // '@' is outside the Base32 charset: the lookup is expected to flag the
  // status as failed and hand back 0.
  Status lookupStatus;
  uint8_t value =
      Base32::base32ReverseLookup('@', kBase32ReverseIndexTable, lookupStatus);

  EXPECT_FALSE(lookupStatus.ok());
  EXPECT_EQ(value, 0);
}

// Test cases for Base32::decodeImpl
TEST(Base32Test, DecodeImplValidInput) {
  // Fully padded Base32 encoding of "foobar" (6 decoded bytes).
  std::string_view input = "MZXW6YTBOI======";
  size_t inputSize = input.size();
  char output[6] = {0};
  size_t outputSize = sizeof(output);

  auto status = Base32::decodeImpl(
      input, inputSize, output, outputSize, kBase32ReverseIndexTable);
  EXPECT_TRUE(status.ok());
  // The buffer holds exactly the 6 decoded bytes and has no room for a NUL
  // terminator, so compare with an explicit length. EXPECT_STREQ would read
  // past the end of `output` looking for a terminator.
  EXPECT_EQ(std::string_view(output, sizeof(output)), std::string_view("foobar"));
}

TEST(Base32Test, DecodeImplInvalidInputLength) {
  // The literal has five characters, but the decoder is told to consume only
  // three of them; decodeImpl is expected to report a failure.
  std::string_view encoded = "MZXW6";
  size_t encodedLength = 3;
  char decoded[5];
  size_t decodedCapacity = sizeof(decoded);

  EXPECT_FALSE(Base32::decodeImpl(
                   encoded,
                   encodedLength,
                   decoded,
                   decodedCapacity,
                   kBase32ReverseIndexTable)
                   .ok());
}

TEST(Base32Test, DecodeImplOutputBufferTooSmall) {
  // "MZXW6YQ=" is the Base32 encoding of "foob" (4 bytes), not "foobar" as the
  // previous comment stated. A 3-byte buffer is therefore one byte short.
  std::string_view input = "MZXW6YQ=";
  size_t inputSize = input.size();
  char output[3]; // Too small for the 4 decoded bytes
  size_t outputSize = sizeof(output);

  auto status = Base32::decodeImpl(
      input, inputSize, output, outputSize, kBase32ReverseIndexTable);
  EXPECT_FALSE(status.ok());
  EXPECT_EQ(status.message(), "Base32::decode() - output buffer too small.");
}

}
3 changes: 2 additions & 1 deletion velox/common/encode/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

add_executable(velox_common_encode_test Base64Test.cpp EncoderUtilsTests.cpp)
add_executable(velox_common_encode_test Base32Test.cpp Base64Test.cpp
EncoderUtilsTests.cpp)
add_test(velox_common_encode_test velox_common_encode_test)
target_link_libraries(
velox_common_encode_test
Expand Down
22 changes: 22 additions & 0 deletions velox/docs/functions/presto/binary.rst
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,28 @@ Binary Functions

Decodes ``bigint`` value from a 64-bit 2’s complement big endian ``binary``.

.. function:: from_base32(string) -> varbinary

Decodes a Base32-encoded ``string`` back into its original binary form.
The input may be either fully padded with ``=`` or not padded at all. A partially padded input, i.e. one that ends in ``=`` but whose length is not a multiple of 8, results in an error.

Examples
--------
Query with padded Base32 string:
::
SELECT from_base32('JBSWY3DPEBLW64TMMQ======'); -- [72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100]

Query with non-padded Base32 string:
::
SELECT from_base32('JBSWY3DPEBLW64TMMQ'); -- [72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100]

Query with partially padded Base32 string:
::
SELECT from_base32('JBSWY3DPEBLW64TM=='); -- Error: Base32::decode() - invalid input string: length is not a multiple of 8.

In the examples above, both fully padded and non-padded Base32 strings ('JBSWY3DPEBLW64TMMQ======' and 'JBSWY3DPEBLW64TMMQ') decode to the binary representation of the text 'Hello World'.
The partially padded Base32 string 'JBSWY3DPEBLW64TM==' will lead to a decoding error.

.. function:: from_hex(string) -> varbinary

Decodes binary data from the hex encoded ``string``.
Expand Down
21 changes: 21 additions & 0 deletions velox/functions/prestosql/BinaryFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,27 @@ struct FromBase32Function {
}
};

/// Presto from_base32(string) -> varbinary.
///
/// Sizes the result via encoding::Base32::calculateDecodedSize and then fills
/// it via encoding::Base32::decode. A non-OK status from either step is
/// returned to the caller unchanged.
template <typename TExec>
struct FromBase32Function {
  VELOX_DEFINE_FUNCTION_TYPES(TExec);

  // T can be either arg_type<Varchar> or arg_type<Varbinary>. These are the
  // same, but hard-coding one of them might be confusing.
  template <typename T>
  FOLLY_ALWAYS_INLINE Status call(out_type<Varbinary>& result, const T& input) {
    auto inputSize = input.size();
    size_t decodedSize = 0;
    // Build the view with an explicit length. Passing the bare input.data()
    // pointer would construct the std::string_view through strlen(), which
    // scans past the end of the input when the bytes are not NUL-terminated.
    auto status = encoding::Base32::calculateDecodedSize(
        std::string_view(input.data(), inputSize), inputSize, decodedSize);
    if (!status.ok()) {
      return status;
    }
    result.resize(decodedSize);
    // NOTE(review): inputSize is reused here after calculateDecodedSize has
    // seen it; presumably that call takes it by reference and trims padding --
    // confirm. If decode() also takes a std::string_view as its first
    // parameter, give it the same explicit-length view as above.
    return encoding::Base32::decode(
        input.data(), inputSize, result.data(), result.size());
  }
};

template <typename T>
struct FromBigEndian32 {
VELOX_DEFINE_FUNCTION_TYPES(T);
Expand Down

0 comments on commit 656920c

Please sign in to comment.