diff --git a/velox/common/encode/tests/Base32Test.cpp b/velox/common/encode/tests/Base32Test.cpp new file mode 100644 index 000000000000..b2d8a063cd06 --- /dev/null +++ b/velox/common/encode/tests/Base32Test.cpp @@ -0,0 +1,122 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "velox/common/encode/Base32.h" +#include + +namespace facebook::velox::encoding{ + +constexpr Base32::ReverseIndex kBase32ReverseIndexTable = { + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 26, 27, 28, 29, 30, 31, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255}; + +// Test cases for Base32::calculateDecodedSize +TEST(Base32Test, CalculateDecodedSizeEmptyInput) { + std::string_view input = ""; + size_t inputSize = 0; + size_t decodedSize = 0; + + auto status = Base32::calculateDecodedSize(input, inputSize, decodedSize); + EXPECT_TRUE(status.ok()); + EXPECT_EQ(decodedSize, 0); +} + +TEST(Base32Test, CalculateDecodedSizePaddedInput) { + std::string_view input = "MY======"; // Base32 encoded "f" + size_t inputSize = input.size(); + size_t decodedSize = 0; + + auto status = Base32::calculateDecodedSize(input, inputSize, decodedSize); + EXPECT_TRUE(status.ok()); + EXPECT_EQ(decodedSize, 1); // "f" is 1 byte +} + +TEST(Base32Test, CalculateDecodedSizeUnpaddedInput) { + std::string_view input = "MZXW6YTBOI======"; + size_t inputSize = input.size(); + size_t decodedSize = 0; + + auto status = Base32::calculateDecodedSize(input, inputSize, decodedSize); + EXPECT_TRUE(status.ok()); + EXPECT_EQ(decodedSize, 6); +} + +// Test cases for Base32::base32ReverseLookup +TEST(Base32Test, Base32ReverseLookupValidChar) { + Status status; + uint8_t result = Base32::base32ReverseLookup('M', kBase32ReverseIndexTable, status); + EXPECT_TRUE(status.ok()); + EXPECT_EQ(result, 12); +} + +TEST(Base32Test, Base32ReverseLookupInvalidChar) { + Status status; + uint8_t result = Base32::base32ReverseLookup('@', kBase32ReverseIndexTable, status); // '@' is not in Base32 charset + EXPECT_FALSE(status.ok()); + EXPECT_EQ(result, 0); +} + +// Test cases for Base32::decodeImpl +TEST(Base32Test, DecodeImplValidInput) { + std::string_view input = "MZXW6YTBOI======"; + size_t inputSize = input.size(); + char output[6] = {0}; + size_t outputSize = sizeof(output); + + auto status = Base32::decodeImpl(input, inputSize, output, outputSize, kBase32ReverseIndexTable); + EXPECT_TRUE(status.ok()); + EXPECT_STREQ(output, "foobar"); +} + +TEST(Base32Test, DecodeImplInvalidInputLength) { + std::string_view input = "MZXW6"; + size_t inputSize = 3; + char output[5]; + size_t outputSize = sizeof(output); + + auto status = Base32::decodeImpl(input, inputSize, output, outputSize, kBase32ReverseIndexTable); + EXPECT_FALSE(status.ok()); +} + +TEST(Base32Test, DecodeImplOutputBufferTooSmall) { + std::string_view input = "MZXW6YQ="; // Base32 encoded "foobar" + size_t inputSize = input.size(); + char output[3]; // Too small for decoded output + size_t outputSize = sizeof(output); + + auto status = Base32::decodeImpl(input, inputSize, output, outputSize, kBase32ReverseIndexTable); + EXPECT_FALSE(status.ok()); + EXPECT_EQ(status.message(), "Base32::decode() - output buffer too small."); +} + +} \ No newline at end of file diff --git a/velox/common/encode/tests/CMakeLists.txt b/velox/common/encode/tests/CMakeLists.txt index 663b2413557a..8eb6187ca881 100644 --- a/velox/common/encode/tests/CMakeLists.txt +++ b/velox/common/encode/tests/CMakeLists.txt @@ -12,7 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -add_executable(velox_common_encode_test Base64Test.cpp EncoderUtilsTests.cpp) +add_executable(velox_common_encode_test Base32Test.cpp Base64Test.cpp + EncoderUtilsTests.cpp) add_test(velox_common_encode_test velox_common_encode_test) target_link_libraries( velox_common_encode_test diff --git a/velox/docs/functions/presto/binary.rst b/velox/docs/functions/presto/binary.rst index 97904d62ed9c..a97206a8f9c4 100644 --- a/velox/docs/functions/presto/binary.rst +++ b/velox/docs/functions/presto/binary.rst @@ -63,6 +63,28 @@ Binary Functions Decodes ``bigint`` value from a 64-bit 2’s complement big endian ``binary``. +.. function:: from_base32(string) -> varbinary + + Decodes a Base32-encoded ``string`` back into its original binary form. + This function can handle both padded and non-padded Base32 encoded strings. Partially padded Base32 strings will result in an error. + + Examples + -------- + Query with padded Base32 string: + :: + SELECT from_base32('JBSWY3DPEBLW64TMMQ======'); -- [72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100] + + Query with non-padded Base32 string: + :: + SELECT from_base32('JBSWY3DPEBLW64TMMQ'); -- [72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100] + + Query with partially padded Base32 string: + :: + SELECT from_base32('JBSWY3DPEBLW64TM=='); -- Error: Base32::decode() - invalid input string: length is not a multiple of 8. + + In the examples above, both fully padded and non-padded Base32 strings ('JBSWY3DPEBLW64TMMQ======' and 'JBSWY3DPEBLW64TMMQ') decode to the binary representation of the text 'Hello World'. + The partially padded Base32 string 'JBSWY3DPEBLW64TM==' will lead to a decoding error. + .. function:: from_hex(string) -> varbinary Decodes binary data from the hex encoded ``string``. diff --git a/velox/functions/prestosql/BinaryFunctions.h b/velox/functions/prestosql/BinaryFunctions.h index 0118765dabb8..105d8988c491 100644 --- a/velox/functions/prestosql/BinaryFunctions.h +++ b/velox/functions/prestosql/BinaryFunctions.h @@ -386,6 +386,27 @@ struct FromBase32Function { } }; +template +struct FromBase32Function { + VELOX_DEFINE_FUNCTION_TYPES(TExec); + + // T can be either arg_type or arg_type. These are the + // same, but hard-coding one of them might be confusing. + template + FOLLY_ALWAYS_INLINE Status call(out_type& result, const T& input) { + auto inputSize = input.size(); + size_t decodedSize; + auto status = encoding::Base32::calculateDecodedSize( + input.data(), inputSize, decodedSize); + if (!status.ok()) { + return status; + } + result.resize(decodedSize); + return encoding::Base32::decode( + input.data(), inputSize, result.data(), result.size()); + } +}; + template struct FromBigEndian32 { VELOX_DEFINE_FUNCTION_TYPES(T);