Skip to content

Commit

Permalink
Clean up Base64
Browse files Browse the repository at this point in the history
  • Loading branch information
Joe-Abraham committed Mar 11, 2024
1 parent 9d33332 commit 796612f
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 69 deletions.
60 changes: 15 additions & 45 deletions velox/common/encode/Base64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,24 +18,23 @@
#include <folly/Portability.h>
#include <folly/container/Foreach.h>
#include <folly/io/Cursor.h>
#include <stdint.h>

namespace facebook::velox::encoding {

constexpr const Base64::Charset kBase64Charset = {
constexpr const Charset kBase64Charset = {
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'};
constexpr const Base64::Charset kBase64UrlCharset = {
constexpr const Charset kBase64UrlCharset = {
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_'};

constexpr const Base64::ReverseIndex kBase64ReverseIndexTable = {
constexpr const ReverseIndex kBase64ReverseIndexTable = {
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255,
Expand All @@ -54,7 +53,7 @@ constexpr const Base64::ReverseIndex kBase64ReverseIndexTable = {
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255};
constexpr const Base64::ReverseIndex kBase64UrlReverseIndexTable = {
constexpr const ReverseIndex kBase64UrlReverseIndexTable = {
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255,
Expand Down Expand Up @@ -90,34 +89,17 @@ static_assert(
kBase64UrlCharset,
kBase64UrlReverseIndexTable),
"kBase64UrlCharset has incorrect entries");
// Returns true if "charset" holds the character "c" at position "idx" or at
// any later position. Returns false when "idx" is at or past the end of
// "charset".
constexpr bool constCharsetContains(
    const Base64::Charset& charset,
    uint8_t idx,
    const char c) {
  for (size_t pos = idx; pos < charset.size(); ++pos) {
    if (charset[pos] == c) {
      return true;
    }
  }
  return false;
}
// Validates reverse-index entries 0..idx (inclusive) against "charset",
// starting from the highest index. An entry equal to 255 requires that the
// byte does not occur anywhere in "charset"; any other entry must be the
// position in "charset" that holds that byte.
constexpr bool checkReverseIndex(
    uint8_t idx,
    const Base64::Charset& charset,
    const Base64::ReverseIndex& table) {
  for (int pos = idx; pos >= 0; --pos) {
    const auto byte = static_cast<uint8_t>(pos);
    const bool entryOk = table[byte] == 255
        ? !constCharsetContains(charset, 0, static_cast<char>(byte))
        : (charset[table[byte]] == byte);
    if (!entryOk) {
      return false;
    }
  }
  return true;
}

// Verify that for every entry in kBase64ReverseIndexTable, the corresponding
// entry in kBase64Charset is correct.
static_assert(
checkReverseIndex(
sizeof(kBase64ReverseIndexTable) - 1,
kBase64Charset,
Base64::kBase,
kBase64ReverseIndexTable),
"kBase64ReverseIndexTable has incorrect entries.");

// Verify that for every entry in kBase64UrlReverseIndexTable, the
// corresponding entry in kBase64UrlCharset is correct.
// We can't run this check as the URL version has two duplicate entries so that
Expand Down Expand Up @@ -204,13 +186,13 @@ template <class T>
*wp++ = charset[(curr >> 12) & 0x3f];
*wp++ = charset[(curr >> 6) & 0x3f];
if (include_pad) {
*wp = kBase64Pad;
*wp = kPadding;
}
} else {
*wp++ = charset[(curr >> 12) & 0x3f];
if (include_pad) {
*wp++ = kBase64Pad;
*wp = kBase64Pad;
*wp++ = kPadding;
*wp = kPadding;
}
}
}
Expand Down Expand Up @@ -303,18 +285,6 @@ void Base64::decode(const char* data, size_t size, char* output) {
Base64::decode(data, size, output, out_len);
}

// Looks up the value stored in "reverse_lookup" for the character "p".
// Throws Base64Exception when the stored value is 0x40 or greater.
uint8_t Base64::Base64ReverseLookup(
    char p,
    const Base64::ReverseIndex& reverse_lookup) {
  // Index by the unsigned byte value of the character.
  const uint8_t decoded = reverse_lookup[static_cast<uint8_t>(p)];
  if (decoded < 0x40) {
    return decoded;
  }

  throw Base64Exception(
      "Base64::decode() - invalid input string: invalid characters");
}

size_t
Base64::decode(const char* src, size_t src_len, char* dst, size_t dst_len) {
return decodeImpl(src, src_len, dst, dst_len, kBase64ReverseIndexTable);
Expand All @@ -325,10 +295,10 @@ size_t Base64::calculateDecodedSize(const char* data, size_t& size) {
return 0;
}

// Check if the input data is padded
// Check if the input data is padded.
if (isPadded(data, size)) {
/// If padded, ensure that the string length is a multiple of the encoded
/// block size
/// block size.
if (size % kEncodedBlockSize != 0) {
throw EncoderException(
"Base64::decode() - invalid input string: "
Expand All @@ -339,16 +309,16 @@ size_t Base64::calculateDecodedSize(const char* data, size_t& size) {
auto padding = countPadding(data, size);
size -= padding;

// Adjust the needed size for padding
// Adjust the needed size for padding.
return needed -
ceil((padding * kBinaryBlockSize) /
static_cast<double>(kEncodedBlockSize));
} else {
// If not padded, Calculate extra bytes, if any
// If not padded, calculate extra bytes, if any.
auto extra = size % kEncodedBlockSize;
auto needed = (size / kEncodedBlockSize) * kBinaryBlockSize;

// Adjust the needed size for extra bytes, if present
// Adjust the needed size for extra bytes, if present.
if (extra) {
if (extra == 1) {
throw EncoderException(
Expand Down
25 changes: 1 addition & 24 deletions velox/common/encode/Base64.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,27 +25,13 @@

namespace facebook::velox::encoding {

class Base64Exception : public std::exception {
public:
explicit Base64Exception(const char* msg) : msg_(msg) {}
const char* what() const noexcept override {
return msg_;
}

protected:
const char* msg_;
};

class Base64 {
public:
using Charset = std::array<char, 64>;
using ReverseIndex = std::array<uint8_t, 256>;

static std::string encode(const char* data, size_t len);
static std::string encode(folly::StringPiece text);
static std::string encode(const folly::IOBuf* text);

/// Returns encoded size for the input of the specified size.
// Returns encoded size for the input of the specified size.
static size_t calculateEncodedSize(size_t size, bool withPadding = true);

/// Encodes the specified number of characters from the 'data' and writes the
Expand Down Expand Up @@ -92,16 +78,7 @@ class Base64 {
static void
decodeUrl(const char* src, size_t src_len, char* dst, size_t dst_len);

constexpr static char kBase64Pad = '=';

private:
/// Returns how many of the last two characters of 'src' are kBase64Pad,
/// counting back from the end and stopping at the first one that is not
/// (result is 0, 1 or 2). 'len' is expected to be at least 2; this is
/// checked with DCHECK_GE.
static inline size_t countPadding(const char* src, size_t len) {
  DCHECK_GE(len, 2);
  if (src[len - 1] != kBase64Pad) {
    return 0;
  }
  return src[len - 2] == kBase64Pad ? 2 : 1;
}

static uint8_t Base64ReverseLookup(char p, const ReverseIndex& table);

template <class T>
static std::string
encodeImpl(const T& data, const Charset& charset, bool include_pad);
Expand Down

0 comments on commit 796612f

Please sign in to comment.