Skip to content

Commit

Permalink
Introduce utility class for encoding
Browse files Browse the repository at this point in the history
  • Loading branch information
Joe-Abraham committed Mar 11, 2024
1 parent 02ca9b0 commit a503f4f
Showing 1 changed file with 107 additions and 0 deletions.
107 changes: 107 additions & 0 deletions velox/common/encode/EncoderUtils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <array>
#include <cstddef>
#include <cstdint>
#include <exception>
#include <map>
#include <string>

#include <folly/Range.h>

namespace facebook::velox::encoding {

/// Exception type raised by the encoding helpers in this header.
///
/// The message pointer is stored as-is and is NOT copied, so the string it
/// points to has to outlive the exception object (a string literal is the
/// typical argument).
class EncoderException : public std::exception {
 public:
  /// @param msg Text later returned verbatim by what().
  explicit EncoderException(const char* msg) : msg_{msg} {}

  /// @return The pointer that was handed to the constructor.
  const char* what() const noexcept override {
    return msg_;
  }

 protected:
  // Non-owning pointer to the message text.
  const char* msg_;
};

/// Alphabet table: holds up to 64 characters, where position i stores the
/// character that represents the value i.
using Charset = std::array<char, 64>;

/// Reverse table: one entry per possible byte value (256 in total), giving the
/// value that the byte decodes to.
using ReverseIndex = std::array<uint8_t, 256>;

/// Character that is treated as padding at the end of encoded data.
static constexpr char kPadding = '=';

/// Reports whether the encoded data ends with the padding character.
///
/// @param data Pointer to the encoded characters.
/// @param len Number of characters available at 'data'.
/// @return true if 'len' is non-zero and the last character is kPadding;
/// false otherwise (including for empty input).
inline bool isPadded(const char* data, size_t len) {
  if (len == 0) {
    return false;
  }
  return data[len - 1] == kPadding;
}

/// Counts how many consecutive padding characters terminate the encoded data.
///
/// @param src Pointer to the encoded characters.
/// @param len Number of characters available at 'src'.
/// @return Length of the trailing run of kPadding characters (0 if there is
/// none or the input is empty).
inline size_t countPadding(const char* src, size_t len) {
  // Walk backwards from the end until a non-padding character is found.
  size_t unpaddedLen = len;
  for (; unpaddedLen > 0 && src[unpaddedLen - 1] == kPadding; --unpaddedLen) {
  }

  return len - unpaddedLen;
}

/// Translates one encoded character into its numeric value.
///
/// @param base Size of the alphabet; every valid decoded value is below it.
/// @param p Encoded character to translate.
/// @param reverse_lookup Table indexed by the unsigned byte value of 'p'.
/// @return The table entry for 'p'.
/// @throws EncoderException if the table entry is not smaller than 'base'.
inline uint8_t
baseReverseLookup(int base, char p, const ReverseIndex& reverse_lookup) {
  // Index with the unsigned byte value so that characters >= 0x80 never
  // produce a negative index.
  const uint8_t decoded = reverse_lookup[static_cast<uint8_t>(p)];
  if (decoded < base) {
    return decoded;
  }

  throw EncoderException(
      "decode() - invalid input string: invalid characters");
}

/// Verifies that the reverse table agrees with the charset for positions
/// idx, idx - 1, ..., 0.
///
/// A position passes when looking up its character (as an unsigned byte) in
/// 'table' yields the position itself.
///
/// @param idx Highest charset position to check.
/// @param charset Alphabet being validated.
/// @param table Reverse table expected to map each character to its position.
/// @return true if every checked position round-trips; false at the first
/// mismatch.
constexpr bool checkForwardIndex(
    uint8_t idx,
    const Charset& charset,
    const ReverseIndex& table) {
  for (uint8_t pos = idx;; --pos) {
    const auto byte = static_cast<uint8_t>(charset[pos]);
    if (table[byte] != pos) {
      return false;
    }
    // Position 0 was the last one to verify; stop before wrapping around.
    if (pos == 0) {
      return true;
    }
  }
}

/// Similar to strchr(), but for null-terminated const strings.
/// Another difference is that we do not consider "\0" to be present in the
/// string.
/// Returns true if "str" contains the character c.
constexpr bool findCharacterInCharSet(
const Charset& charset,
int base,
uint8_t idx,
const char c) {
return idx < base &&
((charset[idx] == c) ||
findCharacterInCharSet(charset, base, idx + 1, c));
}

// Validate the value in ReverseIndex table with charset.
constexpr bool checkReverseIndex(
uint8_t idx,
const Charset& charset,
int base,
const ReverseIndex& table) {
return (table[idx] == 255 ? !findCharacterInCharSet(
charset, base, 0, static_cast<char>(idx))
: (charset[table[idx]] == idx)) &&
(idx > 0 ? checkReverseIndex(idx - 1, charset, base, table) : true);
}

} // namespace facebook::velox::encoding

0 comments on commit a503f4f

Please sign in to comment.