Skip to content

Commit

Permalink
Support non-decimal bases in to_string() and when constructing from…
Browse files Browse the repository at this point in the history
… a string
  • Loading branch information
OTheDev committed Feb 29, 2024
1 parent e44e3a1 commit d878edd
Show file tree
Hide file tree
Showing 4 changed files with 228 additions and 136 deletions.
11 changes: 7 additions & 4 deletions include/bi.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ SPDX-License-Identifier: Apache-2.0

#include "impl-bi_digit_vector.hpp"

// NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
// NOLINTBEGIN(cppcoreguidelines-avoid-magic-numbers)

static_assert(sizeof(double) * CHAR_BIT == 64, "64-bit double is assumed.");
static_assert(-1 == ~0, "Two's complement representation assumed.");

Expand Down Expand Up @@ -56,8 +57,8 @@ class BI_API bi_t {
bi_t(T); // NOLINT(runtime/explicit)
bi_t(const bi_t&);
bi_t(bi_t&& other) noexcept;
explicit bi_t(const std::string&);
explicit bi_t(const char*);
explicit bi_t(const std::string&, int base = 10);
explicit bi_t(const char*, int base = 10);
bi_t(double); // NOLINT(runtime/explicit)

~bi_t() = default;
Expand Down Expand Up @@ -147,7 +148,7 @@ class BI_API bi_t {

// Other
void swap(bi_t&) noexcept;
std::string to_string() const;
std::string to_string(int base = 10) const;
void negate() noexcept;
int sign() const noexcept;
bool odd() const noexcept;
Expand Down Expand Up @@ -203,6 +204,8 @@ BI_API bi_t abs(const bi_t& value);

} // namespace bi

// NOLINTEND(cppcoreguidelines-avoid-magic-numbers)

#endif // BI_INCLUDE_BI_HPP_

#include "impl-bi.inl"
51 changes: 31 additions & 20 deletions src/bi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,12 +109,13 @@ bi_t::bi_t(bi_t&& other) noexcept

/**
* @name Construct from a string
* @brief Construct an integer from a string representing a base-10 (decimal)
* @brief Construct an integer from a string representing a base-`base`
* integer. These are `explicit` constructors.
* @throw std::invalid_argument Throws if a parsing error occurs or if a null
* pointer is provided.
* @throw std::invalid_argument Throws if a parsing error occurs, if a null
* pointer is provided, or if an invalid base is provided.
* @details Allows leading whitespace and/or a plus/minus sign before the first
* decimal digit.
* base-`base` digit. `base` must be an integer in \f$ [2, 36] \f$ (by default,
* it is `10`).
*
* Examples:
* @code
Expand All @@ -132,16 +133,16 @@ bi_t::bi_t(bi_t&& other) noexcept
// but init_string() does. We can ignore this warning.
// NOLINTBEGIN(cppcoreguidelines-pro-type-member-init)

bi_t::bi_t(const std::string& s) {
bi_t::bi_t(const std::string& s, int base) {
// vec_ implicitly default-initialized
h_::init_string(*this, s);
h_::init_string(*this, s, base);
}

bi_t::bi_t(const char* s) {
bi_t::bi_t(const char* s, int base) {
if (s == nullptr) {
throw std::invalid_argument("Null string pointer provided.");
}
h_::init_string(*this, std::string(s));
h_::init_string(*this, std::string(s), base);
}

bi_t::bi_t(double d) { h_::assign_from_double(*this, d); }
Expand Down Expand Up @@ -872,8 +873,9 @@ void bi_t::print_internal(std::ostream& os) const noexcept {
*/

/**
* @brief Return a `string` containing the base-10 (decimal) representation of
* the integer.
* @brief Return a `string` containing the base-`base` representation of the
* integer, where `base` must be an integer in \f$ [2, 36] \f$ (by default,
* `base` is `10`).
*
* Examples:
* @code
Expand All @@ -885,32 +887,39 @@ void bi_t::print_internal(std::ostream& os) const noexcept {
* s = x.to_string(); // s == "-32768"
* @endcode
*/
std::string bi_t::to_string() const {
std::string bi_t::to_string(int base) const {
// NOLINTBEGIN(cppcoreguidelines-avoid-magic-numbers)
static constexpr auto base_digits = "0123456789abcdefghijklmnopqrstuvwxyz";

if (base <= 1 || base > 36) {
throw std::invalid_argument("base argument must be in [2, 36]");
}

if (size() == 0) {
return "0";
}

const size_t estimate = h_::decimal_length(*this);
bi_t copy = *this;
std::string result;
result.reserve(estimate);
const size_t estimate = h_::base_length(*this, base);
result.reserve(estimate + negative_);

constexpr digit divisor = powers_of_ten[max_batch_size];
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-constant-array-index)
const auto [max_batch_size, divisor] = base_mbs[base];

while (copy.size()) {
digit remainder = h_::div_algo_digit(copy, copy, divisor);

for (size_t i = 0; i < max_batch_size; ++i) {
for (unsigned i = 0; i < max_batch_size; ++i) {
if (remainder == 0 && copy.size() == 0) {
break;
}

// NOLINTBEGIN(cppcoreguidelines-avoid-magic-numbers)
digit current_digit = remainder % 10;
remainder /= 10;
// NOLINTEND(cppcoreguidelines-avoid-magic-numbers)
digit current_digit = remainder % base;
remainder /= base;

result.push_back(static_cast<char>('0' + current_digit));
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
result.push_back(base_digits[current_digit]);
}
}

Expand All @@ -919,7 +928,9 @@ std::string bi_t::to_string() const {
}

std::reverse(result.begin(), result.end());

return result;
// NOLINTEND(cppcoreguidelines-avoid-magic-numbers)
}

/**
Expand Down
138 changes: 90 additions & 48 deletions src/h_.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,15 @@ struct h_ {

// to_string()
static uint8_t idiv10(bi_t& x) noexcept;
static size_t decimal_length(const bi_t& x);
static size_t base_length(const bi_t& x, int base);

// initializing
template <std::integral T>
static void init_one_digit(bi_t& x, T value);
template <std::integral T>
static void init_atleast_one_digit(bi_t& x, T value);
static void init_string(bi_t& x, const std::string& str);
// NOLINTNEXTLINE
static void init_string(bi_t& x, const std::string& str, int base = 10);

// misc.
static dvector to_twos_complement(const dvector& vec);
Expand Down Expand Up @@ -1083,7 +1084,7 @@ uint8_t h_::idiv10(bi_t& x) noexcept {
}

/**
* @brief Return an estimate of the number of decimal digits required to
* @brief Return an estimate of the number of base-`base` digits required to
* represent this integer.
*
* Given an n-digit integer in base b, the largest integer representable is
Expand All @@ -1106,28 +1107,36 @@ uint8_t h_::idiv10(bi_t& x) noexcept {
* n = \left\lceil m \cdot \frac{\log(c)}{\log(b)} \right\rceil
* \f]
*
* Thus, the minimum number of base 10 digits required to represent any m-digit
* base 2 integer is:
* For example, the minimum number of base 10 digits required to represent any
* m-digit base 2 integer is:
* \f[
* n = \left\lceil m * \log_{10}(2) \right\rceil
* \f]
*
* @throw overflow_error Thrown when the estimated number of decimal digits is
* beyond the theoretical or practical limit for representation, indicating an
* unmanageable or impractical size for the corresponding decimal string.
* @throw overflow_error Thrown when the estimated number of base-`base` digits
* is beyond the theoretical or practical limit for representation, indicating
* an unmanageable or impractical size for the corresponding base-`base`
* string.
*/
size_t h_::decimal_length(const bi_t& x) {
// log_base_2[base] gives log_{base}(2) (with some rounding up)
constexpr std::array<double, 37> log_base_2 = {
0, 0, 1.0, 0.631, 0.5, 0.431, 0.387, 0.357, 0.334, 0.316,
0.302, 0.290, 0.279, 0.271, 0.263, 0.256, 0.25, 0.245, 0.240, 0.236,
0.232, 0.228, 0.225, 0.222, 0.219, 0.216, 0.213, 0.211, 0.209, 0.206,
0.204, 0.202, 0.200, 0.199, 0.197, 0.195, 0.194};

size_t h_::base_length(const bi_t& x, int base) {
// TODO: a different exception is probably more appropriate in this func
constexpr const char* s = "Decimal digit estimation exceeds practical limit.";
constexpr double log10_2 = 0.30103; // log10(2)
constexpr const char* s = "Digit estimation exceeds practical limit.";

const bi_bitcount_t bitlen = x.bit_length();
if (bitlen > dbl_max_int) {
throw overflow_error(s);
}

const bi_bitcount_t r = static_cast<bi_bitcount_t>(
std::floor(static_cast<double>(bitlen) * log10_2) + 1);
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-constant-array-index)
std::floor(static_cast<double>(bitlen) * log_base_2[base]) + 1);
if (r > std::numeric_limits<size_t>::max() - 2) {
throw overflow_error(s);
}
Expand Down Expand Up @@ -1374,8 +1383,8 @@ void h_::init_atleast_one_digit(bi_t& x, T value) {
* @endinternal
*/
/**
* In the comment block above `decimal_length()`, it is shown how to calculate
* the minimum number of base-b digits, \f$ n \f$, required to represent any
* In the comment block above `base_length()`, it is shown how to calculate the
* minimum number of base-b digits, \f$ n \f$, required to represent any
* integer with \f$ m \f$ base-c digits.
*
* We can find an upper bound another way as well.
Expand Down Expand Up @@ -1429,38 +1438,66 @@ constexpr uint8_t char_to_b36(char ch) {
return char_to_int_map[static_cast<uint8_t>(ch)];
}

// Calculate 10 ** n
constexpr digit pow10(size_t n) {
// Calculate base ** n
constexpr digit pow(int base, size_t n) {
digit result = 1;
for (size_t i = 0; i < n; ++i) {
// NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
result *= 10;
result *= base;
}
return result;
}

// Generate an array of powers of ten
template <size_t... Indices>
constexpr auto make_powers_of_ten(std::index_sequence<Indices...>) {
return std::array<digit, sizeof...(Indices)>{pow10(Indices)...};
constexpr int calculate_max_batch_size(digit base) {
int n = 0;

ddigit b = base;
while (b < bi_dmax) {
b *= base;
++n;
}

return n;
}

// If digit <==> uint32_t (uint64_t), 10^{9} (10^{19}) is the highest power of
// 10 that fits in it.
constexpr unsigned max_batch_size = (bi_dwidth == 64) ? 19 : 9;
struct BaseMBS {
// max batch size
unsigned mbs;
// base ** mbs
digit base_pow_mbs;
};

// For example, if digit <==> uint32_t (uint64_t), 10^{9} (10^{19}) is the
// highest power of 10 that fits in it.
// NOLINTBEGIN(cppcoreguidelines-avoid-magic-numbers)
constexpr std::array<BaseMBS, 37> create_base_mbs_array() {
std::array<BaseMBS, 37> base_mbs{};
for (int base = 2; base <= 36; ++base) {
unsigned max_batch_size = calculate_max_batch_size(base);
base_mbs.at(base) = {max_batch_size, pow(base, max_batch_size)};
}
return base_mbs;
}
// NOLINTEND(cppcoreguidelines-avoid-magic-numbers)

constexpr auto base_mbs = create_base_mbs_array();

void h_::init_string(bi_t& x, const std::string& s, int base) {
// NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
if (base <= 1 || base > 36) {
throw std::invalid_argument("base argument must be in [2, 36]");
}

constexpr auto powers_of_ten =
make_powers_of_ten(std::make_index_sequence<max_batch_size + 1>{});
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-constant-array-index)
const auto [max_batch_size, base_pow_max_batch_size] = base_mbs[base];

void h_::init_string(bi_t& x, const std::string& s) {
// std::stoi and company allow leading whitespace and a plus/minus sign. We
// follow suit.

// Allow leading whitespace
auto it = std::find_if_not(s.begin(), s.end(),
[](char ch) { return std::isspace(ch); });

// Allow plus/minus sign to precede first decimal digit
// Allow plus/minus sign to precede first base-`base` digit
x.negative_ = false;
if (it != s.end()) {
if (*it == '-') {
Expand All @@ -1472,39 +1509,44 @@ void h_::init_string(bi_t& x, const std::string& s) {
}

const auto start_digit = it;
// TODO: isalnum() is too broad!
it = std::find_if_not(start_digit, s.end(),
[](char ch) { return std::isdigit(ch); });
[](char ch) { return std::isalnum(ch); });

if (start_digit == it) {
throw std::invalid_argument("Invalid string format.");
}

size_t n_base10 = std::distance(start_digit, it); // it - start_digit
const size_t n_digits = uints::div_ceil(n_base10, max_batch_size);
size_t n_base = std::distance(start_digit, it); // it - start_digit
const size_t n_digits = uints::div_ceil(n_base, max_batch_size);

x.reserve_(n_digits);
x.resize_unsafe_(0);

for (auto dec_it = start_digit; dec_it < it;) {
/* We could replace all the code in the body of this loop with just
* `imul1add1(10, *dec_it++ - '0');` and the end result will be the same.
* However, it is more efficient to batch some base-10 digits together. */
// Initialize batch value
digit batch = 0;
auto dec_it = start_digit;
const size_t rem_batch_size = n_base % max_batch_size;

// Calculate how many base-10 digits we can process in this batch
const size_t remaining = std::distance(dec_it, it);
const size_t digits_in_batch =
std::min(remaining, static_cast<size_t>(max_batch_size));
// Initialize batch value
digit batch = 0;
// Convert batch substring to integer value
for (size_t j = 0; j < rem_batch_size; ++j, ++dec_it) {
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-constant-array-index)
batch = char_to_int_map[*dec_it] + batch * base;
}
// x is initially zero, so multiplication is zero in h_::imul1add1()
if (batch) {
x.vec_.push_back(batch);
}

while (dec_it < it) {
// Initialize batch value
batch = 0;
// Convert batch substring to integer value
for (size_t j = 0; j < digits_in_batch; ++j, ++dec_it) {
// NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
batch = (*dec_it - '0') + batch * 10;
for (size_t j = 0; j < max_batch_size; ++j, ++dec_it) {
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-constant-array-index)
batch = char_to_int_map[*dec_it] + batch * base;
}

// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-constant-array-index)
h_::imul1add1(x, powers_of_ten[digits_in_batch], batch);
h_::imul1add1(x, base_pow_max_batch_size, batch);
}

x.trim_trailing_zeros();
Expand Down
Loading

0 comments on commit d878edd

Please sign in to comment.