Skip to content

Commit

Permalink
Merge pull request #96 from Alexhuszagh/bigint
Browse files Browse the repository at this point in the history
Implement the big-integer arithmetic algorithm.
  • Loading branch information
lemire authored Sep 14, 2021
2 parents 25b240a + fc0c868 commit 3f0ba09
Show file tree
Hide file tree
Showing 8 changed files with 1,140 additions and 189 deletions.
155 changes: 32 additions & 123 deletions include/fast_float/ascii_number.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,16 +91,20 @@ CXX20_CONSTEXPR fastfloat_really_inline bool is_made_of_eight_digits_fast(const
return is_made_of_eight_digits_fast(read_u64(chars));
}

// Read-only view over a contiguous run of characters of the input.
using byte_span = span<const char>;

// Result of tokenizing a decimal number with parse_number_string.
//
// Every member has a default initializer, so a default-constructed
// object is in a well-defined state (in particular valid == false).
struct parsed_number_string {
  // Power-of-ten exponent that goes with `mantissa`.
  int64_t exponent{0};
  // Decimal significand accumulated from the digits.
  uint64_t mantissa{0};
  // Pointer into the input associated with the match; presumably one past
  // the last consumed character -- confirm against parse_number_string.
  const char *lastmatch{nullptr};
  // Sign flag (presumably true when the input starts with '-').
  bool negative{false};
  // Whether parsing succeeded (presumably; the assignment is not visible here).
  bool valid{false};
  // Set by parse_number_string when more than 19 significant digits were
  // seen; in that case the mantissa is a truncated value.
  bool too_many_digits{false};
  // contains the range of the significant digits
  byte_span integer{};  // non-nullable: digits before the decimal point
  byte_span fraction{}; // nullable: digits after the decimal point, if any
};


// Assuming that you use no more than 19 digits, this will
// parse an ASCII string.
CXX20_CONSTEXPR fastfloat_really_inline
Expand All @@ -125,6 +129,10 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_

uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad)

while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) {
i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok
p += 8;
}
while ((p != pend) && is_integer(*p)) {
// a multiplication by 10 is cheaper than an arbitrary integer
// multiplication
Expand All @@ -134,24 +142,24 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
}
const char *const end_of_integer_part = p;
int64_t digit_count = int64_t(end_of_integer_part - start_digits);
answer.integer = byte_span(start_digits, size_t(digit_count));
int64_t exponent = 0;
if ((p != pend) && (*p == decimal_point)) {
++p;
// Fast approach only tested under little endian systems
if ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) {
i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok
p += 8;
if ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) {
const char* before = p;
// can occur at most twice without overflowing, but let it occur more, since
// for integers with many digits, digit parsing is the primary bottleneck.
while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) {
i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok
p += 8;
}
}
while ((p != pend) && is_integer(*p)) {
uint8_t digit = uint8_t(*p - '0');
++p;
i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
}
exponent = end_of_integer_part + 1 - p;
exponent = before - p;
answer.fraction = byte_span(before, size_t(p - before));
digit_count -= exponent;
}
// we must have encountered at least one integer!
Expand Down Expand Up @@ -179,7 +187,7 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
} else {
while ((p != pend) && is_integer(*p)) {
uint8_t digit = uint8_t(*p - '0');
if (exp_number < 0x10000) {
if (exp_number < 0x10000000) {
exp_number = 10 * exp_number + digit;
}
++p;
Expand Down Expand Up @@ -212,23 +220,26 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
if (digit_count > 19) {
answer.too_many_digits = true;
// Let us start again, this time, avoiding overflows.
// We don't need to check if is_integer, since we use the
// pre-tokenized spans from above.
i = 0;
p = start_digits;
p = answer.integer.ptr;
const char* int_end = p + answer.integer.len();
const uint64_t minimal_nineteen_digit_integer{1000000000000000000};
while((i < minimal_nineteen_digit_integer) && (p != pend) && is_integer(*p)) {
while((i < minimal_nineteen_digit_integer) && (p != int_end)) {
i = i * 10 + uint64_t(*p - '0');
++p;
}
if (i >= minimal_nineteen_digit_integer) { // We have a big integer
exponent = end_of_integer_part - p + exp_number;
} else { // We have a value with a fractional component.
p++; // skip the dot
const char *first_after_period = p;
while((i < minimal_nineteen_digit_integer) && (p != pend) && is_integer(*p)) {
p = answer.fraction.ptr;
const char* frac_end = p + answer.fraction.len();
while((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
i = i * 10 + uint64_t(*p - '0');
++p;
}
exponent = first_after_period - p + exp_number;
exponent = answer.fraction.ptr - p + exp_number;
}
// We have now corrected both exponent and i, to a truncated value
}
Expand All @@ -238,108 +249,6 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
return answer;
}


// Parses the ASCII number in [p, pend) into a `decimal`: a sign, an array of
// digit values, the position of the decimal point and a truncation flag.
//
// The result represents 0.d1d2d3... * 10^decimal_point, e.g. "12.345" yields
// digits 1,2,3,4,5 with decimal_point == 2, and "0.00123" yields digits 1,2,3
// with decimal_point == -2.
//
// options.decimal_point supplies the character treated as the decimal
// separator. At most max_digits digits are stored; if more significant digits
// are present, answer.truncated is set and num_digits is clamped to max_digits.
//
// This should always succeed since it follows a call to parse_number_string.
// Accordingly nothing is validated here, and the first character is read
// without comparing p against pend.
// This function could be optimized. In particular, we could stop after 19 digits
// and try to bail out. Furthermore, we should be able to recover the computed
// exponent from the pass in parse_number_string.
CXX20_CONSTEXPR fastfloat_really_inline decimal parse_decimal(const char *p, const char *pend, parse_options options) noexcept {
const char decimal_point = options.decimal_point;

decimal answer;
answer.num_digits = 0;
answer.decimal_point = 0;
answer.truncated = false;
answer.negative = (*p == '-');
if (*p == '-') { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here
++p;
}
// skip leading zeroes
while ((p != pend) && (*p == '0')) {
++p;
}
// Integer part: every digit is counted, but only the first max_digits
// digits are actually stored.
while ((p != pend) && is_integer(*p)) {
if (answer.num_digits < max_digits) {
answer.digits[answer.num_digits] = uint8_t(*p - '0');
}
answer.num_digits++;
++p;
}
if ((p != pend) && (*p == decimal_point)) {
++p;
// Remembered so that the number of characters consumed after the decimal
// point (including any zeros skipped just below) can be computed later.
const char *first_after_period = p;
// if we have not yet encountered a zero, we have to skip it as well
if(answer.num_digits == 0) {
// skip zeros
while ((p != pend) && (*p == '0')) {
++p;
}
}
// We expect that this loop will often take the bulk of the running time
// because when a value has lots of digits, these digits often sit in the
// fractional part, so they are consumed eight at a time here.
while ((std::distance(p, pend) >= 8) && (answer.num_digits + 8 < max_digits)) {
uint64_t val = read_u64(p);
if(! is_made_of_eight_digits_fast(val)) { break; }
// We have eight digits, process them in one go!
// 0x30 is ASCII '0': subtracting it from each of the eight bytes turns the
// characters into digit values, which are then stored in a single write.
val -= 0x3030303030303030;
write_u64(answer.digits + answer.num_digits, val);
answer.num_digits += 8;
p += 8;
}
// Remaining fractional digits, one at a time (counted even when not stored).
while ((p != pend) && is_integer(*p)) {
if (answer.num_digits < max_digits) {
answer.digits[answer.num_digits] = uint8_t(*p - '0');
}
answer.num_digits++;
++p;
}
// Zero or negative: minus the number of characters consumed after the point.
answer.decimal_point = int32_t(first_after_period - p);
}
// We want num_digits to be the number of significant digits, excluding
// leading *and* trailing zeros! Otherwise the truncated flag later is
// going to be misleading.
if(answer.num_digits > 0) {
// We potentially need the answer.num_digits > 0 guard because we
// prune leading zeros. So with answer.num_digits > 0, we know that
// we have at least one non-zero digit.
// That non-zero digit is also what stops the backward scan below, which
// has no explicit lower bound of its own.
const char *preverse = p - 1;
int32_t trailing_zeros = 0;
while ((*preverse == '0') || (*preverse == decimal_point)) {
if(*preverse == '0') { trailing_zeros++; };
--preverse;
}
// The decimal point position uses the digit count *before* the trailing
// zeros are removed from it.
answer.decimal_point += int32_t(answer.num_digits);
answer.num_digits -= uint32_t(trailing_zeros);
}
// More significant digits than could be stored: flag it and clamp the count.
if(answer.num_digits > max_digits) {
answer.truncated = true;
answer.num_digits = max_digits;
}
// Optional exponent part: 'e' or 'E', optional sign, then digits.
if ((p != pend) && (('e' == *p) || ('E' == *p))) {
++p;
bool neg_exp = false;
if ((p != pend) && ('-' == *p)) {
neg_exp = true;
++p;
} else if ((p != pend) && ('+' == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1)
++p;
}
int32_t exp_number = 0; // exponential part
while ((p != pend) && is_integer(*p)) {
uint8_t digit = uint8_t(*p - '0');
// Stop accumulating once the value reaches 0x10000 so the int32_t cannot
// overflow; the remaining exponent digits are still consumed.
if (exp_number < 0x10000) {
exp_number = 10 * exp_number + digit;
}
++p;
}
answer.decimal_point += (neg_exp ? -exp_number : exp_number);
}
// In very rare cases, we may have fewer than 19 digits, we want to be able to reliably
// assume that all digits up to max_digit_without_overflow have been initialized.
for(uint32_t i = answer.num_digits; i < max_digit_without_overflow; i++) { answer.digits[i] = 0; }

return answer;
}
} // namespace fast_float

#endif
Loading

0 comments on commit 3f0ba09

Please sign in to comment.