Add comprehensive formatting changes and optimizations.

Minor changes that use compile-time code elimination via `#[cfg(...)]`, rather than trusting the compiler to do dead-code elimination via `if cfg!(...)`, give major performance improvements: seemingly 10% to 20% better performance in fast-case scenarios.
Alexhuszagh · Sep 14, 2024 · eae72e4 · eae72e4
1 parent 1b69c39
commit eae72e4
Show file tree

Hide file tree

Showing 32 changed files with 960 additions and 488 deletions.
diff --git a/.github/workflows/Comprehensive.yml b/.github/workflows/Comprehensive.yml
@@ -17,4 +17,3 @@ jobs:
             toolchain: nightly
             components: rustfmt, clippy
       - run: ci/comprehensive.sh
-      - run: ALL_FEATURES=1 ci/comprehensive.sh
diff --git a/ci/comprehensive.sh b/ci/comprehensive.sh
@@ -13,22 +13,24 @@ script_home=$(realpath "${script_dir}")
 home=$(dirname "${script_home}")
 cd "${home}"
 
-FEATURES=
-if [ ! -z $ALL_FEATURES ]; then
-    FEATURES=--all-features
-fi
+run_tests() {
+    # Test the parse-float correctness tests
+    cd "${home}"
+    cd lexical-parse-float/etc/correctness
+    cargo run "${@}" --release --bin test-parse-golang
+    cargo run "${@}" --release --bin test-parse-unittests
 
-# Test the parse-float correctness tests
-cd lexical-parse-float/etc/correctness
-cargo run $FEATURES --release --bin test-parse-golang
-cargo run $FEATURES --release --bin test-parse-unittests
+    # Test the write-float correctness tests.
+    cd "${home}"
+    cd lexical-write-float/etc/correctness
+    cargo run "${@}" --release --bin shorter_interval
+    cargo run "${@}" --release --bin random
+    cargo run "${@}" --release --bin simple_random  -- --iterations 1000000
+}
 
-# Test the write-float correctness tests.
-cd "${home}"
-cd lexical-write-float/etc/correctness
-cargo run $FEATURES --release --bin shorter_interval
-cargo run $FEATURES --release --bin random
-cargo run $FEATURES --release --bin simple_random  -- --iterations 1000000
+run_tests
+run_tests --features=format
+run_tests --all-features
 
 cd "${home}"
 if [ ! -z "${EXHAUSTIVE}" ]; then

diff --git a/lexical-asm/src/lib.rs b/lexical-asm/src/lib.rs
@@ -1,10 +1,11 @@
 use core::num::ParseFloatError;
+use std::io::Write;
+
 use lexical_parse_float::FromLexical as FloatFromLexical;
 use lexical_parse_integer::FromLexical as IntFromLexical;
 use lexical_util::error::Error;
 use lexical_write_float::ToLexical as FloatToLexical;
 use lexical_write_integer::ToLexical as IntToLexical;
-use std::io::Write;
 
 // PARSE INTEGER
 // -------------

diff --git a/lexical-benchmark/algorithm/bigint.rs b/lexical-benchmark/algorithm/bigint.rs
@@ -1,4 +1,5 @@
 use core::time::Duration;
+
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
 use fastrand::Rng;
 use lexical_parse_float::bigint;

diff --git a/lexical-benchmark/algorithm/division.rs b/lexical-benchmark/algorithm/division.rs
@@ -1,6 +1,7 @@
 mod input;
 
 use core::time::Duration;
+
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
 
 // Default random data size.

diff --git a/lexical-benchmark/input.rs b/lexical-benchmark/input.rs
@@ -6,6 +6,7 @@
 
 use core::fmt::Debug;
 use core::str::FromStr;
+
 use fastrand::Rng;
 #[cfg(feature = "floats")]
 use lexical_util::num::Float;
@@ -466,8 +467,9 @@ macro_rules! itoa_generator {
 
 macro_rules! fmt_generator {
     ($group:ident, $name:expr, $iter:expr) => {{
-        use lexical_util::constants::BUFFER_SIZE;
         use std::io::Write;
+
+        use lexical_util::constants::BUFFER_SIZE;
         let mut buffer = vec![b'0'; BUFFER_SIZE];
         $group.bench_function($name, |bench| {
             bench.iter(|| {

diff --git a/lexical-benchmark/parse-float/canada.rs b/lexical-benchmark/parse-float/canada.rs
@@ -2,6 +2,7 @@
 mod input;
 
 use core::time::Duration;
+
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
 use lexical_parse_float::FromLexical;
 

diff --git a/lexical-benchmark/parse-float/contrived.rs b/lexical-benchmark/parse-float/contrived.rs
@@ -3,9 +3,10 @@
 #[macro_use]
 mod input;
 
+use std::time::Duration;
+
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
 use lexical_parse_float::FromLexical;
-use std::time::Duration;
 
 // FLOATS
 

diff --git a/lexical-benchmark/parse-float/earth.rs b/lexical-benchmark/parse-float/earth.rs
@@ -2,6 +2,7 @@
 mod input;
 
 use core::time::Duration;
+
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
 use lexical_parse_float::FromLexical;
 

diff --git a/lexical-benchmark/parse-integer/json.rs b/lexical-benchmark/parse-integer/json.rs
@@ -2,6 +2,7 @@
 mod input;
 
 use core::time::Duration;
+
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
 use lexical_parse_integer::FromLexical;
 use serde::Deserialize;

diff --git a/lexical-benchmark/parse-integer/random.rs b/lexical-benchmark/parse-integer/random.rs
@@ -2,6 +2,7 @@
 mod input;
 
 use core::time::Duration;
+
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
 use lexical_parse_integer::FromLexical;
 

diff --git a/lexical-benchmark/write-float/json.rs b/lexical-benchmark/write-float/json.rs
@@ -2,6 +2,7 @@
 mod input;
 
 use core::time::Duration;
+
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
 use lexical_write_float::ToLexical;
 use serde::Deserialize;

diff --git a/lexical-benchmark/write-float/random.rs b/lexical-benchmark/write-float/random.rs
@@ -2,6 +2,7 @@
 mod input;
 
 use core::time::Duration;
+
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
 use lexical_write_float::ToLexical;
 

diff --git a/lexical-benchmark/write-integer/json.rs b/lexical-benchmark/write-integer/json.rs
@@ -2,6 +2,7 @@
 mod input;
 
 use core::time::Duration;
+
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
 use lexical_write_integer::ToLexical;
 use serde::Deserialize;

diff --git a/lexical-benchmark/write-integer/random.rs b/lexical-benchmark/write-integer/random.rs
@@ -2,6 +2,7 @@
 mod input;
 
 use core::time::Duration;
+
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
 use lexical_write_integer::ToLexical;
 

diff --git a/lexical-parse-float/etc/bellerophon_table.py b/lexical-parse-float/etc/bellerophon_table.py
@@ -1,18 +1,20 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
+'''
+    bellerophon_table
+    =================
 
-"""
-Generate powers of a given radix for the Bellerophon algorithm.
+    Generate powers of a given radix for the Bellerophon algorithm.
 
-Specifically, computes and outputs (as Rust code) a table of 10^e for some
-range of exponents e. The output is one array of 128 bit significands.
-The base two exponents can be inferred using a logarithmic slope
-of the decimal exponent. The approximations are normalized and rounded perfectly,
-i.e., within 0.5 ULP of the true value.
+    Specifically, computes and outputs (as Rust code) a table of 10^e for some
+    range of exponents e. The output is one array of 128 bit significands.
+    The base two exponents can be inferred using a logarithmic slope
+    of the decimal exponent. The approximations are normalized and rounded perfectly,
+    i.e., within 0.5 ULP of the true value.
 
-Ported from Rust's core library implementation, which itself is
-adapted from Daniel Lemire's fast_float ``table_generation.py``,
-available here: <https://github.com/fastfloat/fast_float/blob/main/script/table_generation.py>.
-"""
+    Ported from Rust's core library implementation, which itself is
+    adapted from Daniel Lemire's fast_float ``table_generation.py``,
+    available here: <https://github.com/fastfloat/fast_float/blob/main/script/table_generation.py>.
+'''
 
 import math
 from collections import deque
@@ -33,7 +35,10 @@
     bias: BASE{0}_BIAS,
 }};\n"""
 
-def calculate_bitshift(base, exponent):
+Fp = tuple[int, int]
+
+
+def calculate_bitshift(base: int, exponent: int) -> float:
     '''
     Calculate the bitshift required for a given base. The exponent
     is the absolute value of the max exponent (log distance from 1.)
@@ -42,27 +47,27 @@ def calculate_bitshift(base, exponent):
     return 63 + math.ceil(math.log2(base**exponent))
 
 
-def next_fp(fp, base, step = 1):
+def next_fp(fp: Fp, base: int, step: int = 1) -> Fp:
     '''Generate the next extended-floating point value.'''
 
     return (fp[0] * (base**step), fp[1])
 
 
-def prev_fp(fp, base, step = 1):
+def prev_fp(fp: Fp, base: int, step: int = 1) -> Fp:
     '''Generate the previous extended-floating point value.'''
 
     return (fp[0] // (base**step), fp[1])
 
 
-def normalize_fp(fp):
+def normalize_fp(fp: Fp) -> Fp:
     '''Normalize a extended-float so the MSB is the 64th bit'''
 
     while fp[0] >> 64 != 0:
         fp = (fp[0] >> 1, fp[1] + 1)
     return fp
 
 
-def generate_small(base, count):
+def generate_small(base: int, count: int) -> tuple[list[Fp], list[int]]:
     '''Generate the small powers for a given base'''
 
     bitshift = calculate_bitshift(base, count)
@@ -78,7 +83,7 @@ def generate_small(base, count):
     return fps, ints
 
 
-def generate_large(base, step):
+def generate_large(base: int, step: int) -> tuple[list[Fp], int]:
     '''Generate the large powers for a given base.'''
 
     # Get our starting parameters
@@ -106,7 +111,7 @@ def generate_large(base, step):
     return fps, -fps[0][1]
 
 
-def print_array(base, string, fps, index):
+def print_array(base: int, string: str, fps: list[Fp], index: int) -> None:
     '''Print an entire array'''
 
     print(string.format(base, len(fps)))
@@ -117,7 +122,7 @@ def print_array(base, string, fps, index):
     print("];")
 
 
-def generate_base(base):
+def generate_base(base: int) -> None:
     '''Generate all powers and variables.'''
 
     step = math.floor(math.log(1e10, base))
@@ -133,7 +138,7 @@ def generate_base(base):
     print(BIAS_STR.format(base, bias))
 
 
-def generate():
+def generate() -> None:
     '''Generate all bases.'''
 
     bases = [

diff --git a/lexical-parse-float/etc/lemire_table.py b/lexical-parse-float/etc/lemire_table.py
@@ -1,22 +1,23 @@
-#!/usr/bin/env python3
-
-"""
-Generate powers of five using Daniel Lemire's ``Eisel-Lemire algorithm`` for use in
-decimal to floating point conversions.
-
-Specifically, computes and outputs (as Rust code) a table of 10^e for some
-range of exponents e. The output is one array of 128 bit significands.
-The base two exponents can be inferred using a logarithmic slope
-of the decimal exponent. The approximations are normalized and rounded perfectly,
-i.e., within 0.5 ULP of the true value.
-
-Ported from Rust's core library implementation, which itself is
-adapted from Daniel Lemire's fast_float ``table_generation.py``,
-available here: <https://github.com/fastfloat/fast_float/blob/main/script/table_generation.py>.
-"""
-from __future__ import print_function
-from math import ceil, floor, log, log2
-from fractions import Fraction
+#!/usr/bin/env python
+'''
+    lemire_table
+    ============
+
+    Generate powers of five using Daniel Lemire's ``Eisel-Lemire algorithm`` for use in
+    decimal to floating point conversions.
+
+    Specifically, computes and outputs (as Rust code) a table of 10^e for some
+    range of exponents e. The output is one array of 128 bit significands.
+    The base two exponents can be inferred using a logarithmic slope
+    of the decimal exponent. The approximations are normalized and rounded perfectly,
+    i.e., within 0.5 ULP of the true value.
+
+    Ported from Rust's core library implementation, which itself is
+    adapted from Daniel Lemire's fast_float ``table_generation.py``,
+    available here: <https://github.com/fastfloat/fast_float/blob/main/script/table_generation.py>.
+'''
+
+from math import ceil, floor, log
 from collections import deque
 
 HEADER = """
@@ -36,6 +37,7 @@
 // the final binary.
 """
 
+
 def main():
     min_exp = minimum_exponent(10)
     max_exp = maximum_exponent(10)
@@ -78,18 +80,18 @@ def print_proper_powers(min_exp, max_exp, bias):
             b = 2 * z + 2 * 64
             c = 2 ** b // power5 + 1
             # truncate
-            while c >= (1<<128):
+            while c >= (1 << 128):
                 c //= 2
             powers.append((c, q))
 
     # Add positive exponents
     for q in range(0, max_exp + 1):
         power5 = 5 ** q
         # move the most significant bit in position
-        while power5 < (1<<127):
+        while power5 < (1 << 127):
             power5 *= 2
         # *truncate*
-        while power5 >= (1<<128):
+        while power5 >= (1 << 128):
             power5 //= 2
         powers.append((power5, q))
 
@@ -98,7 +100,6 @@ def print_proper_powers(min_exp, max_exp, bias):
     print('#[rustfmt::skip]')
     typ = '[(u64, u64); N_POWERS_OF_FIVE]'
     print('pub static POWER_OF_FIVE_128: {} = ['.format(typ))
-    lo_mask = (1 << 64) - 1
     for c, exp in powers:
         hi = '0x{:x}'.format(c // (1 << 64))
         lo = '0x{:x}'.format(c % (1 << 64))

diff --git a/lexical-parse-float/etc/limits.py b/lexical-parse-float/etc/limits.py
@@ -1,12 +1,19 @@
 #!/usr/bin/env python3
+'''
+    limits
+    ======
 
-"""
-Generate the numeric limits for a given radix.
+    Generate the numeric limits for a given radix.
 
-This is used for the fast-path algorithms, to calculate the
-maximum number of digits or exponent bits that can be exactly
-represented as a native value.
-"""
+    This is used for the fast-path algorithms, to calculate the
+    maximum number of digits or exponent bits that can be exactly
+    represented as a native value.
+
+    Note that we need to use `#[cfg]` rather than `if cfg!(...)`
+    due to performance reasons. The code is broken down in
+    the actual implementation to move the powers to const fns
+    and then implement the trait in terms of the const fns.
+'''
 
 import math