Skip to content

Commit

Permalink
Improve inlining hints for better performance.
Browse files Browse the repository at this point in the history
Affects rustc versions 1.81.0 and later, whose changed inlining thresholds dramatically decreased performance.

- Closes #111.
  • Loading branch information
Alexhuszagh committed Sep 9, 2024
1 parent b0c9e64 commit f282542
Show file tree
Hide file tree
Showing 51 changed files with 420 additions and 415 deletions.
1 change: 1 addition & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Changed

- Updated the MSRV to 1.63.0 (1.65.0 for development).
- Improved performance due to compiler regressions in rustc 1.81.0 and above.

### Removed

Expand Down
2 changes: 1 addition & 1 deletion lexical-benchmark/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,6 @@ The benchmark requires the following:
3. An installation of [Python3](https://www.python.org/downloads/).
4. An installation of [Rust](https://doc.rust-lang.org/1.0.0/book/installing-rust.html).
5. An installation of Google [Benchmark](https://github.com/google/benchmark).
5. An installation of [CMake](https://cmake.org/download/).
6. An installation of [CMake](https://cmake.org/download/).

The use of a Rust version >= 1.59.0, with the feature `asm`, is highly recommended for better metrics and/or performance.
2 changes: 1 addition & 1 deletion lexical-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -498,7 +498,7 @@ macro_rules! to_lexical_impl {
}
}

#[cfg_attr(not(feature = "compact"), inline)]
#[cfg_attr(not(feature = "compact"), inline(always))]
fn to_lexical_with_options<'a, const FORMAT: u128>(
self,
bytes: &'a mut [u8],
Expand Down
12 changes: 6 additions & 6 deletions lexical-parse-float/src/bellerophon.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ const fn error_halfscale() -> u32 {
}

/// Determine if the number of errors is tolerable for float precision.
#[cfg_attr(not(feature = "compact"), inline)]
#[cfg_attr(not(feature = "compact"), inline(always))]
fn error_is_accurate<F: RawFloat>(errors: u32, fp: &ExtendedFloat80) -> bool {
// Check we can't have a literal 0 denormal float.
debug_assert!(fp.exp >= -64);
Expand Down Expand Up @@ -283,7 +283,7 @@ fn error_is_accurate<F: RawFloat>(errors: u32, fp: &ExtendedFloat80) -> bool {
/// itself is 0.
///
/// Get the number of bytes shifted.
#[cfg_attr(not(feature = "compact"), inline)]
#[cfg_attr(not(feature = "compact"), inline(always))]
pub fn normalize(fp: &mut ExtendedFloat80) -> i32 {
// Note:
// Using the ctlz intrinsic via leading_zeros is way faster (~10x)
Expand Down Expand Up @@ -318,7 +318,7 @@ pub fn normalize(fp: &mut ExtendedFloat80) -> i32 {
/// 1. Non-signed multiplication of mantissas (requires 2x as many bits as input).
/// 2. Normalization of the result (not done here).
/// 3. Addition of exponents.
#[cfg_attr(not(feature = "compact"), inline)]
#[cfg_attr(not(feature = "compact"), inline(always))]
pub fn mul(x: &ExtendedFloat80, y: &ExtendedFloat80) -> ExtendedFloat80 {
// Logic check, values must be decently normalized prior to multiplication.
debug_assert!(x.mant >> 32 != 0);
Expand Down Expand Up @@ -370,7 +370,7 @@ pub struct BellerophonPowers {

/// Allow indexing of values without bounds checking
impl BellerophonPowers {
#[inline]
#[inline(always)]
pub const fn get_small(&self, index: usize) -> ExtendedFloat80 {
let mant = self.small[index];
let exp = (1 - 64) + ((self.log2 * index as i64) >> self.log2_shift);
Expand All @@ -380,7 +380,7 @@ impl BellerophonPowers {
}
}

#[inline]
#[inline(always)]
pub const fn get_large(&self, index: usize) -> ExtendedFloat80 {
let mant = self.large[index];
let biased_e = index as i64 * self.step as i64 - self.bias as i64;
Expand All @@ -391,7 +391,7 @@ impl BellerophonPowers {
}
}

#[inline]
#[inline(always)]
/// Look up the entry at `index` in the `small_int` table and return it by value.
///
/// NOTE(review): this uses ordinary `[]` indexing, so an out-of-range `index`
/// presumably panics; the type of `small_int` is not visible here — confirm.
pub const fn get_small_int(&self, index: usize) -> u64 {
self.small_int[index]
}
Expand Down
Loading

0 comments on commit f282542

Please sign in to comment.