Skip to content

Commit

Permalink
Rollup merge of #120308 - utkarshgupta137:duration-opt, r=m-ou-se
Browse files Browse the repository at this point in the history
core/time: avoid divisions in Duration::new

In our (decently large) code base, we use `SystemTime::UNIX_EPOCH.elapsed()` in a lot of places & often in a loop or in the hot path. On [Unix](https://github.com/rust-lang/rust/blob/1.75.0/library/std/src/sys/unix/time.rs#L153-L162) at least, it seems we do calculations before hand to ensure that nanos is within the valid range, yet `Duration::new()` still checks it again, using 2 divisions. It seems like adding a branch can make this function 33% faster on ARM64 in the cases where nanos is already in the valid range & seems to have no effect in the other case.

Benchmarks:
M1 Pro (14-inch base model):
```
duration/current/checked
                        time:   [1.5945 ns 1.6167 ns 1.6407 ns]
Found 5 outliers among 100 measurements (5.00%)
  2 (2.00%) high mild
  3 (3.00%) high severe
duration/current/unchecked
                        time:   [1.5941 ns 1.6051 ns 1.6179 ns]
Found 2 outliers among 100 measurements (2.00%)
  1 (1.00%) high mild
  1 (1.00%) high severe

duration/branched/checked
                        time:   [1.1997 ns 1.2048 ns 1.2104 ns]
Found 8 outliers among 100 measurements (8.00%)
  4 (4.00%) high mild
  4 (4.00%) high severe
duration/branched/unchecked
                        time:   [1.5881 ns 1.5957 ns 1.6039 ns]
Found 6 outliers among 100 measurements (6.00%)
  3 (3.00%) high mild
  3 (3.00%) high severe
```
EC2 c7gd.16xlarge (Graviton 3):
```
duration/current/checked
                        time:   [2.7996 ns 2.8000 ns 2.8003 ns]
Found 5 outliers among 100 measurements (5.00%)
  2 (2.00%) low severe
  3 (3.00%) low mild
duration/current/unchecked
                        time:   [2.9922 ns 2.9925 ns 2.9928 ns]
Found 7 outliers among 100 measurements (7.00%)
  4 (4.00%) low severe
  1 (1.00%) low mild
  2 (2.00%) high mild

duration/branched/checked
                        time:   [2.0830 ns 2.0843 ns 2.0857 ns]
Found 3 outliers among 100 measurements (3.00%)
  1 (1.00%) low severe
  1 (1.00%) low mild
  1 (1.00%) high mild
duration/branched/unchecked
                        time:   [2.9879 ns 2.9886 ns 2.9893 ns]
Found 5 outliers among 100 measurements (5.00%)
  3 (3.00%) low severe
  2 (2.00%) low mild
```
EC2 r7iz.16xlarge (Intel Xeon Scalable-based (Sapphire Rapids)):
```
duration/current/checked
                        time:   [980.60 ps 980.79 ps 980.99 ps]
Found 10 outliers among 100 measurements (10.00%)
  4 (4.00%) low severe
  2 (2.00%) low mild
  3 (3.00%) high mild
  1 (1.00%) high severe
duration/current/unchecked
                        time:   [979.53 ps 979.74 ps 979.96 ps]
Found 6 outliers among 100 measurements (6.00%)
  2 (2.00%) low severe
  1 (1.00%) low mild
  2 (2.00%) high mild
  1 (1.00%) high severe

duration/branched/checked
                        time:   [938.72 ps 938.96 ps 939.22 ps]
Found 4 outliers among 100 measurements (4.00%)
  1 (1.00%) low mild
  1 (1.00%) high mild
  2 (2.00%) high severe
duration/branched/unchecked
                        time:   [1.0103 ns 1.0110 ns 1.0118 ns]
Found 10 outliers among 100 measurements (10.00%)
  2 (2.00%) low mild
  7 (7.00%) high mild
  1 (1.00%) high severe
```

Bench code (ran using stable 1.75.0 & criterion latest 0.5.1):
I couldn't find any benches for `Duration` in this repo, so I just copied the relevant types & recreated it.
```rust
use criterion::{black_box, criterion_group, criterion_main, Criterion};

pub fn duration_bench(c: &mut Criterion) {
    const NANOS_PER_SEC: u32 = 1_000_000_000;

    #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
    #[repr(transparent)]
    struct Nanoseconds(u32);

    impl Default for Nanoseconds {
        #[inline]
        fn default() -> Self {
            // SAFETY: 0 is within the valid range
            unsafe { Nanoseconds(0) }
        }
    }

    #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
    pub struct Duration {
        secs: u64,
        nanos: Nanoseconds, // Always 0 <= nanos < NANOS_PER_SEC
    }

    impl Duration {
        #[inline]
        pub const fn new_current(secs: u64, nanos: u32) -> Duration {
            let secs = match secs.checked_add((nanos / NANOS_PER_SEC) as u64) {
                Some(secs) => secs,
                None => panic!("overflow in Duration::new"),
            };
            let nanos = nanos % NANOS_PER_SEC;
            // SAFETY: nanos % NANOS_PER_SEC < NANOS_PER_SEC, therefore nanos is within the valid range
            Duration { secs, nanos: unsafe { Nanoseconds(nanos) } }
        }

        #[inline]
        pub const fn new_branched(secs: u64, nanos: u32) -> Duration {
            if nanos < NANOS_PER_SEC {
                // SAFETY: nanos < NANOS_PER_SEC, therefore nanos is within the valid range
                Duration { secs, nanos: unsafe { Nanoseconds(nanos) } }
            } else {
                let secs = match secs.checked_add((nanos / NANOS_PER_SEC) as u64) {
                    Some(secs) => secs,
                    None => panic!("overflow in Duration::new"),
                };
                let nanos = nanos % NANOS_PER_SEC;
                // SAFETY: nanos % NANOS_PER_SEC < NANOS_PER_SEC, therefore nanos is within the valid range
                Duration { secs, nanos: unsafe { Nanoseconds(nanos) } }
            }
        }
    }

    let mut group = c.benchmark_group("duration/current");
    group.bench_function("checked", |b| {
        b.iter(|| black_box(Duration::new_current(black_box(1_000_000_000), black_box(1_000_000))));
    });
    group.bench_function("unchecked", |b| {
        b.iter(|| {
            black_box(Duration::new_current(black_box(1_000_000_000), black_box(2_000_000_000)))
        });
    });
    drop(group);
    let mut group = c.benchmark_group("duration/branched");
    group.bench_function("checked", |b| {
        b.iter(|| {
            black_box(Duration::new_branched(black_box(1_000_000_000), black_box(1_000_000)))
        });
    });
    group.bench_function("unchecked", |b| {
        b.iter(|| {
            black_box(Duration::new_branched(black_box(1_000_000_000), black_box(2_000_000_000)))
        });
    });
}

criterion_group!(duration_benches, duration_bench);
criterion_main!(duration_benches);
```
  • Loading branch information
matthiaskrgr authored Feb 9, 2024
2 parents f41d0d9 + 8a850cd commit 8b8adfd
Showing 1 changed file with 12 additions and 7 deletions.
19 changes: 12 additions & 7 deletions library/core/src/time.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,13 +197,18 @@ impl Duration {
#[must_use]
#[rustc_const_stable(feature = "duration_consts_2", since = "1.58.0")]
pub const fn new(secs: u64, nanos: u32) -> Duration {
let secs = match secs.checked_add((nanos / NANOS_PER_SEC) as u64) {
Some(secs) => secs,
None => panic!("overflow in Duration::new"),
};
let nanos = nanos % NANOS_PER_SEC;
// SAFETY: nanos % NANOS_PER_SEC < NANOS_PER_SEC, therefore nanos is within the valid range
Duration { secs, nanos: unsafe { Nanoseconds(nanos) } }
if nanos < NANOS_PER_SEC {
// SAFETY: nanos < NANOS_PER_SEC, therefore nanos is within the valid range
Duration { secs, nanos: unsafe { Nanoseconds(nanos) } }
} else {
let secs = match secs.checked_add((nanos / NANOS_PER_SEC) as u64) {
Some(secs) => secs,
None => panic!("overflow in Duration::new"),
};
let nanos = nanos % NANOS_PER_SEC;
// SAFETY: nanos % NANOS_PER_SEC < NANOS_PER_SEC, therefore nanos is within the valid range
Duration { secs, nanos: unsafe { Nanoseconds(nanos) } }
}
}

/// Creates a new `Duration` from the specified number of whole seconds.
Expand Down

0 comments on commit 8b8adfd

Please sign in to comment.