Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve performance of rem_euclid() for signed integers #103913

Merged
merged 3 commits into from
Nov 12, 2022

Conversation

Neutron3529
Copy link
Contributor

@Neutron3529 Neutron3529 commented Nov 3, 2022

such code is copy from
https://github.com/rust-lang/rust/blob/master/library/std/src/f32.rs and
https://github.com/rust-lang/rust/blob/master/library/std/src/f64.rs
using r+rhs.abs() is faster than calc it with an if clause. Bench result:

$ cargo bench
   Compiling div-euclid v0.1.0 (/me/div-euclid)
    Finished bench [optimized] target(s) in 1.01s
     Running unittests src/lib.rs (target/release/deps/div_euclid-7a4530ca7817d1ef)

running 7 tests
test tests::it_works ... ignored
test tests::bench_aaabs     ... bench:  10,498,793 ns/iter (+/- 104,360)
test tests::bench_aadefault ... bench:  11,061,862 ns/iter (+/- 94,107)
test tests::bench_abs       ... bench:  10,477,193 ns/iter (+/- 81,942)
test tests::bench_default   ... bench:  10,622,983 ns/iter (+/- 25,119)
test tests::bench_zzabs     ... bench:  10,481,971 ns/iter (+/- 43,787)
test tests::bench_zzdefault ... bench:  11,074,976 ns/iter (+/- 29,633)

test result: ok. 0 passed; 0 failed; 1 ignored; 6 measured; 0 filtered out; finished in 19.35s

It seems that, default rem_euclid triggered a branch prediction, thus bench_default is faster than bench_aadefault and bench_aadefault, which shuffles the order of calculations. but all of them slower than what it was in f64's and f32's rem_euclid, thus I submit this PR.

bench code:

#![feature(test)]
extern crate test;

fn rem_euclid(a:i32,rhs:i32)->i32{
    let r = a % rhs;
    if r < 0 { r + rhs.abs() } else { r }
}

#[cfg(test)]
mod tests {
    use super::*;
    use test::Bencher;
    use rand::prelude::*;
    use rand::rngs::SmallRng;
    const N:i32=1000;
    #[test]
    fn it_works() {
        let a: i32 = 7; // or any other integer type
        let b = 4;

        let d:Vec<i32>=(-N..=N).collect();
        let n:Vec<i32>=(-N..0).chain(1..=N).collect();

        for i in &d {
            for j in &n {
                assert_eq!(i.rem_euclid(*j),rem_euclid(*i,*j));
            }
        }

        assert_eq!(rem_euclid(a,b), 3);
        assert_eq!(rem_euclid(-a,b), 1);
        assert_eq!(rem_euclid(a,-b), 3);
        assert_eq!(rem_euclid(-a,-b), 1);
    }


    #[bench]
    fn bench_aaabs(b: &mut Bencher) {
        let mut d:Vec<i32>=(-N..=N).collect();
        let mut n:Vec<i32>=(-N..0).chain(1..=N).collect();
        let mut rng=SmallRng::from_seed([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,21]);
        n.shuffle(&mut rng);
        d.shuffle(&mut rng);
        n.shuffle(&mut rng);
        b.iter(||{
            let mut res=0;
            for i in &d {
                for j in &n {
                    res+=rem_euclid(*i,*j);
                }
            }
            res
        });
    }
    #[bench]
    fn bench_aadefault(b: &mut Bencher) {
        let mut d:Vec<i32>=(-N..=N).collect();
        let mut n:Vec<i32>=(-N..0).chain(1..=N).collect();
        let mut rng=SmallRng::from_seed([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,21]);
        n.shuffle(&mut rng);
        d.shuffle(&mut rng);
        n.shuffle(&mut rng);
        b.iter(||{
            let mut res=0;
            for i in &d {
                for j in &n {
                    res+=i.rem_euclid(*j);
                }
            }
            res
        });
    }

    #[bench]
    fn bench_abs(b: &mut Bencher) {
        let d:Vec<i32>=(-N..=N).collect();
        let n:Vec<i32>=(-N..0).chain(1..=N).collect();
        b.iter(||{
            let mut res=0;
            for i in &d {
                for j in &n {
                    res+=rem_euclid(*i,*j);
                }
            }
            res
        });
    }
    #[bench]
    fn bench_default(b: &mut Bencher) {
        let d:Vec<i32>=(-N..=N).collect();
        let n:Vec<i32>=(-N..0).chain(1..=N).collect();
        b.iter(||{
            let mut res=0;
            for i in &d {
                for j in &n {
                    res+=i.rem_euclid(*j);
                }
            }
            res
        });
    }

    #[bench]
    fn bench_zzabs(b: &mut Bencher) {
        let mut d:Vec<i32>=(-N..=N).collect();
        let mut n:Vec<i32>=(-N..0).chain(1..=N).collect();
        let mut rng=SmallRng::from_seed([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,21]);
        d.shuffle(&mut rng);
        n.shuffle(&mut rng);
        d.shuffle(&mut rng);
        b.iter(||{
            let mut res=0;
            for i in &d {
                for j in &n {
                    res+=rem_euclid(*i,*j);
                }
            }
            res
        });
    }
    #[bench]
    fn bench_zzdefault(b: &mut Bencher) {
        let mut d:Vec<i32>=(-N..=N).collect();
        let mut n:Vec<i32>=(-N..0).chain(1..=N).collect();
        let mut rng=SmallRng::from_seed([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,21]);
        d.shuffle(&mut rng);
        n.shuffle(&mut rng);
        d.shuffle(&mut rng);
        b.iter(||{
            let mut res=0;
            for i in &d {
                for j in &n {
                    res+=i.rem_euclid(*j);
                }
            }
            res
        });
    }
}

such code is copy from
https://github.com/rust-lang/rust/blob/master/library/std/src/f32.rs
and
https://github.com/rust-lang/rust/blob/master/library/std/src/f64.rs
using r+rhs.abs() is faster than calc it directly.
Bench result:
```
$ cargo bench
   Compiling div-euclid v0.1.0 (/me/div-euclid)
    Finished bench [optimized] target(s) in 1.01s
     Running unittests src/lib.rs (target/release/deps/div_euclid-7a4530ca7817d1ef)

running 7 tests
test tests::it_works ... ignored
test tests::bench_aaabs     ... bench:  10,498,793 ns/iter (+/- 104,360)
test tests::bench_aadefault ... bench:  11,061,862 ns/iter (+/- 94,107)
test tests::bench_abs       ... bench:  10,477,193 ns/iter (+/- 81,942)
test tests::bench_default   ... bench:  10,622,983 ns/iter (+/- 25,119)
test tests::bench_zzabs     ... bench:  10,481,971 ns/iter (+/- 43,787)
test tests::bench_zzdefault ... bench:  11,074,976 ns/iter (+/- 29,633)

test result: ok. 0 passed; 0 failed; 1 ignored; 6 measured; 0 filtered out; finished in 19.35s
```
bench code:
```
#![feature(test)]
extern crate test;

fn rem_euclid(a:i32,rhs:i32)->i32{
    let r = a % rhs;
    if r < 0 { r + rhs.abs() } else { r }
}

#[cfg(test)]
mod tests {
    use super::*;
    use test::Bencher;
    use rand::prelude::*;
    use rand::rngs::SmallRng;
    const N:i32=1000;
    #[test]
    fn it_works() {
        let a: i32 = 7; // or any other integer type
        let b = 4;

        let d:Vec<i32>=(-N..=N).collect();
        let n:Vec<i32>=(-N..0).chain(1..=N).collect();

        for i in &d {
            for j in &n {
                assert_eq!(i.rem_euclid(*j),rem_euclid(*i,*j));
            }
        }

        assert_eq!(rem_euclid(a,b), 3);
        assert_eq!(rem_euclid(-a,b), 1);
        assert_eq!(rem_euclid(a,-b), 3);
        assert_eq!(rem_euclid(-a,-b), 1);
    }


    #[bench]
    fn bench_aaabs(b: &mut Bencher) {
        let mut d:Vec<i32>=(-N..=N).collect();
        let mut n:Vec<i32>=(-N..0).chain(1..=N).collect();
        let mut rng=SmallRng::from_seed([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,21]);
        n.shuffle(&mut rng);
        d.shuffle(&mut rng);
        n.shuffle(&mut rng);
        b.iter(||{
            let mut res=0;
            for i in &d {
                for j in &n {
                    res+=rem_euclid(*i,*j);
                }
            }
            res
        });
    }
    #[bench]
    fn bench_aadefault(b: &mut Bencher) {
        let mut d:Vec<i32>=(-N..=N).collect();
        let mut n:Vec<i32>=(-N..0).chain(1..=N).collect();
        let mut rng=SmallRng::from_seed([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,21]);
        n.shuffle(&mut rng);
        d.shuffle(&mut rng);
        n.shuffle(&mut rng);
        b.iter(||{
            let mut res=0;
            for i in &d {
                for j in &n {
                    res+=i.rem_euclid(*j);
                }
            }
            res
        });
    }

    #[bench]
    fn bench_abs(b: &mut Bencher) {
        let d:Vec<i32>=(-N..=N).collect();
        let n:Vec<i32>=(-N..0).chain(1..=N).collect();
        b.iter(||{
            let mut res=0;
            for i in &d {
                for j in &n {
                    res+=rem_euclid(*i,*j);
                }
            }
            res
        });
    }
    #[bench]
    fn bench_default(b: &mut Bencher) {
        let d:Vec<i32>=(-N..=N).collect();
        let n:Vec<i32>=(-N..0).chain(1..=N).collect();
        b.iter(||{
            let mut res=0;
            for i in &d {
                for j in &n {
                    res+=i.rem_euclid(*j);
                }
            }
            res
        });
    }

    #[bench]
    fn bench_zzabs(b: &mut Bencher) {
        let mut d:Vec<i32>=(-N..=N).collect();
        let mut n:Vec<i32>=(-N..0).chain(1..=N).collect();
        let mut rng=SmallRng::from_seed([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,21]);
        d.shuffle(&mut rng);
        n.shuffle(&mut rng);
        d.shuffle(&mut rng);
        b.iter(||{
            let mut res=0;
            for i in &d {
                for j in &n {
                    res+=rem_euclid(*i,*j);
                }
            }
            res
        });
    }
    #[bench]
    fn bench_zzdefault(b: &mut Bencher) {
        let mut d:Vec<i32>=(-N..=N).collect();
        let mut n:Vec<i32>=(-N..0).chain(1..=N).collect();
        let mut rng=SmallRng::from_seed([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,21]);
        d.shuffle(&mut rng);
        n.shuffle(&mut rng);
        d.shuffle(&mut rng);
        b.iter(||{
            let mut res=0;
            for i in &d {
                for j in &n {
                    res+=i.rem_euclid(*j);
                }
            }
            res
        });
    }
}
```
@rustbot
Copy link
Collaborator

rustbot commented Nov 3, 2022

r? @thomcc

(rustbot has picked a reviewer for you, use r? to override)

@rustbot rustbot added S-waiting-on-review Status: Awaiting review from the assignee but also interested parties. T-libs Relevant to the library team, which will review and decide on the PR/issue. labels Nov 3, 2022
@rustbot
Copy link
Collaborator

rustbot commented Nov 3, 2022

Hey! It looks like you've submitted a new PR for the library teams!

If this PR contains changes to any rust-lang/rust public library APIs then please comment with @rustbot label +T-libs-api -T-libs to tag it appropriately. If this PR contains changes to any unstable APIs please edit the PR description to add a link to the relevant API Change Proposal or create one if you haven't already. If you're unsure where your change falls no worries, just leave it as is and the reviewer will take a look and make a decision to forward on if necessary.

Examples of T-libs-api changes:

  • Stabilizing library features
  • Introducing insta-stable changes such as new implementations of existing stable traits on existing stable types
  • Introducing new or changing existing unstable library APIs (excluding permanently unstable features / features without a tracking issue)
  • Changing public documentation in ways that create new stability guarantees
  • Changing observable runtime behavior of library APIs

@rust-log-analyzer

This comment has been minimized.

benchmark result:
```
$ cargo bench
   Compiling div-euclid v0.1.0 (/me/div-euclid)
    Finished bench [optimized] target(s) in 1.01s
     Running unittests src/lib.rs (target/release/deps/div_euclid-7a4530ca7817d1ef)

running 7 tests
test tests::it_works ... ignored
test tests::bench_aaabs     ... bench:  10,498,793 ns/iter (+/- 104,360)
test tests::bench_aadefault ... bench:  11,061,862 ns/iter (+/- 94,107)
test tests::bench_abs       ... bench:  10,477,193 ns/iter (+/- 81,942)
test tests::bench_default   ... bench:  10,622,983 ns/iter (+/- 25,119)
test tests::bench_zzabs     ... bench:  10,481,971 ns/iter (+/- 43,787)
test tests::bench_zzdefault ... bench:  11,074,976 ns/iter (+/- 29,633)

test result: ok. 0 passed; 0 failed; 1 ignored; 6 measured; 0 filtered out; finished in 19.35s
```
benchmark code:
```rust
#![feature(test)]
extern crate test;

#[inline(always)]
fn rem_euclid(a:i32,rhs:i32)->i32{
    let r = a % rhs;
    if r < 0 {
        // if rhs is `integer::MIN`, rhs.wrapping_abs() == rhs.wrapping_abs,
        // thus r.wrapping_add(rhs.wrapping_abs()) == r.wrapping_add(rhs) == r - rhs,
        // which suits our need.
        // otherwise, rhs.wrapping_abs() == -rhs, which won't overflow since r is negative.
        r.wrapping_add(rhs.wrapping_abs())
    } else {
        r
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use test::Bencher;
    use rand::prelude::*;
    use rand::rngs::SmallRng;
    const N:i32=1000;
    #[test]
    fn it_works() {
        let a: i32 = 7; // or any other integer type
        let b = 4;

        let d:Vec<i32>=(-N..=N).collect();
        let n:Vec<i32>=(-N..0).chain(1..=N).collect();

        for i in &d {
            for j in &n {
                assert_eq!(i.rem_euclid(*j),rem_euclid(*i,*j));
            }
        }

        assert_eq!(rem_euclid(a,b), 3);
        assert_eq!(rem_euclid(-a,b), 1);
        assert_eq!(rem_euclid(a,-b), 3);
        assert_eq!(rem_euclid(-a,-b), 1);
    }


    #[bench]
    fn bench_aaabs(b: &mut Bencher) {
        let mut d:Vec<i32>=(-N..=N).collect();
        let mut n:Vec<i32>=(-N..0).chain(1..=N).collect();
        let mut rng=SmallRng::from_seed([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,21]);
        n.shuffle(&mut rng);
        d.shuffle(&mut rng);
        n.shuffle(&mut rng);
        b.iter(||{
            let mut res=0;
            for i in &d {
                for j in &n {
                    res+=rem_euclid(*i,*j);
                }
            }
            res
        });
    }
    #[bench]
    fn bench_aadefault(b: &mut Bencher) {
        let mut d:Vec<i32>=(-N..=N).collect();
        let mut n:Vec<i32>=(-N..0).chain(1..=N).collect();
        let mut rng=SmallRng::from_seed([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,21]);
        n.shuffle(&mut rng);
        d.shuffle(&mut rng);
        n.shuffle(&mut rng);
        b.iter(||{
            let mut res=0;
            for i in &d {
                for j in &n {
                    res+=i.rem_euclid(*j);
                }
            }
            res
        });
    }

    #[bench]
    fn bench_abs(b: &mut Bencher) {
        let d:Vec<i32>=(-N..=N).collect();
        let n:Vec<i32>=(-N..0).chain(1..=N).collect();
        b.iter(||{
            let mut res=0;
            for i in &d {
                for j in &n {
                    res+=rem_euclid(*i,*j);
                }
            }
            res
        });
    }
    #[bench]
    fn bench_default(b: &mut Bencher) {
        let d:Vec<i32>=(-N..=N).collect();
        let n:Vec<i32>=(-N..0).chain(1..=N).collect();
        b.iter(||{
            let mut res=0;
            for i in &d {
                for j in &n {
                    res+=i.rem_euclid(*j);
                }
            }
            res
        });
    }

    #[bench]
    fn bench_zzabs(b: &mut Bencher) {
        let mut d:Vec<i32>=(-N..=N).collect();
        let mut n:Vec<i32>=(-N..0).chain(1..=N).collect();
        let mut rng=SmallRng::from_seed([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,21]);
        d.shuffle(&mut rng);
        n.shuffle(&mut rng);
        d.shuffle(&mut rng);
        b.iter(||{
            let mut res=0;
            for i in &d {
                for j in &n {
                    res+=rem_euclid(*i,*j);
                }
            }
            res
        });
    }
    #[bench]
    fn bench_zzdefault(b: &mut Bencher) {
        let mut d:Vec<i32>=(-N..=N).collect();
        let mut n:Vec<i32>=(-N..0).chain(1..=N).collect();
        let mut rng=SmallRng::from_seed([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,21]);
        d.shuffle(&mut rng);
        n.shuffle(&mut rng);
        d.shuffle(&mut rng);
        b.iter(||{
            let mut res=0;
            for i in &d {
                for j in &n {
                    res+=i.rem_euclid(*j);
                }
            }
            res
        });
    }
}
```
@Neutron3529 Neutron3529 changed the title Optimize the code to run faster. faster div-euclid for int Nov 4, 2022
Copy link
Member

@thomcc thomcc left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks good and is definitely a correct change, but i think the comment needs work. I gave a suggestion which you can take or not.

library/core/src/num/int_macros.rs Outdated Show resolved Hide resolved
@thomcc thomcc changed the title faster div-euclid for int Improve performance of rem_euclid() for signed integers Nov 4, 2022
@thomcc
Copy link
Member

thomcc commented Nov 4, 2022

I was not able to reproduce these benchmark numbers locally. I actually found your version consistently slightly slower. Note that wrapping_abs contains a branch.

In your benchmark, the outer loop has the same value every iteration -- when checking the version that is hard on the branch predictor, you should be sure to shuffle the groups of (a, b), for example:

fn samples() -> Vec<(i32, i32)> {
	let n = 8000;
    let mut all = (-n..=n)
        .flat_map(|n| (-n..=n).map(move |d| (n, d)))
        .filter(|(n, d)| n.checked_rem(*d).is_some())
        .collect::<Vec<_>>();
    let mut r = SmallRng::from_seed(SEED);
    all.shuffle(&mut r);
    all
}

@rustbot author

@rustbot rustbot added S-waiting-on-author Status: This is awaiting some action (such as code changes or more information) from the author. and removed S-waiting-on-review Status: Awaiting review from the assignee but also interested parties. labels Nov 4, 2022
@thomcc
Copy link
Member

thomcc commented Nov 4, 2022

Actually, this is a more decisive win on x86, although it's still slight. Hmm.

@thomcc
Copy link
Member

thomcc commented Nov 4, 2022

My benchmark seems around ~5% slower on aarch64, but has smaller code size. On x86_64 it's 10% faster with much cleaner/smaller assembly and one fewer potentially-unpredictable branch. I think we should probably take it.

Please update the comment, though.

I'll trigger a perf run too, just in case this happens to be used in some weird data structure in the compiler.

@bors try @rust-timer queue

@rust-timer
Copy link
Collaborator

Awaiting bors try build completion.

@rustbot label: +S-waiting-on-perf

@rustbot rustbot added the S-waiting-on-perf Status: Waiting on a perf run to be completed. label Nov 4, 2022
@bors
Copy link
Contributor

bors commented Nov 4, 2022

⌛ Trying commit aafe6db with merge 4e489a0c24dfe15e921c177ef0573acc977c8954...

@bors
Copy link
Contributor

bors commented Nov 4, 2022

☀️ Try build successful - checks-actions
Build commit: 4e489a0c24dfe15e921c177ef0573acc977c8954 (4e489a0c24dfe15e921c177ef0573acc977c8954)

@rust-timer
Copy link
Collaborator

Queued 4e489a0c24dfe15e921c177ef0573acc977c8954 with parent 6718ea1, future comparison URL.

@Neutron3529
Copy link
Contributor Author

Please update the comment, though.

The comment is updated.

@rust-timer
Copy link
Collaborator

Finished benchmarking commit (4e489a0c24dfe15e921c177ef0573acc977c8954): comparison URL.

Overall result: no relevant changes - no action needed

Benchmarking this pull request likely means that it is perf-sensitive, so we're automatically marking it as not fit for rolling up. While you can manually mark this PR as fit for rollup, we strongly recommend not doing so since this PR may lead to changes in compiler perf.

@bors rollup=never
@rustbot label: +S-waiting-on-review -S-waiting-on-perf -perf-regression

Instruction count

This benchmark run did not return any relevant results for this metric.

Max RSS (memory usage)

This benchmark run did not return any relevant results for this metric.

Cycles

This benchmark run did not return any relevant results for this metric.

@rustbot rustbot added S-waiting-on-review Status: Awaiting review from the assignee but also interested parties. and removed S-waiting-on-perf Status: Waiting on a perf run to be completed. labels Nov 4, 2022
@thomcc
Copy link
Member

thomcc commented Nov 12, 2022

@bors r+

@bors
Copy link
Contributor

bors commented Nov 12, 2022

📌 Commit d81a0e9 has been approved by thomcc

It is now in the queue for this repository.

@bors bors removed the S-waiting-on-review Status: Awaiting review from the assignee but also interested parties. label Nov 12, 2022
@bors bors added S-waiting-on-bors Status: Waiting on bors to run and complete tests. Bors will change the label on completion. and removed S-waiting-on-author Status: This is awaiting some action (such as code changes or more information) from the author. labels Nov 12, 2022
@bors
Copy link
Contributor

bors commented Nov 12, 2022

⌛ Testing commit d81a0e9 with merge 6284998...

@bors
Copy link
Contributor

bors commented Nov 12, 2022

☀️ Test successful - checks-actions
Approved by: thomcc
Pushing 6284998 to master...

@bors bors added the merged-by-bors This PR was explicitly merged by bors. label Nov 12, 2022
@bors bors merged commit 6284998 into rust-lang:master Nov 12, 2022
@rustbot rustbot added this to the 1.67.0 milestone Nov 12, 2022
@rust-timer
Copy link
Collaborator

Finished benchmarking commit (6284998): comparison URL.

Overall result: no relevant changes - no action needed

@rustbot label: -perf-regression

Instruction count

This benchmark run did not return any relevant results for this metric.

Max RSS (memory usage)

Results

This is a less reliable metric that may be of interest but was not used to determine the overall result at the top of this comment.

mean range count
Regressions ❌
(primary)
- - 0
Regressions ❌
(secondary)
3.8% [3.8%, 3.8%] 1
Improvements ✅
(primary)
- - 0
Improvements ✅
(secondary)
-6.3% [-6.3%, -6.3%] 1
All ❌✅ (primary) - - 0

Cycles

This benchmark run did not return any relevant results for this metric.

Aaron1011 pushed a commit to Aaron1011/rust that referenced this pull request Jan 6, 2023
Improve performance of `rem_euclid()` for signed integers

such code is copy from
https://github.com/rust-lang/rust/blob/master/library/std/src/f32.rs and
https://github.com/rust-lang/rust/blob/master/library/std/src/f64.rs
using `r+rhs.abs()` is faster than calc it with an if clause. Bench result:
```
$ cargo bench
   Compiling div-euclid v0.1.0 (/me/div-euclid)
    Finished bench [optimized] target(s) in 1.01s
     Running unittests src/lib.rs (target/release/deps/div_euclid-7a4530ca7817d1ef)

running 7 tests
test tests::it_works ... ignored
test tests::bench_aaabs     ... bench:  10,498,793 ns/iter (+/- 104,360)
test tests::bench_aadefault ... bench:  11,061,862 ns/iter (+/- 94,107)
test tests::bench_abs       ... bench:  10,477,193 ns/iter (+/- 81,942)
test tests::bench_default   ... bench:  10,622,983 ns/iter (+/- 25,119)
test tests::bench_zzabs     ... bench:  10,481,971 ns/iter (+/- 43,787)
test tests::bench_zzdefault ... bench:  11,074,976 ns/iter (+/- 29,633)

test result: ok. 0 passed; 0 failed; 1 ignored; 6 measured; 0 filtered out; finished in 19.35s
```
It seems that, default `rem_euclid` triggered a branch prediction, thus `bench_default` is faster than `bench_aadefault` and `bench_aadefault`, which shuffles the order of calculations. but all of them slower than what it was in `f64`'s and `f32`'s `rem_euclid`, thus I submit this PR.

bench code:
```rust
#![feature(test)]
extern crate test;

fn rem_euclid(a:i32,rhs:i32)->i32{
    let r = a % rhs;
    if r < 0 { r + rhs.abs() } else { r }
}

#[cfg(test)]
mod tests {
    use super::*;
    use test::Bencher;
    use rand::prelude::*;
    use rand::rngs::SmallRng;
    const N:i32=1000;
    #[test]
    fn it_works() {
        let a: i32 = 7; // or any other integer type
        let b = 4;

        let d:Vec<i32>=(-N..=N).collect();
        let n:Vec<i32>=(-N..0).chain(1..=N).collect();

        for i in &d {
            for j in &n {
                assert_eq!(i.rem_euclid(*j),rem_euclid(*i,*j));
            }
        }

        assert_eq!(rem_euclid(a,b), 3);
        assert_eq!(rem_euclid(-a,b), 1);
        assert_eq!(rem_euclid(a,-b), 3);
        assert_eq!(rem_euclid(-a,-b), 1);
    }

    #[bench]
    fn bench_aaabs(b: &mut Bencher) {
        let mut d:Vec<i32>=(-N..=N).collect();
        let mut n:Vec<i32>=(-N..0).chain(1..=N).collect();
        let mut rng=SmallRng::from_seed([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,21]);
        n.shuffle(&mut rng);
        d.shuffle(&mut rng);
        n.shuffle(&mut rng);
        b.iter(||{
            let mut res=0;
            for i in &d {
                for j in &n {
                    res+=rem_euclid(*i,*j);
                }
            }
            res
        });
    }
    #[bench]
    fn bench_aadefault(b: &mut Bencher) {
        let mut d:Vec<i32>=(-N..=N).collect();
        let mut n:Vec<i32>=(-N..0).chain(1..=N).collect();
        let mut rng=SmallRng::from_seed([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,21]);
        n.shuffle(&mut rng);
        d.shuffle(&mut rng);
        n.shuffle(&mut rng);
        b.iter(||{
            let mut res=0;
            for i in &d {
                for j in &n {
                    res+=i.rem_euclid(*j);
                }
            }
            res
        });
    }

    #[bench]
    fn bench_abs(b: &mut Bencher) {
        let d:Vec<i32>=(-N..=N).collect();
        let n:Vec<i32>=(-N..0).chain(1..=N).collect();
        b.iter(||{
            let mut res=0;
            for i in &d {
                for j in &n {
                    res+=rem_euclid(*i,*j);
                }
            }
            res
        });
    }
    #[bench]
    fn bench_default(b: &mut Bencher) {
        let d:Vec<i32>=(-N..=N).collect();
        let n:Vec<i32>=(-N..0).chain(1..=N).collect();
        b.iter(||{
            let mut res=0;
            for i in &d {
                for j in &n {
                    res+=i.rem_euclid(*j);
                }
            }
            res
        });
    }

    #[bench]
    fn bench_zzabs(b: &mut Bencher) {
        let mut d:Vec<i32>=(-N..=N).collect();
        let mut n:Vec<i32>=(-N..0).chain(1..=N).collect();
        let mut rng=SmallRng::from_seed([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,21]);
        d.shuffle(&mut rng);
        n.shuffle(&mut rng);
        d.shuffle(&mut rng);
        b.iter(||{
            let mut res=0;
            for i in &d {
                for j in &n {
                    res+=rem_euclid(*i,*j);
                }
            }
            res
        });
    }
    #[bench]
    fn bench_zzdefault(b: &mut Bencher) {
        let mut d:Vec<i32>=(-N..=N).collect();
        let mut n:Vec<i32>=(-N..0).chain(1..=N).collect();
        let mut rng=SmallRng::from_seed([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,21]);
        d.shuffle(&mut rng);
        n.shuffle(&mut rng);
        d.shuffle(&mut rng);
        b.iter(||{
            let mut res=0;
            for i in &d {
                for j in &n {
                    res+=i.rem_euclid(*j);
                }
            }
            res
        });
    }
}
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
merged-by-bors This PR was explicitly merged by bors. S-waiting-on-bors Status: Waiting on bors to run and complete tests. Bors will change the label on completion. T-libs Relevant to the library team, which will review and decide on the PR/issue.
Projects
None yet
Development

Successfully merging this pull request may close these issues.

6 participants