From c064b6560b7ce0adeb9bbf5d7dcf12b1acb0c807 Mon Sep 17 00:00:00 2001 From: Sebastian Pop Date: Thu, 29 Apr 2021 16:15:50 +0000 Subject: [PATCH] [Arm64] use isb instruction instead of yield in spin loops On arm64 we have seen on several databases that ISB (instruction synchronization barrier) is better to use than yield in a spin loop. The yield instruction is a nop. The isb instruction puts the processor to sleep for some short time. isb is a good equivalent to the pause instruction on x86. Below is an experiment that shows the effects of yield and isb on Arm64 and the time of a pause instruction on x86 Intel processors. The micro-benchmarks use https://github.com/google/benchmark.git $ cat a.cc static void BM_scalar_increment(benchmark::State& state) { int i = 0; for (auto _ : state) benchmark::DoNotOptimize(i++); } BENCHMARK(BM_scalar_increment); static void BM_yield(benchmark::State& state) { for (auto _ : state) asm volatile("yield"::); } BENCHMARK(BM_yield); static void BM_isb(benchmark::State& state) { for (auto _ : state) asm volatile("isb"::); } BENCHMARK(BM_isb); BENCHMARK_MAIN(); $ g++ -o run a.cc -O2 -lbenchmark -lpthread $ ./run -------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------- AWS Graviton2 (Neoverse-N1) processor: BM_scalar_increment 0.485 ns 0.485 ns 1000000000 BM_yield 0.400 ns 0.400 ns 1000000000 BM_isb 13.2 ns 13.2 ns 52993304 AWS Graviton (A-72) processor: BM_scalar_increment 0.897 ns 0.874 ns 801558633 BM_yield 0.877 ns 0.875 ns 800002377 BM_isb 13.0 ns 12.7 ns 55169412 Apple Arm64 M1 processor: BM_scalar_increment 0.315 ns 0.315 ns 1000000000 BM_yield 0.313 ns 0.313 ns 1000000000 BM_isb 9.06 ns 9.06 ns 77259282 static void BM_pause(benchmark::State& state) { for (auto _ : state) asm volatile("pause"::); } BENCHMARK(BM_pause); Intel Skylake processor: BM_scalar_increment 0.295 ns 0.295 ns 1000000000 BM_pause 41.7 ns 41.7 ns 16780553 Tested on Graviton2 aarch64-linux with `./x.py test`. --- library/core/src/hint.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/core/src/hint.rs b/library/core/src/hint.rs index 313729581acd9..8e7c95abd6872 100644 --- a/library/core/src/hint.rs +++ b/library/core/src/hint.rs @@ -128,7 +128,7 @@ pub fn spin_loop() { #[cfg(target_arch = "aarch64")] { // SAFETY: the `cfg` attr ensures that we only execute this on aarch64 targets. - unsafe { crate::arch::aarch64::__yield() }; + unsafe { crate::arch::aarch64::__isb(crate::arch::aarch64::SY) }; } #[cfg(target_arch = "arm")] {