From d15e9d2fbdb3ccb6e175efebe93a1a7074f7732e Mon Sep 17 00:00:00 2001
From: Jethro Beekman
Date: Wed, 30 Jan 2019 17:08:18 +0530
Subject: [PATCH] Optionally use std::is_x86_feature_detected for feature detection
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Audit of all CPUID features used by *ring* at the time of the commit:

|              | chacha | aesni | mont5 | mont | ghash | sha512 | poly1305 | Rust |
|--------------|--------|-------|-------|------|-------|--------|----------|------|
| ADX          |        |       | *     | *    |       |        |          |      |
| AES          |        |       |       |      |       |        |          | *    |
| AVX          |        | *     |       |      |       | *      | *        | *    |
| AVX2         | *      |       |       | *    |       | *      | *        |      |
| AVX512F      | *      |       |       |      |       |        |          |      |
| BMI1         |        |       | *     |      |       | *      |          |      |
| BMI2         |        |       | *     | *    |       | *      |          |      |
| FXSR         |        |       |       |      |       |        |          | 5    |
| “intel CPU”  |        |       |       |      |       | *      |          |      |
| MOVBE        | 1,2    | *     |       |      | 1,3   |        |          | 6    |
| PCLMULQDQ    |        |       |       |      |       |        |          | *    |
| SHA          |        |       |       |      |       | *      |          |      |
| SSSE3        | *      |       |       |      |       | *      |          | *    |
| XOP          |        | 4     |       |      |       |        |          |      |
| XSAVE        | 1      | 1     |       |      | 1     |        |          |      |

1. Instruction not used, only used to detect Atom processors
2. If Atom, change the input lengths for which different code paths are taken (presumably for performance)
3. If Atom, avoid one code path for performance
4. Instruction not used
5. Instruction not used, only used to detect PCLMULQDQ
6.
Instruction not used, only used to detect AVX --- Cargo.toml | 1 + src/cpu.rs | 142 +++++++++++++++++++++++++++++++++++++++++++++++++++-- src/lib.rs | 6 +++ 3 files changed, 145 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 35b6e8d849..2d3c6389e1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -319,6 +319,7 @@ internal_benches = [] slow_tests = [] test_logging = [] use_heap = [] +force_std_detection = [] # XXX: debug = false because of https://github.com/rust-lang/rust/issues/34122 diff --git a/src/cpu.rs b/src/cpu.rs index ce65a646ad..79a11e81b0 100644 --- a/src/cpu.rs +++ b/src/cpu.rs @@ -32,11 +32,145 @@ pub(crate) fn features() -> Features { INIT.call_once(|| { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { - extern "C" { - fn GFp_cpuid_setup(); + #[cfg(any(feature = "force_std_detection", all(target_env = "sgx" /*, target_vendor = "fortanix"*/)))] + { + extern "C" { + static mut GFp_ia32cap_P: [u32; 4]; + } + let [l1edx, l1ecx, l7ebx, l7ecx] = unsafe { &mut GFp_ia32cap_P }; + + if is_x86_feature_detected!("aes") { + *l1ecx |= 1<<25; + } + if is_x86_feature_detected!("pclmulqdq") { + *l1ecx |= 1<<1; + } + if is_x86_feature_detected!("rdrand") { + *l1ecx |= 1<<30; + } + if is_x86_feature_detected!("rdseed") { + *l7ebx |= 1<<18; + } + if is_x86_feature_detected!("tsc") { + *l1edx |= 1<<4; + } + if is_x86_feature_detected!("mmx") { + *l1edx |= 1<<23; + } + if is_x86_feature_detected!("sse") { + *l1edx |= 1<<25; + } + if is_x86_feature_detected!("sse2") { + *l1edx |= 1<<26; + } + if is_x86_feature_detected!("sse3") { + *l1ecx |= 1<<0; + } + if is_x86_feature_detected!("ssse3") { + *l1ecx |= 1<<9; + } + if is_x86_feature_detected!("sse4.1") { + *l1ecx |= 1<<19; + } + if is_x86_feature_detected!("sse4.2") { + *l1ecx |= 1<<20; + } + if is_x86_feature_detected!("sha") { + *l7ebx |= 1<<29; + } + if is_x86_feature_detected!("avx") { + *l1ecx |= 1<<28; + } + if is_x86_feature_detected!("avx2") { + *l7ebx |= 1<<5; + } + if 
is_x86_feature_detected!("avx512f") { + *l7ebx |= 1<<16; + } + if is_x86_feature_detected!("avx512cd") { + *l7ebx |= 1<<28; + } + if is_x86_feature_detected!("avx512er") { + *l7ebx |= 1<<27; + } + if is_x86_feature_detected!("avx512pf") { + *l7ebx |= 1<<26; + } + if is_x86_feature_detected!("avx512bw") { + *l7ebx |= 1<<30; + } + if is_x86_feature_detected!("avx512dq") { + *l7ebx |= 1<<17; + } + if is_x86_feature_detected!("avx512vl") { + *l7ebx |= 1<<31; + } + if is_x86_feature_detected!("avx512ifma") { + *l7ebx |= 1<<21; + } + if is_x86_feature_detected!("avx512vbmi") { + *l7ecx |= 1<<1; + } + if is_x86_feature_detected!("avx512vpopcntdq") { + *l7ecx |= 1<<14; + } + if is_x86_feature_detected!("fma") { + *l1ecx |= 1<<12; + } + if is_x86_feature_detected!("bmi1") { + *l7ebx |= 1<<3; + } + if is_x86_feature_detected!("bmi2") { + *l7ebx |= 1<<8; + } + if is_x86_feature_detected!("popcnt") { + *l1ecx |= 1<<23; + } + if is_x86_feature_detected!("fxsr") { + *l1edx |= 1<<24; + } + if is_x86_feature_detected!("xsave") { + *l1ecx |= 1<<26; + } + /* will be stable on 1.33.0 + if is_x86_feature_detected!("cmpxchg16b") { + *l1ecx |= 1<<13; + } + if is_x86_feature_detected!("adx") { + *l7ebx |= 1<<19; + } + */ + + // Rust can't detect the MOVBE feature yet, but it's widely + // available. + *l1ecx |= 1<<22; + + // This bit is reserved in the CPUID specification, but the + // BoringSSL detection code uses it to represent that this + // is an Intel CPU. However, this bit is only used in + // conjunction with the AVX bit to test for presence of + // AVX, thus serving no purpose. Always set it. 
+ *l1edx |= 1<<30; + + // Features that don't map to leaf 1 or leaf 7: + // Leaf 0xd: + // * xsaveopt + // * xsaves + // * xsavec + // Leaf 0x8000_0001: + // * sse4a + // * abm + // * lzcnt + // * tbm } - unsafe { - GFp_cpuid_setup(); + #[cfg(not(any(feature = "force_std_detection", all(target_env = "sgx" /*, target_vendor = "fortanix"*/))))] + { + extern "C" { + fn GFp_cpuid_setup(); + } + unsafe { + GFp_cpuid_setup(); + } } } diff --git a/src/lib.rs b/src/lib.rs index 3455ef79b5..d17be38603 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -30,6 +30,12 @@ //! dev_urandom_fallback feature is disabled, such //! fallbacks will not occur. See the documentation for //! rand::SystemRandom for more details. +//! force_std_detection +//! This is only applicable to x86. By default, ring will use +//! custom logic with the CPUID instruction to figure out which CPU +//! features are available. With this feature, the standard +//! std::is_x86_feature_detected macro will be used +//! instead. //! use_heap (default) //! Enable features that require use of the heap, RSA in particular. //!