diff --git a/config/toolchain-simd.m4 b/config/toolchain-simd.m4
index 37627b813bb3..d9f0607e716a 100644
--- a/config/toolchain-simd.m4
+++ b/config/toolchain-simd.m4
@@ -401,3 +401,23 @@ AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_PCLMULQDQ], [
 		AC_MSG_RESULT([no])
 	])
 ])
+
+dnl #
+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_MOVBE
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_MOVBE], [
+	AC_MSG_CHECKING([whether host toolchain supports MOVBE])
+
+	AC_LINK_IFELSE([AC_LANG_SOURCE([
+	[
+	void main()
+	{
+		__asm__ __volatile__("movbe 0(%eax), %eax");
+	}
+	]])], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE([HAVE_MOVBE], 1, [Define if host toolchain supports MOVBE])
+	], [
+		AC_MSG_RESULT([no])
+	])
+])
diff --git a/include/os/linux/kernel/linux/simd_x86.h b/include/os/linux/kernel/linux/simd_x86.h
index 67d8cdc3acca..cdd3286d2147 100644
--- a/include/os/linux/kernel/linux/simd_x86.h
+++ b/include/os/linux/kernel/linux/simd_x86.h
@@ -477,6 +477,19 @@ zfs_pclmulqdq_available(void)
 #endif
 }
 
+/*
+ * Check if MOVBE instruction is available
+ */
+static inline boolean_t
+zfs_movbe_available(void)
+{
+#if defined(X86_FEATURE_MOVBE)
+	return (!!boot_cpu_has(X86_FEATURE_MOVBE));
+#else
+	return (B_FALSE);
+#endif
+}
+
 /*
  * AVX-512 family of instruction sets:
  *
diff --git a/lib/libspl/include/sys/simd.h b/lib/libspl/include/sys/simd.h
index b25e476a33b8..99cec4c749dc 100644
--- a/lib/libspl/include/sys/simd.h
+++ b/lib/libspl/include/sys/simd.h
@@ -77,7 +77,8 @@ typedef enum cpuid_inst_sets {
 	AVX512ER,
 	AVX512VL,
 	AES,
-	PCLMULQDQ
+	PCLMULQDQ,
+	MOVBE
 } cpuid_inst_sets_t;
 
 /*
@@ -101,6 +102,7 @@ typedef struct cpuid_feature_desc {
 #define	_AVX512VL_BIT	(1U << 31) /* if used also check other levels */
 #define	_AES_BIT	(1U << 25)
 #define	_PCLMULQDQ_BIT	(1U << 1)
+#define	_MOVBE_BIT	(1U << 22)
 
 /*
  * Descriptions of supported instruction sets
@@ -128,6 +130,7 @@ static const cpuid_feature_desc_t cpuid_features[] = {
 	[AVX512VL]	= {7U, 0U,	_AVX512ER_BIT,	EBX	},
 	[AES]		= {1U, 0U,	_AES_BIT,	ECX	},
 	[PCLMULQDQ]	= {1U, 0U,	_PCLMULQDQ_BIT,	ECX	},
+	[MOVBE]		= {1U, 0U,	_MOVBE_BIT,	ECX	},
 };
 
 /*
@@ -200,6 +203,7 @@ CPUID_FEATURE_CHECK(avx512er, AVX512ER);
 CPUID_FEATURE_CHECK(avx512vl, AVX512VL);
 CPUID_FEATURE_CHECK(aes, AES);
 CPUID_FEATURE_CHECK(pclmulqdq, PCLMULQDQ);
+CPUID_FEATURE_CHECK(movbe, MOVBE);
 
 /*
  * Detect register set support
@@ -332,6 +336,15 @@ zfs_pclmulqdq_available(void)
 	return (__cpuid_has_pclmulqdq());
 }
 
+/*
+ * Check if MOVBE instruction is available
+ */
+static inline boolean_t
+zfs_movbe_available(void)
+{
+	return (__cpuid_has_movbe());
+}
+
 /*
  * AVX-512 family of instruction sets:
  *
diff --git a/module/icp/algs/modes/gcm.c b/module/icp/algs/modes/gcm.c
index 5f9ac3898f4c..31f280ea6966 100644
--- a/module/icp/algs/modes/gcm.c
+++ b/module/icp/algs/modes/gcm.c
@@ -30,7 +30,7 @@
 #include <sys/byteorder.h>
 #include <sys/simd.h>
 #include <modes/gcm_impl.h>
-#if defined(__x86_64__) && defined(HAVE_AVX)
+#ifdef CAN_USE_GCM_ASM
 #include <aes/aes_impl.h>
 #endif
 
@@ -42,14 +42,14 @@
 /* Select GCM implementation */
 #define	IMPL_FASTEST	(UINT32_MAX)
 #define	IMPL_CYCLE	(UINT32_MAX-1)
-#if defined(__x86_64__) && defined(HAVE_AVX)
+#ifdef CAN_USE_GCM_ASM
 #define	IMPL_AVX	(UINT32_MAX-2)
 #endif
 #define	GCM_IMPL_READ(i) (*(volatile uint32_t *) &(i))
 static uint32_t icp_gcm_impl = IMPL_FASTEST;
 static uint32_t user_sel_impl = IMPL_FASTEST;
 
-#if defined(__x86_64__) && defined(HAVE_AVX)
+#ifdef CAN_USE_GCM_ASM
 /*
  * Whether to use the optimized openssl gcm and ghash implementations.
  * Set to true if module parameter icp_gcm_impl == "avx".
@@ -68,7 +68,7 @@ static int gcm_encrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t);
 static int gcm_decrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t);
 static int gcm_init_avx(gcm_ctx_t *, unsigned char *, size_t, unsigned char *,
     size_t, size_t);
-#endif
+#endif /* ifdef CAN_USE_GCM_ASM */
 
 /*
  * Encrypt multiple blocks of data in GCM mode. Decrypt for GCM mode
@@ -81,7 +81,7 @@ gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
     void (*copy_block)(uint8_t *, uint8_t *),
     void (*xor_block)(uint8_t *, uint8_t *))
 {
-#if defined(__x86_64__) && defined(HAVE_AVX)
+#ifdef CAN_USE_GCM_ASM
 	if (ctx->gcm_use_avx == B_TRUE)
 		return (gcm_mode_encrypt_contiguous_blocks_avx(
 		    ctx, data, length, out, block_size));
@@ -212,10 +212,10 @@ gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
     void (*copy_block)(uint8_t *, uint8_t *),
     void (*xor_block)(uint8_t *, uint8_t *))
 {
-#if defined(__x86_64__) && defined(HAVE_AVX)
+#ifdef CAN_USE_GCM_ASM
 	if (ctx->gcm_use_avx == B_TRUE)
 		return (gcm_encrypt_final_avx(ctx, out, block_size));
-#endif /* defined(__x86_64__) && defined(HAVE_AVX) */
+#endif
 
 	const gcm_impl_ops_t *gops;
 	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
@@ -371,10 +371,10 @@ gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
     void (*xor_block)(uint8_t *, uint8_t *))
 {
-#if defined(__x86_64__) && defined(HAVE_AVX)
+#ifdef CAN_USE_GCM_ASM
 	if (ctx->gcm_use_avx == B_TRUE)
 		return (gcm_decrypt_final_avx(ctx, out, block_size));
-#endif /* defined(__x86_64__) && defined(HAVE_AVX) */
+#endif
 
 	const gcm_impl_ops_t *gops;
 	size_t pt_len;
@@ -614,7 +614,7 @@ gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
 		return (CRYPTO_MECHANISM_PARAM_INVALID);
 	}
 
-#if defined(__x86_64__) && defined(HAVE_AVX)
+#ifdef CAN_USE_GCM_ASM
 	/*
 	 * Handle the "cycle" implementation by creating avx and non avx
 	 * contexts alternately.
@@ -631,20 +631,20 @@ gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
 	}
 	/* Avx and non avx context initialization differs from here on. */
 	if (gcm_ctx->gcm_use_avx == B_FALSE) {
-#endif /* #if defined(__x86_64__) && defined(HAVE_AVX) */
+#endif /* ifdef CAN_USE_GCM_ASM */
 		if (gcm_init(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen,
 		    gcm_param->pAAD, gcm_param->ulAADLen, block_size,
 		    encrypt_block, copy_block, xor_block) != 0) {
 			rv = CRYPTO_MECHANISM_PARAM_INVALID;
 		}
-#if defined(__x86_64__) && defined(HAVE_AVX)
+#ifdef CAN_USE_GCM_ASM
 	} else {
 		if (gcm_init_avx(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen,
 		    gcm_param->pAAD, gcm_param->ulAADLen, block_size) != 0) {
 			rv = CRYPTO_MECHANISM_PARAM_INVALID;
 		}
 	}
-#endif /* defined(__x86_64__) && defined(HAVE_AVX) */
+#endif /* ifdef CAN_USE_GCM_ASM */
 
 	return (rv);
 }
@@ -674,7 +674,7 @@ gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
 		return (CRYPTO_MECHANISM_PARAM_INVALID);
 	}
 
-#if defined(__x86_64__) && defined(HAVE_AVX)
+#ifdef CAN_USE_GCM_ASM
 	/*
 	 * Handle the "cycle" implementation by creating avx and non avx
 	 * contexts alternately.
@@ -691,20 +691,20 @@ gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
 	}
 	/* Avx and non avx context initialization differs from here on. */
 	if (gcm_ctx->gcm_use_avx == B_FALSE) {
-#endif /* defined(__x86_64__) && defined(HAVE_AVX) */
+#endif /* ifdef CAN_USE_GCM_ASM */
 		if (gcm_init(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN,
 		    gmac_param->pAAD, gmac_param->ulAADLen, block_size,
 		    encrypt_block, copy_block, xor_block) != 0) {
 			rv = CRYPTO_MECHANISM_PARAM_INVALID;
 		}
-#if defined(__x86_64__) && defined(HAVE_AVX)
+#ifdef CAN_USE_GCM_ASM
 	} else {
 		if (gcm_init_avx(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN,
 		    gmac_param->pAAD, gmac_param->ulAADLen, block_size) != 0) {
 			rv = CRYPTO_MECHANISM_PARAM_INVALID;
 		}
 	}
-#endif /* defined(__x86_64__) && defined(HAVE_AVX) */
+#endif /* ifdef CAN_USE_GCM_ASM */
 
 	return (rv);
 }
@@ -834,7 +834,7 @@ gcm_impl_init(void)
 
 	strcpy(gcm_fastest_impl.name, "fastest");
 
-#if defined(__x86_64) && defined(HAVE_AVX)
+#ifdef CAN_USE_GCM_ASM
 	/* FIXME: Should we make `avx` the `fastest` implementation? */
 	if (gcm_avx_will_work()) {
 		gcm_set_avx(B_TRUE);
@@ -851,7 +851,7 @@ static const struct {
 } gcm_impl_opts[] = {
 		{ "cycle",	IMPL_CYCLE },
 		{ "fastest",	IMPL_FASTEST },
-#if defined(__x86_64__) && defined(HAVE_AVX)
+#ifdef CAN_USE_GCM_ASM
 		{ "avx",	IMPL_AVX },
 #endif
 };
@@ -887,7 +887,7 @@ gcm_impl_set(const char *val)
 
 	/* Check mandatory options */
 	for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) {
-#if defined(__x86_64__) && defined(HAVE_AVX)
+#ifdef CAN_USE_GCM_ASM
 		/* Ignore avx implementation if it won't work */
 		if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) {
 			continue;
@@ -911,7 +911,7 @@ gcm_impl_set(const char *val)
 			}
 		}
 	}
-#if defined(__x86_64__) && defined(HAVE_AVX)
+#ifdef CAN_USE_GCM_ASM
 	/* FIXME: Should we make `avx` the `fastest` implementation? */
 	if (gcm_avx_will_work() == B_TRUE &&
 	    (impl == IMPL_AVX || impl == IMPL_FASTEST)) {
@@ -950,7 +950,7 @@ icp_gcm_impl_get(char *buffer, zfs_kernel_param_t *kp)
 
 	/* list mandatory options */
 	for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) {
-#if defined(__x86_64__) && defined(HAVE_AVX)
+#ifdef CAN_USE_GCM_ASM
 		/* Ignore avx implementation if it won't work */
 		if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) {
 			continue;
@@ -974,7 +974,7 @@ module_param_call(icp_gcm_impl, icp_gcm_impl_set, icp_gcm_impl_get,
 MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation.");
 #endif /* defined(__KERNEL) */
 
-#if defined(__x86_64__) && defined(HAVE_AVX)
+#ifdef CAN_USE_GCM_ASM
 #define	GCM_BLOCK_LEN	16
 /*
  * The openssl asm routines are 6x aggregated and need that many bytes
@@ -1031,7 +1031,10 @@ extern size_t aesni_gcm_decrypt(const uint8_t *, uint8_t *, size_t,
 static inline boolean_t
 gcm_avx_will_work(void)
 {
-	return (kfpu_allowed() && zfs_avx_available());
+	/* Avx should imply aes-ni and pclmulqdq, but make sure anyhow. */
+	return (kfpu_allowed() &&
+	    zfs_avx_available() && zfs_movbe_available() &&
+	    zfs_aes_available() && zfs_pclmulqdq_available());
 }
 
 static inline void
@@ -1532,4 +1535,4 @@ MODULE_PARM_DESC(icp_gcm_avx_chunk_size,
 	"How many bytes to process while owning the FPU");
 
 #endif /* defined(__KERNEL) */
-#endif /* defined(__x86_64__) && defined(HAVE_AVX) */
+#endif /* ifdef CAN_USE_GCM_ASM */
diff --git a/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S b/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S
index bd95c371cf87..1e7b19a3c64d 100644
--- a/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S
+++ b/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S
@@ -44,7 +44,8 @@
 # and modified for ICP. Modification are kept at a bare minimum to ease later
 # upstream merges.
 
-#if defined(__x86_64__) && defined(HAVE_AVX)
+#if defined(__x86_64__) && defined(HAVE_AVX) && \
+    defined(HAVE_AES) && defined(HAVE_PCLMULQDQ)
 
 .text
 
@@ -888,4 +889,4 @@ atomic_toggle_boolean_nv:
 .section .note.GNU-stack,"",%progbits
 #endif
 
-#endif /* defined(__x86_64__) && defined(HAVE_AVX) */
+#endif /* defined(__x86_64__) && defined(HAVE_AVX) && defined(HAVE_AES) ... */
diff --git a/module/icp/asm-x86_64/modes/ghash-x86_64.S b/module/icp/asm-x86_64/modes/ghash-x86_64.S
index cb938dc46e58..90cc36b43a78 100644
--- a/module/icp/asm-x86_64/modes/ghash-x86_64.S
+++ b/module/icp/asm-x86_64/modes/ghash-x86_64.S
@@ -94,7 +94,8 @@
 # and modified for ICP. Modification are kept at a bare minimum to ease later
 # upstream merges.
 
-#if defined(__x86_64__) && defined(HAVE_AVX)
+#if defined(__x86_64__) && defined(HAVE_AVX) && \
+    defined(HAVE_AES) && defined(HAVE_PCLMULQDQ)
 
 .text
 
@@ -710,4 +711,4 @@ gcm_ghash_avx:
 .section .note.GNU-stack,"",%progbits
 #endif
 
-#endif /* defined(__x86_64__) && defined(HAVE_AVX) */
+#endif /* defined(__x86_64__) && defined(HAVE_AVX) && defined(HAVE_AES) ... */
diff --git a/module/icp/include/modes/modes.h b/module/icp/include/modes/modes.h
index d2aa3e3d6ae1..6a4939c40252 100644
--- a/module/icp/include/modes/modes.h
+++ b/module/icp/include/modes/modes.h
@@ -34,6 +34,16 @@ extern "C" {
 #include <sys/crypto/common.h>
 #include <sys/crypto/impl.h>
 
+/*
+ * Does the build chain support all instructions needed for the GCM assembler
+ * routines. AVX support should imply AES-NI and PCLMULQDQ, but make sure
+ * anyhow.
+ */
+#if defined(__x86_64__) && defined(HAVE_AVX) && \
+    defined(HAVE_AES) && defined(HAVE_PCLMULQDQ)
+#define	CAN_USE_GCM_ASM
+#endif
+
 #define	ECB_MODE			0x00000002
 #define	CBC_MODE			0x00000004
 #define	CTR_MODE			0x00000008
@@ -214,14 +224,14 @@ typedef struct gcm_ctx {
 	 */
 	uint64_t gcm_ghash[2];
 	uint64_t gcm_H[2];
-#if defined(__x86_64__) && defined(HAVE_AVX)
+#ifdef CAN_USE_GCM_ASM
 	uint64_t gcm_Htable[12][2];
 #endif
 	uint64_t gcm_J0[2];
 	uint64_t gcm_len_a_len_c[2];
 	uint8_t *gcm_pt_buf;
 	int gcm_kmflag;
-#if defined(__x86_64__) && defined(HAVE_AVX)
+#ifdef CAN_USE_GCM_ASM
 	boolean_t gcm_use_avx;
 #endif
 } gcm_ctx_t;
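
Note (not part of the patch): the _MOVBE_BIT definition added to lib/libspl/include/sys/simd.h corresponds to CPUID leaf 1, ECX bit 22. With this change, gcm_avx_will_work() returns B_FALSE on CPUs that have AVX but lack MOVBE (e.g. Intel Sandy Bridge/Ivy Bridge), so the generic GCM implementation is selected there. Below is a minimal user-space sketch of the same bit test, assuming GCC/Clang's <cpuid.h>; the helper name movbe_supported() is made up for illustration and does not appear in the patch.

/*
 * Illustrative stand-alone check: query CPUID leaf 1 and test ECX bit 22
 * (MOVBE), the bit encoded by _MOVBE_BIT above.
 */
#include <cpuid.h>	/* GCC/Clang CPUID wrapper, x86 only */
#include <stdio.h>

static int
movbe_supported(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* __get_cpuid() returns 0 if the requested leaf is unsupported. */
	if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) == 0)
		return (0);
	return ((ecx & (1U << 22)) != 0);
}

int
main(void)
{
	printf("MOVBE %s\n", movbe_supported() ? "available" : "not available");
	return (0);
}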