add checks for MOVBE instruction and use them

Signed-off-by: Attila Fülöp <attila@fueloep.org>
AttilaFueloep committed Dec 21, 2019
1 parent 4c39aad commit f77c83f
Showing 7 changed files with 93 additions and 32 deletions.
20 changes: 20 additions & 0 deletions config/toolchain-simd.m4
@@ -401,3 +401,23 @@ AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_PCLMULQDQ], [
AC_MSG_RESULT([no])
])
])

dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_MOVBE
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_MOVBE], [
AC_MSG_CHECKING([whether host toolchain supports MOVBE])
AC_LINK_IFELSE([AC_LANG_SOURCE([
[
void main()
{
__asm__ __volatile__("movbe 0(%eax), %eax");
}
]])], [
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_MOVBE], 1, [Define if host toolchain supports MOVBE])
], [
AC_MSG_RESULT([no])
])
])
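
For context — and not part of the commit — a minimal userland sketch of what MOVBE does: it is a load/store that swaps byte order in flight, equivalent to a MOV combined with a BSWAP. Unlike the configure test above, which only needs the assembler to accept the mnemonic, this program executes the instruction, so it must run on a CPU that reports the MOVBE feature.

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t be = 0x11223344;	/* pretend this is big-endian data */
	uint32_t host;

	/* Byte-swapping load; faults on CPUs without MOVBE. */
	__asm__ __volatile__("movbe %1, %0" : "=r" (host) : "m" (be));

	/* The portable equivalent: a plain load plus a byte swap. */
	printf("%08x %08x\n", host, __builtin_bswap32(be));
	return (0);
}

Both columns print 44332211, confirming the swapped load.
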
13 changes: 13 additions & 0 deletions include/os/linux/kernel/linux/simd_x86.h
@@ -477,6 +477,19 @@ zfs_pclmulqdq_available(void)
#endif
}

/*
* Check if MOVBE instruction is available
*/
static inline boolean_t
zfs_movbe_available(void)
{
#if defined(X86_FEATURE_MOVBE)
return (!!boot_cpu_has(X86_FEATURE_MOVBE));
#else
return (B_FALSE);
#endif
}

/*
* AVX-512 family of instruction sets:
*
15 changes: 14 additions & 1 deletion lib/libspl/include/sys/simd.h
@@ -77,7 +77,8 @@ typedef enum cpuid_inst_sets {
AVX512ER,
AVX512VL,
AES,
PCLMULQDQ
PCLMULQDQ,
MOVBE
} cpuid_inst_sets_t;

/*
@@ -101,6 +102,7 @@ typedef struct cpuid_feature_desc {
#define _AVX512VL_BIT (1U << 31) /* if used also check other levels */
#define _AES_BIT (1U << 25)
#define _PCLMULQDQ_BIT (1U << 1)
#define _MOVBE_BIT (1U << 22)

/*
* Descriptions of supported instruction sets
@@ -128,6 +130,7 @@ static const cpuid_feature_desc_t cpuid_features[] = {
[AVX512VL] = {7U, 0U, _AVX512ER_BIT, EBX },
[AES] = {1U, 0U, _AES_BIT, ECX },
[PCLMULQDQ] = {1U, 0U, _PCLMULQDQ_BIT, ECX },
[MOVBE] = {1U, 0U, _MOVBE_BIT, ECX },
};

/*
Expand Down Expand Up @@ -200,6 +203,7 @@ CPUID_FEATURE_CHECK(avx512er, AVX512ER);
CPUID_FEATURE_CHECK(avx512vl, AVX512VL);
CPUID_FEATURE_CHECK(aes, AES);
CPUID_FEATURE_CHECK(pclmulqdq, PCLMULQDQ);
CPUID_FEATURE_CHECK(movbe, MOVBE);

/*
* Detect register set support
@@ -332,6 +336,15 @@ zfs_pclmulqdq_available(void)
return (__cpuid_has_pclmulqdq());
}

/*
* Check if MOVBE instruction is available
*/
static inline boolean_t
zfs_movbe_available(void)
{
return (__cpuid_has_movbe());
}

/*
* AVX-512 family of instruction sets:
*
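
The userland path boils down to a single CPUID query. A minimal standalone sketch (not part of the commit, assuming GCC/Clang's <cpuid.h>) that mirrors the new table entry — leaf 1, register ECX, bit 22:

#include <cpuid.h>	/* __get_cpuid(), bit_MOVBE (1U << 22) */
#include <stdio.h>

int
main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* Leaf 1 holds the base feature flags, MOVBE among them. */
	if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) == 0) {
		puts("CPUID leaf 1 unsupported");
		return (1);
	}
	printf("MOVBE: %s\n", (ecx & bit_MOVBE) ? "yes" : "no");
	return (0);
}
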
53 changes: 28 additions & 25 deletions module/icp/algs/modes/gcm.c
@@ -30,7 +30,7 @@
#include <sys/byteorder.h>
#include <sys/simd.h>
#include <modes/gcm_impl.h>
#if defined(__x86_64__) && defined(HAVE_AVX)
#ifdef CAN_USE_GCM_ASM
#include <aes/aes_impl.h>
#endif

@@ -42,14 +42,14 @@
/* Select GCM implementation */
#define IMPL_FASTEST (UINT32_MAX)
#define IMPL_CYCLE (UINT32_MAX-1)
#if defined(__x86_64__) && defined(HAVE_AVX)
#ifdef CAN_USE_GCM_ASM
#define IMPL_AVX (UINT32_MAX-2)
#endif
#define GCM_IMPL_READ(i) (*(volatile uint32_t *) &(i))
static uint32_t icp_gcm_impl = IMPL_FASTEST;
static uint32_t user_sel_impl = IMPL_FASTEST;

#if defined(__x86_64__) && defined(HAVE_AVX)
#ifdef CAN_USE_GCM_ASM
/*
* Whether to use the optimized openssl gcm and ghash implementations.
* Set to true if module parameter icp_gcm_impl == "avx".
@@ -68,7 +68,7 @@ static int gcm_encrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t);
static int gcm_decrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t);
static int gcm_init_avx(gcm_ctx_t *, unsigned char *, size_t, unsigned char *,
size_t, size_t);
#endif
#endif /* ifdef CAN_USE_GCM_ASM */

/*
* Encrypt multiple blocks of data in GCM mode. Decrypt for GCM mode
@@ -81,7 +81,7 @@ gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
void (*copy_block)(uint8_t *, uint8_t *),
void (*xor_block)(uint8_t *, uint8_t *))
{
#if defined(__x86_64__) && defined(HAVE_AVX)
#ifdef CAN_USE_GCM_ASM
if (ctx->gcm_use_avx == B_TRUE)
return (gcm_mode_encrypt_contiguous_blocks_avx(
ctx, data, length, out, block_size));
@@ -212,10 +212,10 @@ gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
void (*copy_block)(uint8_t *, uint8_t *),
void (*xor_block)(uint8_t *, uint8_t *))
{
#if defined(__x86_64__) && defined(HAVE_AVX)
#ifdef CAN_USE_GCM_ASM
if (ctx->gcm_use_avx == B_TRUE)
return (gcm_encrypt_final_avx(ctx, out, block_size));
#endif /* defined(__x86_64__) && defined(HAVE_AVX) */
#endif

const gcm_impl_ops_t *gops;
uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
@@ -371,10 +371,10 @@ gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
void (*xor_block)(uint8_t *, uint8_t *))
{
#if defined(__x86_64__) && defined(HAVE_AVX)
#ifdef CAN_USE_GCM_ASM
if (ctx->gcm_use_avx == B_TRUE)
return (gcm_decrypt_final_avx(ctx, out, block_size));
#endif /* defined(__x86_64__) && defined(HAVE_AVX) */
#endif

const gcm_impl_ops_t *gops;
size_t pt_len;
@@ -614,7 +614,7 @@ gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
return (CRYPTO_MECHANISM_PARAM_INVALID);
}

#if defined(__x86_64__) && defined(HAVE_AVX)
#ifdef CAN_USE_GCM_ASM
/*
* Handle the "cycle" implementation by creating avx and non avx
* contexts alternately.
@@ -631,20 +631,20 @@ gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
}
/* Avx and non avx context initialization differs from here on. */
if (gcm_ctx->gcm_use_avx == B_FALSE) {
#endif /* #if defined(__x86_64__) && defined(HAVE_AVX) */
#endif /* ifdef CAN_USE_GCM_ASM */
if (gcm_init(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen,
gcm_param->pAAD, gcm_param->ulAADLen, block_size,
encrypt_block, copy_block, xor_block) != 0) {
rv = CRYPTO_MECHANISM_PARAM_INVALID;
}
#if defined(__x86_64__) && defined(HAVE_AVX)
#ifdef CAN_USE_GCM_ASM
} else {
if (gcm_init_avx(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen,
gcm_param->pAAD, gcm_param->ulAADLen, block_size) != 0) {
rv = CRYPTO_MECHANISM_PARAM_INVALID;
}
}
#endif /* defined(__x86_64__) && defined(HAVE_AVX) */
#endif /* ifdef CAN_USE_GCM_ASM */

return (rv);
}
@@ -674,7 +674,7 @@ gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
return (CRYPTO_MECHANISM_PARAM_INVALID);
}

#if defined(__x86_64__) && defined(HAVE_AVX)
#ifdef CAN_USE_GCM_ASM
/*
* Handle the "cycle" implementation by creating avx and non avx
* contexts alternately.
@@ -691,20 +691,20 @@ gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
}
/* Avx and non avx context initialization differs from here on. */
if (gcm_ctx->gcm_use_avx == B_FALSE) {
#endif /* defined(__x86_64__) && defined(HAVE_AVX) */
#endif /* ifdef CAN_USE_GCM_ASM */
if (gcm_init(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN,
gmac_param->pAAD, gmac_param->ulAADLen, block_size,
encrypt_block, copy_block, xor_block) != 0) {
rv = CRYPTO_MECHANISM_PARAM_INVALID;
}
#if defined(__x86_64__) && defined(HAVE_AVX)
#ifdef CAN_USE_GCM_ASM
} else {
if (gcm_init_avx(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN,
gmac_param->pAAD, gmac_param->ulAADLen, block_size) != 0) {
rv = CRYPTO_MECHANISM_PARAM_INVALID;
}
}
#endif /* defined(__x86_64__) && defined(HAVE_AVX) */
#endif /* ifdef CAN_USE_GCM_ASM */

return (rv);
}
@@ -834,7 +834,7 @@ gcm_impl_init(void)

strcpy(gcm_fastest_impl.name, "fastest");

#if defined(__x86_64) && defined(HAVE_AVX)
#ifdef CAN_USE_GCM_ASM
/* FIXME: Should we make `avx` the `fastest` implementation? */
if (gcm_avx_will_work()) {
gcm_set_avx(B_TRUE);
@@ -851,7 +851,7 @@ static const struct {
} gcm_impl_opts[] = {
{ "cycle", IMPL_CYCLE },
{ "fastest", IMPL_FASTEST },
#if defined(__x86_64__) && defined(HAVE_AVX)
#ifdef CAN_USE_GCM_ASM
{ "avx", IMPL_AVX },
#endif
};
@@ -887,7 +887,7 @@ gcm_impl_set(const char *val)

/* Check mandatory options */
for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) {
#if defined(__x86_64__) && defined(HAVE_AVX)
#ifdef CAN_USE_GCM_ASM
/* Ignore avx implementation if it won't work */
if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) {
continue;
@@ -911,7 +911,7 @@
}
}
}
#if defined(__x86_64__) && defined(HAVE_AVX)
#ifdef CAN_USE_GCM_ASM
/* FIXME: Should we make `avx` the `fastest` implementation? */
if (gcm_avx_will_work() == B_TRUE &&
(impl == IMPL_AVX || impl == IMPL_FASTEST)) {
@@ -950,7 +950,7 @@ icp_gcm_impl_get(char *buffer, zfs_kernel_param_t *kp)

/* list mandatory options */
for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) {
#if defined(__x86_64__) && defined(HAVE_AVX)
#ifdef CAN_USE_GCM_ASM
/* Ignore avx implementation if it won't work */
if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) {
continue;
@@ -974,7 +974,7 @@ module_param_call(icp_gcm_impl, icp_gcm_impl_set, icp_gcm_impl_get,
MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation.");
#endif /* defined(__KERNEL) */

#if defined(__x86_64__) && defined(HAVE_AVX)
#ifdef CAN_USE_GCM_ASM
#define GCM_BLOCK_LEN 16
/*
* The openssl asm routines are 6x aggregated and need that many bytes
@@ -1031,7 +1031,10 @@ extern size_t aesni_gcm_decrypt(const uint8_t *, uint8_t *, size_t,
static inline boolean_t
gcm_avx_will_work(void)
{
return (kfpu_allowed() && zfs_avx_available());
/* Avx should imply aes-ni and pclmulqdq, but make sure anyhow. */
return (kfpu_allowed() &&
zfs_avx_available() && zfs_movbe_available() &&
zfs_aes_available() && zfs_pclmulqdq_available());
}

static inline void
Expand Down Expand Up @@ -1532,4 +1535,4 @@ MODULE_PARM_DESC(icp_gcm_avx_chunk_size,
"How many bytes to process while owning the FPU");

#endif /* defined(__KERNEL) */
#endif /* defined(__x86_64__) && defined(HAVE_AVX) */
#endif /* ifdef CAN_USE_GCM_ASM */
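
One detail worth spelling out is the icp_gcm_avx_chunk_size parameter at the end of this hunk ("How many bytes to process while owning the FPU"): the avx routines take the FPU for one bounded chunk at a time, so large requests cannot monopolize it. A standalone sketch of that pattern — kfpu_begin()/kfpu_end() are stubbed, and gcm_process_chunked() is a hypothetical name, not the commit's code:

#include <stddef.h>
#include <stdint.h>

/* Stand-ins: in the ICP these save and restore kernel FPU state. */
static void kfpu_begin(void) { }
static void kfpu_end(void) { }

/*
 * Hypothetical analogue of icp_gcm_avx_chunk_size; the real code also
 * keeps the chunk a multiple of 6 * GCM_BLOCK_LEN, since the openssl
 * asm routines are 6x aggregated.
 */
static size_t gcm_avx_chunk_size = 32 * 1024;

static void
gcm_process_chunked(const uint8_t *in, uint8_t *out, size_t len,
    void (*fn)(const uint8_t *, uint8_t *, size_t))
{
	while (len > 0) {
		size_t n = (len < gcm_avx_chunk_size) ?
		    len : gcm_avx_chunk_size;

		kfpu_begin();
		fn(in, out, n);		/* e.g. an aesni_gcm_* call */
		kfpu_end();

		in += n;
		out += n;
		len -= n;
	}
}
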
5 changes: 3 additions & 2 deletions module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S
@@ -44,7 +44,8 @@
# and modified for ICP. Modifications are kept at a bare minimum to ease later
# upstream merges.

#if defined(__x86_64__) && defined(HAVE_AVX)
#if defined(__x86_64__) && defined(HAVE_AVX) && \
defined(HAVE_AES) && defined(HAVE_PCLMULQDQ)

.text

@@ -888,4 +889,4 @@ atomic_toggle_boolean_nv:
.section .note.GNU-stack,"",%progbits
#endif

#endif /* defined(__x86_64__) && defined(HAVE_AVX) */
#endif /* defined(__x86_64__) && defined(HAVE_AVX) && defined(HAVE_AES) ... */
5 changes: 3 additions & 2 deletions module/icp/asm-x86_64/modes/ghash-x86_64.S
@@ -94,7 +94,8 @@
# and modified for ICP. Modifications are kept at a bare minimum to ease later
# upstream merges.

#if defined(__x86_64__) && defined(HAVE_AVX)
#if defined(__x86_64__) && defined(HAVE_AVX) && \
defined(HAVE_AES) && defined(HAVE_PCLMULQDQ)

.text

@@ -710,4 +711,4 @@ gcm_ghash_avx:
.section .note.GNU-stack,"",%progbits
#endif

#endif /* defined(__x86_64__) && defined(HAVE_AVX) */
#endif /* defined(__x86_64__) && defined(HAVE_AVX) && defined(HAVE_AES) ... */
14 changes: 12 additions & 2 deletions module/icp/include/modes/modes.h
@@ -34,6 +34,16 @@ extern "C" {
#include <sys/crypto/common.h>
#include <sys/crypto/impl.h>

/*
* Does the build chain support all instructions needed for the GCM assembler
* routines. AVX support should imply AES-NI and PCLMULQDQ, but make sure
* anyhow.
*/
#if defined(__x86_64__) && defined(HAVE_AVX) && \
defined(HAVE_AES) && defined(HAVE_PCLMULQDQ)
#define CAN_USE_GCM_ASM
#endif

#define ECB_MODE 0x00000002
#define CBC_MODE 0x00000004
#define CTR_MODE 0x00000008
@@ -214,14 +224,14 @@ typedef struct gcm_ctx {
*/
uint64_t gcm_ghash[2];
uint64_t gcm_H[2];
#if defined(__x86_64__) && defined(HAVE_AVX)
#ifdef CAN_USE_GCM_ASM
uint64_t gcm_Htable[12][2];
#endif
uint64_t gcm_J0[2];
uint64_t gcm_len_a_len_c[2];
uint8_t *gcm_pt_buf;
int gcm_kmflag;
#if defined(__x86_64__) && defined(HAVE_AVX)
#ifdef CAN_USE_GCM_ASM
boolean_t gcm_use_avx;
#endif
} gcm_ctx_t;
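
Taken together, the change gates the asm path twice: CAN_USE_GCM_ASM answers the compile-time question "can the toolchain build the GCM assembler routines at all?", while the zfs_*_available() helpers answer the runtime question "does this CPU implement the instructions?". A condensed sketch of how the two compose — not the commit's code; gcm_asm_usable() is a hypothetical name, and the stubs stand in for the sys/simd.h helpers:

typedef enum { B_FALSE, B_TRUE } boolean_t;

/* Stand-ins for the sys/simd.h feature helpers. */
static boolean_t kfpu_allowed(void)		{ return (B_TRUE); }
static boolean_t zfs_avx_available(void)	{ return (B_TRUE); }
static boolean_t zfs_movbe_available(void)	{ return (B_TRUE); }
static boolean_t zfs_aes_available(void)	{ return (B_TRUE); }
static boolean_t zfs_pclmulqdq_available(void)	{ return (B_TRUE); }

#ifdef CAN_USE_GCM_ASM
/* Toolchain built the asm; the CPU still has to qualify at runtime. */
static boolean_t
gcm_asm_usable(void)
{
	return (kfpu_allowed() && zfs_avx_available() &&
	    zfs_movbe_available() && zfs_aes_available() &&
	    zfs_pclmulqdq_available());
}
#else
/* Toolchain could not build the asm; runtime is never asked. */
#define	gcm_asm_usable()	(B_FALSE)
#endif
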
