Skip to content

Commit

Permalink
Merge pull request #2258 from Niadb/dev
Browse files Browse the repository at this point in the history
Added STATIC_BMI2 for compile time detection of BMI2 on MSVC, when enabled various intrinsics are used
  • Loading branch information
Cyan4973 authored Aug 4, 2020
2 parents 60b56e3 + a8ebc14 commit 38e3854
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 15 deletions.
25 changes: 16 additions & 9 deletions lib/common/bitstream.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
#if defined (__cplusplus)
extern "C" {
#endif

/*
* This API consists of small unitary functions, which must be inlined for best performance.
* Since link-time-optimization is not available for all compilers,
Expand Down Expand Up @@ -141,8 +140,12 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
assert(val != 0);
{
# if defined(_MSC_VER) /* Visual */
unsigned long r=0;
return _BitScanReverse ( &r, val ) ? (unsigned)r : 0;
# if STATIC_BMI2 == 1
return _lzcnt_u32(val) ^ 31;
# else
unsigned long r = 0;
return _BitScanReverse(&r, val) ? (unsigned)r : 0;
# endif
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
return __builtin_clz (val) ^ 31;
# elif defined(__ICCARM__) /* IAR Intrinsic */
Expand Down Expand Up @@ -317,23 +320,27 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
return srcSize;
}

MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
{
return bitContainer >> start;
}

MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
{
U32 const regMask = sizeof(bitContainer)*8 - 1;
/* if start > regMask, bitstream is corrupted, and result is undefined */
assert(nbBits < BIT_MASK_SIZE);
return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
}

MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
{
#if STATIC_BMI2
return _bzhi_u64(bitContainer, nbBits);
#else
assert(nbBits < BIT_MASK_SIZE);
return bitContainer & BIT_mask[nbBits];
#endif
}

/*! BIT_lookBits() :
Expand All @@ -342,7 +349,7 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
* On 32-bits, maxNbBits==24.
* On 64-bits, maxNbBits==56.
* @return : value extracted */
MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
{
/* arbitrate between double-shift and shift+mask */
#if 1
Expand All @@ -365,7 +372,7 @@ MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
}

MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
{
bitD->bitsConsumed += nbBits;
}
Expand All @@ -374,7 +381,7 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
* Read (consume) next n bits from local register and update.
* Pay attention to not read more than nbBits contained into local register.
* @return : extracted value. */
MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
{
size_t const value = BIT_lookBits(bitD, nbBits);
BIT_skipBits(bitD, nbBits);
Expand Down
13 changes: 13 additions & 0 deletions lib/common/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,4 +183,17 @@
# pragma warning(disable : 4324) /* disable: C4324: padded structure */
#endif

/*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/
#ifndef STATIC_BMI2
# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86))
# ifdef __AVX2__ //MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2
# define STATIC_BMI2 1
# endif
# endif
#endif

#ifndef STATIC_BMI2
#define STATIC_BMI2 0
#endif

#endif /* ZSTD_COMPILER_H */
8 changes: 6 additions & 2 deletions lib/common/zstd_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -396,8 +396,12 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus
assert(val != 0);
{
# if defined(_MSC_VER) /* Visual */
unsigned long r=0;
return _BitScanReverse(&r, val) ? (unsigned)r : 0;
# if STATIC_BMI2 == 1
return _lzcnt_u32(val)^31;
# else
unsigned long r=0;
return _BitScanReverse(&r, val) ? (unsigned)r : 0;
# endif
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
return __builtin_clz (val) ^ 31;
# elif defined(__ICCARM__) /* IAR Intrinsic */
Expand Down
16 changes: 12 additions & 4 deletions lib/compress/zstd_compress_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -498,8 +498,12 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
if (MEM_isLittleEndian()) {
if (MEM_64bits()) {
# if defined(_MSC_VER) && defined(_WIN64)
unsigned long r = 0;
return _BitScanForward64( &r, (U64)val ) ? (unsigned)(r >> 3) : 0;
# if STATIC_BMI2
return _tzcnt_u64(val) >> 3;
# else
unsigned long r = 0;
return _BitScanForward64( &r, (U64)val ) ? (unsigned)(r >> 3) : 0;
# endif
# elif defined(__GNUC__) && (__GNUC__ >= 4)
return (__builtin_ctzll((U64)val) >> 3);
# else
Expand Down Expand Up @@ -530,8 +534,12 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
} else { /* Big Endian CPU */
if (MEM_64bits()) {
# if defined(_MSC_VER) && defined(_WIN64)
unsigned long r = 0;
return _BitScanReverse64( &r, val ) ? (unsigned)(r >> 3) : 0;
# if STATIC_BMI2
return _lzcnt_u64(val) >> 3;
# else
unsigned long r = 0;
return _BitScanReverse64(&r, (U64)val) ? (unsigned)(r >> 3) : 0;
# endif
# elif defined(__GNUC__) && (__GNUC__ >= 4)
return (__builtin_clzll(val) >> 3);
# else
Expand Down

0 comments on commit 38e3854

Please sign in to comment.