-
-
Notifications
You must be signed in to change notification settings - Fork 675
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Code extracted from: https://github.com/zlib-ng/zlib-ng.git at commit 41d67396924ccc7ab1ff9a7e7d434bfb0887b136 (develop).
- Loading branch information
Showing
35 changed files
with
394 additions
and
159 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,3 +3,4 @@ | |
*.h text | ||
Makefile text | ||
configure text eol=lf | ||
* -whitespace |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
/* compare256_neon.c - NEON version of compare256 | ||
* Copyright (C) 2022 Nathan Moinvaziri | ||
* For conditions of distribution and use, see copyright notice in zlib.h | ||
*/ | ||
|
||
#if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL) | ||
#ifdef _M_ARM64 | ||
# include <arm64_neon.h> | ||
#else | ||
# include <arm_neon.h> | ||
#endif | ||
#include "../../zbuild.h" | ||
|
||
static inline uint32_t compare256_neon_static(const uint8_t *src0, const uint8_t *src1) { | ||
uint32_t len = 0; | ||
|
||
do { | ||
uint8x16_t a, b, cmp; | ||
uint64_t lane; | ||
|
||
a = vld1q_u8(src0); | ||
b = vld1q_u8(src1); | ||
|
||
cmp = veorq_u8(a, b); | ||
|
||
lane = vgetq_lane_u64(vreinterpretq_u64_u8(cmp), 0); | ||
if (lane) { | ||
uint32_t match_byte = (uint32_t)__builtin_ctzll(lane) / 8; | ||
return len + match_byte; | ||
} | ||
len += 8; | ||
lane = vgetq_lane_u64(vreinterpretq_u64_u8(cmp), 1); | ||
if (lane) { | ||
uint32_t match_byte = (uint32_t)__builtin_ctzll(lane) / 8; | ||
return len + match_byte; | ||
} | ||
len += 8; | ||
|
||
src0 += 16, src1 += 16; | ||
} while (len < 256); | ||
|
||
return 256; | ||
} | ||
|
||
/* Externally callable entry point for the NEON compare256 implementation.
 * Forwards both pointers unchanged to compare256_neon_static() and returns
 * its result. */
Z_INTERNAL uint32_t compare256_neon(const uint8_t *src0, const uint8_t *src1) {
    const uint32_t match_len = compare256_neon_static(src0, src1);

    return match_len;
}
|
||
#define LONGEST_MATCH longest_match_neon | ||
#define COMPARE256 compare256_neon_static | ||
|
||
#include "match_tpl.h" | ||
|
||
#define LONGEST_MATCH_SLOW | ||
#define LONGEST_MATCH longest_match_slow_neon | ||
#define COMPARE256 compare256_neon_static | ||
|
||
#include "match_tpl.h" | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
/* compare256_power9.c - Power9 version of compare256 | ||
* Copyright (C) 2019 Matheus Castanho <msc@linux.ibm.com>, IBM | ||
* For conditions of distribution and use, see copyright notice in zlib.h | ||
*/ | ||
|
||
#ifdef POWER9 | ||
#include <altivec.h> | ||
#include "../../zbuild.h" | ||
#include "../../zendian.h" | ||
|
||
/* Older versions of GCC misimplemented the semantics of the vctzlsbb and
 * vclzlsbb bit counting builtins, so when __GNUC__ reports a version below
 * 12 the instructions are emitted directly through inline assembly instead.
 * https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=3f30f2d1dbb3228b8468b26239fe60c2974ce2ac
 *
 * Both macros store the instruction's result into their second argument. */
#if defined(__GNUC__) && (__GNUC__ < 12)
#  define zng_vec_vctzlsbb(vec, out) __asm__ volatile("vctzlsbb %0, %1\n\t" : "=r" (out) : "v" (vec))
#  define zng_vec_vclzlsbb(vec, out) __asm__ volatile("vclzlsbb %0, %1\n\t" : "=r" (out) : "v" (vec))
#else
#  define zng_vec_vctzlsbb(vec, out) out = __builtin_vec_vctzlsbb(vec)
#  define zng_vec_vclzlsbb(vec, out) out = __builtin_vec_vclzlsbb(vec)
#endif
|
||
static inline uint32_t compare256_power9_static(const uint8_t *src0, const uint8_t *src1) { | ||
uint32_t len = 0, cmplen; | ||
|
||
do { | ||
vector unsigned char vsrc0, vsrc1, vc; | ||
|
||
vsrc0 = *((vector unsigned char *)src0); | ||
vsrc1 = *((vector unsigned char *)src1); | ||
|
||
/* Compare 16 bytes at a time. Each byte of vc will be either | ||
* all ones or all zeroes, depending on the result of the comparison. */ | ||
vc = (vector unsigned char)vec_cmpne(vsrc0, vsrc1); | ||
|
||
/* Since the index of matching bytes will contain only zeroes | ||
* on vc (since we used cmpne), counting the number of consecutive | ||
* bytes where LSB == 0 is the same as counting the length of the match. */ | ||
#if BYTE_ORDER == LITTLE_ENDIAN | ||
zng_vec_vctzlsbb(vc, cmplen); | ||
#else | ||
zng_vec_vclzlsbb(vc, cmplen); | ||
#endif | ||
if (cmplen != 16) | ||
return len + cmplen; | ||
|
||
src0 += 16, src1 += 16, len += 16; | ||
} while (len < 256); | ||
|
||
return 256; | ||
} | ||
|
||
/* Externally callable entry point for the Power9 compare256 implementation.
 * Forwards both pointers unchanged to compare256_power9_static() and
 * returns its result. */
Z_INTERNAL uint32_t compare256_power9(const uint8_t *src0, const uint8_t *src1) {
    const uint32_t match_len = compare256_power9_static(src0, src1);

    return match_len;
}
|
||
#define LONGEST_MATCH longest_match_power9 | ||
#define COMPARE256 compare256_power9_static | ||
|
||
#include "match_tpl.h" | ||
|
||
#define LONGEST_MATCH_SLOW | ||
#define LONGEST_MATCH longest_match_slow_power9 | ||
#define COMPARE256 compare256_power9_static | ||
|
||
#include "match_tpl.h" | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.