-
Notifications
You must be signed in to change notification settings - Fork 29.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
buffer: add SIMD Neon optimization for
byteLength
Co-authored-by: Keyhan Vakil <kvakil@sylph.kvakil.me> Co-authored-by: Daniel Lemire <daniel@lemire.me>
- Loading branch information
1 parent
c9ec72d
commit 8e4c7dd
Showing
4 changed files
with
82 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
#include "node_simd.h" | ||
|
||
#if NODE_HAS_SIMD_NEON | ||
#include <arm_neon.h> | ||
#endif | ||
|
||
namespace node {
namespace simd {

#if NODE_HAS_SIMD_NEON
// Returns `length` plus the number of bytes in [data, data + length) whose
// most significant bit is set, truncated to 32 bits.
// NOTE(review): presumably this is the UTF-8 size of one-byte (Latin-1) text,
// where every byte >= 0x80 needs two UTF-8 bytes -- confirm against callers.
//
// NEON variant: consumes the input 16 bytes at a time and finishes the
// remaining (< 16) bytes with a scalar loop.
uint32_t utf8_byte_length(const uint8_t* data, size_t length) {
  constexpr size_t kLanes = sizeof(uint8x16_t);
  const size_t tail = length % kLanes;
  const uint8_t* const simd_end = data + (length - tail);
  const uint8x16_t threshold = vdupq_n_u8(0x80);

  // The vector loop adds 4 per high-bit byte. Keeping that running total in
  // 64 bits prevents it from wrapping on inputs with 2^30 or more high-bit
  // bytes, which a 32-bit accumulator would silently get wrong.
  uint64_t quadrupled_count = 0;

  for (; data < simd_end; data += kLanes) {
    // Load 16 bytes.
    const uint8x16_t input = vld1q_u8(data);

    // Compare to the threshold: lanes >= 0x80 become 0xFF, the rest 0x00.
    const uint8x16_t with_highbit = vcgeq_u8(input, threshold);

    // Shift each 16-bit pair right by 4 and narrow to 8 bits, which keeps one
    // nibble of every original mask byte.
    const uint8x8_t highbits =
        vshrn_n_u16(vreinterpretq_u16_u8(with_highbit), 4);

    // Each narrowed byte now has 0, 4 or 8 bits set.
    const uint8x8_t bitsperbyte = vcnt_u8(highbits);

    // Horizontal sum of the eight per-byte popcounts (at most 64).
    quadrupled_count += vaddlv_u8(bitsperbyte);
  }

  // Every high-bit byte contributed 4 set bits above; undo the factor of 4.
  uint64_t high_bit_bytes = quadrupled_count / 4;

  // Scalar tail for the bytes that did not fill a whole vector.
  for (size_t j = 0; j < tail; ++j) {
    high_bit_bytes += simd_end[j] >> 7;
  }

  return static_cast<uint32_t>(high_bit_bytes + length);
}
#else
// Returns `length` plus the number of bytes in [data, data + length) whose
// most significant bit is set, truncated to 32 bits.
// NOTE(review): presumably this is the UTF-8 size of one-byte (Latin-1) text,
// where every byte >= 0x80 needs two UTF-8 bytes -- confirm against callers.
//
// Portable scalar variant used when NODE_HAS_SIMD_NEON is not set.
uint32_t utf8_byte_length(const uint8_t* data, size_t length) {
  uint64_t high_bit_bytes = 0;
  // The index is size_t so that it can reach any `length`; a 32-bit index
  // would wrap around and never terminate for inputs larger than UINT32_MAX.
  for (size_t i = 0; i < length; ++i) {
    high_bit_bytes += data[i] >> 7;
  }
  return static_cast<uint32_t>(high_bit_bytes + length);
}
#endif

}  // namespace simd
}  // namespace node
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
#ifndef SRC_NODE_SIMD_H_
#define SRC_NODE_SIMD_H_

// NODE_HAS_SIMD_NEON is defined to 1 when compiling for 64-bit ARM
// (`__aarch64__` or `_M_ARM64`) and is left undefined otherwise, so it is
// meant to be tested with `#if NODE_HAS_SIMD_NEON`.
#if defined(__aarch64__) || defined(_M_ARM64)
#define NODE_HAS_SIMD_NEON 1
#endif

#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS

// size_t and the fixed-width integer types used in the declaration below are
// included explicitly so this header does not rely on transitive includes.
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

namespace node {
namespace simd {

// Declared here, defined in the accompanying source file: takes a pointer to
// `length` bytes and returns a 32-bit byte count.
// NOTE(review): presumably the UTF-8 encoded size of one-byte (Latin-1)
// input -- confirm against the definition and its callers.
uint32_t utf8_byte_length(const uint8_t* input, size_t length);

}  // namespace simd
}  // namespace node

#endif  // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS

#endif  // SRC_NODE_SIMD_H_