diff --git a/LICENSE b/LICENSE
index 0b37d7b..fac4bd5 100644
--- a/LICENSE
+++ b/LICENSE
@@ -23,10 +23,29 @@ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
-This license applies to parts originating from
-https://www.cl.cam.ac.uk/~mgk25/ucs/utf8_check.c:
+This license applies to all parts of utf-8-validate that are not externally
+maintained libraries.
+
+The externally maintained simdutf library used by utf-8-validate, located at
+deps/simdutf, is licensed as follows:
"""
-Markus Kuhn -- 2005-03-30
-License: http://www.cl.cam.ac.uk/~mgk25/short-license.html
+Copyright 2021 The simdutf authors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
diff --git a/binding.gyp b/binding.gyp
index 30edf27..61b6ce9 100644
--- a/binding.gyp
+++ b/binding.gyp
@@ -2,8 +2,13 @@
'targets': [
{
'target_name': 'validation',
- 'sources': ['src/validation.c'],
- 'cflags': ['-std=c99'],
+ 'sources': [
+ 'src/validation.cc',
+ 'deps/simdutf/singleheader/simdutf.cpp'
+ ],
+ 'cflags_cc': ['-std=gnu++11'],
+ 'include_dirs': ["
+#include
+
+// Useful for debugging purposes
+namespace simdutf {
+namespace {
+
+// Renders the bits of an integer value as a string, most-significant bit first
+// (e.g. uint8_t(5) -> "00000101"). Debugging helper only.
+// NOTE(review): the template parameter list appears to have been stripped in
+// this copy (likely `template <typename T>`) — restore before applying.
+template
+std::string toBinaryString(T b) {
+  std::string binary = "";
+  // Start from the highest bit of T and walk down to bit 0.
+  T mask = T(1) << (sizeof(T) * CHAR_BIT - 1);
+  while (mask > 0) {
+    binary += ((b & mask) == 0) ? '0' : '1';
+    mask >>= 1;
+  }
+  return binary;
+}
+}
+}
+
+// Implementations
+// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/arm64.h
+/* begin file src/simdutf/arm64.h */
+#ifndef SIMDUTF_ARM64_H
+#define SIMDUTF_ARM64_H
+
+#ifdef SIMDUTF_FALLBACK_H
+#error "arm64.h must be included before fallback.h"
+#endif
+
+
+#ifndef SIMDUTF_IMPLEMENTATION_ARM64
+#define SIMDUTF_IMPLEMENTATION_ARM64 (SIMDUTF_IS_ARM64)
+#endif
+#define SIMDUTF_CAN_ALWAYS_RUN_ARM64 SIMDUTF_IMPLEMENTATION_ARM64 && SIMDUTF_IS_ARM64
+
+
+
+#if SIMDUTF_IMPLEMENTATION_ARM64
+
+namespace simdutf {
+/**
+ * Implementation for NEON (ARMv8).
+ */
+namespace arm64 {
+} // namespace arm64
+} // namespace simdutf
+
+// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/arm64/implementation.h
+/* begin file src/simdutf/arm64/implementation.h */
+#ifndef SIMDUTF_ARM64_IMPLEMENTATION_H
+#define SIMDUTF_ARM64_IMPLEMENTATION_H
+
+
+namespace simdutf {
+namespace arm64 {
+
+namespace {
+using namespace simdutf;
+}
+
+class implementation final : public simdutf::implementation {
+public:
+ simdutf_really_inline implementation() : simdutf::implementation("arm64", "ARM NEON", internal::instruction_set::NEON) {}
+ simdutf_warn_unused int detect_encodings(const char * input, size_t length) const noexcept final;
+ simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final;
+ simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) const noexcept final;
+ simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) const noexcept final;
+ simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) const noexcept final;
+ simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) const noexcept final;
+ simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) const noexcept final;
+ simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept final;
+ simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept final;
+ simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) const noexcept final;
+ simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept final;
+ simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
+ simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
+ simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
+ simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
+ simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
+ simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
+ simdutf_warn_unused size_t convert_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
+ simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
+ simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
+ simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
+ simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
+ simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
+ simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
+ simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
+ simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
+ simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
+ simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
+ simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
+ simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
+ simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
+ simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
+ simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
+ simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
+ simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
+ simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
+ simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
+ simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
+ simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
+ simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
+ simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
+ void change_endianness_utf16(const char16_t * buf, size_t length, char16_t * output) const noexcept final;
+ simdutf_warn_unused size_t count_utf16le(const char16_t * buf, size_t length) const noexcept;
+ simdutf_warn_unused size_t count_utf16be(const char16_t * buf, size_t length) const noexcept;
+ simdutf_warn_unused size_t count_utf8(const char * buf, size_t length) const noexcept;
+ simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t * input, size_t length) const noexcept;
+ simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t * input, size_t length) const noexcept;
+ simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t * input, size_t length) const noexcept;
+ simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t * input, size_t length) const noexcept;
+ simdutf_warn_unused size_t utf16_length_from_utf8(const char * input, size_t length) const noexcept;
+ simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept;
+ simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept;
+ simdutf_warn_unused size_t utf32_length_from_utf8(const char * input, size_t length) const noexcept;
+};
+
+} // namespace arm64
+} // namespace simdutf
+
+#endif // SIMDUTF_ARM64_IMPLEMENTATION_H
+/* end file src/simdutf/arm64/implementation.h */
+
+// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/arm64/begin.h
+/* begin file src/simdutf/arm64/begin.h */
+// redefining SIMDUTF_IMPLEMENTATION to "arm64"
+// #define SIMDUTF_IMPLEMENTATION arm64
+/* end file src/simdutf/arm64/begin.h */
+
+// Declarations
+// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/arm64/intrinsics.h
+/* begin file src/simdutf/arm64/intrinsics.h */
+#ifndef SIMDUTF_ARM64_INTRINSICS_H
+#define SIMDUTF_ARM64_INTRINSICS_H
+
+
+// This should be the correct header whether
+// you use visual studio or other compilers.
+#include
+
+#endif // SIMDUTF_ARM64_INTRINSICS_H
+/* end file src/simdutf/arm64/intrinsics.h */
+// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/arm64/bitmanipulation.h
+/* begin file src/simdutf/arm64/bitmanipulation.h */
+#ifndef SIMDUTF_ARM64_BITMANIPULATION_H
+#define SIMDUTF_ARM64_BITMANIPULATION_H
+
+namespace simdutf {
+namespace arm64 {
+namespace {
+
+/* result might be undefined when input_num is zero */
+// Population count of a 64-bit word using NEON: per-byte popcount (vcnt_u8)
+// followed by a horizontal byte sum (vaddv_u8).
+// NOTE(review): for this NEON sequence an all-zero input yields 0, so the
+// "undefined" caveat above looks inherited from other kernels — confirm.
+simdutf_really_inline int count_ones(uint64_t input_num) {
+  return vaddv_u8(vcnt_u8(vcreate_u8(input_num)));
+}
+
+} // unnamed namespace
+} // namespace arm64
+} // namespace simdutf
+
+#endif // SIMDUTF_ARM64_BITMANIPULATION_H
+/* end file src/simdutf/arm64/bitmanipulation.h */
+// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/arm64/simd.h
+/* begin file src/simdutf/arm64/simd.h */
+#ifndef SIMDUTF_ARM64_SIMD_H
+#define SIMDUTF_ARM64_SIMD_H
+
+#include
+
+
+namespace simdutf {
+namespace arm64 {
+namespace {
+namespace simd {
+
+#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
+namespace {
+// Start of private section with Visual Studio workaround
+
+
+/**
+ * make_uint8x16_t initializes a SIMD register (uint8x16_t).
+ * This is needed because, incredibly, the syntax uint8x16_t x = {1,2,3...}
+ * is not recognized under Visual Studio! This is a workaround.
+ * Using a std::initializer_list as a parameter resulted in
+ * inefficient code. With the current approach, if the parameters are
+ * compile-time constants,
+ * GNU GCC compiles it to ldr, the same as uint8x16_t x = {1,2,3...}.
+ * You should not use this function except for compile-time constants:
+ * it is not efficient.
+ */
+simdutf_really_inline uint8x16_t make_uint8x16_t(uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4,
+ uint8_t x5, uint8_t x6, uint8_t x7, uint8_t x8,
+ uint8_t x9, uint8_t x10, uint8_t x11, uint8_t x12,
+ uint8_t x13, uint8_t x14, uint8_t x15, uint8_t x16) {
+ // Doing a load like so end ups generating worse code.
+ // uint8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8,
+ // x9, x10,x11,x12,x13,x14,x15,x16};
+ // return vld1q_u8(array);
+ uint8x16_t x{};
+ // incredibly, Visual Studio does not allow x[0] = x1
+ x = vsetq_lane_u8(x1, x, 0);
+ x = vsetq_lane_u8(x2, x, 1);
+ x = vsetq_lane_u8(x3, x, 2);
+ x = vsetq_lane_u8(x4, x, 3);
+ x = vsetq_lane_u8(x5, x, 4);
+ x = vsetq_lane_u8(x6, x, 5);
+ x = vsetq_lane_u8(x7, x, 6);
+ x = vsetq_lane_u8(x8, x, 7);
+ x = vsetq_lane_u8(x9, x, 8);
+ x = vsetq_lane_u8(x10, x, 9);
+ x = vsetq_lane_u8(x11, x, 10);
+ x = vsetq_lane_u8(x12, x, 11);
+ x = vsetq_lane_u8(x13, x, 12);
+ x = vsetq_lane_u8(x14, x, 13);
+ x = vsetq_lane_u8(x15, x, 14);
+ x = vsetq_lane_u8(x16, x, 15);
+ return x;
+}
+
+// We have to do the same work for make_int8x16_t
+simdutf_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_t x4,
+ int8_t x5, int8_t x6, int8_t x7, int8_t x8,
+ int8_t x9, int8_t x10, int8_t x11, int8_t x12,
+ int8_t x13, int8_t x14, int8_t x15, int8_t x16) {
+ // Doing a load like so end ups generating worse code.
+ // int8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8,
+ // x9, x10,x11,x12,x13,x14,x15,x16};
+ // return vld1q_s8(array);
+ int8x16_t x{};
+ // incredibly, Visual Studio does not allow x[0] = x1
+ x = vsetq_lane_s8(x1, x, 0);
+ x = vsetq_lane_s8(x2, x, 1);
+ x = vsetq_lane_s8(x3, x, 2);
+ x = vsetq_lane_s8(x4, x, 3);
+ x = vsetq_lane_s8(x5, x, 4);
+ x = vsetq_lane_s8(x6, x, 5);
+ x = vsetq_lane_s8(x7, x, 6);
+ x = vsetq_lane_s8(x8, x, 7);
+ x = vsetq_lane_s8(x9, x, 8);
+ x = vsetq_lane_s8(x10, x, 9);
+ x = vsetq_lane_s8(x11, x, 10);
+ x = vsetq_lane_s8(x12, x, 11);
+ x = vsetq_lane_s8(x13, x, 12);
+ x = vsetq_lane_s8(x14, x, 13);
+ x = vsetq_lane_s8(x15, x, 14);
+ x = vsetq_lane_s8(x16, x, 15);
+ return x;
+}
+
+simdutf_really_inline uint8x8_t make_uint8x8_t(uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4,
+ uint8_t x5, uint8_t x6, uint8_t x7, uint8_t x8) {
+ uint8x8_t x{};
+ x = vset_lane_u8(x1, x, 0);
+ x = vset_lane_u8(x2, x, 1);
+ x = vset_lane_u8(x3, x, 2);
+ x = vset_lane_u8(x4, x, 3);
+ x = vset_lane_u8(x5, x, 4);
+ x = vset_lane_u8(x6, x, 5);
+ x = vset_lane_u8(x7, x, 6);
+ x = vset_lane_u8(x8, x, 7);
+ return x;
+}
+
+// make_uint16x8_t initializes a uint16x8_t register lane by lane.
+// Visual Studio does not accept brace initialization of NEON vectors,
+// so each lane is set explicitly with vsetq_lane_u16. Compile-time
+// constants only: this is not efficient for runtime values.
+simdutf_really_inline uint16x8_t make_uint16x8_t(uint16_t x1, uint16_t x2, uint16_t x3, uint16_t x4,
+                                                 uint16_t x5, uint16_t x6, uint16_t x7, uint16_t x8) {
+  uint16x8_t x{};
+  x = vsetq_lane_u16(x1, x, 0);
+  x = vsetq_lane_u16(x2, x, 1);
+  x = vsetq_lane_u16(x3, x, 2);
+  x = vsetq_lane_u16(x4, x, 3);
+  x = vsetq_lane_u16(x5, x, 4);
+  x = vsetq_lane_u16(x6, x, 5);
+  x = vsetq_lane_u16(x7, x, 6);
+  x = vsetq_lane_u16(x8, x, 7); // was ";;": stray empty statement removed
+  return x;
+}
+
+// make_int16x8_t initializes an int16x8_t register lane by lane.
+// Visual Studio does not accept brace initialization of NEON vectors,
+// so each lane is set explicitly with vsetq_lane_s16. Compile-time
+// constants only: this is not efficient for runtime values.
+simdutf_really_inline int16x8_t make_int16x8_t(int16_t x1, int16_t x2, int16_t x3, int16_t x4,
+                                               int16_t x5, int16_t x6, int16_t x7, int16_t x8) {
+  // was `uint16x8_t x{};`: wrong element type for vsetq_lane_s16 and for the
+  // declared return type; it only compiled because MSVC/ARM64 aliases the
+  // two vector types.
+  int16x8_t x{};
+  x = vsetq_lane_s16(x1, x, 0);
+  x = vsetq_lane_s16(x2, x, 1);
+  x = vsetq_lane_s16(x3, x, 2);
+  x = vsetq_lane_s16(x4, x, 3);
+  x = vsetq_lane_s16(x5, x, 4);
+  x = vsetq_lane_s16(x6, x, 5);
+  x = vsetq_lane_s16(x7, x, 6);
+  x = vsetq_lane_s16(x8, x, 7); // was ";;": stray empty statement removed
+  return x;
+}
+
+
+// End of private section with Visual Studio workaround
+} // namespace
+#endif // SIMDUTF_REGULAR_VISUAL_STUDIO
+
+
+ template
+ struct simd8;
+
+ //
+ // Base class of simd8 and simd8, both of which use uint8x16_t internally.
+ //
+ template>
+ struct base_u8 {
+ uint8x16_t value;
+ static const int SIZE = sizeof(value);
+
+ // Conversion from/to SIMD register
+ simdutf_really_inline base_u8(const uint8x16_t _value) : value(_value) {}
+ simdutf_really_inline operator const uint8x16_t&() const { return this->value; }
+ simdutf_really_inline operator uint8x16_t&() { return this->value; }
+ simdutf_really_inline T first() const { return vgetq_lane_u8(*this,0); }
+ simdutf_really_inline T last() const { return vgetq_lane_u8(*this,15); }
+
+ // Bit operations
+ simdutf_really_inline simd8 operator|(const simd8 other) const { return vorrq_u8(*this, other); }
+ simdutf_really_inline simd8 operator&(const simd8 other) const { return vandq_u8(*this, other); }
+ simdutf_really_inline simd8 operator^(const simd8 other) const { return veorq_u8(*this, other); }
+ simdutf_really_inline simd8 bit_andnot(const simd8 other) const { return vbicq_u8(*this, other); }
+ simdutf_really_inline simd8 operator~() const { return *this ^ 0xFFu; }
+ simdutf_really_inline simd8& operator|=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast | other; return *this_cast; }
+ simdutf_really_inline simd8& operator&=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast & other; return *this_cast; }
+ simdutf_really_inline simd8& operator^=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast ^ other; return *this_cast; }
+
+ simdutf_really_inline Mask operator==(const simd8 other) const { return vceqq_u8(*this, other); }
+
+ template
+ simdutf_really_inline simd8 prev(const simd8 prev_chunk) const {
+ return vextq_u8(prev_chunk, *this, 16 - N);
+ }
+ };
+
+ // SIMD byte mask type (returned by things like eq and gt)
+ template<>
+ struct simd8: base_u8 {
+ typedef uint16_t bitmask_t;
+ typedef uint32_t bitmask2_t;
+
+ static simdutf_really_inline simd8 splat(bool _value) { return vmovq_n_u8(uint8_t(-(!!_value))); }
+
+ simdutf_really_inline simd8(const uint8x16_t _value) : base_u8(_value) {}
+ // False constructor
+ simdutf_really_inline simd8() : simd8(vdupq_n_u8(0)) {}
+ // Splat constructor
+ simdutf_really_inline simd8(bool _value) : simd8(splat(_value)) {}
+ simdutf_really_inline void store(uint8_t dst[16]) const { return vst1q_u8(dst, *this); }
+
+ // We return uint32_t instead of uint16_t because that seems to be more efficient for most
+ // purposes (cutting it down to uint16_t costs performance in some compilers).
+ simdutf_really_inline uint32_t to_bitmask() const {
+#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
+ const uint8x16_t bit_mask = make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
+ 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80);
+#else
+ const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
+ 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80};
+#endif
+ auto minput = *this & bit_mask;
+ uint8x16_t tmp = vpaddq_u8(minput, minput);
+ tmp = vpaddq_u8(tmp, tmp);
+ tmp = vpaddq_u8(tmp, tmp);
+ return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0);
+ }
+
+ // Returns 4-bit out of each byte, alternating between the high 4 bits and low bits
+ // result it is 64 bit.
+ // This method is expected to be faster than none() and is equivalent
+ // when the vector register is the result of a comparison, with byte
+ // values 0xff and 0x00.
+ simdutf_really_inline uint64_t to_bitmask64() const {
+ return vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(*this), 4)), 0);
+ }
+
+ simdutf_really_inline bool any() const { return vmaxvq_u8(*this) != 0; }
+ simdutf_really_inline bool none() const { return vmaxvq_u8(*this) == 0; }
+ simdutf_really_inline bool all() const { return vminvq_u8(*this) == 0xFF; }
+
+
+ };
+
+ // Unsigned bytes
+ template<>
+ struct simd8: base_u8 {
+ static simdutf_really_inline simd8 splat(uint8_t _value) { return vmovq_n_u8(_value); }
+ static simdutf_really_inline simd8 zero() { return vdupq_n_u8(0); }
+ static simdutf_really_inline simd8 load(const uint8_t* values) { return vld1q_u8(values); }
+ simdutf_really_inline simd8(const uint8x16_t _value) : base_u8(_value) {}
+ // Zero constructor
+ simdutf_really_inline simd8() : simd8(zero()) {}
+ // Array constructor
+ simdutf_really_inline simd8(const uint8_t values[16]) : simd8(load(values)) {}
+ // Splat constructor
+ simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {}
+ // Member-by-member initialization
+#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
+ simdutf_really_inline simd8(
+ uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7,
+ uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15
+ ) : simd8(make_uint8x16_t(
+ v0, v1, v2, v3, v4, v5, v6, v7,
+ v8, v9, v10,v11,v12,v13,v14,v15
+ )) {}
+#else
+ simdutf_really_inline simd8(
+ uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7,
+ uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15
+ ) : simd8(uint8x16_t{
+ v0, v1, v2, v3, v4, v5, v6, v7,
+ v8, v9, v10,v11,v12,v13,v14,v15
+ }) {}
+#endif
+
+ // Repeat 16 values as many times as necessary (usually for lookup tables)
+ simdutf_really_inline static simd8 repeat_16(
+ uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7,
+ uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15
+ ) {
+ return simd8(
+ v0, v1, v2, v3, v4, v5, v6, v7,
+ v8, v9, v10,v11,v12,v13,v14,v15
+ );
+ }
+
+ // Store to array
+ simdutf_really_inline void store(uint8_t dst[16]) const { return vst1q_u8(dst, *this); }
+
+ // Saturated math
+ simdutf_really_inline simd8 saturating_add(const simd8 other) const { return vqaddq_u8(*this, other); }
+ simdutf_really_inline simd8 saturating_sub(const simd8 other) const { return vqsubq_u8(*this, other); }
+
+ // Addition/subtraction are the same for signed and unsigned
+ simdutf_really_inline simd8 operator+(const simd8 other) const { return vaddq_u8(*this, other); }
+ simdutf_really_inline simd8 operator-(const simd8 other) const { return vsubq_u8(*this, other); }
+ simdutf_really_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; }
+ simdutf_really_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; }
+
+ // Order-specific operations
+ simdutf_really_inline uint8_t max_val() const { return vmaxvq_u8(*this); }
+ simdutf_really_inline uint8_t min_val() const { return vminvq_u8(*this); }
+ simdutf_really_inline simd8 max_val(const simd8 other) const { return vmaxq_u8(*this, other); }
+ simdutf_really_inline simd8 min_val(const simd8 other) const { return vminq_u8(*this, other); }
+ simdutf_really_inline simd8 operator<=(const simd8 other) const { return vcleq_u8(*this, other); }
+ simdutf_really_inline simd8 operator>=(const simd8 other) const { return vcgeq_u8(*this, other); }
+ simdutf_really_inline simd8 operator<(const simd8 other) const { return vcltq_u8(*this, other); }
+ simdutf_really_inline simd8 operator>(const simd8 other) const { return vcgtq_u8(*this, other); }
+ // Same as >, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's.
+ simdutf_really_inline simd8 gt_bits(const simd8 other) const { return simd8(*this > other); }
+ // Same as <, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's.
+ simdutf_really_inline simd8 lt_bits(const simd8 other) const { return simd8(*this < other); }
+
+ // Bit-specific operations
+ simdutf_really_inline simd8 any_bits_set(simd8 bits) const { return vtstq_u8(*this, bits); }
+ simdutf_really_inline bool is_ascii() const { return this->max_val() < 0b10000000u; }
+
+ simdutf_really_inline bool any_bits_set_anywhere() const { return this->max_val() != 0; }
+ simdutf_really_inline bool any_bits_set_anywhere(simd8 bits) const { return (*this & bits).any_bits_set_anywhere(); }
+ template
+ simdutf_really_inline simd8 shr() const { return vshrq_n_u8(*this, N); }
+ template
+ simdutf_really_inline simd8 shl() const { return vshlq_n_u8(*this, N); }
+
+ // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values)
+ template
+ simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const {
+ return lookup_table.apply_lookup_16_to(*this);
+ }
+
+
+ template
+ simdutf_really_inline simd8 lookup_16(
+ L replace0, L replace1, L replace2, L replace3,
+ L replace4, L replace5, L replace6, L replace7,
+ L replace8, L replace9, L replace10, L replace11,
+ L replace12, L replace13, L replace14, L replace15) const {
+ return lookup_16(simd8::repeat_16(
+ replace0, replace1, replace2, replace3,
+ replace4, replace5, replace6, replace7,
+ replace8, replace9, replace10, replace11,
+ replace12, replace13, replace14, replace15
+ ));
+ }
+
+ template
+ simdutf_really_inline simd8 apply_lookup_16_to(const simd8 original) const {
+ return vqtbl1q_u8(*this, simd8(original));
+ }
+ };
+
+ // Signed bytes
+ template<>
+ struct simd8 {
+ int8x16_t value;
+
+ static simdutf_really_inline simd8 splat(int8_t _value) { return vmovq_n_s8(_value); }
+ static simdutf_really_inline simd8 zero() { return vdupq_n_s8(0); }
+ static simdutf_really_inline simd8 load(const int8_t values[16]) { return vld1q_s8(values); }
+ template
+ simdutf_really_inline void store_ascii_as_utf16(char16_t * p) const {
+ uint16x8_t first = vmovl_u8(vget_low_u8 (vreinterpretq_u8_s8(this->value)));
+ uint16x8_t second = vmovl_high_u8(vreinterpretq_u8_s8(this->value));
+ if (big_endian) {
+ #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
+ const uint8x16_t swap = make_uint8x16_t(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
+ #else
+ const uint8x16_t swap = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14};
+ #endif
+ first = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(first), swap));
+ second = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(second), swap));
+ }
+ vst1q_u16(reinterpret_cast(p), first);
+ vst1q_u16(reinterpret_cast(p + 8), second);
+ }
+ simdutf_really_inline void store_ascii_as_utf32(char32_t * p) const {
+ vst1q_u32(reinterpret_cast(p), vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8 (vreinterpretq_u8_s8(this->value))))));
+ vst1q_u32(reinterpret_cast(p + 4), vmovl_high_u16(vmovl_u8(vget_low_u8 (vreinterpretq_u8_s8(this->value)))));
+ vst1q_u32(reinterpret_cast(p + 8), vmovl_u16(vget_low_u16(vmovl_high_u8(vreinterpretq_u8_s8(this->value)))));
+ vst1q_u32(reinterpret_cast(p + 12), vmovl_high_u16(vmovl_high_u8(vreinterpretq_u8_s8(this->value))));
+ }
+ // Conversion from/to SIMD register
+ simdutf_really_inline simd8(const int8x16_t _value) : value{_value} {}
+ simdutf_really_inline operator const int8x16_t&() const { return this->value; }
+ simdutf_really_inline operator const uint8x16_t() const { return vreinterpretq_u8_s8(this->value); }
+ simdutf_really_inline operator int8x16_t&() { return this->value; }
+
+ // Zero constructor
+ simdutf_really_inline simd8() : simd8(zero()) {}
+ // Splat constructor
+ simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {}
+ // Array constructor
+ simdutf_really_inline simd8(const int8_t* values) : simd8(load(values)) {}
+ // Member-by-member initialization
+#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
+ simdutf_really_inline simd8(
+ int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7,
+ int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15
+ ) : simd8(make_int8x16_t(
+ v0, v1, v2, v3, v4, v5, v6, v7,
+ v8, v9, v10,v11,v12,v13,v14,v15
+ )) {}
+#else
+ simdutf_really_inline simd8(
+ int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7,
+ int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15
+ ) : simd8(int8x16_t{
+ v0, v1, v2, v3, v4, v5, v6, v7,
+ v8, v9, v10,v11,v12,v13,v14,v15
+ }) {}
+#endif
+ // Repeat 16 values as many times as necessary (usually for lookup tables)
+ simdutf_really_inline static simd8 repeat_16(
+ int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7,
+ int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15
+ ) {
+ return simd8(
+ v0, v1, v2, v3, v4, v5, v6, v7,
+ v8, v9, v10,v11,v12,v13,v14,v15
+ );
+ }
+
+ // Store to array
+ simdutf_really_inline void store(int8_t dst[16]) const { return vst1q_s8(dst, value); }
+ // Explicit conversion to/from unsigned
+ //
+ // Under Visual Studio/ARM64 uint8x16_t and int8x16_t are apparently the same type.
+ // In theory, we could check this occurrence with std::same_as and std::enabled_if but it is C++14
+ // and relatively ugly and hard to read.
+#ifndef SIMDUTF_REGULAR_VISUAL_STUDIO
+ simdutf_really_inline explicit simd8(const uint8x16_t other): simd8(vreinterpretq_s8_u8(other)) {}
+#endif
+ simdutf_really_inline operator simd8() const { return vreinterpretq_u8_s8(this->value); }
+
+ simdutf_really_inline simd8 operator|(const simd8 other) const { return vorrq_s8(value, other.value); }
+ simdutf_really_inline simd8 operator&(const simd8 other) const { return vandq_s8(value, other.value); }
+ simdutf_really_inline simd8 operator^(const simd8 other) const { return veorq_s8(value, other.value); }
+ simdutf_really_inline simd8 bit_andnot(const simd8 other) const { return vbicq_s8(value, other.value); }
+
+ // Math
+ simdutf_really_inline simd8 operator+(const simd8 other) const { return vaddq_s8(value, other.value); }
+ simdutf_really_inline simd8 operator-(const simd8 other) const { return vsubq_s8(value, other.value); }
+ simdutf_really_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; }
+ simdutf_really_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; }
+
+ simdutf_really_inline int8_t max_val() const { return vmaxvq_s8(value); }
+ simdutf_really_inline int8_t min_val() const { return vminvq_s8(value); }
+ simdutf_really_inline bool is_ascii() const { return this->min_val() >= 0; }
+
+ // Order-sensitive comparisons
+ simdutf_really_inline simd8 max_val(const simd8 other) const { return vmaxq_s8(value, other.value); }
+ simdutf_really_inline simd8 min_val(const simd8 other) const { return vminq_s8(value, other.value); }
+ simdutf_really_inline simd8 operator>(const simd8 other) const { return vcgtq_s8(value, other.value); }
+ simdutf_really_inline simd8 operator<(const simd8 other) const { return vcltq_s8(value, other.value); }
+ simdutf_really_inline simd8 operator==(const simd8 other) const { return vceqq_s8(value, other.value); }
+
+ template
+ simdutf_really_inline simd8 prev(const simd8 prev_chunk) const {
+ return vextq_s8(prev_chunk, *this, 16 - N);
+ }
+
+ // Perform a lookup assuming no value is larger than 16
+ template
+ simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const {
+ return lookup_table.apply_lookup_16_to(*this);
+ }
+ template
+ simdutf_really_inline simd8 lookup_16(
+ L replace0, L replace1, L replace2, L replace3,
+ L replace4, L replace5, L replace6, L replace7,
+ L replace8, L replace9, L replace10, L replace11,
+ L replace12, L replace13, L replace14, L replace15) const {
+ return lookup_16(simd8::repeat_16(
+ replace0, replace1, replace2, replace3,
+ replace4, replace5, replace6, replace7,
+ replace8, replace9, replace10, replace11,
+ replace12, replace13, replace14, replace15
+ ));
+ }
+
+ template
+ simdutf_really_inline simd8 apply_lookup_16_to(const simd8 original) {
+ return vqtbl1q_s8(*this, simd8(original));
+ }
+ };
+
+ // A 64-byte block processed as four 16-byte NEON registers of element type T.
+ // Provides bulk load/store, lane-wise comparisons reduced to a 64-bit bitmask
+ // (one bit per byte), and ASCII detection over the whole block.
+ template
+ struct simd8x64 {
+ static constexpr int NUM_CHUNKS = 64 / sizeof(simd8);
+ static_assert(NUM_CHUNKS == 4, "ARM kernel should use four registers per 64-byte block.");
+ simd8 chunks[NUM_CHUNKS];
+
+ simd8x64(const simd8x64& o) = delete; // no copy allowed
+ // NOTE(review): the deleted operator= below takes a single chunk (simd8),
+ // not simd8x64, so it does not suppress the implicit copy assignment —
+ // presumably intended to forbid all assignment; kept as vendored upstream.
+ simd8x64& operator=(const simd8 other) = delete; // no assignment allowed
+ simd8x64() = delete; // no default constructor allowed
+
+ simdutf_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {}
+ // Load 64 bytes from ptr as four consecutive 16-byte registers.
+ simdutf_really_inline simd8x64(const T* ptr) : chunks{simd8::load(ptr), simd8::load(ptr+sizeof(simd8)/sizeof(T)), simd8::load(ptr+2*sizeof(simd8)/sizeof(T)), simd8::load(ptr+3*sizeof(simd8)/sizeof(T))} {}
+
+ // Store all four registers back to 64 contiguous bytes at ptr.
+ simdutf_really_inline void store(T* ptr) const {
+ this->chunks[0].store(ptr+sizeof(simd8)*0/sizeof(T));
+ this->chunks[1].store(ptr+sizeof(simd8)*1/sizeof(T));
+ this->chunks[2].store(ptr+sizeof(simd8)*2/sizeof(T));
+ this->chunks[3].store(ptr+sizeof(simd8)*3/sizeof(T));
+ }
+
+
+ simdutf_really_inline simd8x64& operator |=(const simd8x64 &other) {
+ this->chunks[0] |= other.chunks[0];
+ this->chunks[1] |= other.chunks[1];
+ this->chunks[2] |= other.chunks[2];
+ this->chunks[3] |= other.chunks[3];
+ return *this;
+ }
+
+ // OR the four chunks together into a single register.
+ simdutf_really_inline simd8 reduce_or() const {
+ return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]);
+ }
+
+ // The whole 64-byte block is ASCII iff the OR of all chunks is ASCII.
+ simdutf_really_inline bool is_ascii() const {
+ return reduce_or().is_ascii();
+ }
+
+ template
+ simdutf_really_inline void store_ascii_as_utf16(char16_t * ptr) const {
+ this->chunks[0].template store_ascii_as_utf16(ptr+sizeof(simd8)*0);
+ this->chunks[1].template store_ascii_as_utf16(ptr+sizeof(simd8)*1);
+ this->chunks[2].template store_ascii_as_utf16(ptr+sizeof(simd8)*2);
+ this->chunks[3].template store_ascii_as_utf16(ptr+sizeof(simd8)*3);
+ }
+
+ simdutf_really_inline void store_ascii_as_utf32(char32_t * ptr) const {
+ this->chunks[0].store_ascii_as_utf32(ptr+sizeof(simd8)*0);
+ this->chunks[1].store_ascii_as_utf32(ptr+sizeof(simd8)*1);
+ this->chunks[2].store_ascii_as_utf32(ptr+sizeof(simd8)*2);
+ this->chunks[3].store_ascii_as_utf32(ptr+sizeof(simd8)*3);
+ }
+
+ // Emulate x86 movemask on NEON: AND each byte of each comparison-mask chunk
+ // with a per-lane bit weight, then pairwise-add (vpaddq) three times to pack
+ // the 64 per-byte mask bits into one uint64_t (bit i = byte i was 0xFF).
+ simdutf_really_inline uint64_t to_bitmask() const {
+#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
+ const uint8x16_t bit_mask = make_uint8x16_t(
+ 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
+ 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
+ );
+#else
+ const uint8x16_t bit_mask = {
+ 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
+ 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
+ };
+#endif
+ // Add each of the elements next to each other, successively, to stuff each 8 byte mask into one.
+ uint8x16_t sum0 = vpaddq_u8(vandq_u8(uint8x16_t(this->chunks[0]), bit_mask), vandq_u8(uint8x16_t(this->chunks[1]), bit_mask));
+ uint8x16_t sum1 = vpaddq_u8(vandq_u8(uint8x16_t(this->chunks[2]), bit_mask), vandq_u8(uint8x16_t(this->chunks[3]), bit_mask));
+ sum0 = vpaddq_u8(sum0, sum1);
+ sum0 = vpaddq_u8(sum0, sum0);
+ return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0);
+ }
+
+ // The predicates below each splat a scalar, compare per byte across all four
+ // chunks, and reduce to a 64-bit bitmask via to_bitmask().
+ simdutf_really_inline uint64_t eq(const T m) const {
+ const simd8 mask = simd8::splat(m);
+ return simd8x64(
+ this->chunks[0] == mask,
+ this->chunks[1] == mask,
+ this->chunks[2] == mask,
+ this->chunks[3] == mask
+ ).to_bitmask();
+ }
+
+ simdutf_really_inline uint64_t lteq(const T m) const {
+ const simd8 mask = simd8::splat(m);
+ return simd8x64(
+ this->chunks[0] <= mask,
+ this->chunks[1] <= mask,
+ this->chunks[2] <= mask,
+ this->chunks[3] <= mask
+ ).to_bitmask();
+ }
+
+ // Inclusive range test: bit set where low <= byte <= high.
+ simdutf_really_inline uint64_t in_range(const T low, const T high) const {
+ const simd8 mask_low = simd8::splat(low);
+ const simd8 mask_high = simd8::splat(high);
+
+ return simd8x64(
+ (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low),
+ (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low),
+ (this->chunks[2] <= mask_high) & (this->chunks[2] >= mask_low),
+ (this->chunks[3] <= mask_high) & (this->chunks[3] >= mask_low)
+ ).to_bitmask();
+ }
+ // Complement of in_range: bit set where byte < low or byte > high.
+ simdutf_really_inline uint64_t not_in_range(const T low, const T high) const {
+ const simd8 mask_low = simd8::splat(low);
+ const simd8 mask_high = simd8::splat(high);
+ return simd8x64(
+ (this->chunks[0] > mask_high) | (this->chunks[0] < mask_low),
+ (this->chunks[1] > mask_high) | (this->chunks[1] < mask_low),
+ (this->chunks[2] > mask_high) | (this->chunks[2] < mask_low),
+ (this->chunks[3] > mask_high) | (this->chunks[3] < mask_low)
+ ).to_bitmask();
+ }
+ simdutf_really_inline uint64_t lt(const T m) const {
+ const simd8 mask = simd8::splat(m);
+ return simd8x64(
+ this->chunks[0] < mask,
+ this->chunks[1] < mask,
+ this->chunks[2] < mask,
+ this->chunks[3] < mask
+ ).to_bitmask();
+ }
+ simdutf_really_inline uint64_t gt(const T m) const {
+ const simd8 mask = simd8::splat(m);
+ return simd8x64(
+ this->chunks[0] > mask,
+ this->chunks[1] > mask,
+ this->chunks[2] > mask,
+ this->chunks[3] > mask
+ ).to_bitmask();
+ }
+ simdutf_really_inline uint64_t gteq(const T m) const {
+ const simd8 mask = simd8::splat(m);
+ return simd8x64(
+ this->chunks[0] >= mask,
+ this->chunks[1] >= mask,
+ this->chunks[2] >= mask,
+ this->chunks[3] >= mask
+ ).to_bitmask();
+ }
+ // Same as gteq but reinterprets the chunks as unsigned bytes first.
+ simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const {
+ const simd8 mask = simd8::splat(m);
+ return simd8x64(
+ simd8(uint8x16_t(this->chunks[0])) >= mask,
+ simd8(uint8x16_t(this->chunks[1])) >= mask,
+ simd8(uint8x16_t(this->chunks[2])) >= mask,
+ simd8(uint8x16_t(this->chunks[3])) >= mask
+ ).to_bitmask();
+ }
+ }; // struct simd8x64
+// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/arm64/simd16-inl.h
+/* begin file src/simdutf/arm64/simd16-inl.h */
+template
+struct simd16;
+
+ // Base type for 8-lane 16-bit SIMD vectors (uint16x8_t) on ARM64 NEON:
+ // register conversions, bitwise operations, equality and prev().
+ template>
+ struct base_u16 {
+ uint16x8_t value;
+ static const int SIZE = sizeof(value);
+
+ // Conversion from/to SIMD register
+ simdutf_really_inline base_u16() = default;
+ simdutf_really_inline base_u16(const uint16x8_t _value) : value(_value) {}
+ simdutf_really_inline operator const uint16x8_t&() const { return this->value; }
+ simdutf_really_inline operator uint16x8_t&() { return this->value; }
+ // Bit operations
+ simdutf_really_inline simd16 operator|(const simd16 other) const { return vorrq_u16(*this, other); }
+ simdutf_really_inline simd16 operator&(const simd16 other) const { return vandq_u16(*this, other); }
+ simdutf_really_inline simd16 operator^(const simd16 other) const { return veorq_u16(*this, other); }
+ simdutf_really_inline simd16 bit_andnot(const simd16 other) const { return vbicq_u16(*this, other); }
+ // NOTE(review): XOR with 0xFFu flips only the low byte of each 16-bit lane;
+ // a full bitwise NOT would use 0xFFFFu — looks like an upstream quirk
+ // (apparently uninstantiated); confirm against upstream simdutf before use.
+ simdutf_really_inline simd16 operator~() const { return *this ^ 0xFFu; }
+ simdutf_really_inline simd16& operator|=(const simd16 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast | other; return *this_cast; }
+ simdutf_really_inline simd16& operator&=(const simd16 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast & other; return *this_cast; }
+ simdutf_really_inline simd16& operator^=(const simd16 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast ^ other; return *this_cast; }
+
+ simdutf_really_inline Mask operator==(const simd16 other) const { return vceqq_u16(*this, other); }
+
+ // Shift left by N 16-bit lanes, filling from the high lanes of prev_chunk.
+ template
+ simdutf_really_inline simd16 prev(const simd16 prev_chunk) const {
+ // fix: vextq_u18 is not a NEON intrinsic; the 16-bit extract is vextq_u16
+ // (compiled before only because this template was never instantiated).
+ return vextq_u16(prev_chunk, *this, 8 - N);
+ }
+ };
+
+// Adds bitmask typedefs and pointer-load construction on top of base_u16.
+// NOTE(review): operator== and prev() here duplicate base_u16's definitions.
+template>
+struct base16: base_u16 {
+ typedef uint16_t bitmask_t;
+ typedef uint32_t bitmask2_t;
+
+ simdutf_really_inline base16() : base_u16() {}
+ simdutf_really_inline base16(const uint16x8_t _value) : base_u16(_value) {}
+ // Load 8 16-bit lanes from memory.
+ template
+ simdutf_really_inline base16(const Pointer* ptr) : base16(vld1q_u16(ptr)) {}
+
+ simdutf_really_inline Mask operator==(const simd16 other) const { return vceqq_u16(*this, other); }
+
+ static const int SIZE = sizeof(base_u16::value);
+
+ // Shift left by N 16-bit lanes, filling from the high lanes of prev_chunk.
+ template
+ simdutf_really_inline simd16 prev(const simd16 prev_chunk) const {
+ // fix: vextq_u18 is not a NEON intrinsic; the 16-bit extract is vextq_u16
+ // (compiled before only because this template was never instantiated).
+ return vextq_u16(prev_chunk, *this, 8 - N);
+ }
+};
+
+// SIMD byte mask type (returned by things like eq and gt)
+template<>
+struct simd16: base16 {
+ static simdutf_really_inline simd16 splat(bool _value) { return vmovq_n_u16(uint16_t(-(!!_value))); }
+
+ simdutf_really_inline simd16() : base16() {}
+ simdutf_really_inline simd16(const uint16x8_t _value) : base16(_value) {}
+ // Splat constructor
+ simdutf_really_inline simd16(bool _value) : base16