From 40d3f3519782ff76fecefcc6731875914932581e Mon Sep 17 00:00:00 2001 From: wjr <1966336874@qq.com> Date: Sat, 10 Aug 2024 22:46:48 +0800 Subject: [PATCH] update --- include/wjr/algorithm.hpp | 134 +++++++++++ include/wjr/container/generic/bplus_tree.hpp | 8 +- .../wjr/container/generic/constexpr_tree.hpp | 8 + include/wjr/json/detail.hpp | 7 - include/wjr/json/json.hpp | 183 +++++++++++--- include/wjr/json/number.hpp | 9 + include/wjr/json/reader.hpp | 6 +- include/wjr/json/string.hpp | 51 +++- include/wjr/json/visitor.hpp | 4 +- include/wjr/memory/uninitialized.hpp | 2 +- include/wjr/x86/json/string.hpp | 1 + src/wjr/json/json.cpp | 141 +++++++++++ src/wjr/x86/json/lexer.cpp | 21 +- src/wjr/x86/json/string.cpp | 226 ++++++++++++++++++ 14 files changed, 734 insertions(+), 67 deletions(-) create mode 100644 include/wjr/container/generic/constexpr_tree.hpp create mode 100644 src/wjr/json/json.cpp diff --git a/include/wjr/algorithm.hpp b/include/wjr/algorithm.hpp index 26fe9f9d..bd906bf0 100644 --- a/include/wjr/algorithm.hpp +++ b/include/wjr/algorithm.hpp @@ -1,12 +1,146 @@ #ifndef WJR_FORMAT_ALGORITHM_HPP__ #define WJR_FORMAT_ALGORITHM_HPP__ +#include #include +#include #include namespace wjr { +#if defined(__cpp_lib_constexpr_algorithms) + +template +constexpr OutputIt constexpr_copy(InputIt first, InputIt last, OutputIt dst) { + return std::copy(first, last, dst); +} + +template +constexpr OutputIt constexpr_move_backward(InputIt first, InputIt last, OutputIt dst) { + return std::move_backward(first, last, dst); +} + +template +constexpr OutputIt constexpr_merge(InputIt1 first1, InputIt1 last1, InputIt2 first2, + InputIt2 last2, OutputIt dst, Pred pred) { + return std::merge(first1, last1, first2, last2, dst, pred); +} + +template +constexpr Iter constexpr_inplace_merge(Iter first, Iter mid, Iter last, Pred pred) { + return std::inplace_merge(first1, last1, first2, last2, dst, pred); +} + +template +constexpr void constexpr_sort(Iter first, Iter last, Pred pred) { + std::sort(first, last, pred); +} + +#else + +template +constexpr OutputIt constexpr_copy(InputIt first, InputIt last, OutputIt dst) { + for (; first != last; (void)++first, (void)++dst) { + *dst = *first; + } + + return dst; +} + +template +constexpr OutputIt constexpr_move_backward(InputIt first, InputIt last, OutputIt dst) { + while (first != last) { + *(--dst) = std::move(*(--last)); + } + + return dst; +} + +template +constexpr OutputIt constexpr_merge(InputIt1 first1, InputIt1 last1, InputIt2 first2, + InputIt2 last2, OutputIt dst, Pred pred) { + const auto count1 = std::distance(first1, last1); + const auto count2 = std::distance(first2, last2); + if (first1 != last1 && first2 != last2) { + for (;;) { + if (pred(*first2, *first1)) { + *dst = *first2; + ++dst; + ++first2; + + if (first2 == last2) { + break; + } + } else { + *dst = *first1; + ++dst; + ++first1; + + if (first1 == last1) { + break; + } + } + } + } + + dst = constexpr_copy(first1, last1, dst); + dst = constexpr_copy(first2, last2, dst); + return dst; +} + +namespace algorithm_detail { + +template +constexpr void __constexpr_insertion_sort(Iter first, Iter last, Pred pred) { + if (first != last) { + for (Iter mid = first; ++mid != last;) { + iterator_reference_t ref = *mid; + + if (pred(ref, *first)) { + iterator_value_t tmp = std::move(ref); + Iter hole = mid; + constexpr_move_backward(first, mid, ++hole); + *first = std::move(tmp); + } else { + Iter prev = mid; + if (pred(ref, *--prev)) { + iterator_value_t tmp = std::move(ref); + *mid = *prev; + + Iter pprev = prev; + for (; pred(tmp, *--pprev); prev = pprev) { + *prev = std::move(*pprev); + } + + *prev = std::move(tmp); + } + } + } + } +} + +inline constexpr size_t __insertion_sort_threshold = in_place_max; + +template +constexpr void __constexpr_sort_impl(Iter first, Iter last, Pred pred) { + __constexpr_insertion_sort(first, last, pred); +} + +} // namespace algorithm_detail + +template +constexpr void constexpr_sort(Iter first, Iter last, Pred pred) { + algorithm_detail::__constexpr_sort_impl(first, last, pred); +} + +#endif + +template +constexpr void constexpr_sort(Iter first, Iter last) { + constexpr_sort(first, last, std::less<>{}); +} + template WJR_PURE WJR_INTRINSIC_INLINE bool starts_with(std::basic_string_view str, diff --git a/include/wjr/container/generic/bplus_tree.hpp b/include/wjr/container/generic/bplus_tree.hpp index 3645c3a0..9a0e129d 100644 --- a/include/wjr/container/generic/bplus_tree.hpp +++ b/include/wjr/container/generic/bplus_tree.hpp @@ -63,7 +63,7 @@ class inline_key { ~inline_key() = default; constexpr inline_key(reference value) noexcept( - std::is_nothrow_constructible_v, reference>) + std::is_nothrow_constructible_v, reference>) : m_storage(value) {} constexpr reference operator*() const noexcept { return *m_storage; } @@ -72,7 +72,7 @@ class inline_key { private: // no need to check - aligned_storage m_storage; + uninitialized m_storage; }; template @@ -102,7 +102,7 @@ class inline_key { }; template -struct is_possible_inline_key : std::is_trivially_copyable> {}; +struct is_possible_inline_key : std::is_trivially_copyable> {}; template inline constexpr bool is_possible_inline_key_v = is_possible_inline_key::value; @@ -630,7 +630,7 @@ class basic_bplus_tree { const const_iterator iter = __search(key); const auto pos = iter.get_pos(); const bool inserted = - pos == 0 || key_comp()(*iter.get_leaf()->m_values[pos - 1], key); + pos == 0 || key_comp()(iter.get_leaf()->__get_key(pos - 1), key); return {iter, inserted}; } diff --git a/include/wjr/container/generic/constexpr_tree.hpp b/include/wjr/container/generic/constexpr_tree.hpp new file mode 100644 index 00000000..81fe95a0 --- /dev/null +++ b/include/wjr/container/generic/constexpr_tree.hpp @@ -0,0 +1,8 @@ +#ifndef WJR_CONTAINER_GERNERIC_CONSTEXPR_TREE_HPP__ +#define WJR_CONTAINER_GERNERIC_CONSTEXPR_TREE_HPP__ + +#include + +namespace wjr {} // namespace wjr + +#endif // WJR_CONTAINER_GERNERIC_CONSTEXPR_TREE_HPP__ \ No newline at end of file diff --git a/include/wjr/json/detail.hpp b/include/wjr/json/detail.hpp index a5e3eecc..2a29fa6b 100644 --- a/include/wjr/json/detail.hpp +++ b/include/wjr/json/detail.hpp @@ -59,19 +59,12 @@ enum class value_t : uint8_t { }; using number_unsigned_t = integral_constant; - using number_signed_t = integral_constant; - using number_float_t = integral_constant; - using null_t = integral_constant; - using boolean_t = integral_constant; - using string_t = integral_constant; - using object_t = integral_constant; - using array_t = integral_constant; struct basic_value { diff --git a/include/wjr/json/json.hpp b/include/wjr/json/json.hpp index c9a34ab3..5037502b 100644 --- a/include/wjr/json/json.hpp +++ b/include/wjr/json/json.hpp @@ -7,6 +7,14 @@ namespace wjr::json { +namespace detail { +extern result __check_impl(const reader &rd) noexcept; +} + +WJR_INTRINSIC_INLINE result check(const reader &rd) noexcept { + return detail::__check_impl(rd); +} + template class basic_json; @@ -22,9 +30,13 @@ struct basic_json_traits { public: using string_type = String, memory_pool>; - using object_type = Object, + using object_type = Object, memory_pool>>; using array_type = Array>; + + using value_type = json_type; + using reference = value_type &; + using const_reference = const value_type &; }; using default_json_traits = basic_json_traits; @@ -37,6 +49,54 @@ class basic_json_parser; using json = basic_json; +template +struct __json_get_impl; + +#define WJR_REGISTER_JSON_GET_IMPL(T) \ + template <> \ + struct __json_get_impl { \ + template \ + WJR_PURE WJR_INTRINSIC_CONSTEXPR static auto get(Json &&j) noexcept \ + -> decltype(std::declval().get_##T##_unsafe()) { \ + return std::forward(j).get_##T##_unsafe(); \ + } \ + } + +WJR_REGISTER_JSON_GET_IMPL(boolean); +WJR_REGISTER_JSON_GET_IMPL(number_unsigned); +WJR_REGISTER_JSON_GET_IMPL(number_signed); +WJR_REGISTER_JSON_GET_IMPL(number_float); +WJR_REGISTER_JSON_GET_IMPL(string); +WJR_REGISTER_JSON_GET_IMPL(object); +WJR_REGISTER_JSON_GET_IMPL(array); + +#undef WJR_REGISTER_JSON_GET_IMPL + +class bad_json_access : std::exception { +public: + explicit bad_json_access(error_code e) : m_err(std::move(e)) {} + + const error_code &error() const & { return m_err; } + error_code &error() & { return m_err; } + const error_code &&error() const && { return std::move(m_err); } + error_code &&error() && { return std::move(m_err); } + + WJR_NODISCARD virtual const char *what() const noexcept override { + return "Bad json access"; + } + +private: + error_code m_err; +}; + +/** + * @details At present, it's a simple but flexible implementation solution. This is not as + * good as a only iteration in terms of performance. \n + * Performance (on my local x64 machine) : \n + * twitter.json : 700~720 MB/s \n + * canada.json : 660~680 MB/s + * + */ template class basic_json { friend class detail::basic_json_parser; @@ -53,14 +113,50 @@ class basic_json { using object_type = typename traits_type::object_type; using array_type = typename traits_type::array_type; -private: - using __tp_list = - tp_list; + using value_type = typename traits_type::value_type; + using reference = typename traits_type::reference; + using const_reference = typename traits_type::const_reference; + + using size_type = uint32_t; -public: basic_json() = default; - basic_json(const basic_json &other) = delete; + basic_json(const basic_json &other) noexcept { + switch (other.type()) { + case value_t::null: + case value_t::number_unsigned: + case value_t::number_signed: + case value_t::number_float: { + // use a simple memcpy + m_value = other.m_value; + break; + } + case value_t::string: { + memory_pool al; + string_type *const str = al.allocate(1); + wjr::construct_at(str, other.__get_string()); + m_value.set(string_t(), str); + break; + } + case value_t::object: { + memory_pool al; + object_type *const obj = al.allocate(1); + wjr::construct_at(obj, other.__get_object()); + m_value.set(object_t(), obj); + break; + } + case value_t::array: { + memory_pool al; + array_type *const arr = al.allocate(1); + wjr::construct_at(arr, other.__get_array()); + m_value.set(array_t(), arr); + break; + } + default: { + WJR_UNREACHABLE(); + break; + } + } + } basic_json(basic_json &&other) noexcept : m_value(other.m_value) { other.m_value.m_type = value_t::null; @@ -83,7 +179,7 @@ class basic_json { * @todo Optimize by not using recursion * */ - ~basic_json() noexcept { __destroy_and_deallocate(); } + ~basic_json() noexcept { __destroy(); } basic_json(null_t) noexcept : m_value(null_t()) {} basic_json(boolean_t, bool f) noexcept : m_value(boolean_t(), f) {} @@ -103,6 +199,39 @@ class basic_json { WJR_PURE value_t type() const noexcept { return m_value.m_type; } + template + WJR_PURE decltype(auto) get_unsafe() noexcept { + return __json_get_impl::get(*this); + } + + template + WJR_PURE decltype(auto) get_unsafe() const noexcept { + return __json_get_impl::get(*this); + } + + template + WJR_PURE decltype(auto) get() { + if (WJR_UNLIKELY(type() != T::value)) { + WJR_THROW(bad_json_access(error_code::INCORRECT_TYPE)); + } + + return get_unsafe(); + } + + template + WJR_PURE decltype(auto) get() const { + if (WJR_UNLIKELY(type() != T::value)) { + WJR_THROW(bad_json_access(error_code::INCORRECT_TYPE)); + } + + return get_unsafe(); + } + + WJR_CONST static size_type max_depth_size() noexcept { return 256; } + + reference at(size_type idx) noexcept { return get().at(idx); } + const_reference at(size_type idx) const noexcept { return get().at(idx); } + static result parse(const reader &rd) noexcept; private: @@ -151,8 +280,6 @@ class basic_json { __destroy_impl(); } - void __destroy_and_deallocate() { __destroy(); } - WJR_PURE boolean_type &__get_boolean() noexcept { return m_value.m_boolean; } WJR_PURE const boolean_type &__get_boolean() const noexcept { return m_value.m_boolean; @@ -219,74 +346,75 @@ class basic_json_parser { WJR_INTRINSIC_INLINE result parse(const reader &rd) noexcept { json_type j; current = std::addressof(j); - WJR_EXPECTED_INIT(ret, visitor_detail::parse(*this, rd)); + WJR_EXPECTED_TRY(visitor_detail::parse(*this, rd)); return j; } protected: WJR_INTRINSIC_INLINE result visit_root_null(const char *first) const noexcept { - return detail::check_null(first); + return check_null(first); } WJR_INTRINSIC_INLINE result visit_object_null(const char *first) const noexcept { - return detail::check_null(first); + return check_null(first); } WJR_INTRINSIC_INLINE result visit_array_null(const char *first) const noexcept { current->__get_array().emplace_back(null_t()); - return detail::check_null(first); + return check_null(first); } WJR_INTRINSIC_INLINE result visit_root_true(const char *first) const noexcept { current->m_value.set(boolean_t(), true); - return detail::check_true(first); + return check_true(first); } WJR_INTRINSIC_INLINE result visit_object_true(const char *first) const noexcept { element->m_value.set(boolean_t(), true); - return detail::check_true(first); + return check_true(first); } WJR_INTRINSIC_INLINE result visit_array_true(const char *first) const noexcept { current->__get_array().emplace_back(boolean_t(), true); - return detail::check_true(first); + return check_true(first); } WJR_INTRINSIC_INLINE result visit_root_false(const char *first) const noexcept { - return detail::check_false(first); + current->m_value.set(boolean_t(), false); + return check_false(first); } WJR_INTRINSIC_INLINE result visit_object_false(const char *first) const noexcept { element->m_value.set(boolean_t(), false); - return detail::check_false(first); + return check_false(first); } WJR_INTRINSIC_INLINE result visit_array_false(const char *first) const noexcept { current->__get_array().emplace_back(boolean_t(), false); - return detail::check_false(first); + return check_false(first); } WJR_INTRINSIC_INLINE result visit_root_number(const char *first, const char *last) const noexcept { - WJR_EXPECTED_INIT(ret, detail::parse_number(first, last)); + WJR_EXPECTED_INIT(ret, parse_number(first, last)); current->m_value = *ret; return {}; } WJR_INTRINSIC_INLINE result visit_object_number(const char *first, const char *last) const noexcept { - WJR_EXPECTED_INIT(ret, detail::parse_number(first, last)); + WJR_EXPECTED_INIT(ret, parse_number(first, last)); element->m_value = *ret; return {}; } WJR_INTRINSIC_INLINE result visit_array_number(const char *first, const char *last) const noexcept { - WJR_EXPECTED_INIT(ret, detail::parse_number(first, last)); + WJR_EXPECTED_INIT(ret, parse_number(first, last)); current->__get_array().emplace_back(*ret); return {}; } @@ -297,7 +425,7 @@ class basic_json_parser { string_type *str = al.allocate(1); wjr::construct_at(str); try_uninitialized_resize(*str, last - first); - auto ret = detail::parse_string(str->data(), first, last); + auto ret = parse_string(str->data(), first, last); if (WJR_UNLIKELY(!ret)) { std::destroy_at(str); al.deallocate(str, 1); @@ -314,7 +442,7 @@ class basic_json_parser { string_type *str = al.allocate(1); wjr::construct_at(str); try_uninitialized_resize(*str, last - first); - auto ret = detail::parse_string(str->data(), first, last); + auto ret = parse_string(str->data(), first, last); if (WJR_UNLIKELY(!ret)) { std::destroy_at(str); al.deallocate(str, 1); @@ -331,7 +459,7 @@ class basic_json_parser { string_type *str = al.allocate(1); wjr::construct_at(str); try_uninitialized_resize(*str, last - first); - auto ret = detail::parse_string(str->data(), first, last); + auto ret = parse_string(str->data(), first, last); if (WJR_UNLIKELY(!ret)) { std::destroy_at(str); al.deallocate(str, 1); @@ -346,10 +474,9 @@ class basic_json_parser { const char *last) noexcept { string_type str; try_uninitialized_resize(str, last - first); - WJR_EXPECTED_INIT(ret, detail::parse_string(str.data(), first, last)); + WJR_EXPECTED_INIT(ret, parse_string(str.data(), first, last)); str.resize(*ret - str.data()); const auto iter = current->__get_object().emplace(std::move(str), dctor); - WJR_ASSUME(str.size() == 0); element = std::addressof(iter.first->second); if (WJR_UNLIKELY(!iter.second)) { std::destroy_at(element); diff --git a/include/wjr/json/number.hpp b/include/wjr/json/number.hpp index bb95c286..642bfbcf 100644 --- a/include/wjr/json/number.hpp +++ b/include/wjr/json/number.hpp @@ -73,6 +73,15 @@ parse_number(const char *first, const char *last) noexcept { return unexpected(error_code::NUMBER_ERROR); } } + +/** + * @todo This can be optimize. + */ +WJR_PURE WJR_INTRINSIC_INLINE result check_number(const char *first, + const char *last) noexcept { + return parse_number(first, last).transform([](auto &&) {}); +} + } // namespace detail } // namespace wjr::json diff --git a/include/wjr/json/reader.hpp b/include/wjr/json/reader.hpp index aa8915e9..92b76520 100644 --- a/include/wjr/json/reader.hpp +++ b/include/wjr/json/reader.hpp @@ -23,11 +23,9 @@ class reader { reader(span sp) noexcept { read(sp); } - WJR_CONSTEXPR20 const_iterator token_begin() const noexcept { - return m_tokens.begin(); - } + WJR_CONSTEXPR20 const_iterator begin() const noexcept { return m_tokens.begin(); } - WJR_CONSTEXPR20 const_iterator token_end() const noexcept { return m_tokens.end(); } + WJR_CONSTEXPR20 const_iterator end() const noexcept { return m_tokens.end(); } WJR_CONSTEXPR20 const_pointer data() const noexcept { return m_str.data(); } diff --git a/include/wjr/json/string.hpp b/include/wjr/json/string.hpp index 80ef4602..31a764a9 100644 --- a/include/wjr/json/string.hpp +++ b/include/wjr/json/string.hpp @@ -38,7 +38,6 @@ inline constexpr std::array escape_table = { namespace detail { -/// @todo complete this function WJR_INTRINSIC_INLINE result parse_unicode_codepoint(char *&dst, const char *first, const char *last) noexcept { if (const auto ret = utf8::parse_unicode_codepoint( @@ -95,6 +94,56 @@ inline result parse_string(char *dst, const char *first, } #endif +WJR_INTRINSIC_INLINE result +check_unicode_codepoint(const char *first, const char *last) noexcept { + if (const auto ret = + utf8::check_unicode_codepoint(reinterpret_cast(first), + reinterpret_cast(last)); + WJR_UNLIKELY(!ret)) { + return unexpected(error_code::UTF8_ERROR); + } else { + return reinterpret_cast(*ret); + } +} + +WJR_INTRINSIC_INLINE result generic_check_string(const char *first, + const char *last) noexcept { + using namespace string_detail; + + if (WJR_UNLIKELY(first == last)) { + return {}; + } + + do { + uint8_t ch = *first++; + + if (WJR_UNLIKELY(ch == '\\')) { + WJR_ASSERT(first != last); + ch = *first++; + + if (WJR_UNLIKELY(ch == 'u')) { + WJR_EXPECTED_SET(first, check_unicode_codepoint(first, last)); + } else { + const uint8_t code = escape_table[ch]; + + if (WJR_UNLIKELY(code == 0)) { + return unexpected(error_code::STRING_ERROR); + } + } + } + } while (first != last); + + return {}; +} + +#if WJR_HAS_BUILTIN(JSON_CHECK_STRING) +extern result check_string(const char *first, const char *last) noexcept; +#else +inline result check_string(const char *first, const char *last) noexcept { + return generic_check_string(first, last); +} +#endif + } // namespace detail } // namespace wjr::json diff --git a/include/wjr/json/visitor.hpp b/include/wjr/json/visitor.hpp index fe734675..dccd501e 100644 --- a/include/wjr/json/visitor.hpp +++ b/include/wjr/json/visitor.hpp @@ -48,8 +48,8 @@ WJR_NOINLINE static result parse(Parser &&par, const reader &rd) noexcept // token reader auto read = [&rd]() { - auto __begin = rd.token_begin(); - auto __end = rd.token_end(); + auto __begin = rd.begin(); + auto __end = rd.end(); return [__begin, __end](uint32_t &token, error_code err = error_code::TAPE_ERROR) mutable -> result { diff --git a/include/wjr/memory/uninitialized.hpp b/include/wjr/memory/uninitialized.hpp index fcebf4e0..823e10d6 100644 --- a/include/wjr/memory/uninitialized.hpp +++ b/include/wjr/memory/uninitialized.hpp @@ -423,7 +423,7 @@ template T *relocate_at(T *src, T *dst) noexcept( std::is_nothrow_move_constructible_v &&std::is_nothrow_destructible_v) { if constexpr (get_relocate_mode_v == relocate_t::trivial) { - *dst = *src; + std::memcpy(dst, src, sizeof(T)); } else { wjr::construct_at(dst, std::move(*src)); std::destroy_at(src); diff --git a/include/wjr/x86/json/string.hpp b/include/wjr/x86/json/string.hpp index 241db201..60c71956 100644 --- a/include/wjr/x86/json/string.hpp +++ b/include/wjr/x86/json/string.hpp @@ -5,6 +5,7 @@ #if WJR_HAS_SIMD(SSE2) #define WJR_HAS_BUILTIN_JSON_PARSE_STRING WJR_HAS_DEF +#define WJR_HAS_BUILTIN_JSON_CHECK_STRING WJR_HAS_DEF #endif #endif // WJR_X86_JSON_STRING_HPP__ \ No newline at end of file diff --git a/src/wjr/json/json.cpp b/src/wjr/json/json.cpp new file mode 100644 index 00000000..44c40952 --- /dev/null +++ b/src/wjr/json/json.cpp @@ -0,0 +1,141 @@ +#include + +namespace wjr::json::detail { + +class check_parser { + template + friend result visitor_detail::parse(Parser &&par, const reader &rd) noexcept; + +public: + WJR_INTRINSIC_INLINE static result parse(const reader &rd) noexcept { + return visitor_detail::parse(check_parser(), rd); + } + +protected: + WJR_INTRINSIC_INLINE result visit_root_null(const char *first) const noexcept { + return check_null(first); + } + + WJR_INTRINSIC_INLINE result + visit_object_null(const char *first) const noexcept { + return check_null(first); + } + + WJR_INTRINSIC_INLINE result visit_array_null(const char *first) const noexcept { + return check_null(first); + } + + WJR_INTRINSIC_INLINE result visit_root_true(const char *first) const noexcept { + return check_true(first); + } + + WJR_INTRINSIC_INLINE result + visit_object_true(const char *first) const noexcept { + return check_true(first); + } + + WJR_INTRINSIC_INLINE result visit_array_true(const char *first) const noexcept { + return check_true(first); + } + + WJR_INTRINSIC_INLINE result visit_root_false(const char *first) const noexcept { + return check_false(first); + } + + WJR_INTRINSIC_INLINE result + visit_object_false(const char *first) const noexcept { + return check_false(first); + } + + WJR_INTRINSIC_INLINE result + visit_array_false(const char *first) const noexcept { + return check_false(first); + } + + WJR_INTRINSIC_INLINE result visit_root_number(const char *first, + const char *last) const noexcept { + return check_number(first, last); + } + + WJR_INTRINSIC_INLINE result + visit_object_number(const char *first, const char *last) const noexcept { + return check_number(first, last); + } + + WJR_INTRINSIC_INLINE result + visit_array_number(const char *first, const char *last) const noexcept { + return check_number(first, last); + } + + WJR_INTRINSIC_INLINE result visit_root_string(const char *first, + const char *last) const noexcept { + return check_string(first, last); + } + + WJR_INTRINSIC_INLINE result + visit_object_string(const char *first, const char *last) const noexcept { + return check_string(first, last); + } + + WJR_INTRINSIC_INLINE result + visit_array_string(const char *first, const char *last) const noexcept { + return check_string(first, last); + } + + WJR_INTRINSIC_INLINE result visit_object_key_string(const char *first, + const char *last) noexcept { + return check_string(first, last); + } + + WJR_INTRINSIC_INLINE result visit_root_start_object(uint32_t) const noexcept { + return {}; + } + + WJR_INTRINSIC_INLINE result visit_object_start_object(uint32_t) noexcept { + return {}; + } + + WJR_INTRINSIC_INLINE result visit_array_start_object(uint32_t) noexcept { + return {}; + } + + WJR_INTRINSIC_INLINE result visit_root_start_array(uint32_t) const noexcept { + return {}; + } + + WJR_INTRINSIC_INLINE result visit_object_start_array(uint32_t) noexcept { + return {}; + } + + WJR_INTRINSIC_INLINE result visit_array_start_array(uint32_t) noexcept { + return {}; + } + + WJR_INTRINSIC_INLINE result visit_end_object_to_object(uint32_t) noexcept { + return {}; + } + + WJR_INTRINSIC_INLINE result visit_end_object_to_array(uint32_t) noexcept { + return {}; + } + + WJR_INTRINSIC_INLINE result visit_end_object_to_root(uint32_t) const noexcept { + return {}; + } + + WJR_INTRINSIC_INLINE result visit_end_array_to_object(uint32_t) noexcept { + return {}; + } + + WJR_INTRINSIC_INLINE result visit_end_array_to_array(uint32_t) noexcept { + return {}; + } + + WJR_INTRINSIC_INLINE result visit_end_array_to_root(uint32_t) const noexcept { + return {}; + } +}; + +result __check_impl(const reader &rd) noexcept { return check_parser::parse(rd); } + +} // namespace wjr::json::detail \ No newline at end of file diff --git a/src/wjr/x86/json/lexer.cpp b/src/wjr/x86/json/lexer.cpp index dfca12b4..5a2996cc 100644 --- a/src/wjr/x86/json/lexer.cpp +++ b/src/wjr/x86/json/lexer.cpp @@ -58,28 +58,9 @@ typename lexer::result_type lexer::read(uint32_t *token_buf, stk[i] = simd::loadu(first + i * u8_width); } } else { - char ch; - switch (last[-1]) { - case ' ': - case '\n': - case '\r': - case '\t': - case '[': - case ']': - case '{': - case '}': { - ch = ' '; - break; - } - default: { - ch = '\0'; - break; - } - } - char buf[64]; std::memcpy(buf, first, diff); - std::memset(buf + diff, ch, 64 - diff); + std::memset(buf + diff, 0, 64 - diff); for (size_t i = 0; i < u8_loop; ++i) { stk[i] = simd::loadu(buf + i * u8_width); diff --git a/src/wjr/x86/json/string.cpp b/src/wjr/x86/json/string.cpp index 80f01e40..ab1e68a0 100644 --- a/src/wjr/x86/json/string.cpp +++ b/src/wjr/x86/json/string.cpp @@ -289,6 +289,232 @@ result parse_string(char *dst, const char *first, const char *last) noex return dst; } + +} // namespace detail +#endif + +#if WJR_HAS_BUILTIN(JSON_CHECK_STRING) + +namespace detail { + +result check_string(const char *first, const char *last) noexcept { + using namespace string_detail; + + constexpr auto is_avx = WJR_HAS_SIMD(AVX2); + using simd = std::conditional_t; + using simd_mask_type = typename simd::mask_type; + + constexpr auto simd_width = simd::width(); + constexpr auto u8_width = simd_width / 8; + + if (first + u8_width <= last) { + LOOP: + const auto x = simd::loadu(first); + auto B = static_cast( + simd::movemask_epi8(simd::cmpeq_epi8(x, simd::set1_epi8('\\')))); + + if (WJR_UNLIKELY(B != 0)) { + do { + const int pos = ctz(B); + + // last backslash, special handling + if (WJR_UNLIKELY(pos == u8_width - 1)) { + WJR_ASSERT(first + u8_width + 1 <= last); + + const uint8_t ch = first[u8_width]; + + if (WJR_UNLIKELY(ch == 'u')) { + WJR_EXPECTED_SET( + first, check_unicode_codepoint(first + u8_width + 1, last)); + + if (first + u8_width <= last) { + goto LOOP; + } + + goto SMALL; + } + + const uint8_t code = escape_table[ch]; + + if (WJR_UNLIKELY(code == 0)) { + return unexpected(error_code::STRING_ERROR); + } + + first += u8_width + 1; + + if (first + u8_width <= last) { + goto LOOP; + } + + goto SMALL; + } + + const uint8_t ch = first[pos + 1]; + + if (WJR_UNLIKELY(ch == 'u')) { + const char *__first; + WJR_EXPECTED_SET(__first, + check_unicode_codepoint(first + pos + 2, last)); + + if (to_unsigned(__first - first) >= u8_width) { + first = __first; + if (first + u8_width <= last) { + goto LOOP; + } + + goto SMALL; + } + + // two backslash + if ((__first - first) - pos != 6) { + B &= B - 1; + } + } else { + const uint8_t code = escape_table[ch]; + + if (WJR_UNLIKELY(code == 0)) { + return unexpected(error_code::STRING_ERROR); + } + + // backslash + if (WJR_UNLIKELY(code == 0x5c)) { + B &= B - 1; + } + } + + B &= B - 1; + } while (B); + + first += u8_width; + + if (first + u8_width <= last) { + goto LOOP; + } + + goto SMALL; + } + + first += u8_width; + + if (first + u8_width <= last) { + goto LOOP; + } + } + +SMALL: + + const auto n = last - first; + simd_mask_type B; + + do { +#if WJR_HAS_SIMD(AVX2) + if (WJR_UNLIKELY(n > 16)) { + const auto m = n - 16; + const auto x0 = sse::loadu(first); + const auto x1 = sse::loadu(first + m); + B = sse::movemask_epi8(sse::cmpeq_epi8(x0, sse::set1_epi8('\\'))) | + sse::movemask_epi8(sse::cmpeq_epi8(x1, sse::set1_epi8('\\'))) << m; + + if (WJR_LIKELY(B == 0)) { + return {}; + } + + break; + } +#endif + + if (WJR_UNLIKELY(n <= 8)) { + if (WJR_UNLIKELY(n <= 4)) { + if (WJR_UNLIKELY(n < 2)) { + if (WJR_UNLIKELY(first == last)) { + return {}; + } + + const uint8_t ch = *first++; + if (WJR_UNLIKELY(ch == '\\')) { + return unexpected(error_code::STRING_ERROR); + } + + return {}; + } + + const auto m = n - 2; + const auto x0 = sse::loadu_si16(first); + const auto x1 = sse::loadu_si16(first + m); + B = sse::movemask_epi8(sse::cmpeq_epi8(x0, sse::set1_epi8('\\'))) | + sse::movemask_epi8(sse::cmpeq_epi8(x1, sse::set1_epi8('\\'))) << m; + + if (WJR_LIKELY(B == 0)) { + return {}; + } + + break; + } + + const auto m = n - 4; + const auto x0 = sse::loadu_si32(first); + const auto x1 = sse::loadu_si32(first + m); + B = sse::movemask_epi8(sse::cmpeq_epi8(x0, sse::set1_epi8('\\'))) | + sse::movemask_epi8(sse::cmpeq_epi8(x1, sse::set1_epi8('\\'))) << m; + + if (WJR_LIKELY(B == 0)) { + return {}; + } + + break; + } + + const auto m = n - 8; + const auto x0 = sse::loadu_si64(first); + const auto x1 = sse::loadu_si64(first + m); + B = sse::movemask_epi8(sse::cmpeq_epi8(x0, sse::set1_epi8('\\'))) | + sse::movemask_epi8(sse::cmpeq_epi8(x1, sse::set1_epi8('\\'))) << m; + + if (WJR_LIKELY(B == 0)) { + return {}; + } + } while (0); + + do { + const int pos = ctz(B); + + if (WJR_UNLIKELY(pos == n - 1)) { + return unexpected(error_code::STRING_ERROR); + } + + const uint8_t ch = first[pos + 1]; + + if (WJR_UNLIKELY(ch == 'u')) { + const char *__first = first; + WJR_EXPECTED_SET(__first, check_unicode_codepoint(first + pos + 2, last)); + + if (__first == last) { + return {}; + } + + // two backslash + if ((__first - first) - pos != 6) { + B &= B - 1; + } + } else { + const uint8_t code = escape_table[ch]; + + if (WJR_UNLIKELY(code == 0)) { + return unexpected(error_code::STRING_ERROR); + } + + // backslash + if (WJR_UNLIKELY(code == 0x5c)) { + B &= B - 1; + } + } + + B &= B - 1; + } while (B); + + return {}; +} + } // namespace detail #endif