From d70601eb96a59d21cbe0c43a3035f821095c3fc1 Mon Sep 17 00:00:00 2001 From: Max Sagebaum Date: Sun, 11 Aug 2024 23:03:47 +0200 Subject: [PATCH] Separation of metafunction and runtime regex code. (#1219) * Separation of metafunction and runtime regex code. * Move metafunction work into reflect.h2 Instead of a separate file in ./source * Update of regression tests. * Merge regex metafunction into cpp2::meta namespace The run-time support library stays in `cpp2util.h` and can use the `cpp2::regex::` subnamespace there The compile-time parts stay in `reflect.h2` and use the `cpp2::meta::` namespace there Eventually this can be generalized for user-written metafunctions when those are supported, but for right now this keeps things consistent for the metafunctions supplied in-the-box with cppfront * Add `add_runtime_support_include` to meta For metafunctions that require runtime support via #includes to be injected into lowered Cpp1 programs Also cleanup while I'm at it: - remove headers from `common.h` that are now already included via `cpp2util.h` - line up some thing that got un-lined-up (OCD) --------- Co-authored-by: Herb Sutter --- include/cpp2regex.h | 3935 ++--------- include/cpp2regex.h2 | 2306 +------ include/cpp2util.h | 2 - .../pure2-default-arguments.cpp.output | 4 + .../pure2-default-arguments.cpp.execution | 2 + .../pure2-default-arguments.cpp.output | 4 + .../pure2-default-arguments.cpp.execution | 2 + .../pure2-default-arguments.cpp.output | 7 + .../pure2-default-arguments.cpp.output | 7 + ...s-clause-in-forward-declaration.cpp.output | 8 +- .../gcc-10-c++20/pure2-print.cpp.output | 4 +- ...ed-bounds-safety-with-assert.cpp.execution | 2 +- .../pure2-default-arguments.cpp.execution | 2 + .../gcc-14-c++2b/gcc-version.output | 2 +- ...ed-bounds-safety-with-assert.cpp.execution | 2 +- .../pure2-default-arguments.cpp.execution | 2 + .../pure2-default-arguments.cpp.output | 1 + .../pure2-regex_01_char_matcher.cpp | 1 + 
.../test-results/pure2-regex_02_ranges.cpp | 1 + .../test-results/pure2-regex_03_wildcard.cpp | 1 + .../test-results/pure2-regex_04_start_end.cpp | 1 + .../test-results/pure2-regex_05_classes.cpp | 1 + .../pure2-regex_06_boundaries.cpp | 1 + .../pure2-regex_07_short_classes.cpp | 1 + .../pure2-regex_08_alternatives.cpp | 1 + .../test-results/pure2-regex_09_groups.cpp | 1 + .../test-results/pure2-regex_10_escapes.cpp | 1 + .../pure2-regex_11_group_references.cpp | 1 + .../pure2-regex_12_case_insensitive.cpp | 1 + .../pure2-regex_13_possessive_modifier.cpp | 1 + .../pure2-regex_14_multiline_modifier.cpp | 1 + .../pure2-regex_15_group_modifiers.cpp | 1 + .../pure2-regex_16_perl_syntax_modifier.cpp | 1 + .../test-results/pure2-regex_17_comments.cpp | 1 + .../pure2-regex_18_branch_reset.cpp | 1 + .../test-results/pure2-regex_19_lookahead.cpp | 1 + source/common.h | 9 - source/parse.h | 14 +- source/reflect.h | 5951 +++++++++++++---- source/reflect.h2 | 2128 +++++- source/sema.h | 2 +- source/to_cpp1.h | 7 +- 42 files changed, 7266 insertions(+), 7156 deletions(-) create mode 100644 regression-tests/test-results/apple-clang-14-c++2b/pure2-default-arguments.cpp.output create mode 100644 regression-tests/test-results/apple-clang-15-c++2b/pure2-default-arguments.cpp.execution create mode 100644 regression-tests/test-results/clang-15-c++20-libcpp/pure2-default-arguments.cpp.output create mode 100644 regression-tests/test-results/clang-15-c++20/pure2-default-arguments.cpp.execution create mode 100644 regression-tests/test-results/clang-18-c++20/pure2-default-arguments.cpp.output create mode 100644 regression-tests/test-results/clang-18-c++23-libcpp/pure2-default-arguments.cpp.output create mode 100644 regression-tests/test-results/gcc-13-c++2b/pure2-default-arguments.cpp.execution create mode 100644 regression-tests/test-results/msvc-2022-c++20/pure2-default-arguments.cpp.execution create mode 100644 
regression-tests/test-results/msvc-2022-c++20/pure2-default-arguments.cpp.output diff --git a/include/cpp2regex.h b/include/cpp2regex.h index 01fc3440e..c78cff6db 100644 --- a/include/cpp2regex.h +++ b/include/cpp2regex.h @@ -10,134 +10,67 @@ #line 1 "cpp2regex.h2" -#line 30 "cpp2regex.h2" +#line 27 "cpp2regex.h2" namespace cpp2 { namespace regex { -#line 47 "cpp2regex.h2" -class expression_flags; - -#line 59 "cpp2regex.h2" +#line 43 "cpp2regex.h2" template class match_group; -#line 69 "cpp2regex.h2" +#line 53 "cpp2regex.h2" template class match_return; -#line 77 "cpp2regex.h2" +#line 61 "cpp2regex.h2" template class match_context; -#line 138 "cpp2regex.h2" +#line 122 "cpp2regex.h2" class true_end_func; -#line 146 "cpp2regex.h2" +#line 130 "cpp2regex.h2" class no_reset; -#line 153 "cpp2regex.h2" +#line 137 "cpp2regex.h2" template class on_return; -#line 180 "cpp2regex.h2" +#line 164 "cpp2regex.h2" template class single_class_entry; -#line 189 "cpp2regex.h2" +#line 173 "cpp2regex.h2" template class range_class_entry; -#line 198 "cpp2regex.h2" +#line 182 "cpp2regex.h2" template class combined_class_entry; -#line 207 "cpp2regex.h2" +#line 191 "cpp2regex.h2" template class list_class_entry; -#line 216 "cpp2regex.h2" +#line 200 "cpp2regex.h2" template class named_class_entry; -#line 223 "cpp2regex.h2" +#line 207 "cpp2regex.h2" template class negated_class_entry; -#line 232 "cpp2regex.h2" +#line 216 "cpp2regex.h2" template class shorthand_class_entry; -#line 285 "cpp2regex.h2" -class regex_token; - -#line 311 "cpp2regex.h2" -class regex_token_check; - -#line 330 "cpp2regex.h2" -class regex_token_code; - -#line 349 "cpp2regex.h2" -class regex_token_empty; - -#line 365 "cpp2regex.h2" -class regex_token_list; - -#line 407 "cpp2regex.h2" -class parse_context_group_state; - -#line 468 "cpp2regex.h2" -class parse_context_branch_reset_state; - -#line 511 "cpp2regex.h2" -class parse_context; - -#line 909 "cpp2regex.h2" -class generation_function_context; - - -#line 927 
"cpp2regex.h2" -class generation_context; - -#line 1126 "cpp2regex.h2" -class alternative_token; - -#line 1141 "cpp2regex.h2" -class alternative_token_gen; - -#line 1191 "cpp2regex.h2" +#line 264 "cpp2regex.h2" template class alternative_token_matcher; -#line 1220 "cpp2regex.h2" -class any_token; - -#line 1251 "cpp2regex.h2" -class char_token; - -#line 1388 "cpp2regex.h2" -class class_token; - -#line 1545 "cpp2regex.h2" +#line 342 "cpp2regex.h2" template class class_token_matcher; -#line 1661 "cpp2regex.h2" -class group_ref_token; - -#line 1823 "cpp2regex.h2" -class group_token; - -#line 2129 "cpp2regex.h2" -class lookahead_token; - -#line 2235 "cpp2regex.h2" +#line 497 "cpp2regex.h2" class range_flags; -#line 2244 "cpp2regex.h2" -class range_token; - -#line 2390 "cpp2regex.h2" +#line 506 "cpp2regex.h2" template class range_token_matcher; -#line 2527 "cpp2regex.h2" -class special_range_token; - -#line 2618 "cpp2regex.h2" +#line 678 "cpp2regex.h2" template class regular_expression; -#line 2710 "cpp2regex.h2" -template class regex_generator; - -#line 2784 "cpp2regex.h2" +#line 761 "cpp2regex.h2" } } @@ -165,62 +98,29 @@ template class regex_generator; #ifndef CPP2_CPP2REGEX_H #define CPP2_CPP2REGEX_H - template using matcher_wrapper_type = typename matcher_wrapper::template wrap; template using matcher_context_type = typename matcher::context; -using error_func = std::function; - -#line 30 "cpp2regex.h2" +#line 27 "cpp2regex.h2" namespace cpp2 { namespace regex { -template using bview = std::basic_string_view; template using bstring = std::basic_string; +template using bview = std::basic_string_view; -#line 47 "cpp2regex.h2" -class expression_flags - { -private: cpp2::u8 _value; private: constexpr expression_flags(cpp2::impl::in _val); - -private: constexpr auto operator=(cpp2::impl::in _val) -> expression_flags& ; -public: constexpr auto operator|=(expression_flags const& that) & -> void; -public: constexpr auto operator&=(expression_flags const& that) & -> void; 
-public: constexpr auto operator^=(expression_flags const& that) & -> void; -public: [[nodiscard]] constexpr auto operator|(expression_flags const& that) const& -> expression_flags; -public: [[nodiscard]] constexpr auto operator&(expression_flags const& that) const& -> expression_flags; -public: [[nodiscard]] constexpr auto operator^(expression_flags const& that) const& -> expression_flags; -public: [[nodiscard]] constexpr auto has(expression_flags const& that) const& -> bool; -public: constexpr auto set(expression_flags const& that) & -> void; -public: constexpr auto clear(expression_flags const& that) & -> void; -public: static const expression_flags case_insensitive; -public: static const expression_flags multiple_lines; -public: static const expression_flags single_line; -public: static const expression_flags no_group_captures; -public: static const expression_flags perl_code_syntax; -public: static const expression_flags perl_code_syntax_in_classes; -public: static const expression_flags none; -public: [[nodiscard]] constexpr auto get_raw_value() const& -> cpp2::u8; -public: constexpr explicit expression_flags(); -public: constexpr expression_flags(expression_flags const& that); -public: constexpr auto operator=(expression_flags const& that) -> expression_flags& ; -public: constexpr expression_flags(expression_flags&& that) noexcept; -public: constexpr auto operator=(expression_flags&& that) noexcept -> expression_flags& ; -public: [[nodiscard]] auto operator<=>(expression_flags const& that) const& -> std::strong_ordering = default; -public: [[nodiscard]] auto to_string_impl(cpp2::impl::in prefix, cpp2::impl::in separator) const& -> std::string; -public: [[nodiscard]] auto to_string() const& -> std::string; -public: [[nodiscard]] auto to_code() const& -> std::string; -public: [[nodiscard]] static auto from_string(cpp2::impl::in s) -> expression_flags; -public: [[nodiscard]] static auto from_code(cpp2::impl::in s) -> expression_flags; - -#line 55 "cpp2regex.h2" 
-}; +//----------------------------------------------------------------------- +// +// Helper structures for the expression matching. +// +//----------------------------------------------------------------------- +// -#line 59 "cpp2regex.h2" +// Structure for storing group information. +// template class match_group { public: Iter start {}; @@ -230,10 +130,11 @@ template class match_group public: match_group(auto const& start_, auto const& end_, auto const& matched_); public: match_group(); -#line 65 "cpp2regex.h2" +#line 49 "cpp2regex.h2" }; -#line 69 "cpp2regex.h2" +// Return value for every matcher. +// template class match_return { public: bool matched {false}; @@ -241,10 +142,11 @@ template class match_return public: match_return(auto const& matched_, auto const& pos_); public: match_return(); -#line 73 "cpp2regex.h2" +#line 57 "cpp2regex.h2" }; -#line 77 "cpp2regex.h2" +// Modifiable state during matching. +// template class match_context { public: Iter begin; @@ -254,77 +156,94 @@ template class match_context public: explicit match_context(Iter const& begin_, Iter const& end_); -#line 89 "cpp2regex.h2" +#line 73 "cpp2regex.h2" public: match_context(match_context const& that); -#line 89 "cpp2regex.h2" +#line 73 "cpp2regex.h2" public: auto operator=(match_context const& that) -> match_context& ; -#line 89 "cpp2regex.h2" +#line 73 "cpp2regex.h2" public: match_context(match_context&& that) noexcept; -#line 89 "cpp2regex.h2" +#line 73 "cpp2regex.h2" public: auto operator=(match_context&& that) noexcept -> match_context& ; -#line 93 "cpp2regex.h2" + // Getter and setter for groups + // public: [[nodiscard]] auto get_group(auto const& group) const& -> auto; public: [[nodiscard]] auto get_group_end(auto const& group) const& -> int; -#line 101 "cpp2regex.h2" +#line 85 "cpp2regex.h2" public: [[nodiscard]] auto get_group_start(auto const& group) const& -> int; -#line 107 "cpp2regex.h2" +#line 91 "cpp2regex.h2" public: [[nodiscard]] auto get_group_string(auto const& 
group) const& -> std::string; -#line 114 "cpp2regex.h2" +#line 98 "cpp2regex.h2" public: auto set_group_end(auto const& group, auto const& pos) & -> void; -#line 119 "cpp2regex.h2" +#line 103 "cpp2regex.h2" public: auto set_group_invalid(auto const& group) & -> void; -#line 123 "cpp2regex.h2" +#line 107 "cpp2regex.h2" public: auto set_group_start(auto const& group, auto const& pos) & -> void; -#line 127 "cpp2regex.h2" +#line 111 "cpp2regex.h2" public: [[nodiscard]] auto size() const& -> auto; -#line 131 "cpp2regex.h2" + // Misc functions + // public: [[nodiscard]] auto fail() const& -> auto; public: [[nodiscard]] auto pass(cpp2::impl::in cur) const& -> auto; }; -#line 138 "cpp2regex.h2" +#line 120 "cpp2regex.h2" +// End function that returns a valid match. +// class true_end_func { public: [[nodiscard]] auto operator()(auto const& cur, auto& ctx) const& -> auto; }; -#line 146 "cpp2regex.h2" +#line 128 "cpp2regex.h2" +// Empty group reset function. +// class no_reset { public: auto operator()([[maybe_unused]] auto& unnamed_param_2) const& -> void; }; -#line 153 "cpp2regex.h2" +#line 136 "cpp2regex.h2" +// Evaluate func on destruction of the handle. template class on_return { private: Func func; public: explicit on_return(Func const& f); -#line 157 "cpp2regex.h2" +#line 141 "cpp2regex.h2" public: auto operator=(Func const& f) -> on_return& ; -#line 161 "cpp2regex.h2" +#line 145 "cpp2regex.h2" public: ~on_return() noexcept; public: on_return(on_return const&) = delete; /* No 'that' constructor, suppress copy */ public: auto operator=(on_return const&) -> void = delete; -#line 164 "cpp2regex.h2" +#line 148 "cpp2regex.h2" }; -#line 168 "cpp2regex.h2" +#line 151 "cpp2regex.h2" +// Helper for auto deduction of the Func type. template [[nodiscard]] auto make_on_return(Func const& func) -> auto; -#line 180 "cpp2regex.h2" +#line 155 "cpp2regex.h2" +//----------------------------------------------------------------------- +// +// Character classes for regular expressions. 
+// +//----------------------------------------------------------------------- +// + +// Class syntax: Example: a +// template class single_class_entry { public: [[nodiscard]] static auto includes(cpp2::impl::in c) -> auto; @@ -333,10 +252,12 @@ template class single_class_entry public: single_class_entry(single_class_entry const&) = delete; /* No 'that' constructor, suppress copy */ public: auto operator=(single_class_entry const&) -> void = delete; -#line 184 "cpp2regex.h2" +#line 168 "cpp2regex.h2" }; -#line 189 "cpp2regex.h2" +#line 171 "cpp2regex.h2" +// Class syntax: - Example: a-c +// template class range_class_entry { public: [[nodiscard]] static auto includes(cpp2::impl::in c) -> auto; @@ -345,10 +266,12 @@ template class range_class_entry public: range_class_entry(range_class_entry const&) = delete; /* No 'that' constructor, suppress copy */ public: auto operator=(range_class_entry const&) -> void = delete; -#line 193 "cpp2regex.h2" +#line 177 "cpp2regex.h2" }; -#line 198 "cpp2regex.h2" +#line 180 "cpp2regex.h2" +// Helper for combining two character classes +// template class combined_class_entry { public: [[nodiscard]] static auto includes(cpp2::impl::in c) -> auto; @@ -357,10 +280,12 @@ template class combined_class_entry public: combined_class_entry(combined_class_entry const&) = delete; /* No 'that' constructor, suppress copy */ public: auto operator=(combined_class_entry const&) -> void = delete; -#line 202 "cpp2regex.h2" +#line 186 "cpp2regex.h2" }; -#line 207 "cpp2regex.h2" +#line 189 "cpp2regex.h2" +// Class syntax: Example: abcd +// template class list_class_entry { public: [[nodiscard]] static auto includes(cpp2::impl::in c) -> auto; @@ -369,10 +294,12 @@ template class list_class_entry public: list_class_entry(list_class_entry const&) = delete; /* No 'that' constructor, suppress copy */ public: auto operator=(list_class_entry const&) -> void = delete; -#line 211 "cpp2regex.h2" +#line 195 "cpp2regex.h2" }; -#line 216 "cpp2regex.h2" +#line 198 
"cpp2regex.h2" +// Class syntax: [: class named_class_entry { public: [[nodiscard]] static auto includes(cpp2::impl::in c) -> auto; @@ -381,10 +308,10 @@ template class n public: named_class_entry(named_class_entry const&) = delete; /* No 'that' constructor, suppress copy */ public: auto operator=(named_class_entry const&) -> void = delete; -#line 220 "cpp2regex.h2" +#line 204 "cpp2regex.h2" }; -#line 223 "cpp2regex.h2" +#line 207 "cpp2regex.h2" template class negated_class_entry : public Inner { @@ -393,10 +320,12 @@ template class negated_class_entry public: negated_class_entry(negated_class_entry const&) = delete; /* No 'that' constructor, suppress copy */ public: auto operator=(negated_class_entry const&) -> void = delete; -#line 227 "cpp2regex.h2" +#line 211 "cpp2regex.h2" }; -#line 232 "cpp2regex.h2" +#line 214 "cpp2regex.h2" +// Short class syntax: \ Example: \w +// template class shorthand_class_entry { public: [[nodiscard]] static auto includes(cpp2::impl::in c) -> auto; @@ -405,15 +334,18 @@ template class s public: shorthand_class_entry(shorthand_class_entry const&) = delete; /* No 'that' constructor, suppress copy */ public: auto operator=(shorthand_class_entry const&) -> void = delete; -#line 236 "cpp2regex.h2" +#line 220 "cpp2regex.h2" }; -#line 241 "cpp2regex.h2" +#line 223 "cpp2regex.h2" +// Named basic character classes +// template using digits_class = named_class_entry>; template using lower_class = named_class_entry>; template using upper_class = named_class_entry>; -#line 247 "cpp2regex.h2" +// Named other classes +// template using alnum_class = named_class_entry,upper_class,digits_class>>; template using alpha_class = named_class_entry,upper_class>>; template using ascii_class = named_class_entry>; @@ -428,7 +360,8 @@ template using ver_space_class = named_class_entry using word_class = named_class_entry,single_class_entry>>; template using xdigit_class = named_class_entry,range_class_entry,digits_class>>; -#line 263 "cpp2regex.h2" +// 
Shorthand class entries +// template using short_digits_class = shorthand_class_entry>; template using short_hor_space_class = shorthand_class_entry>; template using short_space_class = shorthand_class_entry>; @@ -441,2280 +374,430 @@ template using short_not_space_class = nega template using short_not_vert_space_class = negated_class_entry>>; template using short_not_word_class = negated_class_entry>>; -#line 285 "cpp2regex.h2" -class regex_token +#line 260 "cpp2regex.h2" +// Regex syntax: | Example: ab|ba +// +// Non greedy implementation. First alternative that matches is chosen. +// +template class alternative_token_matcher { - public: std::string string_rep; - - public: explicit regex_token(cpp2::impl::in str); - -#line 293 "cpp2regex.h2" - public: explicit regex_token(); - -#line 298 "cpp2regex.h2" - public: virtual auto generate_code([[maybe_unused]] generation_context& unnamed_param_2) const -> void = 0; + public: [[nodiscard]] static auto match(auto const& cur, auto& ctx, auto const& end_func, auto const& tail, auto const& ...functions) -> auto; - public: virtual auto add_groups([[maybe_unused]] std::set& unnamed_param_2) const -> void; - public: [[nodiscard]] auto to_string() const& -> std::string; - public: auto set_string(cpp2::impl::in s) & -> void; - public: virtual ~regex_token() noexcept; +#line 270 "cpp2regex.h2" + private: template [[nodiscard]] static auto match_first(auto const& cur, auto& ctx, auto const& end_func, auto const& tail, auto const& cur_func, auto const& cur_reset, Other const& ...other) -> auto; + public: alternative_token_matcher() = default; + public: alternative_token_matcher(alternative_token_matcher const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(alternative_token_matcher const&) -> void = delete; - public: regex_token(regex_token const&) = delete; /* No 'that' constructor, suppress copy */ - public: auto operator=(regex_token const&) -> void = delete; -#line 303 "cpp2regex.h2" +#line 288 
"cpp2regex.h2" }; -using token_ptr = std::shared_ptr; -using token_vec = std::vector; - -#line 309 "cpp2regex.h2" -// Adds a check in code generation. +#line 291 "cpp2regex.h2" +// Regex syntax: . // -class regex_token_check -: public regex_token { +template [[nodiscard]] auto any_token_matcher(auto& cur, auto& ctx) -> bool; + +#line 306 "cpp2regex.h2" +// TODO: Check if vectorization works at some point with this implementation. +// char_token_matcher: (inout cur, inout ctx) -> bool = { +// if !(std::distance(cur, ctx.end) < tokens..size()) { +// return false; +// } +// matched : bool = true; +// (copy i: int = 0) while i < tokens..size() next i += 1 { +// if tokens..data()[i] != cur[i] { +// matched = false; // No break for performance optimization. Without break, the loop vectorizes. +// } +// } +// if matched { +// cur += tokens..size(); +// } +// return matched; +// } -#line 315 "cpp2regex.h2" - private: std::string check; +// char_token_case_insensitive_matcher: (inout cur, inout ctx) -> bool = { +// if !(std::distance(cur, ctx.end) < lower..size()) { +// return false; +// } +// matched : bool = true; +// (copy i : int = 0) while i < lower..size() next i += 1 { +// if !(lower..data()[i] == cur[i] || upper..data()[i] == cur[i]) { +// matched = false; // No break for performance optimization. Without break, the loop vectorizes. 
+// } +// } +// if matched { +// cur += lower..size(); +// } +// return matched; +// } - public: explicit regex_token_check(cpp2::impl::in str, cpp2::impl::in check_); +#line 340 "cpp2regex.h2" +// Regex syntax: [] Example: [abcx-y[:digits:]] +// +template class class_token_matcher + { + public: [[nodiscard]] static auto match(auto& cur, auto& ctx) -> bool; -#line 322 "cpp2regex.h2" - public: auto generate_code(generation_context& ctx) const -> void override; - public: virtual ~regex_token_check() noexcept; +#line 373 "cpp2regex.h2" + private: template [[nodiscard]] static auto match_any(cpp2::impl::in c) -> bool; + public: class_token_matcher() = default; + public: class_token_matcher(class_token_matcher const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(class_token_matcher const&) -> void = delete; - public: regex_token_check(regex_token_check const&) = delete; /* No 'that' constructor, suppress copy */ - public: auto operator=(regex_token_check const&) -> void = delete; +#line 386 "cpp2regex.h2" + // TODO: Implement proper to string + // to_string: () -> bstring = { + // r: bstring = "["; + // if negate { + // r += "^"; + // } + // r += (bstring() + ... + List::to_string()); + // r += "]"; -#line 325 "cpp2regex.h2" + // return r; + // } +#line 397 "cpp2regex.h2" }; -#line 328 "cpp2regex.h2" -// Adds code in code generation. 
+#line 400 "cpp2regex.h2" +// Named short classes // -class regex_token_code -: public regex_token { - -#line 334 "cpp2regex.h2" - private: std::string code; - - public: explicit regex_token_code(cpp2::impl::in str, cpp2::impl::in code_); - -#line 341 "cpp2regex.h2" - public: auto generate_code(generation_context& ctx) const -> void override; - public: virtual ~regex_token_code() noexcept; - - public: regex_token_code(regex_token_code const&) = delete; /* No 'that' constructor, suppress copy */ - public: auto operator=(regex_token_code const&) -> void = delete; +template using named_class_no_new_line = class_token_matcher>; +template using named_class_digits = class_token_matcher>; +template using named_class_hor_space = class_token_matcher>; +template using named_class_space = class_token_matcher>; +template using named_class_ver_space = class_token_matcher>; +template using named_class_word = class_token_matcher>; +template using named_class_not_digits = class_token_matcher>; +template using named_class_not_hor_space = class_token_matcher>; +template using named_class_not_space = class_token_matcher>; +template using named_class_not_ver_space = class_token_matcher>; +template using named_class_not_word = class_token_matcher>; -#line 344 "cpp2regex.h2" -}; +#line 416 "cpp2regex.h2" +// Regex syntax: \ Example: \1 +// \g{name_or_number} +// \k{name_or_number} +// \k +// \k'name_or_number' +// +template [[nodiscard]] auto group_ref_token_matcher(auto& cur, auto& ctx) -> bool; -#line 347 "cpp2regex.h2" -// Token that does not influence the matching. E.g. comment. 
+#line 453 "cpp2regex.h2" +// Regex syntax: $ Example: aa$ // -class regex_token_empty -: public regex_token { +template [[nodiscard]] auto line_end_token_matcher(auto const& cur, auto& ctx) -> bool; -#line 353 "cpp2regex.h2" - public: explicit regex_token_empty(cpp2::impl::in str); +#line 469 "cpp2regex.h2" +// Regex syntax: ^ Example: ^aa +// +template [[nodiscard]] auto line_start_token_matcher(auto const& cur, auto& ctx) -> bool; -#line 357 "cpp2regex.h2" - public: auto generate_code([[maybe_unused]] generation_context& unnamed_param_2) const -> void override; - public: virtual ~regex_token_empty() noexcept; +#line 478 "cpp2regex.h2" +// Regex syntax: (?=) or (?!) or (*pla), etc. Example: (?=AA) +// +// Parsed in group_token. +// +template [[nodiscard]] auto lookahead_token_matcher(auto const& cur, auto& ctx, auto const& func) -> bool; - public: regex_token_empty(regex_token_empty const&) = delete; /* No 'that' constructor, suppress copy */ - public: auto operator=(regex_token_empty const&) -> void = delete; +#line 493 "cpp2regex.h2" +// TODO: @enum as template parameter currently not working. See issue https://github.com/hsutter/cppfront/issues/1147 +#line 496 "cpp2regex.h2" +// Options for range matching. +class range_flags { + public: static const int not_greedy;// Try to take as few as possible. + public: static const int greedy;// Try to take as many as possible. + public: static const int possessive;// Do not give back after a greedy match. No backtracking. -#line 360 "cpp2regex.h2" + public: range_flags() = default; + public: range_flags(range_flags const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(range_flags const&) -> void = delete; }; +#line 502 "cpp2regex.h2" -#line 363 "cpp2regex.h2" -// Represents a list of regex tokens as one token. 
+#line 504 "cpp2regex.h2" +// Regex syntax: {min, max} Example: a{2,4} // -class regex_token_list -: public regex_token { +template class range_token_matcher + { + + public: template [[nodiscard]] static auto match(Iter const& cur, auto& ctx, auto const& inner, auto const& reset_func, auto const& end_func, auto const& tail) -> auto; + +#line 522 "cpp2regex.h2" + private: [[nodiscard]] static auto is_below_upper_bound(cpp2::impl::in count) -> bool; -#line 369 "cpp2regex.h2" - public: token_vec tokens; +#line 527 "cpp2regex.h2" + private: [[nodiscard]] static auto is_below_lower_bound(cpp2::impl::in count) -> bool; - public: explicit regex_token_list(cpp2::impl::in t); +#line 532 "cpp2regex.h2" + private: [[nodiscard]] static auto is_in_range(cpp2::impl::in count) -> bool; -#line 376 "cpp2regex.h2" - public: auto generate_code(generation_context& ctx) const -> void override; +#line 538 "cpp2regex.h2" + private: template [[nodiscard]] static auto match_min_count(Iter const& cur, auto& ctx, auto const& inner, auto const& end_func, int& count_r) -> auto; -#line 382 "cpp2regex.h2" - public: auto add_groups(std::set& groups) const -> void override; +#line 554 "cpp2regex.h2" + private: template [[nodiscard]] static auto match_greedy(cpp2::impl::in count, Iter const& cur, Iter const& last_valid, auto& ctx, auto const& inner, auto const& reset_func, auto const& end_func, auto const& other) -> match_return; -#line 388 "cpp2regex.h2" - public: [[nodiscard]] static auto gen_string(cpp2::impl::in vec) -> std::string; - public: virtual ~regex_token_list() noexcept; +#line 584 "cpp2regex.h2" + private: template [[nodiscard]] static auto match_possessive(Iter const& cur, auto& ctx, auto const& inner, auto const& end_func, auto const& other) -> match_return; - public: regex_token_list(regex_token_list const&) = delete; /* No 'that' constructor, suppress copy */ - public: auto operator=(regex_token_list const&) -> void = delete; +#line 612 "cpp2regex.h2" + private: template 
[[nodiscard]] static auto match_not_greedy(Iter const& cur, auto& ctx, auto const& inner, auto const& end_func, auto const& other) -> match_return; + public: range_token_matcher() = default; + public: range_token_matcher(range_token_matcher const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(range_token_matcher const&) -> void = delete; -#line 395 "cpp2regex.h2" +#line 638 "cpp2regex.h2" }; -#line 398 "cpp2regex.h2" -//----------------------------------------------------------------------- +#line 641 "cpp2regex.h2" +// Regex syntax: \b or \B Example: \bword\b // -// Parse and generation context. +// Matches the start end end of word boundaries. // +template [[nodiscard]] auto word_boundary_token_matcher(auto& cur, auto& ctx) -> bool; + +#line 670 "cpp2regex.h2" //----------------------------------------------------------------------- // - -// State of the current capturing group. See '()' +// Regular expression implementation. // -class parse_context_group_state - { - public: token_vec cur_match_list {}; // Current list of matchers. - public: token_vec alternate_match_lists {}; // List of alternate matcher lists. E.g. ab|cd|xy. - public: expression_flags modifiers {}; // Current modifiers for the group/regular expression. - - // Start a new alternative. - public: auto next_alternative() & -> void; - -#line 421 "cpp2regex.h2" - // Swap this state with the other one. - public: auto swap(parse_context_group_state& t) & -> void; - -#line 428 "cpp2regex.h2" - // Convert this state into a regex token. - public: [[nodiscard]] auto get_as_token() & -> token_ptr; - -#line 440 "cpp2regex.h2" - // Add a token to the current matcher list. - public: auto add(cpp2::impl::in token) & -> void; - -#line 445 "cpp2regex.h2" - // True if current matcher list is empty. - public: [[nodiscard]] auto empty() const& -> bool; - -#line 449 "cpp2regex.h2" - // Apply optimizations to the matcher list. 
- public: static auto post_process_list(token_vec& list) -> void; - public: parse_context_group_state(auto const& cur_match_list_, auto const& alternate_match_lists_, auto const& modifiers_); -public: parse_context_group_state(); - - -#line 463 "cpp2regex.h2" -}; - -#line 466 "cpp2regex.h2" -// State for the branch reset. Takes care of the group numbering. See '(|)'. +//----------------------------------------------------------------------- // -class parse_context_branch_reset_state - { - public: bool is_active {false}; // If we have a branch reset group. - public: int cur_group {1}; // Next group identifier. 0 == global capture group. - public: int max_group {1}; // Maximum group identifier generated. - public: int from {1}; // Starting identifier on new alternative branch. - - // Next group identifier. - public: [[nodiscard]] auto next() & -> int; - -#line 484 "cpp2regex.h2" - // Set next group identifier. - public: auto set_next(cpp2::impl::in g) & -> void; - -#line 490 "cpp2regex.h2" - // Start a new alternative branch. - public: auto next_alternative() & -> void; - -#line 497 "cpp2regex.h2" - // Initialize for a branch reset group. - public: auto set_active_reset(cpp2::impl::in restart) & -> void; - public: parse_context_branch_reset_state(auto const& is_active_, auto const& cur_group_, auto const& max_group_, auto const& from_); -public: parse_context_branch_reset_state(); - - -#line 504 "cpp2regex.h2" -}; -#line 507 "cpp2regex.h2" -// Context during parsing of the regular expressions. -// -// Keeps track of the distributed group identifiers, current parsed group and branch resets. -// -class parse_context +// Regular expression implementation +template class regular_expression { - private: std::string_view regex; // Regular expression string. - private: size_t pos {0}; // Current parsing position. - private: token_ptr root; // Token representing the regular expression. 
- - private: parse_context_group_state cur_group_state {}; - private: parse_context_branch_reset_state cur_branch_reset_state {}; - -#line 521 "cpp2regex.h2" - public: std::map named_groups {}; - - private: error_func error_out; // TODO: Declaring std::function fails for cpp2. - private: bool has_error {false}; - - public: explicit parse_context(cpp2::impl::in r, auto const& e); - -#line 532 "cpp2regex.h2" - // State management functions - // - - // Returned group state needs to be stored and provided in `end_group`. - public: [[nodiscard]] auto start_group() & -> parse_context_group_state; - -#line 545 "cpp2regex.h2" - // `old_state` argument needs to be from start group. - public: [[nodiscard]] auto end_group(cpp2::impl::in old_state) & -> token_ptr; - -#line 553 "cpp2regex.h2" - public: [[nodiscard]] auto get_modifiers() const& -> expression_flags; - -#line 557 "cpp2regex.h2" - public: auto set_modifiers(cpp2::impl::in mod) & -> void; - -#line 561 "cpp2regex.h2" - // Branch reset management functions - // - - public: [[nodiscard]] auto branch_reset_new_state() & -> parse_context_branch_reset_state; - -#line 573 "cpp2regex.h2" - public: auto branch_reset_restore_state(cpp2::impl::in old_state) & -> void; - -#line 580 "cpp2regex.h2" - public: auto next_alternative() & -> void; - -#line 586 "cpp2regex.h2" - // Regex token management - // - public: auto add_token(cpp2::impl::in token) & -> void; - -#line 592 "cpp2regex.h2" - public: [[nodiscard]] auto has_token() const& -> bool; - -#line 596 "cpp2regex.h2" - public: [[nodiscard]] auto pop_token() & -> token_ptr; - -#line 607 "cpp2regex.h2" - public: [[nodiscard]] auto get_as_token() & -> token_ptr; - -#line 611 "cpp2regex.h2" - // Group management - // - public: [[nodiscard]] auto get_cur_group() const& -> int; - -#line 617 "cpp2regex.h2" - public: [[nodiscard]] auto next_group() & -> int; - -#line 621 "cpp2regex.h2" - public: auto set_named_group(cpp2::impl::in name, cpp2::impl::in id) & -> void; - -#line 628 
"cpp2regex.h2" - public: [[nodiscard]] auto get_named_group(cpp2::impl::in name) const& -> int; - -#line 639 "cpp2regex.h2" - // Position management functions - // - public: [[nodiscard]] auto current() const& -> char; + public: template using matcher = matcher_wrapper_type;// TODO: Remove when nested types are allowed: https://github.com/hsutter/cppfront/issues/727 + public: template using context = matcher_context_type>;// TODO: Remove when nested types are allowed: https://github.com/hsutter/cppfront/issues/727 - // Get the next token in the regex, skipping spaces according to the parameters. See `x` and `xx` modifiers. - private: [[nodiscard]] auto get_next_position(cpp2::impl::in in_class, cpp2::impl::in no_skip) const& -> size_t; + public: template class search_return + { + public: bool matched; + public: context ctx; + public: int pos; -#line 683 "cpp2regex.h2" - // Return true if next token is available. - private: [[nodiscard]] auto next_impl(cpp2::impl::in in_class, cpp2::impl::in no_skip) & -> bool; + public: explicit search_return(cpp2::impl::in matched_, context const& ctx_, Iter const& pos_); #line 695 "cpp2regex.h2" - public: [[nodiscard]] auto next() & -> auto; - public: [[nodiscard]] auto next_in_class() & -> auto; - public: [[nodiscard]] auto next_no_skip() & -> auto; - - public: [[nodiscard]] auto next_n(cpp2::impl::in n) & -> bool; - -#line 708 "cpp2regex.h2" - public: [[nodiscard]] auto has_next() const& -> bool; - - private: [[nodiscard]] auto grab_until_impl(cpp2::impl::in e, cpp2::impl::out r, cpp2::impl::in any) & -> bool; - -#line 731 "cpp2regex.h2" - public: [[nodiscard]] auto grab_until(cpp2::impl::in e, cpp2::impl::out r) & -> auto; - public: [[nodiscard]] auto grab_until(cpp2::impl::in e, cpp2::impl::out r) & -> auto; - public: [[nodiscard]] auto grab_until_one_of(cpp2::impl::in e, cpp2::impl::out r) & -> auto; - - public: [[nodiscard]] auto grab_n(cpp2::impl::in n, cpp2::impl::out r) & -> bool; - -#line 748 "cpp2regex.h2" - public: 
[[nodiscard]] auto grab_number() & -> std::string; - -#line 769 "cpp2regex.h2" - private: [[nodiscard]] auto peek_impl(cpp2::impl::in in_class) const& -> char; - -#line 779 "cpp2regex.h2" - public: [[nodiscard]] auto peek() const& -> auto; - public: [[nodiscard]] auto peek_in_class() const& -> auto; - -#line 783 "cpp2regex.h2" - // Parsing functions - // - public: [[nodiscard]] auto parser_group_modifiers(cpp2::impl::in change_str, expression_flags& parser_modifiers) & -> bool; - -#line 839 "cpp2regex.h2" - public: [[nodiscard]] auto parse_until(cpp2::impl::in term) & -> bool; - -#line 877 "cpp2regex.h2" - public: [[nodiscard]] auto parse(cpp2::impl::in modifiers) & -> bool; - -#line 892 "cpp2regex.h2" - // Misc functions - - public: [[nodiscard]] auto get_pos() const& -> auto; - public: [[nodiscard]] auto get_range(cpp2::impl::in start, cpp2::impl::in end) const& -> auto; - public: [[nodiscard]] auto valid() const& -> bool; - - public: [[nodiscard]] auto error(cpp2::impl::in err) & -> token_ptr; - public: parse_context(parse_context const&) = delete; /* No 'that' constructor, suppress copy */ - public: auto operator=(parse_context const&) -> void = delete; - - -#line 903 "cpp2regex.h2" -}; - -#line 906 "cpp2regex.h2" -// Context for one function generation. Generation of functions can be interleaved, -// therefore we buffer the code for one function here. -// -class generation_function_context { - public: std::string code {""}; - public: std::string tabs {""}; - - public: auto add_tabs(cpp2::impl::in c) & -> void; - -#line 920 "cpp2regex.h2" - public: auto remove_tabs(cpp2::impl::in c) & -> void; - public: generation_function_context(auto const& code_, auto const& tabs_); -public: generation_function_context(); - - -#line 923 "cpp2regex.h2" -}; - -#line 926 "cpp2regex.h2" -// Context for generating the state machine. -class generation_context - { - private: std::vector gen_stack {1}; // Element 0 contains all the code. 
- - private: int matcher_func {0}; - private: int reset_func {0}; - private: int temp_name {0}; - private: std::string entry_func {""}; - - // Generation helpers - // - public: [[nodiscard]] auto match_parameters() const& -> std::string; - - // Code generation. - - // Add code line. - public: auto add(cpp2::impl::in s) & -> void; - -#line 948 "cpp2regex.h2" - // Add check for token. The check needs to be a function call that returns a boolean. - public: auto add_check(cpp2::impl::in check) & -> void; - -#line 954 "cpp2regex.h2" - // Add a stateful check. The check needs to return a `match_return`. - public: auto add_statefull(cpp2::impl::in next_func, cpp2::impl::in check) & -> void; - -#line 963 "cpp2regex.h2" - protected: auto start_func_named(cpp2::impl::in name) & -> void; - -#line 974 "cpp2regex.h2" - protected: [[nodiscard]] auto start_func() & -> std::string; - -#line 981 "cpp2regex.h2" - protected: auto end_func_statefull(cpp2::impl::in s) & -> void; - -#line 1000 "cpp2regex.h2" - // Generate the function for a token. - public: [[nodiscard]] auto generate_func(cpp2::impl::in token) & -> std::string; + public: [[nodiscard]] auto group_number() const& -> auto; + public: [[nodiscard]] auto group(cpp2::impl::in g) const& -> auto; + public: [[nodiscard]] auto group_start(cpp2::impl::in g) const& -> auto; + public: [[nodiscard]] auto group_end(cpp2::impl::in g) const& -> auto; -#line 1010 "cpp2regex.h2" - // Generate the reset for a list of group identifiers. 
- public: [[nodiscard]] auto generate_reset(cpp2::impl::in> groups) & -> std::string; + public: [[nodiscard]] auto group(cpp2::impl::in> g) const& -> auto; + public: [[nodiscard]] auto group_start(cpp2::impl::in> g) const& -> auto; + public: [[nodiscard]] auto group_end(cpp2::impl::in> g) const& -> auto; -#line 1033 "cpp2regex.h2" - // Name generation - // - protected: [[nodiscard]] auto gen_func_name() & -> std::string; + private: [[nodiscard]] auto get_group_id(cpp2::impl::in> g) const& -> auto; + public: search_return(search_return const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(search_return const&) -> void = delete; -#line 1041 "cpp2regex.h2" - public: [[nodiscard]] auto next_func_name() & -> std::string; -#line 1045 "cpp2regex.h2" - protected: [[nodiscard]] auto gen_reset_func_name() & -> std::string; +#line 711 "cpp2regex.h2" + }; -#line 1051 "cpp2regex.h2" - public: [[nodiscard]] auto gen_temp() & -> std::string; + public: [[nodiscard]] auto match(cpp2::impl::in> str) const& -> auto; + public: [[nodiscard]] auto match(cpp2::impl::in> str, auto const& start) const& -> auto; + public: [[nodiscard]] auto match(cpp2::impl::in> str, auto const& start, auto const& length) const& -> auto; + public: template [[nodiscard]] auto match(Iter const& start, Iter const& end) const& -> search_return; -#line 1057 "cpp2regex.h2" - // Context management - // - public: [[nodiscard]] auto new_context() & -> generation_function_context*; +#line 724 "cpp2regex.h2" + public: [[nodiscard]] auto search(cpp2::impl::in> str) const& -> auto; + public: [[nodiscard]] auto search(cpp2::impl::in> str, auto const& start) const& -> auto; + public: [[nodiscard]] auto search(cpp2::impl::in> str, auto const& start, auto const& length) const& -> auto; + public: template [[nodiscard]] auto search(Iter const& start, Iter const& end) const& -> search_return; -#line 1067 "cpp2regex.h2" - public: auto finish_context() & -> void; +#line 747 "cpp2regex.h2" + 
public: [[nodiscard]] auto to_string() const& -> auto; -#line 1075 "cpp2regex.h2" - // Misc functions + // Helper functions // - private: [[nodiscard]] auto get_current() & -> generation_function_context*; - -#line 1081 "cpp2regex.h2" - private: [[nodiscard]] auto get_base() & -> generation_function_context*; - -#line 1085 "cpp2regex.h2" - public: [[nodiscard]] auto get_entry_func() const& -> std::string; - -#line 1089 "cpp2regex.h2" - public: [[nodiscard]] auto create_named_group_lookup(cpp2::impl::in> named_groups) const& -> std::string; - -#line 1113 "cpp2regex.h2" - // Run the generation for the token. - public: [[nodiscard]] auto run(cpp2::impl::in token) & -> std::string; - public: generation_context() = default; - public: generation_context(generation_context const&) = delete; /* No 'that' constructor, suppress copy */ - public: auto operator=(generation_context const&) -> void = delete; - - -#line 1119 "cpp2regex.h2" -}; - -#line 1122 "cpp2regex.h2" -// Regex syntax: | Example: ab|ba -// -// Non greedy implementation. First alternative that matches is chosen. -// -class alternative_token -: public regex_token_empty { - // No code gen here. 
alternative_token_gen is created in the parse_context - - public: explicit alternative_token(); - - public: [[nodiscard]] static auto parse(parse_context& ctx) -> token_ptr; - public: virtual ~alternative_token() noexcept; - - public: alternative_token(alternative_token const&) = delete; /* No 'that' constructor, suppress copy */ - public: auto operator=(alternative_token const&) -> void = delete; - - -#line 1139 "cpp2regex.h2" -}; - -class alternative_token_gen -: public regex_token { - -#line 1145 "cpp2regex.h2" - private: token_vec alternatives; - - public: explicit alternative_token_gen(cpp2::impl::in a); - -#line 1152 "cpp2regex.h2" - public: auto generate_code(generation_context& ctx) const -> void override; - -#line 1169 "cpp2regex.h2" - public: auto add_groups(std::set& groups) const -> void override; - -#line 1176 "cpp2regex.h2" - public: [[nodiscard]] static auto gen_string(cpp2::impl::in a) -> std::string; - public: virtual ~alternative_token_gen() noexcept; - - public: alternative_token_gen(alternative_token_gen const&) = delete; /* No 'that' constructor, suppress copy */ - public: auto operator=(alternative_token_gen const&) -> void = delete; - - -#line 1188 "cpp2regex.h2" -}; - -#line 1191 "cpp2regex.h2" -template class alternative_token_matcher - { - public: [[nodiscard]] static auto match(auto const& cur, auto& ctx, auto const& end_func, auto const& tail, auto const& ...functions) -> auto; - -#line 1197 "cpp2regex.h2" - private: template [[nodiscard]] static auto match_first(auto const& cur, auto& ctx, auto const& end_func, auto const& tail, auto const& cur_func, auto const& cur_reset, Other const& ...other) -> auto; - public: alternative_token_matcher() = default; - public: alternative_token_matcher(alternative_token_matcher const&) = delete; /* No 'that' constructor, suppress copy */ - public: auto operator=(alternative_token_matcher const&) -> void = delete; - - -#line 1215 "cpp2regex.h2" -}; - -#line 1218 "cpp2regex.h2" -// Regex syntax: . 
-// -class any_token -: public regex_token_check { - -#line 1224 "cpp2regex.h2" - public: explicit any_token(cpp2::impl::in single_line); - -#line 1228 "cpp2regex.h2" - public: [[nodiscard]] static auto parse(parse_context& ctx) -> token_ptr; - public: virtual ~any_token() noexcept; - - public: any_token(any_token const&) = delete; /* No 'that' constructor, suppress copy */ - public: auto operator=(any_token const&) -> void = delete; + private: [[nodiscard]] static auto get_iter(cpp2::impl::in> str, auto const& pos) -> auto; + public: regular_expression() = default; + public: regular_expression(regular_expression const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(regular_expression const&) -> void = delete; -#line 1233 "cpp2regex.h2" +#line 759 "cpp2regex.h2" }; -#line 1236 "cpp2regex.h2" -template [[nodiscard]] auto any_token_matcher(auto& cur, auto& ctx) -> bool; - -#line 1249 "cpp2regex.h2" -// Regex syntax: a -// -class char_token -: public regex_token { - -#line 1255 "cpp2regex.h2" - private: std::string token; - private: bool ignore_case; - - public: explicit char_token(cpp2::impl::in t, cpp2::impl::in ignore_case_); +} +} +#endif -#line 1264 "cpp2regex.h2" - public: [[nodiscard]] static auto parse(parse_context& ctx) -> token_ptr; -#line 1268 "cpp2regex.h2" - public: auto generate_code(generation_context& ctx) const -> void override; +//=== Cpp2 function definitions ================================================= -#line 1291 "cpp2regex.h2" - public: auto gen_case_insensitive(cpp2::impl::in lower, cpp2::impl::in upper, generation_context& ctx) const& -> void; +#line 1 "cpp2regex.h2" -#line 1312 "cpp2regex.h2" - public: auto gen_case_sensitive(generation_context& ctx) const& -> void; +#line 27 "cpp2regex.h2" +namespace cpp2 { -#line 1330 "cpp2regex.h2" - public: [[nodiscard]] auto add_escapes(std::string str) const& -> std::string; +namespace regex { -#line 1345 "cpp2regex.h2" - public: auto append(char_token const& that) & 
-> void; - public: virtual ~char_token() noexcept; +template match_group::match_group(auto const& start_, auto const& end_, auto const& matched_) + : start{ start_ } + , end{ end_ } + , matched{ matched_ }{} +template match_group::match_group(){} +template match_return::match_return(auto const& matched_, auto const& pos_) + : matched{ matched_ } + , pos{ pos_ }{} +template match_return::match_return(){} - public: char_token(char_token const&) = delete; /* No 'that' constructor, suppress copy */ - public: auto operator=(char_token const&) -> void = delete; +#line 68 "cpp2regex.h2" + template match_context::match_context(Iter const& begin_, Iter const& end_) + : begin{ begin_ } + , end{ end_ }{ +#line 71 "cpp2regex.h2" + } -#line 1349 "cpp2regex.h2" -}; +#line 73 "cpp2regex.h2" + template match_context::match_context(match_context const& that) + : begin{ that.begin } + , end{ that.end } + , groups{ that.groups }{} +#line 73 "cpp2regex.h2" + template auto match_context::operator=(match_context const& that) -> match_context& { + begin = that.begin; + end = that.end; + groups = that.groups; + return *this; } +#line 73 "cpp2regex.h2" + template match_context::match_context(match_context&& that) noexcept + : begin{ std::move(that).begin } + , end{ std::move(that).end } + , groups{ std::move(that).groups }{} +#line 73 "cpp2regex.h2" + template auto match_context::operator=(match_context&& that) noexcept -> match_context& { + begin = std::move(that).begin; + end = std::move(that).end; + groups = std::move(that).groups; + return *this; } -#line 1352 "cpp2regex.h2" -// TODO: Check if vectorization works at some point with this implementation. -// char_token_matcher: (inout cur, inout ctx) -> bool = { -// if !(std::distance(cur, ctx.end) < tokens..size()) { -// return false; -// } -// matched : bool = true; -// (copy i: int = 0) while i < tokens..size() next i += 1 { -// if tokens..data()[i] != cur[i] { -// matched = false; // No break for performance optimization. 
Without break, the loop vectorizes. -// } -// } -// if matched { -// cur += tokens..size(); -// } -// return matched; -// } +#line 77 "cpp2regex.h2" + template [[nodiscard]] auto match_context::get_group(auto const& group) const& -> auto { return CPP2_ASSERT_IN_BOUNDS(groups, group); } -// char_token_case_insensitive_matcher: (inout cur, inout ctx) -> bool = { -// if !(std::distance(cur, ctx.end) < lower..size()) { -// return false; -// } -// matched : bool = true; -// (copy i : int = 0) while i < lower..size() next i += 1 { -// if !(lower..data()[i] == cur[i] || upper..data()[i] == cur[i]) { -// matched = false; // No break for performance optimization. Without break, the loop vectorizes. -// } -// } -// if matched { -// cur += lower..size(); -// } -// return matched; -// } - -#line 1386 "cpp2regex.h2" -// Regex syntax: [] Example: [abcx-y[:digits:]] -// -class class_token -: public regex_token { - -#line 1392 "cpp2regex.h2" - private: bool negate; - private: bool case_insensitive; - private: std::string class_str; - - public: explicit class_token(cpp2::impl::in negate_, cpp2::impl::in case_insensitive_, cpp2::impl::in class_str_, cpp2::impl::in str); - -#line 1404 "cpp2regex.h2" - // TODO: Rework class generation: Generate check functions for classes. 
- public: [[nodiscard]] static auto parse(parse_context& ctx) -> token_ptr; - -#line 1530 "cpp2regex.h2" - public: auto generate_code(generation_context& ctx) const -> void override; - -#line 1535 "cpp2regex.h2" - private: [[nodiscard]] static auto create_matcher(cpp2::impl::in name, cpp2::impl::in template_arguments) -> std::string; - public: virtual ~class_token() noexcept; - - public: class_token(class_token const&) = delete; /* No 'that' constructor, suppress copy */ - public: auto operator=(class_token const&) -> void = delete; - - -#line 1542 "cpp2regex.h2" -}; - -#line 1545 "cpp2regex.h2" -template class class_token_matcher - { - public: [[nodiscard]] static auto match(auto& cur, auto& ctx) -> bool; - -#line 1576 "cpp2regex.h2" - private: template [[nodiscard]] static auto match_any(cpp2::impl::in c) -> bool; - public: class_token_matcher() = default; - public: class_token_matcher(class_token_matcher const&) = delete; /* No 'that' constructor, suppress copy */ - public: auto operator=(class_token_matcher const&) -> void = delete; - - -#line 1589 "cpp2regex.h2" - // TODO: Implement proper to string - // to_string: () -> bstring = { - // r: bstring = "["; - // if negate { - // r += "^"; - // } - // r += (bstring() + ... 
+ List::to_string()); - // r += "]"; - - // return r; - // } -#line 1600 "cpp2regex.h2" -}; - -#line 1603 "cpp2regex.h2" -// Regex syntax: \a or \n or \[ -// -[[nodiscard]] auto escape_token_parse(parse_context& ctx) -> token_ptr; - -#line 1644 "cpp2regex.h2" -// Regex syntax: \K Example: ab\Kcd -// -[[nodiscard]] auto global_group_reset_token_parse(parse_context& ctx) -> token_ptr; - -#line 1655 "cpp2regex.h2" -// Regex syntax: \ Example: \1 -// \g{name_or_number} -// \k{name_or_number} -// \k -// \k'name_or_number' -// -class group_ref_token -: public regex_token { - -#line 1665 "cpp2regex.h2" - private: int id; - private: bool case_insensitive; - - public: explicit group_ref_token(cpp2::impl::in id_, cpp2::impl::in case_insensitive_, cpp2::impl::in str); - -#line 1675 "cpp2regex.h2" - public: [[nodiscard]] static auto parse(parse_context& ctx) -> token_ptr; - -#line 1776 "cpp2regex.h2" - public: auto generate_code(generation_context& ctx) const -> void override; - public: virtual ~group_ref_token() noexcept; - - public: group_ref_token(group_ref_token const&) = delete; /* No 'that' constructor, suppress copy */ - public: auto operator=(group_ref_token const&) -> void = delete; - - -#line 1779 "cpp2regex.h2" -}; - -#line 1782 "cpp2regex.h2" -template [[nodiscard]] auto group_ref_token_matcher(auto& cur, auto& ctx) -> bool; - -#line 1813 "cpp2regex.h2" -// Regex syntax: () Example: (abc) -// (?:) (?i:abc) -// (?<>:) (?:abc) -// (?#) (#Step 1 finished) -// (?|) (?|(abc)|(cde)) -// (?=) (?=abc) -// (?!) 
(?!abc) -// (*: syntax, cpp2::impl::in positive) -> token_ptr; - -#line 1844 "cpp2regex.h2" - public: [[nodiscard]] static auto parse(parse_context& ctx) -> token_ptr; - -#line 1981 "cpp2regex.h2" - public: [[nodiscard]] static auto gen_string(cpp2::impl::in name, cpp2::impl::in name_brackets, cpp2::impl::in has_modifier, cpp2::impl::in modifiers, cpp2::impl::in inner_) -> std::string; - -#line 1999 "cpp2regex.h2" - public: auto generate_code(generation_context& ctx) const -> void override; - -#line 2019 "cpp2regex.h2" - public: auto add_groups(std::set& groups) const -> void override; - public: virtual ~group_token() noexcept; - - public: group_token() = default; - public: group_token(group_token const&) = delete; /* No 'that' constructor, suppress copy */ - public: auto operator=(group_token const&) -> void = delete; - - -#line 2026 "cpp2regex.h2" -}; - -#line 2029 "cpp2regex.h2" -// Regex syntax: \x or \x{} Example: \x{62} -// -[[nodiscard]] auto hexadecimal_token_parse(parse_context& ctx) -> token_ptr; - -#line 2070 "cpp2regex.h2" -// Regex syntax: $ Example: aa$ -// -[[nodiscard]] auto line_end_token_parse(parse_context& ctx) -> token_ptr; - -#line 2089 "cpp2regex.h2" -template [[nodiscard]] auto line_end_token_matcher(auto const& cur, auto& ctx) -> bool; - -#line 2103 "cpp2regex.h2" -// Regex syntax: ^ Example: ^aa -// -[[nodiscard]] auto line_start_token_parse(parse_context& ctx) -> token_ptr; - -#line 2118 "cpp2regex.h2" -template [[nodiscard]] auto line_start_token_matcher(auto const& cur, auto& ctx) -> bool; - -#line 2125 "cpp2regex.h2" -// Regex syntax: (?=) or (?!) or (*pla), etc. Example: (?=AA) -// -// Parsed in group_token. 
-// -class lookahead_token -: public regex_token { - -#line 2133 "cpp2regex.h2" - protected: bool positive; - public: token_ptr inner {nullptr}; - - public: explicit lookahead_token(cpp2::impl::in positive_); - -#line 2140 "cpp2regex.h2" - public: auto generate_code(generation_context& ctx) const -> void override; - -#line 2146 "cpp2regex.h2" - public: auto add_groups(std::set& groups) const -> void override; - public: virtual ~lookahead_token() noexcept; - - public: lookahead_token(lookahead_token const&) = delete; /* No 'that' constructor, suppress copy */ - public: auto operator=(lookahead_token const&) -> void = delete; - - -#line 2149 "cpp2regex.h2" -}; - -template [[nodiscard]] auto lookahead_token_matcher(auto const& cur, auto& ctx, auto const& func) -> bool; - -#line 2162 "cpp2regex.h2" -// Named character classes -// -[[nodiscard]] auto named_class_token_parse(parse_context& ctx) -> token_ptr; - -#line 2190 "cpp2regex.h2" -template using named_class_no_new_line = class_token_matcher>; -template using named_class_digits = class_token_matcher>; -template using named_class_hor_space = class_token_matcher>; -template using named_class_space = class_token_matcher>; -template using named_class_ver_space = class_token_matcher>; -template using named_class_word = class_token_matcher>; - -template using named_class_not_digits = class_token_matcher>; -template using named_class_not_hor_space = class_token_matcher>; -template using named_class_not_space = class_token_matcher>; -template using named_class_not_ver_space = class_token_matcher>; -template using named_class_not_word = class_token_matcher>; - -#line 2204 "cpp2regex.h2" -// Regex syntax: \o{} Example: \o{142} -// -[[nodiscard]] auto octal_token_parse(parse_context& ctx) -> token_ptr; - -#line 2231 "cpp2regex.h2" -// TODO: @enum as template parameter currently not working. See issue https://github.com/hsutter/cppfront/issues/1147 - -#line 2234 "cpp2regex.h2" -// Options for range matching. 
-class range_flags { - public: static const int not_greedy;// Try to take as few as possible. - public: static const int greedy;// Try to take as many as possible. - public: static const int possessive;// Do not give back after a greedy match. No backtracking. - - public: range_flags() = default; - public: range_flags(range_flags const&) = delete; /* No 'that' constructor, suppress copy */ - public: auto operator=(range_flags const&) -> void = delete; -}; -#line 2240 "cpp2regex.h2" - -#line 2242 "cpp2regex.h2" -// Regex syntax: {min, max} Example: a{2,4} -// -class range_token -: public regex_token { - -#line 2248 "cpp2regex.h2" - protected: int min_count {-1}; - protected: int max_count {-1}; - protected: int kind {range_flags::greedy}; - protected: token_ptr inner_token {nullptr}; - - public: explicit range_token(); - - public: [[nodiscard]] static auto parse(parse_context& ctx) -> token_ptr; - -#line 2328 "cpp2regex.h2" - public: auto parse_modifier(parse_context& ctx) & -> void; - -#line 2340 "cpp2regex.h2" - public: [[nodiscard]] auto gen_mod_string() const& -> std::string; - -#line 2353 "cpp2regex.h2" - public: [[nodiscard]] auto gen_range_string() const& -> std::string; - -#line 2372 "cpp2regex.h2" - public: auto generate_code(generation_context& ctx) const -> void override; - -#line 2383 "cpp2regex.h2" - public: auto add_groups(std::set& groups) const -> void override; - public: virtual ~range_token() noexcept; - - public: range_token(range_token const&) = delete; /* No 'that' constructor, suppress copy */ - public: auto operator=(range_token const&) -> void = delete; - - -#line 2387 "cpp2regex.h2" -}; - -#line 2390 "cpp2regex.h2" -template class range_token_matcher - { - - public: template [[nodiscard]] static auto match(Iter const& cur, auto& ctx, auto const& inner, auto const& reset_func, auto const& end_func, auto const& tail) -> auto; - -#line 2406 "cpp2regex.h2" - private: [[nodiscard]] static auto is_below_upper_bound(cpp2::impl::in count) -> bool; - 
-#line 2411 "cpp2regex.h2" - private: [[nodiscard]] static auto is_below_lower_bound(cpp2::impl::in count) -> bool; - -#line 2416 "cpp2regex.h2" - private: [[nodiscard]] static auto is_in_range(cpp2::impl::in count) -> bool; - -#line 2422 "cpp2regex.h2" - private: template [[nodiscard]] static auto match_min_count(Iter const& cur, auto& ctx, auto const& inner, auto const& end_func, int& count_r) -> auto; - -#line 2438 "cpp2regex.h2" - private: template [[nodiscard]] static auto match_greedy(cpp2::impl::in count, Iter const& cur, Iter const& last_valid, auto& ctx, auto const& inner, auto const& reset_func, auto const& end_func, auto const& other) -> match_return; - -#line 2468 "cpp2regex.h2" - private: template [[nodiscard]] static auto match_possessive(Iter const& cur, auto& ctx, auto const& inner, auto const& end_func, auto const& other) -> match_return; - -#line 2496 "cpp2regex.h2" - private: template [[nodiscard]] static auto match_not_greedy(Iter const& cur, auto& ctx, auto const& inner, auto const& end_func, auto const& other) -> match_return; - public: range_token_matcher() = default; - public: range_token_matcher(range_token_matcher const&) = delete; /* No 'that' constructor, suppress copy */ - public: auto operator=(range_token_matcher const&) -> void = delete; - - -#line 2522 "cpp2regex.h2" -}; - -#line 2525 "cpp2regex.h2" -// Regex syntax: *, +, or ? Example: aa* -// -class special_range_token -: public range_token { - -#line 2531 "cpp2regex.h2" - public: [[nodiscard]] static auto parse(parse_context& ctx) -> token_ptr; - public: virtual ~special_range_token() noexcept; - - public: special_range_token() = default; - public: special_range_token(special_range_token const&) = delete; /* No 'that' constructor, suppress copy */ - public: auto operator=(special_range_token const&) -> void = delete; - - -#line 2561 "cpp2regex.h2" -}; - -#line 2564 "cpp2regex.h2" -// Regex syntax: \b or \B Example: \bword\b -// -// Matches the start end end of word boundaries. 
-// -[[nodiscard]] auto word_boundary_token_parse(parse_context& ctx) -> token_ptr; - -#line 2585 "cpp2regex.h2" -template [[nodiscard]] auto word_boundary_token_matcher(auto& cur, auto& ctx) -> bool; - -#line 2610 "cpp2regex.h2" -//----------------------------------------------------------------------- -// -// Regular expression implementation. -// -//----------------------------------------------------------------------- -// - -// Regular expression implementation -template class regular_expression - { - public: template using matcher = matcher_wrapper_type;// TODO: Remove when nested types are allowed: https://github.com/hsutter/cppfront/issues/727 - public: template using context = matcher_context_type>;// TODO: Remove when nested types are allowed: https://github.com/hsutter/cppfront/issues/727 - - public: template class search_return - { - public: bool matched; - public: context ctx; - public: int pos; - - public: explicit search_return(cpp2::impl::in matched_, context const& ctx_, Iter const& pos_); - -#line 2635 "cpp2regex.h2" - public: [[nodiscard]] auto group_number() const& -> auto; - public: [[nodiscard]] auto group(cpp2::impl::in g) const& -> auto; - public: [[nodiscard]] auto group_start(cpp2::impl::in g) const& -> auto; - public: [[nodiscard]] auto group_end(cpp2::impl::in g) const& -> auto; - - public: [[nodiscard]] auto group(cpp2::impl::in> g) const& -> auto; - public: [[nodiscard]] auto group_start(cpp2::impl::in> g) const& -> auto; - public: [[nodiscard]] auto group_end(cpp2::impl::in> g) const& -> auto; - - private: [[nodiscard]] auto get_group_id(cpp2::impl::in> g) const& -> auto; - public: search_return(search_return const&) = delete; /* No 'that' constructor, suppress copy */ - public: auto operator=(search_return const&) -> void = delete; - - -#line 2651 "cpp2regex.h2" - }; - - public: [[nodiscard]] auto match(cpp2::impl::in> str) const& -> auto; - public: [[nodiscard]] auto match(cpp2::impl::in> str, auto const& start) const& -> auto; - 
public: [[nodiscard]] auto match(cpp2::impl::in> str, auto const& start, auto const& length) const& -> auto; - public: template [[nodiscard]] auto match(Iter const& start, Iter const& end) const& -> search_return; - -#line 2664 "cpp2regex.h2" - public: [[nodiscard]] auto search(cpp2::impl::in> str) const& -> auto; - public: [[nodiscard]] auto search(cpp2::impl::in> str, auto const& start) const& -> auto; - public: [[nodiscard]] auto search(cpp2::impl::in> str, auto const& start, auto const& length) const& -> auto; - public: template [[nodiscard]] auto search(Iter const& start, Iter const& end) const& -> search_return; - -#line 2687 "cpp2regex.h2" - public: [[nodiscard]] auto to_string() const& -> auto; - - // Helper functions - // - private: [[nodiscard]] static auto get_iter(cpp2::impl::in> str, auto const& pos) -> auto; - public: regular_expression() = default; - public: regular_expression(regular_expression const&) = delete; /* No 'that' constructor, suppress copy */ - public: auto operator=(regular_expression const&) -> void = delete; - - -#line 2699 "cpp2regex.h2" -}; - -#line 2702 "cpp2regex.h2" -//----------------------------------------------------------------------- -// -// Parser for regular expression. -// -//----------------------------------------------------------------------- -// - -// Parser and generator for regular expressions. 
-template class regex_generator - { - private: std::string_view regex; - private: std::string modifier {""}; - private: std::string modifier_escape {""}; - - private: Error_out error_out; - - private: std::string source {""}; - - public: explicit regex_generator(cpp2::impl::in r, Error_out const& e); - -#line 2725 "cpp2regex.h2" - public: [[nodiscard]] auto parse() & -> std::string; - -#line 2760 "cpp2regex.h2" - private: auto extract_modifiers() & -> void; - public: regex_generator(regex_generator const&) = delete; /* No 'that' constructor, suppress copy */ - public: auto operator=(regex_generator const&) -> void = delete; - - -#line 2774 "cpp2regex.h2" -}; - -template [[nodiscard]] auto generate_regex(cpp2::impl::in regex, Err const& err) -> std::string; - -#line 2784 "cpp2regex.h2" -} -} - - -#endif - -#line 1 "cpp2regex.h2" - -//=== Cpp2 function definitions ================================================= - -#line 1 "cpp2regex.h2" - -#line 30 "cpp2regex.h2" -namespace cpp2 { - -namespace regex { - - -constexpr expression_flags::expression_flags(cpp2::impl::in _val) - : _value{ cpp2::unsafe_narrow(_val) } { } - -constexpr auto expression_flags::operator=(cpp2::impl::in _val) -> expression_flags& { - _value = cpp2::unsafe_narrow(_val); - return *this; } -constexpr auto expression_flags::operator|=(expression_flags const& that) & -> void { _value |= that._value; } -constexpr auto expression_flags::operator&=(expression_flags const& that) & -> void { _value &= that._value; } -constexpr auto expression_flags::operator^=(expression_flags const& that) & -> void { _value ^= that._value; } -[[nodiscard]] constexpr auto expression_flags::operator|(expression_flags const& that) const& -> expression_flags { return _value | that._value; } -[[nodiscard]] constexpr auto expression_flags::operator&(expression_flags const& that) const& -> expression_flags { return _value & that._value; } -[[nodiscard]] constexpr auto expression_flags::operator^(expression_flags const& that) 
const& -> expression_flags { return _value ^ that._value; } -[[nodiscard]] constexpr auto expression_flags::has(expression_flags const& that) const& -> bool { return _value & that._value; } -constexpr auto expression_flags::set(expression_flags const& that) & -> void { _value |= that._value; } -constexpr auto expression_flags::clear(expression_flags const& that) & -> void { _value &= ~that._value; } -inline CPP2_CONSTEXPR expression_flags expression_flags::case_insensitive{ 1 }; - -inline CPP2_CONSTEXPR expression_flags expression_flags::multiple_lines{ 2 }; - -inline CPP2_CONSTEXPR expression_flags expression_flags::single_line{ 4 }; - -inline CPP2_CONSTEXPR expression_flags expression_flags::no_group_captures{ 8 }; - -inline CPP2_CONSTEXPR expression_flags expression_flags::perl_code_syntax{ 16 }; - -inline CPP2_CONSTEXPR expression_flags expression_flags::perl_code_syntax_in_classes{ 32 }; - -inline CPP2_CONSTEXPR expression_flags expression_flags::none{ 0 }; - -[[nodiscard]] constexpr auto expression_flags::get_raw_value() const& -> cpp2::u8 { return _value; } -constexpr expression_flags::expression_flags() - : _value{ none._value }{} -constexpr expression_flags::expression_flags(expression_flags const& that) - : _value{ that._value }{} -constexpr auto expression_flags::operator=(expression_flags const& that) -> expression_flags& { - _value = that._value; - return *this;} -constexpr expression_flags::expression_flags(expression_flags&& that) noexcept - : _value{ std::move(that)._value }{} -constexpr auto expression_flags::operator=(expression_flags&& that) noexcept -> expression_flags& { - _value = std::move(that)._value; - return *this;} -[[nodiscard]] auto expression_flags::to_string_impl(cpp2::impl::in prefix, cpp2::impl::in separator) const& -> std::string{ - -std::string ret {"("}; - -std::string sep {}; -if ((*this) == none) {return "(none)"; } - -auto pref {cpp2::to_string(prefix)}; -if (((*this) & case_insensitive) == case_insensitive) {ret += sep + 
pref + "case_insensitive";sep = separator;} -if (((*this) & multiple_lines) == multiple_lines) {ret += sep + pref + "multiple_lines";sep = separator;} -if (((*this) & single_line) == single_line) {ret += sep + pref + "single_line";sep = separator;} -if (((*this) & no_group_captures) == no_group_captures) {ret += sep + pref + "no_group_captures";sep = separator;} -if (((*this) & perl_code_syntax) == perl_code_syntax) {ret += sep + pref + "perl_code_syntax";sep = separator;} -if (((*this) & perl_code_syntax_in_classes) == perl_code_syntax_in_classes) {ret += sep + cpp2::move(pref) + "perl_code_syntax_in_classes";sep = separator;} -return cpp2::move(ret) + ")"; -} - -[[nodiscard]] auto expression_flags::to_string() const& -> std::string { return to_string_impl("", ", "); } -[[nodiscard]] auto expression_flags::to_code() const& -> std::string { return to_string_impl("expression_flags::", " | "); } -[[nodiscard]] auto expression_flags::from_string(cpp2::impl::in s) -> expression_flags{ - -auto ret {none}; -do {{ -for ( auto const& x : cpp2::string_util::split_string_list(s) ) { -if ("case_insensitive" == x) {ret |= case_insensitive;} -else {if ("multiple_lines" == x) {ret |= multiple_lines;} -else {if ("single_line" == x) {ret |= single_line;} -else {if ("no_group_captures" == x) {ret |= no_group_captures;} -else {if ("perl_code_syntax" == x) {ret |= perl_code_syntax;} -else {if ("perl_code_syntax_in_classes" == x) {ret |= perl_code_syntax_in_classes;} -else {if ("none" == x) {ret |= none;} -else {goto BREAK_outer;} -#line 1 "cpp2regex.h2" -}}}}}} -} - -return ret; -} CPP2_CONTINUE_BREAK(outer) } - while ( -false -); -CPP2_UFCS(report_violation)(cpp2::type_safety, CPP2_UFCS(c_str)(("can't convert string '" + cpp2::to_string(s) + "' to flag_enum of type expression_flags"))); -return none; -} - -[[nodiscard]] auto expression_flags::from_code(cpp2::impl::in s) -> expression_flags{ -std::string str {s}; return from_string(cpp2::string_util::replace_all(cpp2::move(str), 
"expression_flags::", "")); } -template match_group::match_group(auto const& start_, auto const& end_, auto const& matched_) - : start{ start_ } - , end{ end_ } - , matched{ matched_ }{} -template match_group::match_group(){} -template match_return::match_return(auto const& matched_, auto const& pos_) - : matched{ matched_ } - , pos{ pos_ }{} -template match_return::match_return(){} - -#line 38 "cpp2regex.h2" -//----------------------------------------------------------------------- -// -// Helper structures for the expression matching. -// -//----------------------------------------------------------------------- -// - -// Possible modifiers for a regular expression. -// - -#line 49 "cpp2regex.h2" - // mod: i - // mod: m - // mod: s - // mod: n - // mod: x - // mod: xx - -#line 57 "cpp2regex.h2" -// Structure for storing group information. -// - -#line 67 "cpp2regex.h2" -// Return value for every matcher. -// - -#line 75 "cpp2regex.h2" -// Modifiable state during matching. -// - -#line 84 "cpp2regex.h2" - template match_context::match_context(Iter const& begin_, Iter const& end_) - : begin{ begin_ } - , end{ end_ }{ - -#line 87 "cpp2regex.h2" - } - -#line 89 "cpp2regex.h2" - template match_context::match_context(match_context const& that) - : begin{ that.begin } - , end{ that.end } - , groups{ that.groups }{} -#line 89 "cpp2regex.h2" - template auto match_context::operator=(match_context const& that) -> match_context& { - begin = that.begin; - end = that.end; - groups = that.groups; - return *this; } -#line 89 "cpp2regex.h2" - template match_context::match_context(match_context&& that) noexcept - : begin{ std::move(that).begin } - , end{ std::move(that).end } - , groups{ std::move(that).groups }{} -#line 89 "cpp2regex.h2" - template auto match_context::operator=(match_context&& that) noexcept -> match_context& { - begin = std::move(that).begin; - end = std::move(that).end; - groups = std::move(that).groups; - return *this; } - - // Getter and setter for groups - 
// -#line 93 "cpp2regex.h2" - template [[nodiscard]] auto match_context::get_group(auto const& group) const& -> auto { return CPP2_ASSERT_IN_BOUNDS(groups, group); } - -#line 95 "cpp2regex.h2" - template [[nodiscard]] auto match_context::get_group_end(auto const& group) const& -> int{ - if (cpp2::impl::cmp_greater_eq(group,max_groups) || !(CPP2_ASSERT_IN_BOUNDS(groups, group).matched)) { - return 0; - } - return cpp2::unsafe_narrow(std::distance(begin, CPP2_ASSERT_IN_BOUNDS(groups, group).end)); - } -#line 101 "cpp2regex.h2" - template [[nodiscard]] auto match_context::get_group_start(auto const& group) const& -> int{ - if (cpp2::impl::cmp_greater_eq(group,max_groups) || !(CPP2_ASSERT_IN_BOUNDS(groups, group).matched)) { - return 0; - } - return cpp2::unsafe_narrow(std::distance(begin, CPP2_ASSERT_IN_BOUNDS(groups, group).start)); - } -#line 107 "cpp2regex.h2" - template [[nodiscard]] auto match_context::get_group_string(auto const& group) const& -> std::string{ - if (cpp2::impl::cmp_greater_eq(group,max_groups) || !(CPP2_ASSERT_IN_BOUNDS(groups, group).matched)) { - return ""; - } - return std::string(CPP2_ASSERT_IN_BOUNDS(groups, group).start, CPP2_ASSERT_IN_BOUNDS(groups, group).end); - } - -#line 114 "cpp2regex.h2" - template auto match_context::set_group_end(auto const& group, auto const& pos) & -> void{ - CPP2_ASSERT_IN_BOUNDS(groups, group).end = pos; - CPP2_ASSERT_IN_BOUNDS(groups, group).matched = true; - } - -#line 119 "cpp2regex.h2" - template auto match_context::set_group_invalid(auto const& group) & -> void{ - CPP2_ASSERT_IN_BOUNDS(groups, group).matched = false; - } - -#line 123 "cpp2regex.h2" - template auto match_context::set_group_start(auto const& group, auto const& pos) & -> void{ - CPP2_ASSERT_IN_BOUNDS(groups, group).start = pos; - } - -#line 127 "cpp2regex.h2" - template [[nodiscard]] auto match_context::size() const& -> auto { return max_groups; } - - // Misc functions - // -#line 131 "cpp2regex.h2" - template [[nodiscard]] auto 
match_context::fail() const& -> auto { return match_return(false, end); } -#line 132 "cpp2regex.h2" - template [[nodiscard]] auto match_context::pass(cpp2::impl::in cur) const& -> auto { return match_return(true, cur); } - -#line 136 "cpp2regex.h2" -// End function that returns a valid match. -// - -#line 140 "cpp2regex.h2" - [[nodiscard]] auto true_end_func::operator()(auto const& cur, auto& ctx) const& -> auto { return ctx.pass(cur); } - -#line 144 "cpp2regex.h2" -// Empty group reset function. -// - -#line 148 "cpp2regex.h2" - auto no_reset::operator()([[maybe_unused]] auto& unnamed_param_2) const& -> void{} - -#line 152 "cpp2regex.h2" -// Evaluate func on destruction of the handle. - -#line 157 "cpp2regex.h2" - template on_return::on_return(Func const& f) - : func{ f }{ - -#line 159 "cpp2regex.h2" - } -#line 157 "cpp2regex.h2" - template auto on_return::operator=(Func const& f) -> on_return& { - func = f; - return *this; - -#line 159 "cpp2regex.h2" - } - -#line 161 "cpp2regex.h2" - template on_return::~on_return() noexcept{ - cpp2::move(*this).func(); - } - -#line 167 "cpp2regex.h2" -// Helper for auto deduction of the Func type. -#line 168 "cpp2regex.h2" -template [[nodiscard]] auto make_on_return(Func const& func) -> auto { return on_return(func); } - -#line 171 "cpp2regex.h2" -//----------------------------------------------------------------------- -// -// Character classes for regular expressions. 
-// -//----------------------------------------------------------------------- -// - -// Class syntax: Example: a -// - -#line 182 "cpp2regex.h2" - template [[nodiscard]] auto single_class_entry::includes(cpp2::impl::in c) -> auto { return c == C; } -#line 183 "cpp2regex.h2" - template [[nodiscard]] auto single_class_entry::to_string() -> auto { return bstring(1, C); } - -#line 187 "cpp2regex.h2" -// Class syntax: - Example: a-c -// - -#line 191 "cpp2regex.h2" - template [[nodiscard]] auto range_class_entry::includes(cpp2::impl::in c) -> auto { return [_0 = Start, _1 = c, _2 = End]{ return cpp2::impl::cmp_less_eq(_0,_1) && cpp2::impl::cmp_less_eq(_1,_2); }(); } -#line 192 "cpp2regex.h2" - template [[nodiscard]] auto range_class_entry::to_string() -> auto { return "" + cpp2::to_string(Start) + "-" + cpp2::to_string(End) + ""; } - -#line 196 "cpp2regex.h2" -// Helper for combining two character classes -// - -#line 200 "cpp2regex.h2" - template [[nodiscard]] auto combined_class_entry::includes(cpp2::impl::in c) -> auto { return (false || ... || List::includes(c)); } -#line 201 "cpp2regex.h2" - template [[nodiscard]] auto combined_class_entry::to_string() -> auto { return (bstring() + ... + List::to_string()); } - -#line 205 "cpp2regex.h2" -// Class syntax: Example: abcd -// - -#line 209 "cpp2regex.h2" - template [[nodiscard]] auto list_class_entry::includes(cpp2::impl::in c) -> auto { return (false || ... || (List == c)); } -#line 210 "cpp2regex.h2" - template [[nodiscard]] auto list_class_entry::to_string() -> auto { return (bstring() + ... 
+ List); } - -#line 214 "cpp2regex.h2" -// Class syntax: [: [[nodiscard]] auto named_class_entry::includes(cpp2::impl::in c) -> auto { return Inner::includes(c); } -#line 219 "cpp2regex.h2" - template [[nodiscard]] auto named_class_entry::to_string() -> auto { return "[:" + cpp2::to_string(Name.data()) + ":]"; } - -#line 226 "cpp2regex.h2" - template [[nodiscard]] auto negated_class_entry::includes(cpp2::impl::in c) -> auto { return !(Inner::includes(c)); } - -#line 230 "cpp2regex.h2" -// Short class syntax: \ Example: \w -// - -#line 234 "cpp2regex.h2" - template [[nodiscard]] auto shorthand_class_entry::includes(cpp2::impl::in c) -> auto { return Inner::includes(c); } -#line 235 "cpp2regex.h2" - template [[nodiscard]] auto shorthand_class_entry::to_string() -> auto { return Name.str(); } - -#line 239 "cpp2regex.h2" -// Named basic character classes -// - -#line 245 "cpp2regex.h2" -// Named other classes -// - -#line 261 "cpp2regex.h2" -// Shorthand class entries -// - -#line 276 "cpp2regex.h2" -//----------------------------------------------------------------------- -// -// Tokens for regular expressions. -// -//----------------------------------------------------------------------- -// - -// Basic class for a regex token. -// - -#line 289 "cpp2regex.h2" - regex_token::regex_token(cpp2::impl::in str) - : string_rep{ str }{ - -#line 291 "cpp2regex.h2" - } - -#line 293 "cpp2regex.h2" - regex_token::regex_token() - : string_rep{ "" }{ - -#line 295 "cpp2regex.h2" - } - - //parse: (inout ctx: parse_context) -> token_ptr; - // Generate the matching code. - -#line 300 "cpp2regex.h2" - auto regex_token::add_groups([[maybe_unused]] std::set& unnamed_param_2) const -> void{}// Adds all group indices to the set. -#line 301 "cpp2regex.h2" - [[nodiscard]] auto regex_token::to_string() const& -> std::string{return string_rep; }// Create a string representation. 
-#line 302 "cpp2regex.h2" - auto regex_token::set_string(cpp2::impl::in s) & -> void{string_rep = s; } - - regex_token::~regex_token() noexcept{}// Set the string representation. - -#line 317 "cpp2regex.h2" - regex_token_check::regex_token_check(cpp2::impl::in str, cpp2::impl::in check_) - : regex_token{ str } - , check{ check_ }{ - -#line 320 "cpp2regex.h2" - } - -#line 322 "cpp2regex.h2" - auto regex_token_check::generate_code(generation_context& ctx) const -> void{ - ctx.add_check(check + "(" + ctx.match_parameters() + ")"); - } - - regex_token_check::~regex_token_check() noexcept{} - -#line 336 "cpp2regex.h2" - regex_token_code::regex_token_code(cpp2::impl::in str, cpp2::impl::in code_) - : regex_token{ str } - , code{ code_ }{ - -#line 339 "cpp2regex.h2" - } - -#line 341 "cpp2regex.h2" - auto regex_token_code::generate_code(generation_context& ctx) const -> void{ - ctx.add(code); - } - - regex_token_code::~regex_token_code() noexcept{} - -#line 353 "cpp2regex.h2" - regex_token_empty::regex_token_empty(cpp2::impl::in str) - : regex_token{ str }{ - -#line 355 "cpp2regex.h2" - } - -#line 357 "cpp2regex.h2" - auto regex_token_empty::generate_code([[maybe_unused]] generation_context& unnamed_param_2) const -> void{ - // Nothing. 
- } - - regex_token_empty::~regex_token_empty() noexcept{} - -#line 371 "cpp2regex.h2" - regex_token_list::regex_token_list(cpp2::impl::in t) - : regex_token{ gen_string(t) } - , tokens{ t }{ - -#line 374 "cpp2regex.h2" - } - -#line 376 "cpp2regex.h2" - auto regex_token_list::generate_code(generation_context& ctx) const -> void{ - for ( auto const& token : tokens ) { - (*cpp2::impl::assert_not_null(token)).generate_code(ctx); - } - } - -#line 382 "cpp2regex.h2" - auto regex_token_list::add_groups(std::set& groups) const -> void{ - for ( auto const& token : tokens ) { - (*cpp2::impl::assert_not_null(token)).add_groups(groups); - } - } - -#line 388 "cpp2regex.h2" - [[nodiscard]] auto regex_token_list::gen_string(cpp2::impl::in vec) -> std::string{ - std::string r {""}; - for ( auto const& token : vec ) { - r += (*cpp2::impl::assert_not_null(token)).to_string(); - } - return r; - } - - regex_token_list::~regex_token_list() noexcept{} - -#line 414 "cpp2regex.h2" - auto parse_context_group_state::next_alternative() & -> void{ - token_vec new_list {}; - std::swap(new_list, cur_match_list); - post_process_list(new_list); - static_cast(alternate_match_lists.insert(alternate_match_lists.end(), CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, cpp2::move(new_list)))); - } - -#line 422 "cpp2regex.h2" - auto parse_context_group_state::swap(parse_context_group_state& t) & -> void{ - std::swap(cur_match_list, t.cur_match_list); - std::swap(alternate_match_lists, t.alternate_match_lists); - std::swap(modifiers, t.modifiers); - } - -#line 429 "cpp2regex.h2" - [[nodiscard]] auto parse_context_group_state::get_as_token() & -> token_ptr{ - if (alternate_match_lists.empty()) { - post_process_list(cur_match_list); - return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, cur_match_list); - } - else { - next_alternative(); - return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, alternate_match_lists); - } - } - -#line 441 "cpp2regex.h2" - auto parse_context_group_state::add(cpp2::impl::in token) & -> 
void{ - cur_match_list.push_back(token); - } - -#line 446 "cpp2regex.h2" - [[nodiscard]] auto parse_context_group_state::empty() const& -> bool { return cur_match_list.empty(); } - -#line 450 "cpp2regex.h2" - auto parse_context_group_state::post_process_list(token_vec& list) -> void{ - // Merge all characters - auto merge_pos {list.begin()}; - for( ; merge_pos != list.end(); (++merge_pos) ) { - if (cpp2::impl::is(*cpp2::impl::assert_not_null(*cpp2::impl::assert_not_null(merge_pos)))) { - auto combine_pos {merge_pos + 1}; - while( combine_pos != list.end() && cpp2::impl::is(*cpp2::impl::assert_not_null(*cpp2::impl::assert_not_null(combine_pos))) ) {// The erase advances combine_pos - (cpp2::impl::as_(*cpp2::impl::assert_not_null(*cpp2::impl::assert_not_null(merge_pos)))).append(cpp2::impl::as_(*cpp2::impl::assert_not_null(*cpp2::impl::assert_not_null(combine_pos)))); - combine_pos = list.erase(combine_pos); - } - } - } - } - - parse_context_group_state::parse_context_group_state(auto const& cur_match_list_, auto const& alternate_match_lists_, auto const& modifiers_) - : cur_match_list{ cur_match_list_ } - , alternate_match_lists{ alternate_match_lists_ } - , modifiers{ modifiers_ }{} -parse_context_group_state::parse_context_group_state(){} - -#line 476 "cpp2regex.h2" - [[nodiscard]] auto parse_context_branch_reset_state::next() & -> int{ - auto g {cur_group}; - cur_group += 1; - max_group = max(max_group, cur_group); - - return g; - } - -#line 485 "cpp2regex.h2" - auto parse_context_branch_reset_state::set_next(cpp2::impl::in g) & -> void{ - cur_group = g; - max_group = max(max_group, g); - } - -#line 491 "cpp2regex.h2" - auto parse_context_branch_reset_state::next_alternative() & -> void{ - if (is_active) { - cur_group = from; - } - } - -#line 498 "cpp2regex.h2" - auto parse_context_branch_reset_state::set_active_reset(cpp2::impl::in restart) & -> void{ - is_active = true; - cur_group = restart; - from = restart; - max_group = restart; - } - - 
parse_context_branch_reset_state::parse_context_branch_reset_state(auto const& is_active_, auto const& cur_group_, auto const& max_group_, auto const& from_) - : is_active{ is_active_ } - , cur_group{ cur_group_ } - , max_group{ max_group_ } - , from{ from_ }{} -parse_context_branch_reset_state::parse_context_branch_reset_state(){} - -#line 526 "cpp2regex.h2" - parse_context::parse_context(cpp2::impl::in r, auto const& e) - : regex{ r } - , root{ CPP2_UFCS_TEMPLATE_NONLOCAL(cpp2_new)(cpp2::shared, "") } - , error_out{ e }{ - -#line 530 "cpp2regex.h2" - } - -#line 536 "cpp2regex.h2" - [[nodiscard]] auto parse_context::start_group() & -> parse_context_group_state - { - parse_context_group_state old_state {}; - old_state.swap(cur_group_state); - cur_group_state.modifiers = old_state.modifiers; - - return old_state; - } - -#line 546 "cpp2regex.h2" - [[nodiscard]] auto parse_context::end_group(cpp2::impl::in old_state) & -> token_ptr - { - auto inner {cur_group_state.get_as_token()}; - cur_group_state = old_state; - return inner; - } - -#line 553 "cpp2regex.h2" - [[nodiscard]] auto parse_context::get_modifiers() const& -> expression_flags{ - return cur_group_state.modifiers; - } - -#line 557 "cpp2regex.h2" - auto parse_context::set_modifiers(cpp2::impl::in mod) & -> void{ - cur_group_state.modifiers = mod; - } - -#line 564 "cpp2regex.h2" - [[nodiscard]] auto parse_context::branch_reset_new_state() & -> parse_context_branch_reset_state - { - parse_context_branch_reset_state old_state {}; - std::swap(old_state, cur_branch_reset_state); - - cur_branch_reset_state.set_active_reset(old_state.cur_group); - return old_state; - } - -#line 573 "cpp2regex.h2" - auto parse_context::branch_reset_restore_state(cpp2::impl::in old_state) & -> void - { - auto max_group {cur_branch_reset_state.max_group}; - cur_branch_reset_state = old_state; - cur_branch_reset_state.set_next(cpp2::move(max_group)); - } - -#line 580 "cpp2regex.h2" - auto parse_context::next_alternative() & -> void - { - 
cur_group_state.next_alternative(); - cur_branch_reset_state.next_alternative(); - } - -#line 588 "cpp2regex.h2" - auto parse_context::add_token(cpp2::impl::in token) & -> void{ - cur_group_state.add(token); - } - -#line 592 "cpp2regex.h2" - [[nodiscard]] auto parse_context::has_token() const& -> bool{ - return !(cur_group_state.empty()); - } - -#line 596 "cpp2regex.h2" - [[nodiscard]] auto parse_context::pop_token() & -> token_ptr - { - token_ptr r {nullptr}; - if (has_token()) { - r = cur_group_state.cur_match_list.back(); - cur_group_state.cur_match_list.pop_back(); - } - - return r; - } - -#line 607 "cpp2regex.h2" - [[nodiscard]] auto parse_context::get_as_token() & -> token_ptr{ - return root; - } - -#line 613 "cpp2regex.h2" - [[nodiscard]] auto parse_context::get_cur_group() const& -> int{ - return cur_branch_reset_state.cur_group; - } - -#line 617 "cpp2regex.h2" - [[nodiscard]] auto parse_context::next_group() & -> int{ - return cur_branch_reset_state.next(); - } - -#line 621 "cpp2regex.h2" - auto parse_context::set_named_group(cpp2::impl::in name, cpp2::impl::in id) & -> void - { - if (!(named_groups.contains(name))) {// Redefinition of group name is not an error. The left most one is retained. 
- CPP2_ASSERT_IN_BOUNDS(named_groups, name) = id; - } - } - -#line 628 "cpp2regex.h2" - [[nodiscard]] auto parse_context::get_named_group(cpp2::impl::in name) const& -> int - { - auto iter {named_groups.find(name)}; - if (iter == named_groups.end()) { - return -1; - } - else { - return (*cpp2::impl::assert_not_null(cpp2::move(iter))).second; - } - } - -#line 641 "cpp2regex.h2" - [[nodiscard]] auto parse_context::current() const& -> char{return CPP2_ASSERT_IN_BOUNDS(regex, pos); } - -#line 644 "cpp2regex.h2" - [[nodiscard]] auto parse_context::get_next_position(cpp2::impl::in in_class, cpp2::impl::in no_skip) const& -> size_t - { - auto perl_syntax {false}; - if (!(no_skip)) { - if (in_class) { - perl_syntax = get_modifiers().has(expression_flags::perl_code_syntax) && get_modifiers().has(expression_flags::perl_code_syntax_in_classes); - } - else { - perl_syntax = get_modifiers().has(expression_flags::perl_code_syntax); - } - } - auto cur {pos + 1}; - if (cpp2::move(perl_syntax)) { - for( ; cpp2::impl::cmp_less(cur,regex.size()); (cur += 1) ) { - auto n {CPP2_ASSERT_IN_BOUNDS(regex, cur)}; - - if (space_class::includes(n)) { - continue; - } - else {if (!(in_class) && '#' == cpp2::move(n)) { - cur = regex.find("\n", cur); - if (std::string::npos == cur) { - // No new line, comment runs until the end of the pattern - cur = regex.size(); - } - } - else { // None space none comment char - break; - }} - } - } - - // Check for end of file. 
- if (cpp2::impl::cmp_greater(cur,regex.size())) { - cur = regex.size(); - } - return cur; - } - -#line 684 "cpp2regex.h2" - [[nodiscard]] auto parse_context::next_impl(cpp2::impl::in in_class, cpp2::impl::in no_skip) & -> bool - { - pos = get_next_position(in_class, no_skip); - if (pos != regex.size()) { - return true; - } - else { - return false; - } - } - -#line 695 "cpp2regex.h2" - [[nodiscard]] auto parse_context::next() & -> auto { return next_impl(false, false); } -#line 696 "cpp2regex.h2" - [[nodiscard]] auto parse_context::next_in_class() & -> auto { return next_impl(true, false); } -#line 697 "cpp2regex.h2" - [[nodiscard]] auto parse_context::next_no_skip() & -> auto { return next_impl(false, true); } - -#line 699 "cpp2regex.h2" - [[nodiscard]] auto parse_context::next_n(cpp2::impl::in n) & -> bool{ - auto r {true}; - auto cur {0}; - for( ; r && cpp2::impl::cmp_less(cur,n); (r = next()) ) { - cur += 1; - } - return r; - } - -#line 708 "cpp2regex.h2" - [[nodiscard]] auto parse_context::has_next() const& -> bool{return cpp2::impl::cmp_less(pos,regex.size()); } - -#line 710 "cpp2regex.h2" - [[nodiscard]] auto parse_context::grab_until_impl(cpp2::impl::in e, cpp2::impl::out r, cpp2::impl::in any) & -> bool - { - auto end {pos}; - if (any) { - end = regex.find_first_of(e, pos); - } - else { - end = regex.find(e, pos); - } - - if (end != std::string_view::npos) { - r.construct(regex.substr(pos, end - pos)); - pos = cpp2::move(end); - return true; - } - else { - r.construct(""); - return false; - } - } - -#line 731 "cpp2regex.h2" - [[nodiscard]] auto parse_context::grab_until(cpp2::impl::in e, cpp2::impl::out r) & -> auto { return grab_until_impl(e, cpp2::impl::out(&r), false); } -#line 732 "cpp2regex.h2" - [[nodiscard]] auto parse_context::grab_until(cpp2::impl::in e, cpp2::impl::out r) & -> auto { return grab_until_impl(std::string(1, e), cpp2::impl::out(&r), false); } -#line 733 "cpp2regex.h2" - [[nodiscard]] auto 
parse_context::grab_until_one_of(cpp2::impl::in e, cpp2::impl::out r) & -> auto { return grab_until_impl(e, cpp2::impl::out(&r), true); } - -#line 735 "cpp2regex.h2" - [[nodiscard]] auto parse_context::grab_n(cpp2::impl::in n, cpp2::impl::out r) & -> bool - { - if (cpp2::impl::cmp_less_eq(pos + cpp2::impl::as_(n),regex.size())) { - r.construct(regex.substr(pos, cpp2::impl::as_(n))); - pos += (cpp2::impl::as_(n)) - 1; - return true; - } - else { - r.construct(""); - return false; - } - } - -#line 748 "cpp2regex.h2" - [[nodiscard]] auto parse_context::grab_number() & -> std::string - { - auto start {pos}; - auto start_search {pos}; - if (CPP2_ASSERT_IN_BOUNDS(regex, start_search) == '-') { - start_search += 1; - } - auto end {regex.find_first_not_of("1234567890", cpp2::move(start_search))}; - - cpp2::impl::deferred_init r; - if (end != std::string::npos) { - r.construct(regex.substr(start, end - start)); - pos = cpp2::move(end) - 1; - } - else { - r.construct(regex.substr(cpp2::move(start))); - pos = regex.size() - 1; - } - return cpp2::move(r.value()); - } - -#line 769 "cpp2regex.h2" - [[nodiscard]] auto parse_context::peek_impl(cpp2::impl::in in_class) const& -> char{ - auto next_pos {get_next_position(in_class, false)}; - if (cpp2::impl::cmp_less(next_pos,regex.size())) { - return CPP2_ASSERT_IN_BOUNDS(regex, cpp2::move(next_pos)); - } - else { - return '\0'; - } - } - -#line 779 "cpp2regex.h2" - [[nodiscard]] auto parse_context::peek() const& -> auto { return peek_impl(false); } -#line 780 "cpp2regex.h2" - [[nodiscard]] auto parse_context::peek_in_class() const& -> auto { return peek_impl(true); } - -#line 785 "cpp2regex.h2" - [[nodiscard]] auto parse_context::parser_group_modifiers(cpp2::impl::in change_str, expression_flags& parser_modifiers) & -> bool - { - auto is_negative {false}; - auto is_reset {false}; - - auto apply {[&, _1 = (&is_negative), _2 = (&parser_modifiers)](cpp2::impl::in flag) mutable -> void{ - if (*cpp2::impl::assert_not_null(_1)) { - 
(*cpp2::impl::assert_not_null(_2)).clear(flag); - } - else { - (*cpp2::impl::assert_not_null(_2)).set(flag); - } - }}; - - auto iter {change_str.begin()}; - for( ; iter != change_str.end(); (++iter) ) - { - auto cur {*cpp2::impl::assert_not_null(iter)}; - if (cur == '^') { - is_reset = true; - parser_modifiers = expression_flags::none; - } - else {if (cur == '-') { - if (is_reset) {static_cast(error("No negative modifier allowed.")); return false; } - is_negative = true; - } - else {if (cur == 'i') {apply(expression_flags::case_insensitive); } - else {if (cur == 'm') {apply(expression_flags::multiple_lines); } - else {if (cur == 's') {apply(expression_flags::single_line); } - else {if (cur == 'n') {apply(expression_flags::no_group_captures); } - else {if (cur == 'x') { - if ((iter + 1) == change_str.end() || *cpp2::impl::assert_not_null((iter + 1)) != 'x') { - // x modifier - apply(expression_flags::perl_code_syntax); - - // Just x unsets xx and remove x also removes xx - parser_modifiers.clear(expression_flags::perl_code_syntax_in_classes); - } - else { // xx modifier - // xx also sets or unsets x - apply(expression_flags::perl_code_syntax); - apply(expression_flags::perl_code_syntax_in_classes); - - ++iter; // Skip the second x - } - } - else { - static_cast(error("Unknown modifier: " + cpp2::to_string(cpp2::move(cur)) + "")); return false; - }}}}}}} - } - - return true; - } - -#line 839 "cpp2regex.h2" - [[nodiscard]] auto parse_context::parse_until(cpp2::impl::in term) & -> bool{ - token_ptr cur_token {}; - - for( ; valid(); static_cast(next()) ) - { - if (term == current()) {break; } - - cur_token = nullptr; - - if (!(cur_token) && valid()) {cur_token = alternative_token::parse((*this)); } - if (!(cur_token) && valid()) {cur_token = any_token::parse((*this)); } - if (!(cur_token) && valid()) {cur_token = class_token::parse((*this)); } - if (!(cur_token) && valid()) {cur_token = escape_token_parse((*this)); } - if (!(cur_token) && valid()) {cur_token = 
global_group_reset_token_parse((*this)); } - if (!(cur_token) && valid()) {cur_token = group_ref_token::parse((*this)); } - if (!(cur_token) && valid()) {cur_token = group_token::parse((*this)); } - if (!(cur_token) && valid()) {cur_token = hexadecimal_token_parse((*this)); } - if (!(cur_token) && valid()) {cur_token = line_end_token_parse((*this)); } - if (!(cur_token) && valid()) {cur_token = line_start_token_parse((*this)); } - if (!(cur_token) && valid()) {cur_token = named_class_token_parse((*this)); } - if (!(cur_token) && valid()) {cur_token = octal_token_parse((*this)); } - if (!(cur_token) && valid()) {cur_token = range_token::parse((*this)); } - if (!(cur_token) && valid()) {cur_token = special_range_token::parse((*this)); } - if (!(cur_token) && valid()) {cur_token = word_boundary_token_parse((*this)); } - - // Everything else is matched as it is. - if (!(cur_token) && valid()) {cur_token = char_token::parse((*this)); } - - if (cur_token && valid()) { - add_token(cur_token); - }else { - return false; - } - } - - return true; - } - -#line 877 "cpp2regex.h2" - [[nodiscard]] auto parse_context::parse(cpp2::impl::in modifiers) & -> bool - { - - expression_flags flags {}; - if (!(parser_group_modifiers(modifiers, flags))) {return false; } - set_modifiers(cpp2::move(flags)); - - auto r {parse_until('\0')}; - if (r) { - root = cur_group_state.get_as_token(); - } - - return r; - } - -#line 894 "cpp2regex.h2" - [[nodiscard]] auto parse_context::get_pos() const& -> auto { return pos; } -#line 895 "cpp2regex.h2" - [[nodiscard]] auto parse_context::get_range(cpp2::impl::in start, cpp2::impl::in end) const& -> auto { return std::string(regex.substr(start, end - start + 1)); } -#line 896 "cpp2regex.h2" - [[nodiscard]] auto parse_context::valid() const& -> bool{return has_next() && !(has_error); } - -#line 898 "cpp2regex.h2" - [[nodiscard]] auto parse_context::error(cpp2::impl::in err) & -> token_ptr{ - has_error = true; - error_out("Error during parsing of regex '" + 
cpp2::to_string(regex) + "' at position '" + cpp2::to_string(pos) + "': " + cpp2::to_string(err) + ""); - return nullptr; - } - -#line 913 "cpp2regex.h2" - auto generation_function_context::add_tabs(cpp2::impl::in c) & -> void{ - int i {0}; - for( ; cpp2::impl::cmp_less(i,c); i += 1 ) { - tabs += " "; - } - } - -#line 920 "cpp2regex.h2" - auto generation_function_context::remove_tabs(cpp2::impl::in c) & -> void{ - tabs = tabs.substr(0, (cpp2::impl::as_(c)) * 2); - } - - generation_function_context::generation_function_context(auto const& code_, auto const& tabs_) - : code{ code_ } - , tabs{ tabs_ }{} -generation_function_context::generation_function_context(){} - -#line 938 "cpp2regex.h2" - [[nodiscard]] auto generation_context::match_parameters() const& -> std::string{return "r.pos, ctx"; } - -#line 943 "cpp2regex.h2" - auto generation_context::add(cpp2::impl::in s) & -> void{ - auto cur {get_current()}; - (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + cpp2::to_string(s) + "\n"; - } - -#line 949 "cpp2regex.h2" - auto generation_context::add_check(cpp2::impl::in check) & -> void{ - auto cur {get_current()}; - (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + "if !cpp2::regex::" + cpp2::to_string(check) + " { r.matched = false; break; }\n"; - } - -#line 955 "cpp2regex.h2" - auto generation_context::add_statefull(cpp2::impl::in next_func, cpp2::impl::in check) & -> void - { - end_func_statefull(check); - - auto name {next_func.substr(0, next_func.size() - 2)}; - start_func_named(cpp2::move(name)); - } - -#line 963 "cpp2regex.h2" - auto generation_context::start_func_named(cpp2::impl::in name) & -> void - { - auto cur {new_context()}; - - (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + cpp2::to_string(name) + ": @struct type = {\n"; - (*cpp2::impl::assert_not_null(cur)).code += "" + 
cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + " operator(): (this, cur: Iter, inout ctx: context, other) -> cpp2::regex::match_return = {\n"; - (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + " r := ctx..pass(cur);\n"; - (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + " do {\n"; - (*cpp2::impl::assert_not_null(cpp2::move(cur))).add_tabs(3); - } - -#line 974 "cpp2regex.h2" - [[nodiscard]] auto generation_context::start_func() & -> std::string - { - auto name {gen_func_name()}; - start_func_named(name); - return cpp2::move(name) + "()"; - } - -#line 981 "cpp2regex.h2" - auto generation_context::end_func_statefull(cpp2::impl::in s) & -> void - { - auto cur {get_current()}; - (*cpp2::impl::assert_not_null(cur)).remove_tabs(3); - (*cpp2::impl::assert_not_null(cur)).code += "\n"; - (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + " } while false;\n"; - (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + " if r.matched {\n"; - (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + " r = " + cpp2::to_string(s) + ";\n"; - (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + " }\n"; - (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + " else {\n"; - (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + " r.pos = ctx.end;\n"; - (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + " }\n"; - (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + " return r;\n"; - (*cpp2::impl::assert_not_null(cur)).code 
+= "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + " }\n"; - (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + "}\n"; - - finish_context(); - } - -#line 1001 "cpp2regex.h2" - [[nodiscard]] auto generation_context::generate_func(cpp2::impl::in token) & -> std::string - { - auto name {start_func()}; - (*cpp2::impl::assert_not_null(token)).generate_code((*this)); - end_func_statefull("other(" + cpp2::to_string(match_parameters()) + ")"); - - return name; - } - -#line 1011 "cpp2regex.h2" - [[nodiscard]] auto generation_context::generate_reset(cpp2::impl::in> groups) & -> std::string - { - if (groups.empty()) { - return "cpp2::regex::no_reset()"; - } - - auto name {gen_reset_func_name()}; - auto cur {new_context()}; - - (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + cpp2::to_string(name) + ": @struct type = {\n"; - (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + " operator(): (this, inout ctx) = {\n"; - for ( auto const& g : groups ) { - (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + " ctx..set_group_invalid(" + cpp2::to_string(g) + ");\n"; +#line 79 "cpp2regex.h2" + template [[nodiscard]] auto match_context::get_group_end(auto const& group) const& -> int{ + if (cpp2::impl::cmp_greater_eq(group,max_groups) || !(CPP2_ASSERT_IN_BOUNDS(groups, group).matched)) { + return 0; } - (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + " }\n"; - (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + "}\n"; - - finish_context(); - - return cpp2::move(name) + "()"; - } - -#line 1035 "cpp2regex.h2" - [[nodiscard]] auto generation_context::gen_func_name() & -> std::string{ - auto cur_id {matcher_func}; - matcher_func 
+= 1; - return "func_" + cpp2::to_string(cpp2::move(cur_id)) + ""; - } - -#line 1041 "cpp2regex.h2" - [[nodiscard]] auto generation_context::next_func_name() & -> std::string{ - return gen_func_name() + "()"; - } - -#line 1045 "cpp2regex.h2" - [[nodiscard]] auto generation_context::gen_reset_func_name() & -> std::string{ - auto cur_id {reset_func}; - reset_func += 1; - return "reset_" + cpp2::to_string(cpp2::move(cur_id)) + ""; + return cpp2::unsafe_narrow(std::distance(begin, CPP2_ASSERT_IN_BOUNDS(groups, group).end)); } - -#line 1051 "cpp2regex.h2" - [[nodiscard]] auto generation_context::gen_temp() & -> std::string{ - auto cur_id {temp_name}; - temp_name += 1; - return "tmp_" + cpp2::to_string(cpp2::move(cur_id)) + ""; +#line 85 "cpp2regex.h2" + template [[nodiscard]] auto match_context::get_group_start(auto const& group) const& -> int{ + if (cpp2::impl::cmp_greater_eq(group,max_groups) || !(CPP2_ASSERT_IN_BOUNDS(groups, group).matched)) { + return 0; + } + return cpp2::unsafe_narrow(std::distance(begin, CPP2_ASSERT_IN_BOUNDS(groups, group).start)); } - -#line 1059 "cpp2regex.h2" - [[nodiscard]] auto generation_context::new_context() & -> generation_function_context*{ - gen_stack.push_back(generation_function_context()); - auto cur {get_current()}; - (*cpp2::impl::assert_not_null(cur)).tabs = " "; - - return cur; +#line 91 "cpp2regex.h2" + template [[nodiscard]] auto match_context::get_group_string(auto const& group) const& -> std::string{ + if (cpp2::impl::cmp_greater_eq(group,max_groups) || !(CPP2_ASSERT_IN_BOUNDS(groups, group).matched)) { + return ""; + } + return std::string(CPP2_ASSERT_IN_BOUNDS(groups, group).start, CPP2_ASSERT_IN_BOUNDS(groups, group).end); } -#line 1067 "cpp2regex.h2" - auto generation_context::finish_context() & -> void{ - auto cur {get_current()}; - auto base {get_base()}; - (*cpp2::impl::assert_not_null(base)).code += (*cpp2::impl::assert_not_null(cpp2::move(cur))).code; - - gen_stack.pop_back(); +#line 98 "cpp2regex.h2" + template 
auto match_context::set_group_end(auto const& group, auto const& pos) & -> void{ + CPP2_ASSERT_IN_BOUNDS(groups, group).end = pos; + CPP2_ASSERT_IN_BOUNDS(groups, group).matched = true; } -#line 1077 "cpp2regex.h2" - [[nodiscard]] auto generation_context::get_current() & -> generation_function_context*{ - return &gen_stack.back(); +#line 103 "cpp2regex.h2" + template auto match_context::set_group_invalid(auto const& group) & -> void{ + CPP2_ASSERT_IN_BOUNDS(groups, group).matched = false; } -#line 1081 "cpp2regex.h2" - [[nodiscard]] auto generation_context::get_base() & -> generation_function_context*{ - return &CPP2_ASSERT_IN_BOUNDS_LITERAL(gen_stack, 0); +#line 107 "cpp2regex.h2" + template auto match_context::set_group_start(auto const& group, auto const& pos) & -> void{ + CPP2_ASSERT_IN_BOUNDS(groups, group).start = pos; } -#line 1085 "cpp2regex.h2" - [[nodiscard]] auto generation_context::get_entry_func() const& -> std::string{ - return entry_func; - } +#line 111 "cpp2regex.h2" + template [[nodiscard]] auto match_context::size() const& -> auto { return max_groups; } -#line 1089 "cpp2regex.h2" - [[nodiscard]] auto generation_context::create_named_group_lookup(cpp2::impl::in> named_groups) const& -> std::string - { - std::string res {"get_named_group_index: (name) -> int = {\n"}; +#line 115 "cpp2regex.h2" + template [[nodiscard]] auto match_context::fail() const& -> auto { return match_return(false, end); } +#line 116 "cpp2regex.h2" + template [[nodiscard]] auto match_context::pass(cpp2::impl::in cur) const& -> auto { return match_return(true, cur); } - // Generate if selection. 
- std::string sep {""}; - for ( auto const& cur : named_groups ) { - res += "" + cpp2::to_string(sep) + "if name == \"" + cpp2::to_string(cur.first) + "\" { return " + cpp2::to_string(cur.second) + "; }"; - sep = "else "; - } +#line 124 "cpp2regex.h2" + [[nodiscard]] auto true_end_func::operator()(auto const& cur, auto& ctx) const& -> auto { return ctx.pass(cur); } - // Generate else branch or return if list is empty. - if (named_groups.empty()) { - res += " _ = name;\n"; - res += " return -1;\n"; - } - else { - res += " else { return -1; }\n"; - } - res += "}\n"; - return res; - } +#line 132 "cpp2regex.h2" + auto no_reset::operator()([[maybe_unused]] auto& unnamed_param_2) const& -> void{} -#line 1114 "cpp2regex.h2" - [[nodiscard]] auto generation_context::run(cpp2::impl::in token) & -> std::string{ - entry_func = generate_func(token); +#line 141 "cpp2regex.h2" + template on_return::on_return(Func const& f) + : func{ f }{ - return (*cpp2::impl::assert_not_null(get_base())).code; +#line 143 "cpp2regex.h2" } +#line 141 "cpp2regex.h2" + template auto on_return::operator=(Func const& f) -> on_return& { + func = f; + return *this; -#line 1130 "cpp2regex.h2" - alternative_token::alternative_token() - : regex_token_empty{ "" }{} - -#line 1132 "cpp2regex.h2" - [[nodiscard]] auto alternative_token::parse(parse_context& ctx) -> token_ptr{ - if (ctx.current() != '|') {return nullptr; } - - if (!(ctx.has_token())) {return ctx.error("Alternative with no content."); } - ctx.next_alternative(); - return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared); +#line 143 "cpp2regex.h2" } - alternative_token::~alternative_token() noexcept{} - -#line 1147 "cpp2regex.h2" - alternative_token_gen::alternative_token_gen(cpp2::impl::in a) - : regex_token{ gen_string(a) } - , alternatives{ a }{ - -#line 1150 "cpp2regex.h2" +#line 145 "cpp2regex.h2" + template on_return::~on_return() noexcept{ + cpp2::move(*this).func(); } -#line 1152 "cpp2regex.h2" - auto 
alternative_token_gen::generate_code(generation_context& ctx) const -> void - { - std::string functions {""}; - - for ( auto const& cur : alternatives ) { - std::set groups {}; - (*cpp2::impl::assert_not_null(cur)).add_groups(groups); - - functions += ", " + ctx.generate_func(cur); - functions += ", " + ctx.generate_reset(cpp2::move(groups)); - } +#line 152 "cpp2regex.h2" +template [[nodiscard]] auto make_on_return(Func const& func) -> auto { return on_return(func); } - auto next_name {ctx.next_func_name()}; +#line 166 "cpp2regex.h2" + template [[nodiscard]] auto single_class_entry::includes(cpp2::impl::in c) -> auto { return c == C; } +#line 167 "cpp2regex.h2" + template [[nodiscard]] auto single_class_entry::to_string() -> auto { return bstring(1, C); } - ctx.add_statefull(next_name, "cpp2::regex::alternative_token_matcher::match(" + cpp2::to_string(ctx.match_parameters()) + ", other, " + cpp2::to_string(next_name) + " " + cpp2::to_string(cpp2::move(functions)) + ")"); - } +#line 175 "cpp2regex.h2" + template [[nodiscard]] auto range_class_entry::includes(cpp2::impl::in c) -> auto { return [_0 = Start, _1 = c, _2 = End]{ return cpp2::impl::cmp_less_eq(_0,_1) && cpp2::impl::cmp_less_eq(_1,_2); }(); } +#line 176 "cpp2regex.h2" + template [[nodiscard]] auto range_class_entry::to_string() -> auto { return "" + cpp2::to_string(Start) + "-" + cpp2::to_string(End) + ""; } -#line 1169 "cpp2regex.h2" - auto alternative_token_gen::add_groups(std::set& groups) const -> void - { - for ( auto const& cur : alternatives ) { - (*cpp2::impl::assert_not_null(cur)).add_groups(groups); - } - } +#line 184 "cpp2regex.h2" + template [[nodiscard]] auto combined_class_entry::includes(cpp2::impl::in c) -> auto { return (false || ... || List::includes(c)); } +#line 185 "cpp2regex.h2" + template [[nodiscard]] auto combined_class_entry::to_string() -> auto { return (bstring() + ... 
+ List::to_string()); } -#line 1176 "cpp2regex.h2" - [[nodiscard]] auto alternative_token_gen::gen_string(cpp2::impl::in a) -> std::string - { - std::string r {""}; - std::string sep {""}; +#line 193 "cpp2regex.h2" + template [[nodiscard]] auto list_class_entry::includes(cpp2::impl::in c) -> auto { return (false || ... || (List == c)); } +#line 194 "cpp2regex.h2" + template [[nodiscard]] auto list_class_entry::to_string() -> auto { return (bstring() + ... + List); } - for ( auto const& cur : a ) { - r += sep + (*cpp2::impl::assert_not_null(cur)).to_string(); - sep = "|"; - } +#line 202 "cpp2regex.h2" + template [[nodiscard]] auto named_class_entry::includes(cpp2::impl::in c) -> auto { return Inner::includes(c); } +#line 203 "cpp2regex.h2" + template [[nodiscard]] auto named_class_entry::to_string() -> auto { return "[:" + cpp2::to_string(Name.data()) + ":]"; } - return r; - } +#line 210 "cpp2regex.h2" + template [[nodiscard]] auto negated_class_entry::includes(cpp2::impl::in c) -> auto { return !(Inner::includes(c)); } - alternative_token_gen::~alternative_token_gen() noexcept{} +#line 218 "cpp2regex.h2" + template [[nodiscard]] auto shorthand_class_entry::includes(cpp2::impl::in c) -> auto { return Inner::includes(c); } +#line 219 "cpp2regex.h2" + template [[nodiscard]] auto shorthand_class_entry::to_string() -> auto { return Name.str(); } -#line 1193 "cpp2regex.h2" +#line 266 "cpp2regex.h2" template [[nodiscard]] auto alternative_token_matcher::match(auto const& cur, auto& ctx, auto const& end_func, auto const& tail, auto const& ...functions) -> auto{ return match_first(cur, ctx, end_func, tail, functions...); } -#line 1197 "cpp2regex.h2" +#line 270 "cpp2regex.h2" template template [[nodiscard]] auto alternative_token_matcher::match_first(auto const& cur, auto& ctx, auto const& end_func, auto const& tail, auto const& cur_func, auto const& cur_reset, Other const& ...other) -> auto { auto inner_call {[_0 = (tail), _1 = (end_func)](auto const& tail_cur, auto& 
tail_ctx) -> auto{ @@ -2734,23 +817,7 @@ generation_function_context::generation_function_context(){} } } -#line 1224 "cpp2regex.h2" - any_token::any_token(cpp2::impl::in single_line) - : regex_token_check{ ".", "any_token_matcher" }{ - -#line 1226 "cpp2regex.h2" - } - -#line 1228 "cpp2regex.h2" - [[nodiscard]] auto any_token::parse(parse_context& ctx) -> token_ptr{ - if ('.' != ctx.current()) {return nullptr; } - - return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, ctx.get_modifiers().has(expression_flags::single_line)); - } - - any_token::~any_token() noexcept{} - -#line 1236 "cpp2regex.h2" +#line 293 "cpp2regex.h2" template [[nodiscard]] auto any_token_matcher(auto& cur, auto& ctx) -> bool { if ( cur != ctx.end // any char except the end @@ -2763,270 +830,7 @@ template [[nodiscard]] auto any_token_matcher( return false; } -#line 1258 "cpp2regex.h2" - char_token::char_token(cpp2::impl::in t, cpp2::impl::in ignore_case_) - : regex_token{ std::string(1, t) } - , token{ t } - , ignore_case{ ignore_case_ }{ - -#line 1262 "cpp2regex.h2" - } - -#line 1264 "cpp2regex.h2" - [[nodiscard]] auto char_token::parse(parse_context& ctx) -> token_ptr{ - return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, ctx.current(), ctx.get_modifiers().has(expression_flags::case_insensitive)); - } - -#line 1268 "cpp2regex.h2" - auto char_token::generate_code(generation_context& ctx) const -> void - { - if (ignore_case) { - std::string upper {token}; - std::string lower {token}; -{ -size_t i{0}; - -#line 1274 "cpp2regex.h2" - for( ; cpp2::impl::cmp_less(i,token.size()); i += 1 ) { - CPP2_ASSERT_IN_BOUNDS(lower, i) = string_util::safe_tolower(CPP2_ASSERT_IN_BOUNDS(token, i)); - CPP2_ASSERT_IN_BOUNDS(upper, i) = string_util::safe_toupper(CPP2_ASSERT_IN_BOUNDS(token, i)); - } -} - -#line 1279 "cpp2regex.h2" - if (upper != lower) { - gen_case_insensitive(cpp2::move(lower), cpp2::move(upper), ctx); - } - else { - gen_case_sensitive(ctx); - } - } - else { - gen_case_sensitive(ctx); - } - } - -#line 
1291 "cpp2regex.h2" - auto char_token::gen_case_insensitive(cpp2::impl::in lower, cpp2::impl::in upper, generation_context& ctx) const& -> void - { - std::string name {"str_" + cpp2::to_string(ctx.gen_temp()) + ""}; - std::string lower_name {"lower_" + cpp2::to_string(name) + ""}; - std::string upper_name {"upper_" + cpp2::to_string(cpp2::move(name)) + ""}; - auto size {token.size()}; - ctx.add("" + cpp2::to_string(lower_name) + " : std::array = \"" + cpp2::to_string(add_escapes(lower)) + "\";"); // TODO: Add constexpr when Issue https://github.com/hsutter/cppfront/issues/1104 is resolved. - ctx.add("" + cpp2::to_string(upper_name) + " : std::array = \"" + cpp2::to_string(add_escapes(upper)) + "\";"); // TODO: Add constexpr when Issue https://github.com/hsutter/cppfront/issues/1104 is resolved. - ctx.add("if std::distance(r.pos, ctx.end) < " + cpp2::to_string(size) + " {"); - ctx.add(" r.matched = false;"); - ctx.add(" break;"); - ctx.add("}"); - ctx.add(""); - ctx.add("(copy i : int = 0) while i < " + cpp2::to_string(size) + " next (i += 1) {"); - ctx.add(" if !(" + cpp2::to_string(cpp2::move(lower_name)) + "[i] == r.pos[i] || " + cpp2::to_string(cpp2::move(upper_name)) + "[i] == r.pos[i]) { r.matched = false; }"); - ctx.add("}"); - ctx.add(""); - ctx.add("if r.matched { r.pos += " + cpp2::to_string(cpp2::move(size)) + "; }"); - ctx.add("else { break; }"); - } - -#line 1312 "cpp2regex.h2" - auto char_token::gen_case_sensitive(generation_context& ctx) const& -> void - { - std::string name {"str_" + cpp2::to_string(ctx.gen_temp()) + ""}; - auto size {token.size()}; - ctx.add("" + cpp2::to_string(name) + " : std::array = \"" + cpp2::to_string(add_escapes(token)) + "\";"); // TODO: Add constexpr when Issue https://github.com/hsutter/cppfront/issues/1104 is resolved. 
- ctx.add("if std::distance(r.pos, ctx.end) < " + cpp2::to_string(size) + " {"); - ctx.add(" r.matched = false;"); - ctx.add(" break;"); - ctx.add("}"); - ctx.add(""); - ctx.add("(copy i : int = 0) while i < " + cpp2::to_string(size) + " next (i += 1) {"); - ctx.add(" if " + cpp2::to_string(cpp2::move(name)) + "[i] != r.pos[i] { r.matched = false; }"); - ctx.add("}"); - ctx.add(""); - ctx.add("if r.matched { r.pos += " + cpp2::to_string(cpp2::move(size)) + "; }"); - ctx.add("else { break; }"); - } - -#line 1330 "cpp2regex.h2" - [[nodiscard]] auto char_token::add_escapes(std::string str) const& -> std::string - { - str = string_util::replace_all(str, "\\", "\\\\"); - str = string_util::replace_all(str, "\a", "\\a"); - str = string_util::replace_all(str, "\f", "\\f"); - str = string_util::replace_all(str, "\x1b", "\" \"\\x1b\" \""); // Generate a separated string. This prevents - // situations like `\x1bblub` from generating - // wrong hex characters. - str = string_util::replace_all(str, "\n", "\\n"); - str = string_util::replace_all(str, "\r", "\\r"); - str = string_util::replace_all(str, "\t", "\\t"); - - return cpp2::move(str); - } - -#line 1345 "cpp2regex.h2" - auto char_token::append(char_token const& that) & -> void{ - (*this).token += that.token; - (*this).string_rep += that.string_rep; - } - - char_token::~char_token() noexcept{} - -#line 1396 "cpp2regex.h2" - class_token::class_token(cpp2::impl::in negate_, cpp2::impl::in case_insensitive_, cpp2::impl::in class_str_, cpp2::impl::in str) - : regex_token{ str } - , negate{ negate_ } - , case_insensitive{ case_insensitive_ } - , class_str{ class_str_ } -#line 1397 "cpp2regex.h2" - { - -#line 1402 "cpp2regex.h2" - } - -#line 1405 "cpp2regex.h2" - [[nodiscard]] auto class_token::parse(parse_context& ctx) -> token_ptr - { - if (ctx.current() != '[') {return nullptr; } - - auto start_pos {ctx.get_pos()}; - - std::vector supported_classes {"alnum", "alpha", "ascii", "blank", "cntrl", "digits", "graph", - "lower", 
"print", "punct", "space", "upper", "word", "xdigit"}; - - std::vector classes {}; - - // First step: parse until the end bracket and push single chars, ranges or groups on the class stack. - auto is_negate {false}; - auto first {true}; - auto range {false}; - while( ctx.next_in_class() && (ctx.current() != ']' || first) ) - { - if (ctx.current() == '^') - { - is_negate = true; - continue; // Skip rest of the loop. Also the first update. - } - - if (ctx.current() == '[' && ctx.peek_in_class() == ':') - { - // We have a character class. - static_cast(ctx.next_n(2));// Skip [: - - std::string name {""}; - if (!(ctx.grab_until(":]", cpp2::impl::out(&name)))) {return ctx.error("Could not find end of character class."); } - if (supported_classes.end() == std::find(supported_classes.begin(), supported_classes.end(), name)) { - return ctx.error("Unsupported character class. Supported ones are: " + cpp2::to_string(string_util::join(supported_classes)) + ""); - } - - classes.push_back("[:" + cpp2::to_string(cpp2::move(name)) + ":]"); - - static_cast(ctx.next());// Skip ':' pointing to the ending ']'. - } - else {if (ctx.current() == '\\') - { - if (ctx. next_no_skip() && (ctx. current() != ']')) - { - if ( ' ' == ctx. current() - && ctx.get_modifiers().has(expression_flags::perl_code_syntax) - && ctx.get_modifiers().has(expression_flags::perl_code_syntax_in_classes)) - { - classes.push_back(std::string(1, ctx.current())); - } - else { - auto name {""}; - if ( 'd' == ctx. 
current()) { name = "short_digits"; } - else {if ('D' == ctx.current()) {name = "short_not_digits"; } - else {if ('h' == ctx.current()) {name = "short_hor_space"; } - else {if ('H' == ctx.current()) {name = "short_not_hor_space"; } - else {if ('s' == ctx.current()) {name = "short_space"; } - else {if ('S' == ctx.current()) {name = "short_not_space"; } - else {if ('v' == ctx.current()) {name = "short_ver_space"; } - else {if ('V' == ctx.current()) {name = "short_not_ver_space"; } - else {if ('w' == ctx.current()) {name = "short_word"; } - else {if ('W' == ctx.current()) {name = "short_not_word"; } - else { - return ctx.error("Unknown group escape."); - }}}}}}}}}} - classes.push_back("[:" + cpp2::to_string(cpp2::move(name)) + ":]"); - } - }else { - return ctx.error("Escape without a following character."); - } - } - else {if (ctx.current() == '-') - { - if (first) {// Literal if first entry. - classes.push_back("" + cpp2::to_string(ctx.current()) + ""); - }else { - range = true; - } - } - else - { - if (range) {// Modify last element to be a range. - classes.back() += "-" + cpp2::to_string(ctx.current()) + ""; - range = false; - } - else { - classes.push_back("" + cpp2::to_string(ctx.current()) + ""); - } - }}} - - first = false; - } - - if (ctx.current() != ']') { - return ctx.error("Error end of character class definition before terminating ']'."); - } - auto end_pos {ctx.get_pos()}; - - if (cpp2::move(range)) {// If '-' is last entry treat it as a literal char. - classes.push_back("-"); - } - - // Second step: Wrap the item on the class stack with corresponding class implementation. 
- for ( auto& cur : classes ) - { - if (cur.starts_with("[:")) { - auto name {cur.substr(2, cur.size() - 4)}; - cur = create_matcher("" + cpp2::to_string(cpp2::move(name)) + "_class", ""); - } - else {if (1 != cur.size()) { - cur = create_matcher("range_class_entry", "'" + cpp2::to_string(CPP2_ASSERT_IN_BOUNDS_LITERAL(cur, 0)) + "', '" + cpp2::to_string(CPP2_ASSERT_IN_BOUNDS_LITERAL(cur, 2)) + "'"); - } - else { - cur = create_matcher("single_class_entry", "'" + cpp2::to_string(cur) + "'"); - }} - } - - auto inner {string_util::join(cpp2::move(classes))}; - auto string_rep {ctx.get_range(cpp2::move(start_pos), cpp2::move(end_pos))}; - return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, - cpp2::move(is_negate), - ctx.get_modifiers().has(expression_flags::case_insensitive), - cpp2::move(inner), - cpp2::move(string_rep) - ); - } - -#line 1530 "cpp2regex.h2" - auto class_token::generate_code(generation_context& ctx) const -> void - { - ctx.add_check("class_token_matcher::match(" + cpp2::to_string(ctx.match_parameters()) + ")"); - } - -#line 1535 "cpp2regex.h2" - [[nodiscard]] auto class_token::create_matcher(cpp2::impl::in name, cpp2::impl::in template_arguments) -> std::string - { - auto sep {", "}; - if (template_arguments.empty()) {sep = ""; } - - return "::cpp2::regex::" + cpp2::to_string(name) + ""; - } - - class_token::~class_token() noexcept{} - -#line 1547 "cpp2regex.h2" +#line 344 "cpp2regex.h2" template [[nodiscard]] auto class_token_matcher::match(auto& cur, auto& ctx) -> bool { if constexpr (case_insensitive) @@ -3037,208 +841,40 @@ size_t i{0}; || match_any(string_util::safe_toupper(*cpp2::impl::assert_not_null(cur))))) { - cur += 1; - return true; - } - else { - return false; - } - } - else - { - if (cur != ctx.end && negate != match_any(*cpp2::impl::assert_not_null(cur))) { - cur += 1; - return true; - } - else { - return false; - } - } - } - -#line 1576 "cpp2regex.h2" - template template [[nodiscard]] auto class_token_matcher::match_any(cpp2::impl::in c) 
-> bool - { - bool r {First::includes(c)}; - - if (!(r)) { - if constexpr (0 != sizeof...(Other)) { - r = match_any(c); - } - } - - return r; - } - -#line 1605 "cpp2regex.h2" -[[nodiscard]] auto escape_token_parse(parse_context& ctx) -> token_ptr -{ - if (ctx.current() != '\\') {return nullptr; } - -#line 1610 "cpp2regex.h2" - if (std::string::npos == std::string("afenrt^.[]()*{}?+|\\").find(ctx.peek())) { - return nullptr; - } - - static_cast(ctx.next());// Skip escape - - if (std::string::npos != std::string("afenrt\\").find(ctx.current())) - { - // Escape of string special char - char t {'\0'}; - if ( 'a' == ctx. current()) { t = '\a'; } - else {if ('f' == ctx.current()) {t = '\f'; } - else {if ('e' == ctx.current()) {t = '\x1b'; } - else {if ('n' == ctx.current()) {t = '\n'; } - else {if ('r' == ctx.current()) {t = '\r'; } - else {if ('t' == ctx.current()) {t = '\t'; } - else {if ('\\' == ctx.current()) {t = '\\'; } - else {return ctx.error("Internal: missing switch case for special escape."); }}}}}}} - - auto r {CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, cpp2::move(t), false)}; - (*cpp2::impl::assert_not_null(r)).set_string("\\" + cpp2::to_string(ctx.current()) + ""); - return r; - } - else - { - // Escape of regex special char - auto r {CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, ctx.current(), false)}; - (*cpp2::impl::assert_not_null(r)).set_string("\\" + cpp2::to_string(ctx.current()) + ""); - return r; - } - -} - -#line 1646 "cpp2regex.h2" -[[nodiscard]] auto global_group_reset_token_parse(parse_context& ctx) -> token_ptr -{ - if (!((ctx.current() == '\\' && ctx.peek() == 'K'))) {return nullptr; } - - static_cast(ctx.next());// Skip escape. 
- return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, "\\K", "ctx..set_group_start(0, r.pos);"); -} - -#line 1668 "cpp2regex.h2" - group_ref_token::group_ref_token(cpp2::impl::in id_, cpp2::impl::in case_insensitive_, cpp2::impl::in str) - : regex_token{ str } - , id{ id_ } - , case_insensitive{ case_insensitive_ } -#line 1669 "cpp2regex.h2" - { - -#line 1673 "cpp2regex.h2" - } - -#line 1675 "cpp2regex.h2" - [[nodiscard]] auto group_ref_token::parse(parse_context& ctx) -> token_ptr - { - if (ctx.current() != '\\') {return nullptr; } - - std::string str {"\\"}; - std::string group {""}; - - if ([_0 = '0', _1 = ctx.peek(), _2 = '9']{ return cpp2::impl::cmp_less_eq(_0,_1) && cpp2::impl::cmp_less_eq(_1,_2); }()) - { - static_cast(ctx.next());// Skip escape - group = ctx.grab_number(); - - if (cpp2::impl::cmp_greater_eq(group.size(),cpp2::impl::as_())) - { - // Octal syntax (\000) not a group ref matcher. - auto number {0}; - if (!(string_util::string_to_int(group, number, 8))) {return ctx.error("Could not convert octal to int."); } - - char number_as_char {cpp2::unsafe_narrow(cpp2::move(number))}; - - auto token {CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, number_as_char, ctx.get_modifiers().has(expression_flags::case_insensitive))}; - (*cpp2::impl::assert_not_null(token)).set_string("\\" + cpp2::to_string(string_util::int_to_string<8>(cpp2::impl::as_(cpp2::move(number_as_char)))) + ""); - - return token; - } - - str += group; - // Regular group ref - } - else {if ('g' == ctx.peek()) - { - static_cast(ctx.next());// Skip escape - if (!(ctx.next())) {return ctx.error("Group escape without a following char."); }// Skip g - - str += "g"; - - if (ctx.current() == '{') { - str += "{"; - if (!((ctx.next() && ctx.grab_until('}', cpp2::impl::out(&group))))) {return ctx.error("No ending bracket."); } - - str += group + "}"; - } - else { - group = ctx.grab_number(); - str += group; - } - } - else {if ('k' == ctx.peek()) - { - static_cast(ctx.next());// Skip escape - if 
(!(ctx.next())) {return ctx.error("Group escape without a following char."); }// Skip k - - str += "k"; - - auto term_char {'\0'}; - if (ctx.current() == '{') {term_char = '}'; } - else {if (ctx.current() == '<') {term_char = '>'; } - else {if (ctx.current() == '\'') {term_char = '\''; } - else { - return ctx.error("Group escape has wrong operator."); - }}} - - str += ctx.current(); - - if (!((ctx.next() && ctx.grab_until(term_char, cpp2::impl::out(&group))))) {return ctx.error("No ending bracket."); } - - str += group + cpp2::move(term_char); - } - else - { - // No group ref matcher - return nullptr; - }}} - - // Parse the group - group = string_util::trim_copy(group); - int group_id {0}; - if (string_util::string_to_int(group, group_id)) - { - if (cpp2::impl::cmp_less(group_id,0)) { - group_id = ctx.get_cur_group() + group_id; - - if (cpp2::impl::cmp_less(group_id,1)) {// Negative and zero are no valid groups. - return ctx.error("Relative group reference does not reference a valid group. (Would be " + cpp2::to_string(group_id) + ".)"); - } + cur += 1; + return true; } - - if (cpp2::impl::cmp_greater_eq(group_id,ctx.get_cur_group())) { - return ctx.error("Group reference is used before the group is declared."); + else { + return false; } } else { - // Named group - group_id = ctx.get_named_group(group); - if (-1 == group_id) {return ctx.error("Group names does not exist. 
(Name is: " + cpp2::to_string(cpp2::move(group)) + ")"); } + if (cur != ctx.end && negate != match_any(*cpp2::impl::assert_not_null(cur))) { + cur += 1; + return true; + } + else { + return false; + } } - - return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, cpp2::move(group_id), ctx.get_modifiers().has(expression_flags::case_insensitive), cpp2::move(str)); } -#line 1776 "cpp2regex.h2" - auto group_ref_token::generate_code(generation_context& ctx) const -> void{ - ctx.add_check("group_ref_token_matcher(" + cpp2::to_string(ctx.match_parameters()) + ")"); - } +#line 373 "cpp2regex.h2" + template template [[nodiscard]] auto class_token_matcher::match_any(cpp2::impl::in c) -> bool + { + bool r {First::includes(c)}; + + if (!(r)) { + if constexpr (0 != sizeof...(Other)) { + r = match_any(c); + } + } - group_ref_token::~group_ref_token() noexcept{} + return r; + } -#line 1782 "cpp2regex.h2" +#line 422 "cpp2regex.h2" template [[nodiscard]] auto group_ref_token_matcher(auto& cur, auto& ctx) -> bool { auto g {ctx.get_group(group)}; @@ -3269,268 +905,7 @@ template [[nodiscard]] auto gr } } -#line 1830 "cpp2regex.h2" - [[nodiscard]] auto group_token::parse_lookahead(parse_context& ctx, cpp2::impl::in syntax, cpp2::impl::in positive) -> token_ptr - { - static_cast(ctx.next());// Skip last token defining the syntax - - auto r {CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, positive)}; - - auto old_state {ctx.start_group()}; - if (!(ctx.parse_until(')'))) {return ctx.error("Lookahead without a closing bracket."); } - (*cpp2::impl::assert_not_null(r)).inner = ctx.end_group(cpp2::move(old_state)); - (*cpp2::impl::assert_not_null(r)).set_string("(" + cpp2::to_string(syntax) + cpp2::to_string((*cpp2::impl::assert_not_null((*cpp2::impl::assert_not_null(r)).inner)).to_string()) + ")"); - - return r; - } - -#line 1844 "cpp2regex.h2" - [[nodiscard]] auto group_token::parse(parse_context& ctx) -> token_ptr - { - if (ctx.current() != '(') {return nullptr; } - - auto has_id 
{!(ctx.get_modifiers().has(expression_flags::no_group_captures))}; - auto has_pattern {true}; - std::string group_name {""}; - auto group_name_brackets {true}; - std::string modifiers {""}; - auto modifiers_change_to {ctx.get_modifiers()}; - - // Skip the '(' - if (!(ctx.next())) {return ctx.error("Group without closing bracket."); } - - if (ctx.current() == '?') - { - // Special group - if (!(ctx.next_no_skip())) {return ctx.error("Missing character after group opening."); } - - if (ctx.current() == '<' || ctx.current() == '\'') - { - // Named group - auto end_char {ctx.current()}; - if (end_char == '<') { - end_char = '>'; - }else { - group_name_brackets = false; - } - has_id = true; // Force id for named groups. - if (!(ctx.next())) { return ctx. error("Missing ending bracket for named group."); }/* skip '<' */ - if (!(ctx.grab_until(cpp2::move(end_char), cpp2::impl::out(&group_name)))) {return ctx.error("Missing ending bracket for named group."); } - if (!(ctx.next())) {return ctx.error("Group without closing bracket."); } - } - else {if (ctx.current() == '#') - { - // Comment - std::string comment_str {""}; - static_cast(ctx.next());// Skip # - if (!(ctx.grab_until(")", cpp2::impl::out(&comment_str)))) {return ctx.error("Group without closing bracket."); } - // Do not add comment. Has problems with ranges. - - // Pop token and add a list. This fixes comments between a token and a range - if (ctx.has_token()) { - token_vec list {}; - list.push_back(ctx.pop_token()); - list.push_back(CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, "(?#" + cpp2::to_string(cpp2::move(comment_str)) + ")")); - - return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, cpp2::move(list)); - } - else { - return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, "(?#" + cpp2::to_string(cpp2::move(comment_str)) + ")"); - } - } - else {if (ctx.current() == '|') - { - // Branch reset group - - if (!(ctx.next())) { return ctx. 
error("Missing ending bracket for named group."); }/* skip '|' */ - - auto old_parser_state {ctx.start_group()}; - auto old_branch_state {ctx.branch_reset_new_state()}; - if (!(ctx.parse_until(')'))) {return nullptr; } - ctx.branch_reset_restore_state(cpp2::move(old_branch_state)); - auto inner_ {ctx.end_group(cpp2::move(old_parser_state))}; - - token_vec list {CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, "(?|"), cpp2::move(inner_), CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, ")")}; - return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, cpp2::move(list)); - } - else {if (ctx.current() == '=' || ctx.current() == '!') - { - return parse_lookahead(ctx, "?" + cpp2::to_string(ctx.current()) + "", ctx.current() == '='); - } - else - { - // Simple modifier - has_id = false; - if (!(ctx.grab_until_one_of("):", cpp2::impl::out(&modifiers)))) {return ctx.error("Missing ending bracket for group."); } - if (!(ctx.parser_group_modifiers(modifiers, modifiers_change_to))) { - return nullptr; - } - - if (')' == ctx.current()) { - has_pattern = false; - } - else { - if (!(ctx.next())) { return ctx. error("Missing ending bracket for group."); }/* skip ':' */ - } - }}}} - } - else {if (ctx.current() == '*') - { - // Named pattern - static_cast(ctx.next());// Skip *. 
- std::string name {""}; - if (!(ctx.grab_until(':', cpp2::impl::out(&name)))) {return ctx.error("Missing colon for named pattern."); } - - if (name == "pla" || name == "positive_lookahead") { - return parse_lookahead(ctx, "*" + cpp2::to_string(cpp2::move(name)) + ":", true); - } - else {if (name == "nla" || name == "negative_lookahead") { - return parse_lookahead(ctx, "*" + cpp2::to_string(cpp2::move(name)) + ":", false); - } - else { - return ctx.error("Unknown named group pattern: '" + cpp2::to_string(cpp2::move(name)) + "'"); - }} - }} - - if (cpp2::move(has_pattern)) - { - // Regular group - - auto r {CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared)}; - if (has_id) { - (*cpp2::impl::assert_not_null(r)).number = ctx.next_group(); - - if (0 != group_name.size()) { - ctx.set_named_group(group_name, (*cpp2::impl::assert_not_null(r)).number); - } - } - - auto old_state {ctx.start_group()}; - ctx.set_modifiers(cpp2::move(modifiers_change_to)); - if (!(ctx.parse_until(')'))) {return nullptr; } - (*cpp2::impl::assert_not_null(r)).inner = ctx.end_group(cpp2::move(old_state)); - (*cpp2::impl::assert_not_null(r)).set_string(gen_string(cpp2::move(group_name), cpp2::move(group_name_brackets), !(cpp2::move(has_id)), cpp2::move(modifiers), (*cpp2::impl::assert_not_null(r)).inner)); - - return r; - } - else - { - // Only a modifier - ctx.set_modifiers(cpp2::move(modifiers_change_to)); - - return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, "(?" + cpp2::to_string(cpp2::move(modifiers)) + ")"); - } - } - -#line 1981 "cpp2regex.h2" - [[nodiscard]] auto group_token::gen_string(cpp2::impl::in name, cpp2::impl::in name_brackets, cpp2::impl::in has_modifier, cpp2::impl::in modifiers, cpp2::impl::in inner_) -> std::string - { - std::string start {"("}; - if (0 != name.size()) { - if (name_brackets) { - start += "?<" + cpp2::to_string(name.data()) + ">"; - } - else { - start += "?'" + cpp2::to_string(name.data()) + "'"; - } - } - else {if (has_modifier) { - start += "?" 
+ modifiers + ":"; - }} - - return cpp2::move(start) + (*cpp2::impl::assert_not_null(inner_)).to_string() + ")"; - } - -#line 1999 "cpp2regex.h2" - auto group_token::generate_code(generation_context& ctx) const -> void - { - if (-1 != number) { - ctx.add("ctx..set_group_start(" + cpp2::to_string(number) + ", r.pos);"); - } - - (*cpp2::impl::assert_not_null(inner)).generate_code(ctx); - if (-1 != number) { - ctx.add("ctx..set_group_end(" + cpp2::to_string(number) + ", r.pos);"); - auto tmp_name {ctx.gen_temp()}; - ctx.add("" + cpp2::to_string(tmp_name) + "_func := :() = {"); - ctx.add(" if !r&$*.matched {"); - ctx.add(" ctx&$*..set_group_invalid(" + cpp2::to_string(number) + ");"); - ctx.add(" }"); - ctx.add("};"); - ctx.add("" + cpp2::to_string(tmp_name) + " := cpp2::regex::make_on_return(" + cpp2::to_string(tmp_name) + "_func);"); - ctx.add("_ = " + cpp2::to_string(cpp2::move(tmp_name)) + ";");// Logic is done in the destructor. Same behavior as for guard objects. - } - } - -#line 2019 "cpp2regex.h2" - auto group_token::add_groups(std::set& groups) const -> void - { - (*cpp2::impl::assert_not_null(inner)).add_groups(groups); - if (-1 != number) { - static_cast(groups.insert(number)); - } - } - - group_token::~group_token() noexcept{} - -#line 2031 "cpp2regex.h2" -[[nodiscard]] auto hexadecimal_token_parse(parse_context& ctx) -> token_ptr -{ - if (!((ctx.current() == '\\' && ctx.peek() == 'x'))) {return nullptr; } - - static_cast(ctx.next());// Skip escape. 
- - if (!(ctx.next())) {return ctx.error("x escape without number."); } - - auto has_brackets {false}; - std::string number_str {""}; - if ('{' == ctx.current()) { - // Bracketed - has_brackets = true; - static_cast(ctx.next());// Skip '{' - if (!(ctx.grab_until('}', cpp2::impl::out(&number_str)))) {return ctx.error("No ending bracket for \\x"); } - } - else { - // Grab two chars - if (!(ctx.grab_n(2, cpp2::impl::out(&number_str)))) {return ctx.error("Missing hexadecimal digits after \\x."); } - } - - auto number {0}; - if (!(string_util::string_to_int(cpp2::move(number_str), number, 16))) {return ctx.error("Could not convert hexadecimal to int."); } - - // TODO: Change for unicode. - char number_as_char {cpp2::unsafe_narrow(cpp2::move(number))}; - - std::string syntax {string_util::int_to_string<16>(cpp2::impl::as_(number_as_char))}; - if (cpp2::move(has_brackets)) { - syntax = "{" + cpp2::to_string(syntax) + "}"; - } - syntax = "\\x" + cpp2::to_string(syntax) + ""; - - auto r {CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, cpp2::move(number_as_char), ctx.get_modifiers().has(expression_flags::case_insensitive))}; - (*cpp2::impl::assert_not_null(r)).set_string(cpp2::move(syntax)); - return r; -} - -#line 2072 "cpp2regex.h2" -[[nodiscard]] auto line_end_token_parse(parse_context& ctx) -> token_ptr -{ - if (ctx.current() == '$' || (ctx.current() == '\\' && ctx.peek() == '$')) { - if ((ctx.current() == '\\')) {static_cast(ctx.next()); }// Skip escape - return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, "$", "line_end_token_matcher"); - } - else {if (ctx.current() == '\\' && (ctx.peek() == 'z' || ctx.peek() == 'Z')) { - static_cast(ctx.next());// Skip escape - - auto negate {ctx.current() == 'Z'}; - return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, "\\" + cpp2::to_string(ctx.current()) + "", "line_end_token_matcher"); - } - else { - return nullptr; - }} -} - -#line 2089 "cpp2regex.h2" +#line 455 "cpp2regex.h2" template [[nodiscard]] auto line_end_token_matcher(auto const& 
cur, auto& ctx) -> bool { if (cur == ctx.end || (match_new_line && *cpp2::impl::assert_not_null(cur) == '\n')) { @@ -3544,50 +919,14 @@ template [[ }} } -#line 2105 "cpp2regex.h2" -[[nodiscard]] auto line_start_token_parse(parse_context& ctx) -> token_ptr -{ - if (ctx.current() != '^' && !((ctx.current() == '\\' && ctx.peek() == 'A'))) {return nullptr; } - - if (ctx.current() == '\\') { - static_cast(ctx.next()); - return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, "\\A", "line_start_token_matcher"); - } - else { - return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, "^", "line_start_token_matcher"); - } -} - -#line 2118 "cpp2regex.h2" +#line 471 "cpp2regex.h2" template [[nodiscard]] auto line_start_token_matcher(auto const& cur, auto& ctx) -> bool { return cur == ctx.begin || // Start of string (match_new_line && *cpp2::impl::assert_not_null((cur - 1)) == '\n'); // Start of new line } -#line 2136 "cpp2regex.h2" - lookahead_token::lookahead_token(cpp2::impl::in positive_) - : regex_token{ "" } - , positive{ positive_ }{ - -#line 2138 "cpp2regex.h2" - } - -#line 2140 "cpp2regex.h2" - auto lookahead_token::generate_code(generation_context& ctx) const -> void{ - auto inner_name {ctx.generate_func(inner)}; - - ctx.add_check("lookahead_token_matcher(" + cpp2::to_string(ctx.match_parameters()) + ", " + cpp2::to_string(cpp2::move(inner_name)) + ")"); - } - -#line 2146 "cpp2regex.h2" - auto lookahead_token::add_groups(std::set& groups) const -> void{ - (*cpp2::impl::assert_not_null(inner)).add_groups(groups); - } - - lookahead_token::~lookahead_token() noexcept{} - -#line 2151 "cpp2regex.h2" +#line 482 "cpp2regex.h2" template [[nodiscard]] auto lookahead_token_matcher(auto const& cur, auto& ctx, auto const& func) -> bool { auto r {func(cur, ctx, true_end_func())}; @@ -3598,208 +937,12 @@ template [[nodiscard]] auto lookahead_token_match return cpp2::move(r).matched; } -#line 2164 "cpp2regex.h2" -[[nodiscard]] auto named_class_token_parse(parse_context& ctx) -> token_ptr -{ - 
if (ctx.current() != '\\') {return nullptr; } - - auto name {""}; - auto c_next {ctx.peek()}; - - if ( 'd' == c_next) { name = "named_class_digits"; } - else {if ('D' == c_next) {name = "named_class_not_digits"; } - else {if ('h' == c_next) {name = "named_class_hor_space"; } - else {if ('H' == c_next) {name = "named_class_not_hor_space"; } - else {if ('N' == c_next) {name = "named_class_no_new_line"; } - else {if ('s' == c_next) {name = "named_class_space"; } - else {if ('S' == c_next) {name = "named_class_not_space"; } - else {if ('v' == c_next) {name = "named_class_ver_space"; } - else {if ('V' == c_next) {name = "named_class_not_ver_space"; } - else {if ('w' == c_next) {name = "named_class_word"; } - else {if ('W' == cpp2::move(c_next)) {name = "named_class_not_word"; } - else { return nullptr; }}}}}}}}}}} - - static_cast(ctx.next());// Skip escape - - return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, "\\" + cpp2::to_string(ctx.current()) + "", "" + cpp2::to_string(cpp2::move(name)) + "::match"); -} - -#line 2206 "cpp2regex.h2" -[[nodiscard]] auto octal_token_parse(parse_context& ctx) -> token_ptr -{ - if (!((ctx.current() == '\\' && ctx.peek() == 'o'))) {return nullptr; } - - static_cast(ctx.next());// Skip escape. - - if (!(ctx.next())) { return ctx. error("o escape without number."); } - if (ctx.current() != '{') {return ctx.error("Missing opening bracket for \\o."); } - - std::string number_str {""}; - static_cast(ctx.next());// Skip '{' - if (!(ctx.grab_until('}', cpp2::impl::out(&number_str)))) {return ctx.error("No ending bracket for \\o"); } - - auto number {0}; - if (!(string_util::string_to_int(cpp2::move(number_str), number, 8))) {return ctx.error("Could not convert octal to int."); } - - // TODO: Change for unicode. 
- char number_as_char {cpp2::unsafe_narrow(cpp2::move(number))}; - - std::string syntax {"\\o{" + cpp2::to_string(string_util::int_to_string<8>(cpp2::impl::as_(number_as_char))) + "}"}; - auto r {CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, cpp2::move(number_as_char), ctx.get_modifiers().has(expression_flags::case_insensitive))}; - (*cpp2::impl::assert_not_null(r)).set_string(cpp2::move(syntax)); - return r; -} - -#line 2236 "cpp2regex.h2" +#line 498 "cpp2regex.h2" inline CPP2_CONSTEXPR int range_flags::not_greedy{ 1 }; inline CPP2_CONSTEXPR int range_flags::greedy{ 2 }; inline CPP2_CONSTEXPR int range_flags::possessive{ 3 }; -#line 2253 "cpp2regex.h2" - range_token::range_token() - : regex_token{ "" }{} - -#line 2255 "cpp2regex.h2" - [[nodiscard]] auto range_token::parse(parse_context& ctx) -> token_ptr - { - auto r {CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared)}; - if (ctx.current() == '{') - { - if (!(ctx.has_token())) {return ctx.error("'{' without previous element."); } - - std::string inner {""}; - if (!(ctx.grab_until('}', cpp2::impl::out(&inner)))) {return ctx.error("Missing closing bracket '}'."); } - - inner = string_util::trim_copy(inner.substr(1)); // Remove '{' and white spaces. - if (inner.empty()) {return ctx.error("Empty range specifier. 
Either '{n}', '{n,}', '{,m}' '{n,m}'"); } - - // Non-greedy or possessive - (*cpp2::impl::assert_not_null(r)).parse_modifier(ctx); - - // Get range arguments - std::string min_count_str {"-1"}; - std::string max_count_str {"-1"}; - - size_t sep {inner.find(",")}; - if (sep == std::string::npos) - { - min_count_str = inner; - max_count_str = inner; - if (!(string_util::string_to_int(cpp2::move(inner), (*cpp2::impl::assert_not_null(r)).min_count))) {return ctx.error("Could not convert range to number."); } - (*cpp2::impl::assert_not_null(r)).max_count = (*cpp2::impl::assert_not_null(r)).min_count; - } - else - { - std::string inner_first {string_util::trim_copy(inner.substr(0, sep))}; - std::string inner_last {string_util::trim_copy(cpp2::move(inner).substr(cpp2::move(sep) + 1))}; - - if ((inner_first.empty() && inner_last.empty())) { - return ctx.error("Empty range specifier. Either '{n}', '{n,}', '{,m}' '{n,m}'"); - } - - if (!(inner_first.empty())) { - min_count_str = inner_first; - if (!(string_util::string_to_int(cpp2::move(inner_first), (*cpp2::impl::assert_not_null(r)).min_count))) {return ctx.error("Could not convert range to number."); } - } - if (!(inner_last.empty())) { - max_count_str = inner_last; - if (!(string_util::string_to_int(cpp2::move(inner_last), (*cpp2::impl::assert_not_null(r)).max_count))) {return ctx.error("Could not convert range to number."); } - } - } - - // Check validity of the range. - if (-1 != (*cpp2::impl::assert_not_null(r)).min_count) { - if (!((cpp2::impl::cmp_less_eq(0,(*cpp2::impl::assert_not_null(r)).min_count)))) { - return ctx.error("Min value in range is negative. Have " + cpp2::to_string((*cpp2::impl::assert_not_null(r)).min_count) + ")"); - } - } - if (-1 != (*cpp2::impl::assert_not_null(r)).max_count) { - if (!((cpp2::impl::cmp_less_eq(0,(*cpp2::impl::assert_not_null(r)).max_count)))) { - return ctx.error("Max value in range is negative. 
Have " + cpp2::to_string((*cpp2::impl::assert_not_null(r)).max_count) + ")"); - } - if (-1 != (*cpp2::impl::assert_not_null(r)).min_count) { - if (!((cpp2::impl::cmp_less_eq((*cpp2::impl::assert_not_null(r)).min_count,(*cpp2::impl::assert_not_null(r)).max_count)))) { - return ctx.error("Min and max values in range are wrong it should hold 0 <= min <= max. Have 0 <= " + cpp2::to_string((*cpp2::impl::assert_not_null(r)).min_count) + " <= " + cpp2::to_string((*cpp2::impl::assert_not_null(r)).max_count) + ""); - } - } - } - - (*cpp2::impl::assert_not_null(r)).inner_token = ctx.pop_token(); - (*cpp2::impl::assert_not_null(r)).string_rep = (*cpp2::impl::assert_not_null((*cpp2::impl::assert_not_null(r)).inner_token)).to_string() + (*cpp2::impl::assert_not_null(r)).gen_range_string() + (*cpp2::impl::assert_not_null(r)).gen_mod_string(); - - return r; - } - - return nullptr; - } - -#line 2328 "cpp2regex.h2" - auto range_token::parse_modifier(parse_context& ctx) & -> void - { - if (ctx.peek() == '?') { - kind = range_flags::not_greedy; - static_cast(ctx.next()); - } - else {if (ctx.peek() == '+') { - kind = range_flags::possessive; - static_cast(ctx.next()); - }} - } - -#line 2340 "cpp2regex.h2" - [[nodiscard]] auto range_token::gen_mod_string() const& -> std::string - { - if (kind == range_flags::not_greedy) { - return "?"; - } - else {if (kind == range_flags::possessive) { - return "+"; - } - else { - return ""; - }} - } - -#line 2353 "cpp2regex.h2" - [[nodiscard]] auto range_token::gen_range_string() const& -> std::string - { - std::string r {""}; - if (min_count == max_count) { - r += "{" + cpp2::to_string(min_count) + "}"; - } - else {if (min_count == -1) { - r += "{," + cpp2::to_string(max_count) + "}"; - } - else {if (max_count == -1) { - r += "{" + cpp2::to_string(min_count) + ",}"; - } - else { - r += "{" + cpp2::to_string(min_count) + "," + cpp2::to_string(max_count) + "}"; - }}} - - return r; - } - -#line 2372 "cpp2regex.h2" - auto 
range_token::generate_code(generation_context& ctx) const -> void - { - auto inner_name {ctx.generate_func(inner_token)}; - std::set groups {}; - (*cpp2::impl::assert_not_null(inner_token)).add_groups(groups); - auto reset_name {ctx.generate_reset(cpp2::move(groups))}; - - auto next_name {ctx.next_func_name()}; - ctx.add_statefull(next_name, "cpp2::regex::range_token_matcher::match(" + cpp2::to_string(ctx.match_parameters()) + ", " + cpp2::to_string(cpp2::move(inner_name)) + ", " + cpp2::to_string(cpp2::move(reset_name)) + ", other, " + cpp2::to_string(next_name) + ")"); - } - -#line 2383 "cpp2regex.h2" - auto range_token::add_groups(std::set& groups) const -> void{ - (*cpp2::impl::assert_not_null(inner_token)).add_groups(groups); - } - - range_token::~range_token() noexcept{} - -#line 2393 "cpp2regex.h2" +#line 509 "cpp2regex.h2" template template [[nodiscard]] auto range_token_matcher::match(Iter const& cur, auto& ctx, auto const& inner, auto const& reset_func, auto const& end_func, auto const& tail) -> auto { if (range_flags::possessive == kind) { @@ -3813,26 +956,26 @@ template [[nodiscard]] auto lookahead_token_match }} } -#line 2406 "cpp2regex.h2" +#line 522 "cpp2regex.h2" template [[nodiscard]] auto range_token_matcher::is_below_upper_bound(cpp2::impl::in count) -> bool{ if (-1 == max_count) {return true; } else {return cpp2::impl::cmp_less(count,max_count); } } -#line 2411 "cpp2regex.h2" +#line 527 "cpp2regex.h2" template [[nodiscard]] auto range_token_matcher::is_below_lower_bound(cpp2::impl::in count) -> bool{ if (-1 == min_count) {return false; } else {return cpp2::impl::cmp_less(count,min_count); } } -#line 2416 "cpp2regex.h2" +#line 532 "cpp2regex.h2" template [[nodiscard]] auto range_token_matcher::is_in_range(cpp2::impl::in count) -> bool{ if (-1 != min_count && cpp2::impl::cmp_less(count,min_count)) {return false; } if (-1 != max_count && cpp2::impl::cmp_greater(count,max_count)) {return false; } return true; } -#line 2422 "cpp2regex.h2" +#line 538 
"cpp2regex.h2" template template [[nodiscard]] auto range_token_matcher::match_min_count(Iter const& cur, auto& ctx, auto const& inner, auto const& end_func, int& count_r) -> auto { // TODO: count_r as out parameter introduces a performance loss. auto res {ctx.pass(cur)}; @@ -3849,7 +992,7 @@ template [[nodiscard]] auto lookahead_token_match return res; } -#line 2438 "cpp2regex.h2" +#line 554 "cpp2regex.h2" template template [[nodiscard]] auto range_token_matcher::match_greedy(cpp2::impl::in count, Iter const& cur, Iter const& last_valid, auto& ctx, auto const& inner, auto const& reset_func, auto const& end_func, auto const& other) -> match_return { auto inner_call {[_0 = (count + 1), _1 = (cur), _2 = (inner), _3 = (reset_func), _4 = (end_func), _5 = (other)](auto const& tail_cur, auto& tail_ctx) -> auto{ @@ -3880,7 +1023,7 @@ template [[nodiscard]] auto lookahead_token_match return r; } -#line 2468 "cpp2regex.h2" +#line 584 "cpp2regex.h2" template template [[nodiscard]] auto range_token_matcher::match_possessive(Iter const& cur, auto& ctx, auto const& inner, auto const& end_func, auto const& other) -> match_return { auto count {0}; @@ -3909,7 +1052,7 @@ template [[nodiscard]] auto lookahead_token_match return other(cpp2::move(pos), ctx, end_func); } -#line 2496 "cpp2regex.h2" +#line 612 "cpp2regex.h2" template template [[nodiscard]] auto range_token_matcher::match_not_greedy(Iter const& cur, auto& ctx, auto const& inner, auto const& end_func, auto const& other) -> match_return { auto count {0}; @@ -3937,59 +1080,7 @@ template [[nodiscard]] auto lookahead_token_match return other(cpp2::move(pos), ctx, end_func); // Upper bound reached. 
} -#line 2531 "cpp2regex.h2" - [[nodiscard]] auto special_range_token::parse(parse_context& ctx) -> token_ptr - { - auto r {CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared)}; - char symbol {'\0'}; - if (ctx.current() == '*') { - (*cpp2::impl::assert_not_null(r)).min_count = 0; - (*cpp2::impl::assert_not_null(r)).max_count = -1; - symbol = '*'; - } - else {if (ctx.current() == '+') { - (*cpp2::impl::assert_not_null(r)).min_count = 1; - (*cpp2::impl::assert_not_null(r)).max_count = -1; - symbol = '+'; - }else {if (ctx.current() == '?') { - (*cpp2::impl::assert_not_null(r)).min_count = 0; - (*cpp2::impl::assert_not_null(r)).max_count = 1; - symbol = '?'; - }else { - return nullptr; - }}} - - if (!(ctx.has_token())) {return ctx.error("'" + cpp2::to_string(ctx.current()) + "' without previous element."); } - -#line 2555 "cpp2regex.h2" - (*cpp2::impl::assert_not_null(r)).parse_modifier(ctx); - - (*cpp2::impl::assert_not_null(r)).inner_token = ctx.pop_token(); - (*cpp2::impl::assert_not_null(r)).string_rep = (*cpp2::impl::assert_not_null((*cpp2::impl::assert_not_null(r)).inner_token)).to_string() + cpp2::move(symbol) + (*cpp2::impl::assert_not_null(r)).gen_mod_string(); - return r; - } - - special_range_token::~special_range_token() noexcept{} - -#line 2568 "cpp2regex.h2" -[[nodiscard]] auto word_boundary_token_parse(parse_context& ctx) -> token_ptr -{ - if (ctx.current() != '\\') {return nullptr; } - - if (ctx.peek() == 'b') { - static_cast(ctx.next()); - return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, "\\b", "word_boundary_token_matcher"); - } - else {if (ctx.peek() == 'B') { - static_cast(ctx.next()); - return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, "\\B", "word_boundary_token_matcher"); - } - else { - return nullptr; - }} -} - -#line 2585 "cpp2regex.h2" +#line 645 "cpp2regex.h2" template [[nodiscard]] auto word_boundary_token_matcher(auto& cur, auto& ctx) -> bool { word_class words {}; @@ -4014,32 +1105,32 @@ template [[nodiscard]] auto word_boundary_token_mat return 
is_match; } -#line 2629 "cpp2regex.h2" +#line 689 "cpp2regex.h2" template template regular_expression::search_return::search_return(cpp2::impl::in matched_, context const& ctx_, Iter const& pos_) : matched{ matched_ } , ctx{ ctx_ } , pos{ cpp2::unsafe_narrow(std::distance(ctx_.begin, pos_)) }{ -#line 2633 "cpp2regex.h2" +#line 693 "cpp2regex.h2" } -#line 2635 "cpp2regex.h2" +#line 695 "cpp2regex.h2" template template [[nodiscard]] auto regular_expression::search_return::group_number() const& -> auto { return ctx.size(); } -#line 2636 "cpp2regex.h2" +#line 696 "cpp2regex.h2" template template [[nodiscard]] auto regular_expression::search_return::group(cpp2::impl::in g) const& -> auto { return ctx.get_group_string(g); } -#line 2637 "cpp2regex.h2" +#line 697 "cpp2regex.h2" template template [[nodiscard]] auto regular_expression::search_return::group_start(cpp2::impl::in g) const& -> auto { return ctx.get_group_start(g); } -#line 2638 "cpp2regex.h2" +#line 698 "cpp2regex.h2" template template [[nodiscard]] auto regular_expression::search_return::group_end(cpp2::impl::in g) const& -> auto { return ctx.get_group_end(g); } -#line 2640 "cpp2regex.h2" +#line 700 "cpp2regex.h2" template template [[nodiscard]] auto regular_expression::search_return::group(cpp2::impl::in> g) const& -> auto { return group(get_group_id(g)); } -#line 2641 "cpp2regex.h2" +#line 701 "cpp2regex.h2" template template [[nodiscard]] auto regular_expression::search_return::group_start(cpp2::impl::in> g) const& -> auto { return group_start(get_group_id(g)); } -#line 2642 "cpp2regex.h2" +#line 702 "cpp2regex.h2" template template [[nodiscard]] auto regular_expression::search_return::group_end(cpp2::impl::in> g) const& -> auto { return group_end(get_group_id(g)); } -#line 2644 "cpp2regex.h2" +#line 704 "cpp2regex.h2" template template [[nodiscard]] auto regular_expression::search_return::get_group_id(cpp2::impl::in> g) const& -> auto{ auto group_id {matcher::get_named_group_index(g)}; if (-1 == group_id) { 
@@ -4048,13 +1139,13 @@ template [[nodiscard]] auto word_boundary_token_mat return group_id; } -#line 2653 "cpp2regex.h2" +#line 713 "cpp2regex.h2" template [[nodiscard]] auto regular_expression::match(cpp2::impl::in> str) const& -> auto { return match(str.begin(), str.end()); } -#line 2654 "cpp2regex.h2" +#line 714 "cpp2regex.h2" template [[nodiscard]] auto regular_expression::match(cpp2::impl::in> str, auto const& start) const& -> auto { return match(get_iter(str, start), str.end()); } -#line 2655 "cpp2regex.h2" +#line 715 "cpp2regex.h2" template [[nodiscard]] auto regular_expression::match(cpp2::impl::in> str, auto const& start, auto const& length) const& -> auto { return match(get_iter(str, start), get_iter(str, start + length)); } -#line 2656 "cpp2regex.h2" +#line 716 "cpp2regex.h2" template template [[nodiscard]] auto regular_expression::match(Iter const& start, Iter const& end) const& -> search_return { context ctx {start, end}; @@ -4063,13 +1154,13 @@ template [[nodiscard]] auto word_boundary_token_mat return search_return(r.matched && r.pos == end, cpp2::move(ctx), r.pos); } -#line 2664 "cpp2regex.h2" +#line 724 "cpp2regex.h2" template [[nodiscard]] auto regular_expression::search(cpp2::impl::in> str) const& -> auto { return search(str.begin(), str.end()); } -#line 2665 "cpp2regex.h2" +#line 725 "cpp2regex.h2" template [[nodiscard]] auto regular_expression::search(cpp2::impl::in> str, auto const& start) const& -> auto { return search(get_iter(str, start), str.end()); } -#line 2666 "cpp2regex.h2" +#line 726 "cpp2regex.h2" template [[nodiscard]] auto regular_expression::search(cpp2::impl::in> str, auto const& start, auto const& length) const& -> auto { return search(get_iter(str, start), get_iter(str, start + length)); } -#line 2667 "cpp2regex.h2" +#line 727 "cpp2regex.h2" template template [[nodiscard]] auto regular_expression::search(Iter const& start, Iter const& end) const& -> search_return { context ctx {start, end}; @@ -4090,10 +1181,10 @@ template 
[[nodiscard]] auto word_boundary_token_mat return search_return(r.matched, cpp2::move(ctx), cpp2::move(r).pos); } -#line 2687 "cpp2regex.h2" +#line 747 "cpp2regex.h2" template [[nodiscard]] auto regular_expression::to_string() const& -> auto { return matcher_wrapper::to_string(); } -#line 2691 "cpp2regex.h2" +#line 751 "cpp2regex.h2" template [[nodiscard]] auto regular_expression::get_iter(cpp2::impl::in> str, auto const& pos) -> auto{ if (cpp2::impl::cmp_less(pos,str.size())) { return str.begin() + pos; @@ -4103,75 +1194,7 @@ template [[nodiscard]] auto word_boundary_token_mat } } -#line 2720 "cpp2regex.h2" - template regex_generator::regex_generator(cpp2::impl::in r, Error_out const& e) - : regex{ r } - , error_out{ e }{ - -#line 2723 "cpp2regex.h2" - } - -#line 2725 "cpp2regex.h2" - template [[nodiscard]] auto regex_generator::parse() & -> std::string - { - // Extract modifiers and adapt regex. - extract_modifiers(); - - parse_context parse_ctx {regex, error_out}; - if (!(parse_ctx.parse(modifier))) { - return ""; - } - - source += "{\n"; - source += " wrap: type = {\n"; // TODO: Remove wrapper when template template parameters are available. 
- source += " context: type == cpp2::regex::match_context;"; - - generation_context gen_ctx {}; - source += gen_ctx.run(parse_ctx.get_as_token()); - source += " entry: (cur: Iter, inout ctx: context) -> cpp2::regex::match_return = {\n"; - source += " ctx..set_group_start(0, cur);\n"; - source += " r := " + cpp2::to_string(gen_ctx.get_entry_func()) + "(cur, ctx, cpp2::regex::true_end_func());\n"; - source += " if r.matched { ctx..set_group_end(0, r.pos); }\n"; - source += " return r;\n"; - source += " }\n"; - - source += cpp2::move(gen_ctx).create_named_group_lookup(parse_ctx.named_groups); - source += "}\n"; - - auto string {(*cpp2::impl::assert_not_null(parse_ctx.get_as_token())).to_string()}; - source += " to_string: () -> std::string = { return R\"(" + cpp2::to_string(modifier_escape) + cpp2::to_string(cpp2::move(string)) + cpp2::to_string(modifier_escape) + cpp2::to_string(modifier) + ")\"; }\n"; - source += "}\n"; - - static_cast(cpp2::move(parse_ctx)); - - return source; - } - -#line 2760 "cpp2regex.h2" - template auto regex_generator::extract_modifiers() & -> void - { - if (regex.find_first_of("'/") == 0) { - char mod_token {CPP2_ASSERT_IN_BOUNDS_LITERAL(regex, 0)}; - - auto end_pos {regex.rfind(mod_token)}; - if (end_pos != 0) { - // Found valid start end escape - modifier = regex.substr(end_pos + 1); - modifier_escape = cpp2::move(mod_token); - regex = regex.substr(1, cpp2::move(end_pos) - 1); - } - } - } - -#line 2776 "cpp2regex.h2" -template [[nodiscard]] auto generate_regex(cpp2::impl::in regex, Err const& err) -> std::string -{ - regex_generator parser {regex, err}; - auto r {parser.parse()}; - static_cast(cpp2::move(parser)); - return r; -} - +#line 761 "cpp2regex.h2" } } diff --git a/include/cpp2regex.h2 b/include/cpp2regex.h2 index 204a15a6e..3d9518fee 100644 --- a/include/cpp2regex.h2 +++ b/include/cpp2regex.h2 @@ -18,22 +18,18 @@ #ifndef CPP2_CPP2REGEX_H #define CPP2_CPP2REGEX_H - template using matcher_wrapper_type = typename 
matcher_wrapper::template wrap; template using matcher_context_type = typename matcher::context; -using error_func = std::function; - cpp2: namespace = { regex: namespace = { -bview : type == std::basic_string_view; bstring: type == std::basic_string; - +bview : type == std::basic_string_view; //----------------------------------------------------------------------- // @@ -42,18 +38,6 @@ bstring: type == std::basic_string; //----------------------------------------------------------------------- // -// Possible modifiers for a regular expression. -// -expression_flags: @flag_enum type = -{ - case_insensitive; // mod: i - multiple_lines; // mod: m - single_line; // mod: s - no_group_captures; // mod: n - perl_code_syntax; // mod: x - perl_code_syntax_in_classes; // mod: xx -} - // Structure for storing group information. // match_group: @struct type = @@ -273,1819 +257,201 @@ short_not_vert_space_class : type == negated_class_entry type == negated_class_entry>>; -//----------------------------------------------------------------------- -// -// Tokens for regular expressions. -// -//----------------------------------------------------------------------- -// - -// Basic class for a regex token. -// -regex_token: @polymorphic_base type = -{ - public string_rep: std::string; - - operator=:(out this, str: std::string) = { - string_rep = str; - } - - operator=:(out this) = { - string_rep = ""; - } - - //parse: (inout ctx: parse_context) -> token_ptr; - generate_code: (virtual this, inout _: generation_context); // Generate the matching code. - - add_groups: (virtual this, inout _: std::set) = {} // Adds all group indices to the set. - to_string: (this) -> std::string = { return string_rep; } // Create a string representation. - set_string: (inout this, s: std::string) = { string_rep = s; } // Set the string representation. -} - -token_ptr : type == std::shared_ptr; -token_vec: type == std::vector; - - -// Adds a check in code generation. 
-// -regex_token_check: @polymorphic_base type = -{ - this: regex_token; - - check: std::string; - - operator=:(out this, str: std::string, check_: std::string) = { - regex_token = (str); - check = check_; - } - - generate_code: (override this, inout ctx: generation_context) = { - ctx..add_check(check + "(" + ctx..match_parameters() + ")"); - } -} - - -// Adds code in code generation. -// -regex_token_code: @polymorphic_base type = -{ - this: regex_token; - - code: std::string; - - operator=:(out this, str: std::string, code_: std::string) = { - regex_token = (str); - code = code_; - } - - generate_code: (override this, inout ctx: generation_context) = { - ctx..add(code); - } -} - - -// Token that does not influence the matching. E.g. comment. +// Regex syntax: | Example: ab|ba // -regex_token_empty: @polymorphic_base type = -{ - this: regex_token; - - operator=:(out this, str: std::string) = { - regex_token = (str); - } - - generate_code: (override this, inout _: generation_context) = { - // Nothing. - } -} - - -// Represents a list of regex tokens as one token. +// Non greedy implementation. First alternative that matches is chosen. // -regex_token_list: @polymorphic_base type = +alternative_token_matcher: type = { - this: regex_token; - - public tokens: token_vec; - - operator=:(out this, t: token_vec) = { - regex_token = (gen_string(t)); - tokens = t; - } - - generate_code: (override this, inout ctx: generation_context) = { - for tokens do (token) { - token*..generate_code(ctx); - } - } - - add_groups: (override this, inout groups: std::set) = { - for tokens do (token) { - token*..add_groups(groups); - } - } - - gen_string: (vec: token_vec) -> std::string = { - r : std::string = ""; - for vec do (token) { - r += token*..to_string(); - } - return r; - } -} - - -//----------------------------------------------------------------------- -// -// Parse and generation context. 
-// -//----------------------------------------------------------------------- -// - -// State of the current capturing group. See '()' -// -parse_context_group_state: @struct type = -{ - cur_match_list: token_vec = (); // Current list of matchers. - alternate_match_lists: token_vec = (); // List of alternate matcher lists. E.g. ab|cd|xy. - modifiers : expression_flags = (); // Current modifiers for the group/regular expression. - - // Start a new alternative. - next_alternative: (inout this) = { - new_list: token_vec = (); - std::swap(new_list, cur_match_list); - post_process_list(new_list); - _ = alternate_match_lists..insert(alternate_match_lists..end(), shared.new(new_list)); - } - - // Swap this state with the other one. - swap: (inout this, inout t: parse_context_group_state) = { - std::swap(cur_match_list, t.cur_match_list); - std::swap(alternate_match_lists, t.alternate_match_lists); - std::swap(modifiers, t.modifiers); - } - - // Convert this state into a regex token. - get_as_token: (inout this) -> token_ptr = { - if alternate_match_lists..empty() { - post_process_list(cur_match_list); - return shared.new(cur_match_list); - } - else { - next_alternative(); - return shared.new(alternate_match_lists); - } - } - - // Add a token to the current matcher list. - add: (inout this, token: token_ptr) = { - cur_match_list..push_back(token); + match: (cur, inout ctx, end_func, tail, functions ...) -> _ = { + return match_first(cur, ctx, end_func, tail, functions...); } - // True if current matcher list is empty. - empty: (this) -> bool = cur_match_list..empty(); + private match_first: (cur, inout ctx, end_func, tail, cur_func, cur_reset, other ...: Other) -> _ = + { + inner_call := :(tail_cur, inout tail_ctx) -> _ == { + return (tail)$(tail_cur, tail_ctx, (end_func)$); + }; + r := cur_func(cur, ctx, inner_call); + if r.matched { + return r; + } else { + cur_reset(ctx); - - // Apply optimizations to the matcher list. 
- post_process_list: (inout list: token_vec) = { - // Merge all characters - merge_pos := list..begin(); - while merge_pos != list..end() next (merge_pos++) { - if merge_pos** is char_token { - combine_pos := merge_pos + 1; - while combine_pos != list..end() && combine_pos** is char_token { // The erase advances combine_pos - (merge_pos** as char_token)..append(combine_pos** as char_token); - combine_pos = list..erase(combine_pos); - } + if constexpr 0 != sizeof...(Other) { + return match_first(cur, ctx, end_func, tail, other...); + } else { + return ctx..fail(); } } } } -// State for the branch reset. Takes care of the group numbering. See '(|)'. +// Regex syntax: . // -parse_context_branch_reset_state: @struct type = +any_token_matcher: (inout cur, inout ctx) -> bool = { - is_active : bool = false; // If we have a branch reset group. - cur_group : int = 1; // Next group identifier. 0 == global capture group. - max_group : int = 1; // Maximum group identifier generated. - from : int = 1; // Starting identifier on new alternative branch. - - // Next group identifier. - next: (inout this) -> int = { - g := cur_group; - cur_group += 1; - max_group = max(max_group, cur_group); - - return g; + if cur != ctx.end // any char except the end + && (single_line || cur* != '\n') // do not match new lines in multi line mode + { + cur += 1; + return true; } + // Else + return false; +} - // Set next group identifier. - set_next: (inout this, g: int) = { - cur_group = g; - max_group = max(max_group, g); - } - // Start a new alternative branch. - next_alternative: (inout this) = { - if is_active { - cur_group = from; - } - } +// TODO: Check if vectorization works at some point with this implementation. 
+// char_token_matcher: (inout cur, inout ctx) -> bool = { +// if !(std::distance(cur, ctx.end) < tokens..size()) { +// return false; +// } +// matched : bool = true; +// (copy i: int = 0) while i < tokens..size() next i += 1 { +// if tokens..data()[i] != cur[i] { +// matched = false; // No break for performance optimization. Without break, the loop vectorizes. +// } +// } +// if matched { +// cur += tokens..size(); +// } +// return matched; +// } - // Initialize for a branch reset group. - set_active_reset: (inout this, restart: int) = { - is_active = true; - cur_group = restart; - from = restart; - max_group = restart; - } -} +// char_token_case_insensitive_matcher: (inout cur, inout ctx) -> bool = { +// if !(std::distance(cur, ctx.end) < lower..size()) { +// return false; +// } +// matched : bool = true; +// (copy i : int = 0) while i < lower..size() next i += 1 { +// if !(lower..data()[i] == cur[i] || upper..data()[i] == cur[i]) { +// matched = false; // No break for performance optimization. Without break, the loop vectorizes. +// } +// } +// if matched { +// cur += lower..size(); +// } +// return matched; +// } -// Context during parsing of the regular expressions. -// -// Keeps track of the distributed group identifiers, current parsed group and branch resets. +// Regex syntax: [] Example: [abcx-y[:digits:]] // -parse_context: type = +class_token_matcher: type = { - regex: std::string_view; // Regular expression string. - pos: size_t = 0; // Current parsing position. - root: token_ptr; // Token representing the regular expression. - - cur_group_state: parse_context_group_state = (); - cur_branch_reset_state: parse_context_branch_reset_state = (); - - - public named_groups: std::map = (); - - error_out: error_func; // TODO: Declaring std::function fails for cpp2. 
- has_error: bool = false; - - operator=:(out this, r: std::string_view, e) = { - regex = r; - root = shared.new(""); - error_out = e; - } - - // State management functions - // - - // Returned group state needs to be stored and provided in `end_group`. - start_group: (inout this) -> parse_context_group_state = - { - old_state: parse_context_group_state = (); - old_state..swap(cur_group_state); - cur_group_state.modifiers = old_state.modifiers; - - return old_state; - } - - // `old_state` argument needs to be from start group. - end_group: (inout this, old_state: parse_context_group_state) -> token_ptr = - { - inner := cur_group_state..get_as_token(); - cur_group_state = old_state; - return inner; - } - - get_modifiers: (this) -> expression_flags = { - return cur_group_state.modifiers; - } - - set_modifiers: (inout this, mod: expression_flags) = { - cur_group_state.modifiers = mod; - } - - // Branch reset management functions - // - - branch_reset_new_state: (inout this) -> parse_context_branch_reset_state = - { - old_state: parse_context_branch_reset_state = (); - std::swap(old_state, cur_branch_reset_state); - - cur_branch_reset_state..set_active_reset(old_state.cur_group); - return old_state; - } - - branch_reset_restore_state: (inout this, old_state: parse_context_branch_reset_state) = - { - max_group := cur_branch_reset_state.max_group; - cur_branch_reset_state = old_state; - cur_branch_reset_state..set_next(max_group); - } - - next_alternative: (inout this) = - { - cur_group_state..next_alternative(); - cur_branch_reset_state..next_alternative(); - } - - // Regex token management - // - add_token: (inout this, token: token_ptr) = { - cur_group_state..add(token); - } - - has_token: (this) -> bool = { - return !cur_group_state..empty(); - } - - pop_token: (inout this) -> token_ptr = - { - r : token_ptr = nullptr; - if has_token() { - r = cur_group_state.cur_match_list..back(); - cur_group_state.cur_match_list..pop_back(); - } - - return r; - } - - get_as_token: 
(inout this) -> token_ptr = { - return root; - } - - // Group management - // - get_cur_group: (this) -> int = { - return cur_branch_reset_state.cur_group; - } - - next_group: (inout this) -> int = { - return cur_branch_reset_state..next(); - } - - set_named_group: (inout this, name: std::string, id: int) = - { - if !named_groups..contains(name) { // Redefinition of group name is not an error. The left most one is retained. - named_groups[name] = id; - } - } - - get_named_group: (this, name: std::string) -> int = + match: (inout cur, inout ctx) -> bool = { - iter := named_groups..find(name); - if iter == named_groups..end() { - return -1; - } - else { - return iter*.second; - } - } - - // Position management functions - // - current: (this) -> char = { return regex[pos]; } - - // Get the next token in the regex, skipping spaces according to the parameters. See `x` and `xx` modifiers. - private get_next_position: (in this, in_class: bool, no_skip: bool) -> size_t = - { - perl_syntax := false; - if !no_skip { - if in_class { - perl_syntax = get_modifiers()..has(expression_flags::perl_code_syntax) && get_modifiers()..has(expression_flags::perl_code_syntax_in_classes); + if constexpr case_insensitive + { + if cur != ctx.end + && negate != ( + match_any(string_util::safe_tolower(cur*)) + || match_any(string_util::safe_toupper(cur*)) + ) + { + cur += 1; + return true; } else { - perl_syntax = get_modifiers()..has(expression_flags::perl_code_syntax); - } - } - cur := pos + 1; - if perl_syntax { - while cur < regex..size() next (cur += 1) { - n: = regex[cur]; - - if space_class::includes(n) { - continue; - } - else if !in_class && '#' == n { - cur = regex..find("\n", cur); - if std::string::npos == cur { - // No new line, comment runs until the end of the pattern - cur = regex..size(); - } - } - else { // None space none comment char - break; - } - } - } - - // Check for end of file. 
- if cur > regex..size() { - cur = regex..size(); - } - return cur; - } - - // Return true if next token is available. - private next_impl: (inout this, in_class: bool, no_skip: bool) -> bool = - { - pos = get_next_position(in_class, no_skip); - if pos != regex..size() { - return true; - } - else { - return false; - } - } - - next : (inout this) next_impl(false, false); - next_in_class: (inout this) next_impl( true, false); - next_no_skip : (inout this) next_impl(false, true); - - next_n: (inout this, n: int) -> bool = { - r := true; - cur := 0; - while r && cur < n next (r = next()) { - cur += 1; - } - return r; - } - - has_next: (this) -> bool = { return pos < regex..size(); } - - private grab_until_impl: (inout this, in e: std::string, out r: std::string, any: bool) -> bool = - { - end:= pos; - if any { - end = regex..find_first_of(e, pos); - } - else { - end = regex..find(e, pos); - } - - if end != std::string_view::npos { - r = regex..substr(pos, end - pos); - pos = end; - return true; - } - else { - r = ""; - return false; - } - } - - grab_until: (inout this, in e: std::string, out r: std::string) grab_until_impl(e, out r, false); - grab_until: (inout this, in e: char, out r: std::string) grab_until_impl(std::string(1, e), out r, false); - grab_until_one_of: (inout this, in e: std::string, out r: std::string) grab_until_impl(e, out r, true); - - grab_n: (inout this, in n: int, out r: std::string) -> bool = - { - if pos + n as size_t <= regex..size() { - r = regex..substr(pos, n as size_t); - pos += (n as size_t) - 1; - return true; - } - else { - r = ""; - return false; - } - } - - grab_number: (inout this) -> std::string = - { - start := pos; - start_search := pos; - if regex[start_search] == '-' { - start_search += 1; - } - end := regex..find_first_not_of("1234567890", start_search); - - r : std::string; - if end != std::string::npos { - r = regex..substr(start, end - start); - pos = end - 1; - } - else { - r = regex..substr(start); - pos = regex..size() - 
1; - } - return r; - } - - private peek_impl: (in this, in_class: bool) -> char = { - next_pos := get_next_position(in_class, false); - if next_pos < regex..size() { - return regex[next_pos]; - } - else { - return '\0'; - } - } - - peek : (in this) peek_impl(false); - peek_in_class: (in this) peek_impl( true); - - - // Parsing functions - // - parser_group_modifiers: (inout this, change_str: std::string, inout parser_modifiers: expression_flags) -> bool = - { - is_negative := false; - is_reset := false; - - apply := :(flag: expression_flags) = { - if is_negative&$* { - parser_modifiers&$*..clear(flag); - } - else { - parser_modifiers&$*..set(flag); - } - }; - - iter := change_str..begin(); - while iter != change_str..end() next (iter++) - { - cur := iter*; - if cur == '^' { - is_reset = true; - parser_modifiers = expression_flags::none; - } - else if cur == '-' { - if is_reset { _= error("No negative modifier allowed."); return false; } - is_negative = true; - } - else if cur == 'i' { apply(expression_flags::case_insensitive); } - else if cur == 'm' { apply(expression_flags::multiple_lines); } - else if cur == 's' { apply(expression_flags::single_line); } - else if cur == 'n' { apply(expression_flags::no_group_captures); } - else if cur == 'x' { - if (iter + 1) == change_str..end() || (iter + 1)* != 'x' { - // x modifier - apply(expression_flags::perl_code_syntax); - - // Just x unsets xx and remove x also removes xx - parser_modifiers..clear(expression_flags::perl_code_syntax_in_classes); - } - else { // xx modifier - // xx also sets or unsets x - apply(expression_flags::perl_code_syntax); - apply(expression_flags::perl_code_syntax_in_classes); - - iter++; // Skip the second x - } - } - else { - _= error("Unknown modifier: (cur)$"); return false; - } - } - - return true; - } - - parse_until:(inout this, term: char) -> bool = { - cur_token: token_ptr = (); - - while valid() next _ = next() - { - if term == current() { break; } - - cur_token = nullptr; - - if 
!cur_token && valid() { cur_token = alternative_token::parse(this); } - if !cur_token && valid() { cur_token = any_token::parse(this); } - if !cur_token && valid() { cur_token = class_token::parse(this); } - if !cur_token && valid() { cur_token = escape_token_parse(this); } - if !cur_token && valid() { cur_token = global_group_reset_token_parse(this); } - if !cur_token && valid() { cur_token = group_ref_token::parse(this); } - if !cur_token && valid() { cur_token = group_token::parse(this); } - if !cur_token && valid() { cur_token = hexadecimal_token_parse(this); } - if !cur_token && valid() { cur_token = line_end_token_parse(this); } - if !cur_token && valid() { cur_token = line_start_token_parse(this); } - if !cur_token && valid() { cur_token = named_class_token_parse(this); } - if !cur_token && valid() { cur_token = octal_token_parse(this); } - if !cur_token && valid() { cur_token = range_token::parse(this); } - if !cur_token && valid() { cur_token = special_range_token::parse(this); } - if !cur_token && valid() { cur_token = word_boundary_token_parse(this); } - - // Everything else is matched as it is. 
- if !cur_token && valid() { cur_token = char_token::parse(this); } - - if cur_token && valid() { - add_token(cur_token); - } else { - return false; - } - } - - return true; - } - - parse: (inout this, modifiers: std::string) -> bool = - { - - flags : expression_flags = (); - if !parser_group_modifiers(modifiers, flags) { return false; } - set_modifiers(flags); - - r := parse_until('\0'); - if r { - root = cur_group_state..get_as_token(); - } - - return r; - } - - // Misc functions - - get_pos: (this) pos; - get_range: (this, start: size_t, end: size_t) std::string(regex..substr(start, end - start + 1)); - valid: (this) -> bool = { return has_next() && !has_error; } - - error: (inout this, err: std::string) -> token_ptr = { - has_error = true; - error_out("Error during parsing of regex '(regex)$' at position '(pos)$': (err)$"); - return nullptr; - } -} - - -// Context for one function generation. Generation of functions can be interleaved, -// therefore we buffer the code for one function here. -// -generation_function_context: @struct type = { - code: std::string = ""; - tabs: std::string = ""; - - add_tabs: (inout this, c: int) = { - i: int = 0; - while i < c next i += 1 { - tabs += " "; - } - } - - remove_tabs: (inout this, c: int) = { - tabs = tabs..substr(0, (c as size_t) * 2); - } -} - - -// Context for generating the state machine. -generation_context: type = -{ - gen_stack: std::vector = (1); // Element 0 contains all the code. - - matcher_func: int = 0; - reset_func: int = 0; - temp_name: int = 0; - entry_func: std::string = ""; - - // Generation helpers - // - match_parameters: (this) -> std::string = { return "r.pos, ctx"; } - - // Code generation. - - // Add code line. - add: (inout this, s: std::string) = { - cur := get_current(); - cur*.code += "(cur*.tabs)$(s)$\n"; - } - - // Add check for token. The check needs to be a function call that returns a boolean. 
- add_check: (inout this, check: std::string) = { - cur := get_current(); - cur*.code += "(cur*.tabs)$if !cpp2::regex::(check)$ { r.matched = false; break; }\n"; - } - - // Add a stateful check. The check needs to return a `match_return`. - add_statefull: (inout this, next_func: std::string, check: std::string) = - { - end_func_statefull(check); - - name := next_func..substr(0, next_func..size() - 2); - start_func_named(name); - } - - protected start_func_named: (inout this, name: std::string) = - { - cur := new_context(); - - cur*.code += "(cur*.tabs)$(name)$: @struct type = {\n"; - cur*.code += "(cur*.tabs)$ operator(): (this, cur: Iter, inout ctx: context, other) -> cpp2::regex::match_return = {\n"; - cur*.code += "(cur*.tabs)$ r := ctx..pass(cur);\n"; - cur*.code += "(cur*.tabs)$ do {\n"; - cur*..add_tabs(3); - } - - protected start_func: (inout this) -> std::string = - { - name := gen_func_name(); - start_func_named(name); - return name + "()"; - } - - protected end_func_statefull: (inout this, s: std::string) = - { - cur := get_current(); - cur*..remove_tabs(3); - cur*.code += "\n"; - cur*.code += "(cur*.tabs)$ } while false;\n"; - cur*.code += "(cur*.tabs)$ if r.matched {\n"; - cur*.code += "(cur*.tabs)$ r = (s)$;\n"; - cur*.code += "(cur*.tabs)$ }\n"; - cur*.code += "(cur*.tabs)$ else {\n"; - cur*.code += "(cur*.tabs)$ r.pos = ctx.end;\n"; - cur*.code += "(cur*.tabs)$ }\n"; - cur*.code += "(cur*.tabs)$ return r;\n"; - cur*.code += "(cur*.tabs)$ }\n"; - cur*.code += "(cur*.tabs)$}\n"; - - finish_context(); - } - - // Generate the function for a token. - generate_func: (inout this, token: token_ptr) -> std::string = - { - name := start_func(); - token*..generate_code(this); - end_func_statefull("other((match_parameters())$)"); - - return name; - } - - // Generate the reset for a list of group identifiers. 
- generate_reset: (inout this, groups: std::set) -> std::string = - { - if groups..empty() { - return "cpp2::regex::no_reset()"; - } - - name := gen_reset_func_name(); - cur := new_context(); - - cur*.code += "(cur*.tabs)$(name)$: @struct type = {\n"; - cur*.code += "(cur*.tabs)$ operator(): (this, inout ctx) = {\n"; - for groups do (g) { - cur*.code += "(cur*.tabs)$ ctx..set_group_invalid((g)$);\n"; - } - cur*.code += "(cur*.tabs)$ }\n"; - cur*.code += "(cur*.tabs)$}\n"; - - finish_context(); - - return name + "()"; - } - - // Name generation - // - protected gen_func_name: (inout this) -> std::string = { - cur_id : = matcher_func; - matcher_func += 1; - return "func_(cur_id)$"; - } - - next_func_name: (inout this) -> std::string = { - return gen_func_name() + "()"; - } - - protected gen_reset_func_name: (inout this) -> std::string = { - cur_id : = reset_func; - reset_func += 1; - return "reset_(cur_id)$"; - } - - gen_temp: (inout this) -> std::string = { - cur_id := temp_name; - temp_name += 1; - return "tmp_(cur_id)$"; - } - - // Context management - // - new_context: (inout this) -> *generation_function_context = { - gen_stack..push_back(generation_function_context()); - cur := get_current(); - cur*.tabs = " "; - - return cur; - } - - finish_context: (inout this) = { - cur := get_current(); - base := get_base(); - base*.code += cur*.code; - - gen_stack..pop_back(); - } - - // Misc functions - // - private get_current: (inout this) -> *generation_function_context = { - return gen_stack..back()&; - } - - private get_base: (inout this) -> *generation_function_context = { - return gen_stack[0]&; - } - - get_entry_func: (this) -> std::string = { - return entry_func; - } - - create_named_group_lookup: (this, named_groups: std::map) -> std::string = - { - res: std::string = "get_named_group_index: (name) -> int = {\n"; - - // Generate if selection. 
- sep: std::string = ""; - for named_groups do (cur) { - res += "(sep)$if name == \"(cur.first)$\" { return (cur.second)$; }"; - sep = "else "; - } - - // Generate else branch or return if list is empty. - if named_groups..empty() { - res += " _ = name;\n"; - res += " return -1;\n"; - } - else { - res += " else { return -1; }\n"; - } - res += "}\n"; - return res; - } - - - // Run the generation for the token. - run: (inout this, token: token_ptr) -> std::string = { - entry_func = generate_func(token); - - return get_base()*.code; - } -} - - -// Regex syntax: | Example: ab|ba -// -// Non greedy implementation. First alternative that matches is chosen. -// -alternative_token: @polymorphic_base type = -{ - this: regex_token_empty = (""); // No code gen here. alternative_token_gen is created in the parse_context - - operator=:(out this) = {} - - parse: (inout ctx: parse_context) -> token_ptr = { - if ctx..current() != '|' { return nullptr; } - - if !ctx..has_token() { return ctx..error("Alternative with no content."); } - ctx..next_alternative(); - return shared.new(); - } -} - -alternative_token_gen: @polymorphic_base type = -{ - this: regex_token; - - alternatives: token_vec; - - operator=: (out this, a: token_vec) = { - regex_token = gen_string(a); - alternatives = a; - } - - generate_code: (override this, inout ctx: generation_context) = - { - functions: std::string = ""; - - for alternatives do (cur) { - groups: std::set = (); - cur*..add_groups(groups); - - functions += ", " + ctx..generate_func(cur); - functions += ", " + ctx..generate_reset(groups); - } - - next_name := ctx..next_func_name(); - - ctx..add_statefull(next_name, "cpp2::regex::alternative_token_matcher::match((ctx..match_parameters())$, other, (next_name)$ (functions)$)"); - } - - add_groups: (override this, inout groups: std::set) = - { - for alternatives do (cur) { - cur*..add_groups(groups); - } - } - - gen_string: (a: token_vec) -> std::string = - { - r: std::string = ""; - sep: std::string = 
""; - - for a do (cur) { - r += sep + cur*..to_string(); - sep = "|"; - } - - return r; - } -} - - -alternative_token_matcher: type = -{ - match: (cur, inout ctx, end_func, tail, functions ...) -> _ = { - return match_first(cur, ctx, end_func, tail, functions...); - } - - private match_first: (cur, inout ctx, end_func, tail, cur_func, cur_reset, other ...: Other) -> _ = - { - inner_call := :(tail_cur, inout tail_ctx) -> _ == { - return (tail)$(tail_cur, tail_ctx, (end_func)$); - }; - r := cur_func(cur, ctx, inner_call); - if r.matched { - return r; - } else { - cur_reset(ctx); - - if constexpr 0 != sizeof...(Other) { - return match_first(cur, ctx, end_func, tail, other...); - } else { - return ctx..fail(); - } - } - } -} - - -// Regex syntax: . -// -any_token: @polymorphic_base type = -{ - this: regex_token_check = ("."); - - operator=:(out this, single_line: bool) = { - regex_token_check = (".", "any_token_matcher"); - } - - parse: (inout ctx: parse_context) -> token_ptr = { - if '.' != ctx..current() { return nullptr;} - - return shared.new(ctx..get_modifiers()..has(expression_flags::single_line)); - } -} - - -any_token_matcher: (inout cur, inout ctx) -> bool = -{ - if cur != ctx.end // any char except the end - && (single_line || cur* != '\n') // do not match new lines in multi line mode - { - cur += 1; - return true; - } - // Else - return false; -} - - -// Regex syntax: a -// -char_token: @polymorphic_base type = -{ - this: regex_token; - - token : std::string; - ignore_case: bool; - - operator=: (out this, t: char, ignore_case_: bool) = { - regex_token = (std::string(1, t)); - token = t; - ignore_case = ignore_case_; - } - - parse: (inout ctx: parse_context) -> token_ptr = { - return shared.new(ctx..current(), ctx..get_modifiers()..has(expression_flags::case_insensitive)); - } - - generate_code: (override this, inout ctx: generation_context) = - { - if ignore_case { - upper: std::string = token; - lower: std::string = token; - - (copy i: size_t = 0) while i < 
token..size() next i += 1 { - lower[i] = string_util::safe_tolower(token[i]); - upper[i] = string_util::safe_toupper(token[i]); - } - - if upper != lower { - gen_case_insensitive(lower, upper, ctx); - } - else { - gen_case_sensitive(ctx); - } - } - else { - gen_case_sensitive(ctx); - } - } - - gen_case_insensitive: (this, lower: std::string, upper: std::string, inout ctx: generation_context) = - { - name: std::string = "str_(ctx..gen_temp())$"; - lower_name: std::string = "lower_(name)$"; - upper_name: std::string = "upper_(name)$"; - size := token..size(); - ctx..add("(lower_name)$ : std::array = \"(add_escapes(lower))$\";"); // TODO: Add constexpr when Issue https://github.com/hsutter/cppfront/issues/1104 is resolved. - ctx..add("(upper_name)$ : std::array = \"(add_escapes(upper))$\";"); // TODO: Add constexpr when Issue https://github.com/hsutter/cppfront/issues/1104 is resolved. - ctx..add("if std::distance(r.pos, ctx.end) < (size)$ {"); - ctx..add(" r.matched = false;"); - ctx..add(" break;"); - ctx..add("}"); - ctx..add(""); - ctx..add("(copy i : int = 0) while i < (size)$ next (i += 1) {"); - ctx..add(" if !((lower_name)$[i] == r.pos[i] || (upper_name)$[i] == r.pos[i]) { r.matched = false; }"); - ctx..add("}"); - ctx..add(""); - ctx..add("if r.matched { r.pos += (size)$; }"); - ctx..add("else { break; }"); - } - - gen_case_sensitive: (this, inout ctx: generation_context) = - { - name: std::string = "str_(ctx..gen_temp())$"; - size := token..size(); - ctx..add("(name)$ : std::array = \"(add_escapes(token))$\";"); // TODO: Add constexpr when Issue https://github.com/hsutter/cppfront/issues/1104 is resolved. 
- ctx..add("if std::distance(r.pos, ctx.end) < (size)$ {"); - ctx..add(" r.matched = false;"); - ctx..add(" break;"); - ctx..add("}"); - ctx..add(""); - ctx..add("(copy i : int = 0) while i < (size)$ next (i += 1) {"); - ctx..add(" if (name)$[i] != r.pos[i] { r.matched = false; }"); - ctx..add("}"); - ctx..add(""); - ctx..add("if r.matched { r.pos += (size)$; }"); - ctx..add("else { break; }"); - } - - add_escapes: (this, copy str: std::string) -> std::string = - { - str = string_util::replace_all(str, "\\", "\\\\"); - str = string_util::replace_all(str, "\a", "\\a"); - str = string_util::replace_all(str, "\f", "\\f"); - str = string_util::replace_all(str, "\x1b", "\" \"\\x1b\" \""); // Generate a separated string. This prevents - // situations like `\x1bblub` from generating - // wrong hex characters. - str = string_util::replace_all(str, "\n", "\\n"); - str = string_util::replace_all(str, "\r", "\\r"); - str = string_util::replace_all(str, "\t", "\\t"); - - return str; - } - - append: (inout this, that) = { - this.token += that.token; - this.string_rep += that.string_rep; - } -} - - -// TODO: Check if vectorization works at some point with this implementation. -// char_token_matcher: (inout cur, inout ctx) -> bool = { -// if !(std::distance(cur, ctx.end) < tokens..size()) { -// return false; -// } -// matched : bool = true; -// (copy i: int = 0) while i < tokens..size() next i += 1 { -// if tokens..data()[i] != cur[i] { -// matched = false; // No break for performance optimization. Without break, the loop vectorizes. 
-// } -// } -// if matched { -// cur += tokens..size(); -// } -// return matched; -// } - -// char_token_case_insensitive_matcher: (inout cur, inout ctx) -> bool = { -// if !(std::distance(cur, ctx.end) < lower..size()) { -// return false; -// } -// matched : bool = true; -// (copy i : int = 0) while i < lower..size() next i += 1 { -// if !(lower..data()[i] == cur[i] || upper..data()[i] == cur[i]) { -// matched = false; // No break for performance optimization. Without break, the loop vectorizes. -// } -// } -// if matched { -// cur += lower..size(); -// } -// return matched; -// } - - -// Regex syntax: [] Example: [abcx-y[:digits:]] -// -class_token: @polymorphic_base type = -{ - this : regex_token = (); - - negate : bool; - case_insensitive: bool; - class_str : std::string; - - operator=: (out this, negate_: bool, case_insensitive_: bool, class_str_: std::string, str: std::string) = - { - regex_token = str; - negate = negate_; - case_insensitive = case_insensitive_; - class_str = class_str_; - } - - // TODO: Rework class generation: Generate check functions for classes. - parse: (inout ctx: parse_context) -> token_ptr = - { - if ctx..current() != '[' { return nullptr; } - - start_pos := ctx..get_pos(); - - supported_classes: std::vector = ("alnum", "alpha", "ascii", "blank", "cntrl", "digits", "graph", - "lower", "print", "punct", "space", "upper", "word", "xdigit"); - - classes: std::vector = (); - - // First step: parse until the end bracket and push single chars, ranges or groups on the class stack. - is_negate := false; - first := true; - range := false; - while ctx..next_in_class() && (ctx..current() != ']' || first) - { - if ctx..current() == '^' - { - is_negate = true; - continue; // Skip rest of the loop. Also the first update. - } - - if ctx..current() == '[' && ctx..peek_in_class() == ':' - { - // We have a character class. 
- _ = ctx..next_n(2); // Skip [: - - name: std::string = ""; - if !ctx..grab_until(":]", out name) { return ctx..error("Could not find end of character class."); } - if supported_classes..end() == std::find(supported_classes..begin(), supported_classes..end(), name) { - return ctx..error("Unsupported character class. Supported ones are: (string_util::join(supported_classes))$"); - } - - classes..push_back("[:(name)$:]"); - - _ = ctx..next(); // Skip ':' pointing to the ending ']'. - } - else if ctx..current() == '\\' - { - if ctx..next_no_skip() && (ctx..current() != ']') - { - if ' ' == ctx..current() - && ctx..get_modifiers()..has(expression_flags::perl_code_syntax) - && ctx..get_modifiers()..has(expression_flags::perl_code_syntax_in_classes) - { - classes..push_back(std::string(1, ctx..current())); - } - else { - name := ""; - if 'd' == ctx..current() { name = "short_digits"; } - else if 'D' == ctx..current() { name = "short_not_digits"; } - else if 'h' == ctx..current() { name = "short_hor_space"; } - else if 'H' == ctx..current() { name = "short_not_hor_space"; } - else if 's' == ctx..current() { name = "short_space"; } - else if 'S' == ctx..current() { name = "short_not_space"; } - else if 'v' == ctx..current() { name = "short_ver_space"; } - else if 'V' == ctx..current() { name = "short_not_ver_space"; } - else if 'w' == ctx..current() { name = "short_word"; } - else if 'W' == ctx..current() { name = "short_not_word"; } - else { - return ctx..error("Unknown group escape."); - } - classes..push_back("[:(name)$:]"); - } - } else { - return ctx..error("Escape without a following character."); - } - } - else if ctx..current() == '-' - { - if first { // Literal if first entry. - classes..push_back("(ctx..current())$"); - } else { - range = true; - } - } - else - { - if range { // Modify last element to be a range. 
- classes..back() += "-(ctx..current())$"; - range = false; - } - else { - classes..push_back("(ctx..current())$"); - } - } - - first = false; - } - - if ctx..current() != ']' { - return ctx..error("Error end of character class definition before terminating ']'."); - } - end_pos := ctx..get_pos(); - - if range { // If '-' is last entry treat it as a literal char. - classes..push_back("-"); - } - - // Second step: Wrap the item on the class stack with corresponding class implementation. - for classes do (inout cur) - { - if cur..starts_with("[:") { - name := cur..substr(2, cur..size() - 4); - cur = create_matcher("(name)$_class", ""); - } - else if 1 != cur..size() { - cur = create_matcher("range_class_entry", "'(cur[0])$', '(cur[2])$'"); - } - else { - cur = create_matcher("single_class_entry", "'(cur)$'"); - } - } - - inner := string_util::join(classes); - string_rep := ctx..get_range(start_pos, end_pos); - return shared.new( - is_negate, - ctx..get_modifiers()..has(expression_flags::case_insensitive), - inner, - string_rep - ); - } - - generate_code: (override this, inout ctx: generation_context) = - { - ctx..add_check("class_token_matcher::match((ctx..match_parameters())$)"); - } - - private create_matcher: (name: std::string, template_arguments: std::string) -> std::string = - { - sep := ", "; - if template_arguments..empty() { sep = ""; } - - return "::cpp2::regex::(name)$"; - } -} - - -class_token_matcher: type = -{ - match: (inout cur, inout ctx) -> bool = - { - if constexpr case_insensitive - { - if cur != ctx.end - && negate != ( - match_any(string_util::safe_tolower(cur*)) - || match_any(string_util::safe_toupper(cur*)) - ) - { - cur += 1; - return true; - } - else { - return false; - } - } - else - { - if cur != ctx.end && negate != match_any(cur*) { - cur += 1; - return true; - } - else { - return false; - } - } - } - - private match_any: (c: CharT) -> bool = - { - r: bool = First::includes(c); - - if !r { - if constexpr 0 != sizeof...(Other) { - r = 
match_any(c); - } - } - - return r; - } - - // TODO: Implement proper to string - // to_string: () -> bstring = { - // r: bstring = "["; - // if negate { - // r += "^"; - // } - // r += (bstring() + ... + List::to_string()); - // r += "]"; - - // return r; - // } -} - - -// Regex syntax: \a or \n or \[ -// -escape_token_parse: (inout ctx: parse_context) -> token_ptr = -{ - if ctx..current() != '\\' { return nullptr; } - - - if std::string::npos == std::string("afenrt^.[]()*{}?+|\\")..find(ctx..peek()) { - return nullptr; - } - - _ = ctx..next(); // Skip escape - - if std::string::npos != std::string("afenrt\\")..find(ctx..current()) - { - // Escape of string special char - t : char = '\0'; - if 'a' == ctx..current() { t = '\a'; } - else if 'f' == ctx..current() { t = '\f'; } - else if 'e' == ctx..current() { t = '\x1b'; } - else if 'n' == ctx..current() { t = '\n'; } - else if 'r' == ctx..current() { t = '\r'; } - else if 't' == ctx..current() { t = '\t'; } - else if '\\' == ctx..current() { t = '\\'; } - else { return ctx..error("Internal: missing switch case for special escape."); } - - r: = shared.new(t, false); - r*..set_string("\\(ctx..current())$"); - return r; - } - else - { - // Escape of regex special char - r := shared.new(ctx..current(), false); - r*..set_string("\\(ctx..current())$"); - return r; - } - -} - - -// Regex syntax: \K Example: ab\Kcd -// -global_group_reset_token_parse: (inout ctx: parse_context) -> token_ptr = -{ - if !(ctx..current() == '\\' && ctx..peek() == 'K') { return nullptr; } - - _ = ctx..next(); // Skip escape. 
- return shared.new("\\K", "ctx..set_group_start(0, r.pos);"); -} - - -// Regex syntax: \ Example: \1 -// \g{name_or_number} -// \k{name_or_number} -// \k -// \k'name_or_number' -// -group_ref_token: @polymorphic_base type = -{ - this : regex_token = (); - - id : int; - case_insensitive: bool; - - operator=:(out this, id_: int, case_insensitive_: bool, str: std::string) = - { - regex_token = str; - id = id_; - case_insensitive = case_insensitive_; - } - - parse: (inout ctx: parse_context) -> token_ptr = - { - if ctx..current() != '\\' { return nullptr; } - - str : std::string = "\\"; - group : std::string = ""; - - if '0' <= ctx..peek() <= '9' - { - _ = ctx..next(); // Skip escape - group = ctx..grab_number(); - - if group..size() >= 3 as size_t - { - // Octal syntax (\000) not a group ref matcher. - number := 0; - if !string_util::string_to_int(group, number, 8) { return ctx..error("Could not convert octal to int."); } - - number_as_char : char = unsafe_narrow(number); - - token := shared.new(number_as_char, ctx..get_modifiers()..has(expression_flags::case_insensitive)); - token*..set_string("\\(string_util::int_to_string<8>(number_as_char as int))$"); - - return token; - } - - str += group; - // Regular group ref - } - else if 'g' == ctx..peek() - { - _ = ctx..next(); // Skip escape - if !ctx..next() { return ctx..error("Group escape without a following char."); } // Skip g - - str += "g"; - - if ctx..current() == '{' { - str += "{"; - if !(ctx..next() && ctx..grab_until('}', out group)) { return ctx..error("No ending bracket."); } - - str += group + "}"; - } - else { - group = ctx..grab_number(); - str += group; - } - } - else if 'k' == ctx..peek() - { - _ = ctx..next(); // Skip escape - if !ctx..next() { return ctx..error("Group escape without a following char."); } // Skip k - - str += "k"; - - term_char := '\0'; - if ctx..current() == '{' { term_char = '}'; } - else if ctx..current() == '<' { term_char = '>'; } - else if ctx..current() == '\'' { term_char = 
'\''; } - else { - return ctx..error("Group escape has wrong operator."); - } - - str += ctx..current(); - - if !(ctx..next() && ctx..grab_until(term_char, out group)) { return ctx..error("No ending bracket."); } - - str += group + term_char; - } - else - { - // No group ref matcher - return nullptr; - } - - // Parse the group - group = string_util::trim_copy(group); - group_id : int = 0; - if string_util::string_to_int(group, group_id) - { - if group_id < 0 { - group_id = ctx..get_cur_group() + group_id; - - if group_id < 1 { // Negative and zero are no valid groups. - return ctx..error("Relative group reference does not reference a valid group. (Would be (group_id)$.)"); - } - } - - if group_id >= ctx..get_cur_group() { - return ctx..error("Group reference is used before the group is declared."); - } - } - else - { - // Named group - group_id = ctx..get_named_group(group); - if -1 == group_id { return ctx..error("Group names does not exist. (Name is: (group)$)");} - } - - return shared.new(group_id, ctx..get_modifiers()..has(expression_flags::case_insensitive), str); - } - - generate_code: (override this, inout ctx: generation_context) = { - ctx..add_check("group_ref_token_matcher((ctx..match_parameters())$)"); - } -} - - -group_ref_token_matcher: (inout cur, inout ctx) -> bool = -{ - g := ctx..get_group(group); - - group_pos := g.start; - while - group_pos != g.end - && cur != ctx.end - next (group_pos++, cur++) - { - if constexpr case_insensitive { - if string_util::safe_tolower(group_pos*) != string_util::safe_tolower(cur*) { return false; - } - } - else { - if group_pos* != cur* { - return false; - } - } - } - - if group_pos == g.end { - return true; - } - else { - return false; - } -} - - -// Regex syntax: () Example: (abc) -// (?:) (?i:abc) -// (?<>:) (?:abc) -// (?#) (#Step 1 finished) -// (?|) (?|(abc)|(cde)) -// (?=) (?=abc) -// (?!) 
(?!abc) -// (*: token_ptr = - { - _ = ctx..next(); // Skip last token defining the syntax - - r := shared.new(positive); - - old_state := ctx..start_group(); - if !ctx..parse_until(')') { return ctx..error("Lookahead without a closing bracket."); } - r*.inner = ctx..end_group(old_state); - r*..set_string("((syntax)$(r*.inner*..to_string())$)"); - - return r; - } - - parse: (inout ctx: parse_context) -> token_ptr = - { - if ctx..current() != '(' { return nullptr; } - - has_id := !ctx..get_modifiers()..has(expression_flags::no_group_captures); - has_pattern := true; - group_name : std::string = ""; - group_name_brackets := true; - modifiers : std::string = ""; - modifiers_change_to : = ctx..get_modifiers(); - - // Skip the '(' - if !ctx..next() { return ctx..error("Group without closing bracket."); } - - if ctx..current() == '?' - { - // Special group - if !ctx..next_no_skip() { return ctx..error("Missing character after group opening."); } - - if ctx..current() == '<' || ctx..current() == '\'' - { - // Named group - end_char := ctx..current(); - if end_char == '<' { - end_char = '>'; - } else { - group_name_brackets = false; - } - has_id = true; // Force id for named groups. - if !ctx..next() /* skip '<' */ { return ctx..error("Missing ending bracket for named group."); } - if !ctx..grab_until(end_char, out group_name) { return ctx..error("Missing ending bracket for named group."); } - if !ctx..next() { return ctx..error("Group without closing bracket."); } - } - else if ctx..current() == '#' - { - // Comment - comment_str : std::string = ""; - _ = ctx..next(); // Skip # - if !ctx..grab_until(")", out comment_str) { return ctx..error("Group without closing bracket."); } - // Do not add comment. Has problems with ranges. - - // Pop token and add a list. 
This fixes comments between a token and a range - if ctx..has_token() { - list : token_vec = (); - list..push_back(ctx..pop_token()); - list..push_back(shared.new("(?#(comment_str)$)")); - - return shared.new(list); - } - else { - return shared.new("(?#(comment_str)$)"); - } - } - else if ctx..current() == '|' - { - // Branch reset group - - if !ctx..next() /* skip '|' */ { return ctx..error("Missing ending bracket for named group."); } - - old_parser_state := ctx..start_group(); - old_branch_state := ctx..branch_reset_new_state(); - if !ctx..parse_until(')') { return nullptr; } - ctx..branch_reset_restore_state(old_branch_state); - inner_ := ctx..end_group(old_parser_state); - - list: token_vec = (shared.new("(?|"), inner_, shared.new(")")); - return shared.new(list); - } - else if ctx..current() == '=' || ctx..current() == '!' - { - return parse_lookahead(ctx, "?(ctx..current())$", ctx..current() == '='); - } - else - { - // Simple modifier - has_id = false; - if !ctx..grab_until_one_of("):", out modifiers) { return ctx..error("Missing ending bracket for group."); } - if !ctx..parser_group_modifiers(modifiers, modifiers_change_to) { - return nullptr; - } - - if ')' == ctx..current() { - has_pattern = false; - } - else { - if !ctx..next() /* skip ':' */ { return ctx..error("Missing ending bracket for group."); } - } - } - } - else if ctx..current() == '*' - { - // Named pattern - _ = ctx..next(); // Skip *. 
- name: std::string = ""; - if !ctx..grab_until(':', out name) { return ctx..error("Missing colon for named pattern."); } - - if name == "pla" || name == "positive_lookahead" { - return parse_lookahead(ctx, "*(name)$:", true); - } - else if name == "nla" || name == "negative_lookahead" { - return parse_lookahead(ctx, "*(name)$:", false); - } - else { - return ctx..error("Unknown named group pattern: '(name)$'"); - } - } - - if has_pattern - { - // Regular group - - r := shared.new(); - if has_id { - r*.number = ctx..next_group(); - - if 0 != group_name..size() { - ctx..set_named_group(group_name, r*.number); - } - } - - old_state := ctx..start_group(); - ctx..set_modifiers(modifiers_change_to); - if !ctx..parse_until(')') { return nullptr; } - r*.inner = ctx..end_group(old_state); - r*..set_string(gen_string(group_name, group_name_brackets, !has_id, modifiers, r*.inner)); - - return r; + } } else { - // Only a modifier - ctx..set_modifiers(modifiers_change_to); - - return shared.new("(?(modifiers)$)"); - } - } - - gen_string: (name: std::string, name_brackets: bool, has_modifier: bool, modifiers: std::string, inner_: token_ptr) -> std::string = - { - start : std::string = "("; - if 0 != name..size() { - if name_brackets { - start += "?<(name..data())$>"; + if cur != ctx.end && negate != match_any(cur*) { + cur += 1; + return true; } else { - start += "?'(name..data())$'"; + return false; } } - else if has_modifier { - start += "?" 
+ modifiers + ":"; - } - - return start + inner_*..to_string() + ")"; } - generate_code: (override this, inout ctx: generation_context) = + private match_any: (c: CharT) -> bool = { - if -1 != number { - ctx..add("ctx..set_group_start((number)$, r.pos);"); - } + r: bool = First::includes(c); - inner*..generate_code(ctx); - if -1 != number { - ctx..add("ctx..set_group_end((number)$, r.pos);"); - tmp_name := ctx..gen_temp(); - ctx..add("(tmp_name)$_func := :() = {"); - ctx..add(" if !r&$*.matched {"); - ctx..add(" ctx&$*..set_group_invalid((number)$);"); - ctx..add(" }"); - ctx..add("};"); - ctx..add("(tmp_name)$ := cpp2::regex::make_on_return((tmp_name)$_func);"); - ctx..add("_ = (tmp_name)$;"); // Logic is done in the destructor. Same behavior as for guard objects. + if !r { + if constexpr 0 != sizeof...(Other) { + r = match_any(c); + } } - } - add_groups: (override this, inout groups: std::set) = - { - inner*..add_groups(groups); - if -1 != number { - _ = groups..insert(number); - } + return r; } -} - -// Regex syntax: \x or \x{} Example: \x{62} -// -hexadecimal_token_parse: (inout ctx: parse_context) -> token_ptr = -{ - if !(ctx..current() == '\\' && ctx..peek() == 'x') { return nullptr; } - - _ = ctx..next(); // Skip escape. - - if !ctx..next() { return ctx..error("x escape without number.");} - - has_brackets := false; - number_str: std::string = ""; - if '{' == ctx..current() { - // Bracketed - has_brackets = true; - _ = ctx..next(); // Skip '{' - if !ctx..grab_until('}', out number_str) { return ctx..error("No ending bracket for \\x"); } - } - else { - // Grab two chars - if !ctx..grab_n(2, out number_str) { return ctx..error("Missing hexadecimal digits after \\x."); } - } + // TODO: Implement proper to string + // to_string: () -> bstring = { + // r: bstring = "["; + // if negate { + // r += "^"; + // } + // r += (bstring() + ... 
+ List::to_string()); + // r += "]"; - number := 0; - if !string_util::string_to_int(number_str, number, 16) { return ctx..error("Could not convert hexadecimal to int."); } + // return r; + // } +} - // TODO: Change for unicode. - number_as_char : char = unsafe_narrow(number); - syntax: std::string = string_util::int_to_string<16>(number_as_char as int); - if has_brackets { - syntax = "{(syntax)$}"; - } - syntax = "\\x(syntax)$"; +// Named short classes +// +named_class_no_new_line : type == class_token_matcher>; +named_class_digits : type == class_token_matcher>; +named_class_hor_space : type == class_token_matcher>; +named_class_space : type == class_token_matcher>; +named_class_ver_space : type == class_token_matcher>; +named_class_word : type == class_token_matcher>; - r := shared.new(number_as_char, ctx..get_modifiers()..has(expression_flags::case_insensitive)); - r*..set_string(syntax); - return r; -} +named_class_not_digits : type == class_token_matcher>; +named_class_not_hor_space : type == class_token_matcher>; +named_class_not_space : type == class_token_matcher>; +named_class_not_ver_space : type == class_token_matcher>; +named_class_not_word : type == class_token_matcher>; -// Regex syntax: $ Example: aa$ +// Regex syntax: \ Example: \1 +// \g{name_or_number} +// \k{name_or_number} +// \k +// \k'name_or_number' // -line_end_token_parse: (inout ctx: parse_context) -> token_ptr = +group_ref_token_matcher: (inout cur, inout ctx) -> bool = { - if ctx..current() == '$' || (ctx..current() == '\\' && ctx..peek() == '$') { - if (ctx..current() == '\\') { _ = ctx..next(); } // Skip escape - return shared.new("$", "line_end_token_matcher"); + g := ctx..get_group(group); + + group_pos := g.start; + while + group_pos != g.end + && cur != ctx.end + next (group_pos++, cur++) + { + if constexpr case_insensitive { + if string_util::safe_tolower(group_pos*) != string_util::safe_tolower(cur*) { + return false; + } + } + else { + if group_pos* != cur* { + return false; + 
} + } } - else if ctx..current() == '\\' && (ctx..peek() == 'z' || ctx..peek() == 'Z') { - _ = ctx..next(); // Skip escape - negate := ctx..current() == 'Z'; - return shared.new("\\(ctx..current())$", "line_end_token_matcher"); + if group_pos == g.end { + return true; } else { - return nullptr; + return false; } } + +// Regex syntax: $ Example: aa$ +// line_end_token_matcher: (cur, inout ctx) -> bool = { if cur == ctx.end || (match_new_line && cur* == '\n') { @@ -2102,19 +468,6 @@ line_end_token_matcher: token_ptr = -{ - if ctx..current() != '^' && !(ctx..current() == '\\' && ctx..peek() == 'A') { return nullptr; } - - if ctx..current() == '\\' { - _ = ctx..next(); - return shared.new("\\A", "line_start_token_matcher"); - } - else { - return shared.new("^", "line_start_token_matcher"); - } -} - line_start_token_matcher: (cur, inout ctx) -> bool = { return cur == ctx.begin || // Start of string @@ -2126,28 +479,6 @@ line_start_token_matcher: (cur, inout ctx) -> bool // // Parsed in group_token. 
// -lookahead_token: @polymorphic_base type = -{ - this: regex_token = (""); - - protected positive: bool; - public inner : token_ptr = nullptr; - - operator=: (out this, positive_: bool) = { - positive = positive_; - } - - generate_code: (override this, inout ctx: generation_context) = { - inner_name := ctx..generate_func(inner); - - ctx..add_check("lookahead_token_matcher((ctx..match_parameters())$, (inner_name)$)"); - } - - add_groups: (override this, inout groups: std::set) = { - inner*..add_groups(groups); - } -} - lookahead_token_matcher: (cur, inout ctx, func) -> bool = { r := func(cur, ctx, true_end_func()); @@ -2159,75 +490,6 @@ lookahead_token_matcher: (cur, inout ctx, func) -> bool } -// Named character classes -// -named_class_token_parse: (inout ctx: parse_context) -> token_ptr = -{ - if ctx..current() != '\\' { return nullptr; } - - name := ""; - c_next := ctx..peek(); - - if 'd' == c_next { name = "named_class_digits"; } - else if 'D' == c_next { name = "named_class_not_digits"; } - else if 'h' == c_next { name = "named_class_hor_space"; } - else if 'H' == c_next { name = "named_class_not_hor_space"; } - else if 'N' == c_next { name = "named_class_no_new_line"; } - else if 's' == c_next { name = "named_class_space"; } - else if 'S' == c_next { name = "named_class_not_space"; } - else if 'v' == c_next { name = "named_class_ver_space"; } - else if 'V' == c_next { name = "named_class_not_ver_space"; } - else if 'w' == c_next { name = "named_class_word"; } - else if 'W' == c_next { name = "named_class_not_word"; } - else { return nullptr; } - - _ = ctx..next(); // Skip escape - - return shared.new("\\(ctx..current())$", "(name)$::match"); -} - - -named_class_no_new_line : type == class_token_matcher>; -named_class_digits : type == class_token_matcher>; -named_class_hor_space : type == class_token_matcher>; -named_class_space : type == class_token_matcher>; -named_class_ver_space : type == class_token_matcher>; -named_class_word : type == 
class_token_matcher>; - -named_class_not_digits : type == class_token_matcher>; -named_class_not_hor_space : type == class_token_matcher>; -named_class_not_space : type == class_token_matcher>; -named_class_not_ver_space : type == class_token_matcher>; -named_class_not_word : type == class_token_matcher>; - - -// Regex syntax: \o{} Example: \o{142} -// -octal_token_parse: (inout ctx: parse_context) -> token_ptr = -{ - if !(ctx..current() == '\\' && ctx..peek() == 'o') { return nullptr; } - - _ = ctx..next(); // Skip escape. - - if !ctx..next() { return ctx..error("o escape without number.");} - if ctx..current() != '{' { return ctx..error("Missing opening bracket for \\o."); } - - number_str: std::string = ""; - _ = ctx..next(); // Skip '{' - if !ctx..grab_until('}', out number_str) { return ctx..error("No ending bracket for \\o"); } - - number := 0; - if !string_util::string_to_int(number_str, number, 8) { return ctx..error("Could not convert octal to int."); } - - // TODO: Change for unicode. - number_as_char : char = unsafe_narrow(number); - - syntax: std::string = "\\o{(string_util::int_to_string<8>(number_as_char as int))$}"; - r := shared.new(number_as_char, ctx..get_modifiers()..has(expression_flags::case_insensitive)); - r*..set_string(syntax); - return r; -} - // TODO: @enum as template parameter currently not working. 
See issue https://github.com/hsutter/cppfront/issues/1147 @@ -2241,152 +503,6 @@ range_flags: type = { // Regex syntax: {min, max} Example: a{2,4} // -range_token: @polymorphic_base type = -{ - this : regex_token = (""); - - protected min_count : int = -1; - protected max_count : int = -1; - protected kind : int = range_flags::greedy; - protected inner_token: token_ptr = nullptr; - - operator=: (out this) = {} - - parse: (inout ctx: parse_context) -> token_ptr = - { - r := shared.new(); - if ctx..current() == '{' - { - if !ctx..has_token() { return ctx..error("'{' without previous element."); } - - inner: std::string = ""; - if !ctx..grab_until('}', out inner) { return ctx..error("Missing closing bracket '}'."); } - - inner = string_util::trim_copy(inner..substr(1)); // Remove '{' and white spaces. - if inner..empty() { return ctx..error("Empty range specifier. Either '{n}', '{n,}', '{,m}' '{n,m}'"); } - - // Non-greedy or possessive - r*..parse_modifier(ctx); - - // Get range arguments - min_count_str: std::string = "-1"; - max_count_str: std::string = "-1"; - - sep: size_t = inner..find(","); - if sep == std::string::npos - { - min_count_str = inner; - max_count_str = inner; - if !string_util::string_to_int(inner, r*.min_count) { return ctx..error("Could not convert range to number."); } - r*.max_count = r*.min_count; - } - else - { - inner_first: std::string = string_util::trim_copy(inner..substr(0, sep)); - inner_last: std::string = string_util::trim_copy(inner..substr(sep + 1)); - - if (inner_first..empty() && inner_last..empty()) { - return ctx..error("Empty range specifier. 
Either '{n}', '{n,}', '{,m}' '{n,m}'"); - } - - if !inner_first..empty() { - min_count_str = inner_first; - if !string_util::string_to_int(inner_first, r*.min_count) { return ctx..error("Could not convert range to number."); } - } - if !inner_last..empty() { - max_count_str = inner_last; - if !string_util::string_to_int(inner_last, r*.max_count) { return ctx..error("Could not convert range to number."); } - } - } - - // Check validity of the range. - if -1 != r*.min_count { - if !(0 <= r*.min_count) { - return ctx..error("Min value in range is negative. Have (r*.min_count)$)"); - } - } - if -1 != r*.max_count { - if !(0 <= r*.max_count) { - return ctx..error("Max value in range is negative. Have (r*.max_count)$)"); - } - if -1 != r*.min_count { - if !(r*.min_count <= r*.max_count) { - return ctx..error("Min and max values in range are wrong it should hold 0 <= min <= max. Have 0 <= (r*.min_count)$ <= (r*.max_count)$"); - } - } - } - - r*.inner_token = ctx..pop_token(); - r*.string_rep = r*.inner_token*..to_string() + r*..gen_range_string() + r*..gen_mod_string(); - - return r; - } - - return nullptr; - } - - parse_modifier: (inout this, inout ctx: parse_context) = - { - if ctx..peek() == '?' 
{ - kind = range_flags::not_greedy; - _ = ctx..next(); - } - else if ctx..peek() == '+' { - kind = range_flags::possessive; - _ = ctx..next(); - } - } - - gen_mod_string: (this) -> std::string = - { - if kind == range_flags::not_greedy { - return "?"; - } - else if kind == range_flags::possessive { - return "+"; - } - else { - return ""; - } - } - - gen_range_string: (this) -> std::string = - { - r : std::string = ""; - if min_count == max_count { - r += "{(min_count)$}"; - } - else if min_count == -1 { - r += "{,(max_count)$}"; - } - else if max_count == -1 { - r += "{(min_count)$,}"; - } - else { - r += "{(min_count)$,(max_count)$}"; - } - - return r; - } - - generate_code: (override this, inout ctx: generation_context) = - { - inner_name := ctx..generate_func(inner_token); - groups: std::set = (); - inner_token*..add_groups(groups); - reset_name := ctx..generate_reset(groups); - - next_name := ctx..next_func_name(); - ctx..add_statefull(next_name, "cpp2::regex::range_token_matcher::match((ctx..match_parameters())$, (inner_name)$, (reset_name)$, other, (next_name)$)"); - } - - add_groups: (override this, inout groups: std::set) = { - inner_token*..add_groups(groups); - } - -} - - range_token_matcher: type = { @@ -2522,66 +638,10 @@ range_token_matcher: type = } -// Regex syntax: *, +, or ? Example: aa* -// -special_range_token: @polymorphic_base type = -{ - this : range_token = (); - - parse: (inout ctx: parse_context) -> token_ptr = - { - r := shared.new(); - symbol: char = '\0'; - if ctx..current() == '*' { - r*.min_count = 0; - r*.max_count = -1; - symbol = '*'; - } - else if ctx..current() == '+' { - r*.min_count = 1; - r*.max_count = -1; - symbol = '+'; - } else if ctx..current() == '?' 
{ - r*.min_count = 0; - r*.max_count = 1; - symbol = '?'; - } else { - return nullptr; - } - - if !ctx..has_token() { return ctx..error("'(ctx..current())$' without previous element."); } - - - r*..parse_modifier(ctx); - - r*.inner_token = ctx..pop_token(); - r*.string_rep = r*.inner_token*..to_string() + symbol + r*..gen_mod_string(); - return r; - } -} - - // Regex syntax: \b or \B Example: \bword\b // // Matches the start end end of word boundaries. // -word_boundary_token_parse: (inout ctx: parse_context) -> token_ptr = -{ - if ctx..current() != '\\' { return nullptr; } - - if ctx..peek() == 'b' { - _ = ctx..next(); - return shared.new("\\b", "word_boundary_token_matcher"); - } - else if ctx..peek() == 'B' { - _ = ctx..next(); - return shared.new("\\B", "word_boundary_token_matcher"); - } - else { - return nullptr; - } -} - word_boundary_token_matcher: (inout cur, inout ctx) -> bool = { words : word_class = (); @@ -2698,92 +758,6 @@ regular_expression: type = } } - -//----------------------------------------------------------------------- -// -// Parser for regular expression. -// -//----------------------------------------------------------------------- -// - -// Parser and generator for regular expressions. -regex_generator: type = -{ - regex: std::string_view; - modifier: std::string = ""; - modifier_escape: std::string = ""; - - error_out: Error_out; - - source: std::string = ""; - - operator=: (out this, r: std::string_view, e: Error_out) = { - regex = r; - error_out = e; - } - - parse:(inout this) -> std::string = - { - // Extract modifiers and adapt regex. - extract_modifiers(); - - parse_ctx: parse_context = (regex, error_out); - if !parse_ctx..parse(modifier) { - return ""; - } - - source += "{\n"; - source += " wrap: type = {\n"; // TODO: Remove wrapper when template template parameters are available. 
- source += " context: type == cpp2::regex::match_context;"; - - gen_ctx: generation_context = (); - source += gen_ctx..run(parse_ctx..get_as_token()); - source += " entry: (cur: Iter, inout ctx: context) -> cpp2::regex::match_return = {\n"; - source += " ctx..set_group_start(0, cur);\n"; - source += " r := (gen_ctx..get_entry_func())$(cur, ctx, cpp2::regex::true_end_func());\n"; - source += " if r.matched { ctx..set_group_end(0, r.pos); }\n"; - source += " return r;\n"; - source += " }\n"; - - source += gen_ctx..create_named_group_lookup(parse_ctx.named_groups); - source += "}\n"; - - string := parse_ctx..get_as_token()*..to_string(); - source += " to_string: () -> std::string = { return R\"((modifier_escape)$(string)$(modifier_escape)$(modifier)$)\"; }\n"; - source += "}\n"; - - _ = parse_ctx; - - return source; - } - - private extract_modifiers: (inout this) = - { - if regex..find_first_of("'/") == 0 { - mod_token: char = regex[0]; - - end_pos := regex..rfind(mod_token); - if end_pos != 0 { - // Found valid start end escape - modifier = regex..substr(end_pos + 1); - modifier_escape = mod_token; - regex = regex..substr(1, end_pos - 1); - } - } - } -} - -generate_regex: (regex: std::string_view, err: Err) -> std::string = -{ - parser: regex_generator = (regex, err); - r := parser..parse(); - _ = parser; - return r; -} - } } - - #endif - diff --git a/include/cpp2util.h b/include/cpp2util.h index 69563d0e7..144aed7cd 100644 --- a/include/cpp2util.h +++ b/include/cpp2util.h @@ -2824,8 +2824,6 @@ inline constexpr auto as_() -> decltype(auto) } -#include "cpp2regex.h" - using cpp2::cpp2_new; diff --git a/regression-tests/test-results/apple-clang-14-c++2b/pure2-default-arguments.cpp.output b/regression-tests/test-results/apple-clang-14-c++2b/pure2-default-arguments.cpp.output new file mode 100644 index 000000000..4812dc94c --- /dev/null +++ b/regression-tests/test-results/apple-clang-14-c++2b/pure2-default-arguments.cpp.output @@ -0,0 +1,4 @@ 
+pure2-default-arguments.cpp2:6:61: error: no member named 'source_location' in namespace 'std' + char const* fn = CPP2_UFCS_NONLOCAL(function_name)(std::source_location::current()) + ~~~~~^ +1 error generated. diff --git a/regression-tests/test-results/apple-clang-15-c++2b/pure2-default-arguments.cpp.execution b/regression-tests/test-results/apple-clang-15-c++2b/pure2-default-arguments.cpp.execution new file mode 100644 index 000000000..8eba45436 --- /dev/null +++ b/regression-tests/test-results/apple-clang-15-c++2b/pure2-default-arguments.cpp.execution @@ -0,0 +1,2 @@ +calling: +012 \ No newline at end of file diff --git a/regression-tests/test-results/clang-15-c++20-libcpp/pure2-default-arguments.cpp.output b/regression-tests/test-results/clang-15-c++20-libcpp/pure2-default-arguments.cpp.output new file mode 100644 index 000000000..4812dc94c --- /dev/null +++ b/regression-tests/test-results/clang-15-c++20-libcpp/pure2-default-arguments.cpp.output @@ -0,0 +1,4 @@ +pure2-default-arguments.cpp2:6:61: error: no member named 'source_location' in namespace 'std' + char const* fn = CPP2_UFCS_NONLOCAL(function_name)(std::source_location::current()) + ~~~~~^ +1 error generated. 
diff --git a/regression-tests/test-results/clang-15-c++20/pure2-default-arguments.cpp.execution b/regression-tests/test-results/clang-15-c++20/pure2-default-arguments.cpp.execution new file mode 100644 index 000000000..8eba45436 --- /dev/null +++ b/regression-tests/test-results/clang-15-c++20/pure2-default-arguments.cpp.execution @@ -0,0 +1,2 @@ +calling: +012 \ No newline at end of file diff --git a/regression-tests/test-results/clang-18-c++20/pure2-default-arguments.cpp.output b/regression-tests/test-results/clang-18-c++20/pure2-default-arguments.cpp.output new file mode 100644 index 000000000..ba1cda599 --- /dev/null +++ b/regression-tests/test-results/clang-18-c++20/pure2-default-arguments.cpp.output @@ -0,0 +1,7 @@ +pure2-default-arguments.cpp2:6:56: error: cannot take address of consteval function 'current' outside of an immediate invocation + 6 | char const* fn = CPP2_UFCS_NONLOCAL(function_name)(std::source_location::current()) + | ^ +/usr/bin/../lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14/source_location:60:5: note: declared here + 60 | current(__builtin_ret_type __p = __builtin_source_location()) noexcept + | ^ +1 error generated. diff --git a/regression-tests/test-results/clang-18-c++23-libcpp/pure2-default-arguments.cpp.output b/regression-tests/test-results/clang-18-c++23-libcpp/pure2-default-arguments.cpp.output new file mode 100644 index 000000000..1b5962fb7 --- /dev/null +++ b/regression-tests/test-results/clang-18-c++23-libcpp/pure2-default-arguments.cpp.output @@ -0,0 +1,7 @@ +pure2-default-arguments.cpp2:6:56: error: cannot take address of consteval function 'current' outside of an immediate invocation + 6 | char const* fn = CPP2_UFCS_NONLOCAL(function_name)(std::source_location::current()) + | ^ +/usr/lib/llvm-18/bin/../include/c++/v1/source_location:60:36: note: declared here + 60 | static consteval source_location current(__bsl_ty __ptr = __builtin_source_location()) noexcept { + | ^ +1 error generated. 
diff --git a/regression-tests/test-results/gcc-10-c++20/pure2-bugfix-for-requires-clause-in-forward-declaration.cpp.output b/regression-tests/test-results/gcc-10-c++20/pure2-bugfix-for-requires-clause-in-forward-declaration.cpp.output index 231ecfcb2..6df6e1164 100644 --- a/regression-tests/test-results/gcc-10-c++20/pure2-bugfix-for-requires-clause-in-forward-declaration.cpp.output +++ b/regression-tests/test-results/gcc-10-c++20/pure2-bugfix-for-requires-clause-in-forward-declaration.cpp.output @@ -6,12 +6,12 @@ pure2-bugfix-for-requires-clause-in-forward-declaration.cpp2:3:46: error: expect In file included from pure2-bugfix-for-requires-clause-in-forward-declaration.cpp:7: ../../../include/cpp2util.h:10005:47: error: static assertion failed: GCC 11 or higher is required to support variables and type-scope functions that have a 'requires' clause. This includes a type-scope 'forward' parameter of non-wildcard type, such as 'func: (this, forward s: std::string)', which relies on being able to add a 'requires' clause - in that case, use 'forward s: _' instead if you need the result to compile with GCC 10. 
pure2-bugfix-for-requires-clause-in-forward-declaration.cpp2:4:1: note: in expansion of macro ‘CPP2_REQUIRES_’ -pure2-bugfix-for-requires-clause-in-forward-declaration.cpp2:3:3: error: no declaration matches ‘element::element(auto:261&&) requires is_same_v::type>::type>’ +pure2-bugfix-for-requires-clause-in-forward-declaration.cpp2:3:3: error: no declaration matches ‘element::element(auto:95&&) requires is_same_v::type>::type>’ pure2-bugfix-for-requires-clause-in-forward-declaration.cpp2:5:11: note: candidates are: ‘element::element(const element&)’ -pure2-bugfix-for-requires-clause-in-forward-declaration.cpp2:3:20: note: ‘template element::element(auto:259&&)’ +pure2-bugfix-for-requires-clause-in-forward-declaration.cpp2:3:20: note: ‘template element::element(auto:93&&)’ pure2-bugfix-for-requires-clause-in-forward-declaration.cpp2:1:7: note: ‘class element’ defined here pure2-bugfix-for-requires-clause-in-forward-declaration.cpp2:5:78: error: expected unqualified-id before ‘{’ token -pure2-bugfix-for-requires-clause-in-forward-declaration.cpp2:3:8: error: no declaration matches ‘element& element::operator=(auto:262&&) requires is_same_v::type>::type>’ +pure2-bugfix-for-requires-clause-in-forward-declaration.cpp2:3:8: error: no declaration matches ‘element& element::operator=(auto:96&&) requires is_same_v::type>::type>’ pure2-bugfix-for-requires-clause-in-forward-declaration.cpp2:6:16: note: candidates are: ‘void element::operator=(const element&)’ -pure2-bugfix-for-requires-clause-in-forward-declaration.cpp2:3:16: note: ‘template element& element::operator=(auto:260&&)’ +pure2-bugfix-for-requires-clause-in-forward-declaration.cpp2:3:16: note: ‘template element& element::operator=(auto:94&&)’ pure2-bugfix-for-requires-clause-in-forward-declaration.cpp2:1:7: note: ‘class element’ defined here diff --git a/regression-tests/test-results/gcc-10-c++20/pure2-print.cpp.output b/regression-tests/test-results/gcc-10-c++20/pure2-print.cpp.output index 2a6d481ca..2dcc49e75 
100644 --- a/regression-tests/test-results/gcc-10-c++20/pure2-print.cpp.output +++ b/regression-tests/test-results/gcc-10-c++20/pure2-print.cpp.output @@ -9,8 +9,8 @@ pure2-print.cpp2:68:1: note: in expansion of macro ‘CPP2_REQUIRES_’ pure2-print.cpp2:97:1: note: in expansion of macro ‘CPP2_REQUIRES_’ pure2-print.cpp2:9:41: error: ‘constexpr const T outer::object_alias’ is not a static data member of ‘class outer’ pure2-print.cpp2:9:48: error: template definition of non-template ‘constexpr const T outer::object_alias’ -pure2-print.cpp2:67:14: error: no declaration matches ‘void outer::mytype::variadic(const auto:260& ...) requires (is_convertible_v::type>::type, int> && ...)’ -pure2-print.cpp2:67:29: note: candidate is: ‘template static void outer::mytype::variadic(const auto:259& ...)’ +pure2-print.cpp2:67:14: error: no declaration matches ‘void outer::mytype::variadic(const auto:94& ...) requires (is_convertible_v::type>::type, int> && ...)’ +pure2-print.cpp2:67:29: note: candidate is: ‘template static void outer::mytype::variadic(const auto:93& ...)’ pure2-print.cpp2:10:19: note: ‘class outer::mytype’ defined here pure2-print.cpp2:96:37: error: no declaration matches ‘void outer::print(std::ostream&, const Args& ...) requires cpp2::impl::cmp_greater_eq(sizeof ... (Args ...), 0)’ pure2-print.cpp2:96:37: note: no functions named ‘void outer::print(std::ostream&, const Args& ...) requires cpp2::impl::cmp_greater_eq(sizeof ... 
(Args ...), 0)’ diff --git a/regression-tests/test-results/gcc-13-c++2b/mixed-bounds-safety-with-assert.cpp.execution b/regression-tests/test-results/gcc-13-c++2b/mixed-bounds-safety-with-assert.cpp.execution index e6ee874fc..56d396403 100644 --- a/regression-tests/test-results/gcc-13-c++2b/mixed-bounds-safety-with-assert.cpp.execution +++ b/regression-tests/test-results/gcc-13-c++2b/mixed-bounds-safety-with-assert.cpp.execution @@ -1 +1 @@ -mixed-bounds-safety-with-assert.cpp2(11) void print_subrange(const auto:263&, cpp2::impl::in, cpp2::impl::in) [with auto:263 = std::vector; cpp2::impl::in = const int]: Bounds safety violation +mixed-bounds-safety-with-assert.cpp2(11) void print_subrange(const auto:243&, cpp2::impl::in, cpp2::impl::in) [with auto:243 = std::vector; cpp2::impl::in = const int]: Bounds safety violation diff --git a/regression-tests/test-results/gcc-13-c++2b/pure2-default-arguments.cpp.execution b/regression-tests/test-results/gcc-13-c++2b/pure2-default-arguments.cpp.execution new file mode 100644 index 000000000..0e56963ff --- /dev/null +++ b/regression-tests/test-results/gcc-13-c++2b/pure2-default-arguments.cpp.execution @@ -0,0 +1,2 @@ +calling: int main(int, char**) +012 \ No newline at end of file diff --git a/regression-tests/test-results/gcc-14-c++2b/gcc-version.output b/regression-tests/test-results/gcc-14-c++2b/gcc-version.output index a2fc0ea24..e2db23c83 100644 --- a/regression-tests/test-results/gcc-14-c++2b/gcc-version.output +++ b/regression-tests/test-results/gcc-14-c++2b/gcc-version.output @@ -1,4 +1,4 @@ -g++ (GCC) 14.1.1 20240701 (Red Hat 14.1.1-7) +g++ (GCC) 14.2.1 20240801 (Red Hat 14.2.1-1) Copyright (C) 2024 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
diff --git a/regression-tests/test-results/gcc-14-c++2b/mixed-bounds-safety-with-assert.cpp.execution b/regression-tests/test-results/gcc-14-c++2b/mixed-bounds-safety-with-assert.cpp.execution index b2780a74b..4a8fe8c0f 100644 --- a/regression-tests/test-results/gcc-14-c++2b/mixed-bounds-safety-with-assert.cpp.execution +++ b/regression-tests/test-results/gcc-14-c++2b/mixed-bounds-safety-with-assert.cpp.execution @@ -1 +1 @@ -mixed-bounds-safety-with-assert.cpp2(11) void print_subrange(const auto:257&, cpp2::impl::in, cpp2::impl::in) [with auto:257 = std::vector; cpp2::impl::in = const int]: Bounds safety violation +mixed-bounds-safety-with-assert.cpp2(11) void print_subrange(const auto:91&, cpp2::impl::in, cpp2::impl::in) [with auto:91 = std::vector; cpp2::impl::in = const int]: Bounds safety violation diff --git a/regression-tests/test-results/msvc-2022-c++20/pure2-default-arguments.cpp.execution b/regression-tests/test-results/msvc-2022-c++20/pure2-default-arguments.cpp.execution new file mode 100644 index 000000000..8f0b4095c --- /dev/null +++ b/regression-tests/test-results/msvc-2022-c++20/pure2-default-arguments.cpp.execution @@ -0,0 +1,2 @@ +calling: int __cdecl main(const int,char **) +012 \ No newline at end of file diff --git a/regression-tests/test-results/msvc-2022-c++20/pure2-default-arguments.cpp.output b/regression-tests/test-results/msvc-2022-c++20/pure2-default-arguments.cpp.output new file mode 100644 index 000000000..f1128b42d --- /dev/null +++ b/regression-tests/test-results/msvc-2022-c++20/pure2-default-arguments.cpp.output @@ -0,0 +1 @@ +pure2-default-arguments.cpp diff --git a/regression-tests/test-results/pure2-regex_01_char_matcher.cpp b/regression-tests/test-results/pure2-regex_01_char_matcher.cpp index ae3ff7ee4..13500736b 100644 --- a/regression-tests/test-results/pure2-regex_01_char_matcher.cpp +++ b/regression-tests/test-results/pure2-regex_01_char_matcher.cpp @@ -1,5 +1,6 @@ #define CPP2_IMPORT_STD Yes +#include "cpp2regex.h" //=== 
Cpp2 type declarations ==================================================== diff --git a/regression-tests/test-results/pure2-regex_02_ranges.cpp b/regression-tests/test-results/pure2-regex_02_ranges.cpp index fb632c037..0524a5328 100644 --- a/regression-tests/test-results/pure2-regex_02_ranges.cpp +++ b/regression-tests/test-results/pure2-regex_02_ranges.cpp @@ -1,5 +1,6 @@ #define CPP2_IMPORT_STD Yes +#include "cpp2regex.h" //=== Cpp2 type declarations ==================================================== diff --git a/regression-tests/test-results/pure2-regex_03_wildcard.cpp b/regression-tests/test-results/pure2-regex_03_wildcard.cpp index e6924b841..18522d9fd 100644 --- a/regression-tests/test-results/pure2-regex_03_wildcard.cpp +++ b/regression-tests/test-results/pure2-regex_03_wildcard.cpp @@ -1,5 +1,6 @@ #define CPP2_IMPORT_STD Yes +#include "cpp2regex.h" //=== Cpp2 type declarations ==================================================== diff --git a/regression-tests/test-results/pure2-regex_04_start_end.cpp b/regression-tests/test-results/pure2-regex_04_start_end.cpp index 9d5320048..0e857b7e5 100644 --- a/regression-tests/test-results/pure2-regex_04_start_end.cpp +++ b/regression-tests/test-results/pure2-regex_04_start_end.cpp @@ -1,5 +1,6 @@ #define CPP2_IMPORT_STD Yes +#include "cpp2regex.h" //=== Cpp2 type declarations ==================================================== diff --git a/regression-tests/test-results/pure2-regex_05_classes.cpp b/regression-tests/test-results/pure2-regex_05_classes.cpp index 7ec65ccbd..7b569df9f 100644 --- a/regression-tests/test-results/pure2-regex_05_classes.cpp +++ b/regression-tests/test-results/pure2-regex_05_classes.cpp @@ -1,5 +1,6 @@ #define CPP2_IMPORT_STD Yes +#include "cpp2regex.h" //=== Cpp2 type declarations ==================================================== diff --git a/regression-tests/test-results/pure2-regex_06_boundaries.cpp b/regression-tests/test-results/pure2-regex_06_boundaries.cpp index 
2f750e081..bbca6c3ec 100644 --- a/regression-tests/test-results/pure2-regex_06_boundaries.cpp +++ b/regression-tests/test-results/pure2-regex_06_boundaries.cpp @@ -1,5 +1,6 @@ #define CPP2_IMPORT_STD Yes +#include "cpp2regex.h" //=== Cpp2 type declarations ==================================================== diff --git a/regression-tests/test-results/pure2-regex_07_short_classes.cpp b/regression-tests/test-results/pure2-regex_07_short_classes.cpp index 5ae55bacd..3e12cfef4 100644 --- a/regression-tests/test-results/pure2-regex_07_short_classes.cpp +++ b/regression-tests/test-results/pure2-regex_07_short_classes.cpp @@ -1,5 +1,6 @@ #define CPP2_IMPORT_STD Yes +#include "cpp2regex.h" //=== Cpp2 type declarations ==================================================== diff --git a/regression-tests/test-results/pure2-regex_08_alternatives.cpp b/regression-tests/test-results/pure2-regex_08_alternatives.cpp index 543190ff2..923344c60 100644 --- a/regression-tests/test-results/pure2-regex_08_alternatives.cpp +++ b/regression-tests/test-results/pure2-regex_08_alternatives.cpp @@ -1,5 +1,6 @@ #define CPP2_IMPORT_STD Yes +#include "cpp2regex.h" //=== Cpp2 type declarations ==================================================== diff --git a/regression-tests/test-results/pure2-regex_09_groups.cpp b/regression-tests/test-results/pure2-regex_09_groups.cpp index a37dda119..736248b87 100644 --- a/regression-tests/test-results/pure2-regex_09_groups.cpp +++ b/regression-tests/test-results/pure2-regex_09_groups.cpp @@ -1,5 +1,6 @@ #define CPP2_IMPORT_STD Yes +#include "cpp2regex.h" //=== Cpp2 type declarations ==================================================== diff --git a/regression-tests/test-results/pure2-regex_10_escapes.cpp b/regression-tests/test-results/pure2-regex_10_escapes.cpp index f879aebdf..85a101d3a 100644 --- a/regression-tests/test-results/pure2-regex_10_escapes.cpp +++ b/regression-tests/test-results/pure2-regex_10_escapes.cpp @@ -1,5 +1,6 @@ #define CPP2_IMPORT_STD Yes 
+#include "cpp2regex.h" //=== Cpp2 type declarations ==================================================== diff --git a/regression-tests/test-results/pure2-regex_11_group_references.cpp b/regression-tests/test-results/pure2-regex_11_group_references.cpp index 3161c2578..b07ad7e01 100644 --- a/regression-tests/test-results/pure2-regex_11_group_references.cpp +++ b/regression-tests/test-results/pure2-regex_11_group_references.cpp @@ -1,5 +1,6 @@ #define CPP2_IMPORT_STD Yes +#include "cpp2regex.h" //=== Cpp2 type declarations ==================================================== diff --git a/regression-tests/test-results/pure2-regex_12_case_insensitive.cpp b/regression-tests/test-results/pure2-regex_12_case_insensitive.cpp index 5fa4130ef..595e7c8c1 100644 --- a/regression-tests/test-results/pure2-regex_12_case_insensitive.cpp +++ b/regression-tests/test-results/pure2-regex_12_case_insensitive.cpp @@ -1,5 +1,6 @@ #define CPP2_IMPORT_STD Yes +#include "cpp2regex.h" //=== Cpp2 type declarations ==================================================== diff --git a/regression-tests/test-results/pure2-regex_13_possessive_modifier.cpp b/regression-tests/test-results/pure2-regex_13_possessive_modifier.cpp index f64ec7b70..06c080b80 100644 --- a/regression-tests/test-results/pure2-regex_13_possessive_modifier.cpp +++ b/regression-tests/test-results/pure2-regex_13_possessive_modifier.cpp @@ -1,5 +1,6 @@ #define CPP2_IMPORT_STD Yes +#include "cpp2regex.h" //=== Cpp2 type declarations ==================================================== diff --git a/regression-tests/test-results/pure2-regex_14_multiline_modifier.cpp b/regression-tests/test-results/pure2-regex_14_multiline_modifier.cpp index ed4eb5dfc..338af1243 100644 --- a/regression-tests/test-results/pure2-regex_14_multiline_modifier.cpp +++ b/regression-tests/test-results/pure2-regex_14_multiline_modifier.cpp @@ -1,5 +1,6 @@ #define CPP2_IMPORT_STD Yes +#include "cpp2regex.h" //=== Cpp2 type declarations 
==================================================== diff --git a/regression-tests/test-results/pure2-regex_15_group_modifiers.cpp b/regression-tests/test-results/pure2-regex_15_group_modifiers.cpp index d8b4cf691..b0103954a 100644 --- a/regression-tests/test-results/pure2-regex_15_group_modifiers.cpp +++ b/regression-tests/test-results/pure2-regex_15_group_modifiers.cpp @@ -1,5 +1,6 @@ #define CPP2_IMPORT_STD Yes +#include "cpp2regex.h" //=== Cpp2 type declarations ==================================================== diff --git a/regression-tests/test-results/pure2-regex_16_perl_syntax_modifier.cpp b/regression-tests/test-results/pure2-regex_16_perl_syntax_modifier.cpp index f35365f50..3d81c02e7 100644 --- a/regression-tests/test-results/pure2-regex_16_perl_syntax_modifier.cpp +++ b/regression-tests/test-results/pure2-regex_16_perl_syntax_modifier.cpp @@ -1,5 +1,6 @@ #define CPP2_IMPORT_STD Yes +#include "cpp2regex.h" //=== Cpp2 type declarations ==================================================== diff --git a/regression-tests/test-results/pure2-regex_17_comments.cpp b/regression-tests/test-results/pure2-regex_17_comments.cpp index 2e772618a..a566f273c 100644 --- a/regression-tests/test-results/pure2-regex_17_comments.cpp +++ b/regression-tests/test-results/pure2-regex_17_comments.cpp @@ -1,5 +1,6 @@ #define CPP2_IMPORT_STD Yes +#include "cpp2regex.h" //=== Cpp2 type declarations ==================================================== diff --git a/regression-tests/test-results/pure2-regex_18_branch_reset.cpp b/regression-tests/test-results/pure2-regex_18_branch_reset.cpp index 1e6e05678..aaf375aa2 100644 --- a/regression-tests/test-results/pure2-regex_18_branch_reset.cpp +++ b/regression-tests/test-results/pure2-regex_18_branch_reset.cpp @@ -1,5 +1,6 @@ #define CPP2_IMPORT_STD Yes +#include "cpp2regex.h" //=== Cpp2 type declarations ==================================================== diff --git a/regression-tests/test-results/pure2-regex_19_lookahead.cpp 
b/regression-tests/test-results/pure2-regex_19_lookahead.cpp index 78bbec662..610330ae5 100644 --- a/regression-tests/test-results/pure2-regex_19_lookahead.cpp +++ b/regression-tests/test-results/pure2-regex_19_lookahead.cpp @@ -1,5 +1,6 @@ #define CPP2_IMPORT_STD Yes +#include "cpp2regex.h" //=== Cpp2 type declarations ==================================================== diff --git a/source/common.h b/source/common.h index 844900f8e..0ba0d04f3 100644 --- a/source/common.h +++ b/source/common.h @@ -47,20 +47,11 @@ #ifndef CPP2_COMMON_H #define CPP2_COMMON_H -#include #include #include #include -#include -#include #include -#include -#include -#include -#include -#include #include -#include namespace cpp2 { diff --git a/source/parse.h b/source/parse.h index 282067125..ad35f1930 100644 --- a/source/parse.h +++ b/source/parse.h @@ -5539,6 +5539,7 @@ auto pretty_print_visualize(translation_unit_node const& n) class parser { std::vector& errors; + std::set& includes; std::unique_ptr parse_tree = {}; @@ -5584,10 +5585,10 @@ class parser } }; - std::vector const* tokens = {}; + std::vector const* tokens = {}; stable_vector* generated_tokens = {}; - int pos = 0; - std::string parse_kind = {}; + int pos = 0; + std::string parse_kind = {}; // Keep track of the function bodies' locations - used to emit comments // in the right pass (decide whether it's a comment that belongs with @@ -5663,13 +5664,18 @@ class parser // // errors error list // - parser( std::vector& errors_ ) + parser( + std::vector& errors_, + std::set& includes_ + ) : errors{ errors_ } + , includes{ includes_ } , parse_tree{std::make_unique()} { } parser( parser const& that ) : errors{ that.errors } + , includes{ that.includes } , parse_tree{std::make_unique()} { } diff --git a/source/reflect.h b/source/reflect.h index bd7d97213..759342845 100644 --- a/source/reflect.h +++ b/source/reflect.h @@ -10,36 +10,103 @@ #line 1 "reflect.h2" -#line 21 "reflect.h2" +#line 22 "reflect.h2" namespace cpp2 { namespace 
meta { -#line 33 "reflect.h2" +#line 34 "reflect.h2" class compiler_services; -#line 227 "reflect.h2" +#line 233 "reflect.h2" class declaration_base; -#line 253 "reflect.h2" +#line 259 "reflect.h2" class declaration; -#line 335 "reflect.h2" +#line 341 "reflect.h2" class function_declaration; -#line 425 "reflect.h2" +#line 431 "reflect.h2" class object_declaration; -#line 461 "reflect.h2" +#line 467 "reflect.h2" class type_declaration; -#line 598 "reflect.h2" +#line 604 "reflect.h2" class alias_declaration; -#line 1006 "reflect.h2" +#line 1012 "reflect.h2" class value_member_info; -#line 1692 "reflect.h2" +#line 1530 "reflect.h2" +class expression_flags; + +#line 1546 "reflect.h2" +class regex_token; + +#line 1572 "reflect.h2" +class regex_token_check; + +#line 1591 "reflect.h2" +class regex_token_code; + +#line 1610 "reflect.h2" +class regex_token_empty; + +#line 1626 "reflect.h2" +class regex_token_list; + +#line 1665 "reflect.h2" +class parse_context_group_state; + +#line 1726 "reflect.h2" +class parse_context_branch_reset_state; + +#line 1769 "reflect.h2" +class parse_context; + +#line 2167 "reflect.h2" +class generation_function_context; + + +#line 2185 "reflect.h2" +class generation_context; + +#line 2383 "reflect.h2" +class alternative_token; + +#line 2398 "reflect.h2" +class alternative_token_gen; + +#line 2450 "reflect.h2" +class any_token; + +#line 2468 "reflect.h2" +class char_token; + +#line 2571 "reflect.h2" +class class_token; + +#line 2786 "reflect.h2" +class group_ref_token; + +#line 2917 "reflect.h2" +class group_token; + +#line 3204 "reflect.h2" +class lookahead_token; + +#line 3285 "reflect.h2" +class range_token; + +#line 3433 "reflect.h2" +class special_range_token; + +#line 3500 "reflect.h2" +template class regex_generator; + +#line 3750 "reflect.h2" } } @@ -67,25 +134,20 @@ class value_member_info; #include "parse.h" #include "cpp2regex.h" +using namespace cpp2::regex; -#line 21 "reflect.h2" +#line 22 "reflect.h2" namespace cpp2 { namespace 
meta { -#line 26 "reflect.h2" -//----------------------------------------------------------------------- -// -// Compiler services -// -//----------------------------------------------------------------------- -// - +#line 34 "reflect.h2" class compiler_services { - // Common data members - // + +#line 38 "reflect.h2" private: std::vector* errors; + private: std::set* includes; private: int errors_original_size; private: stable_vector* generated_tokens; private: cpp2::parser parser; @@ -93,119 +155,67 @@ class compiler_services private: std::vector metafunction_args {}; private: bool metafunctions_used {false}; - // Constructor - // +#line 49 "reflect.h2" public: explicit compiler_services( std::vector* errors_, + std::set* includes_, stable_vector* generated_tokens_ ); -#line 59 "reflect.h2" - // Common API - // +#line 65 "reflect.h2" public: auto set_metafunction_name(cpp2::impl::in name, cpp2::impl::in> args) & -> void; -#line 67 "reflect.h2" +#line 71 "reflect.h2" public: [[nodiscard]] auto get_metafunction_name() const& -> std::string_view; public: [[nodiscard]] auto get_argument(cpp2::impl::in index) & -> std::string; -#line 77 "reflect.h2" +#line 81 "reflect.h2" public: [[nodiscard]] auto get_arguments() & -> std::vector; -#line 82 "reflect.h2" +#line 86 "reflect.h2" public: [[nodiscard]] auto arguments_were_used() const& -> bool; using parse_statement_ret = std::unique_ptr; -#line 84 "reflect.h2" +#line 88 "reflect.h2" protected: [[nodiscard]] auto parse_statement( std::string_view source ) & -> parse_statement_ret; -#line 137 "reflect.h2" +#line 141 "reflect.h2" + public: auto add_runtime_support_include(cpp2::impl::in s) & -> void; + public: [[nodiscard]] virtual auto position() const -> source_position; -#line 143 "reflect.h2" - // Error diagnosis and handling, integrated with compiler output - // Unlike a contract violation, .requires continues further processing - // +#line 152 "reflect.h2" public: auto require( cpp2::impl::in b, cpp2::impl::in msg ) 
const& -> void; -#line 157 "reflect.h2" +#line 163 "reflect.h2" public: auto error(cpp2::impl::in msg) const& -> void; -#line 166 "reflect.h2" - // Enable custom contracts on this object, integrated with compiler output - // Unlike .requires, a contract violation stops further processing - // +#line 175 "reflect.h2" public: auto report_violation(auto const& msg) const& -> void; -#line 177 "reflect.h2" +#line 183 "reflect.h2" public: [[nodiscard]] auto is_active() const& -> auto; public: virtual ~compiler_services() noexcept; public: compiler_services(compiler_services const& that); -#line 178 "reflect.h2" +#line 184 "reflect.h2" }; -#line 181 "reflect.h2" -/* -//----------------------------------------------------------------------- -// -// Type IDs -// -//----------------------------------------------------------------------- -// - -// All type_ids are wrappers around a pointer to node -// -type_id: @polymorphic_base @copyable type = -{ - this: compiler_services = (); - - n: type_id_node; - - protected operator=: ( - out this, - n_: type_id_node, - s : compiler_services - ) - = { - compiler_services = s; - n = n_; - assert( n, "a meta::type_id must point to a valid type_id_node, not null" ); - } - - is_wildcard : (this) -> bool = n.is_wildcard(); - is_pointer_qualified: (this) -> bool = n.is_pointer_qualified(); - template_args_count : (this) -> int = n.template_arguments().ssize(); - to_string : (this) -> std::string = n.to_string(); - - position: (override this) -> source_position = n.position(); -} -*/ - -#line 218 "reflect.h2" -//----------------------------------------------------------------------- -// -// Declarations -// -//----------------------------------------------------------------------- -// - -// All declarations are wrappers around a pointer to node -// +#line 233 "reflect.h2" class declaration_base : public compiler_services { -#line 231 "reflect.h2" +#line 237 "reflect.h2" protected: declaration_node* n; protected: explicit declaration_base( @@ 
-214,31 +224,28 @@ class declaration_base cpp2::impl::in s ); -#line 244 "reflect.h2" +#line 250 "reflect.h2" public: [[nodiscard]] auto position() const -> source_position override; public: [[nodiscard]] auto print() const& -> std::string; public: virtual ~declaration_base() noexcept; public: declaration_base(declaration_base const& that); -#line 247 "reflect.h2" +#line 253 "reflect.h2" }; -#line 250 "reflect.h2" -//----------------------------------------------------------------------- -// All declarations -// +#line 259 "reflect.h2" class declaration : public declaration_base { -#line 257 "reflect.h2" +#line 263 "reflect.h2" public: explicit declaration( declaration_node* n_, cpp2::impl::in s ); -#line 266 "reflect.h2" +#line 272 "reflect.h2" public: [[nodiscard]] auto is_public() const& -> bool; public: [[nodiscard]] auto is_protected() const& -> bool; public: [[nodiscard]] auto is_private() const& -> bool; @@ -257,7 +264,7 @@ class declaration public: [[nodiscard]] auto name() const& -> std::string_view; -#line 287 "reflect.h2" +#line 293 "reflect.h2" public: [[nodiscard]] auto has_initializer() const& -> bool; public: [[nodiscard]] auto is_global() const& -> bool; @@ -298,26 +305,22 @@ class declaration public: virtual ~declaration() noexcept; public: declaration(declaration const& that); - // this precondition should be sufficient ... 
-#line 329 "reflect.h2" +#line 335 "reflect.h2" }; -#line 332 "reflect.h2" -//----------------------------------------------------------------------- -// Function declarations -// +#line 341 "reflect.h2" class function_declaration : public declaration { -#line 339 "reflect.h2" +#line 345 "reflect.h2" public: explicit function_declaration( declaration_node* n_, cpp2::impl::in s ); -#line 349 "reflect.h2" +#line 355 "reflect.h2" public: [[nodiscard]] auto index_of_parameter_named(cpp2::impl::in s) const& -> int; public: [[nodiscard]] auto has_parameter_named(cpp2::impl::in s) const& -> bool; public: [[nodiscard]] auto has_in_parameter_named(cpp2::impl::in s) const& -> bool; @@ -357,7 +360,7 @@ class function_declaration public: [[nodiscard]] auto get_parameters() const& -> std::vector; -#line 396 "reflect.h2" +#line 402 "reflect.h2" public: [[nodiscard]] auto is_binary_comparison_function() const& -> bool; public: auto default_to_virtual() & -> void; @@ -368,103 +371,94 @@ class function_declaration public: function_declaration(function_declaration const& that); -#line 419 "reflect.h2" +#line 425 "reflect.h2" }; -#line 422 "reflect.h2" -//----------------------------------------------------------------------- -// Object declarations -// +#line 431 "reflect.h2" class object_declaration : public declaration { -#line 429 "reflect.h2" +#line 435 "reflect.h2" public: explicit object_declaration( declaration_node* n_, cpp2::impl::in s ); -#line 439 "reflect.h2" +#line 445 "reflect.h2" public: [[nodiscard]] auto is_const() const& -> bool; public: [[nodiscard]] auto has_wildcard_type() const& -> bool; public: [[nodiscard]] auto type() const& -> std::string; -#line 449 "reflect.h2" +#line 455 "reflect.h2" public: [[nodiscard]] auto initializer() const& -> std::string; public: object_declaration(object_declaration const& that); -#line 455 "reflect.h2" +#line 461 "reflect.h2" }; -#line 458 "reflect.h2" -//----------------------------------------------------------------------- 
-// Type declarations -// +#line 467 "reflect.h2" class type_declaration : public declaration { -#line 465 "reflect.h2" +#line 471 "reflect.h2" public: explicit type_declaration( declaration_node* n_, cpp2::impl::in s ); -#line 475 "reflect.h2" +#line 481 "reflect.h2" public: auto reserve_names(cpp2::impl::in name, auto&& ...etc) const& -> void; -#line 489 "reflect.h2" +#line 495 "reflect.h2" public: [[nodiscard]] auto is_polymorphic() const& -> bool; public: [[nodiscard]] auto is_final() const& -> bool; public: [[nodiscard]] auto make_final() & -> bool; public: [[nodiscard]] auto get_member_functions() const& -> std::vector; -#line 504 "reflect.h2" +#line 510 "reflect.h2" public: [[nodiscard]] auto get_member_functions_needing_initializer() const& -> std::vector; -#line 519 "reflect.h2" +#line 525 "reflect.h2" public: [[nodiscard]] auto get_member_objects() const& -> std::vector; -#line 529 "reflect.h2" +#line 535 "reflect.h2" public: [[nodiscard]] auto get_member_types() const& -> std::vector; -#line 539 "reflect.h2" +#line 545 "reflect.h2" public: [[nodiscard]] auto get_member_aliases() const& -> std::vector; -#line 549 "reflect.h2" +#line 555 "reflect.h2" public: [[nodiscard]] auto get_members() const& -> std::vector; struct query_declared_value_set_functions_ret { bool out_this_in_that; bool out_this_move_that; bool inout_this_in_that; bool inout_this_move_that; }; -#line 559 "reflect.h2" +#line 565 "reflect.h2" public: [[nodiscard]] auto query_declared_value_set_functions() const& -> query_declared_value_set_functions_ret; -#line 574 "reflect.h2" +#line 580 "reflect.h2" public: auto add_member(cpp2::impl::in source) & -> void; -#line 588 "reflect.h2" +#line 594 "reflect.h2" public: auto remove_marked_members() & -> void; public: auto remove_all_members() & -> void; public: auto disable_member_function_generation() & -> void; public: type_declaration(type_declaration const& that); -#line 592 "reflect.h2" +#line 598 "reflect.h2" }; -#line 595 "reflect.h2" 
-//----------------------------------------------------------------------- -// Alias declarations -// +#line 604 "reflect.h2" class alias_declaration : public declaration { -#line 602 "reflect.h2" +#line 608 "reflect.h2" public: explicit alias_declaration( declaration_node* n_, @@ -473,233 +467,62 @@ class alias_declaration public: alias_declaration(alias_declaration const& that); -#line 611 "reflect.h2" +#line 617 "reflect.h2" }; -#line 614 "reflect.h2" -//----------------------------------------------------------------------- -// -// Metafunctions - these are hardwired for now until we get to the -// step of writing a Cpp2 interpreter to run inside the compiler -// -//----------------------------------------------------------------------- -// - -//----------------------------------------------------------------------- -// Some common metafunction helpers (metafunctions are just functions, -// so they can be factored as usual) -// +#line 632 "reflect.h2" auto add_virtual_destructor(meta::type_declaration& t) -> void; -#line 632 "reflect.h2" -//----------------------------------------------------------------------- -// -// "... an abstract base class defines an interface ..." -// -// -- Stroustrup (The Design and Evolution of C++, 12.3.1) -// -//----------------------------------------------------------------------- -// -// interface -// -// an abstract base class having only pure virtual functions -// +#line 650 "reflect.h2" auto interface(meta::type_declaration& t) -> void; -#line 671 "reflect.h2" -//----------------------------------------------------------------------- -// -// "C.35: A base class destructor should be either public and -// virtual, or protected and non-virtual." -// -// "[C.43] ... a base class should not be copyable, and so does not -// necessarily need a default constructor." -// -// -- Stroustrup, Sutter, et al. 
(C++ Core Guidelines) -// -//----------------------------------------------------------------------- -// -// polymorphic_base -// -// A pure polymorphic base type that is not copyable, and whose -// destructor is either public and virtual or protected and nonvirtual. -// -// Unlike an interface, it can have nonpublic and nonvirtual functions. -// +#line 696 "reflect.h2" auto polymorphic_base(meta::type_declaration& t) -> void; -#line 715 "reflect.h2" -//----------------------------------------------------------------------- -// -// "... A totally ordered type ... requires operator<=> that -// returns std::strong_ordering. If the function is not -// user-written, a lexicographical memberwise implementation -// is generated by default..." -// -// -- P0707R4, section 3 -// -// Note: This feature derived from Cpp2 was already adopted -// into Standard C++ via paper P0515, so most of the -// heavy lifting is done by the Cpp1 C++20/23 compiler, -// including the memberwise default semantics -// (In contrast, cppfront has to do the work itself for -// default memberwise semantics for operator= assignment -// as those aren't yet part of Standard C++) -// -//----------------------------------------------------------------------- -// - +#line 741 "reflect.h2" auto ordered_impl( meta::type_declaration& t, - cpp2::impl::in ordering// must be "strong_ordering" etc. 
+ cpp2::impl::in ordering ) -> void; -#line 759 "reflect.h2" -//----------------------------------------------------------------------- -// ordered - a totally ordered type -// -// Note: the ordering that should be encouraged as default gets the nice name -// +#line 770 "reflect.h2" auto ordered(meta::type_declaration& t) -> void; -#line 769 "reflect.h2" -//----------------------------------------------------------------------- -// weakly_ordered - a weakly ordered type -// +#line 778 "reflect.h2" auto weakly_ordered(meta::type_declaration& t) -> void; -#line 777 "reflect.h2" -//----------------------------------------------------------------------- -// partially_ordered - a partially ordered type -// +#line 786 "reflect.h2" auto partially_ordered(meta::type_declaration& t) -> void; -#line 786 "reflect.h2" -//----------------------------------------------------------------------- -// -// "A value is ... a regular type. It must have all public -// default construction, copy/move construction/assignment, -// and destruction, all of which are generated by default -// if not user-written; and it must not have any protected -// or virtual functions (including the destructor)." -// -// -- P0707R4, section 3 -// -//----------------------------------------------------------------------- -// -// copyable -// -// A type with (copy and move) x (construction and assignment) -// +#line 808 "reflect.h2" auto copyable(meta::type_declaration& t) -> void; -#line 829 "reflect.h2" -//----------------------------------------------------------------------- -// -// basic_value -// -// A regular type: copyable, plus has public default construction -// and no protected or virtual functions -// +#line 842 "reflect.h2" auto basic_value(meta::type_declaration& t) -> void; -#line 854 "reflect.h2" -//----------------------------------------------------------------------- -// -// "A 'value' is a totally ordered basic_value..." 
-// -// -- P0707R4, section 3 -// -// value - a value type that is totally ordered -// -// Note: the ordering that should be encouraged as default gets the nice name -// +#line 870 "reflect.h2" auto value(meta::type_declaration& t) -> void; -#line 870 "reflect.h2" +#line 876 "reflect.h2" auto weakly_ordered_value(meta::type_declaration& t) -> void; -#line 876 "reflect.h2" +#line 882 "reflect.h2" auto partially_ordered_value(meta::type_declaration& t) -> void; -#line 883 "reflect.h2" -//----------------------------------------------------------------------- -// -// C.20: If you can avoid defining default operations, do -// -// ##### Reason -// -// It's the simplest and gives the cleanest semantics. -// -// ... -// -// This is known as "the rule of zero". -// -// -- C++ Core Guidelines -// C.20: If you can avoid defining any default operations, do -// -// -//----------------------------------------------------------------------- -// -// cpp1_rule_of_zero -// -// a type without declared copy/move/destructor functions -// +#line 911 "reflect.h2" auto cpp1_rule_of_zero(meta::type_declaration& t) -> void; -#line 917 "reflect.h2" -//----------------------------------------------------------------------- -// -// "By definition, a `struct` is a `class` in which members -// are by default `public`; that is, -// -// struct s { ... -// -// is simply shorthand for -// -// class s { public: ... -// -// ... Which style you use depends on circumstances and taste. -// I usually prefer to use `struct` for classes that have all -// data `public`." -// -// -- Stroustrup (The C++ Programming Language, 3rd ed., p. 234) -// -//----------------------------------------------------------------------- -// -// struct -// -// a type with only public bases, objects, and functions, -// no virtual functions, and no user-defined constructors -// (i.e., no invariants) or assignment or destructors. 
-// +#line 948 "reflect.h2" auto cpp2_struct(meta::type_declaration& t) -> void; -#line 989 "reflect.h2" -//----------------------------------------------------------------------- -// -// "C enumerations constitute a curiously half-baked concept. ... -// the cleanest way out was to deem each enumeration a separate type." -// -// -- Stroustrup (The Design and Evolution of C++, 11.7) -// -// "An enumeration is a distinct type ... with named constants" -// -// -- ISO C++ Standard -// -//----------------------------------------------------------------------- -// -// basic_enum -// -// a type together with named constants that are its possible values -// +#line 1012 "reflect.h2" class value_member_info { public: std::string name; public: std::string type; public: std::string value; public: value_member_info(auto const& name_, auto const& type_, auto const& value_); -#line 1010 "reflect.h2" +#line 1016 "reflect.h2" }; auto basic_enum( @@ -708,1461 +531,4641 @@ auto basic_enum( cpp2::impl::in bitwise ) -> void; -#line 1267 "reflect.h2" -//----------------------------------------------------------------------- -// -// "An enum[...] is a totally ordered value type that stores a -// value of its enumerators's type, and otherwise has only public -// member variables of its enumerator's type, all of which are -// naturally scoped because they are members of a type." -// -// -- P0707R4, section 3 -// +#line 1282 "reflect.h2" auto cpp2_enum(meta::type_declaration& t) -> void; -#line 1293 "reflect.h2" -//----------------------------------------------------------------------- -// -// "flag_enum expresses an enumeration that stores values -// corresponding to bitwise-or'd enumerators. The enumerators must -// be powers of two, and are automatically generated [...] A none -// value is provided [...] Operators | and & are provided to -// combine and extract values." 
-// -// -- P0707R4, section 3 -// +#line 1309 "reflect.h2" auto flag_enum(meta::type_declaration& t) -> void; -#line 1325 "reflect.h2" -//----------------------------------------------------------------------- -// -// "As with void*, programmers should know that unions [...] are -// inherently dangerous, should be avoided wherever possible, -// and should be handled with special care when actually needed." -// -// -- Stroustrup (The Design and Evolution of C++, 14.3.4.1) -// -// "C++17 needs a type-safe union... The implications of the -// consensus `variant` design are well understood and have been -// explored over several LEWG discussions, over a thousand emails, -// a joint LEWG/EWG session, and not to mention 12 years of -// experience with Boost and other libraries." -// -// -- Axel Naumann, in P0088 (wg21.link/p0088), -// the adopted proposal for C++17 std::variant -// -//----------------------------------------------------------------------- -// -// union -// -// a type that contains exactly one of a fixed set of values at a time -// - +#line 1355 "reflect.h2" auto cpp2_union(meta::type_declaration& t) -> void; -#line 1496 "reflect.h2" -//----------------------------------------------------------------------- -// -// print - output a pretty-printed visualization of t -// +#line 1506 "reflect.h2" auto print(cpp2::impl::in t) -> void; -#line 1506 "reflect.h2" -//----------------------------------------------------------------------- -// -// regex - creates regular expressions from members -// -// Each member that starts with `regex` is replaced by a regular expression -// of the initializer string. 
E.g.: -// ``` -// regex := "ab"; -// ``` -// is replaced with -// ``` -// regex := ::cpp2::regex::regular_expression<...>; -// ``` -// -auto regex_gen(meta::type_declaration& t) -> void; +#line 1526 "reflect.h2" +using error_func = std::function x)>; -#line 1573 "reflect.h2" -//----------------------------------------------------------------------- -// -// apply_metafunctions -// -[[nodiscard]] auto apply_metafunctions( - declaration_node& n, - type_declaration& rtype, - auto const& error - ) -> bool; +#line 1530 "reflect.h2" +class expression_flags + { +private: cpp2::u8 _value; private: constexpr expression_flags(cpp2::impl::in _val); + +private: constexpr auto operator=(cpp2::impl::in _val) -> expression_flags& ; +public: constexpr auto operator|=(expression_flags const& that) & -> void; +public: constexpr auto operator&=(expression_flags const& that) & -> void; +public: constexpr auto operator^=(expression_flags const& that) & -> void; +public: [[nodiscard]] constexpr auto operator|(expression_flags const& that) const& -> expression_flags; +public: [[nodiscard]] constexpr auto operator&(expression_flags const& that) const& -> expression_flags; +public: [[nodiscard]] constexpr auto operator^(expression_flags const& that) const& -> expression_flags; +public: [[nodiscard]] constexpr auto has(expression_flags const& that) const& -> bool; +public: constexpr auto set(expression_flags const& that) & -> void; +public: constexpr auto clear(expression_flags const& that) & -> void; +public: static const expression_flags case_insensitive; +public: static const expression_flags multiple_lines; +public: static const expression_flags single_line; +public: static const expression_flags no_group_captures; +public: static const expression_flags perl_code_syntax; +public: static const expression_flags perl_code_syntax_in_classes; +public: static const expression_flags none; +public: [[nodiscard]] constexpr auto get_raw_value() const& -> cpp2::u8; +public: constexpr explicit 
expression_flags(); +public: constexpr expression_flags(expression_flags const& that); +public: constexpr auto operator=(expression_flags const& that) -> expression_flags& ; +public: constexpr expression_flags(expression_flags&& that) noexcept; +public: constexpr auto operator=(expression_flags&& that) noexcept -> expression_flags& ; +public: [[nodiscard]] auto operator<=>(expression_flags const& that) const& -> std::strong_ordering = default; +public: [[nodiscard]] auto to_string_impl(cpp2::impl::in prefix, cpp2::impl::in separator) const& -> std::string; +public: [[nodiscard]] auto to_string() const& -> std::string; +public: [[nodiscard]] auto to_code() const& -> std::string; +public: [[nodiscard]] static auto from_string(cpp2::impl::in s) -> expression_flags; +public: [[nodiscard]] static auto from_code(cpp2::impl::in s) -> expression_flags; + +#line 1538 "reflect.h2" +}; -#line 1692 "reflect.h2" -} +#line 1546 "reflect.h2" +class regex_token + { + public: std::string string_rep; -} + public: explicit regex_token(cpp2::impl::in str); +#line 1554 "reflect.h2" + public: explicit regex_token(); -//=== Cpp2 function definitions ================================================= +#line 1559 "reflect.h2" + public: virtual auto generate_code([[maybe_unused]] generation_context& unnamed_param_2) const -> void = 0; -#line 1 "reflect.h2" + public: virtual auto add_groups([[maybe_unused]] std::set& unnamed_param_2) const -> void; + public: [[nodiscard]] auto to_string() const& -> std::string; + public: auto set_string(cpp2::impl::in s) & -> void; + public: virtual ~regex_token() noexcept; -#line 21 "reflect.h2" -namespace cpp2 { + public: regex_token(regex_token const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(regex_token const&) -> void = delete; -namespace meta { +#line 1564 "reflect.h2" +}; -#line 47 "reflect.h2" - compiler_services::compiler_services( +using token_ptr = std::shared_ptr; +using token_vec = std::vector; - std::vector* 
errors_, - stable_vector* generated_tokens_ - ) - : errors{ errors_ } - , errors_original_size{ cpp2::unsafe_narrow(std::ssize(*cpp2::impl::assert_not_null(errors))) } - , generated_tokens{ generated_tokens_ } - , parser{ *cpp2::impl::assert_not_null(errors) } -#line 52 "reflect.h2" - { +#line 1570 "reflect.h2" +// Adds a check in code generation. +// +class regex_token_check +: public regex_token { -#line 57 "reflect.h2" - } +#line 1576 "reflect.h2" + private: std::string check; -#line 61 "reflect.h2" - auto compiler_services::set_metafunction_name(cpp2::impl::in name, cpp2::impl::in> args) & -> void{ - metafunction_name = name; - metafunction_args = args; - metafunctions_used = CPP2_UFCS(empty)(args); - } + public: explicit regex_token_check(cpp2::impl::in str, cpp2::impl::in check_); -#line 67 "reflect.h2" - [[nodiscard]] auto compiler_services::get_metafunction_name() const& -> std::string_view { return metafunction_name; } +#line 1583 "reflect.h2" + public: auto generate_code(generation_context& ctx) const -> void override; + public: virtual ~regex_token_check() noexcept; -#line 69 "reflect.h2" - [[nodiscard]] auto compiler_services::get_argument(cpp2::impl::in index) & -> std::string{ - metafunctions_used = true; - if (([_0 = 0, _1 = index, _2 = CPP2_UFCS(ssize)(metafunction_args)]{ return cpp2::impl::cmp_less_eq(_0,_1) && cpp2::impl::cmp_less(_1,_2); }())) { - return CPP2_ASSERT_IN_BOUNDS(metafunction_args, index); - } - return ""; - } + public: regex_token_check(regex_token_check const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(regex_token_check const&) -> void = delete; -#line 77 "reflect.h2" - [[nodiscard]] auto compiler_services::get_arguments() & -> std::vector{ - metafunctions_used = true; - return metafunction_args; - } -#line 82 "reflect.h2" - [[nodiscard]] auto compiler_services::arguments_were_used() const& -> bool { return metafunctions_used; } +#line 1586 "reflect.h2" +}; -#line 84 "reflect.h2" - [[nodiscard]] 
auto compiler_services::parse_statement( +#line 1589 "reflect.h2" +// Adds code in code generation. +// +class regex_token_code +: public regex_token { - std::string_view source - ) & -> parse_statement_ret +#line 1595 "reflect.h2" + private: std::string code; - { - cpp2::impl::deferred_init> ret; -#line 90 "reflect.h2" - auto original_source {source}; + public: explicit regex_token_code(cpp2::impl::in str, cpp2::impl::in code_); - CPP2_UFCS(push_back)(generated_lines, std::vector()); - auto lines {&CPP2_UFCS(back)(generated_lines)}; +#line 1602 "reflect.h2" + public: auto generate_code(generation_context& ctx) const -> void override; + public: virtual ~regex_token_code() noexcept; - auto add_line {[&, _1 = lines](cpp2::impl::in s) mutable -> void{ - static_cast(CPP2_UFCS(emplace_back)((*cpp2::impl::assert_not_null(_1)), s, source_line::category::cpp2)); - }}; -{ -auto newline_pos{CPP2_UFCS(find)(source, '\n')}; + public: regex_token_code(regex_token_code const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(regex_token_code const&) -> void = delete; - // First split this string into source_lines - // -#line 102 "reflect.h2" - if ( cpp2::impl::cmp_greater(CPP2_UFCS(ssize)(source),1) - && newline_pos != source.npos) - { - while( newline_pos != source.npos ) - { - add_line(CPP2_UFCS(substr)(source, 0, newline_pos)); - CPP2_UFCS(remove_prefix)(source, newline_pos + 1); - newline_pos = CPP2_UFCS(find)(source, '\n'); - } - } -} +#line 1605 "reflect.h2" +}; -#line 113 "reflect.h2" - if (!(CPP2_UFCS(empty)(source))) { - cpp2::move(add_line)(cpp2::move(source)); - } +#line 1608 "reflect.h2" +// Token that does not influence the matching. E.g. comment. 
+// +class regex_token_empty +: public regex_token { - // Now lex this source fragment to generate - // a single grammar_map entry, whose .second - // is the vector of tokens - static_cast(CPP2_UFCS(emplace_back)(generated_lexers, *cpp2::impl::assert_not_null(errors))); - auto tokens {&CPP2_UFCS(back)(generated_lexers)}; - CPP2_UFCS(lex)((*cpp2::impl::assert_not_null(tokens)), *cpp2::impl::assert_not_null(cpp2::move(lines)), true); +#line 1614 "reflect.h2" + public: explicit regex_token_empty(cpp2::impl::in str); - if (cpp2::cpp2_default.is_active() && !(std::ssize(CPP2_UFCS(get_map)((*cpp2::impl::assert_not_null(tokens)))) == 1) ) { cpp2::cpp2_default.report_violation(""); } +#line 1618 "reflect.h2" + public: auto generate_code([[maybe_unused]] generation_context& unnamed_param_2) const -> void override; + public: virtual ~regex_token_empty() noexcept; - // Now parse this single declaration from - // the lexed tokens - ret.construct(CPP2_UFCS(parse_one_declaration)(parser, - (*cpp2::impl::assert_not_null(CPP2_UFCS(begin)(CPP2_UFCS(get_map)(*cpp2::impl::assert_not_null(cpp2::move(tokens)))))).second, - *cpp2::impl::assert_not_null(generated_tokens) - )); - if (!(ret.value())) { - error("parse failed - the source string is not a valid statement:\n" + cpp2::to_string(cpp2::move(original_source)) + ""); - }return std::move(ret.value()); - } + public: regex_token_empty(regex_token_empty const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(regex_token_empty const&) -> void = delete; -#line 137 "reflect.h2" - [[nodiscard]] auto compiler_services::position() const -> source_position - { - return { }; - } +#line 1621 "reflect.h2" +}; -#line 146 "reflect.h2" - auto compiler_services::require( +#line 1624 "reflect.h2" +// Represents a list of regex tokens as one token. 
+// +class regex_token_list +: public regex_token { - cpp2::impl::in b, - cpp2::impl::in msg - ) const& -> void - { - if (!(b)) { - error(msg); - } - } +#line 1630 "reflect.h2" + public: token_vec tokens; -#line 157 "reflect.h2" - auto compiler_services::error(cpp2::impl::in msg) const& -> void - { - auto message {cpp2::impl::as_(msg)}; - if (!(CPP2_UFCS(empty)(metafunction_name))) { - message = "while applying @" + cpp2::to_string(metafunction_name) + " - " + cpp2::to_string(message) + ""; - } - static_cast(CPP2_UFCS(emplace_back)((*cpp2::impl::assert_not_null(errors)), position(), cpp2::move(message))); - } + public: explicit regex_token_list(cpp2::impl::in t); -#line 169 "reflect.h2" - auto compiler_services::report_violation(auto const& msg) const& -> void{ - error(msg); - throw(std::runtime_error( - " ==> programming bug found in metafunction @" + cpp2::to_string(metafunction_name) + " " - "- contract violation - see previous errors" - )); - } +#line 1637 "reflect.h2" + public: auto generate_code(generation_context& ctx) const -> void override; -#line 177 "reflect.h2" - [[nodiscard]] auto compiler_services::is_active() const& -> auto { return true; } +#line 1643 "reflect.h2" + public: auto add_groups(std::set& groups) const -> void override; - compiler_services::~compiler_services() noexcept{} -compiler_services::compiler_services(compiler_services const& that) - : errors{ that.errors } - , errors_original_size{ that.errors_original_size } - , generated_tokens{ that.generated_tokens } - , parser{ that.parser } - , metafunction_name{ that.metafunction_name } - , metafunction_args{ that.metafunction_args } - , metafunctions_used{ that.metafunctions_used }{} +#line 1649 "reflect.h2" + public: [[nodiscard]] static auto gen_string(cpp2::impl::in vec) -> std::string; + public: virtual ~regex_token_list() noexcept; -#line 233 "reflect.h2" - declaration_base::declaration_base( + public: regex_token_list(regex_token_list const&) = delete; /* No 'that' constructor, 
suppress copy */ + public: auto operator=(regex_token_list const&) -> void = delete; - declaration_node* n_, - cpp2::impl::in s - ) - : compiler_services{ s } - , n{ n_ } -#line 238 "reflect.h2" - { -#line 241 "reflect.h2" - if (cpp2::cpp2_default.is_active() && !(n) ) { cpp2::cpp2_default.report_violation(CPP2_CONTRACT_MSG("a meta::declaration must point to a valid declaration_node, not null")); } - } +#line 1656 "reflect.h2" +}; -#line 244 "reflect.h2" - [[nodiscard]] auto declaration_base::position() const -> source_position { return CPP2_UFCS(position)((*cpp2::impl::assert_not_null(n))); } +#line 1659 "reflect.h2" +// +// Parse and generation context. +// -#line 246 "reflect.h2" - [[nodiscard]] auto declaration_base::print() const& -> std::string { return CPP2_UFCS(pretty_print_visualize)((*cpp2::impl::assert_not_null(n)), 0); } +// State of the current capturing group. See '()' +// +class parse_context_group_state + { + public: token_vec cur_match_list {}; // Current list of matchers. + public: token_vec alternate_match_lists {}; // List of alternate matcher lists. E.g. ab|cd|xy. + public: expression_flags modifiers {}; // Current modifiers for the group/regular expression. - declaration_base::~declaration_base() noexcept{} -declaration_base::declaration_base(declaration_base const& that) - : compiler_services{ static_cast(that) } - , n{ that.n }{} + // Start a new alternative. + public: auto next_alternative() & -> void; -#line 257 "reflect.h2" - declaration::declaration( +#line 1679 "reflect.h2" + // Swap this state with the other one. + public: auto swap(parse_context_group_state& t) & -> void; - declaration_node* n_, - cpp2::impl::in s - ) - : declaration_base{ n_, s } -#line 262 "reflect.h2" - { +#line 1686 "reflect.h2" + // Convert this state into a regex token. + public: [[nodiscard]] auto get_as_token() & -> token_ptr; - } +#line 1698 "reflect.h2" + // Add a token to the current matcher list. 
+ public: auto add(cpp2::impl::in token) & -> void; -#line 266 "reflect.h2" - [[nodiscard]] auto declaration::is_public() const& -> bool { return CPP2_UFCS(is_public)((*cpp2::impl::assert_not_null(n))); } -#line 267 "reflect.h2" - [[nodiscard]] auto declaration::is_protected() const& -> bool { return CPP2_UFCS(is_protected)((*cpp2::impl::assert_not_null(n))); } -#line 268 "reflect.h2" - [[nodiscard]] auto declaration::is_private() const& -> bool { return CPP2_UFCS(is_private)((*cpp2::impl::assert_not_null(n))); } -#line 269 "reflect.h2" - [[nodiscard]] auto declaration::is_default_access() const& -> bool { return CPP2_UFCS(is_default_access)((*cpp2::impl::assert_not_null(n))); } +#line 1703 "reflect.h2" + // True if current matcher list is empty. + public: [[nodiscard]] auto empty() const& -> bool; -#line 271 "reflect.h2" - auto declaration::default_to_public() & -> void { static_cast(CPP2_UFCS(make_public)((*cpp2::impl::assert_not_null(n)))); } -#line 272 "reflect.h2" - auto declaration::default_to_protected() & -> void { static_cast(CPP2_UFCS(make_protected)((*cpp2::impl::assert_not_null(n)))); } -#line 273 "reflect.h2" - auto declaration::default_to_private() & -> void { static_cast(CPP2_UFCS(make_private)((*cpp2::impl::assert_not_null(n)))); } +#line 1707 "reflect.h2" + // Apply optimizations to the matcher list. 
+ public: static auto post_process_list(token_vec& list) -> void; + public: parse_context_group_state(auto const& cur_match_list_, auto const& alternate_match_lists_, auto const& modifiers_); +public: parse_context_group_state(); -#line 275 "reflect.h2" - [[nodiscard]] auto declaration::make_public() & -> bool { return CPP2_UFCS(make_public)((*cpp2::impl::assert_not_null(n))); } -#line 276 "reflect.h2" - [[nodiscard]] auto declaration::make_protected() & -> bool { return CPP2_UFCS(make_protected)((*cpp2::impl::assert_not_null(n))); } -#line 277 "reflect.h2" - [[nodiscard]] auto declaration::make_private() & -> bool { return CPP2_UFCS(make_private)((*cpp2::impl::assert_not_null(n))); } -#line 279 "reflect.h2" - [[nodiscard]] auto declaration::has_name() const& -> bool { return CPP2_UFCS(has_name)((*cpp2::impl::assert_not_null(n))); } -#line 280 "reflect.h2" - [[nodiscard]] auto declaration::has_name(cpp2::impl::in s) const& -> bool { return CPP2_UFCS(has_name)((*cpp2::impl::assert_not_null(n)), s); } +#line 1721 "reflect.h2" +}; -#line 282 "reflect.h2" - [[nodiscard]] auto declaration::name() const& -> std::string_view{ - if (has_name()) {return CPP2_UFCS(as_string_view)((*cpp2::impl::assert_not_null(CPP2_UFCS(name)(*cpp2::impl::assert_not_null(n))))); } - else { return ""; } - } +#line 1724 "reflect.h2" +// State for the branch reset. Takes care of the group numbering. See '(|)'. +// +class parse_context_branch_reset_state + { + public: bool is_active {false}; // If we have a branch reset group. + public: int cur_group {1}; // Next group identifier. 0 == global capture group. + public: int max_group {1}; // Maximum group identifier generated. + public: int from {1}; // Starting identifier on new alternative branch. -#line 287 "reflect.h2" - [[nodiscard]] auto declaration::has_initializer() const& -> bool { return CPP2_UFCS(has_initializer)((*cpp2::impl::assert_not_null(n))); } + // Next group identifier. 
+ public: [[nodiscard]] auto next() & -> int; -#line 289 "reflect.h2" - [[nodiscard]] auto declaration::is_global() const& -> bool { return CPP2_UFCS(is_global)((*cpp2::impl::assert_not_null(n))); } -#line 290 "reflect.h2" - [[nodiscard]] auto declaration::is_function() const& -> bool { return CPP2_UFCS(is_function)((*cpp2::impl::assert_not_null(n))); } -#line 291 "reflect.h2" - [[nodiscard]] auto declaration::is_object() const& -> bool { return CPP2_UFCS(is_object)((*cpp2::impl::assert_not_null(n))); } -#line 292 "reflect.h2" - [[nodiscard]] auto declaration::is_base_object() const& -> bool { return CPP2_UFCS(is_base_object)((*cpp2::impl::assert_not_null(n))); } -#line 293 "reflect.h2" - [[nodiscard]] auto declaration::is_member_object() const& -> bool { return CPP2_UFCS(is_member_object)((*cpp2::impl::assert_not_null(n))); } -#line 294 "reflect.h2" - [[nodiscard]] auto declaration::is_type() const& -> bool { return CPP2_UFCS(is_type)((*cpp2::impl::assert_not_null(n))); } -#line 295 "reflect.h2" - [[nodiscard]] auto declaration::is_namespace() const& -> bool { return CPP2_UFCS(is_namespace)((*cpp2::impl::assert_not_null(n))); } -#line 296 "reflect.h2" - [[nodiscard]] auto declaration::is_alias() const& -> bool { return CPP2_UFCS(is_alias)((*cpp2::impl::assert_not_null(n))); } +#line 1742 "reflect.h2" + // Set next group identifier. + public: auto set_next(cpp2::impl::in g) & -> void; -#line 298 "reflect.h2" - [[nodiscard]] auto declaration::is_type_alias() const& -> bool { return CPP2_UFCS(is_type_alias)((*cpp2::impl::assert_not_null(n))); } -#line 299 "reflect.h2" - [[nodiscard]] auto declaration::is_namespace_alias() const& -> bool { return CPP2_UFCS(is_namespace_alias)((*cpp2::impl::assert_not_null(n))); } -#line 300 "reflect.h2" - [[nodiscard]] auto declaration::is_object_alias() const& -> bool { return CPP2_UFCS(is_object_alias)((*cpp2::impl::assert_not_null(n))); } +#line 1748 "reflect.h2" + // Start a new alternative branch. 
+ public: auto next_alternative() & -> void; -#line 302 "reflect.h2" - [[nodiscard]] auto declaration::is_function_expression() const& -> bool { return CPP2_UFCS(is_function_expression)((*cpp2::impl::assert_not_null(n))); } +#line 1755 "reflect.h2" + // Initialize for a branch reset group. + public: auto set_active_reset(cpp2::impl::in restart) & -> void; + public: parse_context_branch_reset_state(auto const& is_active_, auto const& cur_group_, auto const& max_group_, auto const& from_); +public: parse_context_branch_reset_state(); -#line 304 "reflect.h2" - [[nodiscard]] auto declaration::as_function() const& -> function_declaration { return function_declaration(n, (*this)); } -#line 305 "reflect.h2" - [[nodiscard]] auto declaration::as_object() const& -> object_declaration { return object_declaration(n, (*this)); } -#line 306 "reflect.h2" - [[nodiscard]] auto declaration::as_type() const& -> type_declaration { return type_declaration(n, (*this)); } -#line 307 "reflect.h2" - [[nodiscard]] auto declaration::as_alias() const& -> alias_declaration { return alias_declaration(n, (*this)); } -#line 309 "reflect.h2" - [[nodiscard]] auto declaration::get_parent() const& -> declaration { return declaration((*cpp2::impl::assert_not_null(n)).parent_declaration, (*this)); } +#line 1762 "reflect.h2" +}; -#line 311 "reflect.h2" - [[nodiscard]] auto declaration::parent_is_function() const& -> bool { return CPP2_UFCS(parent_is_function)((*cpp2::impl::assert_not_null(n))); } -#line 312 "reflect.h2" - [[nodiscard]] auto declaration::parent_is_object() const& -> bool { return CPP2_UFCS(parent_is_object)((*cpp2::impl::assert_not_null(n))); } -#line 313 "reflect.h2" - [[nodiscard]] auto declaration::parent_is_type() const& -> bool { return CPP2_UFCS(parent_is_type)((*cpp2::impl::assert_not_null(n))); } -#line 314 "reflect.h2" - [[nodiscard]] auto declaration::parent_is_namespace() const& -> bool { return CPP2_UFCS(parent_is_namespace)((*cpp2::impl::assert_not_null(n))); } -#line 315 
"reflect.h2" - [[nodiscard]] auto declaration::parent_is_alias() const& -> bool { return CPP2_UFCS(parent_is_alias)((*cpp2::impl::assert_not_null(n))); } +#line 1765 "reflect.h2" +// Context during parsing of the regular expressions. +// +// Keeps track of the distributed group identifiers, current parsed group and branch resets. +// +class parse_context + { + private: std::string_view regex; // Regular expression string. + private: size_t pos {0}; // Current parsing position. + private: token_ptr root; // Token representing the regular expression. -#line 317 "reflect.h2" - [[nodiscard]] auto declaration::parent_is_type_alias() const& -> bool { return CPP2_UFCS(parent_is_type_alias)((*cpp2::impl::assert_not_null(n))); } -#line 318 "reflect.h2" - [[nodiscard]] auto declaration::parent_is_namespace_alias() const& -> bool { return CPP2_UFCS(parent_is_namespace_alias)((*cpp2::impl::assert_not_null(n))); } -#line 319 "reflect.h2" - [[nodiscard]] auto declaration::parent_is_object_alias() const& -> bool { return CPP2_UFCS(parent_is_object_alias)((*cpp2::impl::assert_not_null(n))); } + private: parse_context_group_state cur_group_state {}; + private: parse_context_branch_reset_state cur_branch_reset_state {}; -#line 321 "reflect.h2" - [[nodiscard]] auto declaration::parent_is_polymorphic() const& -> bool { return CPP2_UFCS(parent_is_polymorphic)((*cpp2::impl::assert_not_null(n))); } +#line 1779 "reflect.h2" + public: std::map named_groups {}; -#line 323 "reflect.h2" - auto declaration::mark_for_removal_from_enclosing_type() & -> void + private: error_func error_out; // TODO: Declaring std::function fails for cpp2. 
+ private: bool has_error {false}; - { - if (cpp2::type_safety.is_active() && !(parent_is_type()) ) { cpp2::type_safety.report_violation(""); } -#line 326 "reflect.h2" - auto test {CPP2_UFCS(type_member_mark_for_removal)((*cpp2::impl::assert_not_null(n)))}; - if (cpp2::cpp2_default.is_active() && !(cpp2::move(test)) ) { cpp2::cpp2_default.report_violation(""); }// ... to ensure this assert is true - } + public: explicit parse_context(cpp2::impl::in r, auto const& e); - declaration::~declaration() noexcept{} -declaration::declaration(declaration const& that) - : declaration_base{ static_cast(that) }{} +#line 1790 "reflect.h2" + // State management functions + // -#line 339 "reflect.h2" - function_declaration::function_declaration( + // Returned group state needs to be stored and provided in `end_group`. + public: [[nodiscard]] auto start_group() & -> parse_context_group_state; - declaration_node* n_, - cpp2::impl::in s - ) - : declaration{ n_, s } -#line 344 "reflect.h2" - { +#line 1803 "reflect.h2" + // `old_state` argument needs to be from start group. 
+ public: [[nodiscard]] auto end_group(cpp2::impl::in old_state) & -> token_ptr; - if (cpp2::cpp2_default.is_active() && !(CPP2_UFCS(is_function)((*cpp2::impl::assert_not_null(n)))) ) { cpp2::cpp2_default.report_violation(""); } - } +#line 1811 "reflect.h2" + public: [[nodiscard]] auto get_modifiers() const& -> expression_flags; -#line 349 "reflect.h2" - [[nodiscard]] auto function_declaration::index_of_parameter_named(cpp2::impl::in s) const& -> int { return CPP2_UFCS(index_of_parameter_named)((*cpp2::impl::assert_not_null(n)), s); } -#line 350 "reflect.h2" - [[nodiscard]] auto function_declaration::has_parameter_named(cpp2::impl::in s) const& -> bool { return CPP2_UFCS(has_parameter_named)((*cpp2::impl::assert_not_null(n)), s); } -#line 351 "reflect.h2" - [[nodiscard]] auto function_declaration::has_in_parameter_named(cpp2::impl::in s) const& -> bool { return CPP2_UFCS(has_in_parameter_named)((*cpp2::impl::assert_not_null(n)), s); } -#line 352 "reflect.h2" - [[nodiscard]] auto function_declaration::has_copy_parameter_named(cpp2::impl::in s) const& -> bool { return CPP2_UFCS(has_copy_parameter_named)((*cpp2::impl::assert_not_null(n)), s); } -#line 353 "reflect.h2" - [[nodiscard]] auto function_declaration::has_inout_parameter_named(cpp2::impl::in s) const& -> bool { return CPP2_UFCS(has_inout_parameter_named)((*cpp2::impl::assert_not_null(n)), s); } -#line 354 "reflect.h2" - [[nodiscard]] auto function_declaration::has_out_parameter_named(cpp2::impl::in s) const& -> bool { return CPP2_UFCS(has_out_parameter_named)((*cpp2::impl::assert_not_null(n)), s); } -#line 355 "reflect.h2" - [[nodiscard]] auto function_declaration::has_move_parameter_named(cpp2::impl::in s) const& -> bool { return CPP2_UFCS(has_move_parameter_named)((*cpp2::impl::assert_not_null(n)), s); } -#line 356 "reflect.h2" - [[nodiscard]] auto function_declaration::has_forward_parameter_named(cpp2::impl::in s) const& -> bool { return 
CPP2_UFCS(has_forward_parameter_named)((*cpp2::impl::assert_not_null(n)), s); } -#line 357 "reflect.h2" - [[nodiscard]] auto function_declaration::first_parameter_name() const& -> std::string { return CPP2_UFCS(first_parameter_name)((*cpp2::impl::assert_not_null(n))); } +#line 1815 "reflect.h2" + public: auto set_modifiers(cpp2::impl::in mod) & -> void; -#line 359 "reflect.h2" - [[nodiscard]] auto function_declaration::has_parameter_with_name_and_pass(cpp2::impl::in s, cpp2::impl::in pass) const& -> bool { - return CPP2_UFCS(has_parameter_with_name_and_pass)((*cpp2::impl::assert_not_null(n)), s, pass); } -#line 361 "reflect.h2" - [[nodiscard]] auto function_declaration::is_function_with_this() const& -> bool { return CPP2_UFCS(is_function_with_this)((*cpp2::impl::assert_not_null(n))); } -#line 362 "reflect.h2" - [[nodiscard]] auto function_declaration::is_virtual() const& -> bool { return CPP2_UFCS(is_virtual_function)((*cpp2::impl::assert_not_null(n))); } -#line 363 "reflect.h2" - [[nodiscard]] auto function_declaration::is_defaultable() const& -> bool { return CPP2_UFCS(is_defaultable_function)((*cpp2::impl::assert_not_null(n))); } -#line 364 "reflect.h2" - [[nodiscard]] auto function_declaration::is_constructor() const& -> bool { return CPP2_UFCS(is_constructor)((*cpp2::impl::assert_not_null(n))); } -#line 365 "reflect.h2" - [[nodiscard]] auto function_declaration::is_default_constructor() const& -> bool { return CPP2_UFCS(is_default_constructor)((*cpp2::impl::assert_not_null(n))); } -#line 366 "reflect.h2" - [[nodiscard]] auto function_declaration::is_move() const& -> bool { return CPP2_UFCS(is_move)((*cpp2::impl::assert_not_null(n))); } -#line 367 "reflect.h2" - [[nodiscard]] auto function_declaration::is_swap() const& -> bool { return CPP2_UFCS(is_swap)((*cpp2::impl::assert_not_null(n))); } -#line 368 "reflect.h2" - [[nodiscard]] auto function_declaration::is_constructor_with_that() const& -> bool { return 
CPP2_UFCS(is_constructor_with_that)((*cpp2::impl::assert_not_null(n))); } -#line 369 "reflect.h2" - [[nodiscard]] auto function_declaration::is_constructor_with_in_that() const& -> bool { return CPP2_UFCS(is_constructor_with_in_that)((*cpp2::impl::assert_not_null(n))); } -#line 370 "reflect.h2" - [[nodiscard]] auto function_declaration::is_constructor_with_move_that() const& -> bool { return CPP2_UFCS(is_constructor_with_move_that)((*cpp2::impl::assert_not_null(n))); } -#line 371 "reflect.h2" - [[nodiscard]] auto function_declaration::is_assignment() const& -> bool { return CPP2_UFCS(is_assignment)((*cpp2::impl::assert_not_null(n))); } -#line 372 "reflect.h2" - [[nodiscard]] auto function_declaration::is_assignment_with_that() const& -> bool { return CPP2_UFCS(is_assignment_with_that)((*cpp2::impl::assert_not_null(n))); } -#line 373 "reflect.h2" - [[nodiscard]] auto function_declaration::is_assignment_with_in_that() const& -> bool { return CPP2_UFCS(is_assignment_with_in_that)((*cpp2::impl::assert_not_null(n))); } -#line 374 "reflect.h2" - [[nodiscard]] auto function_declaration::is_assignment_with_move_that() const& -> bool { return CPP2_UFCS(is_assignment_with_move_that)((*cpp2::impl::assert_not_null(n))); } -#line 375 "reflect.h2" - [[nodiscard]] auto function_declaration::is_destructor() const& -> bool { return CPP2_UFCS(is_destructor)((*cpp2::impl::assert_not_null(n))); } +#line 1819 "reflect.h2" + // Branch reset management functions + // -#line 377 "reflect.h2" - [[nodiscard]] auto function_declaration::is_copy_or_move() const& -> bool { return is_constructor_with_that() || is_assignment_with_that(); } + public: [[nodiscard]] auto branch_reset_new_state() & -> parse_context_branch_reset_state; -#line 379 "reflect.h2" - [[nodiscard]] auto function_declaration::has_declared_return_type() const& -> bool { return CPP2_UFCS(has_declared_return_type)((*cpp2::impl::assert_not_null(n))); } -#line 380 "reflect.h2" - [[nodiscard]] auto 
function_declaration::has_deduced_return_type() const& -> bool { return CPP2_UFCS(has_deduced_return_type)((*cpp2::impl::assert_not_null(n))); } -#line 381 "reflect.h2" - [[nodiscard]] auto function_declaration::has_bool_return_type() const& -> bool { return CPP2_UFCS(has_bool_return_type)((*cpp2::impl::assert_not_null(n))); } -#line 382 "reflect.h2" - [[nodiscard]] auto function_declaration::has_non_void_return_type() const& -> bool { return CPP2_UFCS(has_non_void_return_type)((*cpp2::impl::assert_not_null(n))); } +#line 1831 "reflect.h2" + public: auto branch_reset_restore_state(cpp2::impl::in old_state) & -> void; -#line 384 "reflect.h2" - [[nodiscard]] auto function_declaration::unnamed_return_type() const& -> std::string { return CPP2_UFCS(unnamed_return_type_to_string)((*cpp2::impl::assert_not_null(n))); } +#line 1838 "reflect.h2" + public: auto next_alternative() & -> void; -#line 386 "reflect.h2" - [[nodiscard]] auto function_declaration::get_parameters() const& -> std::vector +#line 1844 "reflect.h2" + // Regex token management + // + public: auto add_token(cpp2::impl::in token) & -> void; - { - std::vector ret {}; - for ( auto const& param : CPP2_UFCS(get_function_parameters)((*cpp2::impl::assert_not_null(n))) ) { - static_cast(CPP2_UFCS(emplace_back)(ret, &*cpp2::impl::assert_not_null((*cpp2::impl::assert_not_null(param)).declaration), (*this))); - } - return ret; - } +#line 1850 "reflect.h2" + public: [[nodiscard]] auto has_token() const& -> bool; -#line 396 "reflect.h2" - [[nodiscard]] auto function_declaration::is_binary_comparison_function() const& -> bool { return CPP2_UFCS(is_binary_comparison_function)((*cpp2::impl::assert_not_null(n))); } +#line 1854 "reflect.h2" + public: [[nodiscard]] auto pop_token() & -> token_ptr; -#line 398 "reflect.h2" - auto function_declaration::default_to_virtual() & -> void { static_cast(CPP2_UFCS(make_function_virtual)((*cpp2::impl::assert_not_null(n)))); } +#line 1865 "reflect.h2" + public: [[nodiscard]] auto 
get_as_token() & -> token_ptr; -#line 400 "reflect.h2" - [[nodiscard]] auto function_declaration::make_virtual() & -> bool { return CPP2_UFCS(make_function_virtual)((*cpp2::impl::assert_not_null(n))); } +#line 1869 "reflect.h2" + // Group management + // + public: [[nodiscard]] auto get_cur_group() const& -> int; -#line 402 "reflect.h2" - auto function_declaration::add_initializer(cpp2::impl::in source) & -> void +#line 1875 "reflect.h2" + public: [[nodiscard]] auto next_group() & -> int; -#line 405 "reflect.h2" - { - if ((*this).is_active() && !(!(has_initializer())) ) { (*this).report_violation(CPP2_CONTRACT_MSG("cannot add an initializer to a function that already has one")); } - if ((*this).is_active() && !(parent_is_type()) ) { (*this).report_violation(CPP2_CONTRACT_MSG("cannot add an initializer to a function that isn't in a type scope")); } - //require( !has_initializer(), - // "cannot add an initializer to a function that already has one"); - //require( parent_is_type(), - // "cannot add an initializer to a function that isn't in a type scope"); +#line 1879 "reflect.h2" + public: auto set_named_group(cpp2::impl::in name, cpp2::impl::in id) & -> void; -#line 411 "reflect.h2" - auto stmt {parse_statement(source)}; - if (!((cpp2::impl::as_(stmt)))) { - error("cannot add an initializer that is not a valid statement"); - return ; - } - require(CPP2_UFCS(add_function_initializer)((*cpp2::impl::assert_not_null(n)), std::move(cpp2::move(stmt))), - std::string("unexpected error while attempting to add initializer")); - } +#line 1886 "reflect.h2" + public: [[nodiscard]] auto get_named_group(cpp2::impl::in name) const& -> int; - function_declaration::function_declaration(function_declaration const& that) - : declaration{ static_cast(that) }{} +#line 1897 "reflect.h2" + // Position management functions + // + public: [[nodiscard]] auto current() const& -> char; -#line 429 "reflect.h2" - object_declaration::object_declaration( + // Get the next token in the regex, 
skipping spaces according to the parameters. See `x` and `xx` modifiers. + private: [[nodiscard]] auto get_next_position(cpp2::impl::in in_class, cpp2::impl::in no_skip) const& -> size_t; - declaration_node* n_, - cpp2::impl::in s - ) - : declaration{ n_, s } -#line 434 "reflect.h2" - { +#line 1941 "reflect.h2" + // Return true if next token is available. + private: [[nodiscard]] auto next_impl(cpp2::impl::in in_class, cpp2::impl::in no_skip) & -> bool; - if (cpp2::cpp2_default.is_active() && !(CPP2_UFCS(is_object)((*cpp2::impl::assert_not_null(n)))) ) { cpp2::cpp2_default.report_violation(""); } - } +#line 1953 "reflect.h2" + public: [[nodiscard]] auto next() & -> auto; + public: [[nodiscard]] auto next_in_class() & -> auto; + public: [[nodiscard]] auto next_no_skip() & -> auto; -#line 439 "reflect.h2" - [[nodiscard]] auto object_declaration::is_const() const& -> bool { return CPP2_UFCS(is_const)((*cpp2::impl::assert_not_null(n))); } -#line 440 "reflect.h2" - [[nodiscard]] auto object_declaration::has_wildcard_type() const& -> bool { return CPP2_UFCS(has_wildcard_type)((*cpp2::impl::assert_not_null(n))); } + public: [[nodiscard]] auto next_n(cpp2::impl::in n) & -> bool; -#line 442 "reflect.h2" - [[nodiscard]] auto object_declaration::type() const& -> std::string{ - auto ret {CPP2_UFCS(object_type)((*cpp2::impl::assert_not_null(n)))}; - require(!(contains(ret, "(*ERROR*)")), - "cannot to_string this type: " + ret); - return ret; - } +#line 1966 "reflect.h2" + public: [[nodiscard]] auto has_next() const& -> bool; -#line 449 "reflect.h2" - [[nodiscard]] auto object_declaration::initializer() const& -> std::string{ - auto ret {CPP2_UFCS(object_initializer)((*cpp2::impl::assert_not_null(n)))}; - require(!(contains(ret, "(*ERROR*)")), - "cannot to_string this initializer: " + ret); - return ret; - } + private: [[nodiscard]] auto grab_until_impl(cpp2::impl::in e, cpp2::impl::out r, cpp2::impl::in any) & -> bool; - object_declaration::object_declaration(object_declaration 
const& that) - : declaration{ static_cast(that) }{} +#line 1989 "reflect.h2" + public: [[nodiscard]] auto grab_until(cpp2::impl::in e, cpp2::impl::out r) & -> auto; + public: [[nodiscard]] auto grab_until(cpp2::impl::in e, cpp2::impl::out r) & -> auto; + public: [[nodiscard]] auto grab_until_one_of(cpp2::impl::in e, cpp2::impl::out r) & -> auto; -#line 465 "reflect.h2" - type_declaration::type_declaration( + public: [[nodiscard]] auto grab_n(cpp2::impl::in n, cpp2::impl::out r) & -> bool; - declaration_node* n_, - cpp2::impl::in s - ) - : declaration{ n_, s } -#line 470 "reflect.h2" - { +#line 2006 "reflect.h2" + public: [[nodiscard]] auto grab_number() & -> std::string; - if (cpp2::cpp2_default.is_active() && !(CPP2_UFCS(is_type)((*cpp2::impl::assert_not_null(n)))) ) { cpp2::cpp2_default.report_violation(""); } - } +#line 2027 "reflect.h2" + private: [[nodiscard]] auto peek_impl(cpp2::impl::in in_class) const& -> char; -#line 475 "reflect.h2" - auto type_declaration::reserve_names(cpp2::impl::in name, auto&& ...etc) const& -> void - { // etc is not declared ':string_view' for compatibility with GCC 10.x - for ( - auto const& m : get_members() ) { - CPP2_UFCS(require)(m, !(CPP2_UFCS(has_name)(m, name)), - "in a '" + cpp2::to_string(get_metafunction_name()) + "' type, the name '" + cpp2::to_string(name) + "' " - "is reserved for use by the '" + cpp2::to_string(get_metafunction_name()) + "' implementation" +#line 2037 "reflect.h2" + public: [[nodiscard]] auto peek() const& -> auto; + public: [[nodiscard]] auto peek_in_class() const& -> auto; + +#line 2041 "reflect.h2" + // Parsing functions + // + public: [[nodiscard]] auto parser_group_modifiers(cpp2::impl::in change_str, expression_flags& parser_modifiers) & -> bool; + +#line 2097 "reflect.h2" + public: [[nodiscard]] auto parse_until(cpp2::impl::in term) & -> bool; + +#line 2135 "reflect.h2" + public: [[nodiscard]] auto parse(cpp2::impl::in modifiers) & -> bool; + +#line 2150 "reflect.h2" + // Misc functions + + 
public: [[nodiscard]] auto get_pos() const& -> auto; + public: [[nodiscard]] auto get_range(cpp2::impl::in start, cpp2::impl::in end) const& -> auto; + public: [[nodiscard]] auto valid() const& -> bool; + + public: [[nodiscard]] auto error(cpp2::impl::in err) & -> token_ptr; + public: parse_context(parse_context const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(parse_context const&) -> void = delete; + + +#line 2161 "reflect.h2" +}; + +#line 2164 "reflect.h2" +// Context for one function generation. Generation of functions can be interleaved, +// therefore we buffer the code for one function here. +// +class generation_function_context { + public: std::string code {""}; + public: std::string tabs {""}; + + public: auto add_tabs(cpp2::impl::in c) & -> void; + +#line 2178 "reflect.h2" + public: auto remove_tabs(cpp2::impl::in c) & -> void; + public: generation_function_context(auto const& code_, auto const& tabs_); +public: generation_function_context(); + + +#line 2181 "reflect.h2" +}; + +#line 2184 "reflect.h2" +// Context for generating the state machine. +class generation_context + { + private: std::vector gen_stack {1}; // Element 0 contains all the code. + + private: int matcher_func {0}; + private: int reset_func {0}; + private: int temp_name {0}; + private: std::string entry_func {""}; + + // Generation helpers + // + public: [[nodiscard]] auto match_parameters() const& -> std::string; + + // Code generation. + + // Add code line. + public: auto add(cpp2::impl::in s) & -> void; + +#line 2206 "reflect.h2" + // Add check for token. The check needs to be a function call that returns a boolean. + public: auto add_check(cpp2::impl::in check) & -> void; + +#line 2212 "reflect.h2" + // Add a stateful check. The check needs to return a `match_return`. 
+ public: auto add_statefull(cpp2::impl::in next_func, cpp2::impl::in check) & -> void; + +#line 2221 "reflect.h2" + protected: auto start_func_named(cpp2::impl::in name) & -> void; + +#line 2232 "reflect.h2" + protected: [[nodiscard]] auto start_func() & -> std::string; + +#line 2239 "reflect.h2" + protected: auto end_func_statefull(cpp2::impl::in s) & -> void; + +#line 2258 "reflect.h2" + // Generate the function for a token. + public: [[nodiscard]] auto generate_func(cpp2::impl::in token) & -> std::string; + +#line 2268 "reflect.h2" + // Generate the reset for a list of group identifiers. + public: [[nodiscard]] auto generate_reset(cpp2::impl::in> groups) & -> std::string; + +#line 2291 "reflect.h2" + // Name generation + // + protected: [[nodiscard]] auto gen_func_name() & -> std::string; + +#line 2299 "reflect.h2" + public: [[nodiscard]] auto next_func_name() & -> std::string; + +#line 2303 "reflect.h2" + protected: [[nodiscard]] auto gen_reset_func_name() & -> std::string; + +#line 2309 "reflect.h2" + public: [[nodiscard]] auto gen_temp() & -> std::string; + +#line 2315 "reflect.h2" + // Context management + // + public: [[nodiscard]] auto new_context() & -> generation_function_context*; + +#line 2325 "reflect.h2" + public: auto finish_context() & -> void; + +#line 2333 "reflect.h2" + // Misc functions + // + private: [[nodiscard]] auto get_current() & -> generation_function_context*; + +#line 2339 "reflect.h2" + private: [[nodiscard]] auto get_base() & -> generation_function_context*; + +#line 2343 "reflect.h2" + public: [[nodiscard]] auto get_entry_func() const& -> std::string; + +#line 2347 "reflect.h2" + public: [[nodiscard]] auto create_named_group_lookup(cpp2::impl::in> named_groups) const& -> std::string; + +#line 2371 "reflect.h2" + // Run the generation for the token. 
+ public: [[nodiscard]] auto run(cpp2::impl::in token) & -> std::string; + public: generation_context() = default; + public: generation_context(generation_context const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(generation_context const&) -> void = delete; + + +#line 2377 "reflect.h2" +}; + +// Regex syntax: | Example: ab|ba +// +// Non greedy implementation. First alternative that matches is chosen. +// +class alternative_token +: public regex_token_empty { + // No code gen here. alternative_token_gen is created in the parse_context + + public: explicit alternative_token(); + + public: [[nodiscard]] static auto parse(parse_context& ctx) -> token_ptr; + public: virtual ~alternative_token() noexcept; + + public: alternative_token(alternative_token const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(alternative_token const&) -> void = delete; + + +#line 2396 "reflect.h2" +}; + +class alternative_token_gen +: public regex_token { + +#line 2402 "reflect.h2" + private: token_vec alternatives; + + public: explicit alternative_token_gen(cpp2::impl::in a); + +#line 2409 "reflect.h2" + public: auto generate_code(generation_context& ctx) const -> void override; + +#line 2426 "reflect.h2" + public: auto add_groups(std::set& groups) const -> void override; + +#line 2433 "reflect.h2" + public: [[nodiscard]] static auto gen_string(cpp2::impl::in a) -> std::string; + public: virtual ~alternative_token_gen() noexcept; + + public: alternative_token_gen(alternative_token_gen const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(alternative_token_gen const&) -> void = delete; + + +#line 2445 "reflect.h2" +}; + +#line 2448 "reflect.h2" +// Regex syntax: . 
+// +class any_token +: public regex_token_check { + +#line 2454 "reflect.h2" + public: explicit any_token(cpp2::impl::in single_line); + +#line 2458 "reflect.h2" + public: [[nodiscard]] static auto parse(parse_context& ctx) -> token_ptr; + public: virtual ~any_token() noexcept; + + public: any_token(any_token const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(any_token const&) -> void = delete; + + +#line 2463 "reflect.h2" +}; + +#line 2466 "reflect.h2" +// Regex syntax: a +// +class char_token +: public regex_token { + +#line 2472 "reflect.h2" + private: std::string token; + private: bool ignore_case; + + public: explicit char_token(cpp2::impl::in t, cpp2::impl::in ignore_case_); + +#line 2481 "reflect.h2" + public: [[nodiscard]] static auto parse(parse_context& ctx) -> token_ptr; + +#line 2485 "reflect.h2" + public: auto generate_code(generation_context& ctx) const -> void override; + +#line 2508 "reflect.h2" + public: auto gen_case_insensitive(cpp2::impl::in lower, cpp2::impl::in upper, generation_context& ctx) const& -> void; + +#line 2529 "reflect.h2" + public: auto gen_case_sensitive(generation_context& ctx) const& -> void; + +#line 2547 "reflect.h2" + public: [[nodiscard]] auto add_escapes(std::string str) const& -> std::string; + +#line 2562 "reflect.h2" + public: auto append(char_token const& that) & -> void; + public: virtual ~char_token() noexcept; + + public: char_token(char_token const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(char_token const&) -> void = delete; + + +#line 2566 "reflect.h2" +}; + +#line 2569 "reflect.h2" +// Regex syntax: [] Example: [abcx-y[:digits:]] +// +class class_token +: public regex_token { + +#line 2575 "reflect.h2" + private: bool negate; + private: bool case_insensitive; + private: std::string class_str; + + public: explicit class_token(cpp2::impl::in negate_, cpp2::impl::in case_insensitive_, cpp2::impl::in class_str_, cpp2::impl::in str); + +#line 
2587 "reflect.h2" + // TODO: Rework class generation: Generate check functions for classes. + public: [[nodiscard]] static auto parse(parse_context& ctx) -> token_ptr; + +#line 2713 "reflect.h2" + public: auto generate_code(generation_context& ctx) const -> void override; + +#line 2718 "reflect.h2" + private: [[nodiscard]] static auto create_matcher(cpp2::impl::in name, cpp2::impl::in template_arguments) -> std::string; + public: virtual ~class_token() noexcept; + + public: class_token(class_token const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(class_token const&) -> void = delete; + + +#line 2725 "reflect.h2" +}; + +#line 2728 "reflect.h2" +// Regex syntax: \a or \n or \[ +// +[[nodiscard]] auto escape_token_parse(parse_context& ctx) -> token_ptr; + +#line 2769 "reflect.h2" +// Regex syntax: \K Example: ab\Kcd +// +[[nodiscard]] auto global_group_reset_token_parse(parse_context& ctx) -> token_ptr; + +#line 2780 "reflect.h2" +// Regex syntax: \ Example: \1 +// \g{name_or_number} +// \k{name_or_number} +// \k +// \k'name_or_number' +// +class group_ref_token +: public regex_token { + +#line 2790 "reflect.h2" + private: int id; + private: bool case_insensitive; + + public: explicit group_ref_token(cpp2::impl::in id_, cpp2::impl::in case_insensitive_, cpp2::impl::in str); + +#line 2800 "reflect.h2" + public: [[nodiscard]] static auto parse(parse_context& ctx) -> token_ptr; + +#line 2901 "reflect.h2" + public: auto generate_code(generation_context& ctx) const -> void override; + public: virtual ~group_ref_token() noexcept; + + public: group_ref_token(group_ref_token const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(group_ref_token const&) -> void = delete; + + +#line 2904 "reflect.h2" +}; + +#line 2907 "reflect.h2" +// Regex syntax: () Example: (abc) +// (?:) (?i:abc) +// (?<>:) (?:abc) +// (?#) (#Step 1 finished) +// (?|) (?|(abc)|(cde)) +// (?=) (?=abc) +// (?!) 
(?!abc) +// (*: syntax, cpp2::impl::in positive) -> token_ptr; + +#line 2938 "reflect.h2" + public: [[nodiscard]] static auto parse(parse_context& ctx) -> token_ptr; + +#line 3075 "reflect.h2" + public: [[nodiscard]] static auto gen_string(cpp2::impl::in name, cpp2::impl::in name_brackets, cpp2::impl::in has_modifier, cpp2::impl::in modifiers, cpp2::impl::in inner_) -> std::string; + +#line 3093 "reflect.h2" + public: auto generate_code(generation_context& ctx) const -> void override; + +#line 3113 "reflect.h2" + public: auto add_groups(std::set& groups) const -> void override; + public: virtual ~group_token() noexcept; + + public: group_token() = default; + public: group_token(group_token const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(group_token const&) -> void = delete; + + +#line 3120 "reflect.h2" +}; + +#line 3123 "reflect.h2" +// Regex syntax: \x or \x{} Example: \x{62} +// +[[nodiscard]] auto hexadecimal_token_parse(parse_context& ctx) -> token_ptr; + +#line 3164 "reflect.h2" +// Regex syntax: $ Example: aa$ +// +[[nodiscard]] auto line_end_token_parse(parse_context& ctx) -> token_ptr; + +#line 3184 "reflect.h2" +// Regex syntax: ^ Example: ^aa +// +[[nodiscard]] auto line_start_token_parse(parse_context& ctx) -> token_ptr; + +#line 3200 "reflect.h2" +// Regex syntax: (?=) or (?!) or (*pla), etc. Example: (?=AA) +// +// Parsed in group_token. 
+// +class lookahead_token +: public regex_token { + +#line 3208 "reflect.h2" + protected: bool positive; + public: token_ptr inner {nullptr}; + + public: explicit lookahead_token(cpp2::impl::in positive_); + +#line 3215 "reflect.h2" + public: auto generate_code(generation_context& ctx) const -> void override; + +#line 3221 "reflect.h2" + public: auto add_groups(std::set& groups) const -> void override; + public: virtual ~lookahead_token() noexcept; + + public: lookahead_token(lookahead_token const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(lookahead_token const&) -> void = delete; + + +#line 3224 "reflect.h2" +}; + +#line 3227 "reflect.h2" +// Named character classes +// +[[nodiscard]] auto named_class_token_parse(parse_context& ctx) -> token_ptr; + +#line 3255 "reflect.h2" +// Regex syntax: \o{} Example: \o{142} +// +[[nodiscard]] auto octal_token_parse(parse_context& ctx) -> token_ptr; + +#line 3283 "reflect.h2" +// Regex syntax: {min, max} Example: a{2,4} +// +class range_token +: public regex_token { + +#line 3289 "reflect.h2" + protected: int min_count {-1}; + protected: int max_count {-1}; + protected: int kind {range_flags::greedy}; + protected: token_ptr inner_token {nullptr}; + + public: explicit range_token(); + + public: [[nodiscard]] static auto parse(parse_context& ctx) -> token_ptr; + +#line 3369 "reflect.h2" + public: auto parse_modifier(parse_context& ctx) & -> void; + +#line 3381 "reflect.h2" + public: [[nodiscard]] auto gen_mod_string() const& -> std::string; + +#line 3394 "reflect.h2" + public: [[nodiscard]] auto gen_range_string() const& -> std::string; + +#line 3413 "reflect.h2" + public: auto generate_code(generation_context& ctx) const -> void override; + +#line 3424 "reflect.h2" + public: auto add_groups(std::set& groups) const -> void override; + public: virtual ~range_token() noexcept; + + public: range_token(range_token const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto 
operator=(range_token const&) -> void = delete; + + +#line 3428 "reflect.h2" +}; + +#line 3431 "reflect.h2" +// Regex syntax: *, +, or ? Example: aa* +// +class special_range_token +: public range_token { + +#line 3437 "reflect.h2" + public: [[nodiscard]] static auto parse(parse_context& ctx) -> token_ptr; + public: virtual ~special_range_token() noexcept; + + public: special_range_token() = default; + public: special_range_token(special_range_token const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(special_range_token const&) -> void = delete; + + +#line 3467 "reflect.h2" +}; + +#line 3470 "reflect.h2" +// Regex syntax: \b or \B Example: \bword\b +// +// Matches the start end end of word boundaries. +// +[[nodiscard]] auto word_boundary_token_parse(parse_context& ctx) -> token_ptr; + +#line 3492 "reflect.h2" +//----------------------------------------------------------------------- +// +// Parser for regular expression. +// +//----------------------------------------------------------------------- +// + +// Parser and generator for regular expressions. 
+template class regex_generator + { + private: std::string_view regex; + private: std::string modifier {""}; + private: std::string modifier_escape {""}; + + private: Error_out error_out; + + private: std::string source {""}; + + public: explicit regex_generator(cpp2::impl::in r, Error_out const& e); + +#line 3515 "reflect.h2" + public: [[nodiscard]] auto parse() & -> std::string; + +#line 3550 "reflect.h2" + private: auto extract_modifiers() & -> void; + public: regex_generator(regex_generator const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(regex_generator const&) -> void = delete; + + +#line 3564 "reflect.h2" +}; + +template [[nodiscard]] auto generate_regex(cpp2::impl::in regex, Err const& err) -> std::string; + +#line 3576 "reflect.h2" +auto regex_gen(meta::type_declaration& t) -> void; + +#line 3631 "reflect.h2" +//----------------------------------------------------------------------- +// +// apply_metafunctions +// +[[nodiscard]] auto apply_metafunctions( + declaration_node& n, + type_declaration& rtype, + auto const& error + ) -> bool; + +#line 3750 "reflect.h2" +} + +} + + +//=== Cpp2 function definitions ================================================= + +#line 1 "reflect.h2" + +#line 22 "reflect.h2" +namespace cpp2 { + +namespace meta { + +#line 27 "reflect.h2" +//----------------------------------------------------------------------- +// +// Compiler services +// +//----------------------------------------------------------------------- +// + +#line 36 "reflect.h2" + // Common data members + // + +#line 47 "reflect.h2" + // Constructor + // +#line 49 "reflect.h2" + compiler_services::compiler_services( + + std::vector* errors_, + std::set* includes_, + stable_vector* generated_tokens_ + ) + : errors{ errors_ } + , includes{ includes_ } + , errors_original_size{ cpp2::unsafe_narrow(std::ssize(*cpp2::impl::assert_not_null(errors))) } + , generated_tokens{ generated_tokens_ } + , parser{ 
*cpp2::impl::assert_not_null(errors), *cpp2::impl::assert_not_null(includes) } +#line 55 "reflect.h2" + { + +#line 61 "reflect.h2" + } + + // Common API + // +#line 65 "reflect.h2" + auto compiler_services::set_metafunction_name(cpp2::impl::in name, cpp2::impl::in> args) & -> void{ + metafunction_name = name; + metafunction_args = args; + metafunctions_used = CPP2_UFCS(empty)(args); + } + +#line 71 "reflect.h2" + [[nodiscard]] auto compiler_services::get_metafunction_name() const& -> std::string_view { return metafunction_name; } + +#line 73 "reflect.h2" + [[nodiscard]] auto compiler_services::get_argument(cpp2::impl::in index) & -> std::string{ + metafunctions_used = true; + if (([_0 = 0, _1 = index, _2 = CPP2_UFCS(ssize)(metafunction_args)]{ return cpp2::impl::cmp_less_eq(_0,_1) && cpp2::impl::cmp_less(_1,_2); }())) { + return CPP2_ASSERT_IN_BOUNDS(metafunction_args, index); + } + return ""; + } + +#line 81 "reflect.h2" + [[nodiscard]] auto compiler_services::get_arguments() & -> std::vector{ + metafunctions_used = true; + return metafunction_args; + } + +#line 86 "reflect.h2" + [[nodiscard]] auto compiler_services::arguments_were_used() const& -> bool { return metafunctions_used; } + +#line 88 "reflect.h2" + [[nodiscard]] auto compiler_services::parse_statement( + + std::string_view source + ) & -> parse_statement_ret + + { + cpp2::impl::deferred_init> ret; +#line 94 "reflect.h2" + auto original_source {source}; + + CPP2_UFCS(push_back)(generated_lines, std::vector()); + auto lines {&CPP2_UFCS(back)(generated_lines)}; + + auto add_line {[&, _1 = lines](cpp2::impl::in s) mutable -> void{ + static_cast(CPP2_UFCS(emplace_back)((*cpp2::impl::assert_not_null(_1)), s, source_line::category::cpp2)); + }}; +{ +auto newline_pos{CPP2_UFCS(find)(source, '\n')}; + + // First split this string into source_lines + // + +#line 106 "reflect.h2" + if ( cpp2::impl::cmp_greater(CPP2_UFCS(ssize)(source),1) + && newline_pos != source.npos) + { + while( newline_pos != source.npos ) + 
{ + add_line(CPP2_UFCS(substr)(source, 0, newline_pos)); + CPP2_UFCS(remove_prefix)(source, newline_pos + 1); + newline_pos = CPP2_UFCS(find)(source, '\n'); + } + } +} + +#line 117 "reflect.h2" + if (!(CPP2_UFCS(empty)(source))) { + cpp2::move(add_line)(cpp2::move(source)); + } + + // Now lex this source fragment to generate + // a single grammar_map entry, whose .second + // is the vector of tokens + static_cast(CPP2_UFCS(emplace_back)(generated_lexers, *cpp2::impl::assert_not_null(errors))); + auto tokens {&CPP2_UFCS(back)(generated_lexers)}; + CPP2_UFCS(lex)((*cpp2::impl::assert_not_null(tokens)), *cpp2::impl::assert_not_null(cpp2::move(lines)), true); + + if (cpp2::cpp2_default.is_active() && !(std::ssize(CPP2_UFCS(get_map)((*cpp2::impl::assert_not_null(tokens)))) == 1) ) { cpp2::cpp2_default.report_violation(""); } + + // Now parse this single declaration from + // the lexed tokens + ret.construct(CPP2_UFCS(parse_one_declaration)(parser, + (*cpp2::impl::assert_not_null(CPP2_UFCS(begin)(CPP2_UFCS(get_map)(*cpp2::impl::assert_not_null(cpp2::move(tokens)))))).second, + *cpp2::impl::assert_not_null(generated_tokens) + )); + if (!(ret.value())) { + error("parse failed - the source string is not a valid statement:\n" + cpp2::to_string(cpp2::move(original_source)) + ""); + }return std::move(ret.value()); + } + +#line 141 "reflect.h2" + auto compiler_services::add_runtime_support_include(cpp2::impl::in s) & -> void { static_cast(CPP2_UFCS(emplace)((*cpp2::impl::assert_not_null(includes)), s)); } + +#line 143 "reflect.h2" + [[nodiscard]] auto compiler_services::position() const -> source_position + + { + return { }; + } + + // Error diagnosis and handling, integrated with compiler output + // Unlike a contract violation, .requires continues further processing + // +#line 152 "reflect.h2" + auto compiler_services::require( + + cpp2::impl::in b, + cpp2::impl::in msg + ) const& -> void + { + if (!(b)) { + error(msg); + } + } + +#line 163 "reflect.h2" + auto 
compiler_services::error(cpp2::impl::in msg) const& -> void + { + auto message {cpp2::impl::as_(msg)}; + if (!(CPP2_UFCS(empty)(metafunction_name))) { + message = "while applying @" + cpp2::to_string(metafunction_name) + " - " + cpp2::to_string(message) + ""; + } + static_cast(CPP2_UFCS(emplace_back)((*cpp2::impl::assert_not_null(errors)), position(), cpp2::move(message))); + } + + // Enable custom contracts on this object, integrated with compiler output + // Unlike .requires, a contract violation stops further processing + // +#line 175 "reflect.h2" + auto compiler_services::report_violation(auto const& msg) const& -> void{ + error(msg); + throw(std::runtime_error( + " ==> programming bug found in metafunction @" + cpp2::to_string(metafunction_name) + " " + "- contract violation - see previous errors" + )); + } + +#line 183 "reflect.h2" + [[nodiscard]] auto compiler_services::is_active() const& -> auto { return true; } + + compiler_services::~compiler_services() noexcept{} +compiler_services::compiler_services(compiler_services const& that) + : errors{ that.errors } + , includes{ that.includes } + , errors_original_size{ that.errors_original_size } + , generated_tokens{ that.generated_tokens } + , parser{ that.parser } + , metafunction_name{ that.metafunction_name } + , metafunction_args{ that.metafunction_args } + , metafunctions_used{ that.metafunctions_used }{} + +#line 187 "reflect.h2" +/* +//----------------------------------------------------------------------- +// +// Type IDs +// +//----------------------------------------------------------------------- +// + +// All type_ids are wrappers around a pointer to node +// +type_id: @polymorphic_base @copyable type = +{ + this: compiler_services = (); + + n: type_id_node; + + protected operator=: ( + out this, + n_: type_id_node, + s : compiler_services + ) + = { + compiler_services = s; + n = n_; + assert( n, "a meta::type_id must point to a valid type_id_node, not null" ); + } + + is_wildcard : (this) -> bool 
= n.is_wildcard(); + is_pointer_qualified: (this) -> bool = n.is_pointer_qualified(); + template_args_count : (this) -> int = n.template_arguments().ssize(); + to_string : (this) -> std::string = n.to_string(); + + position: (override this) -> source_position = n.position(); +} +*/ + +#line 224 "reflect.h2" +//----------------------------------------------------------------------- +// +// Declarations +// +//----------------------------------------------------------------------- +// + +// All declarations are wrappers around a pointer to node +// + +#line 239 "reflect.h2" + declaration_base::declaration_base( + + declaration_node* n_, + cpp2::impl::in s + ) + : compiler_services{ s } + , n{ n_ } +#line 244 "reflect.h2" + { + +#line 247 "reflect.h2" + if (cpp2::cpp2_default.is_active() && !(n) ) { cpp2::cpp2_default.report_violation(CPP2_CONTRACT_MSG("a meta::declaration must point to a valid declaration_node, not null")); } + } + +#line 250 "reflect.h2" + [[nodiscard]] auto declaration_base::position() const -> source_position { return CPP2_UFCS(position)((*cpp2::impl::assert_not_null(n))); } + +#line 252 "reflect.h2" + [[nodiscard]] auto declaration_base::print() const& -> std::string { return CPP2_UFCS(pretty_print_visualize)((*cpp2::impl::assert_not_null(n)), 0); } + + declaration_base::~declaration_base() noexcept{} +declaration_base::declaration_base(declaration_base const& that) + : compiler_services{ static_cast(that) } + , n{ that.n }{} + +#line 256 "reflect.h2" +//----------------------------------------------------------------------- +// All declarations +// + +#line 263 "reflect.h2" + declaration::declaration( + + declaration_node* n_, + cpp2::impl::in s + ) + : declaration_base{ n_, s } +#line 268 "reflect.h2" + { + + } + +#line 272 "reflect.h2" + [[nodiscard]] auto declaration::is_public() const& -> bool { return CPP2_UFCS(is_public)((*cpp2::impl::assert_not_null(n))); } +#line 273 "reflect.h2" + [[nodiscard]] auto declaration::is_protected() const& -> 
bool { return CPP2_UFCS(is_protected)((*cpp2::impl::assert_not_null(n))); } +#line 274 "reflect.h2" + [[nodiscard]] auto declaration::is_private() const& -> bool { return CPP2_UFCS(is_private)((*cpp2::impl::assert_not_null(n))); } +#line 275 "reflect.h2" + [[nodiscard]] auto declaration::is_default_access() const& -> bool { return CPP2_UFCS(is_default_access)((*cpp2::impl::assert_not_null(n))); } + +#line 277 "reflect.h2" + auto declaration::default_to_public() & -> void { static_cast(CPP2_UFCS(make_public)((*cpp2::impl::assert_not_null(n)))); } +#line 278 "reflect.h2" + auto declaration::default_to_protected() & -> void { static_cast(CPP2_UFCS(make_protected)((*cpp2::impl::assert_not_null(n)))); } +#line 279 "reflect.h2" + auto declaration::default_to_private() & -> void { static_cast(CPP2_UFCS(make_private)((*cpp2::impl::assert_not_null(n)))); } + +#line 281 "reflect.h2" + [[nodiscard]] auto declaration::make_public() & -> bool { return CPP2_UFCS(make_public)((*cpp2::impl::assert_not_null(n))); } +#line 282 "reflect.h2" + [[nodiscard]] auto declaration::make_protected() & -> bool { return CPP2_UFCS(make_protected)((*cpp2::impl::assert_not_null(n))); } +#line 283 "reflect.h2" + [[nodiscard]] auto declaration::make_private() & -> bool { return CPP2_UFCS(make_private)((*cpp2::impl::assert_not_null(n))); } + +#line 285 "reflect.h2" + [[nodiscard]] auto declaration::has_name() const& -> bool { return CPP2_UFCS(has_name)((*cpp2::impl::assert_not_null(n))); } +#line 286 "reflect.h2" + [[nodiscard]] auto declaration::has_name(cpp2::impl::in s) const& -> bool { return CPP2_UFCS(has_name)((*cpp2::impl::assert_not_null(n)), s); } + +#line 288 "reflect.h2" + [[nodiscard]] auto declaration::name() const& -> std::string_view{ + if (has_name()) {return CPP2_UFCS(as_string_view)((*cpp2::impl::assert_not_null(CPP2_UFCS(name)(*cpp2::impl::assert_not_null(n))))); } + else { return ""; } + } + +#line 293 "reflect.h2" + [[nodiscard]] auto declaration::has_initializer() const& -> bool 
{ return CPP2_UFCS(has_initializer)((*cpp2::impl::assert_not_null(n))); } + +#line 295 "reflect.h2" + [[nodiscard]] auto declaration::is_global() const& -> bool { return CPP2_UFCS(is_global)((*cpp2::impl::assert_not_null(n))); } +#line 296 "reflect.h2" + [[nodiscard]] auto declaration::is_function() const& -> bool { return CPP2_UFCS(is_function)((*cpp2::impl::assert_not_null(n))); } +#line 297 "reflect.h2" + [[nodiscard]] auto declaration::is_object() const& -> bool { return CPP2_UFCS(is_object)((*cpp2::impl::assert_not_null(n))); } +#line 298 "reflect.h2" + [[nodiscard]] auto declaration::is_base_object() const& -> bool { return CPP2_UFCS(is_base_object)((*cpp2::impl::assert_not_null(n))); } +#line 299 "reflect.h2" + [[nodiscard]] auto declaration::is_member_object() const& -> bool { return CPP2_UFCS(is_member_object)((*cpp2::impl::assert_not_null(n))); } +#line 300 "reflect.h2" + [[nodiscard]] auto declaration::is_type() const& -> bool { return CPP2_UFCS(is_type)((*cpp2::impl::assert_not_null(n))); } +#line 301 "reflect.h2" + [[nodiscard]] auto declaration::is_namespace() const& -> bool { return CPP2_UFCS(is_namespace)((*cpp2::impl::assert_not_null(n))); } +#line 302 "reflect.h2" + [[nodiscard]] auto declaration::is_alias() const& -> bool { return CPP2_UFCS(is_alias)((*cpp2::impl::assert_not_null(n))); } + +#line 304 "reflect.h2" + [[nodiscard]] auto declaration::is_type_alias() const& -> bool { return CPP2_UFCS(is_type_alias)((*cpp2::impl::assert_not_null(n))); } +#line 305 "reflect.h2" + [[nodiscard]] auto declaration::is_namespace_alias() const& -> bool { return CPP2_UFCS(is_namespace_alias)((*cpp2::impl::assert_not_null(n))); } +#line 306 "reflect.h2" + [[nodiscard]] auto declaration::is_object_alias() const& -> bool { return CPP2_UFCS(is_object_alias)((*cpp2::impl::assert_not_null(n))); } + +#line 308 "reflect.h2" + [[nodiscard]] auto declaration::is_function_expression() const& -> bool { return 
CPP2_UFCS(is_function_expression)((*cpp2::impl::assert_not_null(n))); } + +#line 310 "reflect.h2" + [[nodiscard]] auto declaration::as_function() const& -> function_declaration { return function_declaration(n, (*this)); } +#line 311 "reflect.h2" + [[nodiscard]] auto declaration::as_object() const& -> object_declaration { return object_declaration(n, (*this)); } +#line 312 "reflect.h2" + [[nodiscard]] auto declaration::as_type() const& -> type_declaration { return type_declaration(n, (*this)); } +#line 313 "reflect.h2" + [[nodiscard]] auto declaration::as_alias() const& -> alias_declaration { return alias_declaration(n, (*this)); } + +#line 315 "reflect.h2" + [[nodiscard]] auto declaration::get_parent() const& -> declaration { return declaration((*cpp2::impl::assert_not_null(n)).parent_declaration, (*this)); } + +#line 317 "reflect.h2" + [[nodiscard]] auto declaration::parent_is_function() const& -> bool { return CPP2_UFCS(parent_is_function)((*cpp2::impl::assert_not_null(n))); } +#line 318 "reflect.h2" + [[nodiscard]] auto declaration::parent_is_object() const& -> bool { return CPP2_UFCS(parent_is_object)((*cpp2::impl::assert_not_null(n))); } +#line 319 "reflect.h2" + [[nodiscard]] auto declaration::parent_is_type() const& -> bool { return CPP2_UFCS(parent_is_type)((*cpp2::impl::assert_not_null(n))); } +#line 320 "reflect.h2" + [[nodiscard]] auto declaration::parent_is_namespace() const& -> bool { return CPP2_UFCS(parent_is_namespace)((*cpp2::impl::assert_not_null(n))); } +#line 321 "reflect.h2" + [[nodiscard]] auto declaration::parent_is_alias() const& -> bool { return CPP2_UFCS(parent_is_alias)((*cpp2::impl::assert_not_null(n))); } + +#line 323 "reflect.h2" + [[nodiscard]] auto declaration::parent_is_type_alias() const& -> bool { return CPP2_UFCS(parent_is_type_alias)((*cpp2::impl::assert_not_null(n))); } +#line 324 "reflect.h2" + [[nodiscard]] auto declaration::parent_is_namespace_alias() const& -> bool { return 
CPP2_UFCS(parent_is_namespace_alias)((*cpp2::impl::assert_not_null(n))); } +#line 325 "reflect.h2" + [[nodiscard]] auto declaration::parent_is_object_alias() const& -> bool { return CPP2_UFCS(parent_is_object_alias)((*cpp2::impl::assert_not_null(n))); } + +#line 327 "reflect.h2" + [[nodiscard]] auto declaration::parent_is_polymorphic() const& -> bool { return CPP2_UFCS(parent_is_polymorphic)((*cpp2::impl::assert_not_null(n))); } + +#line 329 "reflect.h2" + auto declaration::mark_for_removal_from_enclosing_type() & -> void + // this precondition should be sufficient ... + { + if (cpp2::type_safety.is_active() && !(parent_is_type()) ) { cpp2::type_safety.report_violation(""); } +#line 332 "reflect.h2" + auto test {CPP2_UFCS(type_member_mark_for_removal)((*cpp2::impl::assert_not_null(n)))}; + if (cpp2::cpp2_default.is_active() && !(cpp2::move(test)) ) { cpp2::cpp2_default.report_violation(""); }// ... to ensure this assert is true + } + + declaration::~declaration() noexcept{} +declaration::declaration(declaration const& that) + : declaration_base{ static_cast(that) }{} + +#line 338 "reflect.h2" +//----------------------------------------------------------------------- +// Function declarations +// + +#line 345 "reflect.h2" + function_declaration::function_declaration( + + declaration_node* n_, + cpp2::impl::in s + ) + : declaration{ n_, s } +#line 350 "reflect.h2" + { + + if (cpp2::cpp2_default.is_active() && !(CPP2_UFCS(is_function)((*cpp2::impl::assert_not_null(n)))) ) { cpp2::cpp2_default.report_violation(""); } + } + +#line 355 "reflect.h2" + [[nodiscard]] auto function_declaration::index_of_parameter_named(cpp2::impl::in s) const& -> int { return CPP2_UFCS(index_of_parameter_named)((*cpp2::impl::assert_not_null(n)), s); } +#line 356 "reflect.h2" + [[nodiscard]] auto function_declaration::has_parameter_named(cpp2::impl::in s) const& -> bool { return CPP2_UFCS(has_parameter_named)((*cpp2::impl::assert_not_null(n)), s); } +#line 357 "reflect.h2" + [[nodiscard]] 
auto function_declaration::has_in_parameter_named(cpp2::impl::in s) const& -> bool { return CPP2_UFCS(has_in_parameter_named)((*cpp2::impl::assert_not_null(n)), s); } +#line 358 "reflect.h2" + [[nodiscard]] auto function_declaration::has_copy_parameter_named(cpp2::impl::in s) const& -> bool { return CPP2_UFCS(has_copy_parameter_named)((*cpp2::impl::assert_not_null(n)), s); } +#line 359 "reflect.h2" + [[nodiscard]] auto function_declaration::has_inout_parameter_named(cpp2::impl::in s) const& -> bool { return CPP2_UFCS(has_inout_parameter_named)((*cpp2::impl::assert_not_null(n)), s); } +#line 360 "reflect.h2" + [[nodiscard]] auto function_declaration::has_out_parameter_named(cpp2::impl::in s) const& -> bool { return CPP2_UFCS(has_out_parameter_named)((*cpp2::impl::assert_not_null(n)), s); } +#line 361 "reflect.h2" + [[nodiscard]] auto function_declaration::has_move_parameter_named(cpp2::impl::in s) const& -> bool { return CPP2_UFCS(has_move_parameter_named)((*cpp2::impl::assert_not_null(n)), s); } +#line 362 "reflect.h2" + [[nodiscard]] auto function_declaration::has_forward_parameter_named(cpp2::impl::in s) const& -> bool { return CPP2_UFCS(has_forward_parameter_named)((*cpp2::impl::assert_not_null(n)), s); } +#line 363 "reflect.h2" + [[nodiscard]] auto function_declaration::first_parameter_name() const& -> std::string { return CPP2_UFCS(first_parameter_name)((*cpp2::impl::assert_not_null(n))); } + +#line 365 "reflect.h2" + [[nodiscard]] auto function_declaration::has_parameter_with_name_and_pass(cpp2::impl::in s, cpp2::impl::in pass) const& -> bool { + return CPP2_UFCS(has_parameter_with_name_and_pass)((*cpp2::impl::assert_not_null(n)), s, pass); } +#line 367 "reflect.h2" + [[nodiscard]] auto function_declaration::is_function_with_this() const& -> bool { return CPP2_UFCS(is_function_with_this)((*cpp2::impl::assert_not_null(n))); } +#line 368 "reflect.h2" + [[nodiscard]] auto function_declaration::is_virtual() const& -> bool { return 
CPP2_UFCS(is_virtual_function)((*cpp2::impl::assert_not_null(n))); } +#line 369 "reflect.h2" + [[nodiscard]] auto function_declaration::is_defaultable() const& -> bool { return CPP2_UFCS(is_defaultable_function)((*cpp2::impl::assert_not_null(n))); } +#line 370 "reflect.h2" + [[nodiscard]] auto function_declaration::is_constructor() const& -> bool { return CPP2_UFCS(is_constructor)((*cpp2::impl::assert_not_null(n))); } +#line 371 "reflect.h2" + [[nodiscard]] auto function_declaration::is_default_constructor() const& -> bool { return CPP2_UFCS(is_default_constructor)((*cpp2::impl::assert_not_null(n))); } +#line 372 "reflect.h2" + [[nodiscard]] auto function_declaration::is_move() const& -> bool { return CPP2_UFCS(is_move)((*cpp2::impl::assert_not_null(n))); } +#line 373 "reflect.h2" + [[nodiscard]] auto function_declaration::is_swap() const& -> bool { return CPP2_UFCS(is_swap)((*cpp2::impl::assert_not_null(n))); } +#line 374 "reflect.h2" + [[nodiscard]] auto function_declaration::is_constructor_with_that() const& -> bool { return CPP2_UFCS(is_constructor_with_that)((*cpp2::impl::assert_not_null(n))); } +#line 375 "reflect.h2" + [[nodiscard]] auto function_declaration::is_constructor_with_in_that() const& -> bool { return CPP2_UFCS(is_constructor_with_in_that)((*cpp2::impl::assert_not_null(n))); } +#line 376 "reflect.h2" + [[nodiscard]] auto function_declaration::is_constructor_with_move_that() const& -> bool { return CPP2_UFCS(is_constructor_with_move_that)((*cpp2::impl::assert_not_null(n))); } +#line 377 "reflect.h2" + [[nodiscard]] auto function_declaration::is_assignment() const& -> bool { return CPP2_UFCS(is_assignment)((*cpp2::impl::assert_not_null(n))); } +#line 378 "reflect.h2" + [[nodiscard]] auto function_declaration::is_assignment_with_that() const& -> bool { return CPP2_UFCS(is_assignment_with_that)((*cpp2::impl::assert_not_null(n))); } +#line 379 "reflect.h2" + [[nodiscard]] auto function_declaration::is_assignment_with_in_that() const& -> bool { return 
CPP2_UFCS(is_assignment_with_in_that)((*cpp2::impl::assert_not_null(n))); } +#line 380 "reflect.h2" + [[nodiscard]] auto function_declaration::is_assignment_with_move_that() const& -> bool { return CPP2_UFCS(is_assignment_with_move_that)((*cpp2::impl::assert_not_null(n))); } +#line 381 "reflect.h2" + [[nodiscard]] auto function_declaration::is_destructor() const& -> bool { return CPP2_UFCS(is_destructor)((*cpp2::impl::assert_not_null(n))); } + +#line 383 "reflect.h2" + [[nodiscard]] auto function_declaration::is_copy_or_move() const& -> bool { return is_constructor_with_that() || is_assignment_with_that(); } + +#line 385 "reflect.h2" + [[nodiscard]] auto function_declaration::has_declared_return_type() const& -> bool { return CPP2_UFCS(has_declared_return_type)((*cpp2::impl::assert_not_null(n))); } +#line 386 "reflect.h2" + [[nodiscard]] auto function_declaration::has_deduced_return_type() const& -> bool { return CPP2_UFCS(has_deduced_return_type)((*cpp2::impl::assert_not_null(n))); } +#line 387 "reflect.h2" + [[nodiscard]] auto function_declaration::has_bool_return_type() const& -> bool { return CPP2_UFCS(has_bool_return_type)((*cpp2::impl::assert_not_null(n))); } +#line 388 "reflect.h2" + [[nodiscard]] auto function_declaration::has_non_void_return_type() const& -> bool { return CPP2_UFCS(has_non_void_return_type)((*cpp2::impl::assert_not_null(n))); } + +#line 390 "reflect.h2" + [[nodiscard]] auto function_declaration::unnamed_return_type() const& -> std::string { return CPP2_UFCS(unnamed_return_type_to_string)((*cpp2::impl::assert_not_null(n))); } + +#line 392 "reflect.h2" + [[nodiscard]] auto function_declaration::get_parameters() const& -> std::vector + + { + std::vector ret {}; + for ( auto const& param : CPP2_UFCS(get_function_parameters)((*cpp2::impl::assert_not_null(n))) ) { + static_cast(CPP2_UFCS(emplace_back)(ret, &*cpp2::impl::assert_not_null((*cpp2::impl::assert_not_null(param)).declaration), (*this))); + } + return ret; + } + +#line 402 "reflect.h2" 
+ [[nodiscard]] auto function_declaration::is_binary_comparison_function() const& -> bool { return CPP2_UFCS(is_binary_comparison_function)((*cpp2::impl::assert_not_null(n))); } + +#line 404 "reflect.h2" + auto function_declaration::default_to_virtual() & -> void { static_cast(CPP2_UFCS(make_function_virtual)((*cpp2::impl::assert_not_null(n)))); } + +#line 406 "reflect.h2" + [[nodiscard]] auto function_declaration::make_virtual() & -> bool { return CPP2_UFCS(make_function_virtual)((*cpp2::impl::assert_not_null(n))); } + +#line 408 "reflect.h2" + auto function_declaration::add_initializer(cpp2::impl::in source) & -> void + +#line 411 "reflect.h2" + { + if ((*this).is_active() && !(!(has_initializer())) ) { (*this).report_violation(CPP2_CONTRACT_MSG("cannot add an initializer to a function that already has one")); } + if ((*this).is_active() && !(parent_is_type()) ) { (*this).report_violation(CPP2_CONTRACT_MSG("cannot add an initializer to a function that isn't in a type scope")); } + //require( !has_initializer(), + // "cannot add an initializer to a function that already has one"); + //require( parent_is_type(), + // "cannot add an initializer to a function that isn't in a type scope"); + +#line 417 "reflect.h2" + auto stmt {parse_statement(source)}; + if (!((cpp2::impl::as_(stmt)))) { + error("cannot add an initializer that is not a valid statement"); + return ; + } + require(CPP2_UFCS(add_function_initializer)((*cpp2::impl::assert_not_null(n)), std::move(cpp2::move(stmt))), + std::string("unexpected error while attempting to add initializer")); + } + + function_declaration::function_declaration(function_declaration const& that) + : declaration{ static_cast(that) }{} + +#line 428 "reflect.h2" +//----------------------------------------------------------------------- +// Object declarations +// + +#line 435 "reflect.h2" + object_declaration::object_declaration( + + declaration_node* n_, + cpp2::impl::in s + ) + : declaration{ n_, s } +#line 440 "reflect.h2" + { + + 
if (cpp2::cpp2_default.is_active() && !(CPP2_UFCS(is_object)((*cpp2::impl::assert_not_null(n)))) ) { cpp2::cpp2_default.report_violation(""); } + } + +#line 445 "reflect.h2" + [[nodiscard]] auto object_declaration::is_const() const& -> bool { return CPP2_UFCS(is_const)((*cpp2::impl::assert_not_null(n))); } +#line 446 "reflect.h2" + [[nodiscard]] auto object_declaration::has_wildcard_type() const& -> bool { return CPP2_UFCS(has_wildcard_type)((*cpp2::impl::assert_not_null(n))); } + +#line 448 "reflect.h2" + [[nodiscard]] auto object_declaration::type() const& -> std::string{ + auto ret {CPP2_UFCS(object_type)((*cpp2::impl::assert_not_null(n)))}; + require(!(contains(ret, "(*ERROR*)")), + "cannot to_string this type: " + ret); + return ret; + } + +#line 455 "reflect.h2" + [[nodiscard]] auto object_declaration::initializer() const& -> std::string{ + auto ret {CPP2_UFCS(object_initializer)((*cpp2::impl::assert_not_null(n)))}; + require(!(contains(ret, "(*ERROR*)")), + "cannot to_string this initializer: " + ret); + return ret; + } + + object_declaration::object_declaration(object_declaration const& that) + : declaration{ static_cast(that) }{} + +#line 464 "reflect.h2" +//----------------------------------------------------------------------- +// Type declarations +// + +#line 471 "reflect.h2" + type_declaration::type_declaration( + + declaration_node* n_, + cpp2::impl::in s + ) + : declaration{ n_, s } +#line 476 "reflect.h2" + { + + if (cpp2::cpp2_default.is_active() && !(CPP2_UFCS(is_type)((*cpp2::impl::assert_not_null(n)))) ) { cpp2::cpp2_default.report_violation(""); } + } + +#line 481 "reflect.h2" + auto type_declaration::reserve_names(cpp2::impl::in name, auto&& ...etc) const& -> void + { // etc is not declared ':string_view' for compatibility with GCC 10.x + for ( + auto const& m : get_members() ) { + CPP2_UFCS(require)(m, !(CPP2_UFCS(has_name)(m, name)), + "in a '" + cpp2::to_string(get_metafunction_name()) + "' type, the name '" + cpp2::to_string(name) + "' " 
+ "is reserved for use by the '" + cpp2::to_string(get_metafunction_name()) + "' implementation" + ); + } + if constexpr (!(CPP2_PACK_EMPTY(etc))) { + reserve_names(CPP2_FORWARD(etc)...); + } + } + +#line 495 "reflect.h2" + [[nodiscard]] auto type_declaration::is_polymorphic() const& -> bool { return CPP2_UFCS(is_polymorphic)((*cpp2::impl::assert_not_null(n))); } +#line 496 "reflect.h2" + [[nodiscard]] auto type_declaration::is_final() const& -> bool { return CPP2_UFCS(is_type_final)((*cpp2::impl::assert_not_null(n))); } +#line 497 "reflect.h2" + [[nodiscard]] auto type_declaration::make_final() & -> bool { return CPP2_UFCS(make_type_final)((*cpp2::impl::assert_not_null(n))); } + +#line 499 "reflect.h2" + [[nodiscard]] auto type_declaration::get_member_functions() const& -> std::vector + + { + std::vector ret {}; + for ( + auto const& d : CPP2_UFCS(get_type_scope_declarations)((*cpp2::impl::assert_not_null(n)), declaration_node::functions) ) { + static_cast(CPP2_UFCS(emplace_back)(ret, d, (*this))); + } + return ret; + } + +#line 510 "reflect.h2" + [[nodiscard]] auto type_declaration::get_member_functions_needing_initializer() const& -> std::vector + + { + std::vector ret {}; + for ( + auto const& d : CPP2_UFCS(get_type_scope_declarations)((*cpp2::impl::assert_not_null(n)), declaration_node::functions) ) + if ( !(CPP2_UFCS(has_initializer)((*cpp2::impl::assert_not_null(d)))) + && !(CPP2_UFCS(is_virtual_function)((*cpp2::impl::assert_not_null(d)))) + && !(CPP2_UFCS(is_defaultable_function)((*cpp2::impl::assert_not_null(d))))) + { + static_cast(CPP2_UFCS(emplace_back)(ret, d, (*this))); + } + return ret; + } + +#line 525 "reflect.h2" + [[nodiscard]] auto type_declaration::get_member_objects() const& -> std::vector + + { + std::vector ret {}; + for ( auto const& d : CPP2_UFCS(get_type_scope_declarations)((*cpp2::impl::assert_not_null(n)), declaration_node::objects) ) { + static_cast(CPP2_UFCS(emplace_back)(ret, d, (*this))); + } + return ret; + } + +#line 535 
"reflect.h2" + [[nodiscard]] auto type_declaration::get_member_types() const& -> std::vector + + { + std::vector ret {}; + for ( auto const& d : CPP2_UFCS(get_type_scope_declarations)((*cpp2::impl::assert_not_null(n)), declaration_node::types) ) { + static_cast(CPP2_UFCS(emplace_back)(ret, d, (*this))); + } + return ret; + } + +#line 545 "reflect.h2" + [[nodiscard]] auto type_declaration::get_member_aliases() const& -> std::vector + + { + std::vector ret {}; + for ( auto const& d : CPP2_UFCS(get_type_scope_declarations)((*cpp2::impl::assert_not_null(n)), declaration_node::aliases) ) { + static_cast(CPP2_UFCS(emplace_back)(ret, d, (*this))); + } + return ret; + } + +#line 555 "reflect.h2" + [[nodiscard]] auto type_declaration::get_members() const& -> std::vector + + { + std::vector ret {}; + for ( auto const& d : CPP2_UFCS(get_type_scope_declarations)((*cpp2::impl::assert_not_null(n)), declaration_node::all) ) { + static_cast(CPP2_UFCS(emplace_back)(ret, d, (*this))); + } + return ret; + } + +#line 565 "reflect.h2" + [[nodiscard]] auto type_declaration::query_declared_value_set_functions() const& -> query_declared_value_set_functions_ret + +#line 572 "reflect.h2" + { + cpp2::impl::deferred_init out_this_in_that; + cpp2::impl::deferred_init out_this_move_that; + cpp2::impl::deferred_init inout_this_in_that; + cpp2::impl::deferred_init inout_this_move_that; +#line 573 "reflect.h2" + auto declared {CPP2_UFCS(find_declared_value_set_functions)((*cpp2::impl::assert_not_null(n)))}; + out_this_in_that.construct(declared.out_this_in_that != nullptr); + out_this_move_that.construct(declared.out_this_move_that != nullptr); + inout_this_in_that.construct(declared.inout_this_in_that != nullptr); + inout_this_move_that.construct(cpp2::move(declared).inout_this_move_that != nullptr); + return { std::move(out_this_in_that.value()), std::move(out_this_move_that.value()), std::move(inout_this_in_that.value()), std::move(inout_this_move_that.value()) }; } + +#line 580 "reflect.h2" + 
auto type_declaration::add_member(cpp2::impl::in source) & -> void + { + auto decl {parse_statement(source)}; + if (!((cpp2::impl::as_(decl)))) { + error("the provided source string is not a valid statement"); + return ; + } + if (!(CPP2_UFCS(is_declaration)((*cpp2::impl::assert_not_null(decl))))) { + error("cannot add a member that is not a declaration"); + } + require(CPP2_UFCS(add_type_member)((*cpp2::impl::assert_not_null(n)), std::move(cpp2::move(decl))), + std::string("unexpected error while attempting to add member:\n") + source); + } + +#line 594 "reflect.h2" + auto type_declaration::remove_marked_members() & -> void { CPP2_UFCS(type_remove_marked_members)((*cpp2::impl::assert_not_null(n))); } +#line 595 "reflect.h2" + auto type_declaration::remove_all_members() & -> void { CPP2_UFCS(type_remove_all_members)((*cpp2::impl::assert_not_null(n))); } + +#line 597 "reflect.h2" + auto type_declaration::disable_member_function_generation() & -> void { CPP2_UFCS(type_disable_member_function_generation)((*cpp2::impl::assert_not_null(n))); } + + type_declaration::type_declaration(type_declaration const& that) + : declaration{ static_cast(that) }{} + +#line 601 "reflect.h2" +//----------------------------------------------------------------------- +// Alias declarations +// + +#line 608 "reflect.h2" + alias_declaration::alias_declaration( + + declaration_node* n_, + cpp2::impl::in s + ) + : declaration{ n_, s } +#line 613 "reflect.h2" + { + + if (cpp2::cpp2_default.is_active() && !(CPP2_UFCS(is_alias)((*cpp2::impl::assert_not_null(n)))) ) { cpp2::cpp2_default.report_violation(""); } + } + + alias_declaration::alias_declaration(alias_declaration const& that) + : declaration{ static_cast(that) }{} + +#line 620 "reflect.h2" +//----------------------------------------------------------------------- +// +// Metafunctions - these are hardwired for now until we get to the +// step of writing a Cpp2 interpreter to run inside the compiler +// 
+//----------------------------------------------------------------------- +// + +//----------------------------------------------------------------------- +// Some common metafunction helpers (metafunctions are just functions, +// so they can be factored as usual) +// +#line 632 "reflect.h2" +auto add_virtual_destructor(meta::type_declaration& t) -> void +{ + CPP2_UFCS(add_member)(t, "operator=: (virtual move this) = { }"); +} + +#line 638 "reflect.h2" +//----------------------------------------------------------------------- +// +// "... an abstract base class defines an interface ..." +// +// -- Stroustrup (The Design and Evolution of C++, 12.3.1) +// +//----------------------------------------------------------------------- +// +// interface +// +// an abstract base class having only pure virtual functions +// +#line 650 "reflect.h2" +auto interface(meta::type_declaration& t) -> void +{ + auto has_dtor {false}; + + for ( auto& m : CPP2_UFCS(get_members)(t) ) + { + CPP2_UFCS(require)(m, !(CPP2_UFCS(is_object)(m)), + "interfaces may not contain data objects"); + if (CPP2_UFCS(is_function)(m)) { + auto mf {CPP2_UFCS(as_function)(m)}; + CPP2_UFCS(require)(mf, !(CPP2_UFCS(is_copy_or_move)(mf)), + "interfaces may not copy or move; consider a virtual clone() instead"); + CPP2_UFCS(require)(mf, !(CPP2_UFCS(has_initializer)(mf)), + "interface functions must not have a function body; remove the '=' initializer"); + CPP2_UFCS(require)(mf, CPP2_UFCS(make_public)(mf), + "interface functions must be public"); + CPP2_UFCS(default_to_virtual)(mf); + has_dtor |= CPP2_UFCS(is_destructor)(cpp2::move(mf)); + } + } + + if (!(cpp2::move(has_dtor))) { + CPP2_UFCS(add_virtual_destructor)(t); + } +} + +#line 677 "reflect.h2" +//----------------------------------------------------------------------- +// +// "C.35: A base class destructor should be either public and +// virtual, or protected and non-virtual." +// +// "[C.43] ... 
a base class should not be copyable, and so does not +// necessarily need a default constructor." +// +// -- Stroustrup, Sutter, et al. (C++ Core Guidelines) +// +//----------------------------------------------------------------------- +// +// polymorphic_base +// +// A pure polymorphic base type that is not copyable, and whose +// destructor is either public and virtual or protected and nonvirtual. +// +// Unlike an interface, it can have nonpublic and nonvirtual functions. +// +#line 696 "reflect.h2" +auto polymorphic_base(meta::type_declaration& t) -> void +{ + auto has_dtor {false}; + + for ( auto& mf : CPP2_UFCS(get_member_functions)(t) ) + { + if (CPP2_UFCS(is_default_access)(mf)) { + CPP2_UFCS(default_to_public)(mf); + } + CPP2_UFCS(require)(mf, !(CPP2_UFCS(is_copy_or_move)(mf)), + "polymorphic base types may not copy or move; consider a virtual clone() instead"); + if (CPP2_UFCS(is_destructor)(mf)) { + has_dtor = true; + CPP2_UFCS(require)(mf, ((CPP2_UFCS(is_public)(mf) || CPP2_UFCS(is_default_access)(mf)) && CPP2_UFCS(is_virtual)(mf)) + || (CPP2_UFCS(is_protected)(mf) && !(CPP2_UFCS(is_virtual)(mf))), + "a polymorphic base type destructor must be public and virtual, or protected and nonvirtual"); + } + } + + if (!(cpp2::move(has_dtor))) { + CPP2_UFCS(add_virtual_destructor)(t); + } +} + +#line 721 "reflect.h2" +//----------------------------------------------------------------------- +// +// "... A totally ordered type ... requires operator<=> that +// returns std::strong_ordering. If the function is not +// user-written, a lexicographical memberwise implementation +// is generated by default..." 
+// +// -- P0707R4, section 3 +// +// Note: This feature derived from Cpp2 was already adopted +// into Standard C++ via paper P0515, so most of the +// heavy lifting is done by the Cpp1 C++20/23 compiler, +// including the memberwise default semantics +// (In contrast, cppfront has to do the work itself for +// default memberwise semantics for operator= assignment +// as those aren't yet part of Standard C++) +// +//----------------------------------------------------------------------- +// + +#line 741 "reflect.h2" +auto ordered_impl( + meta::type_declaration& t, + cpp2::impl::in ordering// must be "strong_ordering" etc. +) -> void +{ + auto has_spaceship {false}; + + for ( auto& mf : CPP2_UFCS(get_member_functions)(t) ) + { + if (CPP2_UFCS(has_name)(mf, "operator<=>")) { + has_spaceship = true; + auto return_name {CPP2_UFCS(unnamed_return_type)(mf)}; + if (CPP2_UFCS(find)(return_name, ordering) == return_name.npos) + { + CPP2_UFCS(error)(mf, "operator<=> must return std::" + cpp2::impl::as_(ordering)); + } + } + } + + if (!(cpp2::move(has_spaceship))) { + CPP2_UFCS(add_member)(t, "operator<=>: (this, that) -> std::" + (cpp2::impl::as_(ordering)) + ";"); + } +} + +//----------------------------------------------------------------------- +// ordered - a totally ordered type +// +// Note: the ordering that should be encouraged as default gets the nice name +// +#line 770 "reflect.h2" +auto ordered(meta::type_declaration& t) -> void +{ + ordered_impl(t, "strong_ordering"); +} + +//----------------------------------------------------------------------- +// weakly_ordered - a weakly ordered type +// +#line 778 "reflect.h2" +auto weakly_ordered(meta::type_declaration& t) -> void +{ + ordered_impl(t, "weak_ordering"); +} + +//----------------------------------------------------------------------- +// partially_ordered - a partially ordered type +// +#line 786 "reflect.h2" +auto partially_ordered(meta::type_declaration& t) -> void +{ + ordered_impl(t, 
"partial_ordering"); +} + +#line 792 "reflect.h2" +//----------------------------------------------------------------------- +// +// "A value is ... a regular type. It must have all public +// default construction, copy/move construction/assignment, +// and destruction, all of which are generated by default +// if not user-written; and it must not have any protected +// or virtual functions (including the destructor)." +// +// -- P0707R4, section 3 +// +//----------------------------------------------------------------------- +// +// copyable +// +// A type with (copy and move) x (construction and assignment) +// +#line 808 "reflect.h2" +auto copyable(meta::type_declaration& t) -> void +{ + // If the user explicitly wrote any of the copy/move functions, + // they must also have written the most general one - we can't + // assume we can safely generate it for them since they've opted + // into customized semantics + auto smfs {CPP2_UFCS(query_declared_value_set_functions)(t)}; + if ( !(smfs.out_this_in_that) + && ( + smfs.out_this_move_that + || smfs.inout_this_in_that + || smfs.inout_this_move_that)) + + { + CPP2_UFCS(error)(t, + "this type is partially copyable/movable - when you provide " + "any of the more-specific operator= signatures, you must also provide " + "the one with the general signature (out this, that); alternatively, " + "consider removing all the operator= functions and let them all be " + "generated for you with default memberwise semantics" + ); + } + else {if (!(cpp2::move(smfs).out_this_in_that)) { + CPP2_UFCS(add_member)(t, "operator=: (out this, that) = { }"); + }} +} + +//----------------------------------------------------------------------- +// +// basic_value +// +// A regular type: copyable, plus has public default construction +// and no protected or virtual functions +// +#line 842 "reflect.h2" +auto basic_value(meta::type_declaration& t) -> void +{ + CPP2_UFCS(copyable)(t); + + auto has_default_ctor {false}; + for ( auto& mf : 
CPP2_UFCS(get_member_functions)(t) ) { + has_default_ctor |= CPP2_UFCS(is_default_constructor)(mf); + CPP2_UFCS(require)(mf, !(CPP2_UFCS(is_protected)(mf)) && !(CPP2_UFCS(is_virtual)(mf)), + "a value type may not have a protected or virtual function"); + CPP2_UFCS(require)(mf, !(CPP2_UFCS(is_destructor)(mf)) || CPP2_UFCS(is_public)(mf) || CPP2_UFCS(is_default_access)(mf), + "a value type may not have a non-public destructor"); + } + + if (!(cpp2::move(has_default_ctor))) { + CPP2_UFCS(add_member)(t, "operator=: (out this) = { }"); + } +} + +//----------------------------------------------------------------------- +// +// "A 'value' is a totally ordered basic_value..." +// +// -- P0707R4, section 3 +// +// value - a value type that is totally ordered +// +// Note: the ordering that should be encouraged as default gets the nice name +// +#line 870 "reflect.h2" +auto value(meta::type_declaration& t) -> void +{ + CPP2_UFCS(ordered)(t); + CPP2_UFCS(basic_value)(t); +} + +#line 876 "reflect.h2" +auto weakly_ordered_value(meta::type_declaration& t) -> void +{ + CPP2_UFCS(weakly_ordered)(t); + CPP2_UFCS(basic_value)(t); +} + +#line 882 "reflect.h2" +auto partially_ordered_value(meta::type_declaration& t) -> void +{ + CPP2_UFCS(partially_ordered)(t); + CPP2_UFCS(basic_value)(t); +} + +#line 889 "reflect.h2" +//----------------------------------------------------------------------- +// +// C.20: If you can avoid defining default operations, do +// +// ##### Reason +// +// It's the simplest and gives the cleanest semantics. +// +// ... +// +// This is known as "the rule of zero". 
+// +// -- C++ Core Guidelines +// C.20: If you can avoid defining any default operations, do +// +// +//----------------------------------------------------------------------- +// +// cpp1_rule_of_zero +// +// a type without declared copy/move/destructor functions +// +#line 911 "reflect.h2" +auto cpp1_rule_of_zero(meta::type_declaration& t) -> void +{ + for ( auto& mf : CPP2_UFCS(get_member_functions)(t) ) + { + CPP2_UFCS(require)(t, !(CPP2_UFCS(is_constructor_with_that)(mf)) + && !(CPP2_UFCS(is_assignment_with_that)(mf)) + && !(CPP2_UFCS(is_destructor)(mf)), + "the rule of zero requires no copy/move/destructor functions"); + } + CPP2_UFCS(disable_member_function_generation)(t); +} + +//----------------------------------------------------------------------- +// +// "By definition, a `struct` is a `class` in which members +// are by default `public`; that is, +// +// struct s { ... +// +// is simply shorthand for +// +// class s { public: ... +// +// ... Which style you use depends on circumstances and taste. +// I usually prefer to use `struct` for classes that have all +// data `public`." +// +// -- Stroustrup (The C++ Programming Language, 3rd ed., p. 234) +// +//----------------------------------------------------------------------- +// +// struct +// +// a type with only public bases, objects, and functions, +// no virtual functions, and no user-defined constructors +// (i.e., no invariants) or assignment or destructors. 
+// +#line 948 "reflect.h2" +auto cpp2_struct(meta::type_declaration& t) -> void +{ + std::string ctor_params {}; + std::string ctor_inits {}; + + auto found_member_without_initializer {false}; + + for ( auto& m : CPP2_UFCS(get_members)(t) ) + { + CPP2_UFCS(require)(m, CPP2_UFCS(make_public)(m), + "all struct members must be public"); + if (CPP2_UFCS(is_function)(m)) { + auto mf {CPP2_UFCS(as_function)(m)}; + CPP2_UFCS(require)(t, !(CPP2_UFCS(is_virtual)(mf)), + "a struct may not have a virtual function"); + CPP2_UFCS(require)(t, !(CPP2_UFCS(has_name)(cpp2::move(mf), "operator=")), + "a struct may not have a user-defined operator="); + } + else {if (CPP2_UFCS(is_object)(m)) { + auto mo {CPP2_UFCS(as_object)(m)}; + if (CPP2_UFCS(name)(mo) != "this") { + ctor_params += "" + cpp2::to_string(CPP2_UFCS(name)(mo)) + "_, "; + ctor_inits += "" + cpp2::to_string(CPP2_UFCS(name)(mo)) + " = " + cpp2::to_string(CPP2_UFCS(name)(mo)) + "_; "; + } + else { + ctor_inits += "" + cpp2::to_string(CPP2_UFCS(type)(mo)) + " = (); "; + } + found_member_without_initializer |= !(CPP2_UFCS(has_initializer)(cpp2::move(mo))); + }} + } + CPP2_UFCS(cpp1_rule_of_zero)(t); + + // If we found any data members + if (!(CPP2_UFCS(empty)(ctor_params))) + { + // Then to enable construction from corresponding values + // requires a constructor... 
an exception to the rule of zero + CPP2_UFCS(add_member)(t, " operator=: (implicit out this, " + cpp2::to_string(cpp2::move(ctor_params)) + ") = { " + cpp2::to_string(cpp2::move(ctor_inits)) + " }"); + + // And if all members had initializers, we need a default constructor + if (!(cpp2::move(found_member_without_initializer))) { + CPP2_UFCS(add_member)(t, " operator=: (implicit out this) = { }"); + } + } +} + +value_member_info::value_member_info(auto const& name_, auto const& type_, auto const& value_) + : name{ name_ } + , type{ type_ } + , value{ value_ }{} + +#line 995 "reflect.h2" +//----------------------------------------------------------------------- +// +// "C enumerations constitute a curiously half-baked concept. ... +// the cleanest way out was to deem each enumeration a separate type." +// +// -- Stroustrup (The Design and Evolution of C++, 11.7) +// +// "An enumeration is a distinct type ... with named constants" +// +// -- ISO C++ Standard +// +//----------------------------------------------------------------------- +// +// basic_enum +// +// a type together with named constants that are its possible values +// + +#line 1018 "reflect.h2" +auto basic_enum( + meta::type_declaration& t, + auto const& nextval, + cpp2::impl::in bitwise + ) -> void +{ + std::vector enumerators {}; + cpp2::i64 min_value {}; + cpp2::i64 max_value {}; + cpp2::impl::deferred_init underlying_type; + + CPP2_UFCS(reserve_names)(t, "operator=", "operator<=>"); + if (bitwise) { + CPP2_UFCS(reserve_names)(t, "has", "set", "clear", "to_string", "get_raw_value", "none"); + } + + // 1. 
Gather: The names of all the user-written members, and find/compute the type + + underlying_type.construct(CPP2_UFCS(get_argument)(t, 0));// use the first template argument, if there was one + + auto found_non_numeric {false}; +{ +std::string value{"-1"}; + +#line 1041 "reflect.h2" + for ( + auto const& m : CPP2_UFCS(get_members)(t) ) + if ( CPP2_UFCS(is_member_object)(m)) + { + CPP2_UFCS(require)(m, CPP2_UFCS(is_public)(m) || CPP2_UFCS(is_default_access)(m), + "an enumerator cannot be protected or private"); + + auto mo {CPP2_UFCS(as_object)(m)}; + if (!(CPP2_UFCS(has_wildcard_type)(mo))) { + CPP2_UFCS(error)(mo, + "an explicit underlying type should be specified as a compile-time argument " + "to the metafunction - try 'enum' or 'flag_enum'" ); } - if constexpr (!(CPP2_PACK_EMPTY(etc))) { - reserve_names(CPP2_FORWARD(etc)...); + + auto init {CPP2_UFCS(initializer)(mo)}; + + auto is_default_or_numeric {is_empty_or_a_decimal_number(init)}; + found_non_numeric |= !(CPP2_UFCS(empty)(init)) && !(is_default_or_numeric); + CPP2_UFCS(require)(m, !(cpp2::move(is_default_or_numeric)) || !(found_non_numeric) || CPP2_UFCS(has_name)(mo, "none"), + "" + cpp2::to_string(CPP2_UFCS(name)(mo)) + ": enumerators with non-numeric values must come after all default and numeric values"); + + nextval(value, cpp2::move(init)); + + auto v {std::strtoll(&CPP2_ASSERT_IN_BOUNDS_LITERAL(value, 0), nullptr, 10)}; // for non-numeric values we'll just get 0 which is okay for now + if (cpp2::impl::cmp_less(v,min_value)) { + min_value = v; + } + if (cpp2::impl::cmp_greater(v,max_value)) { + max_value = cpp2::move(v); + } + + // Adding local variable 'e' to work around a Clang warning + value_member_info e {cpp2::impl::as_(CPP2_UFCS(name)(mo)), "", value}; + CPP2_UFCS(push_back)(enumerators, cpp2::move(e)); + + CPP2_UFCS(mark_for_removal_from_enclosing_type)(mo); + static_cast(cpp2::move(mo)); + } +} + +#line 1081 "reflect.h2" + if ((CPP2_UFCS(empty)(enumerators))) { + CPP2_UFCS(error)(t, "an 
enumeration must contain at least one enumerator value"); + return ; + } + + // Compute the default underlying type, if it wasn't explicitly specified + if (underlying_type.value() == "") + { + CPP2_UFCS(require)(t, !(cpp2::move(found_non_numeric)), + "if you write an enumerator with a non-numeric-literal value, " + "you must specify the enumeration's underlying type" + ); + + if (!(bitwise)) { + if (cpp2::impl::cmp_greater_eq(min_value,std::numeric_limits::min()) && cpp2::impl::cmp_less_eq(max_value,std::numeric_limits::max())) { + underlying_type.value() = "i8"; + } + else {if (cpp2::impl::cmp_greater_eq(min_value,std::numeric_limits::min()) && cpp2::impl::cmp_less_eq(max_value,std::numeric_limits::max())) { + underlying_type.value() = "i16"; + } + else {if (cpp2::impl::cmp_greater_eq(min_value,std::numeric_limits::min()) && cpp2::impl::cmp_less_eq(max_value,std::numeric_limits::max())) { + underlying_type.value() = "i32"; + } + else {if (cpp2::impl::cmp_greater_eq(cpp2::move(min_value),std::numeric_limits::min()) && cpp2::impl::cmp_less_eq(cpp2::move(max_value),std::numeric_limits::max())) { + underlying_type.value() = "i64"; + } + else { + CPP2_UFCS(error)(t, + "values are outside the range representable by the " + "largest supported underlying signed type (i64)" + ); + }}}} + } + else { + auto umax {cpp2::move(max_value) * cpp2::impl::as_()}; + if (cpp2::impl::cmp_less_eq(umax,std::numeric_limits::max())) { + underlying_type.value() = "u8"; + } + else {if (cpp2::impl::cmp_less_eq(umax,std::numeric_limits::max())) { + underlying_type.value() = "u16"; + } + else {if (cpp2::impl::cmp_less_eq(cpp2::move(umax),std::numeric_limits::max())) { + underlying_type.value() = "u32"; + } + else { + underlying_type.value() = "u64"; + }}} + } + } + +#line 1132 "reflect.h2" + // 2. Replace: Erase the contents and replace with modified contents + // + // Note that most values and functions are declared as '==' compile-time values, i.e. 
Cpp1 'constexpr' + + CPP2_UFCS(remove_marked_members)(t); + + // Generate the 'none' value if appropriate, and use that or + // else the first enumerator as the default-constructed value + auto default_value {CPP2_ASSERT_IN_BOUNDS_LITERAL(enumerators, 0).name}; + if (bitwise) { + default_value = "none"; + value_member_info e {"none", "", "0"}; + CPP2_UFCS(push_back)(enumerators, cpp2::move(e)); + } + + // Generate all the private implementation + CPP2_UFCS(add_member)(t, " _value : " + cpp2::to_string(underlying_type.value()) + ";"); + CPP2_UFCS(add_member)(t, " private operator= : (implicit out this, _val: i64) == " + "_value = cpp2::unsafe_narrow<" + cpp2::to_string(underlying_type.value()) + ">(_val);"); + + // Generate the bitwise operations + if (bitwise) { + CPP2_UFCS(add_member)(t, " operator|=: ( inout this, that ) == _value |= that._value;"); + CPP2_UFCS(add_member)(t, " operator&=: ( inout this, that ) == _value &= that._value;"); + CPP2_UFCS(add_member)(t, " operator^=: ( inout this, that ) == _value ^= that._value;"); + CPP2_UFCS(add_member)(t, " operator| : ( this, that ) -> " + cpp2::to_string(CPP2_UFCS(name)(t)) + " == _value | that._value;"); + CPP2_UFCS(add_member)(t, " operator& : ( this, that ) -> " + cpp2::to_string(CPP2_UFCS(name)(t)) + " == _value & that._value;"); + CPP2_UFCS(add_member)(t, " operator^ : ( this, that ) -> " + cpp2::to_string(CPP2_UFCS(name)(t)) + " == _value ^ that._value;"); + CPP2_UFCS(add_member)(t, " has : ( this, that ) -> bool == _value & that._value;"); + CPP2_UFCS(add_member)(t, " set : ( inout this, that ) == _value |= that._value;"); + CPP2_UFCS(add_member)(t, " clear : ( inout this, that ) == _value &= that._value~;"); + } + + // Add the enumerators + for ( auto const& e : enumerators ) { + CPP2_UFCS(add_member)(t, " " + cpp2::to_string(e.name) + " : " + cpp2::to_string(CPP2_UFCS(name)(t)) + " == " + cpp2::to_string(e.value) + ";"); + } + + // Generate the common functions + CPP2_UFCS(add_member)(t, " get_raw_value 
: (this) -> " + cpp2::to_string(cpp2::move(underlying_type.value())) + " == _value;"); + CPP2_UFCS(add_member)(t, " operator= : (out this) == { _value = " + cpp2::to_string(default_value) + "._value; }"); + CPP2_UFCS(add_member)(t, " operator= : (out this, that) == { }"); + CPP2_UFCS(add_member)(t, " operator<=> : (this, that) -> std::strong_ordering;"); +{ +std::string to_string_impl{" to_string_impl: (this, prefix: std::string_view"}; + + // Provide 'to_string' and 'to_code' functions to print enumerator + // name(s) as human-readable strings or as code expressions + +#line 1179 "reflect.h2" + { + if (bitwise) { + to_string_impl += ", separator: std::string_view ) -> std::string = { \n" + " ret : std::string = \"(\";\n" + " sep : std::string = ();\n" + " if this == none { return \"(none)\"; }\n"; + } + else { + to_string_impl += ") -> std::string = { \n"; + } + + to_string_impl += " pref := cpp2::to_string(prefix);\n"; + + for ( + auto const& e : enumerators ) { + if (e.name != "_") {// ignore unnamed values + if (bitwise) { + if (e.name != "none") { + to_string_impl += " if (this & " + cpp2::to_string(e.name) + ") == " + cpp2::to_string(e.name) + " { " + "ret += sep + pref + \"" + cpp2::to_string(e.name) + "\"; sep = separator; " + "}\n"; + } + } + else { + to_string_impl += " if this == " + cpp2::to_string(e.name) + " { return pref + \"" + cpp2::to_string(e.name) + "\"; }\n"; + } + } + } + + if (bitwise) { + to_string_impl += " return ret+\")\";\n}\n"; + } + else { + to_string_impl += " return \"invalid " + cpp2::to_string(CPP2_UFCS(name)(t)) + " value\";\n}\n"; + } + + CPP2_UFCS(add_member)(t, cpp2::move(to_string_impl)); + } +} + +#line 1218 "reflect.h2" + if (bitwise) { + CPP2_UFCS(add_member)(t, " to_string: (this) -> std::string = to_string_impl( \"\", \", \" );"); + CPP2_UFCS(add_member)(t, " to_code : (this) -> std::string = to_string_impl( \"" + cpp2::to_string(CPP2_UFCS(name)(t)) + "::\", \" | \" );"); + } + else { + CPP2_UFCS(add_member)(t, " 
to_string: (this) -> std::string = to_string_impl( \"\" );"); + CPP2_UFCS(add_member)(t, " to_code : (this) -> std::string = to_string_impl( \"" + cpp2::to_string(CPP2_UFCS(name)(t)) + "::\" );"); + } +{ +std::string from_string{" from_string: (s: std::string_view) -> " + cpp2::to_string(CPP2_UFCS(name)(t)) + " = { \n"}; + + // Provide a 'from_string' function to parse strings into enumerators + +#line 1229 "reflect.h2" + { + std::string_view prefix {""}; + std::string_view combine_op {"return"}; + + // For flags, accept a list that we break apart and then |= together + if (bitwise) + { + prefix = "flag_"; + combine_op = "ret |="; + + from_string += " ret := none;\n" + " outer: do {\n" + " for cpp2::string_util::split_string_list(s) do (x) {\n"; + } + // Otherwise, accept just a single string + else { + from_string += " x := s;\n"; + } +{ +std::string_view else_{""}; + +#line 1249 "reflect.h2" + for ( + auto const& e : cpp2::move(enumerators) ) { + from_string += " " + cpp2::to_string(else_) + "if \"" + cpp2::to_string(e.name) + "\" == x { " + cpp2::to_string(combine_op) + " " + cpp2::to_string(e.name) + "; }\n"; + else_ = "else "; + } +} + +#line 1255 "reflect.h2" + if (bitwise) { + from_string += " else { break outer; }\n" + " }\n" + " return ret;\n" + " } while false;\n"; + } + + from_string += " cpp2::type_safety.report_violation( (\"can't convert string '\" + cpp2::to_string(s) + \"' to " + cpp2::to_string(cpp2::move(prefix)) + "enum of type " + cpp2::to_string(CPP2_UFCS(name)(t)) + "\").c_str() );\n" + " return " + cpp2::to_string(cpp2::move(default_value)) + ";\n" + " }\n\n"; + + CPP2_UFCS(add_member)(t, cpp2::move(from_string)); + } +} + +#line 1269 "reflect.h2" + CPP2_UFCS(add_member)(t, " from_code: (s: std::string_view) -> " + cpp2::to_string(CPP2_UFCS(name)(t)) + " = { str: std::string = s; return from_string( cpp2::string_util::replace_all(str, \"" + cpp2::to_string(CPP2_UFCS(name)(t)) + "::\", \"\" ) ); }"); +} + +#line 1273 "reflect.h2" 
+//----------------------------------------------------------------------- +// +// "An enum[...] is a totally ordered value type that stores a +// value of its enumerators's type, and otherwise has only public +// member variables of its enumerator's type, all of which are +// naturally scoped because they are members of a type." +// +// -- P0707R4, section 3 +// +#line 1282 "reflect.h2" +auto cpp2_enum(meta::type_declaration& t) -> void +{ + // Let basic_enum do its thing, with an incrementing value generator + CPP2_UFCS(basic_enum)(t, + [](std::string& value, cpp2::impl::in specified_value) mutable -> void{ + if (!(CPP2_UFCS(empty)(specified_value))) { + value = specified_value; + }else { + auto v {std::strtoll(&CPP2_ASSERT_IN_BOUNDS_LITERAL(value, 0), nullptr, 10)}; + value = cpp2::impl::as_((cpp2::move(v) + 1)); + } + }, + false // disable bitwise operations + ); +} + +#line 1299 "reflect.h2" +//----------------------------------------------------------------------- +// +// "flag_enum expresses an enumeration that stores values +// corresponding to bitwise-or'd enumerators. The enumerators must +// be powers of two, and are automatically generated [...] A none +// value is provided [...] Operators | and & are provided to +// combine and extract values." 
+// +// -- P0707R4, section 3 +// +#line 1309 "reflect.h2" +auto flag_enum(meta::type_declaration& t) -> void +{ + // Let basic_enum do its thing, with a power-of-two value generator + CPP2_UFCS(basic_enum)(t, + [](std::string& value, cpp2::impl::in specified_value) mutable -> void{ + if (!(CPP2_UFCS(empty)(specified_value))) { + value = specified_value; + }else { + auto v {std::strtoll(&CPP2_ASSERT_IN_BOUNDS_LITERAL(value, 0), nullptr, 10)}; + if (cpp2::impl::cmp_less(v,1)) { + value = "1"; + } + else { + value = cpp2::impl::as_((cpp2::move(v) * 2)); + } + } + }, + true // enable bitwise operations + ); +} + +#line 1331 "reflect.h2" +//----------------------------------------------------------------------- +// +// "As with void*, programmers should know that unions [...] are +// inherently dangerous, should be avoided wherever possible, +// and should be handled with special care when actually needed." +// +// -- Stroustrup (The Design and Evolution of C++, 14.3.4.1) +// +// "C++17 needs a type-safe union... The implications of the +// consensus `variant` design are well understood and have been +// explored over several LEWG discussions, over a thousand emails, +// a joint LEWG/EWG session, and not to mention 12 years of +// experience with Boost and other libraries." +// +// -- Axel Naumann, in P0088 (wg21.link/p0088), +// the adopted proposal for C++17 std::variant +// +//----------------------------------------------------------------------- +// +// union +// +// a type that contains exactly one of a fixed set of values at a time +// + +#line 1355 "reflect.h2" +auto cpp2_union(meta::type_declaration& t) -> void +{ + std::vector alternatives {}; +{ +auto value{0}; + + // 1. 
Gather: All the user-written members, and find/compute the max size + +#line 1362 "reflect.h2" + for ( + + auto const& m : CPP2_UFCS(get_members)(t) ) { do + if ( CPP2_UFCS(is_member_object)(m)) + { + CPP2_UFCS(require)(m, CPP2_UFCS(is_public)(m) || CPP2_UFCS(is_default_access)(m), + "a union alternative cannot be protected or private" + ); + + CPP2_UFCS(require)(m, !(CPP2_UFCS(starts_with)(CPP2_UFCS(name)(m), "is_")) + && !(CPP2_UFCS(starts_with)(CPP2_UFCS(name)(m), "set_")), + "a union alternative's name cannot start with 'is_' or 'set_' - that could cause " + "user confusion with the 'is_alternative' and 'set_alternative' generated functions" + ); + + auto mo {CPP2_UFCS(as_object)(m)}; + CPP2_UFCS(require)(mo, CPP2_UFCS(empty)(CPP2_UFCS(initializer)(mo)), + "a union alternative cannot have an initializer" + ); + + // Adding local variable 'e' to work around a Clang warning + value_member_info e {cpp2::impl::as_(CPP2_UFCS(name)(mo)), CPP2_UFCS(type)(mo), cpp2::impl::as_(value)}; + CPP2_UFCS(push_back)(alternatives, cpp2::move(e)); + + CPP2_UFCS(mark_for_removal_from_enclosing_type)(mo); + static_cast(cpp2::move(mo)); + } while (false); ++value; } +} + +#line 1390 "reflect.h2" + std::string discriminator_type {}; + if (cpp2::impl::cmp_less(CPP2_UFCS(ssize)(alternatives),std::numeric_limits::max())) { + discriminator_type = "i8"; + } + else {if (cpp2::impl::cmp_less(CPP2_UFCS(ssize)(alternatives),std::numeric_limits::max())) { + discriminator_type = "i16"; + } + else {if (cpp2::impl::cmp_less(CPP2_UFCS(ssize)(alternatives),std::numeric_limits::max())) { + discriminator_type = "i32"; + } + else { + discriminator_type = "i64"; + }}} + +#line 1405 "reflect.h2" + // 2. 
Replace: Erase the contents and replace with modified contents + + CPP2_UFCS(remove_marked_members)(t); +{ +std::string storage{" _storage: cpp2::aligned_storage bool = _discriminator == " + cpp2::to_string(a.value) + ";\n"); + + CPP2_UFCS(add_member)(t, " " + cpp2::to_string(a.name) + ": (this) -> forward " + cpp2::to_string(a.type) + " pre(is_" + cpp2::to_string(a.name) + "()) = " + "reinterpret_cast<* const " + cpp2::to_string(a.type) + ">(_storage&)*;\n" + ); + + CPP2_UFCS(add_member)(t, " " + cpp2::to_string(a.name) + ": (inout this) -> forward " + cpp2::to_string(a.type) + " pre(is_" + cpp2::to_string(a.name) + "()) = " + "reinterpret_cast<*" + cpp2::to_string(a.type) + ">(_storage&)*;\n" + ); + + CPP2_UFCS(add_member)(t, " set_" + cpp2::to_string(a.name) + ": (inout this, _value: " + cpp2::to_string(a.type) + ") = { " + "if !is_" + cpp2::to_string(a.name) + "() { _destroy(); std::construct_at( reinterpret_cast<*" + cpp2::to_string(a.type) + ">(_storage&), _value); } " + "else { reinterpret_cast<*" + cpp2::to_string(a.type) + ">(_storage&)* = _value; } " + "_discriminator = " + cpp2::to_string(a.value) + "; " + "}\n" + ); + + CPP2_UFCS(add_member)(t, " set_" + cpp2::to_string(a.name) + ": (inout this, forward _args...: _) = { " + "if !is_" + cpp2::to_string(a.name) + "() { _destroy(); std::construct_at( reinterpret_cast<*" + cpp2::to_string(a.type) + ">(_storage&), _args...); } " + " else { reinterpret_cast<*" + cpp2::to_string(a.type) + ">(_storage&)* = :" + cpp2::to_string(a.type) + " = (_args...); } " + "_discriminator = " + cpp2::to_string(a.value) + "; " + "}\n" + ); + } +{ +std::string destroy{" private _destroy: (inout this) = {\n"}; + + // Add destroy + +#line 1462 "reflect.h2" + { + for ( + auto const& a : alternatives ) { + destroy += " if _discriminator == " + cpp2::to_string(a.value) + " { std::destroy_at( reinterpret_cast<*" + cpp2::to_string(a.type) + ">(_storage&) ); }\n"; + } + + destroy += " _discriminator = -1;\n" + " }\n"; + 
CPP2_UFCS(add_member)(t, cpp2::move(destroy)); + } +} + + // Add the destructor +#line 1474 "reflect.h2" + CPP2_UFCS(add_member)(t, " operator=: (move this) = { _destroy(); _ = this; }"); + + // Add default constructor + CPP2_UFCS(add_member)(t, " operator=: (out this) = { }"); +{ +std::string value_set{""}; + + // Add copy/move construction and assignment + +#line 1481 "reflect.h2" + { + for ( + auto const& a : cpp2::move(alternatives) ) { + value_set += " if that.is_" + cpp2::to_string(a.name) + "() { set_" + cpp2::to_string(a.name) + "( that." + cpp2::to_string(a.name) + "() ); }\n"; + } + value_set += " }\n"; + + CPP2_UFCS(add_member)(t, " operator=: (out this, that) = {\n" + " _storage = ();\n" + " _discriminator = -1;\n" + + value_set + ); + CPP2_UFCS(add_member)(t, " operator=: (inout this, that) = {\n" + " _storage = _;\n" + " _discriminator = _;\n" + + cpp2::move(value_set) + ); + } +} +#line 1499 "reflect.h2" +} + +#line 1502 "reflect.h2" +//----------------------------------------------------------------------- +// +// print - output a pretty-printed visualization of t +// +#line 1506 "reflect.h2" +auto print(cpp2::impl::in t) -> void +{ + std::cout << CPP2_UFCS(print)(t) << "\n"; +} + + +constexpr expression_flags::expression_flags(cpp2::impl::in _val) + : _value{ cpp2::unsafe_narrow(_val) } { } + +constexpr auto expression_flags::operator=(cpp2::impl::in _val) -> expression_flags& { + _value = cpp2::unsafe_narrow(_val); + return *this; } +constexpr auto expression_flags::operator|=(expression_flags const& that) & -> void { _value |= that._value; } +constexpr auto expression_flags::operator&=(expression_flags const& that) & -> void { _value &= that._value; } +constexpr auto expression_flags::operator^=(expression_flags const& that) & -> void { _value ^= that._value; } +[[nodiscard]] constexpr auto expression_flags::operator|(expression_flags const& that) const& -> expression_flags { return _value | that._value; } +[[nodiscard]] constexpr auto 
expression_flags::operator&(expression_flags const& that) const& -> expression_flags { return _value & that._value; } +[[nodiscard]] constexpr auto expression_flags::operator^(expression_flags const& that) const& -> expression_flags { return _value ^ that._value; } +[[nodiscard]] constexpr auto expression_flags::has(expression_flags const& that) const& -> bool { return _value & that._value; } +constexpr auto expression_flags::set(expression_flags const& that) & -> void { _value |= that._value; } +constexpr auto expression_flags::clear(expression_flags const& that) & -> void { _value &= ~that._value; } +inline CPP2_CONSTEXPR expression_flags expression_flags::case_insensitive{ 1 }; + +inline CPP2_CONSTEXPR expression_flags expression_flags::multiple_lines{ 2 }; + +inline CPP2_CONSTEXPR expression_flags expression_flags::single_line{ 4 }; + +inline CPP2_CONSTEXPR expression_flags expression_flags::no_group_captures{ 8 }; + +inline CPP2_CONSTEXPR expression_flags expression_flags::perl_code_syntax{ 16 }; + +inline CPP2_CONSTEXPR expression_flags expression_flags::perl_code_syntax_in_classes{ 32 }; + +inline CPP2_CONSTEXPR expression_flags expression_flags::none{ 0 }; + +[[nodiscard]] constexpr auto expression_flags::get_raw_value() const& -> cpp2::u8 { return _value; } +constexpr expression_flags::expression_flags() + : _value{ none._value }{} +constexpr expression_flags::expression_flags(expression_flags const& that) + : _value{ that._value }{} +constexpr auto expression_flags::operator=(expression_flags const& that) -> expression_flags& { + _value = that._value; + return *this;} +constexpr expression_flags::expression_flags(expression_flags&& that) noexcept + : _value{ std::move(that)._value }{} +constexpr auto expression_flags::operator=(expression_flags&& that) noexcept -> expression_flags& { + _value = std::move(that)._value; + return *this;} +[[nodiscard]] auto expression_flags::to_string_impl(cpp2::impl::in prefix, cpp2::impl::in separator) const& -> 
std::string{ + +std::string ret {"("}; + +std::string sep {}; +if ((*this) == none) {return "(none)"; } + +auto pref {cpp2::to_string(prefix)}; +if (((*this) & case_insensitive) == case_insensitive) {ret += sep + pref + "case_insensitive";sep = separator;} +if (((*this) & multiple_lines) == multiple_lines) {ret += sep + pref + "multiple_lines";sep = separator;} +if (((*this) & single_line) == single_line) {ret += sep + pref + "single_line";sep = separator;} +if (((*this) & no_group_captures) == no_group_captures) {ret += sep + pref + "no_group_captures";sep = separator;} +if (((*this) & perl_code_syntax) == perl_code_syntax) {ret += sep + pref + "perl_code_syntax";sep = separator;} +if (((*this) & perl_code_syntax_in_classes) == perl_code_syntax_in_classes) {ret += sep + cpp2::move(pref) + "perl_code_syntax_in_classes";sep = separator;} +return cpp2::move(ret) + ")"; +} + +[[nodiscard]] auto expression_flags::to_string() const& -> std::string { return to_string_impl("", ", "); } +[[nodiscard]] auto expression_flags::to_code() const& -> std::string { return to_string_impl("expression_flags::", " | "); } +[[nodiscard]] auto expression_flags::from_string(cpp2::impl::in s) -> expression_flags{ + +auto ret {none}; +do {{ +for ( auto const& x : cpp2::string_util::split_string_list(s) ) { +if ("case_insensitive" == x) {ret |= case_insensitive;} +else {if ("multiple_lines" == x) {ret |= multiple_lines;} +else {if ("single_line" == x) {ret |= single_line;} +else {if ("no_group_captures" == x) {ret |= no_group_captures;} +else {if ("perl_code_syntax" == x) {ret |= perl_code_syntax;} +else {if ("perl_code_syntax_in_classes" == x) {ret |= perl_code_syntax_in_classes;} +else {if ("none" == x) {ret |= none;} +else {goto BREAK_outer;} +#line 1 "reflect.h2" +}}}}}} +} + +return ret; +} CPP2_CONTINUE_BREAK(outer) } + while ( +false +); +CPP2_UFCS(report_violation)(cpp2::type_safety, CPP2_UFCS(c_str)(("can't convert string '" + cpp2::to_string(s) + "' to flag_enum of type 
expression_flags"))); +return none; +} + +[[nodiscard]] auto expression_flags::from_code(cpp2::impl::in s) -> expression_flags{ +std::string str {s}; return from_string(cpp2::string_util::replace_all(cpp2::move(str), "expression_flags::", "")); } + +#line 1512 "reflect.h2" +//----------------------------------------------------------------------- +// +// regex - creates regular expressions from members +// +// Each member that starts with `regex` is replaced by a regular expression +// of the initializer string. E.g.: +// ``` +// regex := "ab"; +// ``` +// is replaced with +// ``` +// regex := ::cpp2::regex::regular_expression<...>; +// ``` +// + +#line 1528 "reflect.h2" +// Possible modifiers for a regular expression. +// + +#line 1532 "reflect.h2" + // mod: i + // mod: m + // mod: s + // mod: n + // mod: x + // mod: xx + +#line 1541 "reflect.h2" +// Tokens for regular expressions. +// + +// Basic class for a regex token. +// + +#line 1550 "reflect.h2" + regex_token::regex_token(cpp2::impl::in str) + : string_rep{ str }{ + +#line 1552 "reflect.h2" + } + +#line 1554 "reflect.h2" + regex_token::regex_token() + : string_rep{ "" }{ + +#line 1556 "reflect.h2" + } + + //parse: (inout ctx: parse_context) -> token_ptr; + // Generate the matching code. + +#line 1561 "reflect.h2" + auto regex_token::add_groups([[maybe_unused]] std::set& unnamed_param_2) const -> void{}// Adds all group indices to the set. +#line 1562 "reflect.h2" + [[nodiscard]] auto regex_token::to_string() const& -> std::string{return string_rep; }// Create a string representation. +#line 1563 "reflect.h2" + auto regex_token::set_string(cpp2::impl::in s) & -> void{string_rep = s; } + + regex_token::~regex_token() noexcept{}// Set the string representation. 
+ +#line 1578 "reflect.h2" + regex_token_check::regex_token_check(cpp2::impl::in str, cpp2::impl::in check_) + : regex_token{ str } + , check{ check_ }{ + +#line 1581 "reflect.h2" + } + +#line 1583 "reflect.h2" + auto regex_token_check::generate_code(generation_context& ctx) const -> void{ + ctx.add_check(check + "(" + ctx.match_parameters() + ")"); + } + + regex_token_check::~regex_token_check() noexcept{} + +#line 1597 "reflect.h2" + regex_token_code::regex_token_code(cpp2::impl::in str, cpp2::impl::in code_) + : regex_token{ str } + , code{ code_ }{ + +#line 1600 "reflect.h2" + } + +#line 1602 "reflect.h2" + auto regex_token_code::generate_code(generation_context& ctx) const -> void{ + ctx.add(code); + } + + regex_token_code::~regex_token_code() noexcept{} + +#line 1614 "reflect.h2" + regex_token_empty::regex_token_empty(cpp2::impl::in str) + : regex_token{ str }{ + +#line 1616 "reflect.h2" + } + +#line 1618 "reflect.h2" + auto regex_token_empty::generate_code([[maybe_unused]] generation_context& unnamed_param_2) const -> void{ + // Nothing. 
+ } + + regex_token_empty::~regex_token_empty() noexcept{} + +#line 1632 "reflect.h2" + regex_token_list::regex_token_list(cpp2::impl::in t) + : regex_token{ gen_string(t) } + , tokens{ t }{ + +#line 1635 "reflect.h2" + } + +#line 1637 "reflect.h2" + auto regex_token_list::generate_code(generation_context& ctx) const -> void{ + for ( auto const& token : tokens ) { + (*cpp2::impl::assert_not_null(token)).generate_code(ctx); + } + } + +#line 1643 "reflect.h2" + auto regex_token_list::add_groups(std::set& groups) const -> void{ + for ( auto const& token : tokens ) { + (*cpp2::impl::assert_not_null(token)).add_groups(groups); + } + } + +#line 1649 "reflect.h2" + [[nodiscard]] auto regex_token_list::gen_string(cpp2::impl::in vec) -> std::string{ + std::string r {""}; + for ( auto const& token : vec ) { + r += (*cpp2::impl::assert_not_null(token)).to_string(); + } + return r; + } + + regex_token_list::~regex_token_list() noexcept{} + +#line 1672 "reflect.h2" + auto parse_context_group_state::next_alternative() & -> void{ + token_vec new_list {}; + std::swap(new_list, cur_match_list); + post_process_list(new_list); + static_cast(alternate_match_lists.insert(alternate_match_lists.end(), CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, cpp2::move(new_list)))); + } + +#line 1680 "reflect.h2" + auto parse_context_group_state::swap(parse_context_group_state& t) & -> void{ + std::swap(cur_match_list, t.cur_match_list); + std::swap(alternate_match_lists, t.alternate_match_lists); + std::swap(modifiers, t.modifiers); + } + +#line 1687 "reflect.h2" + [[nodiscard]] auto parse_context_group_state::get_as_token() & -> token_ptr{ + if (alternate_match_lists.empty()) { + post_process_list(cur_match_list); + return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, cur_match_list); + } + else { + next_alternative(); + return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, alternate_match_lists); + } + } + +#line 1699 "reflect.h2" + auto parse_context_group_state::add(cpp2::impl::in token) & -> void{ + 
cur_match_list.push_back(token); + } + +#line 1704 "reflect.h2" + [[nodiscard]] auto parse_context_group_state::empty() const& -> bool { return cur_match_list.empty(); } + +#line 1708 "reflect.h2" + auto parse_context_group_state::post_process_list(token_vec& list) -> void{ + // Merge all characters + auto merge_pos {list.begin()}; + for( ; merge_pos != list.end(); (++merge_pos) ) { + if (cpp2::impl::is(*cpp2::impl::assert_not_null(*cpp2::impl::assert_not_null(merge_pos)))) { + auto combine_pos {merge_pos + 1}; + while( combine_pos != list.end() && cpp2::impl::is(*cpp2::impl::assert_not_null(*cpp2::impl::assert_not_null(combine_pos))) ) {// The erase advances combine_pos + (cpp2::impl::as_(*cpp2::impl::assert_not_null(*cpp2::impl::assert_not_null(merge_pos)))).append(cpp2::impl::as_(*cpp2::impl::assert_not_null(*cpp2::impl::assert_not_null(combine_pos)))); + combine_pos = list.erase(combine_pos); + } + } + } + } + + parse_context_group_state::parse_context_group_state(auto const& cur_match_list_, auto const& alternate_match_lists_, auto const& modifiers_) + : cur_match_list{ cur_match_list_ } + , alternate_match_lists{ alternate_match_lists_ } + , modifiers{ modifiers_ }{} +parse_context_group_state::parse_context_group_state(){} + +#line 1734 "reflect.h2" + [[nodiscard]] auto parse_context_branch_reset_state::next() & -> int{ + auto g {cur_group}; + cur_group += 1; + max_group = max(max_group, cur_group); + + return g; + } + +#line 1743 "reflect.h2" + auto parse_context_branch_reset_state::set_next(cpp2::impl::in g) & -> void{ + cur_group = g; + max_group = max(max_group, g); + } + +#line 1749 "reflect.h2" + auto parse_context_branch_reset_state::next_alternative() & -> void{ + if (is_active) { + cur_group = from; } } -#line 489 "reflect.h2" - [[nodiscard]] auto type_declaration::is_polymorphic() const& -> bool { return CPP2_UFCS(is_polymorphic)((*cpp2::impl::assert_not_null(n))); } -#line 490 "reflect.h2" - [[nodiscard]] auto type_declaration::is_final() const& 
-> bool { return CPP2_UFCS(is_type_final)((*cpp2::impl::assert_not_null(n))); } -#line 491 "reflect.h2" - [[nodiscard]] auto type_declaration::make_final() & -> bool { return CPP2_UFCS(make_type_final)((*cpp2::impl::assert_not_null(n))); } +#line 1756 "reflect.h2" + auto parse_context_branch_reset_state::set_active_reset(cpp2::impl::in restart) & -> void{ + is_active = true; + cur_group = restart; + from = restart; + max_group = restart; + } -#line 493 "reflect.h2" - [[nodiscard]] auto type_declaration::get_member_functions() const& -> std::vector + parse_context_branch_reset_state::parse_context_branch_reset_state(auto const& is_active_, auto const& cur_group_, auto const& max_group_, auto const& from_) + : is_active{ is_active_ } + , cur_group{ cur_group_ } + , max_group{ max_group_ } + , from{ from_ }{} +parse_context_branch_reset_state::parse_context_branch_reset_state(){} +#line 1784 "reflect.h2" + parse_context::parse_context(cpp2::impl::in r, auto const& e) + : regex{ r } + , root{ CPP2_UFCS_TEMPLATE_NONLOCAL(cpp2_new)(cpp2::shared, "") } + , error_out{ e }{ + +#line 1788 "reflect.h2" + } + +#line 1794 "reflect.h2" + [[nodiscard]] auto parse_context::start_group() & -> parse_context_group_state { - std::vector ret {}; - for ( - auto const& d : CPP2_UFCS(get_type_scope_declarations)((*cpp2::impl::assert_not_null(n)), declaration_node::functions) ) { - static_cast(CPP2_UFCS(emplace_back)(ret, d, (*this))); + parse_context_group_state old_state {}; + old_state.swap(cur_group_state); + cur_group_state.modifiers = old_state.modifiers; + + return old_state; + } + +#line 1804 "reflect.h2" + [[nodiscard]] auto parse_context::end_group(cpp2::impl::in old_state) & -> token_ptr + { + auto inner {cur_group_state.get_as_token()}; + cur_group_state = old_state; + return inner; + } + +#line 1811 "reflect.h2" + [[nodiscard]] auto parse_context::get_modifiers() const& -> expression_flags{ + return cur_group_state.modifiers; + } + +#line 1815 "reflect.h2" + auto 
parse_context::set_modifiers(cpp2::impl::in mod) & -> void{ + cur_group_state.modifiers = mod; + } + +#line 1822 "reflect.h2" + [[nodiscard]] auto parse_context::branch_reset_new_state() & -> parse_context_branch_reset_state + { + parse_context_branch_reset_state old_state {}; + std::swap(old_state, cur_branch_reset_state); + + cur_branch_reset_state.set_active_reset(old_state.cur_group); + return old_state; + } + +#line 1831 "reflect.h2" + auto parse_context::branch_reset_restore_state(cpp2::impl::in old_state) & -> void + { + auto max_group {cur_branch_reset_state.max_group}; + cur_branch_reset_state = old_state; + cur_branch_reset_state.set_next(cpp2::move(max_group)); + } + +#line 1838 "reflect.h2" + auto parse_context::next_alternative() & -> void + { + cur_group_state.next_alternative(); + cur_branch_reset_state.next_alternative(); + } + +#line 1846 "reflect.h2" + auto parse_context::add_token(cpp2::impl::in token) & -> void{ + cur_group_state.add(token); + } + +#line 1850 "reflect.h2" + [[nodiscard]] auto parse_context::has_token() const& -> bool{ + return !(cur_group_state.empty()); + } + +#line 1854 "reflect.h2" + [[nodiscard]] auto parse_context::pop_token() & -> token_ptr + { + token_ptr r {nullptr}; + if (has_token()) { + r = cur_group_state.cur_match_list.back(); + cur_group_state.cur_match_list.pop_back(); } - return ret; + + return r; } -#line 504 "reflect.h2" - [[nodiscard]] auto type_declaration::get_member_functions_needing_initializer() const& -> std::vector +#line 1865 "reflect.h2" + [[nodiscard]] auto parse_context::get_as_token() & -> token_ptr{ + return root; + } +#line 1871 "reflect.h2" + [[nodiscard]] auto parse_context::get_cur_group() const& -> int{ + return cur_branch_reset_state.cur_group; + } + +#line 1875 "reflect.h2" + [[nodiscard]] auto parse_context::next_group() & -> int{ + return cur_branch_reset_state.next(); + } + +#line 1879 "reflect.h2" + auto parse_context::set_named_group(cpp2::impl::in name, cpp2::impl::in id) & -> void { 
- std::vector ret {}; - for ( - auto const& d : CPP2_UFCS(get_type_scope_declarations)((*cpp2::impl::assert_not_null(n)), declaration_node::functions) ) - if ( !(CPP2_UFCS(has_initializer)((*cpp2::impl::assert_not_null(d)))) - && !(CPP2_UFCS(is_virtual_function)((*cpp2::impl::assert_not_null(d)))) - && !(CPP2_UFCS(is_defaultable_function)((*cpp2::impl::assert_not_null(d))))) - { - static_cast(CPP2_UFCS(emplace_back)(ret, d, (*this))); + if (!(named_groups.contains(name))) {// Redefinition of group name is not an error. The left most one is retained. + CPP2_ASSERT_IN_BOUNDS(named_groups, name) = id; } - return ret; } -#line 519 "reflect.h2" - [[nodiscard]] auto type_declaration::get_member_objects() const& -> std::vector +#line 1886 "reflect.h2" + [[nodiscard]] auto parse_context::get_named_group(cpp2::impl::in name) const& -> int + { + auto iter {named_groups.find(name)}; + if (iter == named_groups.end()) { + return -1; + } + else { + return (*cpp2::impl::assert_not_null(cpp2::move(iter))).second; + } + } + +#line 1899 "reflect.h2" + [[nodiscard]] auto parse_context::current() const& -> char{return CPP2_ASSERT_IN_BOUNDS(regex, pos); } +#line 1902 "reflect.h2" + [[nodiscard]] auto parse_context::get_next_position(cpp2::impl::in in_class, cpp2::impl::in no_skip) const& -> size_t { - std::vector ret {}; - for ( auto const& d : CPP2_UFCS(get_type_scope_declarations)((*cpp2::impl::assert_not_null(n)), declaration_node::objects) ) { - static_cast(CPP2_UFCS(emplace_back)(ret, d, (*this))); + auto perl_syntax {false}; + if (!(no_skip)) { + if (in_class) { + perl_syntax = get_modifiers().has(expression_flags::perl_code_syntax) && get_modifiers().has(expression_flags::perl_code_syntax_in_classes); + } + else { + perl_syntax = get_modifiers().has(expression_flags::perl_code_syntax); + } } - return ret; + auto cur {pos + 1}; + if (cpp2::move(perl_syntax)) { + for( ; cpp2::impl::cmp_less(cur,regex.size()); (cur += 1) ) { + auto n {CPP2_ASSERT_IN_BOUNDS(regex, cur)}; + + if 
(space_class::includes(n)) { + continue; + } + else {if (!(in_class) && '#' == cpp2::move(n)) { + cur = regex.find("\n", cur); + if (std::string::npos == cur) { + // No new line, comment runs until the end of the pattern + cur = regex.size(); + } + } + else { // None space none comment char + break; + }} + } + } + + // Check for end of file. + if (cpp2::impl::cmp_greater(cur,regex.size())) { + cur = regex.size(); + } + return cur; } -#line 529 "reflect.h2" - [[nodiscard]] auto type_declaration::get_member_types() const& -> std::vector +#line 1942 "reflect.h2" + [[nodiscard]] auto parse_context::next_impl(cpp2::impl::in in_class, cpp2::impl::in no_skip) & -> bool + { + pos = get_next_position(in_class, no_skip); + if (pos != regex.size()) { + return true; + } + else { + return false; + } + } + +#line 1953 "reflect.h2" + [[nodiscard]] auto parse_context::next() & -> auto { return next_impl(false, false); } +#line 1954 "reflect.h2" + [[nodiscard]] auto parse_context::next_in_class() & -> auto { return next_impl(true, false); } +#line 1955 "reflect.h2" + [[nodiscard]] auto parse_context::next_no_skip() & -> auto { return next_impl(false, true); } + +#line 1957 "reflect.h2" + [[nodiscard]] auto parse_context::next_n(cpp2::impl::in n) & -> bool{ + auto r {true}; + auto cur {0}; + for( ; r && cpp2::impl::cmp_less(cur,n); (r = next()) ) { + cur += 1; + } + return r; + } + +#line 1966 "reflect.h2" + [[nodiscard]] auto parse_context::has_next() const& -> bool{return cpp2::impl::cmp_less(pos,regex.size()); } +#line 1968 "reflect.h2" + [[nodiscard]] auto parse_context::grab_until_impl(cpp2::impl::in e, cpp2::impl::out r, cpp2::impl::in any) & -> bool { - std::vector ret {}; - for ( auto const& d : CPP2_UFCS(get_type_scope_declarations)((*cpp2::impl::assert_not_null(n)), declaration_node::types) ) { - static_cast(CPP2_UFCS(emplace_back)(ret, d, (*this))); + auto end {pos}; + if (any) { + end = regex.find_first_of(e, pos); + } + else { + end = regex.find(e, pos); + } + + if (end 
!= std::string_view::npos) { + r.construct(regex.substr(pos, end - pos)); + pos = cpp2::move(end); + return true; + } + else { + r.construct(""); + return false; } - return ret; } -#line 539 "reflect.h2" - [[nodiscard]] auto type_declaration::get_member_aliases() const& -> std::vector +#line 1989 "reflect.h2" + [[nodiscard]] auto parse_context::grab_until(cpp2::impl::in e, cpp2::impl::out r) & -> auto { return grab_until_impl(e, cpp2::impl::out(&r), false); } +#line 1990 "reflect.h2" + [[nodiscard]] auto parse_context::grab_until(cpp2::impl::in e, cpp2::impl::out r) & -> auto { return grab_until_impl(std::string(1, e), cpp2::impl::out(&r), false); } +#line 1991 "reflect.h2" + [[nodiscard]] auto parse_context::grab_until_one_of(cpp2::impl::in e, cpp2::impl::out r) & -> auto { return grab_until_impl(e, cpp2::impl::out(&r), true); } +#line 1993 "reflect.h2" + [[nodiscard]] auto parse_context::grab_n(cpp2::impl::in n, cpp2::impl::out r) & -> bool { - std::vector ret {}; - for ( auto const& d : CPP2_UFCS(get_type_scope_declarations)((*cpp2::impl::assert_not_null(n)), declaration_node::aliases) ) { - static_cast(CPP2_UFCS(emplace_back)(ret, d, (*this))); + if (cpp2::impl::cmp_less_eq(pos + cpp2::impl::as_(n),regex.size())) { + r.construct(regex.substr(pos, cpp2::impl::as_(n))); + pos += (cpp2::impl::as_(n)) - 1; + return true; + } + else { + r.construct(""); + return false; } - return ret; } -#line 549 "reflect.h2" - [[nodiscard]] auto type_declaration::get_members() const& -> std::vector +#line 2006 "reflect.h2" + [[nodiscard]] auto parse_context::grab_number() & -> std::string + { + auto start {pos}; + auto start_search {pos}; + if (CPP2_ASSERT_IN_BOUNDS(regex, start_search) == '-') { + start_search += 1; + } + auto end {regex.find_first_not_of("1234567890", cpp2::move(start_search))}; + + cpp2::impl::deferred_init r; + if (end != std::string::npos) { + r.construct(regex.substr(start, end - start)); + pos = cpp2::move(end) - 1; + } + else { + 
r.construct(regex.substr(cpp2::move(start))); + pos = regex.size() - 1; + } + return cpp2::move(r.value()); + } + +#line 2027 "reflect.h2" + [[nodiscard]] auto parse_context::peek_impl(cpp2::impl::in in_class) const& -> char{ + auto next_pos {get_next_position(in_class, false)}; + if (cpp2::impl::cmp_less(next_pos,regex.size())) { + return CPP2_ASSERT_IN_BOUNDS(regex, cpp2::move(next_pos)); + } + else { + return '\0'; + } + } +#line 2037 "reflect.h2" + [[nodiscard]] auto parse_context::peek() const& -> auto { return peek_impl(false); } +#line 2038 "reflect.h2" + [[nodiscard]] auto parse_context::peek_in_class() const& -> auto { return peek_impl(true); } + +#line 2043 "reflect.h2" + [[nodiscard]] auto parse_context::parser_group_modifiers(cpp2::impl::in change_str, expression_flags& parser_modifiers) & -> bool { - std::vector ret {}; - for ( auto const& d : CPP2_UFCS(get_type_scope_declarations)((*cpp2::impl::assert_not_null(n)), declaration_node::all) ) { - static_cast(CPP2_UFCS(emplace_back)(ret, d, (*this))); + auto is_negative {false}; + auto is_reset {false}; + + auto apply {[&, _1 = (&is_negative), _2 = (&parser_modifiers)](cpp2::impl::in flag) mutable -> void{ + if (*cpp2::impl::assert_not_null(_1)) { + (*cpp2::impl::assert_not_null(_2)).clear(flag); + } + else { + (*cpp2::impl::assert_not_null(_2)).set(flag); + } + }}; + + auto iter {change_str.begin()}; + for( ; iter != change_str.end(); (++iter) ) + { + auto cur {*cpp2::impl::assert_not_null(iter)}; + if (cur == '^') { + is_reset = true; + parser_modifiers = expression_flags::none; + } + else {if (cur == '-') { + if (is_reset) {static_cast(error("No negative modifier allowed.")); return false; } + is_negative = true; + } + else {if (cur == 'i') {apply(expression_flags::case_insensitive); } + else {if (cur == 'm') {apply(expression_flags::multiple_lines); } + else {if (cur == 's') {apply(expression_flags::single_line); } + else {if (cur == 'n') {apply(expression_flags::no_group_captures); } + else {if (cur 
== 'x') { + if ((iter + 1) == change_str.end() || *cpp2::impl::assert_not_null((iter + 1)) != 'x') { + // x modifier + apply(expression_flags::perl_code_syntax); + + // Just x unsets xx and remove x also removes xx + parser_modifiers.clear(expression_flags::perl_code_syntax_in_classes); + } + else { // xx modifier + // xx also sets or unsets x + apply(expression_flags::perl_code_syntax); + apply(expression_flags::perl_code_syntax_in_classes); + + ++iter; // Skip the second x + } + } + else { + static_cast(error("Unknown modifier: " + cpp2::to_string(cpp2::move(cur)) + "")); return false; + }}}}}}} } - return ret; + + return true; } -#line 559 "reflect.h2" - [[nodiscard]] auto type_declaration::query_declared_value_set_functions() const& -> query_declared_value_set_functions_ret +#line 2097 "reflect.h2" + [[nodiscard]] auto parse_context::parse_until(cpp2::impl::in term) & -> bool{ + token_ptr cur_token {}; + + for( ; valid(); static_cast(next()) ) + { + if (term == current()) {break; } + + cur_token = nullptr; + + if (!(cur_token) && valid()) {cur_token = alternative_token::parse((*this)); } + if (!(cur_token) && valid()) {cur_token = any_token::parse((*this)); } + if (!(cur_token) && valid()) {cur_token = class_token::parse((*this)); } + if (!(cur_token) && valid()) {cur_token = escape_token_parse((*this)); } + if (!(cur_token) && valid()) {cur_token = global_group_reset_token_parse((*this)); } + if (!(cur_token) && valid()) {cur_token = group_ref_token::parse((*this)); } + if (!(cur_token) && valid()) {cur_token = group_token::parse((*this)); } + if (!(cur_token) && valid()) {cur_token = hexadecimal_token_parse((*this)); } + if (!(cur_token) && valid()) {cur_token = line_end_token_parse((*this)); } + if (!(cur_token) && valid()) {cur_token = line_start_token_parse((*this)); } + if (!(cur_token) && valid()) {cur_token = named_class_token_parse((*this)); } + if (!(cur_token) && valid()) {cur_token = octal_token_parse((*this)); } + if (!(cur_token) && valid()) 
{cur_token = range_token::parse((*this)); } + if (!(cur_token) && valid()) {cur_token = special_range_token::parse((*this)); } + if (!(cur_token) && valid()) {cur_token = word_boundary_token_parse((*this)); } + + // Everything else is matched as it is. + if (!(cur_token) && valid()) {cur_token = char_token::parse((*this)); } + + if (cur_token && valid()) { + add_token(cur_token); + }else { + return false; + } + } -#line 566 "reflect.h2" + return true; + } + +#line 2135 "reflect.h2" + [[nodiscard]] auto parse_context::parse(cpp2::impl::in modifiers) & -> bool { - cpp2::impl::deferred_init out_this_in_that; - cpp2::impl::deferred_init out_this_move_that; - cpp2::impl::deferred_init inout_this_in_that; - cpp2::impl::deferred_init inout_this_move_that; -#line 567 "reflect.h2" - auto declared {CPP2_UFCS(find_declared_value_set_functions)((*cpp2::impl::assert_not_null(n)))}; - out_this_in_that.construct(declared.out_this_in_that != nullptr); - out_this_move_that.construct(declared.out_this_move_that != nullptr); - inout_this_in_that.construct(declared.inout_this_in_that != nullptr); - inout_this_move_that.construct(cpp2::move(declared).inout_this_move_that != nullptr); - return { std::move(out_this_in_that.value()), std::move(out_this_move_that.value()), std::move(inout_this_in_that.value()), std::move(inout_this_move_that.value()) }; } -#line 574 "reflect.h2" - auto type_declaration::add_member(cpp2::impl::in source) & -> void + expression_flags flags {}; + if (!(parser_group_modifiers(modifiers, flags))) {return false; } + set_modifiers(cpp2::move(flags)); + + auto r {parse_until('\0')}; + if (r) { + root = cur_group_state.get_as_token(); + } + + return r; + } + +#line 2152 "reflect.h2" + [[nodiscard]] auto parse_context::get_pos() const& -> auto { return pos; } +#line 2153 "reflect.h2" + [[nodiscard]] auto parse_context::get_range(cpp2::impl::in start, cpp2::impl::in end) const& -> auto { return std::string(regex.substr(start, end - start + 1)); } +#line 2154 
"reflect.h2" + [[nodiscard]] auto parse_context::valid() const& -> bool{return has_next() && !(has_error); } + +#line 2156 "reflect.h2" + [[nodiscard]] auto parse_context::error(cpp2::impl::in err) & -> token_ptr{ + has_error = true; + error_out("Error during parsing of regex '" + cpp2::to_string(regex) + "' at position '" + cpp2::to_string(pos) + "': " + cpp2::to_string(err) + ""); + return nullptr; + } + +#line 2171 "reflect.h2" + auto generation_function_context::add_tabs(cpp2::impl::in c) & -> void{ + int i {0}; + for( ; cpp2::impl::cmp_less(i,c); i += 1 ) { + tabs += " "; + } + } + +#line 2178 "reflect.h2" + auto generation_function_context::remove_tabs(cpp2::impl::in c) & -> void{ + tabs = tabs.substr(0, (cpp2::impl::as_(c)) * 2); + } + + generation_function_context::generation_function_context(auto const& code_, auto const& tabs_) + : code{ code_ } + , tabs{ tabs_ }{} +generation_function_context::generation_function_context(){} + +#line 2196 "reflect.h2" + [[nodiscard]] auto generation_context::match_parameters() const& -> std::string{return "r.pos, ctx"; } + +#line 2201 "reflect.h2" + auto generation_context::add(cpp2::impl::in s) & -> void{ + auto cur {get_current()}; + (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + cpp2::to_string(s) + "\n"; + } + +#line 2207 "reflect.h2" + auto generation_context::add_check(cpp2::impl::in check) & -> void{ + auto cur {get_current()}; + (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + "if !cpp2::regex::" + cpp2::to_string(check) + " { r.matched = false; break; }\n"; + } + +#line 2213 "reflect.h2" + auto generation_context::add_statefull(cpp2::impl::in next_func, cpp2::impl::in check) & -> void { - auto decl {parse_statement(source)}; - if (!((cpp2::impl::as_(decl)))) { - error("the provided source string is not a valid statement"); - return ; + end_func_statefull(check); + + auto name 
{next_func.substr(0, next_func.size() - 2)}; + start_func_named(cpp2::move(name)); + } + +#line 2221 "reflect.h2" + auto generation_context::start_func_named(cpp2::impl::in name) & -> void + { + auto cur {new_context()}; + + (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + cpp2::to_string(name) + ": @struct type = {\n"; + (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + " operator(): (this, cur: Iter, inout ctx: context, other) -> cpp2::regex::match_return = {\n"; + (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + " r := ctx..pass(cur);\n"; + (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + " do {\n"; + (*cpp2::impl::assert_not_null(cpp2::move(cur))).add_tabs(3); + } + +#line 2232 "reflect.h2" + [[nodiscard]] auto generation_context::start_func() & -> std::string + { + auto name {gen_func_name()}; + start_func_named(name); + return cpp2::move(name) + "()"; + } + +#line 2239 "reflect.h2" + auto generation_context::end_func_statefull(cpp2::impl::in s) & -> void + { + auto cur {get_current()}; + (*cpp2::impl::assert_not_null(cur)).remove_tabs(3); + (*cpp2::impl::assert_not_null(cur)).code += "\n"; + (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + " } while false;\n"; + (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + " if r.matched {\n"; + (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + " r = " + cpp2::to_string(s) + ";\n"; + (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + " }\n"; + (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + " 
else {\n"; + (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + " r.pos = ctx.end;\n"; + (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + " }\n"; + (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + " return r;\n"; + (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + " }\n"; + (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + "}\n"; + + finish_context(); + } + +#line 2259 "reflect.h2" + [[nodiscard]] auto generation_context::generate_func(cpp2::impl::in token) & -> std::string + { + auto name {start_func()}; + (*cpp2::impl::assert_not_null(token)).generate_code((*this)); + end_func_statefull("other(" + cpp2::to_string(match_parameters()) + ")"); + + return name; + } + +#line 2269 "reflect.h2" + [[nodiscard]] auto generation_context::generate_reset(cpp2::impl::in> groups) & -> std::string + { + if (groups.empty()) { + return "cpp2::regex::no_reset()"; } - if (!(CPP2_UFCS(is_declaration)((*cpp2::impl::assert_not_null(decl))))) { - error("cannot add a member that is not a declaration"); + + auto name {gen_reset_func_name()}; + auto cur {new_context()}; + + (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + cpp2::to_string(name) + ": @struct type = {\n"; + (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + " operator(): (this, inout ctx) = {\n"; + for ( auto const& g : groups ) { + (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + " ctx..set_group_invalid(" + cpp2::to_string(g) + ");\n"; } - require(CPP2_UFCS(add_type_member)((*cpp2::impl::assert_not_null(n)), std::move(cpp2::move(decl))), - 
std::string("unexpected error while attempting to add member:\n") + source); + (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + " }\n"; + (*cpp2::impl::assert_not_null(cur)).code += "" + cpp2::to_string((*cpp2::impl::assert_not_null(cur)).tabs) + "}\n"; + + finish_context(); + + return cpp2::move(name) + "()"; } -#line 588 "reflect.h2" - auto type_declaration::remove_marked_members() & -> void { CPP2_UFCS(type_remove_marked_members)((*cpp2::impl::assert_not_null(n))); } -#line 589 "reflect.h2" - auto type_declaration::remove_all_members() & -> void { CPP2_UFCS(type_remove_all_members)((*cpp2::impl::assert_not_null(n))); } +#line 2293 "reflect.h2" + [[nodiscard]] auto generation_context::gen_func_name() & -> std::string{ + auto cur_id {matcher_func}; + matcher_func += 1; + return "func_" + cpp2::to_string(cpp2::move(cur_id)) + ""; + } -#line 591 "reflect.h2" - auto type_declaration::disable_member_function_generation() & -> void { CPP2_UFCS(type_disable_member_function_generation)((*cpp2::impl::assert_not_null(n))); } +#line 2299 "reflect.h2" + [[nodiscard]] auto generation_context::next_func_name() & -> std::string{ + return gen_func_name() + "()"; + } - type_declaration::type_declaration(type_declaration const& that) - : declaration{ static_cast(that) }{} +#line 2303 "reflect.h2" + [[nodiscard]] auto generation_context::gen_reset_func_name() & -> std::string{ + auto cur_id {reset_func}; + reset_func += 1; + return "reset_" + cpp2::to_string(cpp2::move(cur_id)) + ""; + } -#line 602 "reflect.h2" - alias_declaration::alias_declaration( +#line 2309 "reflect.h2" + [[nodiscard]] auto generation_context::gen_temp() & -> std::string{ + auto cur_id {temp_name}; + temp_name += 1; + return "tmp_" + cpp2::to_string(cpp2::move(cur_id)) + ""; + } + +#line 2317 "reflect.h2" + [[nodiscard]] auto generation_context::new_context() & -> generation_function_context*{ + gen_stack.push_back(generation_function_context()); + 
auto cur {get_current()}; + (*cpp2::impl::assert_not_null(cur)).tabs = " "; + + return cur; + } + +#line 2325 "reflect.h2" + auto generation_context::finish_context() & -> void{ + auto cur {get_current()}; + auto base {get_base()}; + (*cpp2::impl::assert_not_null(base)).code += (*cpp2::impl::assert_not_null(cpp2::move(cur))).code; + + gen_stack.pop_back(); + } + +#line 2335 "reflect.h2" + [[nodiscard]] auto generation_context::get_current() & -> generation_function_context*{ + return &gen_stack.back(); + } + +#line 2339 "reflect.h2" + [[nodiscard]] auto generation_context::get_base() & -> generation_function_context*{ + return &CPP2_ASSERT_IN_BOUNDS_LITERAL(gen_stack, 0); + } + +#line 2343 "reflect.h2" + [[nodiscard]] auto generation_context::get_entry_func() const& -> std::string{ + return entry_func; + } + +#line 2347 "reflect.h2" + [[nodiscard]] auto generation_context::create_named_group_lookup(cpp2::impl::in> named_groups) const& -> std::string + { + std::string res {"get_named_group_index: (name) -> int = {\n"}; + + // Generate if selection. + std::string sep {""}; + for ( auto const& cur : named_groups ) { + res += "" + cpp2::to_string(sep) + "if name == \"" + cpp2::to_string(cur.first) + "\" { return " + cpp2::to_string(cur.second) + "; }"; + sep = "else "; + } + + // Generate else branch or return if list is empty. 
+ if (named_groups.empty()) { + res += " _ = name;\n"; + res += " return -1;\n"; + } + else { + res += " else { return -1; }\n"; + } + res += "}\n"; + return res; + } + +#line 2372 "reflect.h2" + [[nodiscard]] auto generation_context::run(cpp2::impl::in token) & -> std::string{ + entry_func = generate_func(token); + + return (*cpp2::impl::assert_not_null(get_base())).code; + } + +#line 2387 "reflect.h2" + alternative_token::alternative_token() + : regex_token_empty{ "" }{} - declaration_node* n_, - cpp2::impl::in s - ) - : declaration{ n_, s } -#line 607 "reflect.h2" - { +#line 2389 "reflect.h2" + [[nodiscard]] auto alternative_token::parse(parse_context& ctx) -> token_ptr{ + if (ctx.current() != '|') {return nullptr; } - if (cpp2::cpp2_default.is_active() && !(CPP2_UFCS(is_alias)((*cpp2::impl::assert_not_null(n)))) ) { cpp2::cpp2_default.report_violation(""); } + if (!(ctx.has_token())) {return ctx.error("Alternative with no content."); } + ctx.next_alternative(); + return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared); } - alias_declaration::alias_declaration(alias_declaration const& that) - : declaration{ static_cast(that) }{} + alternative_token::~alternative_token() noexcept{} -#line 626 "reflect.h2" -auto add_virtual_destructor(meta::type_declaration& t) -> void -{ - CPP2_UFCS(add_member)(t, "operator=: (virtual move this) = { }"); -} +#line 2404 "reflect.h2" + alternative_token_gen::alternative_token_gen(cpp2::impl::in a) + : regex_token{ gen_string(a) } + , alternatives{ a }{ -#line 644 "reflect.h2" -auto interface(meta::type_declaration& t) -> void -{ - auto has_dtor {false}; +#line 2407 "reflect.h2" + } - for ( auto& m : CPP2_UFCS(get_members)(t) ) +#line 2409 "reflect.h2" + auto alternative_token_gen::generate_code(generation_context& ctx) const -> void { - CPP2_UFCS(require)(m, !(CPP2_UFCS(is_object)(m)), - "interfaces may not contain data objects"); - if (CPP2_UFCS(is_function)(m)) { - auto mf {CPP2_UFCS(as_function)(m)}; - CPP2_UFCS(require)(mf, 
!(CPP2_UFCS(is_copy_or_move)(mf)), - "interfaces may not copy or move; consider a virtual clone() instead"); - CPP2_UFCS(require)(mf, !(CPP2_UFCS(has_initializer)(mf)), - "interface functions must not have a function body; remove the '=' initializer"); - CPP2_UFCS(require)(mf, CPP2_UFCS(make_public)(mf), - "interface functions must be public"); - CPP2_UFCS(default_to_virtual)(mf); - has_dtor |= CPP2_UFCS(is_destructor)(cpp2::move(mf)); + std::string functions {""}; + + for ( auto const& cur : alternatives ) { + std::set groups {}; + (*cpp2::impl::assert_not_null(cur)).add_groups(groups); + + functions += ", " + ctx.generate_func(cur); + functions += ", " + ctx.generate_reset(cpp2::move(groups)); } - } - if (!(cpp2::move(has_dtor))) { - CPP2_UFCS(add_virtual_destructor)(t); - } -} + auto next_name {ctx.next_func_name()}; -#line 690 "reflect.h2" -auto polymorphic_base(meta::type_declaration& t) -> void -{ - auto has_dtor {false}; + ctx.add_statefull(next_name, "cpp2::regex::alternative_token_matcher::match(" + cpp2::to_string(ctx.match_parameters()) + ", other, " + cpp2::to_string(next_name) + " " + cpp2::to_string(cpp2::move(functions)) + ")"); + } - for ( auto& mf : CPP2_UFCS(get_member_functions)(t) ) +#line 2426 "reflect.h2" + auto alternative_token_gen::add_groups(std::set& groups) const -> void { - if (CPP2_UFCS(is_default_access)(mf)) { - CPP2_UFCS(default_to_public)(mf); + for ( auto const& cur : alternatives ) { + (*cpp2::impl::assert_not_null(cur)).add_groups(groups); } - CPP2_UFCS(require)(mf, !(CPP2_UFCS(is_copy_or_move)(mf)), - "polymorphic base types may not copy or move; consider a virtual clone() instead"); - if (CPP2_UFCS(is_destructor)(mf)) { - has_dtor = true; - CPP2_UFCS(require)(mf, ((CPP2_UFCS(is_public)(mf) || CPP2_UFCS(is_default_access)(mf)) && CPP2_UFCS(is_virtual)(mf)) - || (CPP2_UFCS(is_protected)(mf) && !(CPP2_UFCS(is_virtual)(mf))), - "a polymorphic base type destructor must be public and virtual, or protected and nonvirtual"); + } + 
+#line 2433 "reflect.h2" + [[nodiscard]] auto alternative_token_gen::gen_string(cpp2::impl::in a) -> std::string + { + std::string r {""}; + std::string sep {""}; + + for ( auto const& cur : a ) { + r += sep + (*cpp2::impl::assert_not_null(cur)).to_string(); + sep = "|"; } + + return r; } - if (!(cpp2::move(has_dtor))) { - CPP2_UFCS(add_virtual_destructor)(t); + alternative_token_gen::~alternative_token_gen() noexcept{} + +#line 2454 "reflect.h2" + any_token::any_token(cpp2::impl::in single_line) + : regex_token_check{ ".", "any_token_matcher" }{ + +#line 2456 "reflect.h2" } -} -#line 735 "reflect.h2" -auto ordered_impl( - meta::type_declaration& t, - cpp2::impl::in ordering -) -> void -{ - auto has_spaceship {false}; +#line 2458 "reflect.h2" + [[nodiscard]] auto any_token::parse(parse_context& ctx) -> token_ptr{ + if ('.' != ctx.current()) {return nullptr; } - for ( auto& mf : CPP2_UFCS(get_member_functions)(t) ) - { - if (CPP2_UFCS(has_name)(mf, "operator<=>")) { - has_spaceship = true; - auto return_name {CPP2_UFCS(unnamed_return_type)(mf)}; - if (CPP2_UFCS(find)(return_name, ordering) == return_name.npos) - { - CPP2_UFCS(error)(mf, "operator<=> must return std::" + cpp2::impl::as_(ordering)); - } - } + return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, ctx.get_modifiers().has(expression_flags::single_line)); } - if (!(cpp2::move(has_spaceship))) { - CPP2_UFCS(add_member)(t, "operator<=>: (this, that) -> std::" + (cpp2::impl::as_(ordering)) + ";"); + any_token::~any_token() noexcept{} + +#line 2475 "reflect.h2" + char_token::char_token(cpp2::impl::in t, cpp2::impl::in ignore_case_) + : regex_token{ std::string(1, t) } + , token{ t } + , ignore_case{ ignore_case_ }{ + +#line 2479 "reflect.h2" } -} -#line 764 "reflect.h2" -auto ordered(meta::type_declaration& t) -> void -{ - ordered_impl(t, "strong_ordering"); -} +#line 2481 "reflect.h2" + [[nodiscard]] auto char_token::parse(parse_context& ctx) -> token_ptr{ + return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, 
ctx.current(), ctx.get_modifiers().has(expression_flags::case_insensitive)); + } -#line 772 "reflect.h2" -auto weakly_ordered(meta::type_declaration& t) -> void +#line 2485 "reflect.h2" + auto char_token::generate_code(generation_context& ctx) const -> void + { + if (ignore_case) { + std::string upper {token}; + std::string lower {token}; { - ordered_impl(t, "weak_ordering"); -} +size_t i{0}; -#line 780 "reflect.h2" -auto partially_ordered(meta::type_declaration& t) -> void -{ - ordered_impl(t, "partial_ordering"); +#line 2491 "reflect.h2" + for( ; cpp2::impl::cmp_less(i,token.size()); i += 1 ) { + CPP2_ASSERT_IN_BOUNDS(lower, i) = string_util::safe_tolower(CPP2_ASSERT_IN_BOUNDS(token, i)); + CPP2_ASSERT_IN_BOUNDS(upper, i) = string_util::safe_toupper(CPP2_ASSERT_IN_BOUNDS(token, i)); + } } -#line 802 "reflect.h2" -auto copyable(meta::type_declaration& t) -> void -{ - // If the user explicitly wrote any of the copy/move functions, - // they must also have written the most general one - we can't - // assume we can safely generate it for them since they've opted - // into customized semantics - auto smfs {CPP2_UFCS(query_declared_value_set_functions)(t)}; - if ( !(smfs.out_this_in_that) - && ( - smfs.out_this_move_that - || smfs.inout_this_in_that - || smfs.inout_this_move_that)) +#line 2496 "reflect.h2" + if (upper != lower) { + gen_case_insensitive(cpp2::move(lower), cpp2::move(upper), ctx); + } + else { + gen_case_sensitive(ctx); + } + } + else { + gen_case_sensitive(ctx); + } + } +#line 2508 "reflect.h2" + auto char_token::gen_case_insensitive(cpp2::impl::in lower, cpp2::impl::in upper, generation_context& ctx) const& -> void { - CPP2_UFCS(error)(t, - "this type is partially copyable/movable - when you provide " - "any of the more-specific operator= signatures, you must also provide " - "the one with the general signature (out this, that); alternatively, " - "consider removing all the operator= functions and let them all be " - "generated for you with default 
memberwise semantics" - ); + std::string name {"str_" + cpp2::to_string(ctx.gen_temp()) + ""}; + std::string lower_name {"lower_" + cpp2::to_string(name) + ""}; + std::string upper_name {"upper_" + cpp2::to_string(cpp2::move(name)) + ""}; + auto size {token.size()}; + ctx.add("" + cpp2::to_string(lower_name) + " : std::array = \"" + cpp2::to_string(add_escapes(lower)) + "\";"); // TODO: Add constexpr when Issue https://github.com/hsutter/cppfront/issues/1104 is resolved. + ctx.add("" + cpp2::to_string(upper_name) + " : std::array = \"" + cpp2::to_string(add_escapes(upper)) + "\";"); // TODO: Add constexpr when Issue https://github.com/hsutter/cppfront/issues/1104 is resolved. + ctx.add("if std::distance(r.pos, ctx.end) < " + cpp2::to_string(size) + " {"); + ctx.add(" r.matched = false;"); + ctx.add(" break;"); + ctx.add("}"); + ctx.add(""); + ctx.add("(copy i : int = 0) while i < " + cpp2::to_string(size) + " next (i += 1) {"); + ctx.add(" if !(" + cpp2::to_string(cpp2::move(lower_name)) + "[i] == r.pos[i] || " + cpp2::to_string(cpp2::move(upper_name)) + "[i] == r.pos[i]) { r.matched = false; }"); + ctx.add("}"); + ctx.add(""); + ctx.add("if r.matched { r.pos += " + cpp2::to_string(cpp2::move(size)) + "; }"); + ctx.add("else { break; }"); } - else {if (!(cpp2::move(smfs).out_this_in_that)) { - CPP2_UFCS(add_member)(t, "operator=: (out this, that) = { }"); - }} -} -#line 836 "reflect.h2" -auto basic_value(meta::type_declaration& t) -> void -{ - CPP2_UFCS(copyable)(t); +#line 2529 "reflect.h2" + auto char_token::gen_case_sensitive(generation_context& ctx) const& -> void + { + std::string name {"str_" + cpp2::to_string(ctx.gen_temp()) + ""}; + auto size {token.size()}; + ctx.add("" + cpp2::to_string(name) + " : std::array = \"" + cpp2::to_string(add_escapes(token)) + "\";"); // TODO: Add constexpr when Issue https://github.com/hsutter/cppfront/issues/1104 is resolved. 
+ ctx.add("if std::distance(r.pos, ctx.end) < " + cpp2::to_string(size) + " {"); + ctx.add(" r.matched = false;"); + ctx.add(" break;"); + ctx.add("}"); + ctx.add(""); + ctx.add("(copy i : int = 0) while i < " + cpp2::to_string(size) + " next (i += 1) {"); + ctx.add(" if " + cpp2::to_string(cpp2::move(name)) + "[i] != r.pos[i] { r.matched = false; }"); + ctx.add("}"); + ctx.add(""); + ctx.add("if r.matched { r.pos += " + cpp2::to_string(cpp2::move(size)) + "; }"); + ctx.add("else { break; }"); + } - auto has_default_ctor {false}; - for ( auto& mf : CPP2_UFCS(get_member_functions)(t) ) { - has_default_ctor |= CPP2_UFCS(is_default_constructor)(mf); - CPP2_UFCS(require)(mf, !(CPP2_UFCS(is_protected)(mf)) && !(CPP2_UFCS(is_virtual)(mf)), - "a value type may not have a protected or virtual function"); - CPP2_UFCS(require)(mf, !(CPP2_UFCS(is_destructor)(mf)) || CPP2_UFCS(is_public)(mf) || CPP2_UFCS(is_default_access)(mf), - "a value type may not have a non-public destructor"); +#line 2547 "reflect.h2" + [[nodiscard]] auto char_token::add_escapes(std::string str) const& -> std::string + { + str = string_util::replace_all(str, "\\", "\\\\"); + str = string_util::replace_all(str, "\a", "\\a"); + str = string_util::replace_all(str, "\f", "\\f"); + str = string_util::replace_all(str, "\x1b", "\" \"\\x1b\" \""); // Generate a separated string. This prevents + // situations like `\x1bblub` from generating + // wrong hex characters. 
+ str = string_util::replace_all(str, "\n", "\\n"); + str = string_util::replace_all(str, "\r", "\\r"); + str = string_util::replace_all(str, "\t", "\\t"); + + return cpp2::move(str); } - if (!(cpp2::move(has_default_ctor))) { - CPP2_UFCS(add_member)(t, "operator=: (out this) = { }"); +#line 2562 "reflect.h2" + auto char_token::append(char_token const& that) & -> void{ + (*this).token += that.token; + (*this).string_rep += that.string_rep; } -} -#line 864 "reflect.h2" -auto value(meta::type_declaration& t) -> void -{ - CPP2_UFCS(ordered)(t); - CPP2_UFCS(basic_value)(t); -} + char_token::~char_token() noexcept{} -#line 870 "reflect.h2" -auto weakly_ordered_value(meta::type_declaration& t) -> void -{ - CPP2_UFCS(weakly_ordered)(t); - CPP2_UFCS(basic_value)(t); -} +#line 2579 "reflect.h2" + class_token::class_token(cpp2::impl::in negate_, cpp2::impl::in case_insensitive_, cpp2::impl::in class_str_, cpp2::impl::in str) + : regex_token{ str } + , negate{ negate_ } + , case_insensitive{ case_insensitive_ } + , class_str{ class_str_ } +#line 2580 "reflect.h2" + { -#line 876 "reflect.h2" -auto partially_ordered_value(meta::type_declaration& t) -> void -{ - CPP2_UFCS(partially_ordered)(t); - CPP2_UFCS(basic_value)(t); -} +#line 2585 "reflect.h2" + } -#line 905 "reflect.h2" -auto cpp1_rule_of_zero(meta::type_declaration& t) -> void -{ - for ( auto& mf : CPP2_UFCS(get_member_functions)(t) ) +#line 2588 "reflect.h2" + [[nodiscard]] auto class_token::parse(parse_context& ctx) -> token_ptr { - CPP2_UFCS(require)(t, !(CPP2_UFCS(is_constructor_with_that)(mf)) - && !(CPP2_UFCS(is_assignment_with_that)(mf)) - && !(CPP2_UFCS(is_destructor)(mf)), - "the rule of zero requires no copy/move/destructor functions"); - } - CPP2_UFCS(disable_member_function_generation)(t); -} + if (ctx.current() != '[') {return nullptr; } -#line 942 "reflect.h2" -auto cpp2_struct(meta::type_declaration& t) -> void -{ - std::string ctor_params {}; - std::string ctor_inits {}; + auto start_pos {ctx.get_pos()}; 
- auto found_member_without_initializer {false}; + std::vector supported_classes {"alnum", "alpha", "ascii", "blank", "cntrl", "digits", "graph", + "lower", "print", "punct", "space", "upper", "word", "xdigit"}; - for ( auto& m : CPP2_UFCS(get_members)(t) ) - { - CPP2_UFCS(require)(m, CPP2_UFCS(make_public)(m), - "all struct members must be public"); - if (CPP2_UFCS(is_function)(m)) { - auto mf {CPP2_UFCS(as_function)(m)}; - CPP2_UFCS(require)(t, !(CPP2_UFCS(is_virtual)(mf)), - "a struct may not have a virtual function"); - CPP2_UFCS(require)(t, !(CPP2_UFCS(has_name)(cpp2::move(mf), "operator=")), - "a struct may not have a user-defined operator="); - } - else {if (CPP2_UFCS(is_object)(m)) { - auto mo {CPP2_UFCS(as_object)(m)}; - if (CPP2_UFCS(name)(mo) != "this") { - ctor_params += "" + cpp2::to_string(CPP2_UFCS(name)(mo)) + "_, "; - ctor_inits += "" + cpp2::to_string(CPP2_UFCS(name)(mo)) + " = " + cpp2::to_string(CPP2_UFCS(name)(mo)) + "_; "; - } - else { - ctor_inits += "" + cpp2::to_string(CPP2_UFCS(type)(mo)) + " = (); "; + std::vector classes {}; + + // First step: parse until the end bracket and push single chars, ranges or groups on the class stack. + auto is_negate {false}; + auto first {true}; + auto range {false}; + while( ctx.next_in_class() && (ctx.current() != ']' || first) ) + { + if (ctx.current() == '^') + { + is_negate = true; + continue; // Skip rest of the loop. Also the first update. } - found_member_without_initializer |= !(CPP2_UFCS(has_initializer)(cpp2::move(mo))); - }} - } - CPP2_UFCS(cpp1_rule_of_zero)(t); - // If we found any data members - if (!(CPP2_UFCS(empty)(ctor_params))) - { - // Then to enable construction from corresponding values - // requires a constructor... 
an exception to the rule of zero - CPP2_UFCS(add_member)(t, " operator=: (implicit out this, " + cpp2::to_string(cpp2::move(ctor_params)) + ") = { " + cpp2::to_string(cpp2::move(ctor_inits)) + " }"); + if (ctx.current() == '[' && ctx.peek_in_class() == ':') + { + // We have a character class. + static_cast(ctx.next_n(2));// Skip [: - // And if all members had initializers, we need a default constructor - if (!(cpp2::move(found_member_without_initializer))) { - CPP2_UFCS(add_member)(t, " operator=: (implicit out this) = { }"); + std::string name {""}; + if (!(ctx.grab_until(":]", cpp2::impl::out(&name)))) {return ctx.error("Could not find end of character class."); } + if (supported_classes.end() == std::find(supported_classes.begin(), supported_classes.end(), name)) { + return ctx.error("Unsupported character class. Supported ones are: " + cpp2::to_string(string_util::join(supported_classes)) + ""); + } + + classes.push_back("[:" + cpp2::to_string(cpp2::move(name)) + ":]"); + + static_cast(ctx.next());// Skip ':' pointing to the ending ']'. + } + else {if (ctx.current() == '\\') + { + if (ctx. next_no_skip() && (ctx. current() != ']')) + { + if ( ' ' == ctx. current() + && ctx.get_modifiers().has(expression_flags::perl_code_syntax) + && ctx.get_modifiers().has(expression_flags::perl_code_syntax_in_classes)) + { + classes.push_back(std::string(1, ctx.current())); + } + else { + auto name {""}; + if ( 'd' == ctx. 
current()) { name = "short_digits"; } + else {if ('D' == ctx.current()) {name = "short_not_digits"; } + else {if ('h' == ctx.current()) {name = "short_hor_space"; } + else {if ('H' == ctx.current()) {name = "short_not_hor_space"; } + else {if ('s' == ctx.current()) {name = "short_space"; } + else {if ('S' == ctx.current()) {name = "short_not_space"; } + else {if ('v' == ctx.current()) {name = "short_ver_space"; } + else {if ('V' == ctx.current()) {name = "short_not_ver_space"; } + else {if ('w' == ctx.current()) {name = "short_word"; } + else {if ('W' == ctx.current()) {name = "short_not_word"; } + else { + return ctx.error("Unknown group escape."); + }}}}}}}}}} + classes.push_back("[:" + cpp2::to_string(cpp2::move(name)) + ":]"); + } + }else { + return ctx.error("Escape without a following character."); + } + } + else {if (ctx.current() == '-') + { + if (first) {// Literal if first entry. + classes.push_back("" + cpp2::to_string(ctx.current()) + ""); + }else { + range = true; + } + } + else + { + if (range) {// Modify last element to be a range. + classes.back() += "-" + cpp2::to_string(ctx.current()) + ""; + range = false; + } + else { + classes.push_back("" + cpp2::to_string(ctx.current()) + ""); + } + }}} + + first = false; } - } -} -value_member_info::value_member_info(auto const& name_, auto const& type_, auto const& value_) - : name{ name_ } - , type{ type_ } - , value{ value_ }{} + if (ctx.current() != ']') { + return ctx.error("Error end of character class definition before terminating ']'."); + } + auto end_pos {ctx.get_pos()}; -#line 1012 "reflect.h2" -auto basic_enum( - meta::type_declaration& t, - auto const& nextval, - cpp2::impl::in bitwise - ) -> void -{ - std::vector enumerators {}; - cpp2::i64 min_value {}; - cpp2::i64 max_value {}; - cpp2::impl::deferred_init underlying_type; + if (cpp2::move(range)) {// If '-' is last entry treat it as a literal char. 
+ classes.push_back("-"); + } - CPP2_UFCS(reserve_names)(t, "operator=", "operator<=>"); - if (bitwise) { - CPP2_UFCS(reserve_names)(t, "has", "set", "clear", "to_string", "get_raw_value", "none"); + // Second step: Wrap the item on the class stack with corresponding class implementation. + for ( auto& cur : classes ) + { + if (cur.starts_with("[:")) { + auto name {cur.substr(2, cur.size() - 4)}; + cur = create_matcher("" + cpp2::to_string(cpp2::move(name)) + "_class", ""); + } + else {if (1 != cur.size()) { + cur = create_matcher("range_class_entry", "'" + cpp2::to_string(CPP2_ASSERT_IN_BOUNDS_LITERAL(cur, 0)) + "', '" + cpp2::to_string(CPP2_ASSERT_IN_BOUNDS_LITERAL(cur, 2)) + "'"); + } + else { + cur = create_matcher("single_class_entry", "'" + cpp2::to_string(cur) + "'"); + }} + } + + auto inner {string_util::join(cpp2::move(classes))}; + auto string_rep {ctx.get_range(cpp2::move(start_pos), cpp2::move(end_pos))}; + return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, + cpp2::move(is_negate), + ctx.get_modifiers().has(expression_flags::case_insensitive), + cpp2::move(inner), + cpp2::move(string_rep) + ); } - // 1. 
Gather: The names of all the user-written members, and find/compute the type +#line 2713 "reflect.h2" + auto class_token::generate_code(generation_context& ctx) const -> void + { + ctx.add_check("class_token_matcher::match(" + cpp2::to_string(ctx.match_parameters()) + ")"); + } - underlying_type.construct(CPP2_UFCS(get_argument)(t, 0));// use the first template argument, if there was one +#line 2718 "reflect.h2" + [[nodiscard]] auto class_token::create_matcher(cpp2::impl::in name, cpp2::impl::in template_arguments) -> std::string + { + auto sep {", "}; + if (template_arguments.empty()) {sep = ""; } - auto found_non_numeric {false}; -{ -std::string value{"-1"}; + return "::cpp2::regex::" + cpp2::to_string(name) + ""; + } -#line 1035 "reflect.h2" - for ( - auto const& m : CPP2_UFCS(get_members)(t) ) - if ( CPP2_UFCS(is_member_object)(m)) - { - CPP2_UFCS(require)(m, CPP2_UFCS(is_public)(m) || CPP2_UFCS(is_default_access)(m), - "an enumerator cannot be protected or private"); + class_token::~class_token() noexcept{} - auto mo {CPP2_UFCS(as_object)(m)}; - if (!(CPP2_UFCS(has_wildcard_type)(mo))) { - CPP2_UFCS(error)(mo, - "an explicit underlying type should be specified as a compile-time argument " - "to the metafunction - try 'enum' or 'flag_enum'" - ); - } +#line 2730 "reflect.h2" +[[nodiscard]] auto escape_token_parse(parse_context& ctx) -> token_ptr +{ + if (ctx.current() != '\\') {return nullptr; } - auto init {CPP2_UFCS(initializer)(mo)}; +#line 2735 "reflect.h2" + if (std::string::npos == std::string("afenrt^.[]()*{}?+|\\").find(ctx.peek())) { + return nullptr; + } - auto is_default_or_numeric {is_empty_or_a_decimal_number(init)}; - found_non_numeric |= !(CPP2_UFCS(empty)(init)) && !(is_default_or_numeric); - CPP2_UFCS(require)(m, !(cpp2::move(is_default_or_numeric)) || !(found_non_numeric) || CPP2_UFCS(has_name)(mo, "none"), - "" + cpp2::to_string(CPP2_UFCS(name)(mo)) + ": enumerators with non-numeric values must come after all default and numeric values"); + 
static_cast(ctx.next());// Skip escape - nextval(value, cpp2::move(init)); + if (std::string::npos != std::string("afenrt\\").find(ctx.current())) + { + // Escape of string special char + char t {'\0'}; + if ( 'a' == ctx. current()) { t = '\a'; } + else {if ('f' == ctx.current()) {t = '\f'; } + else {if ('e' == ctx.current()) {t = '\x1b'; } + else {if ('n' == ctx.current()) {t = '\n'; } + else {if ('r' == ctx.current()) {t = '\r'; } + else {if ('t' == ctx.current()) {t = '\t'; } + else {if ('\\' == ctx.current()) {t = '\\'; } + else {return ctx.error("Internal: missing switch case for special escape."); }}}}}}} + + auto r {CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, cpp2::move(t), false)}; + (*cpp2::impl::assert_not_null(r)).set_string("\\" + cpp2::to_string(ctx.current()) + ""); + return r; + } + else + { + // Escape of regex special char + auto r {CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, ctx.current(), false)}; + (*cpp2::impl::assert_not_null(r)).set_string("\\" + cpp2::to_string(ctx.current()) + ""); + return r; + } - auto v {std::strtoll(&CPP2_ASSERT_IN_BOUNDS_LITERAL(value, 0), nullptr, 10)}; // for non-numeric values we'll just get 0 which is okay for now - if (cpp2::impl::cmp_less(v,min_value)) { - min_value = v; - } - if (cpp2::impl::cmp_greater(v,max_value)) { - max_value = cpp2::move(v); - } +} - // Adding local variable 'e' to work around a Clang warning - value_member_info e {cpp2::impl::as_(CPP2_UFCS(name)(mo)), "", value}; - CPP2_UFCS(push_back)(enumerators, cpp2::move(e)); +#line 2771 "reflect.h2" +[[nodiscard]] auto global_group_reset_token_parse(parse_context& ctx) -> token_ptr +{ + if (!((ctx.current() == '\\' && ctx.peek() == 'K'))) {return nullptr; } - CPP2_UFCS(mark_for_removal_from_enclosing_type)(mo); - static_cast(cpp2::move(mo)); - } + static_cast(ctx.next());// Skip escape. 
+ return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, "\\K", "ctx..set_group_start(0, r.pos);"); } -#line 1075 "reflect.h2" - if ((CPP2_UFCS(empty)(enumerators))) { - CPP2_UFCS(error)(t, "an enumeration must contain at least one enumerator value"); - return ; +#line 2793 "reflect.h2" + group_ref_token::group_ref_token(cpp2::impl::in id_, cpp2::impl::in case_insensitive_, cpp2::impl::in str) + : regex_token{ str } + , id{ id_ } + , case_insensitive{ case_insensitive_ } +#line 2794 "reflect.h2" + { + +#line 2798 "reflect.h2" } - // Compute the default underlying type, if it wasn't explicitly specified - if (underlying_type.value() == "") +#line 2800 "reflect.h2" + [[nodiscard]] auto group_ref_token::parse(parse_context& ctx) -> token_ptr { - CPP2_UFCS(require)(t, !(cpp2::move(found_non_numeric)), - "if you write an enumerator with a non-numeric-literal value, " - "you must specify the enumeration's underlying type" - ); + if (ctx.current() != '\\') {return nullptr; } - if (!(bitwise)) { - if (cpp2::impl::cmp_greater_eq(min_value,std::numeric_limits::min()) && cpp2::impl::cmp_less_eq(max_value,std::numeric_limits::max())) { - underlying_type.value() = "i8"; - } - else {if (cpp2::impl::cmp_greater_eq(min_value,std::numeric_limits::min()) && cpp2::impl::cmp_less_eq(max_value,std::numeric_limits::max())) { - underlying_type.value() = "i16"; + std::string str {"\\"}; + std::string group {""}; + + if ([_0 = '0', _1 = ctx.peek(), _2 = '9']{ return cpp2::impl::cmp_less_eq(_0,_1) && cpp2::impl::cmp_less_eq(_1,_2); }()) + { + static_cast(ctx.next());// Skip escape + group = ctx.grab_number(); + + if (cpp2::impl::cmp_greater_eq(group.size(),cpp2::impl::as_())) + { + // Octal syntax (\000) not a group ref matcher. 
+ auto number {0}; + if (!(string_util::string_to_int(group, number, 8))) {return ctx.error("Could not convert octal to int."); } + + char number_as_char {cpp2::unsafe_narrow(cpp2::move(number))}; + + auto token {CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, number_as_char, ctx.get_modifiers().has(expression_flags::case_insensitive))}; + (*cpp2::impl::assert_not_null(token)).set_string("\\" + cpp2::to_string(string_util::int_to_string<8>(cpp2::impl::as_(cpp2::move(number_as_char)))) + ""); + + return token; } - else {if (cpp2::impl::cmp_greater_eq(min_value,std::numeric_limits::min()) && cpp2::impl::cmp_less_eq(max_value,std::numeric_limits::max())) { - underlying_type.value() = "i32"; + + str += group; + // Regular group ref + } + else {if ('g' == ctx.peek()) + { + static_cast(ctx.next());// Skip escape + if (!(ctx.next())) {return ctx.error("Group escape without a following char."); }// Skip g + + str += "g"; + + if (ctx.current() == '{') { + str += "{"; + if (!((ctx.next() && ctx.grab_until('}', cpp2::impl::out(&group))))) {return ctx.error("No ending bracket."); } + + str += group + "}"; } - else {if (cpp2::impl::cmp_greater_eq(cpp2::move(min_value),std::numeric_limits::min()) && cpp2::impl::cmp_less_eq(cpp2::move(max_value),std::numeric_limits::max())) { - underlying_type.value() = "i64"; + else { + group = ctx.grab_number(); + str += group; } + } + else {if ('k' == ctx.peek()) + { + static_cast(ctx.next());// Skip escape + if (!(ctx.next())) {return ctx.error("Group escape without a following char."); }// Skip k + + str += "k"; + + auto term_char {'\0'}; + if (ctx.current() == '{') {term_char = '}'; } + else {if (ctx.current() == '<') {term_char = '>'; } + else {if (ctx.current() == '\'') {term_char = '\''; } else { - CPP2_UFCS(error)(t, - "values are outside the range representable by the " - "largest supported underlying signed type (i64)" - ); - }}}} + return ctx.error("Group escape has wrong operator."); + }}} + + str += ctx.current(); + + if (!((ctx.next() 
&& ctx.grab_until(term_char, cpp2::impl::out(&group))))) {return ctx.error("No ending bracket."); } + + str += group + cpp2::move(term_char); } - else { - auto umax {cpp2::move(max_value) * cpp2::impl::as_()}; - if (cpp2::impl::cmp_less_eq(umax,std::numeric_limits::max())) { - underlying_type.value() = "u8"; - } - else {if (cpp2::impl::cmp_less_eq(umax,std::numeric_limits::max())) { - underlying_type.value() = "u16"; + else + { + // No group ref matcher + return nullptr; + }}} + + // Parse the group + group = string_util::trim_copy(group); + int group_id {0}; + if (string_util::string_to_int(group, group_id)) + { + if (cpp2::impl::cmp_less(group_id,0)) { + group_id = ctx.get_cur_group() + group_id; + + if (cpp2::impl::cmp_less(group_id,1)) {// Negative and zero are no valid groups. + return ctx.error("Relative group reference does not reference a valid group. (Would be " + cpp2::to_string(group_id) + ".)"); + } } - else {if (cpp2::impl::cmp_less_eq(cpp2::move(umax),std::numeric_limits::max())) { - underlying_type.value() = "u32"; + + if (cpp2::impl::cmp_greater_eq(group_id,ctx.get_cur_group())) { + return ctx.error("Group reference is used before the group is declared."); } - else { - underlying_type.value() = "u64"; - }}} } + else + { + // Named group + group_id = ctx.get_named_group(group); + if (-1 == group_id) {return ctx.error("Group names does not exist. (Name is: " + cpp2::to_string(cpp2::move(group)) + ")"); } + } + + return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, cpp2::move(group_id), ctx.get_modifiers().has(expression_flags::case_insensitive), cpp2::move(str)); } -#line 1126 "reflect.h2" - // 2. Replace: Erase the contents and replace with modified contents - // - // Note that most values and functions are declared as '==' compile-time values, i.e. 
Cpp1 'constexpr' +#line 2901 "reflect.h2" + auto group_ref_token::generate_code(generation_context& ctx) const -> void{ + ctx.add_check("group_ref_token_matcher(" + cpp2::to_string(ctx.match_parameters()) + ")"); + } - CPP2_UFCS(remove_marked_members)(t); + group_ref_token::~group_ref_token() noexcept{} - // Generate the 'none' value if appropriate, and use that or - // else the first enumerator as the default-constructed value - auto default_value {CPP2_ASSERT_IN_BOUNDS_LITERAL(enumerators, 0).name}; - if (bitwise) { - default_value = "none"; - value_member_info e {"none", "", "0"}; - CPP2_UFCS(push_back)(enumerators, cpp2::move(e)); - } +#line 2924 "reflect.h2" + [[nodiscard]] auto group_token::parse_lookahead(parse_context& ctx, cpp2::impl::in syntax, cpp2::impl::in positive) -> token_ptr + { + static_cast(ctx.next());// Skip last token defining the syntax - // Generate all the private implementation - CPP2_UFCS(add_member)(t, " _value : " + cpp2::to_string(underlying_type.value()) + ";"); - CPP2_UFCS(add_member)(t, " private operator= : (implicit out this, _val: i64) == " - "_value = cpp2::unsafe_narrow<" + cpp2::to_string(underlying_type.value()) + ">(_val);"); + auto r {CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, positive)}; - // Generate the bitwise operations - if (bitwise) { - CPP2_UFCS(add_member)(t, " operator|=: ( inout this, that ) == _value |= that._value;"); - CPP2_UFCS(add_member)(t, " operator&=: ( inout this, that ) == _value &= that._value;"); - CPP2_UFCS(add_member)(t, " operator^=: ( inout this, that ) == _value ^= that._value;"); - CPP2_UFCS(add_member)(t, " operator| : ( this, that ) -> " + cpp2::to_string(CPP2_UFCS(name)(t)) + " == _value | that._value;"); - CPP2_UFCS(add_member)(t, " operator& : ( this, that ) -> " + cpp2::to_string(CPP2_UFCS(name)(t)) + " == _value & that._value;"); - CPP2_UFCS(add_member)(t, " operator^ : ( this, that ) -> " + cpp2::to_string(CPP2_UFCS(name)(t)) + " == _value ^ that._value;"); - CPP2_UFCS(add_member)(t, " 
has : ( this, that ) -> bool == _value & that._value;"); - CPP2_UFCS(add_member)(t, " set : ( inout this, that ) == _value |= that._value;"); - CPP2_UFCS(add_member)(t, " clear : ( inout this, that ) == _value &= that._value~;"); - } + auto old_state {ctx.start_group()}; + if (!(ctx.parse_until(')'))) {return ctx.error("Lookahead without a closing bracket."); } + (*cpp2::impl::assert_not_null(r)).inner = ctx.end_group(cpp2::move(old_state)); + (*cpp2::impl::assert_not_null(r)).set_string("(" + cpp2::to_string(syntax) + cpp2::to_string((*cpp2::impl::assert_not_null((*cpp2::impl::assert_not_null(r)).inner)).to_string()) + ")"); - // Add the enumerators - for ( auto const& e : enumerators ) { - CPP2_UFCS(add_member)(t, " " + cpp2::to_string(e.name) + " : " + cpp2::to_string(CPP2_UFCS(name)(t)) + " == " + cpp2::to_string(e.value) + ";"); + return r; } - // Generate the common functions - CPP2_UFCS(add_member)(t, " get_raw_value : (this) -> " + cpp2::to_string(cpp2::move(underlying_type.value())) + " == _value;"); - CPP2_UFCS(add_member)(t, " operator= : (out this) == { _value = " + cpp2::to_string(default_value) + "._value; }"); - CPP2_UFCS(add_member)(t, " operator= : (out this, that) == { }"); - CPP2_UFCS(add_member)(t, " operator<=> : (this, that) -> std::strong_ordering;"); -{ -std::string to_string_impl{" to_string_impl: (this, prefix: std::string_view"}; +#line 2938 "reflect.h2" + [[nodiscard]] auto group_token::parse(parse_context& ctx) -> token_ptr + { + if (ctx.current() != '(') {return nullptr; } - // Provide 'to_string' and 'to_code' functions to print enumerator - // name(s) as human-readable strings or as code expressions + auto has_id {!(ctx.get_modifiers().has(expression_flags::no_group_captures))}; + auto has_pattern {true}; + std::string group_name {""}; + auto group_name_brackets {true}; + std::string modifiers {""}; + auto modifiers_change_to {ctx.get_modifiers()}; -#line 1173 "reflect.h2" - { - if (bitwise) { - to_string_impl += ", separator: 
std::string_view ) -> std::string = { \n" - " ret : std::string = \"(\";\n" - " sep : std::string = ();\n" - " if this == none { return \"(none)\"; }\n"; - } - else { - to_string_impl += ") -> std::string = { \n"; - } + // Skip the '(' + if (!(ctx.next())) {return ctx.error("Group without closing bracket."); } - to_string_impl += " pref := cpp2::to_string(prefix);\n"; + if (ctx.current() == '?') + { + // Special group + if (!(ctx.next_no_skip())) {return ctx.error("Missing character after group opening."); } - for ( - auto const& e : enumerators ) { - if (e.name != "_") {// ignore unnamed values - if (bitwise) { - if (e.name != "none") { - to_string_impl += " if (this & " + cpp2::to_string(e.name) + ") == " + cpp2::to_string(e.name) + " { " - "ret += sep + pref + \"" + cpp2::to_string(e.name) + "\"; sep = separator; " - "}\n"; - } + if (ctx.current() == '<' || ctx.current() == '\'') + { + // Named group + auto end_char {ctx.current()}; + if (end_char == '<') { + end_char = '>'; + }else { + group_name_brackets = false; + } + has_id = true; // Force id for named groups. + if (!(ctx.next())) { return ctx. error("Missing ending bracket for named group."); }/* skip '<' */ + if (!(ctx.grab_until(cpp2::move(end_char), cpp2::impl::out(&group_name)))) {return ctx.error("Missing ending bracket for named group."); } + if (!(ctx.next())) {return ctx.error("Group without closing bracket."); } + } + else {if (ctx.current() == '#') + { + // Comment + std::string comment_str {""}; + static_cast(ctx.next());// Skip # + if (!(ctx.grab_until(")", cpp2::impl::out(&comment_str)))) {return ctx.error("Group without closing bracket."); } + // Do not add comment. Has problems with ranges. + + // Pop token and add a list. 
This fixes comments between a token and a range + if (ctx.has_token()) { + token_vec list {}; + list.push_back(ctx.pop_token()); + list.push_back(CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, "(?#" + cpp2::to_string(cpp2::move(comment_str)) + ")")); + + return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, cpp2::move(list)); } else { - to_string_impl += " if this == " + cpp2::to_string(e.name) + " { return pref + \"" + cpp2::to_string(e.name) + "\"; }\n"; + return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, "(?#" + cpp2::to_string(cpp2::move(comment_str)) + ")"); } } - } + else {if (ctx.current() == '|') + { + // Branch reset group - if (bitwise) { - to_string_impl += " return ret+\")\";\n}\n"; - } - else { - to_string_impl += " return \"invalid " + cpp2::to_string(CPP2_UFCS(name)(t)) + " value\";\n}\n"; + if (!(ctx.next())) { return ctx. error("Missing ending bracket for named group."); }/* skip '|' */ + + auto old_parser_state {ctx.start_group()}; + auto old_branch_state {ctx.branch_reset_new_state()}; + if (!(ctx.parse_until(')'))) {return nullptr; } + ctx.branch_reset_restore_state(cpp2::move(old_branch_state)); + auto inner_ {ctx.end_group(cpp2::move(old_parser_state))}; + + token_vec list {CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, "(?|"), cpp2::move(inner_), CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, ")")}; + return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, cpp2::move(list)); + } + else {if (ctx.current() == '=' || ctx.current() == '!') + { + return parse_lookahead(ctx, "?" + cpp2::to_string(ctx.current()) + "", ctx.current() == '='); + } + else + { + // Simple modifier + has_id = false; + if (!(ctx.grab_until_one_of("):", cpp2::impl::out(&modifiers)))) {return ctx.error("Missing ending bracket for group."); } + if (!(ctx.parser_group_modifiers(modifiers, modifiers_change_to))) { + return nullptr; + } + + if (')' == ctx.current()) { + has_pattern = false; + } + else { + if (!(ctx.next())) { return ctx. 
error("Missing ending bracket for group."); }/* skip ':' */ + } + }}}} } + else {if (ctx.current() == '*') + { + // Named pattern + static_cast(ctx.next());// Skip *. + std::string name {""}; + if (!(ctx.grab_until(':', cpp2::impl::out(&name)))) {return ctx.error("Missing colon for named pattern."); } - CPP2_UFCS(add_member)(t, cpp2::move(to_string_impl)); - } -} + if (name == "pla" || name == "positive_lookahead") { + return parse_lookahead(ctx, "*" + cpp2::to_string(cpp2::move(name)) + ":", true); + } + else {if (name == "nla" || name == "negative_lookahead") { + return parse_lookahead(ctx, "*" + cpp2::to_string(cpp2::move(name)) + ":", false); + } + else { + return ctx.error("Unknown named group pattern: '" + cpp2::to_string(cpp2::move(name)) + "'"); + }} + }} -#line 1212 "reflect.h2" - if (bitwise) { - CPP2_UFCS(add_member)(t, " to_string: (this) -> std::string = to_string_impl( \"\", \", \" );"); - CPP2_UFCS(add_member)(t, " to_code : (this) -> std::string = to_string_impl( \"" + cpp2::to_string(CPP2_UFCS(name)(t)) + "::\", \" | \" );"); - } - else { - CPP2_UFCS(add_member)(t, " to_string: (this) -> std::string = to_string_impl( \"\" );"); - CPP2_UFCS(add_member)(t, " to_code : (this) -> std::string = to_string_impl( \"" + cpp2::to_string(CPP2_UFCS(name)(t)) + "::\" );"); - } -{ -std::string from_string{" from_string: (s: std::string_view) -> " + cpp2::to_string(CPP2_UFCS(name)(t)) + " = { \n"}; + if (cpp2::move(has_pattern)) + { + // Regular group - // Provide a 'from_string' function to parse strings into enumerators + auto r {CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared)}; + if (has_id) { + (*cpp2::impl::assert_not_null(r)).number = ctx.next_group(); -#line 1223 "reflect.h2" - { - std::string_view prefix {""}; - std::string_view combine_op {"return"}; + if (0 != group_name.size()) { + ctx.set_named_group(group_name, (*cpp2::impl::assert_not_null(r)).number); + } + } - // For flags, accept a list that we break apart and then |= together - if (bitwise) + auto 
old_state {ctx.start_group()}; + ctx.set_modifiers(cpp2::move(modifiers_change_to)); + if (!(ctx.parse_until(')'))) {return nullptr; } + (*cpp2::impl::assert_not_null(r)).inner = ctx.end_group(cpp2::move(old_state)); + (*cpp2::impl::assert_not_null(r)).set_string(gen_string(cpp2::move(group_name), cpp2::move(group_name_brackets), !(cpp2::move(has_id)), cpp2::move(modifiers), (*cpp2::impl::assert_not_null(r)).inner)); + + return r; + } + else { - prefix = "flag_"; - combine_op = "ret |="; + // Only a modifier + ctx.set_modifiers(cpp2::move(modifiers_change_to)); - from_string += " ret := none;\n" - " outer: do {\n" - " for cpp2::string_util::split_string_list(s) do (x) {\n"; + return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, "(?" + cpp2::to_string(cpp2::move(modifiers)) + ")"); } - // Otherwise, accept just a single string - else { - from_string += " x := s;\n"; + } + +#line 3075 "reflect.h2" + [[nodiscard]] auto group_token::gen_string(cpp2::impl::in name, cpp2::impl::in name_brackets, cpp2::impl::in has_modifier, cpp2::impl::in modifiers, cpp2::impl::in inner_) -> std::string + { + std::string start {"("}; + if (0 != name.size()) { + if (name_brackets) { + start += "?<" + cpp2::to_string(name.data()) + ">"; + } + else { + start += "?'" + cpp2::to_string(name.data()) + "'"; + } + } + else {if (has_modifier) { + start += "?" 
+ modifiers + ":"; + }} + + return cpp2::move(start) + (*cpp2::impl::assert_not_null(inner_)).to_string() + ")"; + } + +#line 3093 "reflect.h2" + auto group_token::generate_code(generation_context& ctx) const -> void + { + if (-1 != number) { + ctx.add("ctx..set_group_start(" + cpp2::to_string(number) + ", r.pos);"); } -{ -std::string_view else_{""}; -#line 1243 "reflect.h2" - for ( - auto const& e : cpp2::move(enumerators) ) { - from_string += " " + cpp2::to_string(else_) + "if \"" + cpp2::to_string(e.name) + "\" == x { " + cpp2::to_string(combine_op) + " " + cpp2::to_string(e.name) + "; }\n"; - else_ = "else "; + (*cpp2::impl::assert_not_null(inner)).generate_code(ctx); + if (-1 != number) { + ctx.add("ctx..set_group_end(" + cpp2::to_string(number) + ", r.pos);"); + auto tmp_name {ctx.gen_temp()}; + ctx.add("" + cpp2::to_string(tmp_name) + "_func := :() = {"); + ctx.add(" if !r&$*.matched {"); + ctx.add(" ctx&$*..set_group_invalid(" + cpp2::to_string(number) + ");"); + ctx.add(" }"); + ctx.add("};"); + ctx.add("" + cpp2::to_string(tmp_name) + " := cpp2::regex::make_on_return(" + cpp2::to_string(tmp_name) + "_func);"); + ctx.add("_ = " + cpp2::to_string(cpp2::move(tmp_name)) + ";");// Logic is done in the destructor. Same behavior as for guard objects. 
} -} + } -#line 1249 "reflect.h2" - if (bitwise) { - from_string += " else { break outer; }\n" - " }\n" - " return ret;\n" - " } while false;\n"; +#line 3113 "reflect.h2" + auto group_token::add_groups(std::set& groups) const -> void + { + (*cpp2::impl::assert_not_null(inner)).add_groups(groups); + if (-1 != number) { + static_cast(groups.insert(number)); } + } - from_string += " cpp2::type_safety.report_violation( (\"can't convert string '\" + cpp2::to_string(s) + \"' to " + cpp2::to_string(cpp2::move(prefix)) + "enum of type " + cpp2::to_string(CPP2_UFCS(name)(t)) + "\").c_str() );\n" - " return " + cpp2::to_string(cpp2::move(default_value)) + ";\n" - " }\n\n"; + group_token::~group_token() noexcept{} - CPP2_UFCS(add_member)(t, cpp2::move(from_string)); +#line 3125 "reflect.h2" +[[nodiscard]] auto hexadecimal_token_parse(parse_context& ctx) -> token_ptr +{ + if (!((ctx.current() == '\\' && ctx.peek() == 'x'))) {return nullptr; } + + static_cast(ctx.next());// Skip escape. + + if (!(ctx.next())) {return ctx.error("x escape without number."); } + + auto has_brackets {false}; + std::string number_str {""}; + if ('{' == ctx.current()) { + // Bracketed + has_brackets = true; + static_cast(ctx.next());// Skip '{' + if (!(ctx.grab_until('}', cpp2::impl::out(&number_str)))) {return ctx.error("No ending bracket for \\x"); } + } + else { + // Grab two chars + if (!(ctx.grab_n(2, cpp2::impl::out(&number_str)))) {return ctx.error("Missing hexadecimal digits after \\x."); } } -} -#line 1263 "reflect.h2" - CPP2_UFCS(add_member)(t, " from_code: (s: std::string_view) -> " + cpp2::to_string(CPP2_UFCS(name)(t)) + " = { str: std::string = s; return from_string( cpp2::string_util::replace_all(str, \"" + cpp2::to_string(CPP2_UFCS(name)(t)) + "::\", \"\" ) ); }"); + auto number {0}; + if (!(string_util::string_to_int(cpp2::move(number_str), number, 16))) {return ctx.error("Could not convert hexadecimal to int."); } + + // TODO: Change for unicode. 
+ char number_as_char {cpp2::unsafe_narrow(cpp2::move(number))}; + + std::string syntax {string_util::int_to_string<16>(cpp2::impl::as_(number_as_char))}; + if (cpp2::move(has_brackets)) { + syntax = "{" + cpp2::to_string(syntax) + "}"; + } + syntax = "\\x" + cpp2::to_string(syntax) + ""; + + auto r {CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, cpp2::move(number_as_char), ctx.get_modifiers().has(expression_flags::case_insensitive))}; + (*cpp2::impl::assert_not_null(r)).set_string(cpp2::move(syntax)); + return r; } -#line 1276 "reflect.h2" -auto cpp2_enum(meta::type_declaration& t) -> void +#line 3166 "reflect.h2" +[[nodiscard]] auto line_end_token_parse(parse_context& ctx) -> token_ptr { - // Let basic_enum do its thing, with an incrementing value generator - CPP2_UFCS(basic_enum)(t, - [](std::string& value, cpp2::impl::in specified_value) mutable -> void{ - if (!(CPP2_UFCS(empty)(specified_value))) { - value = specified_value; - }else { - auto v {std::strtoll(&CPP2_ASSERT_IN_BOUNDS_LITERAL(value, 0), nullptr, 10)}; - value = cpp2::impl::as_((cpp2::move(v) + 1)); - } - }, - false // disable bitwise operations - ); + if (ctx.current() == '$' || (ctx.current() == '\\' && ctx.peek() == '$')) { + if ((ctx.current() == '\\')) {static_cast(ctx.next()); }// Skip escape + return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, "$", "line_end_token_matcher"); + } + else {if (ctx.current() == '\\' && (ctx.peek() == 'z' || ctx.peek() == 'Z')) { + static_cast(ctx.next());// Skip escape + + auto negate {ctx.current() == 'Z'}; + return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, "\\" + cpp2::to_string(ctx.current()) + "", "line_end_token_matcher"); + } + else { + return nullptr; + }} } -#line 1303 "reflect.h2" -auto flag_enum(meta::type_declaration& t) -> void +#line 3186 "reflect.h2" +[[nodiscard]] auto line_start_token_parse(parse_context& ctx) -> token_ptr { - // Let basic_enum do its thing, with a power-of-two value generator - CPP2_UFCS(basic_enum)(t, - [](std::string& value, 
cpp2::impl::in specified_value) mutable -> void{ - if (!(CPP2_UFCS(empty)(specified_value))) { - value = specified_value; - }else { - auto v {std::strtoll(&CPP2_ASSERT_IN_BOUNDS_LITERAL(value, 0), nullptr, 10)}; - if (cpp2::impl::cmp_less(v,1)) { - value = "1"; - } - else { - value = cpp2::impl::as_((cpp2::move(v) * 2)); - } - } - }, - true // enable bitwise operations - ); + if (ctx.current() != '^' && !((ctx.current() == '\\' && ctx.peek() == 'A'))) {return nullptr; } + + if (ctx.current() == '\\') { + static_cast(ctx.next()); + return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, "\\A", "line_start_token_matcher"); + } + else { + return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, "^", "line_start_token_matcher"); + } } -#line 1349 "reflect.h2" -auto cpp2_union(meta::type_declaration& t) -> void +#line 3211 "reflect.h2" + lookahead_token::lookahead_token(cpp2::impl::in positive_) + : regex_token{ "" } + , positive{ positive_ }{ + +#line 3213 "reflect.h2" + } + +#line 3215 "reflect.h2" + auto lookahead_token::generate_code(generation_context& ctx) const -> void{ + auto inner_name {ctx.generate_func(inner)}; + + ctx.add_check("lookahead_token_matcher(" + cpp2::to_string(ctx.match_parameters()) + ", " + cpp2::to_string(cpp2::move(inner_name)) + ")"); + } + +#line 3221 "reflect.h2" + auto lookahead_token::add_groups(std::set& groups) const -> void{ + (*cpp2::impl::assert_not_null(inner)).add_groups(groups); + } + + lookahead_token::~lookahead_token() noexcept{} + +#line 3229 "reflect.h2" +[[nodiscard]] auto named_class_token_parse(parse_context& ctx) -> token_ptr { - std::vector alternatives {}; + if (ctx.current() != '\\') {return nullptr; } + + auto name {""}; + auto c_next {ctx.peek()}; + + if ( 'd' == c_next) { name = "named_class_digits"; } + else {if ('D' == c_next) {name = "named_class_not_digits"; } + else {if ('h' == c_next) {name = "named_class_hor_space"; } + else {if ('H' == c_next) {name = "named_class_not_hor_space"; } + else {if ('N' == c_next) {name = 
"named_class_no_new_line"; } + else {if ('s' == c_next) {name = "named_class_space"; } + else {if ('S' == c_next) {name = "named_class_not_space"; } + else {if ('v' == c_next) {name = "named_class_ver_space"; } + else {if ('V' == c_next) {name = "named_class_not_ver_space"; } + else {if ('w' == c_next) {name = "named_class_word"; } + else {if ('W' == cpp2::move(c_next)) {name = "named_class_not_word"; } + else { return nullptr; }}}}}}}}}}} + + static_cast(ctx.next());// Skip escape + + return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, "\\" + cpp2::to_string(ctx.current()) + "", "" + cpp2::to_string(cpp2::move(name)) + "::match"); +} + +#line 3257 "reflect.h2" +[[nodiscard]] auto octal_token_parse(parse_context& ctx) -> token_ptr { -auto value{0}; + if (!((ctx.current() == '\\' && ctx.peek() == 'o'))) {return nullptr; } - // 1. Gather: All the user-written members, and find/compute the max size + static_cast(ctx.next());// Skip escape. -#line 1356 "reflect.h2" - for ( + if (!(ctx.next())) { return ctx. error("o escape without number."); } + if (ctx.current() != '{') {return ctx.error("Missing opening bracket for \\o."); } - auto const& m : CPP2_UFCS(get_members)(t) ) { do - if ( CPP2_UFCS(is_member_object)(m)) + std::string number_str {""}; + static_cast(ctx.next());// Skip '{' + if (!(ctx.grab_until('}', cpp2::impl::out(&number_str)))) {return ctx.error("No ending bracket for \\o"); } + + auto number {0}; + if (!(string_util::string_to_int(cpp2::move(number_str), number, 8))) {return ctx.error("Could not convert octal to int."); } + + // TODO: Change for unicode. 
+ char number_as_char {cpp2::unsafe_narrow(cpp2::move(number))}; + + std::string syntax {"\\o{" + cpp2::to_string(string_util::int_to_string<8>(cpp2::impl::as_(number_as_char))) + "}"}; + auto r {CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, cpp2::move(number_as_char), ctx.get_modifiers().has(expression_flags::case_insensitive))}; + (*cpp2::impl::assert_not_null(r)).set_string(cpp2::move(syntax)); + return r; +} + +#line 3294 "reflect.h2" + range_token::range_token() + : regex_token{ "" }{} + +#line 3296 "reflect.h2" + [[nodiscard]] auto range_token::parse(parse_context& ctx) -> token_ptr { - CPP2_UFCS(require)(m, CPP2_UFCS(is_public)(m) || CPP2_UFCS(is_default_access)(m), - "a union alternative cannot be protected or private" - ); + auto r {CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared)}; + if (ctx.current() == '{') + { + if (!(ctx.has_token())) {return ctx.error("'{' without previous element."); } - CPP2_UFCS(require)(m, !(CPP2_UFCS(starts_with)(CPP2_UFCS(name)(m), "is_")) - && !(CPP2_UFCS(starts_with)(CPP2_UFCS(name)(m), "set_")), - "a union alternative's name cannot start with 'is_' or 'set_' - that could cause " - "user confusion with the 'is_alternative' and 'set_alternative' generated functions" - ); + std::string inner {""}; + if (!(ctx.grab_until('}', cpp2::impl::out(&inner)))) {return ctx.error("Missing closing bracket '}'."); } - auto mo {CPP2_UFCS(as_object)(m)}; - CPP2_UFCS(require)(mo, CPP2_UFCS(empty)(CPP2_UFCS(initializer)(mo)), - "a union alternative cannot have an initializer" - ); + inner = string_util::trim_copy(inner.substr(1)); // Remove '{' and white spaces. + if (inner.empty()) {return ctx.error("Empty range specifier. 
Either '{n}', '{n,}', '{,m}' '{n,m}'"); } - // Adding local variable 'e' to work around a Clang warning - value_member_info e {cpp2::impl::as_(CPP2_UFCS(name)(mo)), CPP2_UFCS(type)(mo), cpp2::impl::as_(value)}; - CPP2_UFCS(push_back)(alternatives, cpp2::move(e)); + // Non-greedy or possessive + (*cpp2::impl::assert_not_null(r)).parse_modifier(ctx); - CPP2_UFCS(mark_for_removal_from_enclosing_type)(mo); - static_cast(cpp2::move(mo)); - } while (false); ++value; } -} + // Get range arguments + std::string min_count_str {"-1"}; + std::string max_count_str {"-1"}; -#line 1384 "reflect.h2" - std::string discriminator_type {}; - if (cpp2::impl::cmp_less(CPP2_UFCS(ssize)(alternatives),std::numeric_limits::max())) { - discriminator_type = "i8"; - } - else {if (cpp2::impl::cmp_less(CPP2_UFCS(ssize)(alternatives),std::numeric_limits::max())) { - discriminator_type = "i16"; - } - else {if (cpp2::impl::cmp_less(CPP2_UFCS(ssize)(alternatives),std::numeric_limits::max())) { - discriminator_type = "i32"; - } - else { - discriminator_type = "i64"; - }}} + size_t sep {inner.find(",")}; + if (sep == std::string::npos) + { + min_count_str = inner; + max_count_str = inner; + if (!(string_util::string_to_int(cpp2::move(inner), (*cpp2::impl::assert_not_null(r)).min_count))) {return ctx.error("Could not convert range to number."); } + (*cpp2::impl::assert_not_null(r)).max_count = (*cpp2::impl::assert_not_null(r)).min_count; + } + else + { + std::string inner_first {string_util::trim_copy(inner.substr(0, sep))}; + std::string inner_last {string_util::trim_copy(cpp2::move(inner).substr(cpp2::move(sep) + 1))}; -#line 1399 "reflect.h2" - // 2. Replace: Erase the contents and replace with modified contents + if ((inner_first.empty() && inner_last.empty())) { + return ctx.error("Empty range specifier. 
Either '{n}', '{n,}', '{,m}' '{n,m}'"); + } - CPP2_UFCS(remove_marked_members)(t); -{ -std::string storage{" _storage: cpp2::aligned_storage void + { + if (ctx.peek() == '?') { + kind = range_flags::not_greedy; + static_cast(ctx.next()); } - - storage += " )> = ();\n"; - CPP2_UFCS(add_member)(t, cpp2::move(storage)); + else {if (ctx.peek() == '+') { + kind = range_flags::possessive; + static_cast(ctx.next()); + }} } -} - // Provide discriminator -#line 1423 "reflect.h2" - CPP2_UFCS(add_member)(t, " _discriminator: " + cpp2::to_string(cpp2::move(discriminator_type)) + " = -1;\n"); +#line 3381 "reflect.h2" + [[nodiscard]] auto range_token::gen_mod_string() const& -> std::string + { + if (kind == range_flags::not_greedy) { + return "?"; + } + else {if (kind == range_flags::possessive) { + return "+"; + } + else { + return ""; + }} + } - // Add the alternatives: is_alternative, get_alternative, and set_alternative - for ( - auto const& a : alternatives ) +#line 3394 "reflect.h2" + [[nodiscard]] auto range_token::gen_range_string() const& -> std::string { - CPP2_UFCS(add_member)(t, " is_" + cpp2::to_string(a.name) + ": (this) -> bool = _discriminator == " + cpp2::to_string(a.value) + ";\n"); + std::string r {""}; + if (min_count == max_count) { + r += "{" + cpp2::to_string(min_count) + "}"; + } + else {if (min_count == -1) { + r += "{," + cpp2::to_string(max_count) + "}"; + } + else {if (max_count == -1) { + r += "{" + cpp2::to_string(min_count) + ",}"; + } + else { + r += "{" + cpp2::to_string(min_count) + "," + cpp2::to_string(max_count) + "}"; + }}} - CPP2_UFCS(add_member)(t, " " + cpp2::to_string(a.name) + ": (this) -> forward " + cpp2::to_string(a.type) + " pre(is_" + cpp2::to_string(a.name) + "()) = " - "reinterpret_cast<* const " + cpp2::to_string(a.type) + ">(_storage&)*;\n" - ); + return r; + } - CPP2_UFCS(add_member)(t, " " + cpp2::to_string(a.name) + ": (inout this) -> forward " + cpp2::to_string(a.type) + " pre(is_" + cpp2::to_string(a.name) + "()) = " - 
"reinterpret_cast<*" + cpp2::to_string(a.type) + ">(_storage&)*;\n" - ); +#line 3413 "reflect.h2" + auto range_token::generate_code(generation_context& ctx) const -> void + { + auto inner_name {ctx.generate_func(inner_token)}; + std::set groups {}; + (*cpp2::impl::assert_not_null(inner_token)).add_groups(groups); + auto reset_name {ctx.generate_reset(cpp2::move(groups))}; - CPP2_UFCS(add_member)(t, " set_" + cpp2::to_string(a.name) + ": (inout this, _value: " + cpp2::to_string(a.type) + ") = { " - "if !is_" + cpp2::to_string(a.name) + "() { _destroy(); std::construct_at( reinterpret_cast<*" + cpp2::to_string(a.type) + ">(_storage&), _value); } " - "else { reinterpret_cast<*" + cpp2::to_string(a.type) + ">(_storage&)* = _value; } " - "_discriminator = " + cpp2::to_string(a.value) + "; " - "}\n" - ); + auto next_name {ctx.next_func_name()}; + ctx.add_statefull(next_name, "cpp2::regex::range_token_matcher::match(" + cpp2::to_string(ctx.match_parameters()) + ", " + cpp2::to_string(cpp2::move(inner_name)) + ", " + cpp2::to_string(cpp2::move(reset_name)) + ", other, " + cpp2::to_string(next_name) + ")"); + } - CPP2_UFCS(add_member)(t, " set_" + cpp2::to_string(a.name) + ": (inout this, forward _args...: _) = { " - "if !is_" + cpp2::to_string(a.name) + "() { _destroy(); std::construct_at( reinterpret_cast<*" + cpp2::to_string(a.type) + ">(_storage&), _args...); } " - " else { reinterpret_cast<*" + cpp2::to_string(a.type) + ">(_storage&)* = :" + cpp2::to_string(a.type) + " = (_args...); } " - "_discriminator = " + cpp2::to_string(a.value) + "; " - "}\n" - ); +#line 3424 "reflect.h2" + auto range_token::add_groups(std::set& groups) const -> void{ + (*cpp2::impl::assert_not_null(inner_token)).add_groups(groups); } -{ -std::string destroy{" private _destroy: (inout this) = {\n"}; - // Add destroy + range_token::~range_token() noexcept{} -#line 1456 "reflect.h2" +#line 3437 "reflect.h2" + [[nodiscard]] auto special_range_token::parse(parse_context& ctx) -> token_ptr { - for ( 
- auto const& a : alternatives ) { - destroy += " if _discriminator == " + cpp2::to_string(a.value) + " { std::destroy_at( reinterpret_cast<*" + cpp2::to_string(a.type) + ">(_storage&) ); }\n"; + auto r {CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared)}; + char symbol {'\0'}; + if (ctx.current() == '*') { + (*cpp2::impl::assert_not_null(r)).min_count = 0; + (*cpp2::impl::assert_not_null(r)).max_count = -1; + symbol = '*'; } - - destroy += " _discriminator = -1;\n" - " }\n"; - CPP2_UFCS(add_member)(t, cpp2::move(destroy)); + else {if (ctx.current() == '+') { + (*cpp2::impl::assert_not_null(r)).min_count = 1; + (*cpp2::impl::assert_not_null(r)).max_count = -1; + symbol = '+'; + }else {if (ctx.current() == '?') { + (*cpp2::impl::assert_not_null(r)).min_count = 0; + (*cpp2::impl::assert_not_null(r)).max_count = 1; + symbol = '?'; + }else { + return nullptr; + }}} + + if (!(ctx.has_token())) {return ctx.error("'" + cpp2::to_string(ctx.current()) + "' without previous element."); } + +#line 3461 "reflect.h2" + (*cpp2::impl::assert_not_null(r)).parse_modifier(ctx); + + (*cpp2::impl::assert_not_null(r)).inner_token = ctx.pop_token(); + (*cpp2::impl::assert_not_null(r)).string_rep = (*cpp2::impl::assert_not_null((*cpp2::impl::assert_not_null(r)).inner_token)).to_string() + cpp2::move(symbol) + (*cpp2::impl::assert_not_null(r)).gen_mod_string(); + return r; } -} - // Add the destructor -#line 1468 "reflect.h2" - CPP2_UFCS(add_member)(t, " operator=: (move this) = { _destroy(); _ = this; }"); + special_range_token::~special_range_token() noexcept{} - // Add default constructor - CPP2_UFCS(add_member)(t, " operator=: (out this) = { }"); +#line 3474 "reflect.h2" +[[nodiscard]] auto word_boundary_token_parse(parse_context& ctx) -> token_ptr { -std::string value_set{""}; + if (ctx.current() != '\\') {return nullptr; } - // Add copy/move construction and assignment + if (ctx.peek() == 'b') { + static_cast(ctx.next()); + return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, "\\b", 
"word_boundary_token_matcher"); + } + else {if (ctx.peek() == 'B') { + static_cast(ctx.next()); + return CPP2_UFCS_TEMPLATE(cpp2_new)(cpp2::shared, "\\B", "word_boundary_token_matcher"); + } + else { + return nullptr; + }} +} + +#line 3510 "reflect.h2" + template regex_generator::regex_generator(cpp2::impl::in r, Error_out const& e) + : regex{ r } + , error_out{ e }{ -#line 1475 "reflect.h2" +#line 3513 "reflect.h2" + } + +#line 3515 "reflect.h2" + template [[nodiscard]] auto regex_generator::parse() & -> std::string { - for ( - auto const& a : cpp2::move(alternatives) ) { - value_set += " if that.is_" + cpp2::to_string(a.name) + "() { set_" + cpp2::to_string(a.name) + "( that." + cpp2::to_string(a.name) + "() ); }\n"; + // Extract modifiers and adapt regex. + extract_modifiers(); + + parse_context parse_ctx {regex, error_out}; + if (!(parse_ctx.parse(modifier))) { + return ""; } - value_set += " }\n"; - CPP2_UFCS(add_member)(t, " operator=: (out this, that) = {\n" - " _storage = ();\n" - " _discriminator = -1;\n" - + value_set - ); - CPP2_UFCS(add_member)(t, " operator=: (inout this, that) = {\n" - " _storage = _;\n" - " _discriminator = _;\n" - + cpp2::move(value_set) - ); + source += "{\n"; + source += " wrap: type = {\n"; // TODO: Remove wrapper when template template parameters are available. 
+ source += " context: type == cpp2::regex::match_context;"; + + generation_context gen_ctx {}; + source += gen_ctx.run(parse_ctx.get_as_token()); + source += " entry: (cur: Iter, inout ctx: context) -> cpp2::regex::match_return = {\n"; + source += " ctx..set_group_start(0, cur);\n"; + source += " r := " + cpp2::to_string(gen_ctx.get_entry_func()) + "(cur, ctx, cpp2::regex::true_end_func());\n"; + source += " if r.matched { ctx..set_group_end(0, r.pos); }\n"; + source += " return r;\n"; + source += " }\n"; + + source += cpp2::move(gen_ctx).create_named_group_lookup(parse_ctx.named_groups); + source += "}\n"; + + auto string {(*cpp2::impl::assert_not_null(parse_ctx.get_as_token())).to_string()}; + source += " to_string: () -> std::string = { return R\"(" + cpp2::to_string(modifier_escape) + cpp2::to_string(cpp2::move(string)) + cpp2::to_string(modifier_escape) + cpp2::to_string(modifier) + ")\"; }\n"; + source += "}\n"; + + static_cast(cpp2::move(parse_ctx)); + + return source; } -} -#line 1493 "reflect.h2" -} -#line 1500 "reflect.h2" -auto print(cpp2::impl::in t) -> void +#line 3550 "reflect.h2" + template auto regex_generator::extract_modifiers() & -> void + { + if (regex.find_first_of("'/") == 0) { + char mod_token {CPP2_ASSERT_IN_BOUNDS_LITERAL(regex, 0)}; + + auto end_pos {regex.rfind(mod_token)}; + if (end_pos != 0) { + // Found valid start end escape + modifier = regex.substr(end_pos + 1); + modifier_escape = cpp2::move(mod_token); + regex = regex.substr(1, cpp2::move(end_pos) - 1); + } + } + } + +#line 3566 "reflect.h2" +template [[nodiscard]] auto generate_regex(cpp2::impl::in regex, Err const& err) -> std::string { - std::cout << CPP2_UFCS(print)(t) << "\n"; + regex_generator parser {regex, err}; + auto r {parser.parse()}; + static_cast(cpp2::move(parser)); + return r; } -#line 1520 "reflect.h2" +#line 3576 "reflect.h2" auto regex_gen(meta::type_declaration& t) -> void { auto has_default {false}; @@ -2206,16 +5209,18 @@ auto 
regex_gen(meta::type_declaration& t) -> void CPP2_UFCS(remove_marked_members)(t); for ( auto const& expr : cpp2::move(expressions) ) { - auto regular_expression {::cpp2::regex::generate_regex(expr.second, [_0 = t](auto const& message) mutable -> void { CPP2_UFCS(error)(_0, message); })}; + auto regular_expression {generate_regex(expr.second, [_0 = t](auto const& message) mutable -> void { CPP2_UFCS(error)(_0, message); })}; if (!(regular_expression.empty())) { CPP2_UFCS(add_member)(t, "public " + cpp2::to_string(expr.first) + "_matcher: type = " + cpp2::to_string(cpp2::move(regular_expression)) + ""); CPP2_UFCS(add_member)(t, "public " + cpp2::to_string(expr.first) + ": cpp2::regex::regular_expression = ();"); } } + + CPP2_UFCS(add_runtime_support_include)(t, "cpp2regex.h"); } -#line 1577 "reflect.h2" +#line 3635 "reflect.h2" [[nodiscard]] auto apply_metafunctions( declaration_node& n, type_declaration& rtype, @@ -2330,7 +5335,7 @@ auto regex_gen(meta::type_declaration& t) -> void return true; } -#line 1692 "reflect.h2" +#line 3750 "reflect.h2" } } diff --git a/source/reflect.h2 b/source/reflect.h2 index 9989aedbe..e9830f4f8 100644 --- a/source/reflect.h2 +++ b/source/reflect.h2 @@ -17,6 +17,7 @@ #include "parse.h" #include "cpp2regex.h" +using namespace cpp2::regex; cpp2: namespace = { @@ -35,6 +36,7 @@ compiler_services: @polymorphic_base @copyable type = // Common data members // errors : *std::vector; + includes : *std::set; errors_original_size : int; generated_tokens : *stable_vector; parser : cpp2::parser; @@ -47,13 +49,15 @@ compiler_services: @polymorphic_base @copyable type = operator=: ( out this, errors_ : *std::vector, + includes_ : *std::set, generated_tokens_: *stable_vector ) = { - errors = errors_; + errors = errors_; + includes = includes_; errors_original_size = cpp2::unsafe_narrow(std::ssize(errors*)); - generated_tokens = generated_tokens_; - parser = errors*; + generated_tokens = generated_tokens_; + parser = (errors*, includes*); } // Common 
API @@ -134,6 +138,8 @@ compiler_services: @polymorphic_base @copyable type = } } + add_runtime_support_include: (inout this, s: std::string_view) = _=includes*.emplace( s ); + position: (virtual this) -> source_position = { @@ -1517,56 +1523,2108 @@ print: (t: meta::type_declaration) = // regex := ::cpp2::regex::regular_expression<...>; // ``` // -regex_gen: (inout t: meta::type_declaration) = +error_func: type == std::function< (x: std::string) -> void >; + +// Possible modifiers for a regular expression. +// +expression_flags: @flag_enum type = { - has_default := false; - exact_name := "regex"; - prefix := "regex_"; - expressions : std::map = (); + case_insensitive; // mod: i + multiple_lines; // mod: m + single_line; // mod: s + no_group_captures; // mod: n + perl_code_syntax; // mod: x + perl_code_syntax_in_classes; // mod: xx +} - for t.get_member_objects() do (inout m) - { - name: std::string = m.name(); - if name.starts_with(prefix) || name == exact_name - { - if !m.has_initializer() { - t.error("Regular expression must have an initializer."); - } - m.mark_for_removal_from_enclosing_type(); +// Tokens for regular expressions. +// - if name == exact_name { - if has_default { - t.error("Type can only contain one default named regular expression."); +// Basic class for a regex token. +// +regex_token: @polymorphic_base type = +{ + public string_rep: std::string; + + operator=:(out this, str: std::string) = { + string_rep = str; + } + + operator=:(out this) = { + string_rep = ""; + } + + //parse: (inout ctx: parse_context) -> token_ptr; + generate_code: (virtual this, inout _: generation_context); // Generate the matching code. + + add_groups: (virtual this, inout _: std::set) = {} // Adds all group indices to the set. + to_string: (this) -> std::string = { return string_rep; } // Create a string representation. + set_string: (inout this, s: std::string) = { string_rep = s; } // Set the string representation. 
+} + +token_ptr : type == std::shared_ptr; +token_vec: type == std::vector; + + +// Adds a check in code generation. +// +regex_token_check: @polymorphic_base type = +{ + this: regex_token; + + check: std::string; + + operator=:(out this, str: std::string, check_: std::string) = { + regex_token = (str); + check = check_; + } + + generate_code: (override this, inout ctx: generation_context) = { + ctx..add_check(check + "(" + ctx..match_parameters() + ")"); + } +} + + +// Adds code in code generation. +// +regex_token_code: @polymorphic_base type = +{ + this: regex_token; + + code: std::string; + + operator=:(out this, str: std::string, code_: std::string) = { + regex_token = (str); + code = code_; + } + + generate_code: (override this, inout ctx: generation_context) = { + ctx..add(code); + } +} + + +// Token that does not influence the matching. E.g. comment. +// +regex_token_empty: @polymorphic_base type = +{ + this: regex_token; + + operator=:(out this, str: std::string) = { + regex_token = (str); + } + + generate_code: (override this, inout _: generation_context) = { + // Nothing. + } +} + + +// Represents a list of regex tokens as one token. +// +regex_token_list: @polymorphic_base type = +{ + this: regex_token; + + public tokens: token_vec; + + operator=:(out this, t: token_vec) = { + regex_token = (gen_string(t)); + tokens = t; + } + + generate_code: (override this, inout ctx: generation_context) = { + for tokens do (token) { + token*..generate_code(ctx); + } + } + + add_groups: (override this, inout groups: std::set) = { + for tokens do (token) { + token*..add_groups(groups); + } + } + + gen_string: (vec: token_vec) -> std::string = { + r : std::string = ""; + for vec do (token) { + r += token*..to_string(); + } + return r; + } +} + + +// +// Parse and generation context. +// + +// State of the current capturing group. See '()' +// +parse_context_group_state: @struct type = +{ + cur_match_list: token_vec = (); // Current list of matchers. 
+ alternate_match_lists: token_vec = (); // List of alternate matcher lists. E.g. ab|cd|xy. + modifiers : expression_flags = (); // Current modifiers for the group/regular expression. + + // Start a new alternative. + next_alternative: (inout this) = { + new_list: token_vec = (); + std::swap(new_list, cur_match_list); + post_process_list(new_list); + _ = alternate_match_lists..insert(alternate_match_lists..end(), shared.new(new_list)); + } + + // Swap this state with the other one. + swap: (inout this, inout t: parse_context_group_state) = { + std::swap(cur_match_list, t.cur_match_list); + std::swap(alternate_match_lists, t.alternate_match_lists); + std::swap(modifiers, t.modifiers); + } + + // Convert this state into a regex token. + get_as_token: (inout this) -> token_ptr = { + if alternate_match_lists..empty() { + post_process_list(cur_match_list); + return shared.new(cur_match_list); + } + else { + next_alternative(); + return shared.new(alternate_match_lists); + } + } + + // Add a token to the current matcher list. + add: (inout this, token: token_ptr) = { + cur_match_list..push_back(token); + } + + // True if current matcher list is empty. + empty: (this) -> bool = cur_match_list..empty(); + + + // Apply optimizations to the matcher list. + post_process_list: (inout list: token_vec) = { + // Merge all characters + merge_pos := list..begin(); + while merge_pos != list..end() next (merge_pos++) { + if merge_pos** is char_token { + combine_pos := merge_pos + 1; + while combine_pos != list..end() && combine_pos** is char_token { // The erase advances combine_pos + (merge_pos** as char_token)..append(combine_pos** as char_token); + combine_pos = list..erase(combine_pos); } - has_default = true; } + } + } +} - expr: std::string = m.initializer(); - if expr.starts_with("R\"(") && expr.ends_with(")\"") { - expr = expr.substr(3, expr.size() - 5); - } - else if string_util::is_escaped(expr) { - expr = expr.substr(1, expr.size() - 2); + +// State for the branch reset. 
Takes care of the group numbering. See '(|)'. +// +parse_context_branch_reset_state: @struct type = +{ + is_active : bool = false; // If we have a branch reset group. + cur_group : int = 1; // Next group identifier. 0 == global capture group. + max_group : int = 1; // Maximum group identifier generated. + from : int = 1; // Starting identifier on new alternative branch. + + // Next group identifier. + next: (inout this) -> int = { + g := cur_group; + cur_group += 1; + max_group = max(max_group, cur_group); + + return g; + } + + // Set next group identifier. + set_next: (inout this, g: int) = { + cur_group = g; + max_group = max(max_group, g); + } + + // Start a new alternative branch. + next_alternative: (inout this) = { + if is_active { + cur_group = from; + } + } + + // Initialize for a branch reset group. + set_active_reset: (inout this, restart: int) = { + is_active = true; + cur_group = restart; + from = restart; + max_group = restart; + } +} + + +// Context during parsing of the regular expressions. +// +// Keeps track of the distributed group identifiers, current parsed group and branch resets. +// +parse_context: type = +{ + regex: std::string_view; // Regular expression string. + pos: size_t = 0; // Current parsing position. + root: token_ptr; // Token representing the regular expression. + + cur_group_state: parse_context_group_state = (); + cur_branch_reset_state: parse_context_branch_reset_state = (); + + + public named_groups: std::map = (); + + error_out: error_func; // TODO: Declaring std::function fails for cpp2. + has_error: bool = false; + + operator=:(out this, r: std::string_view, e) = { + regex = r; + root = shared.new(""); + error_out = e; + } + + // State management functions + // + + // Returned group state needs to be stored and provided in `end_group`. 
+ start_group: (inout this) -> parse_context_group_state = + { + old_state: parse_context_group_state = (); + old_state..swap(cur_group_state); + cur_group_state.modifiers = old_state.modifiers; + + return old_state; + } + + // `old_state` argument needs to be from start group. + end_group: (inout this, old_state: parse_context_group_state) -> token_ptr = + { + inner := cur_group_state..get_as_token(); + cur_group_state = old_state; + return inner; + } + + get_modifiers: (this) -> expression_flags = { + return cur_group_state.modifiers; + } + + set_modifiers: (inout this, mod: expression_flags) = { + cur_group_state.modifiers = mod; + } + + // Branch reset management functions + // + + branch_reset_new_state: (inout this) -> parse_context_branch_reset_state = + { + old_state: parse_context_branch_reset_state = (); + std::swap(old_state, cur_branch_reset_state); + + cur_branch_reset_state..set_active_reset(old_state.cur_group); + return old_state; + } + + branch_reset_restore_state: (inout this, old_state: parse_context_branch_reset_state) = + { + max_group := cur_branch_reset_state.max_group; + cur_branch_reset_state = old_state; + cur_branch_reset_state..set_next(max_group); + } + + next_alternative: (inout this) = + { + cur_group_state..next_alternative(); + cur_branch_reset_state..next_alternative(); + } + + // Regex token management + // + add_token: (inout this, token: token_ptr) = { + cur_group_state..add(token); + } + + has_token: (this) -> bool = { + return !cur_group_state..empty(); + } + + pop_token: (inout this) -> token_ptr = + { + r : token_ptr = nullptr; + if has_token() { + r = cur_group_state.cur_match_list..back(); + cur_group_state.cur_match_list..pop_back(); + } + + return r; + } + + get_as_token: (inout this) -> token_ptr = { + return root; + } + + // Group management + // + get_cur_group: (this) -> int = { + return cur_branch_reset_state.cur_group; + } + + next_group: (inout this) -> int = { + return cur_branch_reset_state..next(); + } + + 
set_named_group: (inout this, name: std::string, id: int) = + { + if !named_groups..contains(name) { // Redefinition of group name is not an error. The left most one is retained. + named_groups[name] = id; + } + } + + get_named_group: (this, name: std::string) -> int = + { + iter := named_groups..find(name); + if iter == named_groups..end() { + return -1; + } + else { + return iter*.second; + } + } + + // Position management functions + // + current: (this) -> char = { return regex[pos]; } + + // Get the next token in the regex, skipping spaces according to the parameters. See `x` and `xx` modifiers. + private get_next_position: (in this, in_class: bool, no_skip: bool) -> size_t = + { + perl_syntax := false; + if !no_skip { + if in_class { + perl_syntax = get_modifiers()..has(expression_flags::perl_code_syntax) && get_modifiers()..has(expression_flags::perl_code_syntax_in_classes); } else { - t.error("Unknown string format '(expr)$'"); + perl_syntax = get_modifiers()..has(expression_flags::perl_code_syntax); } + } + cur := pos + 1; + if perl_syntax { + while cur < regex..size() next (cur += 1) { + n: = regex[cur]; - expressions[name] = expr; + if space_class::includes(n) { + continue; + } + else if !in_class && '#' == n { + cur = regex..find("\n", cur); + if std::string::npos == cur { + // No new line, comment runs until the end of the pattern + cur = regex..size(); + } + } + else { // None space none comment char + break; + } + } + } + + // Check for end of file. + if cur > regex..size() { + cur = regex..size(); } + return cur; } - t.remove_marked_members(); + // Return true if next token is available. 
+ private next_impl: (inout this, in_class: bool, no_skip: bool) -> bool = + { + pos = get_next_position(in_class, no_skip); + if pos != regex..size() { + return true; + } + else { + return false; + } + } - for expressions do (expr) { - regular_expression := ::cpp2::regex::generate_regex(expr.second, :(message) = t$.error(message);); + next : (inout this) next_impl(false, false); + next_in_class: (inout this) next_impl( true, false); + next_no_skip : (inout this) next_impl(false, true); - if !regular_expression..empty() { - t.add_member("public (expr.first)$_matcher: type = (regular_expression)$"); - t.add_member("public (expr.first)$: cpp2::regex::regular_expression = ();"); + next_n: (inout this, n: int) -> bool = { + r := true; + cur := 0; + while r && cur < n next (r = next()) { + cur += 1; + } + return r; + } + + has_next: (this) -> bool = { return pos < regex..size(); } + + private grab_until_impl: (inout this, in e: std::string, out r: std::string, any: bool) -> bool = + { + end:= pos; + if any { + end = regex..find_first_of(e, pos); + } + else { + end = regex..find(e, pos); + } + + if end != std::string_view::npos { + r = regex..substr(pos, end - pos); + pos = end; + return true; + } + else { + r = ""; + return false; + } + } + + grab_until: (inout this, in e: std::string, out r: std::string) grab_until_impl(e, out r, false); + grab_until: (inout this, in e: char, out r: std::string) grab_until_impl(std::string(1, e), out r, false); + grab_until_one_of: (inout this, in e: std::string, out r: std::string) grab_until_impl(e, out r, true); + + grab_n: (inout this, in n: int, out r: std::string) -> bool = + { + if pos + n as size_t <= regex..size() { + r = regex..substr(pos, n as size_t); + pos += (n as size_t) - 1; + return true; + } + else { + r = ""; + return false; + } + } + + grab_number: (inout this) -> std::string = + { + start := pos; + start_search := pos; + if regex[start_search] == '-' { + start_search += 1; + } + end := 
regex..find_first_not_of("1234567890", start_search); + + r : std::string; + if end != std::string::npos { + r = regex..substr(start, end - start); + pos = end - 1; + } + else { + r = regex..substr(start); + pos = regex..size() - 1; + } + return r; + } + + private peek_impl: (in this, in_class: bool) -> char = { + next_pos := get_next_position(in_class, false); + if next_pos < regex..size() { + return regex[next_pos]; + } + else { + return '\0'; } } + + peek : (in this) peek_impl(false); + peek_in_class: (in this) peek_impl( true); + + + // Parsing functions + // + parser_group_modifiers: (inout this, change_str: std::string, inout parser_modifiers: expression_flags) -> bool = + { + is_negative := false; + is_reset := false; + + apply := :(flag: expression_flags) = { + if is_negative&$* { + parser_modifiers&$*..clear(flag); + } + else { + parser_modifiers&$*..set(flag); + } + }; + + iter := change_str..begin(); + while iter != change_str..end() next (iter++) + { + cur := iter*; + if cur == '^' { + is_reset = true; + parser_modifiers = expression_flags::none; + } + else if cur == '-' { + if is_reset { _= error("No negative modifier allowed."); return false; } + is_negative = true; + } + else if cur == 'i' { apply(expression_flags::case_insensitive); } + else if cur == 'm' { apply(expression_flags::multiple_lines); } + else if cur == 's' { apply(expression_flags::single_line); } + else if cur == 'n' { apply(expression_flags::no_group_captures); } + else if cur == 'x' { + if (iter + 1) == change_str..end() || (iter + 1)* != 'x' { + // x modifier + apply(expression_flags::perl_code_syntax); + + // Just x unsets xx and remove x also removes xx + parser_modifiers..clear(expression_flags::perl_code_syntax_in_classes); + } + else { // xx modifier + // xx also sets or unsets x + apply(expression_flags::perl_code_syntax); + apply(expression_flags::perl_code_syntax_in_classes); + + iter++; // Skip the second x + } + } + else { + _= error("Unknown modifier: (cur)$"); return 
false; + } + } + + return true; + } + + parse_until:(inout this, term: char) -> bool = { + cur_token: token_ptr = (); + + while valid() next _ = next() + { + if term == current() { break; } + + cur_token = nullptr; + + if !cur_token && valid() { cur_token = alternative_token::parse(this); } + if !cur_token && valid() { cur_token = any_token::parse(this); } + if !cur_token && valid() { cur_token = class_token::parse(this); } + if !cur_token && valid() { cur_token = escape_token_parse(this); } + if !cur_token && valid() { cur_token = global_group_reset_token_parse(this); } + if !cur_token && valid() { cur_token = group_ref_token::parse(this); } + if !cur_token && valid() { cur_token = group_token::parse(this); } + if !cur_token && valid() { cur_token = hexadecimal_token_parse(this); } + if !cur_token && valid() { cur_token = line_end_token_parse(this); } + if !cur_token && valid() { cur_token = line_start_token_parse(this); } + if !cur_token && valid() { cur_token = named_class_token_parse(this); } + if !cur_token && valid() { cur_token = octal_token_parse(this); } + if !cur_token && valid() { cur_token = range_token::parse(this); } + if !cur_token && valid() { cur_token = special_range_token::parse(this); } + if !cur_token && valid() { cur_token = word_boundary_token_parse(this); } + + // Everything else is matched as it is. 
+            if !cur_token && valid() { cur_token = char_token::parse(this); }
+
+            // A token is only accepted while the context is still valid (input left, no error reported).
+            if cur_token && valid() {
+                add_token(cur_token);
+            } else {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    // Parse the whole regular expression.
+    //
+    // `modifiers` is the modifier string applied before parsing starts. Returns false if the
+    // modifiers are rejected or `parse_until` fails; on success `root` is replaced by the
+    // token of the current group state.
+    parse: (inout this, modifiers: std::string) -> bool =
+    {
+
+        flags : expression_flags = ();
+        if !parser_group_modifiers(modifiers, flags) { return false; }
+        set_modifiers(flags);
+
+        r := parse_until('\0');
+        if r {
+            root = cur_group_state..get_as_token();
+        }
+
+        return r;
+    }
+
+    // Misc functions
+
+    // Current parsing position.
+    get_pos: (this) pos;
+    // Copy of the regex text from `start` to `end`, both inclusive.
+    get_range: (this, start: size_t, end: size_t) std::string(regex..substr(start, end - start + 1));
+    // True while there is input left and no error has been reported.
+    valid: (this) -> bool = { return has_next() && !has_error; }
+
+    // Record an error: sets `has_error`, forwards a formatted message to `error_out` and
+    // returns nullptr so that parse functions can write `return ctx..error(...)`.
+    error: (inout this, err: std::string) -> token_ptr = {
+        has_error = true;
+        error_out("Error during parsing of regex '(regex)$' at position '(pos)$': (err)$");
+        return nullptr;
+    }
+}
+
+
+// Context for one function generation. Generation of functions can be interleaved,
+// therefore we buffer the code for one function here.
+//
+generation_function_context: @struct type = {
+    code: std::string = "";   // Code generated so far for this function.
+    tabs: std::string = "";   // Current indentation prefix.
+
+    // Add `c` indentation levels.
+    add_tabs: (inout this, c: int) = {
+        i: int = 0;
+        while i < c next i += 1 {
+            tabs += " ";
+        }
+    }
+
+    // Remove `c` indentation levels again.
+    //
+    // The previous implementation was `tabs..substr(0, c * 2)`, i.e. it kept the FIRST
+    // `c * 2` characters. That is not the inverse of `add_tabs`: whenever the indentation
+    // was longer than `c * 2` characters only part of it was removed, and the result did
+    // not depend on how much had been added. Now the last `c * 2` characters are dropped,
+    // clamped so the indentation can at most become empty.
+    //
+    // NOTE(review): this assumes one level is two characters wide (the `* 2` factor was
+    // already present). The literal appended by `add_tabs` looks whitespace-collapsed in
+    // this copy of the patch - confirm that both agree.
+    remove_tabs: (inout this, c: int) = {
+        remove: size_t = (c as size_t) * 2;
+        if remove > tabs..size() {
+            remove = tabs..size();
+        }
+        tabs = tabs..substr(0, tabs..size() - remove);
+    }
+}
+
+
+// Context for generating the state machine.
+generation_context: type =
+{
+    gen_stack: std::vector = (1); // Element 0 contains all the code.
+
+    matcher_func: int = 0;          // Counter used to name generated matcher functions.
+    reset_func: int = 0;            // Counter used to name generated reset functions.
+    temp_name: int = 0;             // Counter used to name generated temporaries.
+    entry_func: std::string = "";   // Name of the entry function, set by `run`.
+
+    // Generation helpers
+    //
+    match_parameters: (this) -> std::string = { return "r.pos, ctx"; }
+
+    // Code generation.
+
+    // Add code line.
+    add: (inout this, s: std::string) = {
+        cur := get_current();
+        cur*.code += "(cur*.tabs)$(s)$\n";
+    }
+
+    // Add check for token. The check needs to be a function call that returns a boolean.
+    // Appends one line to the current function context: if `cpp2::regex::<check>` is false the
+    // generated code sets `r.matched = false` and breaks out of the enclosing `do { } while false`.
+    add_check: (inout this, check: std::string) = {
+        cur := get_current();
+        cur*.code += "(cur*.tabs)$if !cpp2::regex::(check)$ { r.matched = false; break; }\n";
+    }
+
+    // Add a stateful check. The check needs to return a `match_return`.
+    //
+    // Closes the function currently being generated with `check` as its continuation and
+    // immediately opens the next one. `next_func` is expected in the "name()" form produced by
+    // `next_func_name`; the trailing two characters are cut off to obtain the bare name.
+    add_statefull: (inout this, next_func: std::string, check: std::string) =
+    {
+        end_func_statefull(check);
+
+        name := next_func..substr(0, next_func..size() - 2);
+        start_func_named(name);
+    }
+
+    // Open a new function context and emit the header of the generated struct `name`:
+    // its `operator()`, the initial `r := ctx..pass(cur)` and the opening `do {`.
+    // The indentation is raised by three levels for the body that follows.
+    protected start_func_named: (inout this, name: std::string) =
+    {
+        cur := new_context();
+
+        cur*.code += "(cur*.tabs)$(name)$: @struct type = {\n";
+        cur*.code += "(cur*.tabs)$ operator(): (this, cur: Iter, inout ctx: context, other) -> cpp2::regex::match_return = {\n";
+        cur*.code += "(cur*.tabs)$ r := ctx..pass(cur);\n";
+        cur*.code += "(cur*.tabs)$ do {\n";
+        cur*..add_tabs(3);
+    }
+
+    // Open a new function with a freshly generated name and return it in call form ("name()").
+    protected start_func: (inout this) -> std::string =
+    {
+        name := gen_func_name();
+        start_func_named(name);
+        return name + "()";
+    }
+
+    // Close the function opened by `start_func_named`.
+    //
+    // Emits the end of the `do { } while false` block; on a match the generated code assigns
+    // the expression `s` to `r`, otherwise it sets `r.pos = ctx.end`. The finished function is
+    // then merged into the base context by `finish_context`.
+    protected end_func_statefull: (inout this, s: std::string) =
+    {
+        cur := get_current();
+        cur*..remove_tabs(3);   // Undo the three levels added in `start_func_named`.
+        cur*.code += "\n";
+        cur*.code += "(cur*.tabs)$ } while false;\n";
+        cur*.code += "(cur*.tabs)$ if r.matched {\n";
+        cur*.code += "(cur*.tabs)$ r = (s)$;\n";
+        cur*.code += "(cur*.tabs)$ }\n";
+        cur*.code += "(cur*.tabs)$ else {\n";
+        cur*.code += "(cur*.tabs)$ r.pos = ctx.end;\n";
+        cur*.code += "(cur*.tabs)$ }\n";
+        cur*.code += "(cur*.tabs)$ return r;\n";
+        cur*.code += "(cur*.tabs)$ }\n";
+        cur*.code += "(cur*.tabs)$}\n";
+
+        finish_context();
+    }
+
+    // Generate the function for a token.
+    //
+    // The token emits its code between `start_func` and `end_func_statefull`; the generated
+    // function continues with `other(r.pos, ctx)` on success. Returns the name in call form.
+    generate_func: (inout this, token: token_ptr) -> std::string =
+    {
+        name := start_func();
+        token*..generate_code(this);
+        end_func_statefull("other((match_parameters())$)");
+
+        return name;
+    }
+
+    // Generate the reset for a list of group identifiers.
+ generate_reset: (inout this, groups: std::set) -> std::string = + { + if groups..empty() { + return "cpp2::regex::no_reset()"; + } + + name := gen_reset_func_name(); + cur := new_context(); + + cur*.code += "(cur*.tabs)$(name)$: @struct type = {\n"; + cur*.code += "(cur*.tabs)$ operator(): (this, inout ctx) = {\n"; + for groups do (g) { + cur*.code += "(cur*.tabs)$ ctx..set_group_invalid((g)$);\n"; + } + cur*.code += "(cur*.tabs)$ }\n"; + cur*.code += "(cur*.tabs)$}\n"; + + finish_context(); + + return name + "()"; + } + + // Name generation + // + protected gen_func_name: (inout this) -> std::string = { + cur_id : = matcher_func; + matcher_func += 1; + return "func_(cur_id)$"; + } + + next_func_name: (inout this) -> std::string = { + return gen_func_name() + "()"; + } + + protected gen_reset_func_name: (inout this) -> std::string = { + cur_id : = reset_func; + reset_func += 1; + return "reset_(cur_id)$"; + } + + gen_temp: (inout this) -> std::string = { + cur_id := temp_name; + temp_name += 1; + return "tmp_(cur_id)$"; + } + + // Context management + // + new_context: (inout this) -> *generation_function_context = { + gen_stack..push_back(generation_function_context()); + cur := get_current(); + cur*.tabs = " "; + + return cur; + } + + finish_context: (inout this) = { + cur := get_current(); + base := get_base(); + base*.code += cur*.code; + + gen_stack..pop_back(); + } + + // Misc functions + // + private get_current: (inout this) -> *generation_function_context = { + return gen_stack..back()&; + } + + private get_base: (inout this) -> *generation_function_context = { + return gen_stack[0]&; + } + + get_entry_func: (this) -> std::string = { + return entry_func; + } + + create_named_group_lookup: (this, named_groups: std::map) -> std::string = + { + res: std::string = "get_named_group_index: (name) -> int = {\n"; + + // Generate if selection. 
+ sep: std::string = ""; + for named_groups do (cur) { + res += "(sep)$if name == \"(cur.first)$\" { return (cur.second)$; }"; + sep = "else "; + } + + // Generate else branch or return if list is empty. + if named_groups..empty() { + res += " _ = name;\n"; + res += " return -1;\n"; + } + else { + res += " else { return -1; }\n"; + } + res += "}\n"; + return res; + } + + + // Run the generation for the token. + run: (inout this, token: token_ptr) -> std::string = { + entry_func = generate_func(token); + + return get_base()*.code; + } +} + +// Regex syntax: | Example: ab|ba +// +// Non greedy implementation. First alternative that matches is chosen. +// +alternative_token: @polymorphic_base type = +{ + this: regex_token_empty = (""); // No code gen here. alternative_token_gen is created in the parse_context + + operator=:(out this) = {} + + parse: (inout ctx: parse_context) -> token_ptr = { + if ctx..current() != '|' { return nullptr; } + + if !ctx..has_token() { return ctx..error("Alternative with no content."); } + ctx..next_alternative(); + return shared.new(); + } +} + +alternative_token_gen: @polymorphic_base type = +{ + this: regex_token; + + alternatives: token_vec; + + operator=: (out this, a: token_vec) = { + regex_token = gen_string(a); + alternatives = a; + } + + generate_code: (override this, inout ctx: generation_context) = + { + functions: std::string = ""; + + for alternatives do (cur) { + groups: std::set = (); + cur*..add_groups(groups); + + functions += ", " + ctx..generate_func(cur); + functions += ", " + ctx..generate_reset(groups); + } + + next_name := ctx..next_func_name(); + + ctx..add_statefull(next_name, "cpp2::regex::alternative_token_matcher::match((ctx..match_parameters())$, other, (next_name)$ (functions)$)"); + } + + add_groups: (override this, inout groups: std::set) = + { + for alternatives do (cur) { + cur*..add_groups(groups); + } + } + + gen_string: (a: token_vec) -> std::string = + { + r: std::string = ""; + sep: std::string = ""; 
+ + for a do (cur) { + r += sep + cur*..to_string(); + sep = "|"; + } + + return r; + } +} + + +// Regex syntax: . +// +any_token: @polymorphic_base type = +{ + this: regex_token_check = ("."); + + operator=:(out this, single_line: bool) = { + regex_token_check = (".", "any_token_matcher"); + } + + parse: (inout ctx: parse_context) -> token_ptr = { + if '.' != ctx..current() { return nullptr;} + + return shared.new(ctx..get_modifiers()..has(expression_flags::single_line)); + } +} + + +// Regex syntax: a +// +char_token: @polymorphic_base type = +{ + this: regex_token; + + token : std::string; + ignore_case: bool; + + operator=: (out this, t: char, ignore_case_: bool) = { + regex_token = (std::string(1, t)); + token = t; + ignore_case = ignore_case_; + } + + parse: (inout ctx: parse_context) -> token_ptr = { + return shared.new(ctx..current(), ctx..get_modifiers()..has(expression_flags::case_insensitive)); + } + + generate_code: (override this, inout ctx: generation_context) = + { + if ignore_case { + upper: std::string = token; + lower: std::string = token; + + (copy i: size_t = 0) while i < token..size() next i += 1 { + lower[i] = string_util::safe_tolower(token[i]); + upper[i] = string_util::safe_toupper(token[i]); + } + + if upper != lower { + gen_case_insensitive(lower, upper, ctx); + } + else { + gen_case_sensitive(ctx); + } + } + else { + gen_case_sensitive(ctx); + } + } + + gen_case_insensitive: (this, lower: std::string, upper: std::string, inout ctx: generation_context) = + { + name: std::string = "str_(ctx..gen_temp())$"; + lower_name: std::string = "lower_(name)$"; + upper_name: std::string = "upper_(name)$"; + size := token..size(); + ctx..add("(lower_name)$ : std::array = \"(add_escapes(lower))$\";"); // TODO: Add constexpr when Issue https://github.com/hsutter/cppfront/issues/1104 is resolved. 
+ ctx..add("(upper_name)$ : std::array = \"(add_escapes(upper))$\";"); // TODO: Add constexpr when Issue https://github.com/hsutter/cppfront/issues/1104 is resolved. + ctx..add("if std::distance(r.pos, ctx.end) < (size)$ {"); + ctx..add(" r.matched = false;"); + ctx..add(" break;"); + ctx..add("}"); + ctx..add(""); + ctx..add("(copy i : int = 0) while i < (size)$ next (i += 1) {"); + ctx..add(" if !((lower_name)$[i] == r.pos[i] || (upper_name)$[i] == r.pos[i]) { r.matched = false; }"); + ctx..add("}"); + ctx..add(""); + ctx..add("if r.matched { r.pos += (size)$; }"); + ctx..add("else { break; }"); + } + + gen_case_sensitive: (this, inout ctx: generation_context) = + { + name: std::string = "str_(ctx..gen_temp())$"; + size := token..size(); + ctx..add("(name)$ : std::array = \"(add_escapes(token))$\";"); // TODO: Add constexpr when Issue https://github.com/hsutter/cppfront/issues/1104 is resolved. + ctx..add("if std::distance(r.pos, ctx.end) < (size)$ {"); + ctx..add(" r.matched = false;"); + ctx..add(" break;"); + ctx..add("}"); + ctx..add(""); + ctx..add("(copy i : int = 0) while i < (size)$ next (i += 1) {"); + ctx..add(" if (name)$[i] != r.pos[i] { r.matched = false; }"); + ctx..add("}"); + ctx..add(""); + ctx..add("if r.matched { r.pos += (size)$; }"); + ctx..add("else { break; }"); + } + + add_escapes: (this, copy str: std::string) -> std::string = + { + str = string_util::replace_all(str, "\\", "\\\\"); + str = string_util::replace_all(str, "\a", "\\a"); + str = string_util::replace_all(str, "\f", "\\f"); + str = string_util::replace_all(str, "\x1b", "\" \"\\x1b\" \""); // Generate a separated string. This prevents + // situations like `\x1bblub` from generating + // wrong hex characters. 
+        str = string_util::replace_all(str, "\n", "\\n");
+        str = string_util::replace_all(str, "\r", "\\r");
+        str = string_util::replace_all(str, "\t", "\\t");
+
+        return str;
+    }
+
+    // Merge another char token into this one by concatenating both the matched text
+    // and the string representation.
+    append: (inout this, that) = {
+        this.token += that.token;
+        this.string_rep += that.string_rep;
+    }
+}
+
+
+// Regex syntax: [...]  Example: [abcx-y[:digits:]]
+//
+// NOTE(review): several template argument lists in this type (e.g. `std::vector = (...)`,
+// `shared.new(...)`, the matcher name strings) appear to have been lost in this copy of the
+// patch; they are reproduced exactly as found - confirm against the upstream file.
+//
+class_token: @polymorphic_base type =
+{
+    this : regex_token = ();
+
+    negate : bool;                  // True for a negated class ([^...]).
+    case_insensitive: bool;         // True if the `i` modifier was active when the class was parsed.
+    class_str : std::string;        // Joined list of generated class entry matchers.
+
+    operator=: (out this, negate_: bool, case_insensitive_: bool, class_str_: std::string, str: std::string) =
+    {
+        regex_token = str;
+        negate = negate_;
+        case_insensitive = case_insensitive_;
+        class_str = class_str_;
+    }
+
+    // Parse a bracketed character class starting at the current '['.
+    //
+    // Returns nullptr without consuming input if the current character is not '[', the
+    // result of `ctx..error(...)` (nullptr) on malformed input, and a new class token otherwise.
+    //
+    // TODO: Rework class generation: Generate check functions for classes.
+    parse: (inout ctx: parse_context) -> token_ptr =
+    {
+        if ctx..current() != '[' { return nullptr; }
+
+        start_pos := ctx..get_pos();
+
+        supported_classes: std::vector = ("alnum", "alpha", "ascii", "blank", "cntrl", "digits", "graph",
+                                          "lower", "print", "punct", "space", "upper", "word", "xdigit");
+
+        classes: std::vector = ();
+
+        // First step: parse until the end bracket and push single chars, ranges or groups on the class stack.
+        is_negate := false;
+        first := true;    // Still at the first entry; a ']' or '-' seen here is a literal.
+        range := false;   // A '-' was seen and waits for the upper bound of the range.
+        while ctx..next_in_class() && (ctx..current() != ']' || first)
+        {
+            // Only a '^' directly after the opening '[' negates the class. Previously every
+            // '^' set the flag and was dropped, so e.g. `[a^b]` was parsed as a negated class
+            // of 'a' and 'b' instead of a class matching 'a', '^' or 'b'. A second leading
+            // '^' (as in `[^^]`) now falls through and is handled as a literal.
+            if first && !is_negate && ctx..current() == '^'
+            {
+                is_negate = true;
+                continue; // Skip rest of the loop. Also the first update.
+            }
+
+            if ctx..current() == '[' && ctx..peek_in_class() == ':'
+            {
+                // We have a character class.
+                _ = ctx..next_n(2); // Skip [:
+
+                name: std::string = "";
+                if !ctx..grab_until(":]", out name) { return ctx..error("Could not find end of character class."); }
+                if supported_classes..end() == std::find(supported_classes..begin(), supported_classes..end(), name) {
+                    return ctx..error("Unsupported character class. Supported ones are: (string_util::join(supported_classes))$");
+                }
+
+                classes..push_back("[:(name)$:]");
+
+                _ = ctx..next(); // Skip ':' pointing to the ending ']'.
+            }
+            else if ctx..current() == '\\'
+            {
+                if ctx..next_no_skip() && (ctx..current() != ']')
+                {
+                    // With both the `x` and `xx` modifiers an escaped space is a literal space.
+                    if ' ' == ctx..current()
+                        && ctx..get_modifiers()..has(expression_flags::perl_code_syntax)
+                        && ctx..get_modifiers()..has(expression_flags::perl_code_syntax_in_classes)
+                    {
+                        classes..push_back(std::string(1, ctx..current()));
+                    }
+                    else {
+                        // Short class escapes are stored like named classes.
+                        name := "";
+                        if 'd' == ctx..current() { name = "short_digits"; }
+                        else if 'D' == ctx..current() { name = "short_not_digits"; }
+                        else if 'h' == ctx..current() { name = "short_hor_space"; }
+                        else if 'H' == ctx..current() { name = "short_not_hor_space"; }
+                        else if 's' == ctx..current() { name = "short_space"; }
+                        else if 'S' == ctx..current() { name = "short_not_space"; }
+                        else if 'v' == ctx..current() { name = "short_ver_space"; }
+                        else if 'V' == ctx..current() { name = "short_not_ver_space"; }
+                        else if 'w' == ctx..current() { name = "short_word"; }
+                        else if 'W' == ctx..current() { name = "short_not_word"; }
+                        else {
+                            return ctx..error("Unknown group escape.");
+                        }
+                        classes..push_back("[:(name)$:]");
+                    }
+                } else {
+                    return ctx..error("Escape without a following character.");
+                }
+            }
+            else if ctx..current() == '-'
+            {
+                if first { // Literal if first entry.
+                    classes..push_back("(ctx..current())$");
+                } else {
+                    range = true;
+                }
+            }
+            else
+            {
+                if range { // Modify last element to be a range.
+                    classes..back() += "-(ctx..current())$";
+                    range = false;
+                }
+                else {
+                    classes..push_back("(ctx..current())$");
+                }
+            }
+
+            first = false;
+        }
+
+        if ctx..current() != ']' {
+            return ctx..error("Error end of character class definition before terminating ']'.");
+        }
+        end_pos := ctx..get_pos();
+
+        if range { // If '-' is last entry treat it as a literal char.
+            classes..push_back("-");
+        }
+
+        // Second step: Wrap the item on the class stack with corresponding class implementation.
+        for classes do (inout cur)
+        {
+            if cur..starts_with("[:") {
+                // Named or short class: strip the surrounding "[:" and ":]".
+                name := cur..substr(2, cur..size() - 4);
+                cur = create_matcher("(name)$_class", "");
+            }
+            else if 1 != cur..size() {
+                // Range stored as "a-b": bounds are the first and third character.
+                cur = create_matcher("range_class_entry", "'(cur[0])$', '(cur[2])$'");
+            }
+            else {
+                cur = create_matcher("single_class_entry", "'(cur)$'");
+            }
+        }
+
+        inner := string_util::join(classes);
+        string_rep := ctx..get_range(start_pos, end_pos);
+        return shared.new(
+            is_negate,
+            ctx..get_modifiers()..has(expression_flags::case_insensitive),
+            inner,
+            string_rep
+        );
+    }
+
+    // Emit the check that calls the class matcher with the standard match parameters.
+    generate_code: (override this, inout ctx: generation_context) =
+    {
+        ctx..add_check("class_token_matcher::match((ctx..match_parameters())$)");
+    }
+
+    // Build the qualified name of a class entry matcher.
+    // NOTE(review): `sep` and `template_arguments` are not used in the returned string as it
+    // appears here; presumably the template argument list was lost - confirm.
+    private create_matcher: (name: std::string, template_arguments: std::string) -> std::string =
+    {
+        sep := ", ";
+        if template_arguments..empty() { sep = ""; }
+
+        return "::cpp2::regex::(name)$";
+    }
+}
+
+
+// Regex syntax: \a or \n or \[
+//
+escape_token_parse: (inout ctx: parse_context) -> token_ptr =
+{
+    if ctx..current() != '\\' { return nullptr; }
+
+
+    if std::string::npos == std::string("afenrt^.[]()*{}?+|\\")..find(ctx..peek()) {
+        return nullptr;
+    }
+
+    _ = ctx..next(); // Skip escape
+
+    if std::string::npos != std::string("afenrt\\")..find(ctx..current())
+    {
+        // Escape of string special char
+        t : char = '\0';
+        if 'a' == ctx..current() { t = '\a'; }
+        else if 'f' == ctx..current() { t = '\f'; }
+        else if 'e' == ctx..current() { t = '\x1b'; }
+        else if 'n' == ctx..current() { t = '\n'; }
+        else if 'r' == ctx..current() { t = '\r'; }
+        else if 't' == ctx..current() { t = '\t'; }
+        else if '\\' == ctx..current() { t = '\\'; }
+        else { return ctx..error("Internal: missing switch case for special escape."); }
+
+        r: = shared.new(t, false);
+        r*..set_string("\\(ctx..current())$");
+        return r;
+    }
+    else
+    {
+        // Escape of regex
special char + r := shared.new(ctx..current(), false); + r*..set_string("\\(ctx..current())$"); + return r; + } + +} + + +// Regex syntax: \K Example: ab\Kcd +// +global_group_reset_token_parse: (inout ctx: parse_context) -> token_ptr = +{ + if !(ctx..current() == '\\' && ctx..peek() == 'K') { return nullptr; } + + _ = ctx..next(); // Skip escape. + return shared.new("\\K", "ctx..set_group_start(0, r.pos);"); +} + + +// Regex syntax: \ Example: \1 +// \g{name_or_number} +// \k{name_or_number} +// \k +// \k'name_or_number' +// +group_ref_token: @polymorphic_base type = +{ + this : regex_token = (); + + id : int; + case_insensitive: bool; + + operator=:(out this, id_: int, case_insensitive_: bool, str: std::string) = + { + regex_token = str; + id = id_; + case_insensitive = case_insensitive_; + } + + parse: (inout ctx: parse_context) -> token_ptr = + { + if ctx..current() != '\\' { return nullptr; } + + str : std::string = "\\"; + group : std::string = ""; + + if '0' <= ctx..peek() <= '9' + { + _ = ctx..next(); // Skip escape + group = ctx..grab_number(); + + if group..size() >= 3 as size_t + { + // Octal syntax (\000) not a group ref matcher. 
+ number := 0; + if !string_util::string_to_int(group, number, 8) { return ctx..error("Could not convert octal to int."); } + + number_as_char : char = unsafe_narrow(number); + + token := shared.new(number_as_char, ctx..get_modifiers()..has(expression_flags::case_insensitive)); + token*..set_string("\\(string_util::int_to_string<8>(number_as_char as int))$"); + + return token; + } + + str += group; + // Regular group ref + } + else if 'g' == ctx..peek() + { + _ = ctx..next(); // Skip escape + if !ctx..next() { return ctx..error("Group escape without a following char."); } // Skip g + + str += "g"; + + if ctx..current() == '{' { + str += "{"; + if !(ctx..next() && ctx..grab_until('}', out group)) { return ctx..error("No ending bracket."); } + + str += group + "}"; + } + else { + group = ctx..grab_number(); + str += group; + } + } + else if 'k' == ctx..peek() + { + _ = ctx..next(); // Skip escape + if !ctx..next() { return ctx..error("Group escape without a following char."); } // Skip k + + str += "k"; + + term_char := '\0'; + if ctx..current() == '{' { term_char = '}'; } + else if ctx..current() == '<' { term_char = '>'; } + else if ctx..current() == '\'' { term_char = '\''; } + else { + return ctx..error("Group escape has wrong operator."); + } + + str += ctx..current(); + + if !(ctx..next() && ctx..grab_until(term_char, out group)) { return ctx..error("No ending bracket."); } + + str += group + term_char; + } + else + { + // No group ref matcher + return nullptr; + } + + // Parse the group + group = string_util::trim_copy(group); + group_id : int = 0; + if string_util::string_to_int(group, group_id) + { + if group_id < 0 { + group_id = ctx..get_cur_group() + group_id; + + if group_id < 1 { // Negative and zero are no valid groups. + return ctx..error("Relative group reference does not reference a valid group. 
(Would be (group_id)$.)"); + } + } + + if group_id >= ctx..get_cur_group() { + return ctx..error("Group reference is used before the group is declared."); + } + } + else + { + // Named group + group_id = ctx..get_named_group(group); + if -1 == group_id { return ctx..error("Group names does not exist. (Name is: (group)$)");} + } + + return shared.new(group_id, ctx..get_modifiers()..has(expression_flags::case_insensitive), str); + } + + generate_code: (override this, inout ctx: generation_context) = { + ctx..add_check("group_ref_token_matcher((ctx..match_parameters())$)"); + } +} + + +// Regex syntax: () Example: (abc) +// (?:) (?i:abc) +// (?<>:) (?:abc) +// (?#) (#Step 1 finished) +// (?|) (?|(abc)|(cde)) +// (?=) (?=abc) +// (?!) (?!abc) +// (*: token_ptr = + { + _ = ctx..next(); // Skip last token defining the syntax + + r := shared.new(positive); + + old_state := ctx..start_group(); + if !ctx..parse_until(')') { return ctx..error("Lookahead without a closing bracket."); } + r*.inner = ctx..end_group(old_state); + r*..set_string("((syntax)$(r*.inner*..to_string())$)"); + + return r; + } + + parse: (inout ctx: parse_context) -> token_ptr = + { + if ctx..current() != '(' { return nullptr; } + + has_id := !ctx..get_modifiers()..has(expression_flags::no_group_captures); + has_pattern := true; + group_name : std::string = ""; + group_name_brackets := true; + modifiers : std::string = ""; + modifiers_change_to : = ctx..get_modifiers(); + + // Skip the '(' + if !ctx..next() { return ctx..error("Group without closing bracket."); } + + if ctx..current() == '?' + { + // Special group + if !ctx..next_no_skip() { return ctx..error("Missing character after group opening."); } + + if ctx..current() == '<' || ctx..current() == '\'' + { + // Named group + end_char := ctx..current(); + if end_char == '<' { + end_char = '>'; + } else { + group_name_brackets = false; + } + has_id = true; // Force id for named groups. 
+ if !ctx..next() /* skip '<' */ { return ctx..error("Missing ending bracket for named group."); } + if !ctx..grab_until(end_char, out group_name) { return ctx..error("Missing ending bracket for named group."); } + if !ctx..next() { return ctx..error("Group without closing bracket."); } + } + else if ctx..current() == '#' + { + // Comment + comment_str : std::string = ""; + _ = ctx..next(); // Skip # + if !ctx..grab_until(")", out comment_str) { return ctx..error("Group without closing bracket."); } + // Do not add comment. Has problems with ranges. + + // Pop token and add a list. This fixes comments between a token and a range + if ctx..has_token() { + list : token_vec = (); + list..push_back(ctx..pop_token()); + list..push_back(shared.new("(?#(comment_str)$)")); + + return shared.new(list); + } + else { + return shared.new("(?#(comment_str)$)"); + } + } + else if ctx..current() == '|' + { + // Branch reset group + + if !ctx..next() /* skip '|' */ { return ctx..error("Missing ending bracket for named group."); } + + old_parser_state := ctx..start_group(); + old_branch_state := ctx..branch_reset_new_state(); + if !ctx..parse_until(')') { return nullptr; } + ctx..branch_reset_restore_state(old_branch_state); + inner_ := ctx..end_group(old_parser_state); + + list: token_vec = (shared.new("(?|"), inner_, shared.new(")")); + return shared.new(list); + } + else if ctx..current() == '=' || ctx..current() == '!' 
+ { + return parse_lookahead(ctx, "?(ctx..current())$", ctx..current() == '='); + } + else + { + // Simple modifier + has_id = false; + if !ctx..grab_until_one_of("):", out modifiers) { return ctx..error("Missing ending bracket for group."); } + if !ctx..parser_group_modifiers(modifiers, modifiers_change_to) { + return nullptr; + } + + if ')' == ctx..current() { + has_pattern = false; + } + else { + if !ctx..next() /* skip ':' */ { return ctx..error("Missing ending bracket for group."); } + } + } + } + else if ctx..current() == '*' + { + // Named pattern + _ = ctx..next(); // Skip *. + name: std::string = ""; + if !ctx..grab_until(':', out name) { return ctx..error("Missing colon for named pattern."); } + + if name == "pla" || name == "positive_lookahead" { + return parse_lookahead(ctx, "*(name)$:", true); + } + else if name == "nla" || name == "negative_lookahead" { + return parse_lookahead(ctx, "*(name)$:", false); + } + else { + return ctx..error("Unknown named group pattern: '(name)$'"); + } + } + + if has_pattern + { + // Regular group + + r := shared.new(); + if has_id { + r*.number = ctx..next_group(); + + if 0 != group_name..size() { + ctx..set_named_group(group_name, r*.number); + } + } + + old_state := ctx..start_group(); + ctx..set_modifiers(modifiers_change_to); + if !ctx..parse_until(')') { return nullptr; } + r*.inner = ctx..end_group(old_state); + r*..set_string(gen_string(group_name, group_name_brackets, !has_id, modifiers, r*.inner)); + + return r; + } + else + { + // Only a modifier + ctx..set_modifiers(modifiers_change_to); + + return shared.new("(?(modifiers)$)"); + } + } + + gen_string: (name: std::string, name_brackets: bool, has_modifier: bool, modifiers: std::string, inner_: token_ptr) -> std::string = + { + start : std::string = "("; + if 0 != name..size() { + if name_brackets { + start += "?<(name..data())$>"; + } + else { + start += "?'(name..data())$'"; + } + } + else if has_modifier { + start += "?" 
+ modifiers + ":"; + } + + return start + inner_*..to_string() + ")"; + } + + generate_code: (override this, inout ctx: generation_context) = + { + if -1 != number { + ctx..add("ctx..set_group_start((number)$, r.pos);"); + } + + inner*..generate_code(ctx); + if -1 != number { + ctx..add("ctx..set_group_end((number)$, r.pos);"); + tmp_name := ctx..gen_temp(); + ctx..add("(tmp_name)$_func := :() = {"); + ctx..add(" if !r&$*.matched {"); + ctx..add(" ctx&$*..set_group_invalid((number)$);"); + ctx..add(" }"); + ctx..add("};"); + ctx..add("(tmp_name)$ := cpp2::regex::make_on_return((tmp_name)$_func);"); + ctx..add("_ = (tmp_name)$;"); // Logic is done in the destructor. Same behavior as for guard objects. + } + } + + add_groups: (override this, inout groups: std::set) = + { + inner*..add_groups(groups); + if -1 != number { + _ = groups..insert(number); + } + } +} + + +// Regex syntax: \xHH or \x{H...} (H = hexadecimal digit) Example: \x{62} +// +hexadecimal_token_parse: (inout ctx: parse_context) -> token_ptr = +{ + if !(ctx..current() == '\\' && ctx..peek() == 'x') { return nullptr; } + + _ = ctx..next(); // Skip escape. + + if !ctx..next() { return ctx..error("x escape without number.");} + + has_brackets := false; + number_str: std::string = ""; + if '{' == ctx..current() { + // Bracketed + has_brackets = true; + _ = ctx..next(); // Skip '{' + if !ctx..grab_until('}', out number_str) { return ctx..error("No ending bracket for \\x"); } + } + else { + // Grab two chars + if !ctx..grab_n(2, out number_str) { return ctx..error("Missing hexadecimal digits after \\x."); } + } + + number := 0; + if !string_util::string_to_int(number_str, number, 16) { return ctx..error("Could not convert hexadecimal to int."); } + + // TODO: Change for unicode.
+ number_as_char : char = unsafe_narrow(number); + + syntax: std::string = string_util::int_to_string<16>(number_as_char as int); + if has_brackets { + syntax = "{(syntax)$}"; + } + syntax = "\\x(syntax)$"; + + r := shared.new(number_as_char, ctx..get_modifiers()..has(expression_flags::case_insensitive)); + r*..set_string(syntax); + return r; +} + + +// Regex syntax: $ Example: aa$ +// +line_end_token_parse: (inout ctx: parse_context) -> token_ptr = +{ + if ctx..current() == '$' || (ctx..current() == '\\' && ctx..peek() == '$') { + if (ctx..current() == '\\') { _ = ctx..next(); } // Skip escape + return shared.new("$", "line_end_token_matcher"); + } + else if ctx..current() == '\\' && (ctx..peek() == 'z' || ctx..peek() == 'Z') { + _ = ctx..next(); // Skip escape + + negate := ctx..current() == 'Z'; + return shared.new("\\(ctx..current())$", "line_end_token_matcher"); + } + else { + return nullptr; + } +} + + +// Regex syntax: ^ Example: ^aa +// +line_start_token_parse: (inout ctx: parse_context) -> token_ptr = +{ + if ctx..current() != '^' && !(ctx..current() == '\\' && ctx..peek() == 'A') { return nullptr; } + + if ctx..current() == '\\' { + _ = ctx..next(); + return shared.new("\\A", "line_start_token_matcher"); + } + else { + return shared.new("^", "line_start_token_matcher"); + } +} + + +// Regex syntax: (?=) or (?!) or (*pla), etc. Example: (?=AA) +// +// Parsed in group_token. 
+// +lookahead_token: @polymorphic_base type = +{ + this: regex_token = (""); + + protected positive: bool; + public inner : token_ptr = nullptr; + + operator=: (out this, positive_: bool) = { + positive = positive_; + } + + generate_code: (override this, inout ctx: generation_context) = { + inner_name := ctx..generate_func(inner); + + ctx..add_check("lookahead_token_matcher((ctx..match_parameters())$, (inner_name)$)"); + } + + add_groups: (override this, inout groups: std::set) = { + inner*..add_groups(groups); + } +} + + +// Named character classes: \d \D \h \H \N \s \S \v \V \w \W Example: \d\w +// +named_class_token_parse: (inout ctx: parse_context) -> token_ptr = +{ + if ctx..current() != '\\' { return nullptr; } + + name := ""; + c_next := ctx..peek(); + + if 'd' == c_next { name = "named_class_digits"; } + else if 'D' == c_next { name = "named_class_not_digits"; } + else if 'h' == c_next { name = "named_class_hor_space"; } + else if 'H' == c_next { name = "named_class_not_hor_space"; } + else if 'N' == c_next { name = "named_class_no_new_line"; } + else if 's' == c_next { name = "named_class_space"; } + else if 'S' == c_next { name = "named_class_not_space"; } + else if 'v' == c_next { name = "named_class_ver_space"; } + else if 'V' == c_next { name = "named_class_not_ver_space"; } + else if 'w' == c_next { name = "named_class_word"; } + else if 'W' == c_next { name = "named_class_not_word"; } + else { return nullptr; } + + _ = ctx..next(); // Skip escape + + return shared.new("\\(ctx..current())$", "(name)$::match"); +} + + +// Regex syntax: \o{O...} (O = octal digit) Example: \o{142} +// +octal_token_parse: (inout ctx: parse_context) -> token_ptr = +{ + if !(ctx..current() == '\\' && ctx..peek() == 'o') { return nullptr; } + + _ = ctx..next(); // Skip escape.
+ + if !ctx..next() { return ctx..error("o escape without number.");} + if ctx..current() != '{' { return ctx..error("Missing opening bracket for \\o."); } + + number_str: std::string = ""; + _ = ctx..next(); // Skip '{' + if !ctx..grab_until('}', out number_str) { return ctx..error("No ending bracket for \\o"); } + + number := 0; + if !string_util::string_to_int(number_str, number, 8) { return ctx..error("Could not convert octal to int."); } + + // TODO: Change for unicode. + number_as_char : char = unsafe_narrow(number); + + syntax: std::string = "\\o{(string_util::int_to_string<8>(number_as_char as int))$}"; + r := shared.new(number_as_char, ctx..get_modifiers()..has(expression_flags::case_insensitive)); + r*..set_string(syntax); + return r; +} + + +// Regex syntax: {min, max} Example: a{2,4} +// +range_token: @polymorphic_base type = +{ + this : regex_token = (""); + + protected min_count : int = -1; + protected max_count : int = -1; + protected kind : int = range_flags::greedy; + protected inner_token: token_ptr = nullptr; + + operator=: (out this) = {} + + parse: (inout ctx: parse_context) -> token_ptr = + { + r := shared.new(); + if ctx..current() == '{' + { + if !ctx..has_token() { return ctx..error("'{' without previous element."); } + + inner: std::string = ""; + if !ctx..grab_until('}', out inner) { return ctx..error("Missing closing bracket '}'."); } + + inner = string_util::trim_copy(inner..substr(1)); // Remove '{' and white spaces. + if inner..empty() { return ctx..error("Empty range specifier. 
Either '{n}', '{n,}', '{,m}' '{n,m}'"); } + + // Non-greedy or possessive + r*..parse_modifier(ctx); + + // Get range arguments + min_count_str: std::string = "-1"; + max_count_str: std::string = "-1"; + + sep: size_t = inner..find(","); + if sep == std::string::npos + { + min_count_str = inner; + max_count_str = inner; + if !string_util::string_to_int(inner, r*.min_count) { return ctx..error("Could not convert range to number."); } + r*.max_count = r*.min_count; + } + else + { + inner_first: std::string = string_util::trim_copy(inner..substr(0, sep)); + inner_last: std::string = string_util::trim_copy(inner..substr(sep + 1)); + + if (inner_first..empty() && inner_last..empty()) { + return ctx..error("Empty range specifier. Either '{n}', '{n,}', '{,m}' '{n,m}'"); + } + + if !inner_first..empty() { + min_count_str = inner_first; + if !string_util::string_to_int(inner_first, r*.min_count) { return ctx..error("Could not convert range to number."); } + } + if !inner_last..empty() { + max_count_str = inner_last; + if !string_util::string_to_int(inner_last, r*.max_count) { return ctx..error("Could not convert range to number."); } + } + } + + // Check validity of the range. + if -1 != r*.min_count { + if !(0 <= r*.min_count) { + return ctx..error("Min value in range is negative. Have (r*.min_count)$)"); + } + } + if -1 != r*.max_count { + if !(0 <= r*.max_count) { + return ctx..error("Max value in range is negative. Have (r*.max_count)$)"); + } + if -1 != r*.min_count { + if !(r*.min_count <= r*.max_count) { + return ctx..error("Min and max values in range are wrong it should hold 0 <= min <= max. Have 0 <= (r*.min_count)$ <= (r*.max_count)$"); + } + } + } + + r*.inner_token = ctx..pop_token(); + r*.string_rep = r*.inner_token*..to_string() + r*..gen_range_string() + r*..gen_mod_string(); + + return r; + } + + return nullptr; + } + + parse_modifier: (inout this, inout ctx: parse_context) = + { + if ctx..peek() == '?' 
{ + kind = range_flags::not_greedy; + _ = ctx..next(); + } + else if ctx..peek() == '+' { + kind = range_flags::possessive; + _ = ctx..next(); + } + } + + gen_mod_string: (this) -> std::string = + { + if kind == range_flags::not_greedy { + return "?"; + } + else if kind == range_flags::possessive { + return "+"; + } + else { + return ""; + } + } + + gen_range_string: (this) -> std::string = + { + r : std::string = ""; + if min_count == max_count { + r += "{(min_count)$}"; + } + else if min_count == -1 { + r += "{,(max_count)$}"; + } + else if max_count == -1 { + r += "{(min_count)$,}"; + } + else { + r += "{(min_count)$,(max_count)$}"; + } + + return r; + } + + generate_code: (override this, inout ctx: generation_context) = + { + inner_name := ctx..generate_func(inner_token); + groups: std::set = (); + inner_token*..add_groups(groups); + reset_name := ctx..generate_reset(groups); + + next_name := ctx..next_func_name(); + ctx..add_statefull(next_name, "cpp2::regex::range_token_matcher::match((ctx..match_parameters())$, (inner_name)$, (reset_name)$, other, (next_name)$)"); + } + + add_groups: (override this, inout groups: std::set) = { + inner_token*..add_groups(groups); + } + +} + + +// Regex syntax: *, +, or ? Example: aa* +// +special_range_token: @polymorphic_base type = +{ + this : range_token = (); + + parse: (inout ctx: parse_context) -> token_ptr = + { + r := shared.new(); + symbol: char = '\0'; + if ctx..current() == '*' { + r*.min_count = 0; + r*.max_count = -1; + symbol = '*'; + } + else if ctx..current() == '+' { + r*.min_count = 1; + r*.max_count = -1; + symbol = '+'; + } else if ctx..current() == '?' 
{ + r*.min_count = 0; + r*.max_count = 1; + symbol = '?'; + } else { + return nullptr; + } + + if !ctx..has_token() { return ctx..error("'(ctx..current())$' without previous element."); } + + + r*..parse_modifier(ctx); + + r*.inner_token = ctx..pop_token(); + r*.string_rep = r*.inner_token*..to_string() + symbol + r*..gen_mod_string(); + return r; + } +} + + +// Regex syntax: \b or \B Example: \bword\b +// +// Matches the start and end of word boundaries. +// +word_boundary_token_parse: (inout ctx: parse_context) -> token_ptr = +{ + if ctx..current() != '\\' { return nullptr; } + + if ctx..peek() == 'b' { + _ = ctx..next(); + return shared.new("\\b", "word_boundary_token_matcher"); + } + else if ctx..peek() == 'B' { + _ = ctx..next(); + return shared.new("\\B", "word_boundary_token_matcher"); + } + else { + return nullptr; + } +} + + +//----------------------------------------------------------------------- +// +// Parser for regular expression. +// +//----------------------------------------------------------------------- +// + +// Parser and generator for regular expressions. +regex_generator: type = +{ + regex: std::string_view; + modifier: std::string = ""; + modifier_escape: std::string = ""; + + error_out: Error_out; + + source: std::string = ""; + + operator=: (out this, r: std::string_view, e: Error_out) = { + regex = r; + error_out = e; + } + + parse:(inout this) -> std::string = + { + // Extract modifiers and adapt regex. + extract_modifiers(); + + parse_ctx: parse_context = (regex, error_out); + if !parse_ctx..parse(modifier) { + return ""; + } + + source += "{\n"; + source += " wrap: type = {\n"; // TODO: Remove wrapper when template template parameters are available.
+ source += " context: type == cpp2::regex::match_context;"; + + gen_ctx: generation_context = (); + source += gen_ctx..run(parse_ctx..get_as_token()); + source += " entry: (cur: Iter, inout ctx: context) -> cpp2::regex::match_return = {\n"; + source += " ctx..set_group_start(0, cur);\n"; + source += " r := (gen_ctx..get_entry_func())$(cur, ctx, cpp2::regex::true_end_func());\n"; + source += " if r.matched { ctx..set_group_end(0, r.pos); }\n"; + source += " return r;\n"; + source += " }\n"; + + source += gen_ctx..create_named_group_lookup(parse_ctx.named_groups); + source += "}\n"; + + string := parse_ctx..get_as_token()*..to_string(); + source += " to_string: () -> std::string = { return R\"((modifier_escape)$(string)$(modifier_escape)$(modifier)$)\"; }\n"; + source += "}\n"; + + _ = parse_ctx; + + return source; + } + + private extract_modifiers: (inout this) = + { + if regex..find_first_of("'/") == 0 { + mod_token: char = regex[0]; + + end_pos := regex..rfind(mod_token); + if end_pos != 0 { + // Found valid start end escape + modifier = regex..substr(end_pos + 1); + modifier_escape = mod_token; + regex = regex..substr(1, end_pos - 1); + } + } + } +} + +generate_regex: (regex: std::string_view, err: Err) -> std::string = +{ + parser: regex_generator = (regex, err); + r := parser..parse(); + _ = parser; + return r; +} + + + +regex_gen: (inout t: meta::type_declaration) = +{ + has_default := false; + exact_name := "regex"; + prefix := "regex_"; + expressions : std::map = (); + + for t.get_member_objects() do (inout m) + { + name: std::string = m.name(); + + if name.starts_with(prefix) || name == exact_name + { + if !m.has_initializer() { + t.error("Regular expression must have an initializer."); + } + m.mark_for_removal_from_enclosing_type(); + + if name == exact_name { + if has_default { + t.error("Type can only contain one default named regular expression."); + } + has_default = true; + } + + expr: std::string = m.initializer(); + if expr.starts_with("R\"(") && 
expr.ends_with(")\"") { + expr = expr.substr(3, expr.size() - 5); + } + else if string_util::is_escaped(expr) { + expr = expr.substr(1, expr.size() - 2); + } + else { + t.error("Unknown string format '(expr)$'"); + } + + expressions[name] = expr; + } + } + + t.remove_marked_members(); + + for expressions do (expr) { + regular_expression := generate_regex(expr.second, :(message) = t$.error(message);); + + if !regular_expression..empty() { + t.add_member("public (expr.first)$_matcher: type = (regular_expression)$"); + t.add_member("public (expr.first)$: cpp2::regex::regular_expression = ();"); + } + } + + t.add_runtime_support_include( "cpp2regex.h" ); } diff --git a/source/sema.h b/source/sema.h index 46e2c509d..b56f71516 100644 --- a/source/sema.h +++ b/source/sema.h @@ -29,7 +29,7 @@ auto parser::apply_type_metafunctions( declaration_node& n ) assert(n.is_type()); // Get the reflection state ready to pass to the function - auto cs = meta::compiler_services{ &errors, generated_tokens }; + auto cs = meta::compiler_services{ &errors, &includes, generated_tokens }; auto rtype = meta::type_declaration{ &n, cs }; return apply_metafunctions( diff --git a/source/to_cpp1.h b/source/to_cpp1.h index bc66cd5d7..678d1070c 100644 --- a/source/to_cpp1.h +++ b/source/to_cpp1.h @@ -1014,6 +1014,7 @@ class cppfront { std::string sourcefile; std::vector errors; + std::set includes; // For building // @@ -1166,7 +1167,7 @@ class cppfront : sourcefile{ filename } , source { errors } , tokens { errors } - , parser { errors } + , parser { errors, includes } , sema { errors } { // "Constraints enable creativity in the right directions" @@ -1323,6 +1324,10 @@ class cppfront if (flag_no_rtti) { printer.print_extra( "#define CPP2_NO_RTTI Yes\n" ); } + + for (auto& h: includes) { + printer.print_extra( "#include \"" + h + "\"\n" ); + } } auto map_iter = tokens.get_map().cbegin();