From 190941c3043297b00fc125a1de7e0ff7497f6f0f Mon Sep 17 00:00:00 2001 From: Christian Parpart Date: Tue, 11 Apr 2023 10:07:43 +0200 Subject: [PATCH] wip Signed-off-by: Christian Parpart --- src/regex_dfa/Lexable.h | 10 ++++++---- src/regex_dfa/Lexer-inl.h | 21 +-------------------- src/regex_dfa/Lexer_test.cpp | 5 +++-- 3 files changed, 10 insertions(+), 26 deletions(-) diff --git a/src/regex_dfa/Lexable.h b/src/regex_dfa/Lexable.h index 3384edb7b6..343408d3f7 100644 --- a/src/regex_dfa/Lexable.h +++ b/src/regex_dfa/Lexable.h @@ -322,7 +322,7 @@ inline Token LexerIterator::recogniz stack.push_back(BadState); if constexpr (Trace) - tracef("recognize: startState {}, offset {} {}", + tracef("recognizeOne: startState {}, offset {} {}", stateName(state), offset_, isBeginOfLine_ ? "BOL" : "no-BOL"); @@ -331,6 +331,7 @@ inline Token LexerIterator::recogniz while (state != ErrorState) { Symbol ch = nextChar(); // one of: input character, ERROR or EOF + fmt::print("recognizeOne: ch: {}\n", ch); currentToken_.literal.push_back(ch); // we do not stack.clear() stack if isAcceptState(state) as we need this information iff @@ -344,7 +345,7 @@ inline Token LexerIterator::recogniz while (state != BadState && !isAcceptState(state)) { if constexpr (Trace) - tracef("recognize: backtrack: current state {} {}; stack: {}", + tracef("recognizeOne: backtrack: current state {} {}; stack: {}", stateName(state), isAcceptState(state) ? "accepting" : "non-accepting", toString(stack)); @@ -391,7 +392,7 @@ inline Token LexerIterator::recogniz currentToken_.offset, offset_, quotedString(currentToken_.literal), - quoted(currentChar_)); + prettySymbol(currentChar_)); if (!isAcceptState(state)) throw LexerError { offset_ }; @@ -464,7 +465,7 @@ inline Symbol LexerIterator::nextCha } int ch = source_->get(); - fmt::print("source.get: => {} (0x{:02X}, {})\n", ch, (uint8_t)ch, prettySymbol(ch)); + fmt::print("source.get: => {} (0x{:02X}, {})\n", ch, (uint16_t)ch, prettySymbol(ch)); if (ch < 0) { currentChar_ = Symbols::EndOfFile; @@ -490,6 +491,7 @@ inline void LexerIterator::rollback( { offset_--; buffered_.push_back(currentToken_.literal.back()); + tracef("Lexer:{}: rollback '{}'", offset_, prettySymbol(buffered_.back())); } } diff --git a/src/regex_dfa/Lexer-inl.h b/src/regex_dfa/Lexer-inl.h index fc897daec9..b925548a05 100644 --- a/src/regex_dfa/Lexer-inl.h +++ b/src/regex_dfa/Lexer-inl.h @@ -17,25 +17,6 @@ namespace regex_dfa { -static inline std::string quoted(int ch) -{ - if (ch == Symbols::Epsilon) - return "ε"; - if (ch == Symbols::Error) - return "Error"; - if (ch == Symbols::BeginOfLine) - return "BOL"; - if (ch == Symbols::EndOfLine) - return "EOL"; - if (ch == Symbols::EndOfFile) - return "EOF"; - if (ch == '\n') - return "\\n"; - if (ch == ' ') - return "\\s"; - return fmt::format("{}", ch); -} - static inline std::string quotedString(const std::string& s) { std::stringstream sstr; @@ -255,7 +236,7 @@ inline Token Lexer::recognizeOne() oldOffset_, offset_, quotedString(word_), - quoted(currentChar_)); + prettySymbol(currentChar_)); if (!isAcceptState(state)) throw LexerError { offset_ }; diff --git a/src/regex_dfa/Lexer_test.cpp b/src/regex_dfa/Lexer_test.cpp index 2b2df35482..e0e3a4634e 100644 --- a/src/regex_dfa/Lexer_test.cpp +++ b/src/regex_dfa/Lexer_test.cpp @@ -114,7 +114,7 @@ TEST_CASE("regex_Lexable.one") auto src = Lexable { ld, make_unique("abba abcdef"), [](const string& msg) { - UNSCOPED_INFO(msg); + fmt::print("trace: {}\n", msg); } }; auto lexer = begin(src); auto eof = end(src); @@ -168,8 +168,9 @@ TEST_CASE("regex_Lexer.match_eol") cc.parse(RULES); LexerDef ld = cc.compile(); + INFO(fmt::format("LexerDef:\n{}", ld.to_string())); Lexable ls { ld, "abba eol\nabba", [](const string& msg) { - INFO(msg); + fmt::print("trace: {}\n", msg); } }; auto lexer = begin(ls);