Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
Signed-off-by: Christian Parpart <christian@parpart.family>
  • Loading branch information
christianparpart committed Apr 11, 2023
1 parent 6adefd7 commit 190941c
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 26 deletions.
10 changes: 6 additions & 4 deletions src/regex_dfa/Lexable.h
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ inline Token LexerIterator<Token, Machine, RequiresBeginOfLine, Trace>::recogniz
stack.push_back(BadState);

if constexpr (Trace)
tracef("recognize: startState {}, offset {} {}",
tracef("recognizeOne: startState {}, offset {} {}",
stateName(state),
offset_,
isBeginOfLine_ ? "BOL" : "no-BOL");
Expand All @@ -331,6 +331,7 @@ inline Token LexerIterator<Token, Machine, RequiresBeginOfLine, Trace>::recogniz
while (state != ErrorState)
{
Symbol ch = nextChar(); // one of: input character, ERROR or EOF
fmt::print("recognizeOne: ch: {}\n", ch);
currentToken_.literal.push_back(ch);

// we do not stack.clear() stack if isAcceptState(state) as we need this information iff
Expand All @@ -344,7 +345,7 @@ inline Token LexerIterator<Token, Machine, RequiresBeginOfLine, Trace>::recogniz
while (state != BadState && !isAcceptState(state))
{
if constexpr (Trace)
tracef("recognize: backtrack: current state {} {}; stack: {}",
tracef("recognizeOne: backtrack: current state {} {}; stack: {}",
stateName(state),
isAcceptState(state) ? "accepting" : "non-accepting",
toString(stack));
Expand Down Expand Up @@ -391,7 +392,7 @@ inline Token LexerIterator<Token, Machine, RequiresBeginOfLine, Trace>::recogniz
currentToken_.offset,
offset_,
quotedString(currentToken_.literal),
quoted(currentChar_));
prettySymbol(currentChar_));

if (!isAcceptState(state))
throw LexerError { offset_ };
Expand Down Expand Up @@ -464,7 +465,7 @@ inline Symbol LexerIterator<Token, Machine, RequiresBeginOfLine, Trace>::nextCha
}

int ch = source_->get();
fmt::print("source.get: => {} (0x{:02X}, {})\n", ch, (uint8_t)ch, prettySymbol(ch));
fmt::print("source.get: => {} (0x{:02X}, {})\n", ch, (uint16_t)ch, prettySymbol(ch));
if (ch < 0)
{
currentChar_ = Symbols::EndOfFile;
Expand All @@ -490,6 +491,7 @@ inline void LexerIterator<Token, Machine, RequiresBeginOfLine, Trace>::rollback(
{
offset_--;
buffered_.push_back(currentToken_.literal.back());
tracef("Lexer:{}: rollback '{}'", offset_, prettySymbol(buffered_.back()));
}
}

Expand Down
21 changes: 1 addition & 20 deletions src/regex_dfa/Lexer-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,25 +17,6 @@
namespace regex_dfa
{

static inline std::string quoted(int ch)
{
if (ch == Symbols::Epsilon)
return "ε";
if (ch == Symbols::Error)
return "Error";
if (ch == Symbols::BeginOfLine)
return "BOL";
if (ch == Symbols::EndOfLine)
return "EOL";
if (ch == Symbols::EndOfFile)
return "EOF";
if (ch == '\n')
return "\\n";
if (ch == ' ')
return "\\s";
return fmt::format("{}", ch);
}

static inline std::string quotedString(const std::string& s)
{
std::stringstream sstr;
Expand Down Expand Up @@ -255,7 +236,7 @@ inline Token Lexer<Token, Machine, RequiresBeginOfLine, Debug>::recognizeOne()
oldOffset_,
offset_,
quotedString(word_),
quoted(currentChar_));
prettySymbol(currentChar_));

if (!isAcceptState(state))
throw LexerError { offset_ };
Expand Down
5 changes: 3 additions & 2 deletions src/regex_dfa/Lexer_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ TEST_CASE("regex_Lexable.one")
auto src = Lexable<LookaheadToken, StateId, false, true> { ld,
make_unique<stringstream>("abba abcdef"),
[](const string& msg) {
UNSCOPED_INFO(msg);
fmt::print("trace: {}\n", msg);
} };
auto lexer = begin(src);
auto eof = end(src);
Expand Down Expand Up @@ -168,8 +168,9 @@ TEST_CASE("regex_Lexer.match_eol")
cc.parse(RULES);

LexerDef ld = cc.compile();
INFO(fmt::format("LexerDef:\n{}", ld.to_string()));
Lexable<LookaheadToken, StateId, false, true> ls { ld, "abba eol\nabba", [](const string& msg) {
INFO(msg);
fmt::print("trace: {}\n", msg);
} };
auto lexer = begin(ls);

Expand Down

0 comments on commit 190941c

Please sign in to comment.