-
-
Notifications
You must be signed in to change notification settings - Fork 105
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adds regex_dfa library (repurposed from klex project)
Signed-off-by: Christian Parpart <christian@parpart.family>
- Loading branch information
1 parent
ae0fc93
commit d14604d
Showing
65 changed files
with
10,427 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
// This file is part of the "klex" project, <http://github.com/christianparpart/klex> | ||
// (c) 2018 Christian Parpart <christian@parpart.family> | ||
// | ||
// Licensed under the MIT License (the "License"); you may not use this | ||
// file except in compliance with the License. You may obtain a copy of | ||
// the License at: http://opensource.org/licenses/MIT | ||
|
||
#include <regex_dfa/Alphabet.h> | ||
#include <regex_dfa/Symbols.h> | ||
|
||
#include <iomanip> | ||
#include <iostream> | ||
#include <sstream> | ||
|
||
using namespace std; | ||
|
||
namespace regex_dfa | ||
{ | ||
|
||
// Debug tracing for this translation unit.
//
// Change the `#if 0` below to `#if 1` to make DEBUG(...) write an fmt-formatted
// message plus a newline to cerr. In the shipped configuration the macro expands to
// an empty statement, so its arguments are never evaluated. Either way the
// do/while(0) wrapper makes `DEBUG(...);` behave as one statement.
#if 0
#define DEBUG(msg, ...) do { cerr << fmt::format(msg, __VA_ARGS__) << "\n"; } while (0)
#else
#define DEBUG(msg, ...) do { } while (0)
#endif
|
||
/**
 * Adds @p ch to the alphabet.
 *
 * A symbol that is already present is left untouched; the debug trace is only
 * emitted when the symbol was actually added.
 *
 * @param ch the symbol to add.
 */
void Alphabet::insert(Symbol ch)
{
    // std::set::insert() reports through `.second` whether the element was newly
    // added, so a single tree lookup covers both the membership test and the insert.
    if (alphabet_.insert(ch).second)
    {
        DEBUG("Alphabet: insert '{:}'", prettySymbol(ch));
    }
}
|
||
/**
 * Renders the alphabet as text.
 *
 * @return an opening brace, the prettySymbol() form of every stored symbol in
 *         iteration order (no separator in between), and a closing brace.
 */
string Alphabet::to_string() const
{
    ostringstream rendered;
    rendered << '{';
    for (const Symbol symbol: alphabet_)
        rendered << prettySymbol(symbol);
    rendered << '}';
    return rendered.str();
}
|
||
} // namespace regex_dfa |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
// This file is part of the "klex" project, <http://github.com/christianparpart/klex> | ||
// (c) 2018 Christian Parpart <christian@parpart.family> | ||
// | ||
// Licensed under the MIT License (the "License"); you may not use this | ||
// file except in compliance with the License. You may obtain a copy of | ||
// the License at: http://opensource.org/licenses/MIT | ||
#pragma once | ||
|
||
#include <regex_dfa/Symbols.h> | ||
|
||
#include <fmt/format.h> | ||
|
||
#include <set> | ||
#include <string> | ||
|
||
namespace regex_dfa | ||
{ | ||
|
||
/** | ||
* Represents the alphabet of a finite automaton or regular expression. | ||
*/ | ||
class Alphabet | ||
{ | ||
public: | ||
using set_type = std::set<Symbol>; | ||
using iterator = set_type::iterator; | ||
|
||
size_t size() const noexcept { return alphabet_.size(); } | ||
|
||
void insert(Symbol ch); | ||
|
||
std::string to_string() const; | ||
|
||
const iterator begin() const { return alphabet_.begin(); } | ||
const iterator end() const { return alphabet_.end(); } | ||
|
||
private: | ||
set_type alphabet_; | ||
}; | ||
|
||
} // namespace regex_dfa | ||
|
||
namespace fmt | ||
{ | ||
template <> | ||
struct formatter<regex_dfa::Alphabet> | ||
{ | ||
template <typename ParseContext> | ||
constexpr auto parse(ParseContext& ctx) | ||
{ | ||
return ctx.begin(); | ||
} | ||
|
||
template <typename FormatContext> | ||
constexpr auto format(const regex_dfa::Alphabet& v, FormatContext& ctx) | ||
{ | ||
return format_to(ctx.out(), "{}", v.to_string()); | ||
} | ||
}; | ||
} // namespace fmt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
# Static library built from the regex_dfa sources in this directory.
add_library(regex_dfa STATIC
    Alphabet.cpp
    Compiler.cpp
    DFA.cpp
    DFABuilder.cpp
    DFAMinimizer.cpp
    DotWriter.cpp
    MultiDFA.cpp
    NFA.cpp
    NFABuilder.cpp
    RegExpr.cpp
    RegExprParser.cpp
    RuleParser.cpp
    State.cpp
    Symbols.cpp
    Report.cpp
    SourceLocation.cpp
)

# Both include roots are PUBLIC, so they propagate to every target linking regex_dfa.
target_include_directories(regex_dfa
    PUBLIC
        ${PROJECT_SOURCE_DIR}/src
        ${CMAKE_SOURCE_DIR}/src
)
target_link_libraries(regex_dfa PUBLIC fmt::fmt-header-only)

# ----------------------------------------------------------------------------
# Unit-test executable; only configured when TESTS evaluates to true.
if(TESTS)
    add_executable(regex_dfa_test
        regex_dfa_test.cpp
        DFABuilder_test.cpp
        DotWriter_test.cpp
        Lexer_test.cpp
        NFA_test.cpp
        RegExprParser_test.cpp
        RuleParser_test.cpp
        State_test.cpp
        Symbols_test.cpp
        util/iterator_test.cpp
        util/testing.cpp
    )

    target_link_libraries(regex_dfa_test
        PUBLIC
            regex_dfa
            fmt::fmt-header-only
    )
endif()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
// This file is part of the "klex" project, <http://github.com/christianparpart/klex> | ||
// (c) 2018 Christian Parpart <christian@parpart.family> | ||
// | ||
// Licensed under the MIT License (the "License"); you may not use this | ||
// file except in compliance with the License. You may obtain a copy of | ||
// the License at: http://opensource.org/licenses/MIT | ||
#pragma once | ||
|
||
#include <iosfwd> | ||
#include <istream> | ||
#include <string> | ||
|
||
namespace regex_dfa | ||
{ | ||
|
||
/**
 * Abstract interface of a character source that can be stepped back and restarted.
 *
 * The concrete streams in this header implement it on top of an in-memory string
 * and on top of a std::istream.
 */
class CharStream
{
  public:
    /// Virtual so that implementations can be destroyed through a CharStream pointer.
    virtual ~CharStream() = default;

    /// Returns true when the stream has nothing further to deliver; the exact
    /// condition is up to the implementation.
    [[nodiscard]] virtual bool isEof() const noexcept = 0;

    /// Reads and returns the next character.
    virtual char get() = 0;

    /// Steps the read position back by @p count characters.
    virtual void rollback(int count) = 0;

    /// Moves the read position back to where the stream started.
    virtual void rewind() = 0;
};
|
||
class StringStream: public CharStream | ||
{ | ||
public: | ||
explicit StringStream(std::string&& s): source_ { std::move(s) } {} | ||
|
||
[[nodiscard]] bool isEof() const noexcept override { return pos_ >= source_.size(); } | ||
char get() override { return source_[pos_++]; } | ||
void rollback(int count) override { pos_ -= count; } | ||
void rewind() override { pos_ = 0; } | ||
|
||
private: | ||
std::string source_; | ||
size_t pos_ = 0; | ||
}; | ||
|
||
class StandardStream: public CharStream | ||
{ | ||
public: | ||
explicit StandardStream(std::istream* source); | ||
|
||
[[nodiscard]] bool isEof() const noexcept override { return !source_->good(); } | ||
char get() override { return static_cast<char>(source_->get()); } | ||
|
||
void rollback(int count) override | ||
{ | ||
source_->clear(); | ||
source_->seekg(-count, std::ios::cur); | ||
} | ||
|
||
void rewind() override | ||
{ | ||
source_->clear(); | ||
source_->seekg(initialOffset_, std::ios::beg); | ||
} | ||
|
||
private: | ||
std::istream* source_; | ||
std::streamoff initialOffset_; | ||
}; | ||
|
||
} // namespace regex_dfa |
Oops, something went wrong.