Skip to content

Commit

Permalink
Adds regex_dfa library (repurposed from klex project)
Browse files Browse the repository at this point in the history
Signed-off-by: Christian Parpart <christian@parpart.family>
  • Loading branch information
christianparpart committed Apr 11, 2023
1 parent ae0fc93 commit d14604d
Show file tree
Hide file tree
Showing 65 changed files with 10,427 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ IncludeCategories:
Priority: 3
- Regex: '^<(vtrasterizer)/'
Priority: 4
- Regex: '^<(regex_dfa)/'
Priority: 5
- Regex: '^<(text_shaper)/'
Priority: 5
- Regex: '^<(crispy)/'
Expand Down
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ endif()
include(PedanticCompiler)

add_subdirectory(crispy)
add_subdirectory(regex_dfa)
add_subdirectory(text_shaper)
add_subdirectory(vtpty)
add_subdirectory(vtparser)
Expand Down
56 changes: 56 additions & 0 deletions src/regex_dfa/Alphabet.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// This file is part of the "klex" project, http://github.com/christianparpart/klex
// (c) 2018 Christian Parpart <christian@parpart.family>
//
// Licensed under the MIT License (the "License"); you may not use this
// file except in compliance with the License. You may obtain a copy of
// the License at: http://opensource.org/licenses/MIT

#include <regex_dfa/Alphabet.h>
#include <regex_dfa/Symbols.h>

#include <iomanip>
#include <iostream>
#include <sstream>

using namespace std;

namespace regex_dfa
{

// Compile-time switch for trace output in this translation unit.
// Flip `#if 0` to `#if 1` to make DEBUG() write an fmt-formatted message, followed by a newline, to cerr.
// Both variants wrap their body in do { } while (0) so that `DEBUG(...);` is always a single statement.
#if 0
#define DEBUG(msg, ...) \
do \
{ \
cerr << fmt::format(msg, __VA_ARGS__) << "\n"; \
} while (0)
#else
// Tracing disabled: DEBUG() expands to an empty statement, so its arguments are never evaluated.
#define DEBUG(msg, ...) \
do \
{ \
} while (0)
#endif

// Adds the symbol `ch` to the alphabet. A symbol that is already present is
// left alone (and produces no trace output).
void Alphabet::insert(Symbol ch)
{
    const bool alreadyKnown = alphabet_.find(ch) != alphabet_.end();
    if (alreadyKnown)
        return;

    DEBUG("Alphabet: insert '{:}'", prettySymbol(ch));
    alphabet_.insert(ch);
}

// Renders the alphabet as text: an opening '{', then the prettySymbol() form of
// every stored symbol in the set's iteration order (no separator), then '}'.
string Alphabet::to_string() const
{
    stringstream rendered;
    rendered << '{';

    for (auto cursor = alphabet_.begin(); cursor != alphabet_.end(); ++cursor)
    {
        Symbol symbol = *cursor;
        rendered << prettySymbol(symbol);
    }

    rendered << '}';
    return rendered.str();
}

} // namespace regex_dfa
60 changes: 60 additions & 0 deletions src/regex_dfa/Alphabet.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// This file is part of the "klex" project, http://github.com/christianparpart/klex
// (c) 2018 Christian Parpart <christian@parpart.family>
//
// Licensed under the MIT License (the "License"); you may not use this
// file except in compliance with the License. You may obtain a copy of
// the License at: http://opensource.org/licenses/MIT
#pragma once

#include <regex_dfa/Symbols.h>

#include <fmt/format.h>

#include <set>
#include <string>

namespace regex_dfa
{

/**
 * Represents the alphabet of a finite automaton or regular expression.
 *
 * Symbols are kept in a std::set, so every symbol is stored at most once and
 * iteration visits the symbols in the set's sorted order.
 */
class Alphabet
{
  public:
    using set_type = std::set<Symbol>;
    using iterator = set_type::iterator;

    /// Number of distinct symbols currently stored.
    size_t size() const noexcept { return alphabet_.size(); }

    /// Adds @p ch to the alphabet.
    void insert(Symbol ch);

    /// Returns a printable rendering of all stored symbols.
    std::string to_string() const;

    // The iterators are returned as plain values: a top-level `const` on a
    // by-value return type protects nothing, but it prevents moving from the
    // temporary and calling non-const members on it (e.g. `++alphabet.begin()`).
    // Callers that stored the result in `auto` or `const auto` are unaffected.

    /// Iterator to the first stored symbol.
    iterator begin() const { return alphabet_.begin(); }

    /// Past-the-end iterator.
    iterator end() const { return alphabet_.end(); }

  private:
    set_type alphabet_;
};

} // namespace regex_dfa

namespace fmt
{
/// fmt integration for regex_dfa::Alphabet: formats the value through its
/// to_string() rendering, e.g. `fmt::format("{}", alphabet)`.
template <>
struct formatter<regex_dfa::Alphabet>
{
    /// No format specifiers are interpreted; parsing returns the start of the
    /// spec unchanged.
    template <typename ParseContext>
    constexpr auto parse(ParseContext& ctx)
    {
        return ctx.begin();
    }

    /// Writes `v.to_string()` to the output iterator of @p ctx.
    ///
    /// - const-qualified, because fmt may invoke format() on a const formatter
    ///   (newer fmt releases require it).
    /// - not constexpr, because Alphabet::to_string() is not a constant expression.
    /// - fmt::format_to is spelled out: an unqualified call with a std::string
    ///   argument can also find std::format_to through ADL when <format> is
    ///   available, which makes the call ambiguous.
    template <typename FormatContext>
    auto format(const regex_dfa::Alphabet& v, FormatContext& ctx) const
    {
        return fmt::format_to(ctx.out(), "{}", v.to_string());
    }
};
} // namespace fmt
41 changes: 41 additions & 0 deletions src/regex_dfa/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Static library containing the regex_dfa sources listed below.
add_library(regex_dfa STATIC
Alphabet.cpp
Compiler.cpp
DFA.cpp
DFABuilder.cpp
DFAMinimizer.cpp
DotWriter.cpp
MultiDFA.cpp
NFA.cpp
NFABuilder.cpp
RegExpr.cpp
RegExprParser.cpp
RuleParser.cpp
State.cpp
Symbols.cpp
Report.cpp
SourceLocation.cpp
)

# Headers are included as <regex_dfa/...>, so the src/ directories are exported
# as PUBLIC include paths to everything that links against this library.
target_include_directories(regex_dfa PUBLIC ${PROJECT_SOURCE_DIR}/src ${CMAKE_SOURCE_DIR}/src)
# fmt is linked PUBLIC because the library's own headers include <fmt/format.h>.
target_link_libraries(regex_dfa PUBLIC fmt::fmt-header-only)

# ----------------------------------------------------------------------------
# Unit test executable; only configured when the TESTS variable evaluates to true.
if(TESTS)
add_executable(regex_dfa_test
regex_dfa_test.cpp
DFABuilder_test.cpp
DotWriter_test.cpp
Lexer_test.cpp
NFA_test.cpp
RegExprParser_test.cpp
RuleParser_test.cpp
State_test.cpp
Symbols_test.cpp
util/iterator_test.cpp
util/testing.cpp
)

# The test binary links the library under test plus fmt.
target_link_libraries(regex_dfa_test PUBLIC regex_dfa)
target_link_libraries(regex_dfa_test PUBLIC fmt::fmt-header-only)
endif(TESTS)
67 changes: 67 additions & 0 deletions src/regex_dfa/CharStream.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// This file is part of the "klex" project, http://github.com/christianparpart/klex
// (c) 2018 Christian Parpart <christian@parpart.family>
//
// Licensed under the MIT License (the "License"); you may not use this
// file except in compliance with the License. You may obtain a copy of
// the License at: http://opensource.org/licenses/MIT
#pragma once

#include <iosfwd>
#include <istream>
#include <string>

namespace regex_dfa
{

/**
 * Abstract source of characters whose read position can be moved backwards.
 *
 * Implementations hand out one character per get() call and let the caller
 * step the read position back (rollback) or reset it to the start (rewind).
 */
class CharStream
{
  public:
    virtual ~CharStream() = default;

    /// Whether the stream has run out of input.
    [[nodiscard]] virtual bool isEof() const noexcept = 0;

    /// Returns the character at the read position and advances past it.
    virtual char get() = 0;

    /// Moves the read position back by @p count characters.
    virtual void rollback(int count) = 0;

    /// Resets the read position to where reading started.
    virtual void rewind() = 0;
};

/**
 * CharStream over an in-memory string; the string is moved into the stream.
 */
class StringStream: public CharStream
{
  public:
    explicit StringStream(std::string&& s): source_ { std::move(s) } {}

    /// True once the read position is at or beyond the end of the string.
    [[nodiscard]] bool isEof() const noexcept override { return pos_ >= source_.size(); }

    /// Returns the next character, or '\0' when the read position is at or
    /// beyond the end of the string.
    ///
    /// The position advances in either case, so every get() can still be undone
    /// by a matching rollback(). Reading at or past the end never indexes outside
    /// the string.
    char get() override
    {
        const size_t index = pos_++;
        return index < source_.size() ? source_[index] : '\0';
    }

    /// Moves the read position back by @p count characters, stopping at the
    /// start of the string instead of wrapping around.
    void rollback(int count) override
    {
        if (count < 0)
        {
            // A negative count has always moved the position forward (through
            // unsigned wrap-around of `pos_ -= count`); keep that behaviour,
            // computed without relying on wrap-around.
            pos_ += static_cast<size_t>(-static_cast<long long>(count));
            return;
        }

        const auto distance = static_cast<size_t>(count);
        pos_ = distance < pos_ ? pos_ - distance : 0;
    }

    /// Resets the read position to the first character.
    void rewind() override { pos_ = 0; }

  private:
    std::string source_;
    size_t pos_ = 0;
};

/**
 * CharStream that reads from a caller-provided std::istream.
 *
 * Only a raw pointer to the stream is stored. The constructor is defined
 * outside this header; presumably it records the stream's starting offset in
 * initialOffset_ (confirm against its definition).
 */
class StandardStream: public CharStream
{
  public:
    explicit StandardStream(std::istream* source);

    /// True as soon as the underlying stream is no longer in the good() state.
    [[nodiscard]] bool isEof() const noexcept override
    {
        const bool readable = source_->good();
        return !readable;
    }

    /// Extracts one character from the underlying stream and narrows it to char.
    char get() override
    {
        const auto extracted = source_->get();
        return static_cast<char>(extracted);
    }

    /// Clears the stream's state flags, then seeks @p count characters
    /// backwards from the current position.
    void rollback(int count) override
    {
        const std::streamoff backwards = -count;
        source_->clear();
        source_->seekg(backwards, std::ios::cur);
    }

    /// Clears the stream's state flags, then seeks to initialOffset_ measured
    /// from the beginning of the stream.
    void rewind() override
    {
        source_->clear();
        source_->seekg(initialOffset_, std::ios::beg);
    }

  private:
    std::istream* source_;
    std::streamoff initialOffset_;
};

} // namespace regex_dfa
Loading

0 comments on commit d14604d

Please sign in to comment.