diff --git a/.clang-format b/.clang-format index bd35115950..9c0b3a8142 100644 --- a/.clang-format +++ b/.clang-format @@ -77,6 +77,8 @@ IncludeCategories: Priority: 3 - Regex: '^<(vtrasterizer)/' Priority: 4 + - Regex: '^<(regex_dfa)/' + Priority: 5 - Regex: '^<(text_shaper)/' Priority: 5 - Regex: '^<(crispy)/' diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7b9da0c303..42dfbe2923 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -215,6 +215,7 @@ jobs: cmake -DCMAKE_BUILD_TYPE=Debug -DLIBTERMINAL_BUILD_BENCH_HEADLESS=ON -DCONTOUR_QT_VERSION=6 -S . -B build cmake --build build/ -j2 ./build/src/crispy/crispy_test + ./build/src/regex_dfa/regex_dfa_test ./build/src/vtparser/vtparser_test ./build/src/vtbackend/vtbackend_test rm -rf _deps build @@ -257,6 +258,8 @@ jobs: run: cmake --build build/ - name: "test: crispy" run: ./build/src/crispy/crispy_test + - name: "test: regex_dfa" + run: ./build/src/regex_dfa/regex_dfa_test - name: "test: vtparser" run: ./build/src/vtparser/vtparser_test - name: "test: vtbackend" @@ -326,6 +329,8 @@ jobs: run: cmake --build build/ --config Release - name: "test: crispy" run: .\build\src\crispy\Release\crispy_test.exe + - name: "test: regex_dfa" + run: .\build\src\regex_dfa\Release\regex_dfa_test.exe - name: "test: vtparser" run: .\build\src\vtparser\Release\vtparser_test.exe - name: "test: vtbackend" @@ -450,6 +455,8 @@ jobs: run: cmake --build build/ -- -j3 - name: "test: crispy" run: ./build/src/crispy/crispy_test + - name: "test: regex_dfa" + run: ./build/src/regex_dfa/regex_dfa_test - name: "test: vtparser" run: ./build/src/vtparser/vtparser_test - name: "test: vtbackend" @@ -461,6 +468,7 @@ jobs: name: contour-ubuntu2204-tests path: | build/src/crispy/crispy_test + build/src/regex_dfa/regex_dfa_test build/src/vtparser/vtparser_test build/src/vtbackend/vtbackend_test build/src/vtbackend/bench-headless @@ -516,6 +524,8 @@ jobs: # run: cmake --build build/ -- -j3 # - name: "test: crispy" # run: ./build/src/crispy/crispy_test + # - name: "test: regex_dfa" + # run: ./build/src/regex_dfa/regex_dfa_test # - name: "test: vtparser" # run: ./build/src/vtparser/vtparser_test # - name: "test: vtbackend" @@ -664,6 +674,8 @@ jobs: run: cmake --build build/ -- -j3 - name: "test: crispy" run: ./build/src/crispy/crispy_test + - name: "test: regex_dfa" + run: ./build/src/regex_dfa/regex_dfa_test - name: "test: vtparser" run: ./build/src/vtparser/vtparser_test - name: "test: vtbackend" @@ -728,6 +740,8 @@ jobs: valgrind - name: "test: crispy (via valgrind)" run: valgrind --error-exitcode=64 ./build/src/crispy/crispy_test + - name: "test: regex_dfa" + run: valgrind --error-exitcode=64 ./build/src/regex_dfa/regex_dfa_test - name: "test: vtparser (via valgrind)" run: valgrind --error-exitcode=64 ./build/src/vtparser/vtparser_test - name: "test: vtbackend (via valgrind)" diff --git a/cmake/ContourThirdParties.cmake b/cmake/ContourThirdParties.cmake index 75f9334f09..ecad1d2e08 100644 --- a/cmake/ContourThirdParties.cmake +++ b/cmake/ContourThirdParties.cmake @@ -130,6 +130,9 @@ endif() ContourThirdParties_Embed_boxed_cpp() set(THIRDPARTY_BUILDIN_boxed_cpp "embedded") +ContourThirdParties_Embed_ctre() +set(THIRDPARTY_BUILDIN_ctre "embedded") + macro(ContourThirdPartiesSummary2) message(STATUS "==============================================================================") message(STATUS " Contour ThirdParties") @@ -144,5 +147,6 @@ macro(ContourThirdPartiesSummary2) message(STATUS "libunicode 
${THIRDPARTY_BUILTIN_unicode_core} (${LIBUNICODE_LIBS})") message(STATUS "yaml-cpp ${THIRDPARTY_BUILTIN_yaml_cpp}") message(STATUS "boxed-cpp ${THIRDPARTY_BUILDIN_boxed_cpp}") + message(STATUS "CTRE ${THIRDPARTY_BUILDIN_ctre}") message(STATUS "------------------------------------------------------------------------------") endmacro() diff --git a/scripts/install-deps.sh b/scripts/install-deps.sh index 011372964b..787df91f2c 100755 --- a/scripts/install-deps.sh +++ b/scripts/install-deps.sh @@ -108,6 +108,16 @@ fetch_and_unpack_termbenchpro() termbench_pro } +fetch_and_unpack_ctre() +{ + local ctre_git_sha="0fdd96db416188a07833606b16633fb977c0cc11" + fetch_and_unpack \ + compile-time-regular-expressions-$ctre_git_sha \ + ctre-$ctre_git_sha.tar.gz \ + https://github.com/hanickadot/compile-time-regular-expressions/archive/$ctre_git_sha.tar.gz \ + ctre +} + fetch_and_unpack_boxed() { local boxed_cpp_git_sha="daa702e22e71f3da3eef838e4946b6c3df1f16b1" @@ -573,6 +583,7 @@ main() fetch_and_unpack_boxed fetch_and_unpack_termbenchpro + fetch_and_unpack_ctre } main $* diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 89c65121bc..f945b3715c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -5,6 +5,7 @@ endif() include(PedanticCompiler) add_subdirectory(crispy) +add_subdirectory(regex_dfa) add_subdirectory(text_shaper) add_subdirectory(vtpty) add_subdirectory(vtparser) diff --git a/src/contour/Config.h b/src/contour/Config.h index 5c9fc948f6..619f82528a 100644 --- a/src/contour/Config.h +++ b/src/contour/Config.h @@ -180,6 +180,8 @@ struct TerminalProfile bool highlightDoubleClickedWord = true; terminal::StatusDisplayType initialStatusDisplayType = terminal::StatusDisplayType::None; + std::string urlPattern = R"((https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]*[-A-Za-z0-9+&@#/%=~_|])"; + terminal::Opacity backgroundOpacity; // value between 0 (fully transparent) and 0xFF (fully visible). bool backgroundBlur; // On Windows 10, this will enable Acrylic Backdrop. diff --git a/src/contour/TerminalSession.cpp b/src/contour/TerminalSession.cpp index f7905c623c..a4e5df9581 100644 --- a/src/contour/TerminalSession.cpp +++ b/src/contour/TerminalSession.cpp @@ -122,6 +122,7 @@ namespace settings.primaryScreen.allowReflowOnResize = config.reflowOnResize; settings.highlightDoubleClickedWord = profile.highlightDoubleClickedWord; settings.highlightTimeout = profile.highlightTimeout; + settings.urlPattern = profile.urlPattern; return settings; } diff --git a/src/regex_dfa/Alphabet.cpp b/src/regex_dfa/Alphabet.cpp new file mode 100644 index 0000000000..8dccced1e1 --- /dev/null +++ b/src/regex_dfa/Alphabet.cpp @@ -0,0 +1,56 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT + +#include +#include + +#include +#include +#include + +using namespace std; + +namespace regex_dfa +{ + +#if 0 + #define DEBUG(msg, ...) \ + do \ + { \ + cerr << fmt::format(msg, __VA_ARGS__) << "\n"; \ + } while (0) +#else + #define DEBUG(msg, ...) 
\
+        do \
+        { \
+        } while (0)
+#endif
+
+void Alphabet::insert(Symbol ch)
+{
+    if (_alphabet.find(ch) == _alphabet.end())
+    {
+        DEBUG("Alphabet: insert '{:}'", prettySymbol(ch));
+        _alphabet.insert(ch);
+    }
+}
+
+string Alphabet::to_string() const
+{
+    stringstream sstr;
+
+    sstr << '{';
+
+    for (Symbol c: _alphabet)
+        sstr << prettySymbol(c);
+
+    sstr << '}';
+
+    return sstr.str();
+}
+
+} // namespace regex_dfa
diff --git a/src/regex_dfa/Alphabet.h b/src/regex_dfa/Alphabet.h
new file mode 100644
index 0000000000..ec6d37cd1a
--- /dev/null
+++ b/src/regex_dfa/Alphabet.h
@@ -0,0 +1,60 @@
+// This file is part of the "klex" project, http://github.com/christianparpart/klex>
+// (c) 2018 Christian Parpart <christian@parpart.family>
+//
+// Licensed under the MIT License (the "License"); you may not use this
+// file except in compliance with the License. You may obtain a copy of
+// the License at: http://opensource.org/licenses/MIT
+#pragma once
+
+#include <regex_dfa/Symbols.h>
+
+#include <fmt/format.h>
+
+#include <set>
+#include <string>
+
+namespace regex_dfa
+{
+
+/**
+ * Represents the alphabet of a finite automaton or regular expression.
+ */
+class Alphabet
+{
+  public:
+    using set_type = std::set<Symbol>;
+    using iterator = set_type::iterator;
+
+    [[nodiscard]] size_t size() const noexcept { return _alphabet.size(); }
+
+    void insert(Symbol ch);
+
+    [[nodiscard]] std::string to_string() const;
+
+    [[nodiscard]] iterator begin() const { return _alphabet.begin(); }
+    [[nodiscard]] iterator end() const { return _alphabet.end(); }
+
+  private:
+    set_type _alphabet;
+};
+
+} // namespace regex_dfa
+
+namespace fmt
+{
+template <>
+struct formatter<regex_dfa::Alphabet>
+{
+    template <typename ParseContext>
+    constexpr auto parse(ParseContext& ctx)
+    {
+        return ctx.begin();
+    }
+
+    template <typename FormatContext>
+    constexpr auto format(const regex_dfa::Alphabet& v, FormatContext& ctx)
+    {
+        return fmt::format_to(ctx.out(), "{}", v.to_string());
+    }
+};
+} // namespace fmt
diff --git a/src/regex_dfa/CMakeLists.txt b/src/regex_dfa/CMakeLists.txt
new file mode 100644
index 0000000000..a415336887
--- /dev/null
+++ b/src/regex_dfa/CMakeLists.txt
@@ -0,0 +1,43 @@
+add_library(regex_dfa STATIC
+    Alphabet.cpp
+    Compiler.cpp
+    DFA.cpp
+    DFABuilder.cpp
+    DFAMinimizer.cpp
+    DotWriter.cpp
+    MultiDFA.cpp
+    NFA.cpp
+    NFABuilder.cpp
+    RegExpr.cpp
+    RegExprParser.cpp
+    RuleParser.cpp
+    State.cpp
+    Symbols.cpp
+    Report.cpp
+    SourceLocation.cpp
+)
+
+target_include_directories(regex_dfa PUBLIC ${PROJECT_SOURCE_DIR}/src ${CMAKE_SOURCE_DIR}/src)
+target_link_libraries(regex_dfa PUBLIC fmt::fmt-header-only)
+
+# ----------------------------------------------------------------------------
+option(REGEX_DFA_TESTING "Enables building of unittests for regex_dfa library [default: ON]" ON)
+if(REGEX_DFA_TESTING)
+    enable_testing()
+    add_executable(regex_dfa_test
+        regex_dfa_test.cpp
+        DFABuilder_test.cpp
+        DotWriter_test.cpp
+        Lexer_test.cpp
+        NFA_test.cpp
+        RegExprParser_test.cpp
+        RuleParser_test.cpp
+        State_test.cpp
+        Symbols_test.cpp
+        util/iterator_test.cpp
+    )
+
+    target_link_libraries(regex_dfa_test PUBLIC regex_dfa)
+    target_link_libraries(regex_dfa_test PUBLIC Catch2::Catch2)
+    target_link_libraries(regex_dfa_test PUBLIC fmt::fmt-header-only)
+endif(REGEX_DFA_TESTING)
diff --git a/src/regex_dfa/CharStream.h b/src/regex_dfa/CharStream.h
new file mode 100644
index 0000000000..d0d0e2d96f
--- /dev/null
+++ b/src/regex_dfa/CharStream.h
@@ -0,0 +1,67 @@
+// This file is part of the "klex" project, http://github.com/christianparpart/klex>
+// (c) 2018 Christian Parpart <christian@parpart.family>
+//
+// Licensed under the MIT License (the "License"); you may not use this
+// file except in compliance with the License. You may obtain a copy of
+// the License at: http://opensource.org/licenses/MIT
+#pragma once
+
+#include <fstream>
+#include <istream>
+#include <string>
+
+namespace regex_dfa
+{
+
+class CharStream
+{
+  public:
+    virtual ~CharStream() = default;
+
+    [[nodiscard]] virtual bool isEof() const noexcept = 0;
+    virtual char get() = 0;
+    virtual void rollback(int count) = 0;
+    virtual void rewind() = 0;
+};
+
+class StringStream: public CharStream
+{
+  public:
+    explicit StringStream(std::string&& s): _source { std::move(s) } {}
+
+    [[nodiscard]] bool isEof() const noexcept override { return _pos >= _source.size(); }
+    char get() override { return _source[_pos++]; }
+    void rollback(int count) override { _pos -= count; }
+    void rewind() override { _pos = 0; }
+
+  private:
+    std::string _source;
+    size_t _pos = 0;
+};
+
+class StandardStream: public CharStream
+{
+  public:
+    explicit StandardStream(std::istream* source);
+
+    [[nodiscard]] bool isEof() const noexcept override { return !_source->good(); }
+    char get() override { return static_cast<char>(_source->get()); }
+
+    void rollback(int count) override
+    {
+        _source->clear();
+        _source->seekg(-count, std::ios::cur);
+    }
+
+    void rewind() override
+    {
+        _source->clear();
+        _source->seekg(_initialOffset, std::ios::beg);
+    }
+
+  private:
+    std::istream* _source;
+    std::streamoff _initialOffset;
+};
+
+} // namespace regex_dfa
diff --git a/src/regex_dfa/Compiler.cpp b/src/regex_dfa/Compiler.cpp
new file mode 100644
index 0000000000..676ef26ac0
--- /dev/null
+++ b/src/regex_dfa/Compiler.cpp
@@ -0,0 +1,189 @@
+// This file is part of the "klex" project, http://github.com/christianparpart/klex>
+// (c) 2018 Christian Parpart <christian@parpart.family>
+//
+// Licensed under the MIT License (the "License"); you may not use this
+// file except in compliance with the License. You may obtain a copy of
+// the License at: http://opensource.org/licenses/MIT
+
+#include <regex_dfa/Compiler.h>
+#include <regex_dfa/DFA.h>
+#include <regex_dfa/DFABuilder.h>
+#include <regex_dfa/DFAMinimizer.h>
+#include <regex_dfa/LexerDef.h>
+#include <regex_dfa/MultiDFA.h>
+#include <regex_dfa/NFA.h>
+#include <regex_dfa/NFABuilder.h>
+#include <regex_dfa/RegExpr.h>
+#include <regex_dfa/RegExprParser.h>
+#include <regex_dfa/Rule.h>
+#include <regex_dfa/RuleParser.h>
+
+#include <iostream>
+
+using namespace std;
+
+namespace regex_dfa
+{
+
+void Compiler::parse(string text)
+{
+    parse(make_unique<StringStream>(std::move(text)));
+}
+
+void Compiler::parse(unique_ptr<CharStream> stream)
+{
+    declareAll(RuleParser { std::move(stream) }.parseRules());
+}
+
+void Compiler::declareAll(RuleList rules)
+{
+    rules_.reserve(rules_.size() + rules.size());
+
+    // populate RegExpr
+    for (Rule& rule: rules)
+        rule.regexpr = make_unique<RegExpr>(RegExprParser {}.parse(rule.pattern, rule.line, rule.column));
+
+    containsBeginOfLine_ = any_of(rules.begin(), rules.end(), ruleContainsBeginOfLine);
+
+    if (containsBeginOfLine_)
+    {
+        // We have at least one BOL-rule.
+        for (Rule& rule: rules)
+        {
+            if (!regex_dfa::containsBeginOfLine(*rule.regexpr))
+            {
+                NFA nfa = NFABuilder {}.construct(*rule.regexpr, rule.tag);
+                for (const string& condition: rule.conditions)
+                {
+                    NFA& fa = fa_[condition];
+                    if (fa.empty())
+                        fa = nfa.clone();
+                    else
+                        fa.alternate(nfa.clone());
+                }
+                declare(rule);
+            }
+            declare(rule, "_0"); // BOL
+        }
+    }
+    else
+    {
+        // No BOL-rules present, just declare them then.
+        for (Rule& rule: rules)
+            declare(rule);
+    }
+
+    for (Rule& rule: rules)
+    {
+        if (auto i = names_.find(rule.tag); i != names_.end() && i->first != rule.tag)
+            // Can actually only happen on "ignore" attributed rule count > 1.
+            names_[rule.tag] = fmt::format("{}, {}", i->second, rule.name);
+        else
+            names_[rule.tag] = rule.name;
+
+        rules_.emplace_back(std::move(rule));
+    }
+}
+
+size_t Compiler::size() const
+{
+    size_t result = 0;
+    for (const pair<const string, NFA>& fa: fa_)
+        result += fa.second.size();
+    return result;
+}
+
+void Compiler::declare(const Rule& rule, const string& conditionSuffix)
+{
+    NFA nfa = NFABuilder {}.construct(*rule.regexpr, rule.tag);
+
+    for (const string& condition: rule.conditions)
+    {
+        NFA& fa = fa_[condition + conditionSuffix];
+
+        if (fa.empty())
+            fa = nfa.clone();
+        else
+            fa.alternate(nfa.clone());
+    }
+}
+
+// const map<string, NFA>& Compiler::automata() const {
+//     return fa_;
+// }
+
+MultiDFA Compiler::compileMultiDFA(OvershadowMap* overshadows)
+{
+    map<string, DFA> dfaMap;
+    for (const auto& fa: fa_)
+        dfaMap[fa.first] = DFABuilder { fa.second.clone() }.construct(overshadows);
+
+    return constructMultiDFA(std::move(dfaMap));
+}
+
+DFA Compiler::compileDFA(OvershadowMap* overshadows)
+{
+    assert((!containsBeginOfLine_ && fa_.size() == 1) || (containsBeginOfLine_ && fa_.size() == 2));
+    return DFABuilder { fa_.begin()->second.clone() }.construct(overshadows);
+}
+
+DFA Compiler::compileMinimalDFA()
+{
+    return DFAMinimizer { compileDFA() }.constructDFA();
+}
+
+LexerDef Compiler::compile()
+{
+    return generateTables(compileMinimalDFA(), containsBeginOfLine_, std::move(names_));
+}
+
+LexerDef Compiler::compileMulti(OvershadowMap* overshadows)
+{
+    MultiDFA multiDFA = compileMultiDFA(overshadows);
+    multiDFA = DFAMinimizer { multiDFA }.constructMultiDFA();
+    return generateTables(multiDFA, containsBeginOfLine_, names());
+}
+
+LexerDef Compiler::generateTables(const DFA& dfa, bool requiresBeginOfLine, map<Tag, string> names)
+{
+    const Alphabet alphabet = dfa.alphabet();
+    TransitionMap transitionMap;
+
+    for (StateId state = 0, sE = dfa.lastState(); state <= sE; ++state)
+        for (Symbol c: alphabet)
+            if (optional<StateId> nextState = dfa.delta(state, c); nextState.has_value())
+                transitionMap.define(state, c, nextState.value());
+
+    map<StateId, Tag> acceptStates;
+    for (StateId s: dfa.acceptStates())
+        acceptStates.emplace(s, *dfa.acceptTag(s));
+
+    // TODO: many initial states !
+    return LexerDef { { { "INITIAL", dfa.initialState() } },
+                      requiresBeginOfLine,
+                      std::move(transitionMap),
+                      std::move(acceptStates),
+                      dfa.backtracking(),
+                      std::move(names) };
+}
+
+LexerDef Compiler::generateTables(const MultiDFA& multiDFA, bool requiresBeginOfLine, map<Tag, string> names)
+{
+    const Alphabet alphabet = multiDFA.dfa.alphabet();
+    TransitionMap transitionMap;
+
+    for (StateId state = 0, sE = multiDFA.dfa.lastState(); state <= sE; ++state)
+        for (const Symbol c: alphabet)
+            if (optional<StateId> nextState = multiDFA.dfa.delta(state, c); nextState.has_value())
+                transitionMap.define(state, c, nextState.value());
+
+    map<StateId, Tag> acceptStates;
+    for (StateId s: multiDFA.dfa.acceptStates())
+        acceptStates.emplace(s, *multiDFA.dfa.acceptTag(s));
+
+    // TODO: many initial states !
+    return LexerDef { multiDFA.initialStates, requiresBeginOfLine, std::move(transitionMap),
+                      std::move(acceptStates), multiDFA.dfa.backtracking(), std::move(names) };
+}
+
+} // namespace regex_dfa
diff --git a/src/regex_dfa/Compiler.h b/src/regex_dfa/Compiler.h
new file mode 100644
index 0000000000..9e8f1846d2
--- /dev/null
+++ b/src/regex_dfa/Compiler.h
@@ -0,0 +1,104 @@
+// This file is part of the "klex" project, http://github.com/christianparpart/klex>
+// (c) 2018 Christian Parpart <christian@parpart.family>
+//
+// Licensed under the MIT License (the "License"); you may not use this
+// file except in compliance with the License. You may obtain a copy of
+// the License at: http://opensource.org/licenses/MIT
+#pragma once
+
+#include <regex_dfa/CharStream.h>
+#include <regex_dfa/DFA.h>
+#include <regex_dfa/DFABuilder.h>
+#include <regex_dfa/LexerDef.h>
+#include <regex_dfa/Rule.h>
+
+#include <map>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <vector>
+
+namespace regex_dfa
+{
+
+struct MultiDFA;
+
+/**
+ * Top-Level API for compiling lexical patterns into table definitions for Lexer.
+ *
+ * @see Lexer
+ */
+class Compiler
+{
+  public:
+    using TagNameMap = std::map<Tag, std::string>;
+    using OvershadowMap = DFABuilder::OvershadowMap;
+    using AutomataMap = std::map<std::string, NFA>;
+
+    Compiler(): rules_ {}, containsBeginOfLine_ { false }, fa_ {}, names_ {} {}
+
+    /**
+     * Parses a @p stream of textual rule definitions to construct their internal data structures.
+     */
+    void parse(std::unique_ptr<CharStream> stream);
+    void parse(std::string text);
+
+    /**
+     * Parses a list of @p rules to construct their internal data structures.
+     */
+    void declareAll(RuleList rules);
+
+    [[nodiscard]] const RuleList& rules() const noexcept { return rules_; }
+    [[nodiscard]] const TagNameMap& names() const noexcept { return names_; }
+    [[nodiscard]] size_t size() const;
+
+    /**
+     * Compiles all previously parsed rules into a DFA.
+     */
+    DFA compileDFA(OvershadowMap* overshadows = nullptr);
+    MultiDFA compileMultiDFA(OvershadowMap* overshadows = nullptr);
+
+    /**
+     * Compiles all previously parsed rules into a minimal DFA.
+     */
+    DFA compileMinimalDFA();
+
+    /**
+     * Compiles all previously parsed rules into a suitable data structure for Lexer.
+     *
+     * @see Lexer
+     */
+    LexerDef compile();
+
+    /**
+     * Compiles all previously parsed rules into a suitable data structure for Lexer, taking care of
+     * multiple conditions as well as begin-of-line.
+     */
+    LexerDef compileMulti(OvershadowMap* overshadows = nullptr);
+
+    /**
+     * Translates the given DFA @p dfa with a given TagNameMap @p names into trivial table mappings.
+     *
+     * @see Lexer
+     */
+    static LexerDef generateTables(const DFA& dfa, bool requiresBeginOfLine, TagNameMap names);
+    static LexerDef generateTables(const MultiDFA& dfa, bool requiresBeginOfLine, TagNameMap names);
+
+    [[nodiscard]] const std::map<std::string, NFA>& automata() const { return fa_; }
+
+    [[nodiscard]] bool containsBeginOfLine() const noexcept { return containsBeginOfLine_; }
+
+  private:
+    /**
+     * Parses a single @p rule to construct its internal data structures.
+     */
+    void declare(const Rule& rule, const std::string& conditionSuffix = "");
+
+  private:
+    RuleList rules_;
+    bool containsBeginOfLine_;
+    AutomataMap fa_;
+    TagNameMap names_;
+};
+
+} // namespace regex_dfa
diff --git a/src/regex_dfa/DFA.cpp b/src/regex_dfa/DFA.cpp
new file mode 100644
index 0000000000..f0df7a8eac
--- /dev/null
+++ b/src/regex_dfa/DFA.cpp
@@ -0,0 +1,158 @@
+// This file is part of the "klex" project, http://github.com/christianparpart/klex>
+// (c) 2018 Christian Parpart <christian@parpart.family>
+//
+// Licensed under the MIT License (the "License"); you may not use this
+// file except in compliance with the License.
You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT + +#include +#include +#include + +#include +#include +#include +#include +#include + +#if 0 + #define DEBUG(msg, ...) \ + do \ + { \ + cerr << fmt::format(msg, __VA_ARGS__) << "\n"; \ + } while (0) +#else + #define DEBUG(msg, ...) \ + do \ + { \ + } while (0) +#endif + +using namespace std; + +namespace regex_dfa +{ + +Alphabet DFA::alphabet() const +{ + Alphabet alphabet; + for (const State& state: states_) + for (pair const t: state.transitions) + alphabet.insert(t.first); + + return alphabet; +} + +vector DFA::acceptStates() const +{ + vector states; + states.reserve(acceptTags_.size()); + for_each(begin(acceptTags_), end(acceptTags_), [&](const pair& s) { + states.push_back(s.first); + }); + return states; +} + +// -------------------------------------------------------------------------- + +void DFA::createStates(size_t count) +{ + states_.resize(states_.size() + count); +} + +void DFA::setInitialState(StateId s) +{ + // TODO: assert (s is having no predecessors) + initialState_ = s; +} + +void DFA::setTransition(StateId from, Symbol symbol, StateId to) +{ + // if (auto i = states_[from].transitions.find(symbol); i != states_[from].transitions.end()) + // fmt::print("overwriting transition! {} --({})--> {} (new: {})\n", from, prettySymbol(symbol), + // i->second, to); + + // XXX assert(s.transitions.find(symbol) == s.transitions.end()); + states_[from].transitions[symbol] = to; +} + +void DFA::removeTransition(StateId from, Symbol symbol) +{ + State& s = states_[from]; + if (auto i = s.transitions.find(symbol); i != s.transitions.end()) + s.transitions.erase(i); +} + +StateId DFA::append(DFA&& other, StateId q0) +{ + assert(other.initialState() == 0); + + other.prepareStateIds(states_.size(), q0); + + states_.reserve(size() + other.size() - 1); + states_[q0] = other.states_[0]; + states_.insert(states_.end(), next(other.states_.begin()), other.states_.end()); + backtrackStates_.insert(other.backtrackStates_.begin(), other.backtrackStates_.end()); + acceptTags_.insert(other.acceptTags_.begin(), other.acceptTags_.end()); + + return other.initialState(); +} + +void DFA::prepareStateIds(StateId baseId, StateId q0) +{ + // adjust transition state IDs + // traverse through each state's transition set + // traverse through each transition in the transition set + // traverse through each element and add BASE_ID + + auto transformId = [baseId, q0, this](StateId s) -> StateId { + // we subtract 1, because we already have a slot for q0 elsewhere (pre-allocated) + return s != initialState_ ? 
baseId + s - 1 : q0; + }; + + // for each state's transitions + for (State& state: states_) + for (pair& t: state.transitions) + t.second = transformId(t.second); + + AcceptMap remapped; + for (auto& a: acceptTags_) + remapped[transformId(a.first)] = a.second; + acceptTags_ = std::move(remapped); + + BacktrackingMap backtracking; + for (const auto& bt: backtrackStates_) + backtracking[transformId(bt.first)] = transformId(bt.second); + backtrackStates_ = std::move(backtracking); + + initialState_ = q0; +} + +void DFA::visit(DotVisitor& v) const +{ + v.start(initialState_); + + // STATE: initial + v.visitNode(initialState_, true, isAccepting(initialState_)); + + // STATE: accepting + for (StateId s: acceptStates()) + if (s != initialState_) + v.visitNode(s, false, true); + + // STATE: any other + for (StateId s = 0, sE = lastState(); s != sE; ++s) + if (s != initialState_ && !isAccepting(s)) + v.visitNode(s, false, false); + + // TRANSITIONS + for (StateId s = 0, sE = size(); s != sE; ++s) + { + const TransitionMap& T = states_[s].transitions; + for_each(T.begin(), T.end(), [&](const auto& t) { v.visitEdge(s, t.second, t.first); }); + for_each(T.begin(), T.end(), [&](const auto& t) { v.endVisitEdge(s, t.second); }); + } + v.end(); +} + +} // namespace regex_dfa diff --git a/src/regex_dfa/DFA.h b/src/regex_dfa/DFA.h new file mode 100644 index 0000000000..a2d4881fab --- /dev/null +++ b/src/regex_dfa/DFA.h @@ -0,0 +1,170 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT +#pragma once + +#include +#include + +#include +#include +#include +#include + +namespace regex_dfa +{ + +class NFA; +class DFABuilder; +class DotVisitor; + +/** + * Represents a deterministic finite automaton. + */ +class DFA +{ + public: + using TransitionMap = std::map; + struct State + { + // std::vector states; + TransitionMap transitions; + }; + using StateVec = std::vector; + + //! defines a mapping between accept state ID and another (prior) ID to track roll back the input stream + //! to. + using BacktrackingMap = std::map; + + DFA(const DFA& other) = delete; + DFA& operator=(const DFA& other) = delete; + DFA(DFA&&) = default; + DFA& operator=(DFA&&) = default; + ~DFA() = default; + + DFA(): states_ {}, initialState_ { 0 }, backtrackStates_ {}, acceptTags_ {} {} + + [[nodiscard]] bool empty() const noexcept { return states_.empty(); } + [[nodiscard]] size_t size() const noexcept { return states_.size(); } + + [[nodiscard]] StateId lastState() const noexcept + { + assert(!empty()); + return states_.size() - 1; + } + + //! Retrieves the alphabet of this finite automaton. + [[nodiscard]] Alphabet alphabet() const; + + //! Retrieves the initial state. + [[nodiscard]] StateId initialState() const { return initialState_; } + + //! Retrieves the list of available states. + [[nodiscard]] const StateVec& states() const { return states_; } + [[nodiscard]] StateVec& states() { return states_; } + + [[nodiscard]] StateIdVec stateIds() const + { + StateIdVec v; + v.reserve(states_.size()); + for (size_t i = 0, e = states_.size(); i != e; ++i) + v.push_back(i); // funny, I know + return v; + } + + //! Retrieves the list of accepting states. 
+ [[nodiscard]] std::vector acceptStates() const; + + /** + * Traverses all states and edges in this NFA and calls @p visitor for each state & edge. + * + * Use this function to e.g. get a GraphViz dot-file drawn. + */ + void visit(DotVisitor& visitor) const; + + void createStates(size_t count); + + void setInitialState(StateId state); + + [[nodiscard]] const TransitionMap& stateTransitions(StateId id) const + { + return states_[static_cast(id)].transitions; + } + + // {{{ backtracking (for lookahead) + void setBacktrack(StateId from, StateId to) { backtrackStates_[from] = to; } + + [[nodiscard]] std::optional backtrack(StateId acceptState) const + { + if (auto i = backtrackStates_.find(acceptState); i != backtrackStates_.end()) + return i->second; + + return std::nullopt; + } + + [[nodiscard]] const BacktrackingMap& backtracking() const noexcept { return backtrackStates_; } + // }}} + + //! Flags given state as accepting-state with given Tag @p acceptTag. + void setAccept(StateId state, Tag acceptTag) { acceptTags_[state] = acceptTag; } + + [[nodiscard]] bool isAccepting(StateId s) const { return acceptTags_.find(s) != acceptTags_.end(); } + + [[nodiscard]] std::optional acceptTag(StateId s) const + { + if (auto i = acceptTags_.find(s); i != acceptTags_.end()) + return i->second; + + return std::nullopt; + } + + [[nodiscard]] std::optional delta(StateId state, Symbol symbol) const + { + const auto& T = states_[state].transitions; + if (auto i = T.find(symbol); i != T.end()) + return i->second; + + return std::nullopt; + } + + void setTransition(StateId from, Symbol symbol, StateId to); + void removeTransition(StateId from, Symbol symbol); + + [[nodiscard]] StateIdVec nonAcceptStates() const + { + StateIdVec result; + result.reserve( + std::abs(static_cast(states_.size()) - static_cast(acceptTags_.size()))); + + for (StateId s = 0, sE = size(); s != sE; ++s) + if (!isAccepting(s)) + result.push_back(s); + + return result; + } + + [[nodiscard]] bool isAcceptor(Tag t) const + { + for (std::pair p: acceptTags_) + if (p.second == t) + return true; + + return false; + } + + StateId append(DFA&& other, StateId q0); + + private: + void prepareStateIds(StateId baseId, StateId q0); + + private: + StateVec states_; + StateId initialState_; + BacktrackingMap backtrackStates_; + AcceptMap acceptTags_; +}; + +} // namespace regex_dfa diff --git a/src/regex_dfa/DFABuilder.cpp b/src/regex_dfa/DFABuilder.cpp new file mode 100644 index 0000000000..aa8fd393ff --- /dev/null +++ b/src/regex_dfa/DFABuilder.cpp @@ -0,0 +1,220 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +using namespace std; + +namespace regex_dfa +{ + +#if 0 + #define DEBUG(msg, ...) \ + do \ + { \ + cerr << fmt::format(msg, __VA_ARGS__) << "\n"; \ + } while (0) +#else + #define DEBUG(msg, ...) 
\ + do \ + { \ + } while (0) +#endif + +struct DFABuilder::TransitionTable +{ // {{{ + void insert(StateId q, Symbol c, StateId t); + unordered_map> transitions; +}; + +inline void DFABuilder::TransitionTable::insert(StateId q, Symbol c, StateId t) +{ + transitions[q][c] = t; +} +// }}} + +/* DFA construction visualization + REGEX: a(b|c)* + + NFA: n0 --(a)--> n1 --> n2 -----------------------------------> "n7" + \ ^ + \---> n3 <------------------------ / + \ \ \ / + \ \----> n4 --(b)--> n5 --> n6 + \ ^ + \----> n8 --(c)--> n9 ---/ + + DFA: + <--- + d0 --(a)--> "d1" ----(b)--> "d2"--(b) + \ |^ + \ (c)||(b) + \ v| + \--(c)--> "d3"--(c) + <--- + + + TABLE: + + set | DFA | NFA | + name | state | state | 'a' | 'b' | 'c' + -------------------------------------------------------------------------------------------------------- + q0 | d0 | {n0} | {n1,n2,n3,n4,n7,n8} | -none- | -none- + q1 | d1 | {n1,n2,n3,n4,n7,n8} | -none- | {n3,n4,n5,n6,n7,n8} | {n3,n4,n6,n7,n8,n9} + q2 | d2 | {n3,n4,n5,n6,n7,n8} | -none- | q2 | q3 + q3 | d3 | {n3,n4,n6,n7,n8,n9} | -none- | q2 | q3 +*/ + +DFA DFABuilder::construct(OvershadowMap* overshadows) +{ + const StateIdVec q_0 = nfa_.epsilonClosure({ nfa_.initialStateId() }); + vector Q = { q_0 }; // resulting states + deque workList = { q_0 }; + TransitionTable T; + + const Alphabet alphabet = nfa_.alphabet(); + + StateIdVec eclosure; + StateIdVec delta; + while (!workList.empty()) + { + const StateIdVec q = + std::move(workList.front()); // each set q represents a valid configuration from the NFA + workList.pop_front(); + const StateId q_i = *configurationNumber(Q, q); + + for (Symbol c: alphabet) + { + nfa_.epsilonClosure(*nfa_.delta(q, c, &delta), &eclosure); + if (!eclosure.empty()) + { + if (optional t_i = configurationNumber(Q, eclosure); t_i.has_value()) + T.insert(q_i, c, *t_i); // T[q][c] = eclosure; + else + { + Q.emplace_back(eclosure); + t_i = StateId { Q.size() - 1 }; // equal to configurationNumber(Q, eclosure); + T.insert(q_i, c, *t_i); // T[q][c] = eclosure; + workList.emplace_back(std::move(eclosure)); + } + eclosure.clear(); + } + delta.clear(); + } + } + + // Q now contains all the valid configurations and T all transitions between them + return constructDFA(Q, T, overshadows); +} + +DFA DFABuilder::constructDFA(const vector& Q, + const TransitionTable& T, + OvershadowMap* overshadows) const +{ + DFA dfa; + dfa.createStates(Q.size()); + + // build remaps table (used as cache for quickly finding DFA StateIds from NFA StateIds) + unordered_map remaps; + for_each(begin(Q), end(Q), [q_i = StateId { 0 }, &remaps](StateIdVec const& q) mutable { + for_each(begin(q), end(q), [&](StateId s) { remaps[s] = q_i; }); + q_i++; + }); + + // map q_i to d_i and flag accepting states + map overshadowing; + StateId q_i = 0; + for (const StateIdVec& q: Q) + { + // d_i represents the corresponding state in the DFA for all states of q from the NFA + const StateId d_i = q_i; + // cerr << fmt::format("map q{} to d{} for {} states, {}.\n", q_i, d_i->id(), q.size(), + // to_string(q, "d")); + + // if q contains an accepting state, then d is an accepting state in the DFA + if (nfa_.isAnyAccepting(q)) + { + optional tag = determineTag(q, &overshadowing); + assert(tag.has_value() && "DFA accept state merged from input states with different tags."); + // DEBUG("determineTag: q{} tag {} from {}.", q_i, *tag, q); + dfa.setAccept(d_i, *tag); + } + + if (optional bt = nfa_.containsBacktrackState(q); bt.has_value()) + { + // TODO: verify: must not contain more than one backtracking 
mapping + assert(dfa.isAccepting(d_i)); + dfa.setBacktrack(d_i, remaps[*bt]); + } + + q_i++; + } + + // observe mapping from q_i to d_i + for (auto const& [q_i, branch]: T.transitions) + for (auto&& [c, t_i]: branch) + dfa.setTransition(q_i, c, t_i); + + // q_0 becomes d_0 (initial state) + dfa.setInitialState(0); + + if (overshadows) + { + // check if tag is an acceptor in NFA but not in DFA, hence, it was overshadowed by another rule + for (const pair a: nfa_.acceptMap()) + { + const Tag tag = a.second; + if (!dfa.isAcceptor(tag)) + if (auto i = overshadowing.find(tag); i != overshadowing.end()) + overshadows->emplace_back(tag, i->second); + } + } + + return dfa; +} + +optional DFABuilder::configurationNumber(const vector& Q, const StateIdVec& t) +{ + if (auto i = find(begin(Q), end(Q), t); i != end(Q)) + return distance(begin(Q), i); + else + return nullopt; +} + +optional DFABuilder::determineTag(const StateIdVec& qn, map* overshadows) const +{ + deque tags; + + for (StateId s: qn) + if (optional t = nfa_.acceptTag(s); t.has_value()) + tags.push_back(*t); + + if (tags.empty()) + return nullopt; + + sort(begin(tags), end(tags)); + + optional lowestTag = tags.front(); + tags.erase(begin(tags)); + + for (Tag tag: tags) + (*overshadows)[tag] = *lowestTag; // {tag} is overshadowed by {lowestTag} + + return lowestTag; +} + +} // namespace regex_dfa diff --git a/src/regex_dfa/DFABuilder.h b/src/regex_dfa/DFABuilder.h new file mode 100644 index 0000000000..0cbaf5adeb --- /dev/null +++ b/src/regex_dfa/DFABuilder.h @@ -0,0 +1,64 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT +#pragma once + +#include + +#include +#include +#include + +namespace regex_dfa +{ + +class DFA; +class State; + +class DFABuilder +{ + public: + //! Map of rules that shows which rule is overshadowed by which other rule. + using OvershadowMap = std::vector>; + + explicit DFABuilder(NFA&& nfa): nfa_ { std::move(nfa) } {} + + /** + * Constructs a DFA out of the NFA. + * + * @param overshadows if not nullptr, it will be used to store semantic information about + * which rule tags have been overshadowed by which. + */ + [[nodiscard]] DFA construct(OvershadowMap* overshadows = nullptr); + + private: + struct TransitionTable; + + [[nodiscard]] DFA constructDFA(const std::vector& Q, + const TransitionTable& T, + OvershadowMap* overshadows) const; + + /** + * Finds @p t in @p Q and returns its offset (aka configuration number) or -1 if not found. + */ + [[nodiscard]] static std::optional configurationNumber(const std::vector& Q, + const StateIdVec& t); + + /** + * Determines the tag to use for the deterministic state representing @p q from non-deterministic FA @p + * fa. 
+ * + * @param q the set of states that reflect a single state in the DFA equal to the input FA + * + * @returns the determined tag or std::nullopt if none + */ + [[nodiscard]] std::optional determineTag(const StateIdVec& q, std::map* overshadows) const; + + private: + const NFA nfa_; +}; + +} // namespace regex_dfa diff --git a/src/regex_dfa/DFABuilder_test.cpp b/src/regex_dfa/DFABuilder_test.cpp new file mode 100644 index 0000000000..86a9613e0d --- /dev/null +++ b/src/regex_dfa/DFABuilder_test.cpp @@ -0,0 +1,33 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT + +#include +#include +#include +#include + +#include + +#include +#include + +using namespace regex_dfa; + +TEST_CASE("regex_DFABuilder.shadowing") +{ + Compiler cc; + cc.parse(std::make_unique(R"( + Identifier ::= [a-z][a-z0-9]* + TrueLiteral ::= "true" + )")); + // rule 2 is overshadowed by rule 1 + Compiler::OvershadowMap overshadows; + DFA dfa = cc.compileDFA(&overshadows); + REQUIRE(1 == overshadows.size()); + CHECK(2 == overshadows[0].first); // overshadowee + CHECK(1 == overshadows[0].second); // overshadower +} diff --git a/src/regex_dfa/DFAMinimizer.cpp b/src/regex_dfa/DFAMinimizer.cpp new file mode 100644 index 0000000000..9c5a58e53d --- /dev/null +++ b/src/regex_dfa/DFAMinimizer.cpp @@ -0,0 +1,277 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +using namespace std; + +namespace regex_dfa +{ + +#if 0 + #define DEBUG(msg, ...) \ + do \ + { \ + cerr << fmt::format(msg, __VA_ARGS__) << "\n"; \ + } while (0) +#else + #define DEBUG(msg, ...) \ + do \ + { \ + } while (0) +#endif + +DFAMinimizer::DFAMinimizer(const DFA& dfa): + dfa_ { dfa }, + initialStates_ { { "INITIAL", dfa.initialState() } }, + alphabet_ { dfa_.alphabet() }, + targetStateIdMap_ {} +{ +} + +DFAMinimizer::DFAMinimizer(const MultiDFA& multiDFA): + dfa_ { multiDFA.dfa }, + initialStates_ { multiDFA.initialStates }, + alphabet_ { dfa_.alphabet() }, + targetStateIdMap_ {} +{ +} + +/** + * Tests whether or not StateId @p s is an initial state in any of the DFAs of the MultiDFA. + */ +bool DFAMinimizer::isMultiInitialState(StateId s) const +{ + return any_of(initialStates_.begin(), initialStates_.end(), [s](const auto& p) { return p.second == s; }); +} + +/** + * Tests whether any s in S is the initial state in the DFA that is to be minimized. 
+ */ +bool DFAMinimizer::containsInitialState(const StateIdVec& S) const +{ + return any_of(S.begin(), S.end(), [this](StateId s) { return s == dfa_.initialState(); }); +} + +DFAMinimizer::PartitionVec::iterator DFAMinimizer::findGroup(StateId s) +{ + return find_if(begin(T), end(T), [&](StateIdVec& group) { + return dfa_.acceptTag(group.front()) == dfa_.acceptTag(s); + }); +} + +int DFAMinimizer::partitionId(StateId s) const +{ + auto i = + find_if(P.begin(), P.end(), [s](const auto& p) { return find(p.begin(), p.end(), s) != p.end(); }); + assert(i != P.end() && "State ID must be present in any of the partition sets."); + return static_cast(distance(P.begin(), i)); +} + +DFAMinimizer::PartitionVec DFAMinimizer::split(const StateIdVec& S) const +{ + for (Symbol c: alphabet_) + { + // if c splits S into s_1 and s_2 + // that is, phi(s_1, c) and phi(s_2, c) reside in two different p_i's (partitions) + // then return {s_1, s_2} + + map t_i; + for (StateId s: S) + { + if (const optional t = dfa_.delta(s, c); t.has_value()) + t_i[partitionId(*t)].push_back(s); + else + t_i[-1].push_back(s); + } + if (t_i.size() > 1) + { + DEBUG("split: {} on character '{}' into {} sets", to_string(S), (char) c, t_i.size()); + PartitionVec result; + for (auto&& t: t_i) + { + result.emplace_back(std::move(t.second)); + DEBUG(" partition {}: {}", t.first, t.second); + } + return result; + } + + assert(t_i.size() == 1); + + // t_i's only element thus is a reconstruction of S. + assert(t_i.begin()->second == S); + + for (StateId s: S) + { + PartitionVec result; + StateIdVec main; + + if (isMultiInitialState(s)) + result.emplace_back(StateIdVec { s }); + else + main.emplace_back(s); + + if (!main.empty()) + result.emplace_back(std::move(main)); + } + } + + DEBUG("split: no split needed for {}", to_string(S)); + return { S }; +} + +void DFAMinimizer::dumpGroups(const PartitionVec& T) +{ + DEBUG("dumping groups ({})", T.size()); + [[maybe_unused]] int groupNr = 0; + for (const auto& t: T) + { + stringstream sstr; + sstr << "{"; + for (size_t i = 0, e = t.size(); i != e; ++i) + { + if (i) + sstr << ", "; + sstr << "n" << t[i]; + } + sstr << "}"; + DEBUG("group {}: {}", groupNr, sstr.str()); + groupNr++; + } +} + +DFA DFAMinimizer::constructDFA() +{ + constructPartitions(); + return constructFromPartitions(P); +} + +MultiDFA DFAMinimizer::constructMultiDFA() +{ + constructPartitions(); + DFA dfamin = constructFromPartitions(P); + + // patch initialStates and the master-initial-state's transition symbol + MultiDFA::InitialStateMap initialStates; + for (const pair& p: initialStates_) + dfamin.removeTransition(dfamin.initialState(), static_cast(p.second)); + + for (const pair& p: initialStates_) + { + const StateId t = targetStateId(p.second); + initialStates[p.first] = t; + dfamin.setTransition(dfamin.initialState(), static_cast(t), t); + } + + return MultiDFA { std::move(initialStates), std::move(dfamin) }; +} + +void DFAMinimizer::constructPartitions() +{ + // group all accept states by their tag + for (StateId s: dfa_.acceptStates()) + { + if (auto group = findGroup(s); group != T.end()) + group->push_back(s); + else + T.push_back({ s }); + } + + // add another group for all non-accept states + T.emplace_back(dfa_.nonAcceptStates()); + + dumpGroups(T); + + PartitionVec splits; + while (P != T) + { + swap(P, T); + T.clear(); + + for (StateIdVec& p: P) + T.splice(T.end(), split(p)); + } + + // build up cache to quickly get target state ID from input DFA's state ID + targetStateIdMap_ = [&]() { + unordered_map remaps; + 
StateId p_i = 0; + for (const StateIdVec& p: P) + { + for (StateId s: p) + remaps[s] = p_i; + + p_i++; + } + return remaps; + }(); +} + +DFA DFAMinimizer::constructFromPartitions(const PartitionVec& P) const +{ + DEBUG("minimization terminated with {} unique partition sets", P.size()); + + // instanciate states + DFA dfamin; + dfamin.createStates(P.size()); + StateId p_i = 0; + for (const StateIdVec& p: P) + { + const StateId s = *p.begin(); + const StateId q = p_i; + DEBUG("Creating p{}: {} {}", + p_i, + dfa_.isAccepting(s) ? "accepting" : "rejecting", + containsInitialState(p) ? "initial" : ""); + if (optional tag = dfa_.acceptTag(s); tag.has_value()) + dfamin.setAccept(q, *tag); + + if (containsInitialState(p)) + dfamin.setInitialState(q); + + if (optional bt = containsBacktrackState(p); bt.has_value()) + dfamin.setBacktrack(p_i, targetStateId(*bt)); + + p_i++; + } + + // setup transitions + p_i = 0; + for (const StateIdVec& p: P) + { + const StateId s = *p.begin(); + for (pair const transition: dfa_.stateTransitions(s)) + { + auto const t_i = partitionId(transition.second); + DEBUG("map p{} --({})--> p{}", p_i, prettySymbol(transition.first), t_i); + dfamin.setTransition(p_i, transition.first, t_i); + } + p_i++; + } + + return dfamin; +} + +optional DFAMinimizer::containsBacktrackState(const StateIdVec& Q) const +{ + for (StateId q: Q) + if (optional t = dfa_.backtrack(q); t.has_value()) + return *t; + + return nullopt; +} + +} // namespace regex_dfa diff --git a/src/regex_dfa/DFAMinimizer.h b/src/regex_dfa/DFAMinimizer.h new file mode 100644 index 0000000000..0f30d06267 --- /dev/null +++ b/src/regex_dfa/DFAMinimizer.h @@ -0,0 +1,65 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. 
You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT +#pragma once + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace regex_dfa +{ + +class DFA; + +class DFAMinimizer +{ + public: + explicit DFAMinimizer(const DFA& dfa); + explicit DFAMinimizer(const MultiDFA& multiDFA); + + DFA constructDFA(); + MultiDFA constructMultiDFA(); + + private: + using PartitionVec = std::list; + + void constructPartitions(); + [[nodiscard]] StateIdVec nonAcceptStates() const; + [[nodiscard]] bool containsInitialState(const StateIdVec& S) const; + [[nodiscard]] bool isMultiInitialState(StateId s) const; + [[nodiscard]] PartitionVec::iterator findGroup(StateId s); + [[nodiscard]] int partitionId(StateId s) const; + [[nodiscard]] PartitionVec split(const StateIdVec& S) const; + [[nodiscard]] DFA constructFromPartitions(const PartitionVec& P) const; + [[nodiscard]] std::optional containsBacktrackState(const StateIdVec& Q) const; + + static void dumpGroups(const PartitionVec& T); + + [[nodiscard]] StateId targetStateId(StateId oldId) const + { + auto i = targetStateIdMap_.find(oldId); + assert(i != targetStateIdMap_.end()); + return i->second; + } + + private: + const DFA& dfa_; + const MultiDFA::InitialStateMap initialStates_; + const Alphabet alphabet_; + PartitionVec T; + PartitionVec P; + std::unordered_map targetStateIdMap_; +}; + +} // namespace regex_dfa diff --git a/src/regex_dfa/DotVisitor.h b/src/regex_dfa/DotVisitor.h new file mode 100644 index 0000000000..303dec8373 --- /dev/null +++ b/src/regex_dfa/DotVisitor.h @@ -0,0 +1,29 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT + +#pragma once + +#include + +#include + +namespace regex_dfa +{ + +class DotVisitor +{ + public: + virtual ~DotVisitor() = default; + + virtual void start(StateId initialState) = 0; + virtual void visitNode(StateId number, bool start, bool accept) = 0; + virtual void visitEdge(StateId from, StateId to, Symbol s) = 0; + virtual void endVisitEdge(StateId from, StateId to) = 0; + virtual void end() = 0; +}; + +} // namespace regex_dfa diff --git a/src/regex_dfa/DotWriter.cpp b/src/regex_dfa/DotWriter.cpp new file mode 100644 index 0000000000..36d98e4681 --- /dev/null +++ b/src/regex_dfa/DotWriter.cpp @@ -0,0 +1,115 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. 
You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT + +#include +#include + +#include + +#include +#include +#include + +using namespace std; + +namespace regex_dfa +{ + +template +static string escapeString(const StringType& str) +{ + stringstream stream_; + for (char ch: str) + { + // \t\n\r is already converted to escape sequence + switch (ch) + { + case '\\': stream_ << "\\\\"; break; + case '"': stream_ << "\\\""; break; + default: stream_ << ch; break; + } + } + return stream_.str(); +} + +void DotWriter::start(StateId initialState) +{ + initialState_ = initialState; + stream_ << "digraph {\n"; + stream_ << " rankdir=LR;\n"; + // stream_ << " label=\"" << escapeString("FA" /*TODO*/) << "\";\n"; +} + +void DotWriter::visitNode(StateId number, bool start, bool accept) +{ + if (start) + { + const string_view shape = accept ? "doublecircle" : "circle"; + stream_ << " \"\" [shape=plaintext];\n"; + stream_ << " node [shape=" << shape << ",color=red];\n"; + stream_ << " \"\" -> " << stateLabelPrefix_ << number << ";\n"; + stream_ << " node [color=black];\n"; + } + else if (accept) + { + stream_ << " node [shape=doublecircle]; " << stateLabelPrefix_ << number << ";\n"; + stream_ << " node [shape=circle,color=black];\n"; + } + else + { + // stream_ << stateLabelPrefix_ << number << ";\n"; + } +} + +void DotWriter::visitEdge(StateId /*from*/, StateId to, Symbol s) +{ + transitionGroups_[to].push_back(s); +} + +void DotWriter::endVisitEdge(StateId from, StateId to) +{ + auto& tgroup = transitionGroups_[to]; + if (!tgroup.empty()) + { + if (from == initialState_ && initialStates_ != nullptr) + { + for (Symbol s: tgroup) + { + const string label = [this, s]() { + for (const auto& p: *initialStates_) + if (p.second == static_cast(s)) + return fmt::format("<{}>", p.first); + return prettySymbol(s); + }(); + stream_ << fmt::format(" {}{} -> {}{} [label=\"{}\"];\n", + stateLabelPrefix_, + from, + stateLabelPrefix_, + to, + escapeString(label)); + } + } + else + { + string label = groupCharacterClassRanges(std::move(tgroup)); + stream_ << fmt::format(" {}{} -> {}{} [label=\"{}\"];\n", + stateLabelPrefix_, + from, + stateLabelPrefix_, + to, + escapeString(label)); + } + tgroup.clear(); + } +} + +void DotWriter::end() +{ + stream_ << "}\n"; +} + +} // namespace regex_dfa diff --git a/src/regex_dfa/DotWriter.h b/src/regex_dfa/DotWriter.h new file mode 100644 index 0000000000..66fa177ec5 --- /dev/null +++ b/src/regex_dfa/DotWriter.h @@ -0,0 +1,84 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. 
You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT +#pragma once + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace regex_dfa +{ + +class DotWriter: public DotVisitor +{ + public: + DotWriter(std::ostream& os, std::string stateLabelPrefix): + ownedStream_ {}, + stream_ { os }, + stateLabelPrefix_ { stateLabelPrefix }, + transitionGroups_ {}, + initialStates_ { nullptr }, + initialState_ { 0 } + { + } + + DotWriter(const std::string& filename, std::string stateLabelPrefix): + ownedStream_ { std::make_unique(filename) }, + stream_ { *ownedStream_.get() }, + stateLabelPrefix_ { stateLabelPrefix }, + transitionGroups_ {}, + initialStates_ { nullptr }, + initialState_ { 0 } + { + } + + DotWriter(std::ostream& os, std::string stateLabelPrefix, const MultiDFA::InitialStateMap& initialStates): + ownedStream_ {}, + stream_ { os }, + stateLabelPrefix_ { stateLabelPrefix }, + transitionGroups_ {}, + initialStates_ { &initialStates }, + initialState_ { 0 } + { + } + + DotWriter(const std::string& filename, + std::string stateLabelPrefix, + const MultiDFA::InitialStateMap& initialStates): + ownedStream_ { std::make_unique(filename) }, + stream_ { *ownedStream_.get() }, + stateLabelPrefix_ { stateLabelPrefix }, + transitionGroups_ {}, + initialStates_ { &initialStates }, + initialState_ { 0 } + { + } + + public: + void start(StateId initialState) override; + void visitNode(StateId number, bool start, bool accept) override; + void visitEdge(StateId from, StateId to, Symbol s) override; + void endVisitEdge(StateId from, StateId to) override; + void end() override; + + private: + std::unique_ptr ownedStream_; + std::ostream& stream_; + std::string stateLabelPrefix_; + std::map /*transition symbols*/> transitionGroups_; + const MultiDFA::InitialStateMap* initialStates_; + StateId initialState_; +}; + +} // namespace regex_dfa diff --git a/src/regex_dfa/DotWriter_test.cpp b/src/regex_dfa/DotWriter_test.cpp new file mode 100644 index 0000000000..4c659a1cfa --- /dev/null +++ b/src/regex_dfa/DotWriter_test.cpp @@ -0,0 +1,67 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. 
You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT + +#include + +#include + +#include + +using namespace std; +using namespace regex_dfa; + +TEST_CASE("regex_DotWriter.simple") +{ + stringstream sstr; + DotWriter dw(sstr, "n"); + + dw.start(0); + dw.visitNode(0, true, true); + dw.visitEdge(0, 1, 'a'); + dw.endVisitEdge(0, 1); + + dw.visitNode(1, false, true); + dw.visitEdge(1, 1, 'b'); + dw.visitEdge(1, 1, '\r'); + dw.visitEdge(1, 1, '\n'); + dw.visitEdge(1, 1, '\t'); + dw.visitEdge(1, 1, ' '); + dw.endVisitEdge(1, 1); + dw.end(); + + REQUIRE(!sstr.str().empty()); + // just make sure it processes +} + +TEST_CASE("regex_DotWriter.multidfa_simple") +{ + stringstream sstr; + const MultiDFA::InitialStateMap mis { { "foo", 1 }, { "bar", 2 } }; + DotWriter dw(sstr, "n", mis); + + dw.start(0); + dw.visitNode(0, true, false); + dw.visitNode(1, false, true); + dw.visitNode(2, false, true); + + dw.visitEdge(0, 1, 0x01); + dw.endVisitEdge(0, 1); + + dw.visitEdge(0, 2, 0x02); + dw.endVisitEdge(0, 2); + + dw.visitEdge(1, 1, 'a'); + dw.endVisitEdge(1, 1); + + dw.visitEdge(2, 2, 'a'); + dw.endVisitEdge(2, 2); + + dw.end(); + + REQUIRE(!sstr.str().empty()); + // just make sure it processes +} diff --git a/src/regex_dfa/Lexable.h b/src/regex_dfa/Lexable.h new file mode 100644 index 0000000000..6524896b81 --- /dev/null +++ b/src/regex_dfa/Lexable.h @@ -0,0 +1,591 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT +#pragma once + +#include // TokenInfo: TODO: remove that header/API (inline TokenInfo here then) +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace regex_dfa +{ + +//! Runtime exception that is getting thrown when a word could not be recognized. +struct LexerError: public std::runtime_error +{ + explicit LexerError(unsigned int offset): + std::runtime_error { fmt::format("[{}] Failed to lexically recognize a word.", offset) }, + offset { offset } + { + } + + unsigned int offset; +}; + +template +class LexerIterator +{ + public: + using TokenInfo = regex_dfa::TokenInfo; + using TraceFn = std::function; + + using difference_type = long; + using value_type = TokenInfo; + using pointer = TokenInfo*; + using reference = TokenInfo&; + using iterator_category = std::forward_iterator_tag; + + enum class Eof + { + EofMark + }; + + /** + * Initializes a LexerIterator that purely marks the end of a lexically analyzed stream. + */ + explicit LexerIterator(Eof); + + /** + * Initializes a LexerIterator for a given source to be analyzed with given lexer definition . + */ + LexerIterator(const LexerDef& ld, std::istream& source, TraceFn trace = TraceFn {}); + + /** + * Retrieves the default DFA machine that is used to recognize words. + */ + [[nodiscard]] Machine defaultMachine() const noexcept; + + /** + * Sets the active deterministic finite automaton to use for recognizing words. + * + * @param machine the DFA machine to use for recognizing words. + * @return the previous Machine state. 
+ */ + Machine setMachine(Machine machine); + + [[nodiscard]] const TokenInfo& operator*() const noexcept { return currentToken_; } + [[nodiscard]] auto offset() const noexcept { return currentToken_.offset; } + [[nodiscard]] auto literal() const noexcept -> const std::string& { return currentToken_.literal; } + [[nodiscard]] auto token() const noexcept { return currentToken_.token; } + [[nodiscard]] auto name() const noexcept { return name(token()); } + + [[nodiscard]] bool operator==(const LexerIterator& rhs) const noexcept; + [[nodiscard]] bool operator!=(const LexerIterator& rhs) const noexcept; + + LexerIterator& operator++(); + LexerIterator& operator++(int); + + private: + void recognize(); + [[nodiscard]] Token recognizeOne(); + + // --------------------------------------------------------------------------------- + // state helpers + + static constexpr StateId BadState = std::numeric_limits::max(); + + [[nodiscard]] StateId getInitialState() const noexcept; + [[nodiscard]] bool isAcceptState(StateId state) const; + + /** + * Retrieves the next state for given input state and input symbol. + * + * @param currentState the current State the DFA is in to. + * @param inputSymbol the input symbol that is used for transitioning from current state to the next + * state. + * @returns the next state to transition to. + */ + [[nodiscard]] StateId delta(StateId currentState, Symbol inputSymbol) const; + + // --------------------------------------------------------------------------------- + // stream helpers + + [[nodiscard]] int currentChar() const noexcept { return currentChar_; } + [[nodiscard]] bool eof() const noexcept { return !source_->good(); } + Symbol nextChar(); + void rollback(); + + // --------------------------------------------------------------------------------- + // debugging helpers + + template + void tracef(fmt::format_string msg, Args&&... args) const; + + [[nodiscard]] const std::string& name(Token t) const; + + [[nodiscard]] std::string toString(const std::deque& stack); + [[nodiscard]] Token token(StateId s) const; + [[nodiscard]] static std::string stateName(StateId s); + + private: + const LexerDef* def_ = nullptr; + const TraceFn trace_; + std::istream* source_ = nullptr; + int eof_ = 0; // 0=No, 1=EOF_INIT, 2=EOF_FINAL + + TokenInfo currentToken_; + Machine initialStateId_ = def_ ? defaultMachine() : Machine {}; + unsigned offset_ = 0; + bool isBeginOfLine_ = true; + int currentChar_ = -1; + std::vector buffered_; +}; + +template +inline Token token(const LexerIterator& it) +{ + return it.token(); +} + +template +inline size_t offset(const LexerIterator& it) +{ + return it.offset(); +} + +template +inline const std::string& literal(const LexerIterator& it) +{ + return it.literal(); +} + +/** + * @brief Holds a lexically analyzable stream of characters with a Lexer definition. + */ +template +class Lexable +{ + public: + using TraceFn = std::function; + using iterator = LexerIterator; + using value_type = TokenInfo; + + Lexable(const LexerDef& ld, std::istream& src, TraceFn trace = TraceFn {}): + def_ { ld }, source_ { &src }, initialOffset_ { source_->tellg() }, trace_ { std::move(trace) } + { + if constexpr (!RequiresBeginOfLine) + if (def_.containsBeginOfLineStates) + throw std::invalid_argument { + "LexerDef contains a grammar that requires begin-of-line handling, but this Lexer has " + "begin-of-line support disabled." 
+ }; + } + + Lexable(const LexerDef& ld, const std::string& src, TraceFn trace = TraceFn {}): + Lexable { ld, std::make_unique(src), std::move(trace) } + { + } + + Lexable(const LexerDef& ld, std::unique_ptr&& src, TraceFn trace = TraceFn {}): + Lexable(ld, *src, std::move(trace)) + { + ownedSource_ = std::move(src); + } + + auto begin() const + { + source_->clear(); + source_->seekg(initialOffset_, std::ios::beg); + return iterator { def_, *source_, trace_ }; + } + + auto end() const { return iterator { iterator::Eof::EofMark }; } + + private: + const LexerDef& def_; + std::unique_ptr ownedSource_; + std::istream* source_; + std::streamoff initialOffset_; + TraceFn trace_; +}; + +template +inline auto begin(const Lexable& ls) +{ + return ls.begin(); +} + +template +inline auto end(const Lexable& ls) +{ + return ls.end(); +} + +// {{{ LexerIterator: impl +template +LexerIterator::LexerIterator(Eof): eof_ { 2 } +{ +} + +template +LexerIterator::LexerIterator(const LexerDef& ld, + std::istream& source, + TraceFn trace): + def_ { &ld }, trace_ { std::move(trace) }, source_ { &source } +{ + recognize(); +} + +template +Machine LexerIterator::defaultMachine() const noexcept +{ + auto i = def_->initialStates.find("INITIAL"); + assert(i != def_->initialStates.end()); + return static_cast(i->second); +} + +template +Machine LexerIterator::setMachine(Machine machine) +{ + return initialStateId_ = static_cast(machine); +} + +template +bool LexerIterator::operator==( + const LexerIterator& rhs) const noexcept +{ + return offset_ == rhs.offset_ || (eof_ == 2 && rhs.eof_ == 2); +} + +template +bool LexerIterator::operator!=( + const LexerIterator& rhs) const noexcept +{ + return !(*this == rhs); +} + +template +LexerIterator& LexerIterator::operator++() +{ + if (eof()) + eof_++; + + recognize(); + return *this; +} + +template +LexerIterator& LexerIterator::operator++(int) +{ + if (eof()) + eof_++; + + recognize(); + return *this; +} + +template +inline void LexerIterator::recognize() +{ + for (;;) + if (Token tag = recognizeOne(); static_cast(tag) != IgnoreTag) + return; +} + +template +inline Token LexerIterator::recognizeOne() +{ + // init + currentToken_.offset = offset_; + currentToken_.literal.clear(); + + StateId state = getInitialState(); + std::deque stack; + stack.push_back(BadState); + + if constexpr (Trace) + tracef("recognizeOne: startState {}, offset {} {}", + stateName(state), + offset_, + isBeginOfLine_ ? "BOL" : "no-BOL"); + + // advance + while (state != ErrorState) + { + Symbol ch = nextChar(); // one of: input character, ERROR or EOF + currentToken_.literal.push_back(ch); + + // we do not stack.clear() stack if isAcceptState(state) as we need this information iff + // lookahead is required. Otherwise we could clear here (for space savings) + + stack.push_back(state); + state = delta(state, ch); + } + + // backtrack to last (right-most) accept state + while (state != BadState && !isAcceptState(state)) + { + if constexpr (Trace) + tracef("recognizeOne: backtrack: current state {} {}; stack: {}", + stateName(state), + isAcceptState(state) ? 
"accepting" : "non-accepting", + toString(stack)); + + state = stack.back(); + stack.pop_back(); + if (!currentToken_.literal.empty()) + { + rollback(); + currentToken_.literal.resize(currentToken_.literal.size() - 1); + } + } + + // backtrack to right-most non-lookahead position in input stream + if (auto i = def_->backtrackingStates.find(state); i != def_->backtrackingStates.end()) + { + const StateId tmp = state; + const StateId backtrackState = i->second; + if constexpr (Trace) + tracef("recognize: backtracking from {} to {}; stack: {}", + stateName(state), + stateName(backtrackState), + toString(stack)); + while (!stack.empty() && state != backtrackState) + { + state = stack.back(); + stack.pop_back(); + if constexpr (Trace) + tracef("recognize: backtrack: state {}", stateName(state)); + if (!currentToken_.literal.empty()) + { + rollback(); + currentToken_.literal.resize(currentToken_.literal.size() - 1); + } + } + state = tmp; + } + + if constexpr (Trace) + tracef("recognize: final state {} {} {} {}-{} {} [currentChar: {}]", + stateName(state), + isAcceptState(state) ? "accepting" : "non-accepting", + isAcceptState(state) ? name(token(state)) : std::string(), + currentToken_.offset, + offset_, + quotedString(currentToken_.literal), + prettySymbol(currentChar_)); + + if (!isAcceptState(state)) + throw LexerError { offset_ }; + + auto i = def_->acceptStates.find(state); + assert(i != def_->acceptStates.end() && "Accept state hit, but no tag assigned."); + isBeginOfLine_ = currentToken_.literal.back() == '\n'; + + return currentToken_.token = static_cast(i->second); +} + +template +inline StateId LexerIterator::getInitialState() const noexcept +{ + if constexpr (RequiresBeginOfLine) + if (isBeginOfLine_ && def_->containsBeginOfLineStates) + return static_cast(initialStateId_) + 1; + + return static_cast(initialStateId_); +} + +template +inline bool LexerIterator::isAcceptState(StateId id) const +{ + return def_->acceptStates.find(id) != def_->acceptStates.end(); +} + +template +StateId LexerIterator::delta(StateId currentState, + Symbol inputSymbol) const +{ + const StateId nextState = def_->transitions.apply(currentState, inputSymbol); + if constexpr (Trace) + { + if (isAcceptState(nextState)) + tracef("recognize: state {:>4} --{:-^7}--> {:<6} (accepting: {})", + stateName(currentState), + prettySymbol(inputSymbol), + stateName(nextState), + name(token(nextState))); + else + tracef("recognize: state {:>4} --{:-^7}--> {:<6}", + stateName(currentState), + prettySymbol(inputSymbol), + stateName(nextState)); + } + + return nextState; +} + +template +inline Symbol LexerIterator::nextChar() +{ + if (!buffered_.empty()) + { + int ch = buffered_.back(); + currentChar_ = ch; + buffered_.resize(buffered_.size() - 1); + if constexpr (Trace) + tracef("Lexer:{}: advance (buffered) '{}'", offset_, prettySymbol(ch)); + offset_++; + return ch; + } + + if (!source_->good()) + { // EOF or I/O error + if constexpr (Trace) + tracef("Lexer:{}: advance '<<{}>>'", offset_, "EOF"); + return Symbols::EndOfFile; + } + + int ch = source_->get(); + if (ch < 0) + { + currentChar_ = Symbols::EndOfFile; + offset_++; + if constexpr (Trace) + tracef("Lexer:{}: advance '{}'", offset_, prettySymbol(ch)); + return currentChar_; + } + + currentChar_ = ch; + if constexpr (Trace) + tracef("Lexer:{}: advance '{}'", offset_, prettySymbol(ch)); + offset_++; + return ch; +} + +template +inline void LexerIterator::rollback() +{ + currentChar_ = currentToken_.literal.back(); + if (currentToken_.literal.back() != -1) + { + 
offset_--; + buffered_.push_back(static_cast(static_cast(currentToken_.literal.back()))); + tracef("Lexer:{}: rollback '{}'", offset_, prettySymbol(buffered_.back())); + } +} + +// ================================================================================= + +template +template +inline void LexerIterator::tracef(fmt::format_string msg, + Args&&... args) const +{ + if constexpr (Trace) + if (trace_) + trace_(fmt::format(msg, std::forward(args)...)); +} + +template +inline const std::string& LexerIterator::name(Token t) const +{ + auto i = def_->tagNames.find(static_cast(t)); + assert(i != def_->tagNames.end()); + return i->second; +} + +template +inline std::string LexerIterator::toString( + const std::deque& stack) +{ + std::stringstream sstr; + sstr << "{"; + int i = 0; + for (const auto s: stack) + { + if (i) + sstr << ","; + sstr << stateName(s); + i++; + } + + sstr << "}"; + return sstr.str(); +} + +template +Token LexerIterator::token(StateId s) const +{ + auto i = def_->acceptStates.find(s); + assert(i != def_->acceptStates.end()); + return static_cast(i->second); +} + +template +inline std::string LexerIterator::stateName(StateId s) +{ + switch (s) + { + case BadState: return "Bad"; + case ErrorState: return "Error"; + default: return fmt::format("n{}", std::to_string(s)); + } +} +// }}} + +} // namespace regex_dfa + +namespace std +{ +template +struct iterator_traits> +{ + using iterator = regex_dfa::LexerIterator; + + using difference_type = typename iterator::difference_type; + using value_type = typename iterator::value_type; + using pointer = typename iterator::pointer; + using reference = typename iterator::reference; + using iterator_category = typename iterator::iterator_category; +}; +} // namespace std + +namespace fmt +{ +template +struct formatter> +{ + using TokenInfo = regex_dfa::TokenInfo; + using LexerIterator = regex_dfa::LexerIterator; + + template + constexpr auto parse(ParseContext& ctx) + { + return ctx.begin(); + } + + template + constexpr auto format(const LexerIterator& v, FormatContext& ctx) + { + return fmt::format_to(ctx.out(), "{} ({})", v.literal(), v.name()); + } +}; +} // namespace fmt diff --git a/src/regex_dfa/Lexer-inl.h b/src/regex_dfa/Lexer-inl.h new file mode 100644 index 0000000000..25b2aaae11 --- /dev/null +++ b/src/regex_dfa/Lexer-inl.h @@ -0,0 +1,331 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT + +#include + +#include +#include +#include +#include +#include +#include + +namespace regex_dfa +{ + +static inline std::string quotedString(const std::string& s) +{ + std::stringstream sstr; + sstr << std::quoted(s); + return sstr.str(); +} + +template +inline Lexer::Lexer(const LexerDef& info, DebugLogger logger): + def_ { info }, + debug_ { logger }, + initialStateId_ { defaultMachine() }, + word_ {}, + ownedStream_ {}, + stream_ { nullptr }, + oldOffset_ { 0 }, + offset_ { 0 }, + fileSize_ { 0 }, + isBeginOfLine_ { true }, + token_ { 0 } +{ + if constexpr (!RequiresBeginOfLine) + if (def_.containsBeginOfLineStates) + throw std::invalid_argument { + "LexerDef contains a grammar that requires begin-of-line handling, but this Lexer has " + "begin-of-line support disabled." 
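// The unit tests in Lexer_test.cpp below drive this API end to end; as a compact
// reference, a hedged usage sketch of Lexable/LexerIterator. Header paths, the
// defaulted template arguments, and the <<EOF>> rule spelling are assumptions.
#include <regex_dfa/Compiler.h>
#include <regex_dfa/Lexable.h>

#include <iostream>

enum class Tok { Eof = 1, Number, Ident }; // tags follow rule order; (ignore) rules get no tag

int main()
{
    using namespace regex_dfa;

    Compiler cc;
    cc.parse(R"(
        Space(ignore) ::= [\s\t\n]+
        Eof           ::= <<EOF>>
        Number        ::= [0-9]+
        Ident         ::= [a-z]+
    )");
    const LexerDef ld = cc.compile();

    const Lexable<Tok> src { ld, "foo 42 bar" };
    for (auto i = begin(src); token(i) != Tok::Eof; ++i)
        std::cout << literal(i) << " @ " << offset(i) << '\n';
}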
+ }; +} + +template +inline Lexer::Lexer(const LexerDef& info, + std::unique_ptr stream, + DebugLogger logger): + Lexer { info, std::move(logger) } +{ + reset(std::move(stream)); +} + +template +inline Lexer::Lexer(const LexerDef& info, + std::istream& stream, + DebugLogger logger): + Lexer { info, std::move(logger) } +{ + stream_ = &stream; + fileSize_ = getFileSize(); +} + +template +inline Lexer::Lexer(const LexerDef& info, + std::string input, + DebugLogger logger): + Lexer { info, std::move(logger) } +{ + reset(std::make_unique(std::move(input))); +} + +template +inline void Lexer::reset(std::unique_ptr stream) +{ + ownedStream_ = std::move(stream); + stream_ = ownedStream_.get(); + oldOffset_ = 0; + offset_ = 0; + isBeginOfLine_ = true; + fileSize_ = getFileSize(); +} + +template +inline void Lexer::reset(const std::string& text) +{ + reset(std::make_unique(text)); +} + +template +inline size_t Lexer::getFileSize() +{ + std::streamoff oldpos = stream_->tellg(); + stream_->seekg(0, stream_->end); + + std::streamoff theSize = stream_->tellg(); + stream_->seekg(oldpos, stream_->beg); + + return static_cast(theSize); +} + +template +inline std::string Lexer::stateName(StateId s, std::string_view n) +{ + switch (s) + { + case BadState: return "Bad"; + case ErrorState: return "Error"; + default: return fmt::format("{}{}", n, std::to_string(s)); + } +} + +template +inline std::string Lexer::toString( + const std::deque& stack) +{ + std::stringstream sstr; + sstr << "{"; + int i = 0; + for (const auto s: stack) + { + if (i) + sstr << ","; + sstr << stateName(s); + i++; + } + + sstr << "}"; + return sstr.str(); +} + +template +inline auto Lexer::recognize() -> TokenInfo +{ + for (;;) + if (Token tag = recognizeOne(); static_cast(tag) != IgnoreTag) + return TokenInfo { tag, word_, oldOffset_ }; +} + +template +inline StateId Lexer::getInitialState() const noexcept +{ + if constexpr (RequiresBeginOfLine) + { + if (isBeginOfLine_ && def_.containsBeginOfLineStates) + { + return static_cast(initialStateId_) + 1; + } + } + + return static_cast(initialStateId_); +} + +template +inline Token Lexer::recognizeOne() +{ + // init + oldOffset_ = offset_; + word_.clear(); + StateId state = getInitialState(); + std::deque stack; + stack.push_back(BadState); + + if constexpr (Debug) + debugf("recognize: startState {}, offset {} {}", + stateName(state), + offset_, + isBeginOfLine_ ? "BOL" : "no-BOL"); + + // advance + while (state != ErrorState) + { + Symbol ch = nextChar(); // one of: input character, ERROR or EOF + word_.push_back(ch); + + // we do not stack.clear() stack if isAcceptState(state) as we need this information iff + // lookahead is required. Otherwise we could clear here (for space savings) + + stack.push_back(state); + state = delta(state, ch); + } + + // backtrack to last (right-most) accept state + while (state != BadState && !isAcceptState(state)) + { + if constexpr (Debug) + debugf("recognize: backtrack: current state {} {}; stack: {}", + stateName(state), + isAcceptState(state) ? 
"accepting" : "non-accepting", + toString(stack)); + + state = stack.back(); + stack.pop_back(); + if (!word_.empty()) + { + rollback(); + word_.resize(word_.size() - 1); + } + } + + // backtrack to right-most non-lookahead position in input stream + if (auto i = def_.backtrackingStates.find(state); i != def_.backtrackingStates.end()) + { + const StateId tmp = state; + const StateId backtrackState = i->second; + if constexpr (Debug) + debugf("recognize: backtracking from {} to {}; stack: {}", + stateName(state), + stateName(backtrackState), + toString(stack)); + while (!stack.empty() && state != backtrackState) + { + state = stack.back(); + stack.pop_back(); + if constexpr (Debug) + debugf("recognize: backtrack: state {}", stateName(state)); + if (!word_.empty()) + { + rollback(); + word_.resize(word_.size() - 1); + } + } + state = tmp; + } + + if constexpr (Debug) + debugf("recognize: final state {} {} {} {}-{} {} [currentChar: {}]", + stateName(state), + isAcceptState(state) ? "accepting" : "non-accepting", + isAcceptState(state) ? name(token(state)) : std::string(), + oldOffset_, + offset_, + quotedString(word_), + prettySymbol(currentChar_)); + + if (!isAcceptState(state)) + throw LexerError { offset_ }; + + auto i = def_.acceptStates.find(state); + assert(i != def_.acceptStates.end() && "Accept state hit, but no tag assigned."); + isBeginOfLine_ = word_.back() == '\n'; + return token_ = static_cast(i->second); +} + +template +inline StateId Lexer::delta(StateId currentState, + Symbol inputSymbol) const +{ + const StateId nextState = def_.transitions.apply(currentState, inputSymbol); + if constexpr (Debug) + { + if (isAcceptState(nextState)) + { + debugf("recognize: state {:>4} --{:-^7}--> {:<6} (accepting: {})", + stateName(currentState), + prettySymbol(inputSymbol), + stateName(nextState), + name(token(nextState))); + } + else + { + debugf("recognize: state {:>4} --{:-^7}--> {:<6}", + stateName(currentState), + prettySymbol(inputSymbol), + stateName(nextState)); + } + } + + return nextState; +} + +template +inline bool Lexer::isAcceptState(StateId state) const noexcept +{ + return def_.acceptStates.find(state) != def_.acceptStates.end(); +} + +template +inline Symbol Lexer::nextChar() +{ + if (!buffered_.empty()) + { + int ch = buffered_.back(); + currentChar_ = ch; + buffered_.resize(buffered_.size() - 1); + if constexpr (Debug) + debugf("Lexer:{}: advance '{}'", offset_, prettySymbol(ch)); + offset_++; + return ch; + } + + if (!stream_->good()) + { // EOF or I/O error + if constexpr (Debug) + debugf("Lexer:{}: advance '{}'", offset_, "EOF"); + return Symbols::EndOfFile; + } + + int ch = stream_->get(); + if (ch < 0) + { + currentChar_ = Symbols::EndOfFile; + offset_++; + if constexpr (Debug) + debugf("Lexer:{}: advance '{}'", offset_, prettySymbol(ch)); + return currentChar_; + } + + currentChar_ = ch; + if constexpr (Debug) + debugf("Lexer:{}: advance '{}'", offset_, prettySymbol(ch)); + offset_++; + return ch; +} + +template +inline void Lexer::rollback() +{ + currentChar_ = word_.back(); + if (word_.back() != -1) + { + offset_--; + buffered_.push_back(word_.back()); + } +} + +} // namespace regex_dfa diff --git a/src/regex_dfa/Lexer.h b/src/regex_dfa/Lexer.h new file mode 100644 index 0000000000..769ce34cad --- /dev/null +++ b/src/regex_dfa/Lexer.h @@ -0,0 +1,289 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in 
compliance with the License. You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT +#pragma once + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace regex_dfa +{ + +template +struct TokenInfo +{ + Token token; + std::string literal; + size_t offset; + + operator Token() const noexcept { return token; } + + friend bool operator==(const TokenInfo& a, Token b) noexcept { return a.token == b; } + friend bool operator!=(const TokenInfo& a, Token b) noexcept { return a.token != b; } + friend bool operator==(Token a, const TokenInfo& b) noexcept { return b == a; } + friend bool operator!=(Token a, const TokenInfo& b) noexcept { return b != a; } +}; + +template +[[nodiscard]] inline Token token(const TokenInfo& it) +{ + return it.token; +} + +template +[[nodiscard]] inline size_t offset(const TokenInfo& it) +{ + return it.offset; +} + +template +[[nodiscard]] inline const std::string& literal(const TokenInfo& it) +{ + return it.literal; +} + +template +[[nodiscard]] inline const std::string& to_string(const TokenInfo& info) noexcept +{ + return info.literal; +} + +/** + * Lexer API for recognizing words. + */ +template +class Lexer +{ + public: + using value_type = Token; + using DebugLogger = std::function; + using TokenInfo = regex_dfa::TokenInfo; + + //! Constructs the Lexer with the given information table. + explicit Lexer(const LexerDef& info, DebugLogger logger = DebugLogger {}); + + //! Constructs the Lexer with the given information table and input stream. + Lexer(const LexerDef& info, std::unique_ptr input, DebugLogger logger = DebugLogger {}); + + //! Constructs the Lexer with the given information table and input stream. + Lexer(const LexerDef& info, std::istream& input, DebugLogger logger = DebugLogger {}); + + //! Constructs the Lexer with the given information table and input stream. + Lexer(const LexerDef& info, std::string input, DebugLogger logger = DebugLogger {}); + + /** + * Open given input stream. + */ + void reset(std::unique_ptr input); + void reset(const std::string& input); + + /** + * Recognizes one token (ignored patterns are skipped). + */ + [[nodiscard]] TokenInfo recognize(); + + /** + * Recognizes one token, regardless of it is to be ignored or not. + */ + [[nodiscard]] Token recognizeOne(); + + //! the underlying word of the currently recognized token + [[nodiscard]] const std::string& word() const { return word_; } + + //! @returns the absolute offset of the file the lexer is currently reading from. + [[nodiscard]] std::pair offset() const noexcept + { + return std::make_pair(oldOffset_, offset_); + } + + //! @returns the last recognized token. + [[nodiscard]] Token token() const noexcept { return token_; } + + //! @returns the name of the current token. + [[nodiscard]] const std::string& name() const { return name(token_); } + + //! @returns the name of the token represented by Token @p t. + [[nodiscard]] const std::string& name(Token t) const + { + auto i = def_.tagNames.find(static_cast(t)); + assert(i != def_.tagNames.end()); + return i->second; + } + + /** + * Retrieves the next state for given input state and input symbol. + * + * @param currentState the current State the DFA is in to. + * @param inputSymbol the input symbol that is used for transitioning from current state to the next + * state. + * @returns the next state to transition to. 
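// Both recognizeOne() implementations above follow the same maximal-munch scheme:
// advance the DFA until it rejects, then unwind the recorded state stack (and the
// input position) back to the right-most accepting state. A self-contained sketch of
// that control flow; DfaTable is a hypothetical stand-in, not part of regex_dfa.
#include <cstddef>
#include <deque>
#include <optional>
#include <string>
#include <string_view>

struct DfaTable
{
    int (*delta)(int state, char symbol); // returns -1 for the error state
    bool (*accepts)(int state);           // must return false for the error state (-1)
};

std::optional<std::string> longestMatch(const DfaTable& dfa, std::string_view input, int initialState)
{
    constexpr int BadState = -2; // sentinel below the real states, like Lexer's BadState
    std::deque<int> stack { BadState };
    std::string word;
    std::size_t pos = 0;
    int state = initialState;

    // advance: consume symbols until the DFA rejects (or the input runs out)
    while (state != -1 && pos < input.size())
    {
        const char ch = input[pos++];
        word.push_back(ch);
        stack.push_back(state); // remember the state *before* the transition
        state = dfa.delta(state, ch);
    }

    // backtrack: pop states and shrink the word until the right-most accepting state
    while (state != BadState && !dfa.accepts(state))
    {
        state = stack.back();
        stack.pop_back();
        if (!word.empty())
        {
            word.pop_back(); // corresponds to rollback(): push the symbol back to the input
            --pos;
        }
    }

    if (state == BadState)
        return std::nullopt; // recognizeOne() throws LexerError at this point
    return word;
}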
+ */ + [[nodiscard]] inline StateId delta(StateId currentState, Symbol inputSymbol) const; + + /** + * Sets the active deterministic finite automaton to use for recognizing words. + * + * @param machine the DFA machine to use for recognizing words. + */ + Machine setMachine(Machine machine) + { + auto const oldMachine = initialStateId_; + // since Machine is a 1:1 mapping into the State's ID, we can simply cast here. + initialStateId_ = static_cast(machine); + return oldMachine; + } + + /** + * Retrieves the default DFA machine that is used to recognize words. + */ + [[nodiscard]] Machine defaultMachine() const + { + auto i = def_.initialStates.find("INITIAL"); + assert(i != def_.initialStates.end()); + return static_cast(i->second); + } + + /** + * Runtime exception that is getting thrown when a word could not be recognized. + */ + struct LexerError: public std::runtime_error + { + LexerError(unsigned int offset): + std::runtime_error { fmt::format("[{}] Failed to lexically recognize a word.", offset) }, + offset { offset } + { + } + + unsigned int offset; + }; + + struct iterator // NOLINT(readability-identifier-naming) + { + Lexer& lx; + int end; + TokenInfo info; + + const TokenInfo& operator*() const { return info; } + + iterator& operator++() + { + if (lx.eof()) + ++end; + + info = lx.recognize(); + + return *this; + } + + iterator& operator++(int) { return ++*this; } + bool operator==(const iterator& rhs) const noexcept { return end == rhs.end; } + bool operator!=(const iterator& rhs) const noexcept { return !(*this == rhs); } + }; + + iterator begin() + { + const Token t = recognize(); + return iterator { *this, 0, TokenInfo { t, word() } }; + } + + iterator end() { return iterator { *this, 2, TokenInfo { 0, "" } }; } + + [[nodiscard]] bool eof() const { return !stream_->good(); } + + [[nodiscard]] size_t fileSize() const noexcept { return fileSize_; } + + private: + template + inline void debugf(const char* msg, Args... 
args) const + { + if constexpr (Debug) + if (debug_) + debug_(fmt::format(msg, args...)); + } + + [[nodiscard]] Symbol nextChar(); + void rollback(); + [[nodiscard]] StateId getInitialState() const noexcept; + [[nodiscard]] bool isAcceptState(StateId state) const noexcept; + [[nodiscard]] static std::string stateName(StateId s, std::string_view n = "n"); + static constexpr StateId BadState = 101010; + [[nodiscard]] std::string toString(const std::deque& stack); + + [[nodiscard]] int currentChar() const noexcept { return currentChar_; } + + [[nodiscard]] Token token(StateId s) const + { + auto i = def_.acceptStates.find(s); + assert(i != def_.acceptStates.end()); + return static_cast(i->second); + } + + [[nodiscard]] size_t getFileSize(); + + private: + const LexerDef& def_; + const DebugLogger debug_; + + Machine initialStateId_; + std::string word_; + std::unique_ptr ownedStream_; + std::istream* stream_; + std::vector buffered_; + unsigned oldOffset_; + unsigned offset_; + size_t fileSize_; // cache + bool isBeginOfLine_; + int currentChar_; + Token token_; +}; + +template +inline const std::string& to_string( + const typename Lexer::iterator& it) noexcept +{ + return it.info.literal; +} + +} // namespace regex_dfa + +namespace fmt +{ +template +struct formatter> +{ + using TokenInfo = regex_dfa::TokenInfo; + + template + constexpr auto parse(ParseContext& ctx) + { + return ctx.begin(); + } + + template + constexpr auto format(const TokenInfo& v, FormatContext& ctx) + { + return fmt::format_to(ctx.out(), "{}", v.literal); + } +}; +} // namespace fmt +#include diff --git a/src/regex_dfa/LexerDef.h b/src/regex_dfa/LexerDef.h new file mode 100644 index 0000000000..3a774827c2 --- /dev/null +++ b/src/regex_dfa/LexerDef.h @@ -0,0 +1,87 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT +#pragma once + +#include +#include + +#include +#include +#include + +namespace regex_dfa +{ + +// special tags +constexpr Tag IgnoreTag = static_cast(-1); +constexpr Tag FirstUserTag = 1; + +using AcceptStateMap = std::map; + +//! defines a mapping between accept state ID and another (prior) ID to track roll back the input stream to. 
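// A hedged sketch of driving the pull-style Lexer API from Lexer.h above: recognize()
// skips (ignore) rules and returns one TokenInfo per call. Template-argument defaults
// and the header path are assumptions, and the grammar must assign its Eof rule tag 1
// for Tok::Eof to line up.
#include <regex_dfa/Lexer.h>

#include <iostream>
#include <string>
#include <utility>

enum class Tok { Eof = 1, Word };

void dumpTokens(const regex_dfa::LexerDef& ld, std::string input)
{
    regex_dfa::Lexer<Tok> lexer { ld, std::move(input) };
    for (;;)
    {
        const auto info = lexer.recognize(); // TokenInfo { token, literal, offset }
        if (info.token == Tok::Eof)
            break;
        std::cout << lexer.name(info.token) << ": " << info.literal << " @ " << info.offset << '\n';
    }
}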
+using BacktrackingMap = std::map; + +struct LexerDef +{ + std::map initialStates; + bool containsBeginOfLineStates; + TransitionMap transitions; + AcceptStateMap acceptStates; + BacktrackingMap backtrackingStates; + std::map tagNames; + + [[nodiscard]] std::string to_string() const; + + [[nodiscard]] bool isValidTag(Tag t) const noexcept { return tagNames.find(t) != tagNames.end(); } + + [[nodiscard]] std::string tagName(Tag t) const + { + auto i = tagNames.find(t); + assert(i != tagNames.end()); + return i->second; + } +}; + +inline std::string LexerDef::to_string() const +{ + std::stringstream sstr; + + sstr << fmt::format("initializerStates:\n"); + for (const std::pair q0: initialStates) + sstr << fmt::format(" {}: {}\n", q0.first, q0.second); + sstr << fmt::format("totalStates: {}\n", transitions.states().size()); + + sstr << "transitions:\n"; + for (StateId inputState: transitions.states()) + { + std::map> T; + for (const std::pair p: transitions.map(inputState)) + { + T[p.second].push_back(p.first); + } + for (auto& t: T) + { + sstr << fmt::format( + "- n{} --({})--> n{}\n", inputState, groupCharacterClassRanges(std::move(t.second)), t.first); + } + } + + sstr << "accepts:\n"; + for (const std::pair a: acceptStates) + sstr << fmt::format("- n{} to {} ({})\n", a.first, a.second, tagName(a.second)); + + if (!backtrackingStates.empty()) + { + sstr << "backtracking:\n"; + for (const std::pair bt: backtrackingStates) + sstr << fmt::format("- n{} to n{}\n", bt.first, bt.second); + } + + return sstr.str(); +} + +} // namespace regex_dfa diff --git a/src/regex_dfa/Lexer_test.cpp b/src/regex_dfa/Lexer_test.cpp new file mode 100644 index 0000000000..1bc732eb28 --- /dev/null +++ b/src/regex_dfa/Lexer_test.cpp @@ -0,0 +1,602 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT + +#include +#include +#include +#include +#include +#include + +#include + +using namespace std; +using namespace regex_dfa; +using namespace regex_dfa::util::literals; + +/* FEATURE UNITTEST CHECKLIST: + * + * - [ ] concatenation + * - [ ] alternation + * - [ ] {n} + * - [ ] {m,n} + * - [ ] {m,} + * - [ ] ? 
+ * - [ ] character class, [a-z], [a-z0-9] + * - [ ] character class by name, such as [[:upper:]] + * - [ ] inverted character class, [^a-z], [^a-z0-9] + * - [ ] generic lookahead r/s + * - [ ] EOL lookahead r$ + * - [ ] BOL lookbehind ^r + */ + +const string RULES = R"( + Space(ignore) ::= [\s\t\n]+ + Eof ::= <> + ABBA ::= abba + AB_CD ::= ab/cd + CD ::= cd + CDEF ::= cdef + EOL_LF ::= eol$ + XAnyLine ::= x.* +)"; + +enum class LookaheadToken +{ + Eof = 1, + ABBA, + AB_CD, // NOLINT(readability-identifier-naming) + CD, + CDEF, + EOL_LF, // NOLINT(readability-identifier-naming) + XAnyLine +}; + +namespace fmt +{ // it sucks that I've to specify that here +template <> +struct formatter +{ + template + constexpr auto parse(ParseContext& ctx) + { + return ctx.begin(); + } + + template + constexpr auto format(const LookaheadToken& v, FormatContext& ctx) + { + switch (v) + { + case LookaheadToken::Eof: return fmt::format_to(ctx.out(), "Eof"); + case LookaheadToken::ABBA: return fmt::format_to(ctx.out(), "abba"); + case LookaheadToken::AB_CD: return fmt::format_to(ctx.out(), "ab/cd"); + case LookaheadToken::CD: return fmt::format_to(ctx.out(), "cd"); + case LookaheadToken::CDEF: return fmt::format_to(ctx.out(), "cdef"); + case LookaheadToken::EOL_LF: return fmt::format_to(ctx.out(), "eol$"); + case LookaheadToken::XAnyLine: return fmt::format_to(ctx.out(), ""); + default: return fmt::format_to(ctx.out(), "<{}>", static_cast(v)); + } + } +}; +} // namespace fmt + +TEST_CASE("regex_Lexer.lookahead") +{ + Compiler cc; + cc.parse(RULES); + + LexerDef const lexerDef = cc.compile(); + CAPTURE(lexerDef.to_string()); + Lexable ls { lexerDef, "abba abcdef", [](const string& msg) { + INFO(msg); + } }; + auto lexer = begin(ls); + + REQUIRE(LookaheadToken::ABBA == *lexer); + REQUIRE(LookaheadToken::AB_CD == *++lexer); + REQUIRE(LookaheadToken::CDEF == *++lexer); + REQUIRE(LookaheadToken::Eof == *++lexer); + REQUIRE(end(ls) == ++lexer); +} + +TEST_CASE("regex_Lexable.one") +{ + Compiler cc; + cc.parse(RULES); + + LexerDef const ld = cc.compile(); + CAPTURE(ld.to_string()); + auto src = Lexable { ld, + make_unique("abba abcdef"), + [](const string& msg) { + INFO(msg); + } }; + auto lexer = begin(src); + auto eof = end(src); + + REQUIRE(lexer != eof); + CHECK(LookaheadToken::ABBA == token(lexer)); + CHECK(0 == offset(lexer)); + + ++lexer; + CHECK(LookaheadToken::AB_CD == token(lexer)); + CHECK(5 == offset(lexer)); + + ++lexer; + CHECK(LookaheadToken::CDEF == token(lexer)); + CHECK(7 == offset(lexer)); + + ++lexer; + CHECK(LookaheadToken::Eof == token(lexer)); + CHECK(11 == offset(lexer)); + + ++lexer; + REQUIRE(!(lexer != eof)); // TODO: make that work +} + +TEST_CASE("regex_Lexer.LexerError") +{ + Compiler cc; + cc.parse(RULES); + + const LexerDef ld = cc.compile(); + Lexable ls { ld, "invalid" }; + CHECK_THROWS_AS((void) begin(ls), LexerError); +} + +TEST_CASE("regex_Lexer.evaluateDotToken") +{ + Compiler cc; + cc.parse(RULES); + + const LexerDef ld = cc.compile(); + Lexable ls { ld, "xanything" }; + auto lexer = begin(ls); + + REQUIRE(LookaheadToken::XAnyLine == *lexer); + REQUIRE(LookaheadToken::Eof == *++lexer); +} + +TEST_CASE("regex_Lexer.match_eol") +{ + Compiler cc; + cc.parse(RULES); + + LexerDef ld = cc.compile(); + INFO(fmt::format("LexerDef:\n{}", ld.to_string())); + Lexable ls { ld, "abba eol\nabba", [](const string& msg) { + INFO(msg); + } }; + auto lexer = begin(ls); + + REQUIRE(LookaheadToken::ABBA == *lexer); + CHECK(0 == offset(lexer)); + + REQUIRE(LookaheadToken::EOL_LF == *++lexer); + 
CHECK(5 == offset(lexer)); + + REQUIRE(LookaheadToken::ABBA == *++lexer); + CHECK(9 == offset(lexer)); + + REQUIRE(LookaheadToken::Eof == *++lexer); +} + +TEST_CASE("regex_Lexer.bol") +{ + Compiler cc; + cc.parse(R"(|Spacing(ignore) ::= [\s\t\n]+ + |Pragma ::= ^pragma + |Test ::= test + |Unknown ::= . + |Eof ::= <> + |)"_multiline); + + LexerDef ld = cc.compileMulti(); + Lexable ls { ld, "pragma", [](const string& msg) { + INFO(msg); + } }; + auto lexer = begin(ls); + REQUIRE(1 == *lexer); // ^pragma + REQUIRE(4 == *++lexer); // EOS +} + +TEST_CASE("regex_Lexer.bol_no_match") +{ + Compiler cc; + cc.parse(R"(|Spacing(ignore) ::= [\s\t\n]+ + |Pragma ::= ^pragma + |Test ::= test + |Unknown ::= . + |Eof ::= <> + |)"_multiline); + + LexerDef ld = cc.compileMulti(); + INFO(fmt::format("LexerDef:\n{}", ld.to_string())); + Lexable ls { ld, "test pragma", [](const string& msg) { + INFO(msg); + } }; + auto lexer = begin(ls); + REQUIRE(2 == *lexer); // test + + // pragma (char-wise) - must not be recognized as ^pragma + REQUIRE(3 == *++lexer); + REQUIRE(3 == *++lexer); + REQUIRE(3 == *++lexer); + REQUIRE(3 == *++lexer); + REQUIRE(3 == *++lexer); + REQUIRE(3 == *++lexer); + + REQUIRE(4 == *++lexer); // EOS +} + +TEST_CASE("regex_Lexer.bol_line2") +{ + Compiler cc; + cc.parse(R"(|Spacing(ignore) ::= [\s\t\n]+ + |Pragma ::= ^pragma + |Test ::= test + |Eof ::= <> + |)"_multiline); + + LexerDef ld = cc.compileMulti(); + INFO(fmt::format("LexerDef:\n{}", ld.to_string())); + Lexable ls { ld, "test\npragma", [](const string& msg) { + INFO(msg); + } }; + auto lexer = begin(ls); + REQUIRE(2 == *lexer); // test + REQUIRE(1 == *++lexer); // ^pragma + REQUIRE(3 == *++lexer); // EOS +} + +TEST_CASE("regex_Lexer.bol_and_other_conditions") +{ + Compiler cc; + cc.parse(R"(|Spacing(ignore) ::= [\s\t\n]+ + |Pragma ::= ^pragma + |Test ::= test + |Eof ::= <> + |Jump ::= jmp)"_multiline); + LexerDef ld = cc.compileMulti(); + INFO(fmt::format("LexerDef:\n{}", ld.to_string())); + + Lexable ls { ld, "pragma test", [](const string& msg) { + INFO(msg); + } }; + auto lexer = begin(ls); + REQUIRE(1 == *lexer); // ^pragma + REQUIRE(2 == *++lexer); // test + REQUIRE(3 == *++lexer); // <> +} + +TEST_CASE("regex_Lexer.bol_rules_on_non_bol_lexer") +{ + Compiler cc; + cc.parse(R"(|Spacing(ignore) ::= [\s\t\n]+ + |Eof ::= <> + |Test ::= "test" + |Pragma ::= ^"pragma" + |Unknown ::= . + |)"_multiline); + + LexerDef ld = cc.compile(); + using SimpleLexer = Lexable; + CHECK_THROWS_AS(SimpleLexer(ld, "pragma"), std::invalid_argument); +} + +TEST_CASE("regex_Lexer.non_bol_rules_on_non_bol_lexer") +{ + Compiler cc; + cc.parse(R"(|Spacing(ignore) ::= [\s\t\n]+ + |Eof ::= <> + |Test ::= "test" + |Unknown ::= . + |)"_multiline); + + LexerDef ld = cc.compile(); + Lexable ls { ld, " test " }; + auto lexer = begin(ls); + + REQUIRE(2 == *lexer); // "test" + REQUIRE(1 == *++lexer); // <> +} + +TEST_CASE("regex_Lexer.non_bol_rules_on_bol_lexer") +{ + Compiler cc; + cc.parse(R"(|Spacing(ignore) ::= [\s\t\n]+ + |Eof ::= <> + |Test ::= "test" + |Unknown ::= . 
+ |)"_multiline); + + LexerDef ld = cc.compile(); + Lexable ls { ld, " test " }; + auto lexer = begin(ls); + + REQUIRE(2 == *lexer); // "test" + REQUIRE(1 == *++lexer); // <> +} + +TEST_CASE("regex_Lexer.iterator") +{ + Compiler cc; + cc.parse(make_unique(R"( + Spacing(ignore) ::= [\s\t\n]+ + A ::= a + B ::= b + Eof ::= <> + )")); + + auto const ld = cc.compile(); + auto const ls = Lexable { ld, make_unique("a b b a") }; + auto const e = ls.end(); + auto i = ls.begin(); + + // a + REQUIRE(1 == *i); + REQUIRE(i != e); + + // b + i++; + REQUIRE(2 == *i); + REQUIRE(i != e); + + // b + i++; + REQUIRE(2 == *i); + REQUIRE(i != e); + + // a + i++; + REQUIRE(1 == *i); + REQUIRE(i != e); + + // <> + i++; + REQUIRE(3 == *i); + REQUIRE(i != e); + + i++; + REQUIRE(3 == *i); // still EOF + REQUIRE(i == e); +} + +TEST_CASE("regex_Lexer.empty_alt") +{ + Compiler cc; + cc.parse(R"(|Spacing(ignore) ::= [\s\t\n]+ + |Test ::= aa(bb|) + |Eof ::= <> + |)"_multiline); + + LexerDef ld = cc.compileMulti(); + Lexable ls { ld, "aabb aa aabb", [](const string& msg) { + INFO(msg); + } }; + auto lexer = begin(ls); + + REQUIRE(1 == *lexer); + REQUIRE(1 == *++lexer); + REQUIRE(1 == *++lexer); + REQUIRE(2 == *++lexer); // EOF +} + +TEST_CASE("regex_Lexer.ignore_many") +{ + Compiler cc; + cc.parse(R"(|Spacing(ignore) ::= [\s\t\n]+ + |Comment(ignore) ::= #.* + |Eof ::= <> + |Foo ::= foo + |Bar ::= bar + |)"_multiline); + + LexerDef ld = cc.compileMulti(); + Lexable ls { ld, + R"(|# some foo + |foo + | + |# some bar + |bar + |)"_multiline, + [](const string& msg) { + INFO(msg); + } }; + auto lexer = begin(ls); + + REQUIRE(2 == *lexer); + REQUIRE("foo" == literal(lexer)); + + REQUIRE(3 == *++lexer); + REQUIRE("bar" == literal(lexer)); + + REQUIRE(1 == *++lexer); // EOF +} + +TEST_CASE("regex_Lexer.realworld_ipv4") +{ + Compiler cc; + cc.parse(R"(| + |Spacing(ignore) ::= [\s\t\n]+ + |Eof ::= <> + |IPv4Octet(ref) ::= [0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5] + |IPv4(ref) ::= {IPv4Octet}(\.{IPv4Octet}){3} + |IPv4Literal ::= {IPv4} + |)"_multiline); + + auto ld = cc.compile(); + auto ls = Lexable { ld, + R"(0.0.0.0 4.2.2.1 10.10.40.199 255.255.255.255)", + [](const string& msg) { + INFO(msg); + } }; + auto lexer = begin(ls); + + REQUIRE(2 == *lexer); + REQUIRE("0.0.0.0" == literal(lexer)); + + REQUIRE(2 == *++lexer); + REQUIRE("4.2.2.1" == literal(lexer)); + + REQUIRE(2 == *++lexer); + REQUIRE("10.10.40.199" == literal(lexer)); + + REQUIRE(2 == *++lexer); + REQUIRE("255.255.255.255" == literal(lexer)); + + REQUIRE(1 == *++lexer); +} + +enum class RealWorld +{ + Eof = 1, + IPv4, + IPv6 +}; +namespace fmt +{ // it sucks that I've to specify that here +template <> +struct formatter +{ + template + constexpr auto parse(ParseContext& ctx) + { + return ctx.begin(); + } + + template + constexpr auto format(const RealWorld& v, FormatContext& ctx) + { + switch (v) + { + case RealWorld::Eof: return fmt::format_to(ctx.out(), "Eof"); + case RealWorld::IPv4: return fmt::format_to(ctx.out(), "IPv4"); + case RealWorld::IPv6: return fmt::format_to(ctx.out(), "IPv6"); + default: return fmt::format_to(ctx.out(), "<{}>", static_cast(v)); + } + } +}; +} // namespace fmt + +TEST_CASE("regex_Lexer.realworld_ipv6") +{ + Compiler cc; + cc.parse(R"(| + |Spacing(ignore) ::= [\s\t\n]+ + |Eof ::= <> + | + |IPv4Octet(ref) ::= [0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5] + |IPv4(ref) ::= {IPv4Octet}(\.{IPv4Octet}){3} + |IPv4Literal ::= {IPv4} + | + |ipv6Part(ref) ::= [[:xdigit:]]{1,4} + |IPv6 ::= {ipv6Part}(:{ipv6Part}){7,7} + | | 
({ipv6Part}:){1,7}: + | | :(:{ipv6Part}){1,7} + | | :: + | | ({ipv6Part}:){1}(:{ipv6Part}){0,6} + | | ({ipv6Part}:){2}(:{ipv6Part}){0,5} + | | ({ipv6Part}:){3}(:{ipv6Part}){0,4} + | | ({ipv6Part}:){4}(:{ipv6Part}){0,3} + | | ({ipv6Part}:){5}(:{ipv6Part}){0,2} + | | ({ipv6Part}:){6}(:{ipv6Part}){0,1} + | | ::[fF]{4}:{IPv4} + )"_multiline); + + static const string TEXT = R"(|0:0:0:0:0:0:0:0 + |1234:5678:90ab:cdef:aaaa:bbbb:cccc:dddd + |2001:0db8:85a3:0000:0000:8a2e:0370:7334 + |1234:5678:: + |0:: + |::0 + |:: + |1::3:4:5:6:7:8 + |1::4:5:6:7:8 + |1::5:6:7:8 + |1::8 + |1:2::4:5:6:7:8 + |1:2::5:6:7:8 + |1:2::8 + |::ffff:127.0.0.1 + |::ffff:c000:0280 + |)"_multiline; + + auto ld = cc.compileMulti(); + auto ls = Lexable { ld, TEXT, [](const string& msg) { + INFO(msg); + } }; + auto lexer = begin(ls); + + REQUIRE(RealWorld::IPv6 == *lexer); + REQUIRE("0:0:0:0:0:0:0:0" == literal(lexer)); + + REQUIRE(RealWorld::IPv6 == *++lexer); + REQUIRE("1234:5678:90ab:cdef:aaaa:bbbb:cccc:dddd" == literal(lexer)); + + REQUIRE(RealWorld::IPv6 == *++lexer); + REQUIRE("2001:0db8:85a3:0000:0000:8a2e:0370:7334" == literal(lexer)); + + REQUIRE(RealWorld::IPv6 == *++lexer); + REQUIRE("1234:5678::" == literal(lexer)); + + REQUIRE(RealWorld::IPv6 == *++lexer); + REQUIRE("0::" == literal(lexer)); + + REQUIRE(RealWorld::IPv6 == *++lexer); + REQUIRE("::0" == literal(lexer)); + + REQUIRE(RealWorld::IPv6 == *++lexer); + REQUIRE("::" == literal(lexer)); + + REQUIRE(RealWorld::IPv6 == *++lexer); + REQUIRE("1::3:4:5:6:7:8" == literal(lexer)); + + REQUIRE(RealWorld::IPv6 == *++lexer); + REQUIRE("1::4:5:6:7:8" == literal(lexer)); + + REQUIRE(RealWorld::IPv6 == *++lexer); + REQUIRE("1::5:6:7:8" == literal(lexer)); + + REQUIRE(RealWorld::IPv6 == *++lexer); + REQUIRE("1::8" == literal(lexer)); + + REQUIRE(RealWorld::IPv6 == *++lexer); + REQUIRE("1:2::4:5:6:7:8" == literal(lexer)); + + REQUIRE(RealWorld::IPv6 == *++lexer); + REQUIRE("1:2::5:6:7:8" == literal(lexer)); + + REQUIRE(RealWorld::IPv6 == *++lexer); + REQUIRE("1:2::8" == literal(lexer)); + + REQUIRE(RealWorld::IPv6 == *++lexer); + REQUIRE("::ffff:127.0.0.1" == literal(lexer)); + + REQUIRE(RealWorld::IPv6 == *++lexer); + REQUIRE("::ffff:c000:0280" == literal(lexer)); + + REQUIRE(RealWorld::Eof == *++lexer); +} + +TEST_CASE("regex_Lexer.internal") +{ + REQUIRE("Eof" == fmt::format("{}", LookaheadToken::Eof)); + REQUIRE("abba" == fmt::format("{}", LookaheadToken::ABBA)); + REQUIRE("ab/cd" == fmt::format("{}", LookaheadToken::AB_CD)); + REQUIRE("cd" == fmt::format("{}", LookaheadToken::CD)); + REQUIRE("cdef" == fmt::format("{}", LookaheadToken::CDEF)); + REQUIRE("eol$" == fmt::format("{}", LookaheadToken::EOL_LF)); + REQUIRE("" == fmt::format("{}", LookaheadToken::XAnyLine)); + REQUIRE("<724>" == fmt::format("{}", static_cast(724))); + + REQUIRE("Eof" == fmt::format("{}", RealWorld::Eof)); + REQUIRE("IPv4" == fmt::format("{}", RealWorld::IPv4)); + REQUIRE("IPv6" == fmt::format("{}", RealWorld::IPv6)); + REQUIRE("<724>" == fmt::format("{}", static_cast(724))); +} diff --git a/src/regex_dfa/MultiDFA.cpp b/src/regex_dfa/MultiDFA.cpp new file mode 100644 index 0000000000..208ce7f207 --- /dev/null +++ b/src/regex_dfa/MultiDFA.cpp @@ -0,0 +1,33 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. 
You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT + +#include + +using namespace std; + +namespace regex_dfa +{ + +MultiDFA constructMultiDFA(map many) +{ + MultiDFA multiDFA {}; + multiDFA.dfa.createStates(1 + many.size()); + multiDFA.dfa.setInitialState(0); + + StateId q0 = 1; + for (pair& p: many) + { + multiDFA.dfa.append(std::move(p.second), q0); + multiDFA.initialStates[p.first] = q0; + multiDFA.dfa.setTransition(0, static_cast(q0), q0); + q0++; + } + + return multiDFA; +} + +} // namespace regex_dfa diff --git a/src/regex_dfa/MultiDFA.h b/src/regex_dfa/MultiDFA.h new file mode 100644 index 0000000000..76a30c0907 --- /dev/null +++ b/src/regex_dfa/MultiDFA.h @@ -0,0 +1,29 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT +#pragma once + +#include +#include +#include + +#include +#include + +namespace regex_dfa +{ + +struct MultiDFA +{ + using InitialStateMap = std::map; + + InitialStateMap initialStates; + DFA dfa; +}; + +MultiDFA constructMultiDFA(std::map many); + +} // namespace regex_dfa diff --git a/src/regex_dfa/NFA.cpp b/src/regex_dfa/NFA.cpp new file mode 100644 index 0000000000..f8674b980a --- /dev/null +++ b/src/regex_dfa/NFA.cpp @@ -0,0 +1,375 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT + +#include +#include +#include + +#include + +#include +#include +#include +#include + +using namespace std; + +namespace regex_dfa +{ + +#if 0 + #define DEBUG(msg, ...) \ + do \ + { \ + cerr << fmt::format(msg, __VA_ARGS__) << "\n"; \ + } while (0) +#else + #define DEBUG(msg, ...) 
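// constructMultiDFA() above stitches several compiled DFAs into one automaton: state 0
// becomes a synthetic initial state with one distinct transition symbol per condition,
// and initialStates records where each named sub-DFA begins. A hedged sketch that merges
// two already-compiled DFAs (the header path is an assumption):
#include <regex_dfa/MultiDFA.h>

#include <map>
#include <string>
#include <utility>

regex_dfa::MultiDFA mergeConditions(regex_dfa::DFA initial, regex_dfa::DFA comment)
{
    using regex_dfa::DFA;
    using regex_dfa::MultiDFA;

    std::map<std::string, DFA> perCondition;
    perCondition.emplace("INITIAL", std::move(initial));
    perCondition.emplace("COMMENT", std::move(comment));

    MultiDFA merged = regex_dfa::constructMultiDFA(std::move(perCondition));
    // merged.initialStates.at("INITIAL") / .at("COMMENT") now name the entry states
    // reachable from DFA state 0 via the synthetic transition symbols.
    return merged;
}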
\ + do \ + { \ + } while (0) +#endif + +Alphabet NFA::alphabet() const +{ + Alphabet alphabet; + + for (const TransitionMap& transitions: states_) + { + for (auto const& t: transitions) + { + switch (t.first) + { + case Symbols::Epsilon: break; + default: alphabet.insert(t.first); + } + } + } + + return alphabet; +} + +NFA NFA::clone() const +{ + return *this; +} + +StateId NFA::createState() +{ + states_.emplace_back(); + return states_.size() - 1; +} + +StateIdVec NFA::delta(const StateIdVec& S, Symbol c) const +{ + StateIdVec result; + delta(S, c, &result); + return result; +} + +StateIdVec* NFA::delta(const StateIdVec& S, Symbol c, StateIdVec* result) const +{ + for (StateId s: S) + { + const TransitionMap& transitions = stateTransitions(s); + for (const auto& transition: transitions) + { + if (transition.first == c) + { + for (StateId targetState: transition.second) + { + result->push_back(targetState); + } + } + } + } + + return result; +} + +StateIdVec NFA::epsilonTransitions(StateId s) const +{ + StateIdVec t; + + const TransitionMap& transitions = stateTransitions(s); + for (auto&& [p, q]: transitions) + if (p == Symbols::Epsilon) + t.insert(t.end(), q.begin(), q.end()); + + return t; +} + +StateIdVec NFA::epsilonClosure(const StateIdVec& S) const +{ + StateIdVec eclosure; + epsilonClosure(S, &eclosure); + return eclosure; +} + +void NFA::epsilonClosure(const StateIdVec& S, StateIdVec* eclosure) const +{ + *eclosure = S; + vector availabilityCheck(1 + size(), false); + stack workList; + for (StateId s: S) + { + workList.push(s); + availabilityCheck[s] = true; + } + + while (!workList.empty()) + { + const StateId s = workList.top(); + workList.pop(); + + for (StateId t: epsilonTransitions(s)) + { + if (!availabilityCheck[t]) + { + eclosure->push_back(t); + workList.push(t); + } + } + } + + sort(eclosure->begin(), eclosure->end()); +} + +void NFA::prepareStateIds(StateId baseId) +{ + // adjust transition state IDs + // traverse through each state's transition set + // traverse through each transition in the transition set + // traverse through each element and add BASE_ID + + // for each state's transitions + for (StateId i = 0, e = size(); i != e; ++i) + { + TransitionMap& transitions = states_[i]; + + // for each vector of target-state-id per transition-symbol + for (auto t = transitions.begin(), tE = transitions.end(); t != tE; ++t) + { + StateIdVec& transition = t->second; + + // for each target state ID + for (StateId k = 0, kE = transition.size(); k != kE; ++k) + { + transition[k] += baseId; + } + } + } + + initialState_ += baseId; + acceptState_ += baseId; + + AcceptMap remapped; + for (auto& a: acceptTags_) + remapped[baseId + a.first] = a.second; + acceptTags_ = std::move(remapped); + + BacktrackingMap backtracking; + for (const auto& bt: backtrackStates_) + backtracking[baseId + bt.first] = baseId + bt.second; + backtrackStates_ = std::move(backtracking); +} + +NFA NFA::join(const map& mappings) +{ + if (mappings.size() == 1) + return mappings.begin()->second; + + NFA multi; + + for (size_t i = 0; i <= mappings.size(); ++i) + (void) multi.createState(); + + Symbol transitionSymbol = 0; + for (const auto& mapping: mappings) + { + transitionSymbol++; + + NFA rhs = mapping.second.clone(); + rhs.prepareStateIds(multi.size()); + + multi.states_.reserve(multi.size() + rhs.size()); + multi.states_.insert(multi.states_.end(), rhs.states_.begin(), rhs.states_.end()); + multi.acceptTags_.insert(rhs.acceptTags_.begin(), rhs.acceptTags_.end()); + + 
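// NFA::epsilonClosure() above is a standard worklist traversal: seed the stack with S,
// follow epsilon edges, collect every state not seen before, and sort the result. The
// same idea in a generic, self-contained form; the adjacency-list parameter is an
// assumption for illustration, not the NFA's actual storage.
#include <algorithm>
#include <cstddef>
#include <stack>
#include <vector>

using StateId = std::size_t;

// epsilonEdges[s] lists the states reachable from s via a single epsilon transition.
std::vector<StateId> epsilonClosureOf(const std::vector<std::vector<StateId>>& epsilonEdges,
                                      std::vector<StateId> S)
{
    std::vector<bool> seen(epsilonEdges.size(), false);
    std::stack<StateId> work;
    for (StateId s: S)
    {
        work.push(s);
        seen[s] = true;
    }

    std::vector<StateId> closure = std::move(S);
    while (!work.empty())
    {
        const StateId s = work.top();
        work.pop();
        for (StateId t: epsilonEdges[s])
        {
            if (!seen[t])
            {
                seen[t] = true;
                closure.push_back(t);
                work.push(t);
            }
        }
    }

    std::sort(closure.begin(), closure.end());
    return closure;
}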
multi.addTransition(multi.initialState_, transitionSymbol, rhs.initialState_); + multi.backtrackStates_[rhs.acceptState_] = multi.acceptState_; + multi.acceptState_ = rhs.acceptState_; + } + + return multi; +} + +NFA& NFA::lookahead(NFA&& rhs) +{ + if (empty()) + { + *this = std::move(rhs); + backtrackStates_[acceptState_] = initialState_; + } + else + { + rhs.prepareStateIds(states_.size()); + states_.reserve(size() + rhs.size()); + states_.insert(states_.end(), rhs.states_.begin(), rhs.states_.end()); + acceptTags_.insert(rhs.acceptTags_.begin(), rhs.acceptTags_.end()); + + addTransition(acceptState_, Symbols::Epsilon, rhs.initialState_); + backtrackStates_[rhs.acceptState_] = acceptState_; + acceptState_ = rhs.acceptState_; + } + + return *this; +} + +NFA& NFA::alternate(NFA&& rhs) +{ + StateId newStart = createState(); + StateId newEnd = createState(); + + rhs.prepareStateIds(states_.size()); + states_.insert(states_.end(), rhs.states_.begin(), rhs.states_.end()); + acceptTags_.insert(rhs.acceptTags_.begin(), rhs.acceptTags_.end()); + backtrackStates_.insert(rhs.backtrackStates_.begin(), rhs.backtrackStates_.end()); + + addTransition(newStart, Symbols::Epsilon, initialState_); + addTransition(newStart, Symbols::Epsilon, rhs.initialState_); + + addTransition(acceptState_, Symbols::Epsilon, newEnd); + addTransition(rhs.acceptState_, Symbols::Epsilon, newEnd); + + initialState_ = newStart; + acceptState_ = newEnd; + + return *this; +} + +NFA& NFA::concatenate(NFA&& rhs) +{ + rhs.prepareStateIds(states_.size()); + states_.reserve(size() + rhs.size()); + states_.insert(states_.end(), rhs.states_.begin(), rhs.states_.end()); + acceptTags_.insert(rhs.acceptTags_.begin(), rhs.acceptTags_.end()); + backtrackStates_.insert(rhs.backtrackStates_.begin(), rhs.backtrackStates_.end()); + + addTransition(acceptState_, Symbols::Epsilon, rhs.initialState_); + acceptState_ = rhs.acceptState_; + + return *this; +} + +NFA& NFA::optional() +{ + StateId newStart = createState(); + StateId newEnd = createState(); + + addTransition(newStart, Symbols::Epsilon, initialState_); + addTransition(newStart, Symbols::Epsilon, newEnd); + addTransition(acceptState_, Symbols::Epsilon, newEnd); + + initialState_ = newStart; + acceptState_ = newEnd; + + return *this; +} + +NFA& NFA::recurring() +{ + // {0, inf} + StateId newStart = createState(); + StateId newEnd = createState(); + + addTransition(newStart, Symbols::Epsilon, initialState_); + addTransition(newStart, Symbols::Epsilon, newEnd); + + addTransition(acceptState_, Symbols::Epsilon, initialState_); + addTransition(acceptState_, Symbols::Epsilon, newEnd); + + initialState_ = newStart; + acceptState_ = newEnd; + + return *this; +} + +NFA& NFA::positive() +{ + return concatenate(std::move(clone().recurring())); +} + +NFA& NFA::times(unsigned factor) +{ + assert(factor != 0); + + if (factor == 1) + return *this; + + NFA base = clone(); + for (unsigned n = 2; n <= factor; ++n) + concatenate(base.clone()); + + return *this; +} + +NFA& NFA::repeat(unsigned minimum, unsigned maximum) +{ + assert(minimum <= maximum); + + NFA factor = clone(); + + if (minimum != 0) + times(minimum); + + for (unsigned n = minimum + 1; n <= maximum; n++) + alternate(std::move(factor.clone().times(n))); + + if (minimum == 0) + optional(); + + return *this; +} + +void NFA::visit(DotVisitor& v) const +{ + v.start(initialState_); + + // initial state + v.visitNode(initialState_, true, acceptTags_.find(initialState_) != acceptTags_.end()); + + // accepting states + for (pair acceptTag: 
acceptTags_) + if (acceptTag.first != initialState_) + v.visitNode(acceptTag.first, false, true); + + // other states + for (StateId i = 0, e = size(); i != e; ++i) + if (i != initialState_ && acceptTags_.find(i) == acceptTags_.end()) + v.visitNode(i, false, false); + + // transitions + for (StateId sourceState = 0, sE = size(); sourceState != sE; ++sourceState) + { + map> reversed; + for (pair transitions: states_[sourceState]) + for (StateId targetState: transitions.second) + reversed[targetState].push_back(transitions.first /* symbol */); + + for (pair> tr: reversed) + { + StateId targetState = tr.first; + const vector& T = tr.second; + for_each(T.begin(), T.end(), [&](const Symbol t) { v.visitEdge(sourceState, targetState, t); }); + v.endVisitEdge(sourceState, targetState); + } + } + v.end(); +} + +} // namespace regex_dfa diff --git a/src/regex_dfa/NFA.h b/src/regex_dfa/NFA.h new file mode 100644 index 0000000000..7380de8333 --- /dev/null +++ b/src/regex_dfa/NFA.h @@ -0,0 +1,221 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT +#pragma once + +#include + +#include +#include +#include +#include +#include + +namespace regex_dfa +{ + +class Alphabet; +class DotVisitor; +class DFA; + +/** + * NFA Builder with the Thompson's Construction properties. + * + *
    + *
  • There is exactly one initial state and exactly one accepting state. + *
  • No transition other than the initial transition enters the initial state. + *
  • The accepting state has no leaving edges. + *
  • An ε-transition always connects two states that were (earlier in the construction process) + * the initial state and the accepting state of NFAs for some component REs. + *
  • Each state has at most two entering states and at most two leaving states. + *
+ */ +class NFA +{ + private: + NFA(const NFA& other) = default; + NFA& operator=(const NFA& other) = default; + + public: + //! represent a transition table for a specific state + using TransitionMap = std::map; + + //! defines a set of states within one NFA. the index represents the state Id. + using StateVec = std::vector; + + //! defines a mapping between accept state ID and another (prior) ID to track roll back the input stream + //! to. + using BacktrackingMap = std::map; + + NFA(NFA&&) = default; + NFA& operator=(NFA&&) = default; + + //! Constructs an empty NFA. + NFA(): states_ {}, initialState_ { 0 }, acceptState_ { 0 }, backtrackStates_ {}, acceptTags_ {} {} + + /** + * Constructs an NFA for a single character transition. + * + * *No* acceptState flag is set on the accepting node! + */ + explicit NFA(Symbol value): NFA {} + { + initialState_ = createState(); + acceptState_ = createState(); + addTransition(initialState_, value, acceptState_); + } + + explicit NFA(SymbolSet value): NFA {} + { + initialState_ = createState(); + acceptState_ = createState(); + for (Symbol s: value) + addTransition(initialState_, s, acceptState_); + } + + void addTransition(StateId from, Symbol s, StateId to) { states_[from][s].push_back(to); } + + [[nodiscard]] static NFA join(const std::map& mappings); + + /** + * Traverses all states and edges in this NFA and calls @p visitor for each state & edge. + * + * Use this function to e.g. get a GraphViz dot-file drawn. + */ + void visit(DotVisitor& visitor) const; + + //! Tests whether or not this is an empty NFA. + [[nodiscard]] bool empty() const noexcept { return states_.empty(); } + + //! Retrieves the number of states of this NFA. + [[nodiscard]] size_t size() const noexcept { return states_.size(); } + + //! Retrieves the one and only initial state. This value is nullptr iff the NFA is empty. + [[nodiscard]] StateId initialStateId() const noexcept { return initialState_; } + + //! Retrieves the one and only accept state. This value is nullptr iff the NFA is empty. + [[nodiscard]] StateId acceptStateId() const noexcept { return acceptState_; } + + //! Retrieves the list of states this FA contains. + [[nodiscard]] const StateVec& states() const { return states_; } + StateVec& states() { return states_; } + + //! Retrieves the alphabet of this finite automaton. + [[nodiscard]] Alphabet alphabet() const; + + //! Clones this NFA. + [[nodiscard]] NFA clone() const; + + /** + * Constructs an NFA where @p rhs is following but backtracking to @c acceptState(this) when + * when @p rhs is fully matched. + * + * This resembles the syntax r/s (or r(?=s) in Perl) where r is matched when also s is following. + */ + NFA& lookahead(NFA&& rhs); + + //! Reconstructs this FA to alternate between this FA and the @p other FA. + NFA& alternate(NFA&& other); + + //! Concatenates the right FA's initial state with this FA's accepting state. + NFA& concatenate(NFA&& rhs); + + //! Reconstructs this FA to allow optional input. X -> X? + NFA& optional(); + + //! Reconstructs this FA with the given @p quantifier factor. + NFA& times(unsigned quantifier); + + //! Reconstructs this FA to allow recurring input. X -> X* + NFA& recurring(); + + //! Reconstructs this FA to be recurring at least once. X+ = XX* + NFA& positive(); + + //! Reconstructs this FA to be repeatable between range [minimum, maximum]. + NFA& repeat(unsigned minimum, unsigned maximum); + + //! Retrieves transitions for state with the ID @p id. 
+ [[nodiscard]] TransitionMap const& stateTransitions(StateId id) const { return states_[id]; } + + //! Retrieves all states that can be reached from @p S with one single input Symbol @p c. + [[nodiscard]] StateIdVec delta(const StateIdVec& S, Symbol c) const; + StateIdVec* delta(const StateIdVec& S, Symbol c, StateIdVec* result) const; + + //! Retrieves all states that can be directly or indirectly accessed via epsilon-transitions exclusively. + [[nodiscard]] StateIdVec epsilonClosure(const StateIdVec& S) const; + void epsilonClosure(const StateIdVec& S, StateIdVec* result) const; + + [[nodiscard]] TransitionMap& stateTransitions(StateId s) { return states_[s]; } + + //! Flags given state as accepting-state with given Tag @p acceptTag. + void setAccept(Tag acceptTag) { acceptTags_[acceptState_] = acceptTag; } + + void setAccept(StateId state, Tag tag) { acceptTags_[state] = tag; } + + [[nodiscard]] std::optional acceptTag(StateId s) const + { + if (auto i = acceptTags_.find(s); i != acceptTags_.end()) + return i->second; + + return std::nullopt; + } + + [[nodiscard]] bool isAccepting(StateId s) const { return acceptTags_.find(s) != acceptTags_.end(); } + + /** + * Returns whether or not the StateSet @p Q contains at least one State that is also "accepting". + */ + [[nodiscard]] bool isAnyAccepting(const StateIdVec& Q) const + { + for (StateId q: Q) + if (isAccepting(q)) + return true; + + return false; + } + + [[nodiscard]] const AcceptMap& acceptMap() const noexcept { return acceptTags_; } + [[nodiscard]] AcceptMap& acceptMap() noexcept { return acceptTags_; } + + [[nodiscard]] std::optional backtrack(StateId s) const + { + if (auto i = backtrackStates_.find(s); i != backtrackStates_.end()) + return i->second; + + return std::nullopt; + } + + /** + * Checks if @p Q contains a state that is flagged as backtracking state in the NFA and returns + * the target state within the NFA or @c std::nullopt if not a backtracking state. + */ + [[nodiscard]] std::optional containsBacktrackState(const StateIdVec& Q) const + { + for (StateId q: Q) + if (std::optional t = backtrack(q); t.has_value()) + return *t; + + return std::nullopt; + } + + private: + [[nodiscard]] StateId createState(); + void visit(DotVisitor& v, StateId s, std::unordered_map& registry) const; + void prepareStateIds(StateId baseId); + + //! Retrieves all epsilon-transitions directly connected to State @p s. + [[nodiscard]] StateIdVec epsilonTransitions(StateId s) const; + + private: + StateVec states_; + StateId initialState_; + StateId acceptState_; + BacktrackingMap backtrackStates_; + AcceptMap acceptTags_; +}; + +} // namespace regex_dfa diff --git a/src/regex_dfa/NFABuilder.cpp b/src/regex_dfa/NFABuilder.cpp new file mode 100644 index 0000000000..0e07d6f342 --- /dev/null +++ b/src/regex_dfa/NFABuilder.cpp @@ -0,0 +1,124 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. 
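// The combinators declared in NFA.h above compose by mutating and splicing automata in
// place. For reference, a sketch that builds a(b|c)* the same way NFA_test.cpp below
// does (the header path is an assumption):
#include <regex_dfa/NFA.h>

#include <utility>

regex_dfa::NFA buildABCStar()
{
    using regex_dfa::NFA;

    NFA bOrC = std::move(NFA { 'b' }.alternate(NFA { 'c' })); // b|c
    bOrC.recurring();                                         // (b|c)*

    NFA result { 'a' };
    result.concatenate(std::move(bOrC));                      // a(b|c)*
    result.setAccept(/*acceptTag=*/ 1);                       // tag the single accept state
    return result;
}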
You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT + +#include +#include + +using namespace std; + +namespace regex_dfa +{ + +NFA NFABuilder::construct(const RegExpr& re, Tag tag) +{ + visit(*this, re); + + // fa_.setAccept(acceptState_.value_or(fa_.acceptStateId()), tag); + if (acceptState_) + fa_.setAccept(acceptState_.value(), tag); + else + fa_.setAccept(tag); + + return std::move(fa_); +} + +NFA NFABuilder::construct(const RegExpr& re) +{ + visit(*this, re); + return std::move(fa_); +} + +void NFABuilder::operator()(const LookAheadExpr& lookaheadExpr) +{ + // fa_ = move(construct(lookaheadExpr.leftExpr()).lookahead(construct(lookaheadExpr.rightExpr()))); + NFA lhs = construct(*lookaheadExpr.left); + NFA rhs = construct(*lookaheadExpr.right); + lhs.lookahead(std::move(rhs)); + fa_ = std::move(lhs); +} + +void NFABuilder::operator()(const AlternationExpr& alternationExpr) +{ + NFA lhs = construct(*alternationExpr.left); + NFA rhs = construct(*alternationExpr.right); + lhs.alternate(std::move(rhs)); + fa_ = std::move(lhs); +} + +void NFABuilder::operator()(const ConcatenationExpr& concatenationExpr) +{ + NFA lhs = construct(*concatenationExpr.left); + NFA rhs = construct(*concatenationExpr.right); + lhs.concatenate(std::move(rhs)); + fa_ = std::move(lhs); +} + +void NFABuilder::operator()(const CharacterExpr& characterExpr) +{ + fa_ = NFA { characterExpr.value }; +} + +void NFABuilder::operator()(const CharacterClassExpr& characterClassExpr) +{ + fa_ = NFA { characterClassExpr.symbols }; +} + +void NFABuilder::operator()(const ClosureExpr& closureExpr) +{ + const unsigned xmin = closureExpr.minimumOccurrences; + const unsigned xmax = closureExpr.maximumOccurrences; + constexpr unsigned Infinity = numeric_limits::max(); + + if (xmin == 0 && xmax == 1) + fa_ = std::move(construct(*closureExpr.subExpr).optional()); + else if (xmin == 0 && xmax == Infinity) + fa_ = std::move(construct(*closureExpr.subExpr).recurring()); + else if (xmin == 1 && xmax == Infinity) + fa_ = std::move(construct(*closureExpr.subExpr).positive()); + else if (xmin < xmax) + fa_ = std::move(construct(*closureExpr.subExpr).repeat(xmin, xmax)); + else if (xmin == xmax) + fa_ = std::move(construct(*closureExpr.subExpr).times(xmin)); + else + throw invalid_argument { "closureExpr" }; +} + +void NFABuilder::operator()(const BeginOfLineExpr&) +{ + fa_ = NFA { Symbols::Epsilon }; +} + +void NFABuilder::operator()(const EndOfLineExpr&) +{ + // NFA lhs; + // NFA rhs{'\n'}; + // lhs.lookahead(move(rhs)); + // fa_ = move(lhs); + fa_ = std::move(NFA {}.lookahead(NFA { '\n' })); +} + +void NFABuilder::operator()(const EndOfFileExpr&) +{ + fa_ = NFA { Symbols::EndOfFile }; +} + +void NFABuilder::operator()(const DotExpr&) +{ + // any character except LF + fa_ = NFA { '\t' }; + for (int ch = 32; ch < 127; ++ch) + { + fa_.addTransition(fa_.initialStateId(), ch, fa_.acceptStateId()); + } +} + +void NFABuilder::operator()(const EmptyExpr&) +{ + fa_ = NFA { Symbols::Epsilon }; +} + +} // namespace regex_dfa diff --git a/src/regex_dfa/NFABuilder.h b/src/regex_dfa/NFABuilder.h new file mode 100644 index 0000000000..4ec4892856 --- /dev/null +++ b/src/regex_dfa/NFABuilder.h @@ -0,0 +1,55 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. 
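NFABuilder is the bridge between the textual pattern and the automaton. A sketch of that pipeline follows; the function name, the include paths and the explicit Tag parameter are illustrative assumptions.

    #include <regex_dfa/NFABuilder.h>
    #include <regex_dfa/RegExpr.h>
    #include <regex_dfa/RegExprParser.h>

    #include <string_view>

    using namespace regex_dfa;

    // Parse a pattern into a RegExpr tree, then lower it into an NFA
    // whose accept state carries the given tag.
    NFA compile(std::string_view pattern, Tag tag)
    {
        RegExpr re = RegExprParser {}.parse(pattern);
        return NFABuilder {}.construct(re, tag);
    }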
You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT +#pragma once + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace regex_dfa +{ + +class DFA; + +/*! + * Generates a finite automaton from the given input (a regular expression). + */ +class NFABuilder +{ + public: + explicit NFABuilder(): fa_ {} {} + + [[nodiscard]] NFA construct(const RegExpr& re, Tag tag); + [[nodiscard]] NFA construct(const RegExpr& re); + void operator()(const LookAheadExpr& lookaheadExpr); + void operator()(const ConcatenationExpr& concatenationExpr); + void operator()(const AlternationExpr& alternationExpr); + void operator()(const CharacterExpr& characterExpr); + void operator()(const CharacterClassExpr& characterClassExpr); + void operator()(const ClosureExpr& closureExpr); + void operator()(const BeginOfLineExpr& bolExpr); + void operator()(const EndOfLineExpr& eolExpr); + void operator()(const EndOfFileExpr& eofExpr); + void operator()(const DotExpr& dotExpr); + void operator()(const EmptyExpr& emptyExpr); + + private: + NFA fa_; + std::optional acceptState_; +}; + +} // namespace regex_dfa diff --git a/src/regex_dfa/NFA_test.cpp b/src/regex_dfa/NFA_test.cpp new file mode 100644 index 0000000000..734e19581f --- /dev/null +++ b/src/regex_dfa/NFA_test.cpp @@ -0,0 +1,85 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT + +#include +#include +#include + +#include + +using namespace std; +using namespace regex_dfa; + +TEST_CASE("regex_NFA.emptyCtor") +{ + const NFA nfa; + REQUIRE(0 == nfa.size()); + REQUIRE(nfa.empty()); +} + +TEST_CASE("regex_NFA.characterCtor") +{ + const NFA nfa { 'a' }; + REQUIRE(2 == nfa.size()); + REQUIRE(0 == nfa.initialStateId()); + REQUIRE(1 == nfa.acceptStateId()); + REQUIRE(StateIdVec { 1 } == nfa.delta(StateIdVec { 0 }, 'a')); +} + +TEST_CASE("regex_NFA.concatenate") +{ + const NFA ab = std::move(NFA { 'a' }.concatenate(NFA { 'b' })); + REQUIRE(4 == ab.size()); + REQUIRE(0 == ab.initialStateId()); + REQUIRE(3 == ab.acceptStateId()); + + // TODO: check ab.initial == A.initial + // TODO: check A.accept == B.initial + // TODO: check ab.accept == B.accept +} + +TEST_CASE("regex_NFA.alternate") +{ + const NFA ab = std::move(NFA { 'a' }.alternate(NFA { 'b' })); + REQUIRE(6 == ab.size()); + REQUIRE(2 == ab.initialStateId()); + REQUIRE(3 == ab.acceptStateId()); + + // TODO: check acceptState transitions to A and B + // TODO: check A and B's outgoing edges to final acceptState +} + +TEST_CASE("regex_NFA.epsilonClosure") +{ + const NFA nfa { 'a' }; + REQUIRE(0 == nfa.initialStateId()); + REQUIRE(1 == nfa.acceptStateId()); + REQUIRE(StateIdVec { 0 } == nfa.epsilonClosure(StateIdVec { 0 })); + + const NFA abc = + std::move(NFA { 'a' }.concatenate(std::move(NFA { 'b' }.alternate(NFA { 'c' }).recurring()))); + REQUIRE(StateIdVec { 0 } == abc.epsilonClosure(StateIdVec { 0 })); + + const StateIdVec e1 { 1, 2, 4, 6, 8, 9 }; + REQUIRE(e1 == abc.epsilonClosure(StateIdVec { 1 })); +} + +TEST_CASE("regex_NFA.delta") +{ + const NFA nfa { 'a' }; + REQUIRE(0 == nfa.initialStateId()); + REQUIRE(1 == nfa.acceptStateId()); + REQUIRE(StateIdVec { 1 } == nfa.delta(StateIdVec { 0 }, 'a')); +} + +TEST_CASE("regex_NFA.alphabet") +{ + REQUIRE("{}" 
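The epsilonClosure()/delta() pair tested above is all that is needed to run an NFA directly. The sketch below is the classic subset simulation; it assumes Symbol is constructible from char, and the function name is illustrative.

    #include <regex_dfa/NFA.h>
    #include <string_view>

    using namespace regex_dfa;

    // Start from the epsilon-closure of the initial state, step with
    // delta() per input symbol, and accept if any reached state accepts.
    bool matches(const NFA& nfa, std::string_view input)
    {
        StateIdVec current = nfa.epsilonClosure(StateIdVec { nfa.initialStateId() });
        for (char ch: input)
        {
            current = nfa.epsilonClosure(nfa.delta(current, ch));
            if (current.empty())
                return false;
        }
        return nfa.isAnyAccepting(current);
    }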
== NFA {}.alphabet().to_string()); + REQUIRE("{a}" == NFA { 'a' }.alphabet().to_string()); + REQUIRE("{ab}" == NFA { 'a' }.concatenate(NFA { 'b' }).alphabet().to_string()); + REQUIRE("{abc}" == NFA { 'a' }.concatenate(NFA { 'b' }).alternate(NFA { 'c' }).alphabet().to_string()); +} diff --git a/src/regex_dfa/RegExpr.cpp b/src/regex_dfa/RegExpr.cpp new file mode 100644 index 0000000000..b7ba9c70af --- /dev/null +++ b/src/regex_dfa/RegExpr.cpp @@ -0,0 +1,117 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT + +#include + +#include + +#include + +#include +#include +#include + +using namespace std; + +/* + REGULAR EXPRESSION SYNTAX: + -------------------------- + + expr := alternation + alternation := concatenation ('|' concatenation)* + concatenation := closure (closure)* + closure := atom ['*' | '?' | '{' NUM [',' NUM] '}'] + atom := character | characterClass | '(' expr ')' + characterClass := '[' ['^'] characterClassFragment+ ']' + characterClassFragment := character | character '-' character +*/ + +namespace regex_dfa +{ + +auto embrace(const RegExpr& outer, const RegExpr& inner) +{ + if (precedence(outer) > precedence(inner)) + return "(" + to_string(inner) + ")"; + else + return to_string(inner); +} + +std::string to_string(const RegExpr& re) +{ + return visit( + overloaded { + [&](ClosureExpr const& e) { + stringstream sstr; + sstr << embrace(re, *e.subExpr); + if (e.minimumOccurrences == 0 && e.maximumOccurrences == 1) + sstr << '?'; + else if (e.minimumOccurrences == 0 && e.maximumOccurrences == numeric_limits::max()) + sstr << '*'; + else if (e.minimumOccurrences == 1 && e.maximumOccurrences == numeric_limits::max()) + sstr << '+'; + else + sstr << '{' << e.minimumOccurrences << ',' << e.maximumOccurrences << '}'; + return sstr.str(); + }, + [&](const AlternationExpr& e) { return embrace(re, *e.left) + "|" + embrace(re, *e.right); }, + [&](const ConcatenationExpr& e) { return embrace(re, *e.left) + embrace(re, *e.right); }, + [&](const LookAheadExpr& e) { return embrace(re, *e.left) + "/" + embrace(re, *e.right); }, + [](const CharacterExpr& e) { return string(1, e.value); }, + [](EndOfFileExpr) { return string { "<>" }; }, + [](BeginOfLineExpr) { return string { "^" }; }, + [](EndOfLineExpr) { return string { "$" }; }, + [](CharacterClassExpr const& e) { return e.symbols.to_string(); }, + [](DotExpr) { return string { "." 
}; }, + [](EmptyExpr) { return string {}; }, + }, + re); +} + +int precedence(const RegExpr& regex) +{ + return visit(overloaded { + [](const AlternationExpr&) { return 1; }, + [](const BeginOfLineExpr&) { return 4; }, + [](const CharacterClassExpr&) { return 4; }, + [](const CharacterExpr&) { return 4; }, + [](const ClosureExpr&) { return 3; }, + [](const ConcatenationExpr&) { return 2; }, + [](const DotExpr&) { return 4; }, + [](const EmptyExpr&) { return 4; }, + [](const EndOfFileExpr&) { return 4; }, + [](const EndOfLineExpr&) { return 4; }, + [](const LookAheadExpr&) { return 0; }, + }, + regex); +} + +bool containsBeginOfLine(const RegExpr& regex) +{ + return visit(overloaded { + [](const AlternationExpr& e) { + return containsBeginOfLine(*e.left) || containsBeginOfLine(*e.right); + }, + [](const BeginOfLineExpr&) { return true; }, + [](const CharacterClassExpr&) { return false; }, + [](const CharacterExpr&) { return false; }, + [](const ClosureExpr& e) { return containsBeginOfLine(*e.subExpr); }, + [](const ConcatenationExpr& e) { + return containsBeginOfLine(*e.left) || containsBeginOfLine(*e.right); + }, + [](const DotExpr&) { return false; }, + [](const EmptyExpr&) { return false; }, + [](const EndOfFileExpr&) { return false; }, + [](const EndOfLineExpr&) { return false; }, + [](const LookAheadExpr& e) { + return containsBeginOfLine(*e.left) || containsBeginOfLine(*e.right); + }, + }, + regex); +} + +} // namespace regex_dfa diff --git a/src/regex_dfa/RegExpr.h b/src/regex_dfa/RegExpr.h new file mode 100644 index 0000000000..02e892baa4 --- /dev/null +++ b/src/regex_dfa/RegExpr.h @@ -0,0 +1,93 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. 
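precedence() above drives embrace(): to_string() re-inserts parentheses only where a child binds weaker than its parent, so a parse/print round trip normalizes redundant grouping. A small sketch (the second expectation matches a parser test further below in this patch; the first output is derived from the code above):

    #include <regex_dfa/RegExpr.h>
    #include <regex_dfa/RegExprParser.h>
    #include <iostream>

    using namespace regex_dfa;

    int main()
    {
        // Redundant grouping is dropped ...
        std::cout << to_string(RegExprParser {}.parse("(a)|(b)c")) << "\n"; // a|bc
        // ... but grouping that changes precedence is kept.
        std::cout << to_string(RegExprParser {}.parse("(a|b)c")) << "\n";   // (a|b)c
    }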
You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT +#pragma once + +#include + +#include + +#include +#include +#include +#include +#include +#include + +namespace regex_dfa +{ + +struct AlternationExpr; +struct BeginOfLineExpr; +struct CharacterClassExpr; +struct CharacterExpr; +struct ClosureExpr; +struct ConcatenationExpr; +struct DotExpr; +struct EmptyExpr; +struct EndOfFileExpr; +struct EndOfLineExpr; +struct LookAheadExpr; + +using RegExpr = std::variant; + +struct LookAheadExpr +{ + std::unique_ptr left; + std::unique_ptr right; +}; + +struct AlternationExpr +{ + std::unique_ptr left; + std::unique_ptr right; +}; + +struct ConcatenationExpr +{ + std::unique_ptr left; + std::unique_ptr right; +}; + +struct ClosureExpr +{ + std::unique_ptr subExpr; + unsigned minimumOccurrences { 0 }; + unsigned maximumOccurrences { std::numeric_limits::max() }; +}; + +struct CharacterExpr +{ + Symbol value; +}; +struct CharacterClassExpr +{ + SymbolSet symbols; +}; + +// clang-format off +struct DotExpr {}; +struct BeginOfLineExpr {}; +struct EndOfLineExpr {}; +struct EndOfFileExpr {}; +struct EmptyExpr {}; +// clang-format on + +[[nodiscard]] std::string to_string(const RegExpr& regex); +[[nodiscard]] int precedence(const RegExpr& regex); +[[nodiscard]] bool containsBeginOfLine(const RegExpr& regex); + +} // namespace regex_dfa diff --git a/src/regex_dfa/RegExprParser.cpp b/src/regex_dfa/RegExprParser.cpp new file mode 100644 index 0000000000..6c66dcfb0e --- /dev/null +++ b/src/regex_dfa/RegExprParser.cpp @@ -0,0 +1,483 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT + +#include +#include +#include + +#include + +#include +#include +#include +#include + +using namespace std; + +#if 0 + #define DEBUG(msg, ...) \ + do \ + { \ + cerr << fmt::format(msg, __VA_ARGS__) << "\n"; \ + } while (0) +#else + #define DEBUG(msg, ...) \ + do \ + { \ + } while (0) +#endif + +/* + REGULAR EXPRESSION SYNTAX: + -------------------------- + + expr := alternation + alternation := concatenation ('|' concatenation)* + concatenation := closure (closure)* + closure := atom ['*' | '?' 
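The variant and node structs above can also be assembled by hand. The sketch builds the tree for a|b directly; it assumes the unique_ptr members (whose element type is elided in the listing as shown) hold RegExpr nodes, which is how RegExpr.cpp dereferences them.

    #include <regex_dfa/RegExpr.h>
    #include <iostream>
    #include <memory>

    using namespace regex_dfa;

    int main()
    {
        // AlternationExpr owns its operands, so the leaves are wrapped
        // into heap-allocated RegExpr nodes.
        RegExpr re = AlternationExpr { std::make_unique<RegExpr>(CharacterExpr { 'a' }),
                                       std::make_unique<RegExpr>(CharacterExpr { 'b' }) };
        std::cout << to_string(re) << "\n"; // a|b
    }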
| '{' NUM [',' NUM] '}'] + atom := character + | '^' + | '$' + | '<>' + | '"' LITERAL '"' + | characterClass + | '(' expr ')' + | EPSILON + characterClass := '[' ['^'] characterClassFragment+ ']' + characterClassFragment := character | character '-' character +*/ + +namespace regex_dfa +{ + +RegExprParser::RegExprParser(): input_ {}, currentChar_ { input_.end() }, line_ { 1 }, column_ { 0 } +{ +} + +int RegExprParser::currentChar() const +{ + if (currentChar_ != input_.end()) + return *currentChar_; + else + return std::char_traits::eof(); +} + +bool RegExprParser::consumeIf(int ch) +{ + if (currentChar() != ch) + return false; + + consume(); + return true; +} + +int RegExprParser::consume() +{ + if (currentChar_ == input_.end()) + return std::char_traits::eof(); + + int ch = *currentChar_; + if (ch == '\n') + { + line_++; + column_ = 1; + } + else + { + column_++; + } + ++currentChar_; + DEBUG("consume: '{}'", (char) ch); + return ch; +} + +void RegExprParser::consume(int expected) +{ + int actual = currentChar(); + consume(); + if (actual != expected) + { + throw UnexpectedToken { line_, column_, actual, expected }; + } +} + +RegExpr RegExprParser::parse(string_view expr, unsigned line, unsigned column) +{ + input_ = expr; + currentChar_ = input_.begin(); + line_ = line; + column_ = column; + + return parseExpr(); +} + +RegExpr RegExprParser::parseExpr() +{ + return parseLookAheadExpr(); +} + +RegExpr RegExprParser::parseLookAheadExpr() +{ + RegExpr lhs = parseAlternation(); + + if (currentChar() == '/') + { + consume(); + RegExpr rhs = parseAlternation(); + lhs = LookAheadExpr { make_unique(std::move(lhs)), make_unique(std::move(rhs)) }; + } + + return lhs; +} + +RegExpr RegExprParser::parseAlternation() +{ + RegExpr lhs = parseConcatenation(); + + while (currentChar() == '|') + { + consume(); + RegExpr rhs = parseConcatenation(); + lhs = AlternationExpr { make_unique(std::move(lhs)), make_unique(std::move(rhs)) }; + } + + return lhs; +} + +RegExpr RegExprParser::parseConcatenation() +{ + // FOLLOW-set, the set of terminal tokens that can occur right after a concatenation + static const string_view follow = "/|)"; + RegExpr lhs = parseClosure(); + + while (!eof() && follow.find(currentChar()) == std::string_view::npos) + { + RegExpr rhs = parseClosure(); + lhs = + ConcatenationExpr { make_unique(std::move(lhs)), make_unique(std::move(rhs)) }; + } + + return lhs; +} + +RegExpr RegExprParser::parseClosure() +{ + RegExpr subExpr = parseAtom(); + + switch (currentChar()) + { + case '?': consume(); return ClosureExpr { make_unique(std::move(subExpr)), 0, 1 }; + case '*': consume(); return ClosureExpr { make_unique(std::move(subExpr)), 0 }; + case '+': consume(); return ClosureExpr { make_unique(std::move(subExpr)), 1 }; + case '{': { + consume(); + unsigned int m = parseInt(); + if (currentChar() == ',') + { + consume(); + unsigned int n = parseInt(); + consume('}'); + return ClosureExpr { make_unique(std::move(subExpr)), m, n }; + } + else + { + consume('}'); + return ClosureExpr { make_unique(std::move(subExpr)), m, m }; + } + } + default: return subExpr; + } +} + +unsigned RegExprParser::parseInt() +{ + unsigned n = 0; + while (isdigit(currentChar())) + { + n *= 10; + n += currentChar() - '0'; + consume(); + } + return n; +} + +RegExpr RegExprParser::parseAtom() +{ + // skip any whitespace (except newlines) + while (!eof() && isspace(currentChar()) && currentChar() != '\n') + consume(); + + switch (currentChar()) + { + case std::char_traits::eof(): // EOF + case ')': return EmptyExpr {}; 
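parseClosure() above maps '?', '*', '+' and '{m,n}' onto the bounds of a ClosureExpr. A sketch reading those bounds back, mirroring the a{2,4} test case later in this patch:

    #include <regex_dfa/RegExpr.h>
    #include <regex_dfa/RegExprParser.h>
    #include <iostream>
    #include <variant>

    using namespace regex_dfa;

    int main()
    {
        RegExpr re = RegExprParser {}.parse("a{2,4}");
        const auto& closure = std::get<ClosureExpr>(re);
        std::cout << closure.minimumOccurrences << ".."
                  << closure.maximumOccurrences << "\n"; // 2..4
    }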
+ case '<': + consume(); + consume('<'); + consume('E'); + consume('O'); + consume('F'); + consume('>'); + consume('>'); + return EndOfFileExpr {}; + case '(': { + consume(); + RegExpr subExpr = parseExpr(); + consume(')'); + return subExpr; + } + case '"': { + consume(); + RegExpr lhs = CharacterExpr { consume() }; + while (!eof() && currentChar() != '"') + { + RegExpr rhs = CharacterExpr { consume() }; + lhs = ConcatenationExpr { make_unique(std::move(lhs)), + make_unique(std::move(rhs)) }; + } + consume('"'); + return lhs; + } + case '[': return parseCharacterClass(); + case '.': consume(); return DotExpr {}; + case '^': consume(); return BeginOfLineExpr {}; + case '$': consume(); return EndOfLineExpr {}; + default: return CharacterExpr { parseSingleCharacter() }; + } +} + +RegExpr RegExprParser::parseCharacterClass() +{ + consume(); // '[' + const bool complement = consumeIf('^'); // TODO + + SymbolSet ss; + parseCharacterClassFragment(ss); + while (!eof() && currentChar() != ']') + parseCharacterClassFragment(ss); + + if (complement) + ss.complement(); + + consume(']'); + return CharacterClassExpr { std::move(ss) }; +} + +void RegExprParser::parseNamedCharacterClass(SymbolSet& ss) +{ + consume('['); + consume(':'); + string token; + while (isalpha(currentChar())) + { + token += static_cast(consume()); + } + consume(':'); + consume(']'); + + static const unordered_map> names = { + { "alnum", + [](SymbolSet& ss) { + for (Symbol c = 'a'; c <= 'z'; c++) + ss.insert(c); + for (Symbol c = 'A'; c <= 'Z'; c++) + ss.insert(c); + for (Symbol c = '0'; c <= '9'; c++) + ss.insert(c); + } }, + { "alpha", + [](SymbolSet& ss) { + for (Symbol c = 'a'; c <= 'z'; c++) + ss.insert(c); + for (Symbol c = 'A'; c <= 'Z'; c++) + ss.insert(c); + } }, + { "blank", + [](SymbolSet& ss) { + ss.insert(' '); + ss.insert('\t'); + } }, + { "cntrl", + [](SymbolSet& ss) { + for (Symbol c = 0; c <= 255; c++) + if (iscntrl(c)) + ss.insert(c); + } }, + { "digit", + [](SymbolSet& ss) { + for (Symbol c = '0'; c <= '9'; c++) + ss.insert(c); + } }, + { "graph", + [](SymbolSet& ss) { + for (Symbol c = 0; c <= 255; c++) + if (isgraph(c)) + ss.insert(c); + } }, + { "lower", + [](SymbolSet& ss) { + for (Symbol c = 'a'; c <= 'z'; c++) + ss.insert(c); + } }, + { "print", + [](SymbolSet& ss) { + for (Symbol c = 0; c <= 255; c++) + if (isprint(c) || c == ' ') + ss.insert(c); + } }, + { "punct", + [](SymbolSet& ss) { + for (Symbol c = 0; c <= 255; c++) + if (ispunct(c)) + ss.insert(c); + } }, + { "space", + [](SymbolSet& ss) { + for (Symbol c: "\f\n\r\t\v") + ss.insert(c); + } }, + { "upper", + [](SymbolSet& ss) { + for (Symbol c = 'A'; c <= 'Z'; c++) + ss.insert(c); + } }, + { "xdigit", + [](SymbolSet& ss) { + for (Symbol c = '0'; c <= '9'; c++) + ss.insert(c); + for (Symbol c = 'a'; c <= 'f'; c++) + ss.insert(c); + for (Symbol c = 'A'; c <= 'F'; c++) + ss.insert(c); + } }, + }; + + if (auto i = names.find(token); i != names.end()) + i->second(ss); + else + throw UnexpectedToken { line_, column_, token, "" }; +} + +Symbol RegExprParser::parseSingleCharacter() +{ + if (currentChar() != '\\') + return consume(); + + consume(); // consumes escape character + switch (currentChar()) + { + case 'a': consume(); return '\a'; + case 'b': consume(); return '\b'; + case 'f': consume(); return '\f'; + case 'n': consume(); return '\n'; + case 'r': consume(); return '\r'; + case 's': consume(); return ' '; + case 't': consume(); return '\t'; + case 'v': consume(); return '\v'; + case 'x': { + consume(); + + char buf[3]; + buf[0] = consume(); + if 
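parseNamedCharacterClass() above expands POSIX-style [:name:] groups into the SymbolSet of a CharacterClassExpr, and plain ranges can be mixed in. A sketch; the printed form is whatever SymbolSet::to_string() produces, which the tests below suggest is range notation such as 0-9a-f.

    #include <regex_dfa/RegExpr.h>
    #include <regex_dfa/RegExprParser.h>
    #include <iostream>
    #include <variant>

    using namespace regex_dfa;

    int main()
    {
        RegExpr re = RegExprParser {}.parse("[[:digit:]a-f]");
        std::cout << std::get<CharacterClassExpr>(re).symbols.to_string() << "\n"; // e.g. 0-9a-f
    }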
(!isxdigit(buf[0])) + throw UnexpectedToken { line_, column_, string(1, buf[0]), "[0-9a-fA-F]" }; + buf[1] = consume(); + if (!isxdigit(buf[1])) + throw UnexpectedToken { line_, column_, string(1, buf[1]), "[0-9a-fA-F]" }; + buf[2] = 0; + + return static_cast(strtoul(buf, nullptr, 16)); + } + case '0': { + const Symbol x0 = consume(); + if (!isdigit(currentChar())) + return '\0'; + + // octal value (\DDD) + char buf[4]; + buf[0] = x0; + buf[1] = consume(); + if (!(buf[1] >= '0' && buf[1] <= '7')) + throw UnexpectedToken { line_, column_, string(1, buf[1]), "[0-7]" }; + buf[2] = consume(); + if (!(buf[2] >= '0' && buf[2] <= '7')) + throw UnexpectedToken { line_, column_, string(1, buf[2]), "[0-7]" }; + buf[3] = '\0'; + + return static_cast(strtoul(buf, nullptr, 8)); + } + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': { + // octal value (\DDD) + char buf[4]; + buf[0] = consume(); + buf[1] = consume(); + if (!(buf[1] >= '0' && buf[1] <= '7')) + throw UnexpectedToken { line_, column_, string(1, buf[1]), "[0-7]" }; + buf[2] = consume(); + if (!(buf[2] >= '0' && buf[2] <= '7')) + throw UnexpectedToken { line_, column_, string(1, buf[2]), "[0-7]" }; + buf[3] = '\0'; + + return static_cast(strtoul(buf, nullptr, 8)); + } + case '"': + case '$': + case '(': + case ')': + case '*': + case '+': + case ':': + case '?': + case '[': + case '\'': + case '\\': + case ']': + case '^': + case '{': + case '}': + case '.': + case '/': return consume(); + default: { + throw UnexpectedToken { line_, + column_, + fmt::format("'{}'", static_cast(currentChar())), + "" }; + } + } +} + +void RegExprParser::parseCharacterClassFragment(SymbolSet& ss) +{ + // parse [:named:] + if (currentChar() == '[') + { + parseNamedCharacterClass(ss); + return; + } + + // parse single char (A) or range (A-Z) + const Symbol c1 = parseSingleCharacter(); + if (currentChar() != '-') + { + ss.insert(c1); + return; + } + + consume(); // consume '-' + const Symbol c2 = parseSingleCharacter(); + + for (Symbol c_i = c1; c_i <= c2; c_i++) + ss.insert(c_i); +} + +} // namespace regex_dfa diff --git a/src/regex_dfa/RegExprParser.h b/src/regex_dfa/RegExprParser.h new file mode 100644 index 0000000000..8484087af8 --- /dev/null +++ b/src/regex_dfa/RegExprParser.h @@ -0,0 +1,96 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT +#pragma once + +#include +#include + +#include + +#include +#include + +namespace regex_dfa +{ + +class SymbolSet; + +class RegExprParser +{ + public: + RegExprParser(); + + [[nodiscard]] RegExpr parse(std::string_view expr, unsigned line, unsigned column); + + [[nodiscard]] RegExpr parse(std::string_view expr) { return parse(expr, 1, 1); } + + class UnexpectedToken: public std::runtime_error + { + public: + UnexpectedToken(unsigned int line, unsigned int column, std::string actual, std::string expected): + std::runtime_error { fmt::format( + "[{}:{}] Unexpected token {}. 
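parseSingleCharacter() above decodes \xNN as a two-digit hexadecimal escape and \NNN as a three-digit octal escape. In the sketch below both spellings should denote 'A' (0x41 == 0101 octal); the expected output is derived from the code above, not asserted by this patch.

    #include <regex_dfa/RegExpr.h>
    #include <regex_dfa/RegExprParser.h>
    #include <iostream>
    #include <variant>

    using namespace regex_dfa;

    int main()
    {
        RegExpr re = RegExprParser {}.parse(R"([\x41\101])");
        std::cout << std::get<CharacterClassExpr>(re).symbols.to_string() << "\n"; // A
    }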
Expected {} instead.", line, column, actual, expected) }, + line_ { line }, + column_ { column }, + actual_ { std::move(actual) }, + expected_ { std::move(expected) } + { + } + + UnexpectedToken(unsigned int line, unsigned int column, int actual, int expected): + UnexpectedToken { line, + column, + std::char_traits::eq(actual, std::char_traits::eof()) + ? "EOF" + : fmt::format("{}", static_cast(actual)), + std::string(1, static_cast(expected)) } + { + } + + [[nodiscard]] unsigned int line() const noexcept { return line_; } + [[nodiscard]] unsigned int column() const noexcept { return column_; } + [[nodiscard]] const std::string& actual() const noexcept { return actual_; } + [[nodiscard]] const std::string& expected() const noexcept { return expected_; } + + private: + unsigned int line_; + unsigned int column_; + std::string actual_; + std::string expected_; + }; + + private: + [[nodiscard]] int currentChar() const; + [[nodiscard]] bool eof() const noexcept + { + return std::char_traits::eq(currentChar(), std::char_traits::eof()); + } + [[nodiscard]] bool consumeIf(int ch); + void consume(int ch); + int consume(); + [[nodiscard]] unsigned parseInt(); + + [[nodiscard]] RegExpr parse(); // expr + [[nodiscard]] RegExpr parseExpr(); // lookahead + [[nodiscard]] RegExpr parseLookAheadExpr(); // alternation ('/' alternation)? + [[nodiscard]] RegExpr parseAlternation(); // concatenation ('|' concatenation)* + [[nodiscard]] RegExpr parseConcatenation(); // closure (closure)* + [[nodiscard]] RegExpr parseClosure(); // atom ['*' | '?' | '{' NUM [',' NUM] '}'] + [[nodiscard]] RegExpr parseAtom(); // character | characterClass | '(' expr ')' + [[nodiscard]] RegExpr parseCharacterClass(); // '[' characterClassFragment+ ']' + void parseCharacterClassFragment(SymbolSet& ss); // namedClass | character | character '-' character + void parseNamedCharacterClass(SymbolSet& ss); // '[' ':' NAME ':' ']' + [[nodiscard]] Symbol parseSingleCharacter(); + + private: + std::string_view input_; + std::string_view::iterator currentChar_; + unsigned int line_; + unsigned int column_; +}; + +} // namespace regex_dfa diff --git a/src/regex_dfa/RegExprParser_test.cpp b/src/regex_dfa/RegExprParser_test.cpp new file mode 100644 index 0000000000..e668143206 --- /dev/null +++ b/src/regex_dfa/RegExprParser_test.cpp @@ -0,0 +1,309 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. 
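UnexpectedToken above carries the 1-based line and column of the offending character, so callers can produce precise diagnostics. A sketch using an unterminated quoted literal, the same input class the tests below exercise:

    #include <regex_dfa/RegExprParser.h>
    #include <iostream>

    using namespace regex_dfa;

    int main()
    {
        try
        {
            [[maybe_unused]] auto re = RegExprParser {}.parse("\"abc"); // unterminated literal
        }
        catch (const RegExprParser::UnexpectedToken& e)
        {
            std::cerr << e.what() << " (at " << e.line() << ':' << e.column() << ")\n";
        }
    }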
You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT + +#include +#include + +#include + +#include + +using namespace std; +using namespace regex_dfa; + +namespace +{ + +RegExpr parseRegExpr(string const& s) +{ + return RegExprParser {}.parse(s); +} + +} // namespace + +TEST_CASE("regex_RegExprParser.namedCharacterClass_graph") +{ + RegExpr re = parseRegExpr("[[:graph:]]"); + REQUIRE(holds_alternative(re)); + CHECK("!-~" == to_string(re)); +} + +TEST_CASE("regex_RegExprParser.whitespaces_concatination") +{ + RegExpr re = parseRegExpr("a b"); + REQUIRE(holds_alternative(re)); + CHECK("ab" == to_string(re)); +} + +TEST_CASE("regex_RegExprParser.whitespaces_alternation") +{ + RegExpr re = parseRegExpr("a | b"); + REQUIRE(holds_alternative(re)); + CHECK("a|b" == to_string(re)); +} + +TEST_CASE("regex_RegExprParser.namedCharacterClass_digit") +{ + RegExpr re = parseRegExpr("[[:digit:]]"); + REQUIRE(holds_alternative(re)); + CHECK("0-9" == to_string(re)); +} + +TEST_CASE("regex_RegExprParser.namedCharacterClass_alnum") +{ + RegExpr re = parseRegExpr("[[:alnum:]]"); + REQUIRE(holds_alternative(re)); + CHECK("0-9A-Za-z" == to_string(re)); +} + +TEST_CASE("regex_RegExprParser.namedCharacterClass_alpha") +{ + RegExpr re = parseRegExpr("[[:alpha:]]"); + REQUIRE(holds_alternative(re)); + CHECK("A-Za-z" == to_string(re)); +} + +TEST_CASE("regex_RegExprParser.namedCharacterClass_blank") +{ + RegExpr re = parseRegExpr("[[:blank:]]"); + REQUIRE(holds_alternative(re)); + CHECK("\\t\\s" == to_string(re)); +} + +TEST_CASE("regex_RegExprParser.namedCharacterClass_cntrl") +{ + RegExpr re = parseRegExpr("[[:cntrl:]]"); + REQUIRE(holds_alternative(re)); + CHECK("\\0-\\x1f\\x7f" == to_string(re)); +} + +TEST_CASE("regex_RegExprParser.namedCharacterClass_print") +{ + RegExpr re = parseRegExpr("[[:print:]]"); + REQUIRE(holds_alternative(re)); + CHECK("\\s-~" == to_string(re)); +} + +TEST_CASE("regex_RegExprParser.namedCharacterClass_punct") +{ + RegExpr re = parseRegExpr("[[:punct:]]"); + REQUIRE(holds_alternative(re)); + CHECK("!-/:-@[-`{-~" == to_string(re)); +} + +TEST_CASE("regex_RegExprParser.namedCharacterClass_space") +{ + RegExpr re = parseRegExpr("[[:space:]]"); + REQUIRE(holds_alternative(re)); + CHECK("\\0\\t-\\r" == to_string(re)); +} + +TEST_CASE("regex_RegExprParser.namedCharacterClass_unknown") +{ + CHECK_THROWS_AS(parseRegExpr("[[:unknown:]]"), RegExprParser::UnexpectedToken); +} + +TEST_CASE("regex_RegExprParser.namedCharacterClass_upper") +{ + RegExpr re = parseRegExpr("[[:upper:]]"); + REQUIRE(holds_alternative(re)); + CHECK("A-Z" == to_string(re)); +} + +TEST_CASE("regex_RegExprParser.namedCharacterClass_mixed") +{ + RegExpr re = parseRegExpr("[[:lower:]0-9]"); + REQUIRE(holds_alternative(re)); + CHECK("0-9a-z" == to_string(re)); +} + +TEST_CASE("regex_RegExprParser.characterClass_complement") +{ + RegExpr re = parseRegExpr("[^\\n]"); + REQUIRE(holds_alternative(re)); + CHECK(get(re).symbols.isDot()); + CHECK("." 
== get(re).symbols.to_string()); +} + +TEST_CASE("regex_RegExprParser.escapeSequences_invalid") +{ + CHECK_THROWS_AS(parseRegExpr("[\\z]"), RegExprParser::UnexpectedToken); +} + +TEST_CASE("regex_RegExprParser.escapeSequences_abfnrstv") +{ + CHECK("\\a" == to_string(parseRegExpr("[\\a]"))); + CHECK("\\b" == to_string(parseRegExpr("[\\b]"))); + CHECK("\\f" == to_string(parseRegExpr("[\\f]"))); + CHECK("\\n" == to_string(parseRegExpr("[\\n]"))); + CHECK("\\r" == to_string(parseRegExpr("[\\r]"))); + CHECK("\\s" == to_string(parseRegExpr("[\\s]"))); + CHECK("\\t" == to_string(parseRegExpr("[\\t]"))); + CHECK("\\v" == to_string(parseRegExpr("[\\v]"))); +} + +TEST_CASE("regex_RegExprParser.newline") +{ + RegExpr re = parseRegExpr("\n"); + REQUIRE(holds_alternative(re)); + CHECK('\n' == get(re).value); +} + +TEST_CASE("regex_RegExprParser.escapeSequences_hex") +{ + RegExpr re = parseRegExpr("[\\x20]"); + REQUIRE(holds_alternative(re)); + CHECK("\\s" == get(re).symbols.to_string()); + + CHECK_THROWS_AS(parseRegExpr("[\\xZZ]"), RegExprParser::UnexpectedToken); + CHECK_THROWS_AS(parseRegExpr("[\\xAZ]"), RegExprParser::UnexpectedToken); + CHECK_THROWS_AS(parseRegExpr("[\\xZA]"), RegExprParser::UnexpectedToken); +} + +TEST_CASE("regex_RegExprParser.escapeSequences_nul") +{ + RegExpr re = parseRegExpr("[\\0]"); + REQUIRE(holds_alternative(re)); + CHECK("\\0" == get(re).symbols.to_string()); +} + +TEST_CASE("regex_RegExprParser.escapeSequences_octal") +{ + // with leading zero + RegExpr re = parseRegExpr("[\\040]"); + REQUIRE(holds_alternative(re)); + CHECK("\\s" == get(re).symbols.to_string()); + + // with leading non-zero + re = parseRegExpr("[\\172]"); + REQUIRE(holds_alternative(re)); + CHECK("z" == get(re).symbols.to_string()); + + // invalids + CHECK_THROWS_AS(parseRegExpr("[\\822]"), RegExprParser::UnexpectedToken); + CHECK_THROWS_AS(parseRegExpr("[\\282]"), RegExprParser::UnexpectedToken); + CHECK_THROWS_AS(parseRegExpr("[\\228]"), RegExprParser::UnexpectedToken); + CHECK_THROWS_AS(parseRegExpr("[\\082]"), RegExprParser::UnexpectedToken); + CHECK_THROWS_AS(parseRegExpr("[\\028]"), RegExprParser::UnexpectedToken); +} + +TEST_CASE("regex_RegExprParser.doubleQuote") +{ + // as concatenation character + RegExpr re = parseRegExpr(R"(\")"); + REQUIRE(holds_alternative(re)); + CHECK('"' == get(re).value); + + // as character class + re = parseRegExpr(R"([\"])"); + REQUIRE(holds_alternative(re)); + CHECK(R"(")" == get(re).symbols.to_string()); +} + +TEST_CASE("regex_RegExprParser.dot") +{ + RegExpr re = parseRegExpr("."); + REQUIRE(holds_alternative(re)); + CHECK("." == to_string(re)); +} + +TEST_CASE("regex_RegExprParser.optional") +{ + RegExpr re = parseRegExpr("a?"); + REQUIRE(holds_alternative(re)); + CHECK("a?" 
== to_string(re)); +} + +TEST_CASE("regex_RegExprParser.bol") +{ + RegExpr re = parseRegExpr("^a"); + REQUIRE(holds_alternative(re)); + const ConcatenationExpr& cat = get(re); + + REQUIRE(holds_alternative(*cat.left)); + CHECK("^" == to_string(*cat.left)); + CHECK("a" == to_string(*cat.right)); +} + +TEST_CASE("regex_RegExprParser.eol") +{ + RegExpr re = parseRegExpr("a$"); + REQUIRE(holds_alternative(re)); + const ConcatenationExpr& cat = get(re); + + REQUIRE(holds_alternative(*cat.right)); + CHECK("a$" == to_string(re)); +} + +TEST_CASE("regex_RegExprParser.eof") +{ + RegExpr re = parseRegExpr("<>"); + REQUIRE(holds_alternative(re)); + CHECK("<>" == to_string(re)); +} + +TEST_CASE("regex_RegExprParser.alternation") +{ + CHECK("a|b" == to_string(parseRegExpr("a|b"))); + CHECK("(a|b)c" == to_string(parseRegExpr("(a|b)c"))); + CHECK("a(b|c)" == to_string(parseRegExpr("a(b|c)"))); +} + +TEST_CASE("regex_RegExprParser.lookahead") +{ + RegExpr re = parseRegExpr("ab/cd"); + REQUIRE(holds_alternative(re)); + CHECK("ab/cd" == to_string(re)); + CHECK("(a/b)|b" == to_string(parseRegExpr("(a/b)|b"))); + CHECK("a|(b/c)" == to_string(parseRegExpr("a|(b/c)"))); +} + +TEST_CASE("regex_RegExprParser.closure") +{ + RegExpr re = parseRegExpr("(abc)*"); + REQUIRE(holds_alternative(re)); + const ClosureExpr& e = get(re); + CHECK(0 == e.minimumOccurrences); + CHECK(numeric_limits::max() == e.maximumOccurrences); + CHECK("(abc)*" == to_string(re)); +} + +TEST_CASE("regex_RegExprParser.positive") +{ + auto re = parseRegExpr("(abc)+"); + REQUIRE(holds_alternative(re)); + const ClosureExpr& e = get(re); + CHECK(1 == e.minimumOccurrences); + CHECK(numeric_limits::max() == e.maximumOccurrences); + CHECK("(abc)+" == to_string(re)); +} + +TEST_CASE("regex_RegExprParser.closure_range") +{ + auto re = parseRegExpr("a{2,4}"); + REQUIRE(holds_alternative(re)); + const ClosureExpr& e = get(re); + CHECK(2 == e.minimumOccurrences); + CHECK(4 == e.maximumOccurrences); + CHECK("a{2,4}" == to_string(re)); +} + +TEST_CASE("regex_RegExprParser.empty") +{ + auto re = parseRegExpr("(a|)"); + CHECK("a|" == to_string(re)); // grouping '(' & ')' is not preserved as node in the parse tree. +} + +TEST_CASE("regex_RegExprParser.UnexpectedToken_grouping") +{ + CHECK_THROWS_AS(parseRegExpr("(a"), RegExprParser::UnexpectedToken); +} + +TEST_CASE("regex_RegExprParser.UnexpectedToken_literal") +{ + CHECK_THROWS_AS(parseRegExpr("\"a"), RegExprParser::UnexpectedToken); +} diff --git a/src/regex_dfa/Report.cpp b/src/regex_dfa/Report.cpp new file mode 100644 index 0000000000..9f2b9b51cd --- /dev/null +++ b/src/regex_dfa/Report.cpp @@ -0,0 +1,109 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. 
You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT + +#include + +#include +#include + +using namespace std; +using namespace regex_dfa; + +// {{{ Message +string Report::Message::to_string() const +{ + switch (type) + { + case Type::Warning: return fmt::format("[{}] {}", sourceLocation, text); + case Type::LinkError: return fmt::format("{}: {}", type, text); + default: return fmt::format("[{}] {}: {}", sourceLocation, type, text); + } +} + +bool Report::Message::operator==(const Message& other) const noexcept +{ + // XXX ignore SourceLocation's filename & end + return type == other.type && sourceLocation.offset == other.sourceLocation.offset && text == other.text; +} +// }}} +// {{{ ConsoleReport +void ConsoleReport::onMessage(Message&& message) +{ + switch (message.type) + { + case Type::Warning: cerr << fmt::format("Warning: {}\n", message); break; + default: cerr << fmt::format("Error: {}\n", message); break; + } +} +// }}} +// {{{ BufferedReport +void BufferedReport::onMessage(Message&& msg) +{ + messages_.emplace_back(std::move(msg)); +} + +void BufferedReport::clear() +{ + messages_.clear(); +} + +string BufferedReport::to_string() const +{ + stringstream sstr; + for (const Message& message: messages_) + { + switch (message.type) + { + case Type::Warning: sstr << "Warning: " << message.to_string() << "\n"; break; + default: sstr << "Error: " << message.to_string() << "\n"; break; + } + } + return sstr.str(); +} + +bool BufferedReport::operator==(const BufferedReport& other) const noexcept +{ + if (size() != other.size()) + return false; + + for (size_t i = 0, e = size(); i != e; ++i) + if (messages_[i] != other.messages_[i]) + return false; + + return true; +} + +bool BufferedReport::contains(const Message& message) const noexcept +{ + for (const Message& m: messages_) + if (m == message) + return true; + + return false; +} + +DifferenceReport difference(const BufferedReport& first, const BufferedReport& second) +{ + DifferenceReport diff; + + for (const Report::Message& m: first) + if (!second.contains(m)) + diff.first.push_back(m); + + for (const Report::Message& m: second) + if (!first.contains(m)) + diff.second.push_back(m); + + return diff; +} + +ostream& operator<<(ostream& os, const BufferedReport& report) +{ + os << report.to_string(); + return os; +} +// }}} diff --git a/src/regex_dfa/Report.h b/src/regex_dfa/Report.h new file mode 100644 index 0000000000..0fc9bc71e7 --- /dev/null +++ b/src/regex_dfa/Report.h @@ -0,0 +1,223 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. 
You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT + +#pragma once + +#include + +#include + +#include +#include +#include +#include +#include + +namespace regex_dfa +{ + +class Report +{ + public: + enum class Type + { + TokenError, + SyntaxError, + TypeError, + Warning, + LinkError + }; + + struct Message + { + Type type; + SourceLocation sourceLocation; + std::string text; + + Message(Type type, SourceLocation sloc, std::string text): + type { type }, sourceLocation { std::move(sloc) }, text { std::move(text) } + { + } + + [[nodiscard]] std::string to_string() const; + + bool operator==(const Message& other) const noexcept; + bool operator!=(const Message& other) const noexcept { return !(*this == other); } + }; + + using MessageList = std::vector; + using Reporter = std::function; + + explicit Report(Reporter reporter): onReport_ { std::move(reporter) } {} + + template + void tokenError(const SourceLocation& sloc, const std::string& f, Args&&... args) + { + report(Type::TokenError, sloc, fmt::format(f, std::forward(args)...)); + } + + template + void syntaxError(const SourceLocation& sloc, const std::string& f, Args&&... args) + { + report(Type::SyntaxError, sloc, fmt::format(f, std::forward(args)...)); + } + + template + void typeError(const SourceLocation& sloc, const std::string& f, Args&&... args) + { + report(Type::TypeError, sloc, fmt::format(f, std::forward(args)...)); + } + + template + void warning(const SourceLocation& sloc, const std::string& f, Args&&... args) + { + report(Type::Warning, sloc, fmt::format(f, std::forward(args)...)); + } + + template + void linkError(const std::string& f, Args&&... args) + { + report(Type::LinkError, SourceLocation {}, fmt::format(f, std::forward(args)...)); + } + + void report(Type type, SourceLocation sloc, std::string text) + { + if (type != Type::Warning) + errorCount_++; + + if (onReport_) + { + onReport_(Message(type, std::move(sloc), std::move(text))); + } + } + + [[nodiscard]] bool containsFailures() const noexcept { return errorCount_ != 0; } + + private: + size_t errorCount_ = 0; + Reporter onReport_; +}; + +class ConsoleReport: public Report +{ + public: + ConsoleReport(): Report(std::bind(&ConsoleReport::onMessage, this, std::placeholders::_1)) {} + + private: + void onMessage(Message&& msg); +}; + +class BufferedReport: public Report +{ + public: + BufferedReport(): Report(std::bind(&BufferedReport::onMessage, this, std::placeholders::_1)), messages_ {} + { + } + + [[nodiscard]] std::string to_string() const; + + [[nodiscard]] const MessageList& messages() const noexcept { return messages_; } + + void clear(); + [[nodiscard]] size_t size() const noexcept { return messages_.size(); } + [[nodiscard]] const Message& operator[](size_t i) const { return messages_[i]; } + + using iterator = MessageList::iterator; + using const_iterator = MessageList::const_iterator; + + [[nodiscard]] iterator begin() noexcept { return messages_.begin(); } + [[nodiscard]] iterator end() noexcept { return messages_.end(); } + [[nodiscard]] const_iterator begin() const noexcept { return messages_.begin(); } + [[nodiscard]] const_iterator end() const noexcept { return messages_.end(); } + + [[nodiscard]] bool contains(const Message& m) const noexcept; + + [[nodiscard]] bool operator==(const BufferedReport& other) const noexcept; + [[nodiscard]] bool operator!=(const BufferedReport& other) const noexcept { return !(*this == other); } + + private: + void onMessage(Message&& msg); + + private: + MessageList messages_; +}; + 
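BufferedReport above collects diagnostics instead of printing them, which is how test code can assert on emitted messages. A brief sketch; the default-constructed SourceLocation mirrors what linkError() passes, and the message text is an arbitrary example.

    #include <regex_dfa/Report.h>
    #include <iostream>

    using namespace regex_dfa;

    int main()
    {
        BufferedReport report;
        report.report(Report::Type::SyntaxError, SourceLocation {}, "unexpected character '%'");
        if (report.containsFailures())
            std::cout << report.to_string(); // e.g. "Error: [...] SyntaxError: unexpected character '%'"
    }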
+std::ostream& operator<<(std::ostream& os, const BufferedReport& report); + +using DifferenceReport = std::pair; + +DifferenceReport difference(const BufferedReport& first, const BufferedReport& second); + +} // namespace regex_dfa + +namespace fmt +{ +template <> +struct formatter: formatter +{ + using Type = regex_dfa::Report::Type; + + static std::string_view to_stringview(Type t) + { + switch (t) + { + case Type::TokenError: return "TokenError"; + case Type::SyntaxError: return "SyntaxError"; + case Type::TypeError: return "TypeError"; + case Type::Warning: return "Warning"; + case Type::LinkError: return "LinkError"; + default: return "???"; + } + } + + template + constexpr auto format(Type v, FormatContext& ctx) + { + return formatter::format(to_stringview(v), ctx); + } +}; +} // namespace fmt + +namespace fmt +{ +template <> +struct formatter +{ + template + constexpr auto parse(ParseContext& ctx) + { + return ctx.begin(); + } + + template + constexpr auto format(const regex_dfa::SourceLocation& sloc, FormatContext& ctx) + { + return fmt::format_to(ctx.out(), "{} ({}-{})", sloc.filename, sloc.offset, sloc.offset + sloc.count); + } +}; +} // namespace fmt + +namespace fmt +{ +template <> +struct formatter +{ + using Message = regex_dfa::Report::Message; + + template + constexpr auto parse(ParseContext& ctx) + { + return ctx.begin(); + } + + template + constexpr auto format(const Message& v, FormatContext& ctx) + { + return fmt::format_to(ctx.out(), "{}", v.to_string()); + } +}; +} // namespace fmt diff --git a/src/regex_dfa/Rule.h b/src/regex_dfa/Rule.h new file mode 100644 index 0000000000..0c97764494 --- /dev/null +++ b/src/regex_dfa/Rule.h @@ -0,0 +1,137 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT +#pragma once + +#include // IgnoreTag +#include +#include +#include // Tag + +#include +#include +#include +#include + +namespace regex_dfa +{ + +struct Rule +{ + unsigned int line; + unsigned int column; + Tag tag; + std::vector conditions; + std::string name; + std::string pattern; + std::unique_ptr regexpr = nullptr; + + [[nodiscard]] bool isIgnored() const noexcept { return tag == IgnoreTag; } + + [[nodiscard]] Rule clone() const + { + return regexpr ? Rule { line, + column, + tag, + conditions, + name, + pattern, + std::make_unique(RegExprParser {}.parse(pattern, line, column)) } + : Rule { line, column, tag, conditions, name, pattern, nullptr }; + } + + Rule() = default; + + Rule(unsigned line, + unsigned column, + Tag tag, + std::vector conditions, + std::string name, + std::string pattern, + std::unique_ptr regexpr = nullptr): + line { line }, + column { column }, + tag { tag }, + conditions { std::move(conditions) }, + name { std::move(name) }, + pattern { std::move(pattern) }, + regexpr { std::move(regexpr) } + { + } + + Rule(const Rule& v): + line { v.line }, + column { v.column }, + tag { v.tag }, + conditions { v.conditions }, + name { v.name }, + pattern { v.pattern }, + regexpr { v.regexpr ? std::make_unique(RegExprParser {}.parse(pattern, line, column)) + : nullptr } + { + } + + Rule& operator=(const Rule& v) + { + line = v.line; + column = v.column; + tag = v.tag; + conditions = v.conditions; + name = v.name; + pattern = v.pattern; + regexpr = + v.regexpr ? 
std::make_unique(RegExprParser {}.parse(pattern, line, column)) : nullptr; + return *this; + } + + bool operator<(const Rule& rhs) const noexcept { return tag < rhs.tag; } + bool operator<=(const Rule& rhs) const noexcept { return tag <= rhs.tag; } + bool operator==(const Rule& rhs) const noexcept { return tag == rhs.tag; } + bool operator!=(const Rule& rhs) const noexcept { return tag != rhs.tag; } + bool operator>=(const Rule& rhs) const noexcept { return tag >= rhs.tag; } + bool operator>(const Rule& rhs) const noexcept { return tag > rhs.tag; } +}; + +using RuleList = std::vector; + +inline bool ruleContainsBeginOfLine(const Rule& r) +{ + return containsBeginOfLine(*r.regexpr); +} + +} // namespace regex_dfa + +namespace fmt +{ +template <> +struct formatter +{ + template + constexpr auto parse(ParseContext& ctx) + { + return ctx.begin(); + } + + template + constexpr auto format(const regex_dfa::Rule& v, FormatContext& ctx) + { + if (!v.conditions.empty()) + { + fmt::format_to(ctx.out(), "<"); + for (size_t i = 0; i < v.conditions.size(); ++i) + if (i != 0) + fmt::format_to(ctx.out(), ", {}", v.conditions[i]); + else + fmt::format_to(ctx.out(), "{}", v.conditions[i]); + fmt::format_to(ctx.out(), ">"); + } + if (v.tag == regex_dfa::IgnoreTag) + return fmt::format_to(ctx.out(), "{}({}) ::= {}", v.name, "ignore", v.pattern); + else + return fmt::format_to(ctx.out(), "{}({}) ::= {}", v.name, v.tag, v.pattern); + } +}; +} // namespace fmt diff --git a/src/regex_dfa/RuleParser.cpp b/src/regex_dfa/RuleParser.cpp new file mode 100644 index 0000000000..dda518404b --- /dev/null +++ b/src/regex_dfa/RuleParser.cpp @@ -0,0 +1,378 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT + +#include // special tags +#include +#include +#include +#include + +#include +#include +#include + +using namespace std; + +namespace regex_dfa +{ + +RuleParser::RuleParser(unique_ptr input, int firstTerminalId): + _stream { std::move(input) }, + _lastParsedRule { nullptr }, + _lastParsedRuleIsRef { false }, + _currentChar { 0 }, + _line { 1 }, + _column { 0 }, + _offset { 0 }, + _nextTag { firstTerminalId } +{ + consumeChar(); +} + +RuleParser::RuleParser(string input, int firstTerminalId): + RuleParser { make_unique(std::move(input)), firstTerminalId } +{ +} + +RuleList RuleParser::parseRules() +{ + RuleList rules; + + for (;;) + { + consumeSpace(); + if (eof()) + { + break; + } + else if (currentChar() == '\n') + { + consumeChar(); + } + else + { + parseRule(rules); + } + } + + // collect all condition labels, find all <*>-conditions, then replace their <*> with {collected + // conditions} + set conditions; + list starRules; + for (Rule& rule: rules) + { + for (const string& condition: rule.conditions) + { + if (condition != "*") + { + conditions.emplace(condition); + } + else + { + rule.conditions.clear(); + starRules.emplace_back(&rule); + } + } + } + for (Rule* rule: starRules) + for (const string& condition: conditions) + rule->conditions.emplace_back(condition); + + return rules; +} + +void RuleParser::parseRule(RuleList& rules) +{ + // Rule ::= RuleConditionList? BasicRule + // | RuleConditionList '{' BasicRule* '}' (LF | EOF)? + // BasicRule ::= TOKEN RuleOptions? SP '::=' SP RegEx SP? 
(LF | EOF) + // RuleOptions ::= '(' RuleOption (',' RuleOption)* + // RuleOption ::= ignore + + consumeSP(); + if (_currentChar == '|' && _lastParsedRule != nullptr) + { + consumeChar(); + consumeSP(); + const string pattern = parseExpression(); + _lastParsedRule->pattern += '|' + pattern; + return; + } + + // finalize ref-rule by surrounding it with round braces + if (_lastParsedRuleIsRef) + _lastParsedRule->pattern = fmt::format("({})", _lastParsedRule->pattern); + + vector conditions = parseRuleConditions(); + consumeSP(); + if (!conditions.empty() && currentChar() == '{') + { + consumeChar(); + consumeAnySP(); // allow whitespace, including LFs + while (!eof() && currentChar() != '}') + { + parseBasicRule(rules, vector(conditions)); + consumeSP(); // part of the next line, allow indentation + } + consumeChar('}'); + consumeSP(); + if (currentChar() == '\n') + consumeChar(); + else if (!eof()) + throw UnexpectedChar { _line, _column, _currentChar, '\n' }; + } + else + { + parseBasicRule(rules, std::move(conditions)); + } +} + +struct TestRuleForName +{ + string name; + bool operator()(const Rule& r) const { return r.name == name; } +}; + +void RuleParser::parseBasicRule(RuleList& rules, vector&& conditions) +{ + const unsigned int beginLine = _line; + const unsigned int beginColumn = _column; + + string token = consumeToken(); + bool ignore = false; + bool ref = false; + if (_currentChar == '(') + { + consumeChar(); + unsigned optionOffset = _offset; + string option = consumeToken(); + consumeChar(')'); + + if (option == "ignore") + ignore = true; + else if (option == "ref") + ref = true; + else + throw InvalidRuleOption { optionOffset, option }; + } + consumeSP(); + consumeAssoc(); + consumeSP(); + const unsigned int line = _line; + const unsigned int column = _column; + const string pattern = parseExpression(); + if (currentChar() == '\n') + consumeChar(); + else if (!eof()) + throw UnexpectedChar { _line, _column, _currentChar, '\n' }; + + const Tag tag = [&] { + if (ignore || ref) + return IgnoreTag; + else if (auto i = find_if(rules.begin(), rules.end(), TestRuleForName { token }); i != rules.end()) + return i->tag; + else + return _nextTag++; + }(); + + if (ref && !conditions.empty()) + throw InvalidRefRuleWithConditions { + beginLine, beginColumn, Rule { line, column, tag, std::move(conditions), token, pattern } + }; + + if (conditions.empty()) + conditions.emplace_back("INITIAL"); + + sort(conditions.begin(), conditions.end()); + + if (!ref) + { + if (auto i = find_if(rules.begin(), rules.end(), TestRuleForName { token }); i != rules.end()) + { + throw DuplicateRule { Rule { line, column, tag, std::move(conditions), token, pattern }, *i }; + } + else + { + rules.emplace_back(Rule { line, column, tag, conditions, token, pattern }); + _lastParsedRule = &rules.back(); + _lastParsedRuleIsRef = false; + } + } + else if (auto i = _refRules.find(token); i != _refRules.end()) + { + throw DuplicateRule { Rule { line, column, tag, std::move(conditions), token, pattern }, i->second }; + } + else + { + // TODO: throw if !conditions.empty(); + _refRules[token] = { line, column, tag, {}, token, pattern }; + _lastParsedRule = &_refRules[token]; + _lastParsedRuleIsRef = true; + } +} + +vector RuleParser::parseRuleConditions() +{ + // RuleConditionList ::= '<' ('*' | TOKEN (',' SP* TOKEN)) '>' + if (currentChar() != '<') + return {}; + + consumeChar(); + + if (currentChar() == '*') + { + consumeChar(); + consumeChar('>'); + return { "*" }; + } + + vector conditions { consumeToken() }; + + while 
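parseRuleConditions() and parseBasicRule() above implement the <condition, ...> prefixes: '*' expands to every condition name seen elsewhere, and rules without a prefix default to the INITIAL condition. A small sketch with made-up rule names:

    #include <regex_dfa/RuleParser.h>
    #include <iostream>

    using namespace regex_dfa;

    int main()
    {
        RuleParser parser { "<String>Char ::= [^\"]\n"
                            "Text ::= [a-z]+\n" };
        for (const auto& rule: parser.parseRules())
        {
            std::cout << rule.name << ':';
            for (const auto& condition: rule.conditions)
                std::cout << ' ' << condition;
            std::cout << '\n'; // prints "Char: String" and "Text: INITIAL"
        }
    }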
(currentChar() == ',') + { + consumeChar(); + consumeSP(); + conditions.emplace_back(consumeToken()); + } + + consumeChar('>'); + + return conditions; +} + +string RuleParser::parseExpression() +{ + // expression ::= " .... " + // | .... + + stringstream sstr; + + size_t i = 0; + size_t lastGraph = 0; + while (!eof() && _currentChar != '\n') + { + if (isgraph(_currentChar)) + lastGraph = i + 1; + i++; + sstr << consumeChar(); + } + string pattern = sstr.str().substr(0, lastGraph); // skips trailing spaces + + // replace all occurrences of {ref} + for (const pair& ref: _refRules) + { + const Rule& rule = ref.second; + const string name = fmt::format("{{{}}}", rule.name); + // for (size_t i = 0; (i = pattern.find(name, i)) != string::npos; i += rule.pattern.size()) { + // pattern.replace(i, name.size(), rule.pattern); + // } + size_t i = 0; + while ((i = pattern.find(name, i)) != string::npos) + { + pattern.replace(i, name.size(), rule.pattern); + i += rule.pattern.size(); + } + } + + return pattern; +} + +// skips space until LF or EOF +void RuleParser::consumeSpace() +{ + for (;;) + { + switch (_currentChar) + { + case ' ': + case '\t': + case '\r': consumeChar(); break; + case '#': + while (!eof() && _currentChar != '\n') + { + consumeChar(); + } + break; + default: return; + } + } +} + +char RuleParser::currentChar() const noexcept +{ + return _currentChar; +} + +char RuleParser::consumeChar(char ch) +{ + if (_currentChar != ch) + throw UnexpectedChar { _line, _column, _currentChar, ch }; + + return consumeChar(); +} + +char RuleParser::consumeChar() +{ + char t = _currentChar; + + _currentChar = _stream->get(); + if (!_stream->eof()) + { + _offset++; + if (t == '\n') + { + _line++; + _column = 1; + } + else + { + _column++; + } + } + + return t; +} + +bool RuleParser::eof() const noexcept +{ + return std::char_traits::eq(_currentChar, std::char_traits::eof()) || _stream->eof(); +} + +string RuleParser::consumeToken() +{ + stringstream sstr; + + if (!isalpha(_currentChar) || _currentChar == '_') + throw UnexpectedToken { _offset, _currentChar, "Token" }; + + do + sstr << consumeChar(); + while (isalnum(_currentChar) || _currentChar == '_'); + + return sstr.str(); +} + +void RuleParser::consumeAnySP() +{ + while (_currentChar == ' ' || _currentChar == '\t' || _currentChar == '\n') + consumeChar(); +} + +void RuleParser::consumeSP() +{ + while (_currentChar == ' ' || _currentChar == '\t') + consumeChar(); +} + +void RuleParser::consumeAssoc() +{ + consumeChar(':'); + consumeChar(':'); + consumeChar('='); +} + +} // namespace regex_dfa diff --git a/src/regex_dfa/RuleParser.h b/src/regex_dfa/RuleParser.h new file mode 100644 index 0000000000..393e26ec8c --- /dev/null +++ b/src/regex_dfa/RuleParser.h @@ -0,0 +1,187 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. 
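parseExpression() above substitutes {Name} references with the (parenthesized) pattern of rules marked (ref); such rules are not emitted as tokens themselves. A sketch with illustrative rule names:

    #include <regex_dfa/RuleParser.h>
    #include <iostream>

    using namespace regex_dfa;

    int main()
    {
        RuleParser parser { "Digit(ref) ::= [0-9]\n"
                            "Number ::= {Digit}+\n" };
        for (const auto& rule: parser.parseRules())
            std::cout << rule.name << " ::= " << rule.pattern << "\n"; // Number ::= ([0-9])+
    }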
You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT +#pragma once + +#include + +#include + +#include +#include +#include +#include +#include +#include + +namespace regex_dfa +{ + +class RuleParser +{ + public: + explicit RuleParser(std::unique_ptr input, int firstTerminalId = FirstUserTag); + explicit RuleParser(std::string input, int firstTerminalId = FirstUserTag); + + RuleList parseRules(); + + class UnexpectedChar; + class UnexpectedToken; + class InvalidRuleOption; + class InvalidRefRuleWithConditions; + class DuplicateRule; + + private: + void parseRule(RuleList& rules); + std::vector parseRuleConditions(); + void parseBasicRule(RuleList& rules, std::vector&& conditions); + std::string parseExpression(); + + private: + std::string consumeToken(); + void consumeAnySP(); + void consumeSP(); + void consumeAssoc(); + void consumeSpace(); + [[nodiscard]] char currentChar() const noexcept; + char consumeChar(char ch); + char consumeChar(); + [[nodiscard]] bool eof() const noexcept; + [[nodiscard]] std::string replaceRefs(const std::string& pattern); + + private: + std::unique_ptr _stream; + std::map _refRules; + Rule* _lastParsedRule; + bool _lastParsedRuleIsRef; + char _currentChar; + unsigned int _line; + unsigned int _column; + unsigned int _offset; + int _nextTag; +}; + +class RuleParser::InvalidRefRuleWithConditions: public std::runtime_error +{ + public: + InvalidRefRuleWithConditions(unsigned line, unsigned column, Rule rule): + std::runtime_error { fmt::format( + "{}:{}: Invalid rule \"{}\". Reference rules must not be labelled with conditions.", + line, + column, + rule.name) }, + _rule { std::move(rule) } + { + } + + [[nodiscard]] Rule const& rule() const noexcept { return _rule; } + + private: + Rule _rule; +}; + +class RuleParser::DuplicateRule: public std::runtime_error +{ + public: + DuplicateRule(Rule&& duplicate, const Rule& other): + std::runtime_error { fmt::format( + "{}:{}: Duplicated rule definition with name \"{}\", previously defined in {}:{}.", + duplicate.line, + duplicate.column, + duplicate.name, + other.line, + other.column) }, + _duplicate { std::move(duplicate) }, + _other { other } + { + } + + [[nodiscard]] Rule const& duplicate() const noexcept { return _duplicate; } + [[nodiscard]] Rule const& other() const noexcept { return _other; } + + private: + Rule _duplicate; + Rule const& _other; +}; + +class RuleParser::UnexpectedToken: public std::runtime_error +{ + public: + UnexpectedToken(unsigned offset, char actual, std::string expected): + std::runtime_error { fmt::format( + "{}: Unexpected token {}, expected <{}> instead.", offset, actual, expected) }, + _offset { offset }, + _actual { actual }, + _expected { std::move(expected) } + { + } + + [[nodiscard]] unsigned offset() const noexcept { return _offset; } + [[nodiscard]] char actual() const noexcept { return _actual; } + [[nodiscard]] const std::string& expected() const noexcept { return _expected; } + + private: + unsigned _offset; + char _actual; + std::string _expected; +}; + +class RuleParser::UnexpectedChar: public std::runtime_error +{ + public: + UnexpectedChar(unsigned int line, unsigned int column, char actual, char expected): + std::runtime_error { fmt::format("[{}:{}] Unexpected char {}, expected {} instead.", + line, + column, + quoted(actual), + quoted(expected)) }, + _line { line }, + _column { column }, + _actual { actual }, + _expected { expected } + { + } + + [[nodiscard]] unsigned int line() const noexcept { return _line; } + [[nodiscard]] unsigned int 
column() const noexcept { return _column; } + [[nodiscard]] char actual() const noexcept { return _actual; } + [[nodiscard]] char expected() const noexcept { return _expected; } + + private: + static std::string quoted(char ch) + { + if (std::char_traits::eq(ch, std::char_traits::eof())) + return "<>"; + else + return fmt::format("'{}'", static_cast(ch)); + } + + private: + unsigned int _line; + unsigned int _column; + char _actual; + char _expected; +}; + +class RuleParser::InvalidRuleOption: public std::runtime_error +{ + public: + InvalidRuleOption(unsigned offset, std::string option): + std::runtime_error { fmt::format("{}: Invalid rule option \"{}\".", offset, option) }, + _offset { offset }, + _option { option } + { + } + + [[nodiscard]] unsigned offset() const noexcept { return _offset; } + [[nodiscard]] const std::string& option() const noexcept { return _option; } + + private: + unsigned _offset; + std::string _option; +}; + +} // namespace regex_dfa diff --git a/src/regex_dfa/RuleParser_test.cpp b/src/regex_dfa/RuleParser_test.cpp new file mode 100644 index 0000000000..aae7fdc58f --- /dev/null +++ b/src/regex_dfa/RuleParser_test.cpp @@ -0,0 +1,247 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT + +#include + +#include + +#include +#include + +using namespace regex_dfa; + +TEST_CASE("regex_RuleParser.simple") +{ + RuleParser rp { "main ::= blah\n" }; + RuleList rules = rp.parseRules(); + REQUIRE(1 == rules.size()); + CHECK("blah" == rules[0].pattern); +} + +TEST_CASE("regex_RuleParser.whitespaces") +{ + RuleParser rp { "main ::= a\n\t| b | c\n" }; + RuleList rules = rp.parseRules(); + REQUIRE(1 == rules.size()); + CHECK("a|b | c" == rules[0].pattern); +} + +TEST_CASE("regex_RuleParser.rule_at_eof") +{ + RuleParser rp { "main ::= blah" }; + RuleList rules = rp.parseRules(); + REQUIRE(1 == rules.size()); + CHECK("blah" == rules[0].pattern); +} + +TEST_CASE("regex_RuleParser.simple_trailing_spaces") +{ + RuleParser rp { "main ::= blah\n " }; + RuleList rules = rp.parseRules(); + REQUIRE(1 == rules.size()); + CHECK("blah" == rules[0].pattern); +} + +TEST_CASE("regex_RuleParser.quotedPattern") +{ + RuleParser rp { "main ::= \"blah\"" }; + RuleList rules = rp.parseRules(); + REQUIRE(1 == rules.size()); + CHECK("\"blah\"" == rules[0].pattern); +} + +TEST_CASE("regex_RuleParser.multiQuotedPattern") +{ + RuleParser rp { R"(rule ::= "b"la"h")" }; + RuleList rules = rp.parseRules(); + REQUIRE(1 == rules.size()); + CHECK(R"("b"la"h")" == rules[0].pattern); +} + +TEST_CASE("regex_RuleParser.doubleQuote") +{ + RuleParser rp { R"(rule ::= \")" }; + RuleList rules = rp.parseRules(); + REQUIRE(1 == rules.size()); + CHECK(R"(\")" == rules[0].pattern); +} + +TEST_CASE("regex_RuleParser.spaceRule") +{ + RuleParser rp { R"(rule ::= [ \n\t]+)" }; + RuleList rules = rp.parseRules(); + REQUIRE(1 == rules.size()); + CHECK(R"([ \n\t]+)" == rules[0].pattern); +} + +TEST_CASE("regex_RuleParser.stringRule") +{ + RuleParser rp { R"(rule ::= \"[^\"]*\")" }; + RuleList rules = rp.parseRules(); + REQUIRE(1 == rules.size()); + CHECK(R"(\"[^\"]*\")" == rules[0].pattern); +} + +TEST_CASE("regex_RuleParser.ref") +{ + RuleParser rp { R"( + Foo(ref) ::= foo + Bar(ref) ::= bar + FooBar ::= {Foo}_{Bar} + )" }; + RuleList rules = rp.parseRules(); + 
REQUIRE(1 == rules.size()); + CHECK("(foo)_(bar)" == rules[0].pattern); +} + +TEST_CASE("regex_RuleParser.ref_duplicated") +{ + RuleParser rp { R"( + Foo(ref) ::= foo + Foo(ref) ::= bar + FooBar ::= {Foo} + )" }; + CHECK_THROWS_AS(rp.parseRules(), RuleParser::DuplicateRule); +} + +TEST_CASE("regex_RuleParser.multiline_alt") +{ + RuleParser rp { R"( + Rule1 ::= foo + | bar + Rule2(ref) ::= fnord + | hard + Rule3 ::= {Rule2} + | {Rule2} + )" }; + RuleList rules = rp.parseRules(); + REQUIRE(2 == rules.size()); + CHECK("foo|bar" == rules[0].pattern); + CHECK("(fnord|hard)|(fnord|hard)" == rules[1].pattern); +} + +TEST_CASE("regex_RuleParser.condition1") +{ + RuleParser rp { R"( + Rule1 ::= foo + Rule2 ::= bar + )" }; + RuleList rules = rp.parseRules(); + + REQUIRE(2 == rules.size()); + CHECK("foo" == rules[0].pattern); + CHECK("bar" == rules[1].pattern); + + REQUIRE(1 == rules[0].conditions.size()); + CHECK("foo" == rules[0].conditions[0]); + + REQUIRE(1 == rules[1].conditions.size()); + CHECK("bar" == rules[1].conditions[0]); +} + +TEST_CASE("regex_RuleParser.condition2") +{ + RuleParser rp { R"( + Rule1 ::= foo + Rule2 ::= bar + )" }; + RuleList rules = rp.parseRules(); + + REQUIRE(2 == rules.size()); + CHECK("foo" == rules[0].pattern); + CHECK("bar" == rules[1].pattern); + + REQUIRE(1 == rules[0].conditions.size()); + CHECK("foo" == rules[0].conditions[0]); + + REQUIRE(2 == rules[1].conditions.size()); + // in sorted order + CHECK("bar" == rules[1].conditions[0]); + CHECK("foo" == rules[1].conditions[1]); +} + +TEST_CASE("regex_RuleParser.conditional_star") +{ + RuleParser rp { R"( + Zero ::= zero + One ::= one + Two ::= two + <*>Tri ::= tri + )" }; + RuleList rules = rp.parseRules(); + + REQUIRE(4 == rules.size()); + + CHECK("zero" == rules[0].pattern); + REQUIRE(1 == rules[0].conditions.size()); + CHECK("INITIAL" == rules[0].conditions[0]); + + CHECK("one" == rules[1].pattern); + REQUIRE(1 == rules[1].conditions.size()); + CHECK("one" == rules[1].conditions[0]); + + CHECK("two" == rules[2].pattern); + REQUIRE(1 == rules[2].conditions.size()); + CHECK("two" == rules[2].conditions[0]); + + CHECK("tri" == rules[3].pattern); + REQUIRE(3 == rules[3].conditions.size()); + CHECK("INITIAL" == rules[3].conditions[0]); + CHECK("one" == rules[3].conditions[1]); + CHECK("two" == rules[3].conditions[2]); +} + +TEST_CASE("regex_RuleParser.grouped_conditions") +{ + RuleParser rp { R"( + Rule1 ::= foo + { + Rule2 ::= bar + } + )" }; + RuleList rules = rp.parseRules(); + + REQUIRE(2 == rules.size()); + CHECK("foo" == rules[0].pattern); + CHECK("bar" == rules[1].pattern); + + REQUIRE(1 == rules[1].conditions.size()); + CHECK("blah" == rules[1].conditions[0]); +} + +TEST_CASE("regex_RuleParser.InvalidRefRuleWithConditions") +{ + CHECK_THROWS_AS(RuleParser { "main(ref) ::= blah\n" }.parseRules(), + RuleParser::InvalidRefRuleWithConditions); +} + +TEST_CASE("regex_RuleParser.InvalidRuleOption") +{ + CHECK_THROWS_AS(RuleParser { "A(invalid) ::= a\n" }.parseRules(), RuleParser::InvalidRuleOption); +} + +TEST_CASE("regex_RuleParser.DuplicateRule") +{ + RuleParser rp { R"( + foo ::= abc + foo ::= def + )" }; + CHECK_THROWS_AS(rp.parseRules(), RuleParser::DuplicateRule); +} + +TEST_CASE("regex_RuleParser.UnexpectedChar") +{ + CHECK_THROWS_AS(RuleParser { "A :=" }.parseRules(), RuleParser::UnexpectedChar); + CHECK_THROWS_AS(RuleParser { " A ::= a" }.parseRules(), RuleParser::UnexpectedToken); + CHECK_THROWS_AS(RuleParser { "<> A ::= a" }.parseRules(), RuleParser::UnexpectedToken); + CHECK_THROWS_AS(RuleParser { " 
::= a" }.parseRules(), RuleParser::UnexpectedToken); +} diff --git a/src/regex_dfa/SourceLocation.cpp b/src/regex_dfa/SourceLocation.cpp new file mode 100644 index 0000000000..c9e6cd8267 --- /dev/null +++ b/src/regex_dfa/SourceLocation.cpp @@ -0,0 +1,27 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT + +#include + +#include + +using namespace std; + +namespace regex_dfa +{ + +string SourceLocation::source() const // TODO +{ + string code; + ifstream ifs(filename); + ifs.seekg(offset, ifs.beg); + code.resize(count); + ifs.read(&code[0], count); + return code; +} + +} // namespace regex_dfa diff --git a/src/regex_dfa/SourceLocation.h b/src/regex_dfa/SourceLocation.h new file mode 100644 index 0000000000..c69d7f7487 --- /dev/null +++ b/src/regex_dfa/SourceLocation.h @@ -0,0 +1,40 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT + +#pragma once + +#include + +namespace regex_dfa +{ + +struct SourceLocation +{ + std::string filename; + size_t offset; + size_t count; + + [[nodiscard]] long long int compare(const SourceLocation& other) const noexcept + { + if (filename == other.filename) + return (long) offset - (long) other.offset; + else if (filename < other.filename) + return -1; + else + return 1; + } + + [[nodiscard]] std::string source() const; + + bool operator==(const SourceLocation& other) const noexcept { return compare(other) == 0; } + bool operator<=(const SourceLocation& other) const noexcept { return compare(other) <= 0; } + bool operator>=(const SourceLocation& other) const noexcept { return compare(other) >= 0; } + bool operator<(const SourceLocation& other) const noexcept { return compare(other) < 0; } + bool operator>(const SourceLocation& other) const noexcept { return compare(other) > 0; } +}; + +} // namespace regex_dfa diff --git a/src/regex_dfa/State.cpp b/src/regex_dfa/State.cpp new file mode 100644 index 0000000000..76eaa27f26 --- /dev/null +++ b/src/regex_dfa/State.cpp @@ -0,0 +1,37 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. 
You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT + +#include + +#include + +using namespace std; + +namespace regex_dfa +{ + +string to_string(const StateIdVec& S, string_view stateLabelPrefix) +{ + StateIdVec names = S; + sort(names.begin(), names.end()); + + stringstream sstr; + sstr << "{"; + int i = 0; + for (StateId name: names) + { + if (i) + sstr << ", "; + sstr << stateLabelPrefix << name; + i++; + } + sstr << "}"; + + return sstr.str(); +} + +} // namespace regex_dfa diff --git a/src/regex_dfa/State.h b/src/regex_dfa/State.h new file mode 100644 index 0000000000..975dd8851e --- /dev/null +++ b/src/regex_dfa/State.h @@ -0,0 +1,53 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT +#pragma once + +#include + +#include + +#include +#include +#include +#include +#include +#include + +namespace regex_dfa +{ + +using Tag = int; +using StateId = size_t; +using StateIdVec = std::vector; + +using AcceptMap = std::map; + +/** + * Returns a human readable string of @p S, such as "{n0, n1, n2}". + */ +[[nodiscard]] std::string to_string(const StateIdVec& S, std::string_view stateLabelPrefix = "n"); + +} // namespace regex_dfa + +namespace fmt +{ +template <> +struct formatter +{ + template + constexpr auto parse(ParseContext& ctx) + { + return ctx.begin(); + } + + template + constexpr auto format(const regex_dfa::StateIdVec& v, FormatContext& ctx) + { + return fmt::format_to(ctx.out(), "{}", regex_dfa::to_string(v)); + } +}; +} // namespace fmt diff --git a/src/regex_dfa/State_test.cpp b/src/regex_dfa/State_test.cpp new file mode 100644 index 0000000000..4cb9074f1e --- /dev/null +++ b/src/regex_dfa/State_test.cpp @@ -0,0 +1,18 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT + +#include + +#include + +#include + +TEST_CASE("regex_State.to_string") +{ + regex_dfa::StateIdVec v { 1, 2, 3 }; + CHECK("{n1, n2, n3}" == fmt::format("{}", v)); +} diff --git a/src/regex_dfa/Symbols.cpp b/src/regex_dfa/Symbols.cpp new file mode 100644 index 0000000000..630670740b --- /dev/null +++ b/src/regex_dfa/Symbols.cpp @@ -0,0 +1,184 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. 
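to_string() above copies and sorts the state ids before printing and accepts an optional label prefix (defaulting to "n"); the fmt::formatter specialization in State.h simply delegates to it. A short sketch, not part of the patch; the <regex_dfa/...> include path mirrors the convention used elsewhere in this change.

// Usage sketch for regex_dfa::to_string(StateIdVec) and its fmt formatter.
#include <regex_dfa/State.h>

#include <fmt/format.h>

#include <iostream>

int main()
{
    regex_dfa::StateIdVec const states { 3, 1, 2 };

    std::cout << regex_dfa::to_string(states) << "\n";      // {n1, n2, n3}
    std::cout << regex_dfa::to_string(states, "q") << "\n"; // {q1, q2, q3}
    std::cout << fmt::format("{}", states) << "\n";         // {n1, n2, n3}
}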
You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT + +#include + +#include + +using namespace std; + +namespace regex_dfa +{ + +string prettySymbol(Symbol input) +{ + switch (input) + { + case Symbols::Error: return "<>"; + case Symbols::BeginOfLine: return "<>"; + case Symbols::EndOfLine: return "<>"; + case Symbols::EndOfFile: return "<>"; + case Symbols::Epsilon: return "ε"; + case '\a': return "\\a"; + case '\b': return "\\b"; + case '\f': return "\\f"; + case '\n': return "\\n"; + case '\r': return "\\r"; + case ' ': return "\\s"; + case '\t': return "\\t"; + case '\v': return "\\v"; + case '\0': return "\\0"; + case '.': return "\\."; // so we can distinguish from dot-operator + default: + if (isprint(input)) + { + return fmt::format("{}", (char) input); + } + else + { + return fmt::format("\\x{:02x}", input); + } + } +} + +string prettyCharRange(Symbol ymin, Symbol ymax) +{ + assert(ymin <= ymax); + + stringstream sstr; + switch (ymax - ymin) + { + case 0: sstr << prettySymbol(ymin); break; + case 1: sstr << prettySymbol(ymin) << prettySymbol(ymin + 1); break; + case 2: sstr << prettySymbol(ymin) << prettySymbol(ymin + 1) << prettySymbol(ymax); break; + default: sstr << prettySymbol(ymin) << '-' << prettySymbol(ymax); break; + } + return sstr.str(); +} + +string groupCharacterClassRanges(const vector& syms) +{ + // {1,3,5,a,b,c,d,e,f,z] + // -> + // {{1}, {3}, {5}, {a-f}, {z}} + + stringstream sstr; + Symbol ymin = '\0'; + Symbol ymax = ymin; + int k = 0; + + for (size_t i = 0, e = syms.size(); i != e; ++i) + { + if (!syms[i]) + continue; + + const Symbol c = (Symbol) i; + if (c == ymax + 1) + { // range growing + ymax = c; + } + else + { // gap found + if (k) + { + sstr << prettyCharRange(ymin, ymax); + } + ymin = ymax = c; + } + k++; + } + sstr << prettyCharRange(ymin, ymax); + + return sstr.str(); +} + +string groupCharacterClassRanges(vector chars) +{ + // we took a copy in tgroup here, so I can sort() later + sort(chars.begin(), chars.end()); + + if (chars.size() == 1) + return prettySymbol(chars.front()); + + // {1,3,5,a,b,c,d,e,f,z] + // -> + // "123a-fz" + + stringstream sstr; + Symbol ymin = 0; + Symbol ymax = ymin; + int i = 0; + + for (Symbol c: chars) + { + if (c == ymax + 1) + { // range growing + ymax = c; + } + else + { // gap found + if (i) + { + sstr << prettyCharRange(ymin, ymax); + } + ymin = ymax = c; + } + i++; + } + sstr << prettyCharRange(ymin, ymax); + + return sstr.str(); +} + +SymbolSet::SymbolSet(DotMode): set_(256, true), size_ { 255 }, hash_ { 2166136261 } +{ + set_[(size_t) '\n'] = false; + for (Symbol s: *this) + { + hash_ = (hash_ * 16777619) ^ s; + } +} + +bool SymbolSet::isDot() const noexcept +{ + static SymbolSet dot(SymbolSet::Dot); + return *this == dot; +} + +string SymbolSet::to_string() const +{ + if (isDot()) + return "."; + + return groupCharacterClassRanges(set_); +} + +void SymbolSet::complement() +{ + // flip bits + for (size_t i = 0, e = set_.size(); i != e; ++i) + { + set_[i] = !set_[i]; + } + + // flip size + size_ = set_.size() - size_; + + recalculateHash(); +} + +void SymbolSet::recalculateHash() +{ + // recalculate hash + hash_ = 2166136261; + for (Symbol s: *this) + { + hash_ = (hash_ * 16777619) ^ s; + } +} + +} // namespace regex_dfa diff --git a/src/regex_dfa/Symbols.h b/src/regex_dfa/Symbols.h new file mode 100644 index 0000000000..bb8a5488e1 --- /dev/null +++ b/src/regex_dfa/Symbols.h @@ -0,0 +1,208 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 
2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT +#pragma once + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace regex_dfa +{ + +//! input symbol as used for transitions +using Symbol = int; + +[[nodiscard]] std::string prettySymbol(Symbol input); +[[nodiscard]] std::string prettyCharRange(Symbol ymin, Symbol ymax); +[[nodiscard]] std::string groupCharacterClassRanges(const std::vector& syms); +[[nodiscard]] std::string groupCharacterClassRanges(std::vector syms); + +// new way of wrapping up Symbols +struct Symbols +{ + // NOLINTBEGIN(readability-identifier-naming) + constexpr static Symbol Epsilon = -1; + constexpr static Symbol Error = -2; + constexpr static Symbol BeginOfLine = -3; + constexpr static Symbol EndOfLine = -4; + constexpr static Symbol EndOfFile = -5; + constexpr static Symbol Character(char ch) { return Symbol(ch); } + // NOLINTEND(readability-identifier-naming) + + [[nodiscard]] constexpr static bool isSpecial(Symbol s) + { + switch (s) + { + case Symbols::EndOfFile: + case Symbols::EndOfLine: + case Symbols::BeginOfLine: + case Symbols::Epsilon: + case Symbols::Error: return true; + default: return false; + } + } +}; + +/** + * Represents a set of symbols. + */ +class SymbolSet +{ + public: + enum DotMode + { + Dot + }; + + explicit SymbolSet(DotMode); + SymbolSet(): set_(256, false), size_ { 0 }, hash_ { 2166136261 } {} + + explicit SymbolSet(std::initializer_list list): SymbolSet() + { + std::for_each(list.begin(), list.end(), [this](Symbol s) { insert(s); }); + } + + [[nodiscard]] bool empty() const noexcept { return size_ == 0; } + [[nodiscard]] size_t size() const noexcept { return size_; } + + //! Transforms into the complement set. + void complement(); + + //! Inserts given Symbol @p s into this set. + void insert(Symbol s) + { + if (!contains(s)) + { + set_[s] = true; + hash_ = (hash_ * 16777619) ^ s; + size_++; + } + } + + //! Inserts a range of Simples between [a, b]. + void insert(const std::pair& range) + { + for (Symbol s = range.first; s <= range.second; ++s) + { + insert(s); + } + } + + //! @returns whether or not given Symbol @p s is in this set. + [[nodiscard]] bool contains(Symbol s) const + { + assert(s >= 0 && s <= 255 && "Only ASCII allowed."); + return set_[(size_t) s]; + } + + //! Tests whether or not this SymbolSet can be represented as dot (.), i.e. all but \n. + [[nodiscard]] bool isDot() const noexcept; + + //! 
@returns a human readable representation of this set + [[nodiscard]] std::string to_string() const; + + bool operator==(const SymbolSet& rhs) const noexcept { return hash_ == rhs.hash_ && set_ == rhs.set_; } + bool operator!=(const SymbolSet& rhs) const noexcept { return !(*this == rhs); } + + class const_iterator // NOLINT(readability-identifier-naming) + { // {{{ + public: + const_iterator(std::vector::const_iterator beg, + std::vector::const_iterator end, + size_t n): + beg_ { beg }, end_ { end }, offset_ { n } + { + while (beg_ != end_ && !*beg_) + { + ++beg_; + ++offset_; + } + } + + Symbol operator*() const { return static_cast(offset_); } + + const_iterator& operator++(int) + { + do + { + ++beg_; + ++offset_; + } while (beg_ != end_ && !*beg_); + return *this; + } + + const_iterator& operator++() + { + do + { + beg_++; + offset_++; + } while (beg_ != end_ && !*beg_); + return *this; + } + + bool operator==(const const_iterator& rhs) const noexcept { return beg_ == rhs.beg_; } + bool operator!=(const const_iterator& rhs) const noexcept { return beg_ != rhs.beg_; } + + private: + std::vector::const_iterator beg_; + std::vector::const_iterator end_; + size_t offset_; + }; // }}} + + [[nodiscard]] const_iterator begin() const { return const_iterator(set_.begin(), set_.end(), 0); } + [[nodiscard]] const_iterator end() const { return const_iterator(set_.end(), set_.end(), set_.size()); } + + [[nodiscard]] size_t hash() const noexcept { return hash_; } + + private: + void recalculateHash(); + + private: + // XXX we chose vector as it is an optimized bit vector + std::vector set_; + size_t size_; + size_t hash_; +}; + +} // namespace regex_dfa + +namespace fmt +{ +template <> +struct formatter +{ + template + constexpr auto parse(ParseContext& ctx) + { + return ctx.begin(); + } + + template + constexpr auto format(const regex_dfa::SymbolSet& v, FormatContext& ctx) + { + return fmt::format_to(ctx.out(), "{}", v.to_string()); + } +}; +} // namespace fmt + +namespace std +{ +template <> +struct hash +{ + size_t operator()(const regex_dfa::SymbolSet& set) const { return set.hash(); } +}; +} // namespace std diff --git a/src/regex_dfa/Symbols_test.cpp b/src/regex_dfa/Symbols_test.cpp new file mode 100644 index 0000000000..3374865b1b --- /dev/null +++ b/src/regex_dfa/Symbols_test.cpp @@ -0,0 +1,112 @@ +// This file is part of the "x0" project, http://github.com/christianparpart/x0> +// (c) 2009-2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT + +#include + +#include + +using namespace std; +using regex_dfa::SymbolSet; + +TEST_CASE("regex_SymbolSet.s0") +{ + SymbolSet s0; + REQUIRE(0 == s0.size()); // NOLINT(readability-container-size-empty) + REQUIRE(s0.empty()); +} + +TEST_CASE("regex_SymbolSet.s1") +{ + SymbolSet s1; + + // first add + s1.insert('a'); + CHECK(1 == s1.size()); + REQUIRE_FALSE(s1.empty()); + + // overwrite + s1.insert('a'); + CHECK(1 == s1.size()); + REQUIRE_FALSE(s1.empty()); +} + +TEST_CASE("regex_SymbolSet.initializer_list") +{ + SymbolSet a { 'a' }; + CHECK(1 == a.size()); + CHECK(a.contains('a')); + + SymbolSet s2 { 'a', 'b', 'b', 'c' }; + CHECK(3 == s2.size()); + CHECK("abc" == s2.to_string()); +} + +TEST_CASE("regex_SymbolSet.dot") +{ + SymbolSet dot(SymbolSet::Dot); + REQUIRE(!dot.contains('\n')); + CHECK(dot.contains('\0')); + CHECK(dot.contains(' ')); + CHECK(dot.isDot()); + CHECK("." 
== dot.to_string()); +} + +TEST_CASE("regex_SymbolSet.complement") +{ + SymbolSet s; + s.insert('\n'); + CHECK("\\n" == s.to_string()); + s.complement(); + CHECK("." == s.to_string()); +} + +TEST_CASE("regex_SymbolSet.range") +{ + SymbolSet r; + r.insert(make_pair('a', 'f')); + + CHECK(6 == r.size()); + CHECK("a-f" == r.to_string()); + + r.insert(make_pair('0', '9')); + CHECK(16 == r.size()); + CHECK("0-9a-f" == r.to_string()); +} + +TEST_CASE("regex_SymbolSet.fmt_format") +{ + SymbolSet s; + s.insert(make_pair('0', '9')); + s.insert(make_pair('a', 'f')); + + CHECK("0-9a-f" == fmt::format("{}", s)); +} + +TEST_CASE("regex_SymbolSet.hash_map") +{ + SymbolSet s0; + SymbolSet s1 { 'a' }; + SymbolSet s2 { 'a', 'b' }; + + unordered_map map; + map[s0] = 0; + map[s1] = 1; + map[s2] = 2; + + CHECK(0 == map[s0]); + CHECK(1 == map[s1]); + CHECK(2 == map[s2]); +} + +TEST_CASE("regex_SymbolSet.compare") +{ + SymbolSet s1 { 'a', 'b' }; + SymbolSet s2 { 'a', 'b' }; + SymbolSet s3 { 'a', 'c' }; + REQUIRE(s1 == s2); + REQUIRE(s1 != s3); +} diff --git a/src/regex_dfa/TransitionMap-inl.h b/src/regex_dfa/TransitionMap-inl.h new file mode 100644 index 0000000000..df949ecab7 --- /dev/null +++ b/src/regex_dfa/TransitionMap-inl.h @@ -0,0 +1,49 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT + +#include +#include + +#include + +namespace regex_dfa +{ + +inline void TransitionMap::define(StateId currentState, Symbol charCat, StateId nextState) +{ + mapping_[currentState][charCat] = nextState; +} + +inline StateId TransitionMap::apply(StateId currentState, Symbol charCat) const +{ + if (auto i = mapping_.find(currentState); i != mapping_.end()) + if (auto k = i->second.find(charCat); k != i->second.end()) + return k->second; + + return ErrorState; +} + +inline std::vector TransitionMap::states() const +{ + std::vector v; + v.reserve(mapping_.size()); + for (const auto& i: mapping_) + v.push_back(i.first); + std::sort(v.begin(), v.end()); + return v; +} + +inline std::map TransitionMap::map(StateId inputState) const +{ + std::map m; + if (auto mapping = mapping_.find(inputState); mapping != mapping_.end()) + for (const auto& i: mapping->second) + m[i.first] = i.second; + return m; +} + +} // namespace regex_dfa diff --git a/src/regex_dfa/TransitionMap.h b/src/regex_dfa/TransitionMap.h new file mode 100644 index 0000000000..5b0693748b --- /dev/null +++ b/src/regex_dfa/TransitionMap.h @@ -0,0 +1,66 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT +#pragma once + +#include + +#include +#include + +namespace regex_dfa +{ + +using CharCatId = int; + +constexpr CharCatId ErrorCharCat = static_cast(-1); + +/** + * Represents an error-state, such as invalid input character or unexpected EOF. + */ +constexpr StateId ErrorState { 808080 }; // static_cast(-1); + +/** + * Transition mapping API to map the input (currentState, charCat) to (newState). 
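The TransitionMap declared just below is the DFA's transition table: define() records (state, symbol) -> state edges, apply() looks them up and returns ErrorState for anything undefined, and states() lists every state with outgoing transitions. A minimal sketch of driving that API, not part of the patch; the state numbers and symbols are arbitrary.

// Usage sketch for regex_dfa::TransitionMap with a two-edge toy automaton.
#include <regex_dfa/TransitionMap.h>

#include <iostream>
#include <string>

int main()
{
    using namespace regex_dfa;

    TransitionMap tm;
    tm.define(0, 'a', 1); // state 0 --a--> state 1
    tm.define(1, 'b', 2); // state 1 --b--> state 2

    StateId s = 0;
    for (char ch: std::string("ab"))
        s = tm.apply(s, ch);

    std::cout << s << "\n";                                // 2: reached via 'a' then 'b'
    std::cout << (tm.apply(0, 'x') == ErrorState) << "\n"; // 1: undefined transition
    std::cout << tm.states().size() << "\n";               // 2: states with outgoing edges
}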
+ */
+class TransitionMap
+{
+  public:
+    using Container = std::map<StateId, std::map<Symbol, StateId>>;
+
+    TransitionMap(): mapping_ {} {}
+
+    TransitionMap(Container mapping): mapping_ { std::move(mapping) } {}
+
+    /**
+     * Defines a new mapping for (currentState, charCat) to (nextState).
+     */
+    void define(StateId currentState, Symbol charCat, StateId nextState);
+
+    /**
+     * Retrieves the next state for the input (currentState, charCat).
+     *
+     * @returns the transition from (currentState, charCat) to (nextState) or ErrorState if not defined.
+     */
+    [[nodiscard]] StateId apply(StateId currentState, Symbol charCat) const;
+
+    /**
+     * Retrieves a list of all available states.
+     */
+    [[nodiscard]] std::vector<StateId> states() const;
+
+    /**
+     * Retrieves a map of all transitions from given state @p inputState.
+     */
+    [[nodiscard]] std::map<Symbol, StateId> map(StateId inputState) const;
+
+  private:
+    Container mapping_;
+};
+
+} // namespace regex_dfa
+
+#include <regex_dfa/TransitionMap-inl.h>
diff --git a/src/regex_dfa/klex_test.cpp b/src/regex_dfa/klex_test.cpp
new file mode 100644
index 0000000000..17f2164e24
--- /dev/null
+++ b/src/regex_dfa/klex_test.cpp
@@ -0,0 +1,13 @@
+// This file is part of the "klex" project, http://github.com/christianparpart/klex>
+// (c) 2009-2018 Christian Parpart
+//
+// Licensed under the MIT License (the "License"); you may not use this
+// file except in compliance with the License. You may obtain a copy of
+// the License at: http://opensource.org/licenses/MIT
+
+#include <regex_dfa/util/testing.h>
+
+int main(int argc, const char* argv[])
+{
+    return regex_dfa::util::testing::main(argc, argv);
+}
diff --git a/src/regex_dfa/regex_dfa_test.cpp b/src/regex_dfa/regex_dfa_test.cpp
new file mode 100644
index 0000000000..45742d7742
--- /dev/null
+++ b/src/regex_dfa/regex_dfa_test.cpp
@@ -0,0 +1,25 @@
+/**
+ * This file is part of the "libterminal" project
+ * Copyright (c) 2019-2020 Christian Parpart
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#define CATCH_CONFIG_RUNNER
+#include <catch2/catch.hpp>
+
+int main(int argc, char const* argv[])
+{
+    int const result = Catch::Session().run(argc, argv);
+
+    // avoid the external console closing immediately on VSCode/Windows
+    // system("pause");
+
+    return result;
+}
diff --git a/src/regex_dfa/util/iterator-detail.h b/src/regex_dfa/util/iterator-detail.h
new file mode 100644
index 0000000000..948beabf06
--- /dev/null
+++ b/src/regex_dfa/util/iterator-detail.h
@@ -0,0 +1,177 @@
+// This file is part of the "klex" project, http://github.com/christianparpart/klex>
+// (c) 2018 Christian Parpart
+//
+// Licensed under the MIT License (the "License"); you may not use this
+// file except in compliance with the License.
You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT + +#include +#include +#include + +namespace regex_dfa::util::detail +{ + +template +struct reversed +{ + const Container container; + + auto begin() { return container.crbegin(); } + auto end() { return container.crend(); } +}; + +template +struct indexed +{ + Container& container; + + struct iterator + { + typename Container::iterator iter; + std::size_t index = 0; + + iterator& operator++() + { + ++iter; + ++index; + return *this; + } + + iterator& operator++(int) + { + ++*this; + return *this; + } + + auto operator*() const { return std::make_pair(index, *iter); } + + bool operator==(const iterator& rhs) const noexcept { return iter == rhs.iter; } + bool operator!=(const iterator& rhs) const noexcept { return iter != rhs.iter; } + }; + + struct const_iterator + { + typename Container::const_iterator iter; + std::size_t index = 0; + + const_iterator& operator++() + { + ++iter; + ++index; + return *this; + } + + const_iterator& operator++(int) + { + ++*this; + return *this; + } + + auto operator*() const { return std::make_pair(index, *iter); } + + bool operator==(const const_iterator& rhs) const noexcept { return iter == rhs.iter; } + bool operator!=(const const_iterator& rhs) const noexcept { return iter != rhs.iter; } + }; + + auto begin() const + { + if constexpr (std::is_const::value) + return const_iterator { container.cbegin() }; + else + return iterator { container.begin() }; + } + + auto end() const + { + if constexpr (std::is_const::value) + return const_iterator { container.cend() }; + else + return iterator { container.end() }; + } +}; + +template +struct filter +{ + Container& container; + Lambda proc; + + struct iterator + { + using iterator_category = std::forward_iterator_tag; + using value_type = typename Container::value_type; + using difference_type = long; + using pointer = value_type*; + using reference = value_type&; + + typename Container::iterator i; + typename Container::iterator e; + Lambda filter; + + auto operator*() const { return *i; } + + iterator& operator++() + { + ++i; + while (i != e && !filter(*i)) + ++i; + return *this; + } + + iterator& operator++(int) { return ++*this; } + + bool operator==(const iterator& rhs) const noexcept { return i == rhs.i; } + bool operator!=(const iterator& rhs) const noexcept { return !(*this == rhs); } + }; + + struct const_iterator + { + typename Container::const_iterator i; + typename Container::const_iterator e; + Lambda filter; + + auto operator*() const { return *i; } + + const_iterator& operator++() + { + ++i; + while (i != e && !filter(*i)) + ++i; + return *this; + } + + const_iterator& operator++(int) { return ++*this; } + + bool operator==(const const_iterator& rhs) const noexcept { return i == rhs.i; } + bool operator!=(const const_iterator& rhs) const noexcept { return !(*this == rhs); } + }; + + auto begin() const + { + if constexpr (std::is_const::value) + { + auto i = const_iterator { std::cbegin(container), std::cend(container), proc }; + while (i != end() && !proc(*i)) + ++i; + return i; + } + else + { + auto i = iterator { std::begin(container), std::end(container), proc }; + while (i != end() && !proc(*i)) + ++i; + return i; + } + } + + auto end() const + { + if constexpr (std::is_const::value) + return const_iterator { std::cend(container), std::cend(container), proc }; + else + return iterator { std::end(container), std::end(container), proc }; + } +}; + +} // namespace regex_dfa::util::detail diff --git 
a/src/regex_dfa/util/iterator.h b/src/regex_dfa/util/iterator.h new file mode 100644 index 0000000000..81c95838d9 --- /dev/null +++ b/src/regex_dfa/util/iterator.h @@ -0,0 +1,109 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT + +#pragma once + +#include + +#include +#include +#include +#include +#include +#include + +namespace regex_dfa::util +{ + +template +inline auto reversed(Container&& c) +{ + if constexpr (std::is_reference::value) + return detail::reversed { std::forward(c) }; + else + return detail::reversed { std::forward(c) }; +} + +template +inline auto indexed(const Container& c) +{ + return typename std::add_const>::type { c }; +} + +template +inline auto indexed(Container& c) +{ + return detail::indexed { c }; +} + +template +inline auto translate(const Container& container, Lambda mapfn) +{ + using namespace std; + using T = decltype(mapfn(*begin(container))); + + vector out; + out.reserve(distance(begin(container), end(container))); + transform(begin(container), end(container), back_inserter(out), std::move(mapfn)); + + return out; +} + +template +inline std::string join(const Container& container, const std::string& separator = ", ") +{ + std::stringstream out; + + for (const auto&& [i, v]: indexed(container)) + if (i) + out << separator << v; + else + out << v; + + return out.str(); +} + +template +inline auto filter(std::initializer_list&& c, Lambda proc) +{ + return typename std::add_const, Lambda>>::type { c, proc }; +} + +template +inline auto filter(const Container& c, Lambda proc) +{ + return typename std::add_const>::type { c, proc }; +} + +template +inline auto filter(Container& c, Lambda proc) +{ + return detail::filter { c, proc }; +} + +/** + * Finds the last occurence of a given element satisfying @p test. + * + * @returns the iterator representing the last item satisfying @p test or @p end if none found. + */ +template +auto find_last(const Container& container, Test test) -> decltype(std::cbegin(container)) +{ + auto begin = std::cbegin(container); + auto end = std::cend(container); + + for (auto i = std::prev(end); i != begin; --i) + if (test(*i)) + return i; + + if (test(*begin)) + return begin; + else + return end; +} + +} // namespace regex_dfa::util diff --git a/src/regex_dfa/util/iterator_test.cpp b/src/regex_dfa/util/iterator_test.cpp new file mode 100644 index 0000000000..aa41e5a5dd --- /dev/null +++ b/src/regex_dfa/util/iterator_test.cpp @@ -0,0 +1,182 @@ +// This file is part of the "klex" project, http://github.com/christianparpart/klex> +// (c) 2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. 
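The helpers in regex_dfa/util/iterator.h compose: filter() yields a lazily filtered view, translate() maps a container into a std::vector of the mapped type, join() stringifies with a separator (", " by default), and indexed() pairs each element with its index. A combined sketch, not part of the patch.

// Usage sketch combining filter(), translate(), join() and indexed() from regex_dfa::util.
#include <regex_dfa/util/iterator.h>

#include <iostream>
#include <string>
#include <vector>

int main()
{
    using namespace regex_dfa::util;

    const std::vector<int> numbers { 1, 2, 3, 4, 5 };

    // keep the odd values only
    std::vector<int> odds;
    for (int n: filter(numbers, [](int x) { return x % 2 != 0; }))
        odds.push_back(n);

    // stringify and join them: prints "1, 3, 5"
    std::cout << join(translate(odds, [](int n) { return std::to_string(n); })) << "\n";

    // enumerate with indices: prints "0: 1", "1: 2", ...
    for (const auto&& [index, value]: indexed(numbers))
        std::cout << index << ": " << value << "\n";
}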
You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT + +#include + +#include + +#include + +#include +#include +#include +#include + +using namespace std; +using namespace regex_dfa::util; + +TEST_CASE("util_iterator_reversed.empty") +{ + const vector v; + auto x = reversed(v); + auto i = begin(x); + REQUIRE(i == end(x)); +} + +TEST_CASE("util_iterator_reversed.one") +{ + const vector v { 1 }; + auto x = reversed(v); + auto i = begin(x); + REQUIRE(1 == *i); + i++; + REQUIRE(i == end(x)); +} + +TEST_CASE("util_iterator_reversed.many") +{ + const vector v { 1, 2, 3 }; + auto x = reversed(v); + auto i = begin(x); + REQUIRE(3 == *i); + i++; + REQUIRE(2 == *i); + i++; + REQUIRE(1 == *i); + i++; + REQUIRE(i == end(x)); +} + +TEST_CASE("util_iterator_indexed.many_const") +{ + const vector v { 10, 20, 30 }; + const auto x = indexed(v); + static_assert(is_const::value); + auto i = begin(x); + + REQUIRE(0 == (*i).first); + REQUIRE(10 == (*i).second); + i++; + + REQUIRE(1 == (*i).first); + REQUIRE(20 == (*i).second); + i++; + + REQUIRE(2 == (*i).first); + REQUIRE(30 == (*i).second); + i++; + + REQUIRE(i == end(x)); +} + +TEST_CASE("util_iterator_indexed.many") +{ + vector v { "zero", "one", "two" }; + auto x = indexed(v); + auto i = begin(x); + + REQUIRE(0 == (*i).first); + REQUIRE("zero" == (*i).second); + i++; + + REQUIRE(1 == (*i).first); + REQUIRE("one" == (*i).second); + i++; + + REQUIRE(2 == (*i).first); + REQUIRE("two" == (*i).second); + i++; + + REQUIRE(i == end(x)); +} + +TEST_CASE("util_iterator_indexed.range_based_for_loop") +{ + INFO("const:"); + const vector v1 { 10, 20, 30 }; + for (const auto&& [index, value]: indexed(v1)) + INFO(fmt::format("index {}, value {}", index, value)); + + INFO("non-const:"); + vector v2 { 10, 20, 30 }; + for (const auto&& [index, value]: indexed(v2)) + INFO(fmt::format("index {}, value {}", index, value)); +} + +TEST_CASE("util_iterator_filter.for_range") +{ + const vector nums = { 1, 2, 3, 4 }; + vector odds; + for (const int i: filter(nums, [](int x) { return x % 2 != 0; })) + odds.push_back(i); + + REQUIRE(2 == odds.size()); + REQUIRE(1 == odds[0]); + CHECK(3 == odds[1]); +} + +TEST_CASE("util_iterator_filter.count_proc_invocations") +{ + static const array numbers = { 1, 2, 3, 4 }; + int count = 0; + auto counter = [&](int) { + ++count; + return true; + }; + const auto f = filter(numbers, counter); + for_each(begin(f), end(f), [](int) {}); + REQUIRE(4 == count); +} + +TEST_CASE("util_iterator_filter.for_range_initializer_list") +{ + static const array numbers = { 1, 2, 3, 4 }; + vector odds; + auto f_odd = [&](int x) { + INFO(fmt::format("f_odd: x={0}", x)); + return x % 2 != 0; + }; + for (const int i: filter(numbers, f_odd)) + odds.push_back(i); + + REQUIRE(2 == odds.size()); + CHECK(1 == odds[0]); + CHECK(3 == odds[1]); +} + +TEST_CASE("util_iterator_translate.vector") +{ + const vector in { 1, 2, 3, 4 }; + const vector out = translate(in, [](auto i) -> int { return int(i * 2); }); + + for (const auto&& [i, v]: indexed(out)) + INFO(fmt::format("out[{}] = {}", i, v)); + + REQUIRE(4 == out.size()); + + CHECK(2 == out[0]); + CHECK(4 == out[1]); + CHECK(6 == out[2]); + CHECK(8 == out[3]); +} + +TEST_CASE("util_iterator_translate.chain_translate_join") +{ + const vector in { 1, 2, 3, 4 }; + const string out { join(translate(in, [](int i) -> string { return to_string(i); }), ", ") }; + + REQUIRE("1, 2, 3, 4" == out); +} + +TEST_CASE("util_iterator.find_last") +{ + const vector v { 1, 2, 3, 4 }; + const auto i = find_last(v, [](int 
i) { return i % 2 != 0; }); // find last odd value -> 3 + + REQUIRE(i != end(v)); + REQUIRE(3 == *i); +} diff --git a/src/regex_dfa/util/literals.h b/src/regex_dfa/util/literals.h new file mode 100644 index 0000000000..9a1f9bc698 --- /dev/null +++ b/src/regex_dfa/util/literals.h @@ -0,0 +1,75 @@ +// This file is part of the "x0" project, http://github.com/christianparpart/x0> +// (c) 2009-2018 Christian Parpart +// +// Licensed under the MIT License (the "License"); you may not use this +// file except in compliance with the License. You may obtain a copy of +// the License at: http://opensource.org/licenses/MIT + +#pragma once + +#include +#include +#include + +namespace regex_dfa::util::literals +{ + +/** + * Strips a multiline string's indentation prefix. + * + * Example: + * \code + * string s = R"(|line one + * |line two + * |line three + * )"_multiline; + * fmt::print(s); + * \endcode + * + * This prints three lines: @c "line one\nline two\nline three\n" + */ +inline std::string operator""_multiline(const char* text, size_t /*size*/) +{ + if (!*text) + return {}; + + enum class State + { + LineData, + SkipUntilPrefix, + }; + + constexpr char LF = '\n'; + State state = State::LineData; + std::stringstream sstr; + char sep = *text++; + + while (*text) + { + switch (state) + { + case State::LineData: + if (*text == LF) + { + state = State::SkipUntilPrefix; + sstr << *text++; + } + else + sstr << *text++; + break; + case State::SkipUntilPrefix: + if (*text == sep) + { + state = State::LineData; + text++; + } + else + text++; + break; + } + } + + return sstr.str(); +} + +} // namespace regex_dfa::util::literals diff --git a/src/vtbackend/CMakeLists.txt b/src/vtbackend/CMakeLists.txt index 4ad0ab1cf1..fc00e9b5f7 100644 --- a/src/vtbackend/CMakeLists.txt +++ b/src/vtbackend/CMakeLists.txt @@ -103,6 +103,8 @@ target_link_libraries(vtbackend PUBLIC fmt::fmt-header-only range-v3::range-v3 ${LIBUNICODE_LIBS} + regex_dfa + ctre::ctre vtparser vtpty ) diff --git a/src/vtbackend/Settings.h b/src/vtbackend/Settings.h index cfc77865f3..a907ca9c03 100644 --- a/src/vtbackend/Settings.h +++ b/src/vtbackend/Settings.h @@ -65,6 +65,8 @@ struct Settings bool highlightDoubleClickedWord = true; // TODO: ^^^ make also use of it. probably rename to how VScode has named it. + std::string urlPattern = R"((https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]*[-A-Za-z0-9+&@#/%=~_|])"; + struct PrimaryScreen { bool allowReflowOnResize = true; diff --git a/src/vtbackend/TerminalState.cpp b/src/vtbackend/TerminalState.cpp index d4df1a1cc7..b850815e5c 100644 --- a/src/vtbackend/TerminalState.cpp +++ b/src/vtbackend/TerminalState.cpp @@ -2,6 +2,8 @@ #include #include +#include + namespace terminal { @@ -16,6 +18,7 @@ TerminalState::TerminalState(Terminal& terminal): te->discardImage(*image); } }, hyperlinks { HyperlinkCache { 1024 } }, + urlPattern { regex_dfa::RegExprParser {}.parse(settings.urlPattern) }, sequencer { terminal }, parser { std::ref(sequencer) }, viCommands { terminal }, diff --git a/src/vtbackend/TerminalState.h b/src/vtbackend/TerminalState.h index 70635e4b7b..7940465c25 100644 --- a/src/vtbackend/TerminalState.h +++ b/src/vtbackend/TerminalState.h @@ -18,6 +18,8 @@ #include +#include + #include #include @@ -26,6 +28,7 @@ #include #include #include +#include #include #include @@ -193,6 +196,7 @@ struct TerminalState // Hyperlink related // HyperlinkStorage hyperlinks {}; + regex_dfa::RegExpr urlPattern; std::string windowTitle {}; std::stack savedWindowTitles {};
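For context on the vtbackend wiring above: the profile's urlPattern string is compiled once, at TerminalState construction, via regex_dfa::RegExprParser{}.parse(). The sketch below mirrors that call in isolation; the <regex_dfa/RegExprParser.h> header name and the exception-based error handling are assumptions, since only the parse() expression itself appears in this patch.

// Minimal sketch: parse the default urlPattern the way TerminalState does.
// The header path and the caught exception type are assumptions.
#include <regex_dfa/RegExprParser.h>

#include <exception>
#include <iostream>
#include <string>

int main()
{
    std::string const urlPattern =
        R"((https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]*[-A-Za-z0-9+&@#/%=~_|])";

    try
    {
        auto const expr = regex_dfa::RegExprParser {}.parse(urlPattern);
        (void) expr; // TerminalState stores this result as TerminalState::urlPattern
        std::cout << "urlPattern parsed successfully.\n";
    }
    catch (std::exception const& e)
    {
        std::cerr << "Failed to parse urlPattern: " << e.what() << "\n";
    }
}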