diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..b806607 --- /dev/null +++ b/.clang-format @@ -0,0 +1,178 @@ +--- +Language: Cpp +# BasedOnStyle: LLVM +AccessModifierOffset: -2 +AlignAfterOpenBracket: Align +AlignArrayOfStructures: None +AlignConsecutiveMacros: None +AlignConsecutiveAssignments: None +AlignConsecutiveBitFields: None +AlignConsecutiveDeclarations: None +AlignEscapedNewlines: Right +AlignOperands: Align +AlignTrailingComments: true +AllowAllArgumentsOnNextLine: true +AllowAllConstructorInitializersOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortEnumsOnASingleLine: true +AllowShortBlocksOnASingleLine: Never +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: All +AllowShortLambdasOnASingleLine: All +AllowShortIfStatementsOnASingleLine: Never +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: MultiLine +AttributeMacros: + - __capability +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterCaseLabel: false + AfterClass: false + AfterControlStatement: Never + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + AfterExternBlock: false + BeforeCatch: false + BeforeElse: false + BeforeLambdaBody: false + BeforeWhile: false + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakBeforeBinaryOperators: None +BreakBeforeConceptDeclarations: true +BreakBeforeBraces: Attach +BreakBeforeInheritanceComma: false +BreakInheritanceList: BeforeColon +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +BreakConstructorInitializers: BeforeColon +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: true +ColumnLimit: 80 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: false +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DeriveLineEnding: true +DerivePointerAlignment: false +DisableFormat: false +EmptyLineAfterAccessModifier: Never +EmptyLineBeforeAccessModifier: LogicalBlock +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IfMacros: + - KJ_IF_MAYBE +IncludeBlocks: Preserve +IncludeCategories: + - Regex: '^"(llvm|llvm-c|clang|clang-c)/' + Priority: 2 + SortPriority: 0 + CaseSensitive: false + - Regex: '^(<|"(gtest|gmock|isl|json)/)' + Priority: 3 + SortPriority: 0 + CaseSensitive: false + - Regex: '.*' + Priority: 1 + SortPriority: 0 + CaseSensitive: false +IncludeIsMainRegex: '(Test)?$' +IncludeIsMainSourceRegex: '' +IndentAccessModifiers: false +IndentCaseLabels: false +IndentCaseBlocks: false +IndentGotoLabels: true +IndentPPDirectives: None +IndentExternBlock: AfterExternBlock +IndentRequires: false +IndentWidth: 2 +IndentWrappedFunctionNames: false +InsertTrailingCommas: None +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: true +LambdaBodyIndentation: Signature +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBinPackProtocolList: Auto +ObjCBlockIndentWidth: 2 +ObjCBreakBeforeNestedBlockParam: true +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 60 +PenaltyIndentedWhitespace: 0 +PointerAlignment: Right +PPIndentWidth: -1 +ReferenceAlignment: Pointer +ReflowComments: true +ShortNamespaceLines: 1 +SortIncludes: CaseSensitive +SortJavaStaticImport: Before +SortUsingDeclarations: true +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeCaseColon: false +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +SpaceAroundPointerQualifiers: Default +SpaceBeforeRangeBasedForLoopColon: true +SpaceInEmptyBlock: false +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: Never +SpacesInConditionalStatement: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInLineCommentPrefix: + Minimum: 1 + Maximum: -1 +SpacesInParentheses: false +SpacesInSquareBrackets: false +SpaceBeforeSquareBrackets: false +BitFieldColonSpacing: Both +Standard: Latest +StatementAttributeLikeMacros: + - Q_EMIT +StatementMacros: + - Q_UNUSED + - QT_REQUIRE_VERSION +TabWidth: 8 +UseCRLF: false +UseTab: Never +WhitespaceSensitiveMacros: + - STRINGIZE + - PP_STRINGIZE + - BOOST_PP_STRINGIZE + - NS_SWIFT_NAME + - CF_SWIFT_NAME +... + diff --git a/CMakeLists.txt b/CMakeLists.txt index 4380350..7fc4469 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.10) project(CppEarley - VERSION 1.3.0 + VERSION 1.3.1 DESCRIPTION "Basic C++ Earley Parser" LANGUAGES CXX) diff --git a/src/earley.cpp b/src/earley.cpp index 56ce7f2..e4b87ce 100644 --- a/src/earley.cpp +++ b/src/earley.cpp @@ -1,328 +1,288 @@ #include +#include #include #include -#include -#include "grammar.h" #include "earley.h" - +#include "grammar.h" using std::cout; using std::endl; -using Earley::S_state_type_t; +using Earley::S_grammar_type_t; using Earley::S_set_type_t; +using Earley::S_state_type_t; using Earley::S_type_t; -using Earley::S_grammar_type_t; +inline S_type_t Earley::init(const size_t len_words) { + S_type_t S; + for (unsigned int i = 0; i < len_words + 1; ++i) { + S_set_type_t s_set; + S.push_back(s_set); + } -inline S_type_t Earley::init(const size_t len_words) -{ - S_type_t S; - for (unsigned int i = 0; i < len_words + 1; ++i) - { - S_set_type_t s_set; - S.push_back(s_set); - } - - return S; + return S; } - -inline bool Earley::is_nonterminal(const char ch) -{ - return Earley::nonterminals.find(ch) != Earley::nonterminals.end(); +inline bool Earley::is_nonterminal(const char ch) { + return Earley::nonterminals.find(ch) != Earley::nonterminals.end(); } - -inline bool Earley::is_terminal(const char ch) -{ - return Earley::parts_of_speech.find(ch) != Earley::parts_of_speech.end(); +inline bool Earley::is_terminal(const char ch) { + return Earley::parts_of_speech.find(ch) != Earley::parts_of_speech.end(); } +inline bool Earley::is_finished(const S_state_type_t &state) { + std::string prod = std::get<1>(state); -inline bool Earley::is_finished(const S_state_type_t& state) -{ - std::string prod = std::get<1>(state); - - return prod.find(Earley::dot) >= prod.size() - 1; + return prod.find(Earley::dot) >= prod.size() - 1; } - // Swap with item in front of the dot by default -std::string Earley::swap_around_dot(const std::string& strng) -{ - if (strng.find(Earley::dot) >= strng.size() - 1) - return strng; - - std::string n_str(strng); - auto dot_idx = strng.find(Earley::dot); - auto next_item = strng.at(dot_idx + 1); - - // Swap around a/the lexeme - if (Earley::nonterminals.find(next_item) != Earley::nonterminals.end()) - { - std::swap(n_str[dot_idx], n_str[dot_idx + 1]); - return n_str; - } - else if (Earley::parts_of_speech.find(next_item) != Earley::parts_of_speech.end()) - { - std::swap(n_str[dot_idx], n_str[dot_idx + 1]); - return n_str; - } - - // Swap around a/the token (take the dot, remove it from the front and append to the end) - auto indx = n_str.find(Earley::dot); - n_str.erase(indx, indx+1); - n_str += Earley::dot; - - return n_str; +std::string Earley::swap_around_dot(const std::string &strng) { + if (strng.find(Earley::dot) >= strng.size() - 1) + return strng; + + std::string n_str(strng); + auto dot_idx = strng.find(Earley::dot); + auto next_item = strng.at(dot_idx + 1); + + // Swap around a/the lexeme + if (Earley::nonterminals.find(next_item) != Earley::nonterminals.end()) { + std::swap(n_str[dot_idx], n_str[dot_idx + 1]); + return n_str; + } else if (Earley::parts_of_speech.find(next_item) != + Earley::parts_of_speech.end()) { + std::swap(n_str[dot_idx], n_str[dot_idx + 1]); + return n_str; + } + + // Swap around a/the token (take the dot, remove it from the front and append + // to the end) + auto indx = n_str.find(Earley::dot); + n_str.erase(indx, indx + 1); + n_str += Earley::dot; + + return n_str; } +std::string Earley::get_next_element(const S_state_type_t &state) { + auto prod = std::get<1>(state); + auto m_index = prod.find(Earley::dot); + std::string nxt(1, prod.at(m_index + 1)); -std::string Earley::get_next_element(const S_state_type_t& state) -{ - auto prod = std::get<1>(state); - auto m_index = prod.find(Earley::dot); - std::string nxt(1, prod.at(m_index + 1)); - - if (is_nonterminal(nxt.at(0)) || is_terminal(nxt.at(0))) - return nxt; // return the thing(char) that comes just after the Earley::dot + if (is_nonterminal(nxt.at(0)) || is_terminal(nxt.at(0))) + return nxt; // return the thing(char) that comes just after the Earley::dot - // return the latter portion of the string (everything after the Earley::dot) - return prod.substr(m_index + 1, prod.size() - m_index); + // return the latter portion of the string (everything after the Earley::dot) + return prod.substr(m_index + 1, prod.size() - m_index); } +std::vector Earley::split(const std::string &strng, + const char delim) { + std::vector retvec; + std::stringstream ss(strng); + std::string holder; -std::vector Earley::split(const std::string& strng, const char delim) -{ - std::vector retvec; - std::stringstream ss(strng); - std::string holder; + while (ss) { + std::getline(ss, holder, delim); + retvec.push_back(holder); + } + retvec.pop_back(); // take out the final, extra, element (not needed) - while (ss) - { - std::getline(ss, holder, delim); - retvec.push_back(holder); - } - retvec.pop_back(); // take out the final, extra, element (not needed) - - return retvec; + return retvec; } - -std::vector Earley::prod_split(const std::string& strng) -{ - std::smatch sm; - std::regex rgx("(\\b([^|])+\\b([|])*)"); - std::string m_string(strng); - std::vector retvec; - - bool got_match = true; - // Parse through the string, stripping the matched terminal/non-terminal from the front - // this allows for continuous matching against one or more terminals/non-terminals between the pipes/spaces - while (got_match) - { - got_match = std::regex_search(m_string, sm, rgx); - if (got_match) - { - retvec.push_back(sm.str()); - m_string = sm.suffix(); - } - } - - return retvec; +std::vector Earley::prod_split(const std::string &strng) { + std::smatch sm; + std::regex rgx("(\\b([^|])+\\b([|])*)"); + std::string m_string(strng); + std::vector retvec; + + bool got_match = true; + // Parse through the string, stripping the matched terminal/non-terminal from + // the front this allows for continuous matching against one or more + // terminals/non-terminals between the pipes/spaces + while (got_match) { + got_match = std::regex_search(m_string, sm, rgx); + if (got_match) { + retvec.push_back(sm.str()); + m_string = sm.suffix(); + } + } + + return retvec; } - -bool Earley::predict(S_type_t& S, const unsigned int k, const std::string& nxt_elem, S_grammar_type_t& grammar) -{ - bool added = false; - std::string nxt_production = nxt_elem; - std::string grammar_prod = grammar[nxt_production]; - auto prod_splits = prod_split(grammar_prod); - - for (auto& split : prod_splits) - { - std::stringstream ss; - std::string split_nows = std::regex_replace(split, std::regex("\\s"), ""); - - ss << Earley::dot; - ss << split_nows; - - std::string dSplit(ss.str()); - Earley::S_set_type_t& current_set = S[k]; - S_state_type_t set_element = std::make_tuple(nxt_production, dSplit, k); - - // if not found: insert the tuple(set_element) - if (std::find(current_set.begin(), current_set.end(), set_element) == current_set.end()) - { - current_set.push_back(set_element); - added = true; - } - } - - return added; +bool Earley::predict(S_type_t &S, const unsigned int k, + const std::string &nxt_elem, S_grammar_type_t &grammar) { + bool added = false; + std::string nxt_production = nxt_elem; + std::string grammar_prod = grammar[nxt_production]; + auto prod_splits = prod_split(grammar_prod); + + for (auto &split : prod_splits) { + std::stringstream ss; + std::string split_nows = std::regex_replace(split, std::regex("\\s"), ""); + + ss << Earley::dot; + ss << split_nows; + + std::string dSplit(ss.str()); + Earley::S_set_type_t ¤t_set = S[k]; + S_state_type_t set_element = std::make_tuple(nxt_production, dSplit, k); + + // if not found: insert the tuple(set_element) + if (std::find(current_set.begin(), current_set.end(), set_element) == + current_set.end()) { + current_set.push_back(set_element); + added = true; + } + } + + return added; } - -bool Earley::scan(S_type_t& S, const unsigned int k, S_state_type_t& state, const std::string& words) -{ - bool added = false; - - std::string nxt_elem_scanner = get_next_element(state); - - if (k >= words.size()) - return added; - - auto words_k = words.at(k); - - { - // Process regex information against the token/word (see if it's a number) - std::string rgx_input(1, words_k); - std::regex rgx("[0-9]"); - std::smatch sm; - auto res = std::regex_match(rgx_input, sm, rgx); - - if (nxt_elem_scanner == "number" && !res) - { - cout << "Not going to add non-number to forward set:"; - cout << "Terminal symbol is not a number?"; - cout << endl; - return added; - } - } - - if (k > words.size() - 1) - return added; - - if (Earley::parts_of_speech.find(words.at(k)) != Earley::parts_of_speech.end()) - { - std::string state_swapped = swap_around_dot(std::get<1>(state)); - - S_state_type_t new_state = std::make_tuple(std::get<0>(state), state_swapped, std::get<2>(state)); - auto& nxt_set = S[k + 1]; - // If the element (new_state) is in the language, described by the grammar, - // then add that completed terminal symbol to S[k + 1] - if (std::find(nxt_set.begin(), nxt_set.end(), new_state) == nxt_set.end()) - { - nxt_set.push_back(new_state); - added = true; - } - } - - return added; +bool Earley::scan(S_type_t &S, const unsigned int k, S_state_type_t &state, + const std::string &words) { + bool added = false; + + std::string nxt_elem_scanner = get_next_element(state); + + if (k >= words.size()) + return added; + + auto words_k = words.at(k); + + { + // Process regex information against the token/word (see if it's a number) + std::string rgx_input(1, words_k); + std::regex rgx("[0-9]"); + std::smatch sm; + auto res = std::regex_match(rgx_input, sm, rgx); + + if (nxt_elem_scanner == "number" && !res) { + cout << "Not going to add non-number to forward set:"; + cout << "Terminal symbol is not a number?"; + cout << endl; + return added; + } + } + + if (k > words.size() - 1) + return added; + + if (Earley::parts_of_speech.find(words.at(k)) != + Earley::parts_of_speech.end()) { + std::string state_swapped = swap_around_dot(std::get<1>(state)); + + S_state_type_t new_state = + std::make_tuple(std::get<0>(state), state_swapped, std::get<2>(state)); + auto &nxt_set = S[k + 1]; + // If the element (new_state) is in the language, described by the grammar, + // then add that completed terminal symbol to S[k + 1] + if (std::find(nxt_set.begin(), nxt_set.end(), new_state) == nxt_set.end()) { + nxt_set.push_back(new_state); + added = true; + } + } + + return added; } - -bool Earley::complete(S_type_t& S, const unsigned int k, S_state_type_t& state) -{ - bool added = false; - int state_origin = std::get<2>(state); - - for (Earley::S_state_type_t& t_state : S[state_origin]) - { - std::string t_state_sym = std::get<1>(t_state); - std::string t_state_temp(t_state_sym); - - auto fnd = t_state_temp.find(Earley::dot); - t_state_temp.erase(fnd, fnd + 1); - - if (Earley::nonterminals.find(t_state_temp.at(0)) != Earley::nonterminals.end()) - { - std::string swapped_prod = swap_around_dot(t_state_sym); - - // assemble t_state with the dot after the nonterminal (completed the nonterminal) - // then add the new, completed, t_state to S[k] - S_state_type_t n_tuple = std::make_tuple(std::get<0>(t_state), swapped_prod, std::get<2>(t_state)); - auto sk = S[k]; - - // If we didn't already add the completed state to the current state, do so now; - // otherwise,ignore and continue on - if (std::find(sk.begin(), sk.end(), n_tuple) == sk.end()) - { - S[k].push_back(n_tuple); - added = true; - } - } - } - - - return added; +bool Earley::complete(S_type_t &S, const unsigned int k, + S_state_type_t &state) { + bool added = false; + int state_origin = std::get<2>(state); + + for (Earley::S_state_type_t &t_state : S[state_origin]) { + std::string t_state_sym = std::get<1>(t_state); + std::string t_state_temp(t_state_sym); + + auto fnd = t_state_temp.find(Earley::dot); + t_state_temp.erase(fnd, fnd + 1); + + if (Earley::nonterminals.find(t_state_temp.at(0)) != + Earley::nonterminals.end()) { + std::string swapped_prod = swap_around_dot(t_state_sym); + + // assemble t_state with the dot after the nonterminal (completed the + // nonterminal) then add the new, completed, t_state to S[k] + S_state_type_t n_tuple = std::make_tuple( + std::get<0>(t_state), swapped_prod, std::get<2>(t_state)); + auto sk = S[k]; + + // If we didn't already add the completed state to the current state, do + // so now; otherwise,ignore and continue on + if (std::find(sk.begin(), sk.end(), n_tuple) == sk.end()) { + S[k].push_back(n_tuple); + added = true; + } + } + } + + return added; } - -bool Earley::check_end_set(const S_type_t& S, const S_state_type_t& expected) -{ - auto& end_set = S[S.size() - 1]; - if (std::find(end_set.begin(), end_set.end(), expected) != end_set.end()) - { - return true; - } - return false; +bool Earley::check_end_set(const S_type_t &S, const S_state_type_t &expected) { + auto &end_set = S[S.size() - 1]; + if (std::find(end_set.begin(), end_set.end(), expected) != end_set.end()) { + return true; + } + return false; } - -bool Earley::earley_parse(const std::string& words, const S_grammar_type_t& grammar) -{ - // TBD: Add full support for empty string in grammar/earley - if (words.size() == 0) - return true; - - std::string start_p = "S"; - start_p += Earley::dot; - S_state_type_t expected = std::make_tuple("P", start_p, 0); - - // Create the overarching array of sets (parse table) - S_type_t S = init(words.size()); - { - std::string sp(1, Earley::dot); - sp += "S"; - S[0].push_back(std::make_tuple("P", sp, 0)); - } - bool done = false; - bool complete_parse = false; - - for (unsigned int k = 0; k < words.size() + 1; ++k) - { - bool added = true; - if (done) - break; - - while (added) - { - added = false; - - for (Earley::S_state_type_t state : S[k]) - { - if (check_end_set(S, expected)) - { - done = true; - complete_parse = true; - break; - } - - bool finished = is_finished(state); - if (!finished) - { - auto nxt_elem = get_next_element(state); - - if (is_nonterminal(nxt_elem.at(0))) - { - added = predict(S, k, nxt_elem, const_cast(grammar)); - } - else - { - added = scan(S, k, state, words); - } - } - else - { - added = complete(S, k, state); - } - } - } - } - - return complete_parse; +bool Earley::earley_parse(const std::string &words, + const S_grammar_type_t &grammar) { + // TBD: Add full support for empty string in grammar/earley + if (words.size() == 0) + return true; + + std::string start_p = "S"; + start_p += Earley::dot; + S_state_type_t expected = std::make_tuple("P", start_p, 0); + + // Create the overarching array of sets (parse table) + S_type_t S = init(words.size()); + { + std::string sp(1, Earley::dot); + sp += "S"; + S[0].push_back(std::make_tuple("P", sp, 0)); + } + bool done = false; + bool complete_parse = false; + + for (unsigned int k = 0; k < words.size() + 1; ++k) { + bool added = true; + if (done) + break; + + while (added) { + added = false; + + for (Earley::S_state_type_t state : S[k]) { + if (check_end_set(S, expected)) { + done = true; + complete_parse = true; + break; + } + + bool finished = is_finished(state); + if (!finished) { + auto nxt_elem = get_next_element(state); + + if (is_nonterminal(nxt_elem.at(0))) { + added = predict(S, k, nxt_elem, + const_cast(grammar)); + } else { + added = scan(S, k, state, words); + } + } else { + added = complete(S, k, state); + } + } + } + } + + return complete_parse; } diff --git a/src/grammar.cpp b/src/grammar.cpp index 950efcc..090ce4b 100644 --- a/src/grammar.cpp +++ b/src/grammar.cpp @@ -1,60 +1,50 @@ -#include #include +#include #include #include "grammar.h" - using std::cout; using std::endl; - -std::vector Earley::load_grammar(const std::string& location) -{ - std::ifstream iff(location.c_str(), std::ios::in); - std::vector mvec; - - if (iff.is_open()) - { - while (iff) - { - std::string holder; - std::getline(iff, holder); - mvec.push_back(holder); - } - mvec.pop_back(); - mvec.shrink_to_fit(); - iff.close(); - } - else - cout << "Failed to open grammar file '" << location << "'\n"; - - return mvec; +std::vector Earley::load_grammar(const std::string &location) { + std::ifstream iff(location.c_str(), std::ios::in); + std::vector mvec; + + if (iff.is_open()) { + while (iff) { + std::string holder; + std::getline(iff, holder); + mvec.push_back(holder); + } + mvec.pop_back(); + mvec.shrink_to_fit(); + iff.close(); + } else + cout << "Failed to open grammar file '" << location << "'\n"; + + return mvec; } +Earley::S_grammar_type_t +Earley::process_grammar(const std::vector &gramm_raw) { + Earley::S_grammar_type_t gramm_struct; -Earley::S_grammar_type_t Earley::process_grammar(const std::vector& gramm_raw) -{ - Earley::S_grammar_type_t gramm_struct; - - for (auto& prod : gramm_raw) - { - std::smatch match_results; - std::regex prod_regex("^(.*)\\s->\\s(.*)$"); - if (std::regex_match(prod, match_results, prod_regex)) - { - std::string prod_name = match_results[1]; - std::string prod_value = match_results[2]; + for (auto &prod : gramm_raw) { + std::smatch match_results; + std::regex prod_regex("^(.*)\\s->\\s(.*)$"); + if (std::regex_match(prod, match_results, prod_regex)) { + std::string prod_name = match_results[1]; + std::string prod_value = match_results[2]; - // Check to see if the production maps to a terminal symbol(s) - std::regex number_regex("(\\d\\s[|]\\s)+\\d"); - if (std::regex_match(prod_value, number_regex)) - prod_value = "number"; + // Check to see if the production maps to a terminal symbol(s) + std::regex number_regex("(\\d\\s[|]\\s)+\\d"); + if (std::regex_match(prod_value, number_regex)) + prod_value = "number"; - gramm_struct.insert(std::make_pair(prod_name, prod_value)); - } - } + gramm_struct.insert(std::make_pair(prod_name, prod_value)); + } + } - return gramm_struct; + return gramm_struct; } - diff --git a/src/main.cpp b/src/main.cpp index e8a3418..331779e 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,38 +1,34 @@ #include -#include "grammar.h" #include "earley.h" - +#include "grammar.h" using std::cout; using std::endl; - -int main(int argc, char* argv[]) -{ - std::string grammar_location(".\\grammars\\grammar.txt"); - // override for the window grammar path - if (argc > 1) - { - grammar_location = argv[1]; - } - // TBD: Pass in the path to the grammar via 'argv' - std::vector loaded_gramm = Earley::load_grammar(grammar_location); - - if (loaded_gramm.size() == 0) - { - cout << "Could not open grammar file '" << grammar_location << "' -- exiting\n"; - return -1; - } - - auto grammar = Earley::process_grammar(loaded_gramm); - const std::array inputs{ "1","1+","1+2","2+3*4" }; - - for (const auto& inp : inputs) - { - cout << "Input <" << inp << "> = " << std::boolalpha << Earley::earley_parse(inp, grammar) << endl; - } - - return 0; +int main(int argc, char *argv[]) { + std::string grammar_location(".\\grammars\\grammar.txt"); + // override for the window grammar path + if (argc > 1) { + grammar_location = argv[1]; + } + // TBD: Pass in the path to the grammar via 'argv' + std::vector loaded_gramm = + Earley::load_grammar(grammar_location); + + if (loaded_gramm.size() == 0) { + cout << "Could not open grammar file '" << grammar_location + << "' -- exiting\n"; + return -1; + } + + auto grammar = Earley::process_grammar(loaded_gramm); + const std::array inputs{"1", "1+", "1+2", "2+3*4"}; + + for (const auto &inp : inputs) { + cout << "Input <" << inp << "> = " << std::boolalpha + << Earley::earley_parse(inp, grammar) << endl; + } + + return 0; } - diff --git a/test/test.cpp b/test/test.cpp index 2266edf..a8837b8 100644 --- a/test/test.cpp +++ b/test/test.cpp @@ -1,19 +1,16 @@ -#include #include +#include #include -#include "grammar.cpp" #include "earley.cpp" - +#include "grammar.cpp" static std::string test_grammar_location; static std::string test_grammar_name("test_grammar.txt"); static std::vector loaded_gramm; static Earley::S_grammar_type_t test_grammar; - -TEST(earley, Input1Empty) -{ +TEST(earley, Input1Empty) { const std::string input_1(""); auto parse_res = Earley::earley_parse(input_1, test_grammar); @@ -21,119 +18,110 @@ TEST(earley, Input1Empty) EXPECT_EQ(parse_res, true); } +TEST(earley, AddPass) { + const std::array inputs{"0+1", "1+0", "1+1", + "9+0", "5+9", "0+9"}; -TEST(earley, AddPass) -{ - const std::array inputs {"0+1","1+0","1+1","9+0","5+9","0+9"}; - - for(auto& inp : inputs) - { + for (auto &inp : inputs) { auto parse_res = Earley::earley_parse(inp, test_grammar); EXPECT_EQ(parse_res, true); } } -TEST(earley, AddFail) -{ - const std::array inputs {"0+","+0","1+","+1","+"}; - - for(auto& inp : inputs) - { +TEST(earley, AddFail) { + const std::array inputs{"0+", "+0", "1+", "+1", "+"}; + + for (auto &inp : inputs) { auto parse_res = Earley::earley_parse(inp, test_grammar); - std::cout << std::boolalpha << "parse_res (" << parse_res << ")" << std::endl; + std::cout << std::boolalpha << "parse_res (" << parse_res << ")" + << std::endl; EXPECT_EQ(parse_res, false); } } -TEST(earley, MulPass) -{ - const std::array inputs {"0*1","1*0","1*1","9*0","5*9","0*9"}; - - for(auto& inp : inputs) - { +TEST(earley, MulPass) { + const std::array inputs{"0*1", "1*0", "1*1", + "9*0", "5*9", "0*9"}; + + for (auto &inp : inputs) { auto parse_res = Earley::earley_parse(inp, test_grammar); EXPECT_EQ(parse_res, true); } } -TEST(earley, MulFail) -{ - const std::array inputs {"0*","*0","1*","*1","*"}; - - for(auto& inp : inputs) - { +TEST(earley, MulFail) { + const std::array inputs{"0*", "*0", "1*", "*1", "*"}; + + for (auto &inp : inputs) { auto parse_res = Earley::earley_parse(inp, test_grammar); - std::cout << std::boolalpha << "parse_res (" << parse_res << ")" << std::endl; + std::cout << std::boolalpha << "parse_res (" << parse_res << ")" + << std::endl; EXPECT_EQ(parse_res, false); } } -TEST(earley, BadOpFail1) -{ - const std::array inputs {"0-","-0","1-","-1","-"}; - - for(auto& inp : inputs) - { +TEST(earley, BadOpFail1) { + const std::array inputs{"0-", "-0", "1-", "-1", "-"}; + + for (auto &inp : inputs) { auto parse_res = Earley::earley_parse(inp, test_grammar); - std::cout << std::boolalpha << "parse_res (" << parse_res << ")" << std::endl; + std::cout << std::boolalpha << "parse_res (" << parse_res << ")" + << std::endl; EXPECT_EQ(parse_res, false); } } -TEST(earley, BadOpFail2) -{ - const std::array inputs {"0/","/0","1/","/1","/"}; - - for(auto& inp : inputs) - { +TEST(earley, BadOpFail2) { + const std::array inputs{"0/", "/0", "1/", "/1", "/"}; + + for (auto &inp : inputs) { auto parse_res = Earley::earley_parse(inp, test_grammar); - std::cout << std::boolalpha << "parse_res (" << parse_res << ")" << std::endl; + std::cout << std::boolalpha << "parse_res (" << parse_res << ")" + << std::endl; EXPECT_EQ(parse_res, false); } } -TEST(earley, AllNumbersPass1) -{ - const std::array inputs {"0+1+2+3+4+5+6+7+8+9","0*1*2*3*4*5*6*7*8*9"}; - - for(auto& inp : inputs) - { +TEST(earley, AllNumbersPass1) { + const std::array inputs{"0+1+2+3+4+5+6+7+8+9", + "0*1*2*3*4*5*6*7*8*9"}; + + for (auto &inp : inputs) { auto parse_res = Earley::earley_parse(inp, test_grammar); - std::cout << std::boolalpha << "parse_res (" << parse_res << ")" << std::endl; + std::cout << std::boolalpha << "parse_res (" << parse_res << ")" + << std::endl; EXPECT_EQ(parse_res, true); } } -TEST(earley, AllNumbersPass2) -{ - const std::array inputs {"0+1*2+3*4+5*6+7*8+9","0*1+2*3+4*5+6*7+8*9"}; - - for(auto& inp : inputs) - { +TEST(earley, AllNumbersPass2) { + const std::array inputs{"0+1*2+3*4+5*6+7*8+9", + "0*1+2*3+4*5+6*7+8*9"}; + + for (auto &inp : inputs) { auto parse_res = Earley::earley_parse(inp, test_grammar); - std::cout << std::boolalpha << "parse_res (" << parse_res << ")" << std::endl; + std::cout << std::boolalpha << "parse_res (" << parse_res << ")" + << std::endl; EXPECT_EQ(parse_res, true); } } -int main(int argc, char** argv) -{ - if (argc < 2) - { +int main(int argc, char **argv) { + if (argc < 2) { cout << "ERROR: Not enough args to set path to test grammar - failing.\n"; return -1; } @@ -142,13 +130,12 @@ int main(int argc, char** argv) test_grammar_location += test_grammar_name; cout << "test_grammar_location " << test_grammar_location << "\n"; - + loaded_gramm = Earley::load_grammar(test_grammar_location); - + test_grammar = Earley::process_grammar(loaded_gramm); testing::InitGoogleTest(); return RUN_ALL_TESTS(); } -