From 6a6462e48072a24671dc0065b0e1c20a0b839096 Mon Sep 17 00:00:00 2001 From: Yufan Song Date: Fri, 11 Oct 2024 16:07:44 +0800 Subject: [PATCH 1/2] Extend the semantics of parentheses --- nfa/construction.py | 73 ++++++++++++++++++++++++++------------------- 1 file changed, 43 insertions(+), 30 deletions(-) diff --git a/nfa/construction.py b/nfa/construction.py index ce0cb13..7183f46 100644 --- a/nfa/construction.py +++ b/nfa/construction.py @@ -21,18 +21,17 @@ def pattern(pattern_string): lexer = Lexer(pattern_string) lexer.advance() nfa_pair = NfaPair() - group(nfa_pair) + expr(nfa_pair) # log_nfa(nfa_pair.start_node) return nfa_pair.start_node """ -group ::= ("(" expr ")")* expr ::= factor_conn ("|" factor_conn)* factor_conn ::= factor | factor factor* factor ::= (term | term ("*" | "+" | "?"))* -term ::= char | "[" char "-" char "]" | . +term ::= char | "[" char "-" char "]" | . | "(" expr ")" """ @@ -44,6 +43,8 @@ def term(pair_out): nfa_dot_char(pair_out) elif lexer.match(Token.CCL_START): nfa_set_nega_char(pair_out) + elif lexer.match(Token.OPEN_PAREN): + nfa_paren_around(pair_out) # 匹配单个字符 @@ -147,7 +148,6 @@ def factor_conn(pair_out): def is_conn(token): nc = [ - Token.OPEN_PAREN, Token.CLOSE_PAREN, Token.AT_EOL, Token.EOS, @@ -226,6 +226,19 @@ def nfa_option_closure(pair_out): return True +# () +def nfa_paren_around(pair_out): + if not lexer.match(Token.OPEN_PAREN): + return False + + lexer.advance() + expr(pair_out) + if not lexer.match(Token.CLOSE_PAREN): + return False + lexer.advance() + return True + + def expr(pair_out): factor_conn(pair_out) pair = NfaPair() @@ -246,32 +259,32 @@ def expr(pair_out): return True -def group(pair_out): - if lexer.match(Token.OPEN_PAREN): - lexer.advance() - expr(pair_out) - if lexer.match(Token.CLOSE_PAREN): - lexer.advance() - elif lexer.match(Token.EOS): - return False - else: - expr(pair_out) - - while True: - pair = NfaPair() - if lexer.match(Token.OPEN_PAREN): - lexer.advance() - expr(pair) - pair_out.end_node.next_1 = pair.start_node - pair_out.end_node = pair.end_node - if lexer.match(Token.CLOSE_PAREN): - lexer.advance() - elif lexer.match(Token.EOS): - return False - else: - expr(pair) - pair_out.end_node.next_1 = pair.start_node - pair_out.end_node = pair.end_node +# def group(pair_out): +# if lexer.match(Token.OPEN_PAREN): +# lexer.advance() +# expr(pair_out) +# if lexer.match(Token.CLOSE_PAREN): +# lexer.advance() +# elif lexer.match(Token.EOS): +# return False +# else: +# expr(pair_out) + +# while True: +# pair = NfaPair() +# if lexer.match(Token.OPEN_PAREN): +# lexer.advance() +# expr(pair) +# pair_out.end_node.next_1 = pair.start_node +# pair_out.end_node = pair.end_node +# if lexer.match(Token.CLOSE_PAREN): +# lexer.advance() +# elif lexer.match(Token.EOS): +# return False +# else: +# expr(pair) +# pair_out.end_node.next_1 = pair.start_node +# pair_out.end_node = pair.end_node \ No newline at end of file From e9782173e31b003816ce40c9164c732766256e0d Mon Sep 17 00:00:00 2001 From: Yufan Song Date: Fri, 11 Oct 2024 16:11:12 +0800 Subject: [PATCH 2/2] Add test cases for nested parentheses --- test/test.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/test.py b/test/test.py index 23b7d86..a065a7a 100644 --- a/test/test.py +++ b/test/test.py @@ -16,6 +16,13 @@ def __init__(self, str, pattern, result): testLists.append(RegexMaterial("abbbbb", "[^c]+", True)) testLists.append(RegexMaterial("ccccc", "[^c]+", False)) testLists.append(RegexMaterial("123", "[1-3]+", True)) +testLists.append(RegexMaterial("ad", "a(bc)*d", True)) +testLists.append(RegexMaterial("abcd", "a(bc)*d", True)) +testLists.append(RegexMaterial("abcbcd", "a(bc)*d", True)) +testLists.append(RegexMaterial("abcdef", "a(b(cd)*e)?f", True)) +testLists.append(RegexMaterial("abef", "a(b(cd)*e)?f", True)) +testLists.append(RegexMaterial("af", "a(b(cd)*e)?f", True)) +testLists.append(RegexMaterial("abf", "a(b(cd)*e)?f", False)) class TestRegex(unittest.TestCase): def test(self):