Skip to content

Commit

Permalink
# Git issue 498: Conditional negative lookahead inside positive lookahead fails to match
Browse files Browse the repository at this point in the history

Conditional node needed an additional member that points to the true branch.
  • Loading branch information
Matthew Barnett committed Jun 3, 2023
1 parent 9f03255 commit 774dbfd
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 9 deletions.
15 changes: 10 additions & 5 deletions changelog.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
Version: 2023.6.3

# Git issue 498: Conditional negative lookahead inside positive lookahead fails to match
Conditional node needed an additional member that points to the true branch.

Version: 2023.5.5

Removed semicolon after 'else' in 'munge_name'.
Expand Down Expand Up @@ -86,11 +91,11 @@ Version: 2022.3.2

Git issue 453: Document last supported python2 version

Added a brief reference to the last version to support Python 2 in README.rst.
Added a brief reference to the last version to support Python 2 in README.rst.

Git issue 456: RegexFlag exists in re, but not regex

Updated the flags to use enum now that regex supports only Python 3.6+.
Updated the flags to use enum now that regex supports only Python 3.6+.

Version: 2022.1.21

Expand Down Expand Up @@ -1225,13 +1230,13 @@ Version: 2013.1.25
Version: 2013.1.24

Hg issue 86: Enhance API of captures() to enable retrieval of ALL groups at once, as a dictionary
Added capturesdict() method to match object.
Added capturesdict() method to match object.

Hg issue 87: Allow duplicate names of groups
Now allowed.
Now allowed.

Hg issue 88: regex.match() hangs
Fixed.
Fixed.

Version: 2013.1.20

Expand Down
13 changes: 11 additions & 2 deletions regex_3/_regex.c
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,7 @@ typedef struct RE_Node {
union {
struct {
RE_NextNode next_2;
struct RE_Node* true_node; /* Used by a CONDITIONAL node. */
} nonstring;
struct {
/* Used only if (node->status & RE_STATUS_STRING) is true. */
Expand Down Expand Up @@ -15272,7 +15273,7 @@ Py_LOCAL_INLINE(int) basic_match(RE_State* state, BOOL search) {
/* It's a negative lookaround that's failed. Go to the 'true'
* branch.
*/
node = conditional->next_1.node;
node = conditional->nonstring.true_node;

goto advance;
}
Expand Down Expand Up @@ -16990,7 +16991,6 @@ Py_LOCAL_INLINE(int) basic_match(RE_State* state, BOOL search) {

if (!lookaround->match) {
/* It's a negative lookaround that's failed. */

node = lookaround->nonstring.next_2.node;
goto advance;
}
Expand Down Expand Up @@ -22986,6 +22986,14 @@ Py_LOCAL_INLINE(void) skip_one_way_branches(PatternObject* pattern) {
node->nonstring.next_2.node = next->next_1.node;
modified = TRUE;
}

/* Check the true branch for CONDITIONAL. */
next = node->nonstring.true_node;
if (next && next->op == RE_OP_BRANCH &&
!next->nonstring.true_node) {
node->nonstring.true_node = next->next_1.node;
modified = TRUE;
}
}
} while (modified);

Expand Down Expand Up @@ -24443,6 +24451,7 @@ Py_LOCAL_INLINE(int) build_CONDITIONAL(RE_CompileArgs* args) {
/* end test node -> true branch -> end node */
add_node(end_test_node, subargs.start);
add_node(subargs.end, end_node);
test_node->nonstring.true_node = subargs.start;

if (args->code[0] == RE_OP_NEXT) {
/* There's a false branch. */
Expand Down
2 changes: 1 addition & 1 deletion regex_3/regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@
"VERSION1", "X", "VERBOSE", "W", "WORD", "error", "Regex", "__version__",
"__doc__", "RegexFlag"]

__version__ = "2.5.128"
__version__ = "2.5.129"

# --------------------------------------------------------------------
# Public interface.
Expand Down
6 changes: 6 additions & 0 deletions regex_3/test_regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -4332,6 +4332,12 @@ def test_hg_bugs(self):
# Git issue 494: Backtracking failure matching regex ^a?(a?)b?c\1$ against string abca
self.assertEqual(regex.search(r"^a?(a?)b?c\1$", "abca").span(), (0, 4))

# Git issue 498: Conditional negative lookahead inside positive lookahead fails to match
self.assertEqual(regex.match(r"(?(?=a).|..)", "ab").span(), (0, 1))
self.assertEqual(regex.match(r"(?(?=b).|..)", "ab").span(), (0, 2))
self.assertEqual(regex.match(r"(?(?!a).|..)", "ab").span(), (0, 2))
self.assertEqual(regex.match(r"(?(?!b).|..)", "ab").span(), (0, 1))

def test_fuzzy_ext(self):
self.assertEqual(bool(regex.fullmatch(r'(?r)(?:a){e<=1:[a-z]}', 'e')),
True)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

setup(
name='regex',
version='2023.5.5',
version='2023.6.3',
description='Alternative regular expression module, to replace re.',
long_description=long_description,
long_description_content_type='text/x-rst',
Expand Down

0 comments on commit 774dbfd

Please sign in to comment.