Skip to content

Commit

Permalink
Fix a bug in case folding characters given by escapes
Browse files Browse the repository at this point in the history
When we construct a Char node in the IR, we expect the character to be
already folded if the regex is case-insensitive. This was true for
ordinary characters but not for characters given by an
escape.

This happened only in the unoptimized case: in the optimized case
we unfold the character to determine all possible matches, which masks
the bug. However, if regex optimization is disabled, we would panic
in debug builds and fail to match case-insensitively in release builds.
  • Loading branch information
ridiculousfish committed Oct 29, 2023
1 parent f62db8c commit 8b842f1
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 10 deletions.
1 change: 1 addition & 0 deletions src/ir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ pub enum Node {
Goal,

/// Match a literal character.
/// If icase is true, then `c` MUST be already folded.
Char { c: u32, icase: bool },

/// Match a literal sequence of bytes.
Expand Down
28 changes: 18 additions & 10 deletions src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,15 @@ where
true
}

/// Return `c` passed through `unicode::fold` when the `icase` flag is set;
/// otherwise return `c` unchanged.
fn fold_if_icase(&self, c: u32) -> u32 {
    // Case-sensitive regexes keep the character exactly as written.
    if !self.flags.icase {
        return c;
    }
    unicode::fold(c)
}

/// Peek at the next character.
fn peek(&mut self) -> Option<u32> {
self.input.peek().copied()
Expand Down Expand Up @@ -355,13 +364,9 @@ where
return error("Nothing to repeat");
}
self.input = saved;
let mut cc = c;
self.consume(cc);
if self.flags.icase {
cc = unicode::fold(cc)
}
self.consume(c);
result.push(ir::Node::Char {
c: cc,
c: self.fold_if_icase(c),
icase: self.flags.icase,
})
}
Expand Down Expand Up @@ -826,10 +831,13 @@ where
error("Unexpected end of named backreference")
}
}
_ => Ok(ir::Node::Char {
c: self.consume_character_escape()?,
icase: self.flags.icase,
}),
_ => {
let c = self.consume_character_escape()?;
Ok(ir::Node::Char {
c: self.fold_if_icase(c),
icase: self.flags.icase,
})
}
}
}

Expand Down
17 changes: 17 additions & 0 deletions tests/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1520,3 +1520,20 @@ fn test_valid_character_sets_in_annex_b_tc(tc: TestConfig) {
tc.test_match_fails(regexp, "", "f");
tc.test_match_fails(regexp, "", " ");
}

/// Test entry point: hands `test_escapes_folding_tc` to `test_with_configs`,
/// which presumably invokes it once per test configuration (confirm against
/// the helper's definition).
#[test]
fn test_escapes_folding() {
test_with_configs(test_escapes_folding_tc)
}

/// Regression test for failing to fold characters which come from escapes.
///
/// Each case is checked in order against the supplied configuration `tc`.
fn test_escapes_folding_tc(tc: TestConfig) {
    // (pattern, flags, input, whether a match is expected)
    let cases = [
        (r"\u{41}", "", "a", false),
        (r"\u{41}", "", "A", true),
        (r"\u{61}", "", "A", false),
        (r"\u{61}", "", "a", true),
        (r"\u{41}", "i", "a", true),
        (r"\u{41}", "i", "A", true),
        (r"\u{61}", "i", "a", true),
        (r"\u{61}", "i", "A", true),
    ];
    for (pattern, flags, input, should_match) in cases {
        if should_match {
            tc.test_match_succeeds(pattern, flags, input);
        } else {
            tc.test_match_fails(pattern, flags, input);
        }
    }
}

0 comments on commit 8b842f1

Please sign in to comment.