diff --git a/regex-syntax/src/unicode.rs b/regex-syntax/src/unicode.rs index 84e781db4..5c22f66ac 100644 --- a/regex-syntax/src/unicode.rs +++ b/regex-syntax/src/unicode.rs @@ -248,7 +248,12 @@ impl<'a> ClassQuery<'a> { // also the abbreviation for the 'Script' property. So we avoid calling // 'canonical_prop' for it too, which would erroneously normalize it // to 'Script'. - if norm != "cf" && norm != "sc" { + // + // Another case: 'lc' is an abbreviation for the 'Cased_Letter' + // general category, but is also an abbreviation for the 'Lowercase_Mapping' + // property. We don't currently support the latter, so as with 'cf' + // above, we treat 'lc' as 'Cased_Letter'. + if norm != "cf" && norm != "sc" && norm != "lc" { if let Some(canon) = canonical_prop(&norm)? { return Ok(CanonicalClassQuery::Binary(canon)); } diff --git a/tests/unicode.rs b/tests/unicode.rs index 748bbb79c..d7dbdd31b 100644 --- a/tests/unicode.rs +++ b/tests/unicode.rs @@ -35,6 +35,8 @@ mat!(uni_not_boundary_ogham, r"\d\B", "6 ", None); // We should test more, but there's a lot. Write a script to generate more of // these tests. mat!(uni_class_gencat_cased_letter, r"\p{Cased_Letter}", "Ａ", Some((0, 3))); +mat!(uni_class_gencat_cased_letter2, r"\p{gc=LC}", "Ａ", Some((0, 3))); +mat!(uni_class_gencat_cased_letter3, r"\p{LC}", "Ａ", Some((0, 3))); mat!( uni_class_gencat_close_punctuation, r"\p{Close_Punctuation}",