Skip to content

Commit

Permalink
Syntax: Make \p{Sc} work
Browse files Browse the repository at this point in the history
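'sc' refers to the 'Currency_Symbol' general category,
 but is also the abbreviation for the 'Script' property.
 Skip the property-name lookup for 'sc' (as is already done for 'cf')
 so that \p{Sc} resolves to the general category.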

 Fixes rust-lang#835
 Related rust-lang#719 b1489c8
  • Loading branch information
snsmac committed Aug 1, 2022
1 parent 159a63c commit 0b098ce
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 33 deletions.
68 changes: 35 additions & 33 deletions regex-syntax/src/unicode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,11 +80,11 @@ impl fmt::Display for UnicodeWordError {
/// This returns an error if the Unicode case folding tables are not available.
pub fn simple_fold(
c: char,
) -> FoldResult<result::Result<impl Iterator<Item = char>, Option<char>>> {
) -> FoldResult<result::Result<impl Iterator<Item=char>, Option<char>>> {
#[cfg(not(feature = "unicode-case"))]
fn imp(
_: char,
) -> FoldResult<result::Result<impl Iterator<Item = char>, Option<char>>>
) -> FoldResult<result::Result<impl Iterator<Item=char>, Option<char>>>
{
use std::option::IntoIter;
Err::<result::Result<IntoIter<char>, _>, _>(CaseFoldError(()))
Expand All @@ -93,7 +93,7 @@ pub fn simple_fold(
#[cfg(feature = "unicode-case")]
fn imp(
c: char,
) -> FoldResult<result::Result<impl Iterator<Item = char>, Option<char>>>
) -> FoldResult<result::Result<impl Iterator<Item=char>, Option<char>>>
{
use crate::unicode_tables::case_folding_simple::CASE_FOLDING_SIMPLE;

Expand Down Expand Up @@ -220,7 +220,7 @@ impl<'a> ClassQuery<'a> {
let canon_val =
match canonical_value(vals, &property_value) {
None => {
return Err(Error::PropertyValueNotFound)
return Err(Error::PropertyValueNotFound);
}
Some(canon_val) => canon_val,
};
Expand All @@ -243,7 +243,9 @@ impl<'a> ClassQuery<'a> {
// a general category. (Currently, we don't even support the
// 'Case_Folding' property. But if we do in the future, users will be
// required to spell it out.)
if norm != "cf" {
// 'sc' refers to the 'Currency_Symbol' general category, but is also
// the abbreviation for the 'Script' property.
if norm != "cf" && norm != "sc" {
if let Some(canon) = canonical_prop(&norm)? {
return Ok(CanonicalClassQuery::Binary(canon));
}
Expand Down Expand Up @@ -462,24 +464,24 @@ fn canonical_script(normalized_value: &str) -> Result<Option<&'static str>> {
/// If the property names data is not available, then an error is returned.
fn canonical_prop(normalized_name: &str) -> Result<Option<&'static str>> {
#[cfg(not(any(
feature = "unicode-age",
feature = "unicode-bool",
feature = "unicode-gencat",
feature = "unicode-perl",
feature = "unicode-script",
feature = "unicode-segment",
feature = "unicode-age",
feature = "unicode-bool",
feature = "unicode-gencat",
feature = "unicode-perl",
feature = "unicode-script",
feature = "unicode-segment",
)))]
fn imp(_: &str) -> Result<Option<&'static str>> {
Err(Error::PropertyNotFound)
}

#[cfg(any(
feature = "unicode-age",
feature = "unicode-bool",
feature = "unicode-gencat",
feature = "unicode-perl",
feature = "unicode-script",
feature = "unicode-segment",
feature = "unicode-age",
feature = "unicode-bool",
feature = "unicode-gencat",
feature = "unicode-perl",
feature = "unicode-script",
feature = "unicode-segment",
))]
fn imp(name: &str) -> Result<Option<&'static str>> {
use crate::unicode_tables::property_names::PROPERTY_NAMES;
Expand Down Expand Up @@ -519,24 +521,24 @@ fn property_values(
canonical_property_name: &'static str,
) -> Result<Option<PropertyValues>> {
#[cfg(not(any(
feature = "unicode-age",
feature = "unicode-bool",
feature = "unicode-gencat",
feature = "unicode-perl",
feature = "unicode-script",
feature = "unicode-segment",
feature = "unicode-age",
feature = "unicode-bool",
feature = "unicode-gencat",
feature = "unicode-perl",
feature = "unicode-script",
feature = "unicode-segment",
)))]
fn imp(_: &'static str) -> Result<Option<PropertyValues>> {
Err(Error::PropertyValueNotFound)
}

#[cfg(any(
feature = "unicode-age",
feature = "unicode-bool",
feature = "unicode-gencat",
feature = "unicode-perl",
feature = "unicode-script",
feature = "unicode-segment",
feature = "unicode-age",
feature = "unicode-bool",
feature = "unicode-gencat",
feature = "unicode-perl",
feature = "unicode-script",
feature = "unicode-segment",
))]
fn imp(name: &'static str) -> Result<Option<PropertyValues>> {
use crate::unicode_tables::property_values::PROPERTY_VALUES;
Expand Down Expand Up @@ -569,15 +571,15 @@ fn property_set(
///
/// If the given age value isn't valid or if the data isn't available, then an
/// error is returned instead.
fn ages(canonical_age: &str) -> Result<impl Iterator<Item = Range>> {
fn ages(canonical_age: &str) -> Result<impl Iterator<Item=Range>> {
#[cfg(not(feature = "unicode-age"))]
fn imp(_: &str) -> Result<impl Iterator<Item = Range>> {
fn imp(_: &str) -> Result<impl Iterator<Item=Range>> {
use std::option::IntoIter;
Err::<IntoIter<Range>, _>(Error::PropertyNotFound)
}

#[cfg(feature = "unicode-age")]
fn imp(canonical_age: &str) -> Result<impl Iterator<Item = Range>> {
fn imp(canonical_age: &str) -> Result<impl Iterator<Item=Range>> {
use crate::unicode_tables::age;

const AGES: &[(&str, Range)] = &[
Expand Down Expand Up @@ -878,7 +880,7 @@ mod tests {
};

#[cfg(feature = "unicode-case")]
fn simple_fold_ok(c: char) -> impl Iterator<Item = char> {
fn simple_fold_ok(c: char) -> impl Iterator<Item=char> {
simple_fold(c).unwrap().unwrap()
}

Expand Down
1 change: 1 addition & 0 deletions tests/unicode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ mat!(uni_class_gencat_format, r"\p{Format}", "\u{E007F}", Some((0, 4)));
// See: https://github.com/rust-lang/regex/issues/719
mat!(uni_class_gencat_format_abbrev1, r"\p{cf}", "\u{E007F}", Some((0, 4)));
mat!(uni_class_gencat_format_abbrev2, r"\p{gc=cf}", "\u{E007F}", Some((0, 4)));
mat!(uni_class_gencat_format_abbrev3, r"\p{Sc}", "$", Some((0, 1)));
mat!(
uni_class_gencat_initial_punctuation,
r"\p{Initial_Punctuation}",
Expand Down

0 comments on commit 0b098ce

Please sign in to comment.