Skip to content

Commit

Permalink
Add docs explaining Ord on Locale (#5296)
Browse files Browse the repository at this point in the history
See #1215 and #5295
  • Loading branch information
sffc authored Jul 25, 2024
1 parent f5dc8de commit 77632d8
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 38 deletions.
80 changes: 61 additions & 19 deletions components/locale_core/src/langid.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,36 @@ use writeable::Writeable;

/// A core struct representing a [`Unicode BCP47 Language Identifier`].
///
/// # Parsing
///
/// Unicode recognizes three levels of standard conformance for any language identifier:
///
/// * *well-formed* - syntactically correct
/// * *valid* - well-formed and only uses registered language, region, script and variant subtags...
/// * *canonical* - valid and no deprecated codes or structure.
///
/// At the moment, parsing normalizes a well-formed language identifier by converting
/// `_` separators to `-` and adjusting casing to conform to the Unicode standard.
///
/// Any subtag that is not well-formed will cause parsing to fail with an error.
/// No validation of subtags against the registered values is performed.
///
/// # Ordering
///
/// This type deliberately does not implement `Ord` or `PartialOrd` because there are
/// multiple possible orderings, and the team did not want to favor one over any other.
///
/// Instead, there are functions available that return these different orderings:
///
/// - [`LanguageIdentifier::strict_cmp`]
/// - [`LanguageIdentifier::total_cmp`]
///
/// See issue: <https://github.com/unicode-org/icu4x/issues/1215>
///
/// # Examples
///
/// Simple example:
///
/// ```
/// use icu::locale::{
/// langid,
Expand All @@ -31,21 +59,7 @@ use writeable::Writeable;
/// assert_eq!(li.variants.len(), 0);
/// ```
///
/// # Parsing
///
/// Unicode recognizes three levels of standard conformance for any language identifier:
///
/// * *well-formed* - syntactically correct
/// * *valid* - well-formed and only uses registered language, region, script and variant subtags...
/// * *canonical* - valid and no deprecated codes or structure.
///
/// At the moment parsing normalizes a well-formed language identifier converting
/// `_` separators to `-` and adjusting casing to conform to the Unicode standard.
///
/// Any bogus subtags will cause the parsing to fail with an error.
/// No subtag validation is performed.
///
/// # Examples
/// More complex example:
///
/// ```
/// use icu::locale::{
Expand All @@ -62,7 +76,7 @@ use writeable::Writeable;
/// ```
///
/// [`Unicode BCP47 Language Identifier`]: https://unicode.org/reports/tr35/tr35.html#Unicode_language_identifier
#[derive(Default, PartialEq, Eq, Clone, Hash)]
#[derive(Default, PartialEq, Eq, Clone, Hash)] // no Ord or PartialOrd: see docs
#[allow(clippy::exhaustive_structs)] // This struct is stable (and invoked by a macro)
pub struct LanguageIdentifier {
/// Language subtag of the language identifier.
Expand Down Expand Up @@ -227,11 +241,39 @@ impl LanguageIdentifier {
}

/// Compare this [`LanguageIdentifier`] with another [`LanguageIdentifier`] field-by-field.
/// The result is a total ordering sufficient for use in a [`BTreeMap`].
/// The result is a total ordering sufficient for use in a [`BTreeSet`].
///
/// Unlike [`LanguageIdentifier::strict_cmp`], the ordering may or may not be equivalent
/// to string ordering, and it may or may not be stable across ICU4X releases.
///
/// Unlike [`Self::strict_cmp`], this function's ordering may not equal string ordering.
/// # Examples
///
/// Using a wrapper type to store [`LanguageIdentifier`]s in a [`BTreeSet`]:
///
/// ```no_run
/// use icu::locale::LanguageIdentifier;
/// use std::cmp::Ordering;
/// use std::collections::BTreeSet;
///
/// #[derive(PartialEq, Eq)]
/// struct LanguageIdentifierTotalOrd(LanguageIdentifier);
///
/// impl Ord for LanguageIdentifierTotalOrd {
/// fn cmp(&self, other: &Self) -> Ordering {
/// self.0.total_cmp(&other.0)
/// }
/// }
///
/// impl PartialOrd for LanguageIdentifierTotalOrd {
/// fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
/// Some(self.cmp(other))
/// }
/// }
///
/// let _: BTreeSet<LanguageIdentifierTotalOrd> = unimplemented!();
/// ```
///
/// [`BTreeMap`]: alloc::collections::BTreeMap
/// [`BTreeSet`]: alloc::collections::BTreeSet
pub fn total_cmp(&self, other: &Self) -> Ordering {
    // Compare the two identifiers through the tuple views returned by `as_tuple`.
    let lhs = self.as_tuple();
    let rhs = other.as_tuple();
    lhs.cmp(&rhs)
}
Expand Down
79 changes: 60 additions & 19 deletions components/locale_core/src/locale.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,37 @@ use writeable::Writeable;
/// [`Locale`] exposes all of the same fields and methods as [`LanguageIdentifier`], and
/// on top of that is able to parse, manipulate and serialize Unicode extension fields.
///
/// # Parsing
///
/// Unicode recognizes three levels of standard conformance for a locale:
///
/// * *well-formed* - syntactically correct
/// * *valid* - well-formed and only uses registered language subtags, extensions, keywords, types...
/// * *canonical* - valid and no deprecated codes or structure.
///
/// At the moment, parsing normalizes a well-formed locale identifier by converting
/// `_` separators to `-` and adjusting casing to conform to the Unicode standard.
///
/// Any subtag that is not well-formed will cause parsing to fail with an error.
///
/// No subtag validation or alias resolution is performed.
///
/// # Ordering
///
/// This type deliberately does not implement `Ord` or `PartialOrd` because there are
/// multiple possible orderings, and the team did not want to favor one over any other.
///
/// Instead, there are functions available that return these different orderings:
///
/// - [`Locale::strict_cmp`]
/// - [`Locale::total_cmp`]
///
/// See issue: <https://github.com/unicode-org/icu4x/issues/1215>
///
/// # Examples
///
/// Simple example:
///
/// ```
/// use icu::locale::{
/// extensions::unicode::{key, value},
Expand All @@ -44,22 +72,7 @@ use writeable::Writeable;
/// );
/// ```
///
/// # Parsing
///
/// Unicode recognizes three levels of standard conformance for a locale:
///
/// * *well-formed* - syntactically correct
/// * *valid* - well-formed and only uses registered language subtags, extensions, keywords, types...
/// * *canonical* - valid and no deprecated codes or structure.
///
/// At the moment parsing normalizes a well-formed locale identifier converting
/// `_` separators to `-` and adjusting casing to conform to the Unicode standard.
///
/// Any bogus subtags will cause the parsing to fail with an error.
///
/// No subtag validation or alias resolution is performed.
///
/// # Examples
/// More complex example:
///
/// ```
/// use icu::locale::{subtags::*, Locale};
Expand All @@ -76,8 +89,9 @@ use writeable::Writeable;
/// "valencia".parse::<Variant>().ok().as_ref()
/// );
/// ```
///
/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/tr35.html#Unicode_locale_identifier
#[derive(Default, PartialEq, Eq, Clone, Hash)]
#[derive(Default, PartialEq, Eq, Clone, Hash)] // no Ord or PartialOrd: see docs
#[allow(clippy::exhaustive_structs)] // This struct is stable (and invoked by a macro)
pub struct Locale {
/// The basic language/script/region components in the locale identifier along with any variants.
Expand Down Expand Up @@ -233,8 +247,35 @@ impl Locale {

/// Returns an ordering suitable for use in [`BTreeSet`].
///
/// The ordering may or may not be equivalent to string ordering, and it
/// may or may not be stable across ICU4X releases.
/// Unlike [`Locale::strict_cmp`], the ordering may or may not be equivalent
/// to string ordering, and it may or may not be stable across ICU4X releases.
///
/// # Examples
///
/// Using a wrapper type to store [`Locale`]s in a [`BTreeSet`]:
///
/// ```no_run
/// use icu::locale::Locale;
/// use std::cmp::Ordering;
/// use std::collections::BTreeSet;
///
/// #[derive(PartialEq, Eq)]
/// struct LocaleTotalOrd(Locale);
///
/// impl Ord for LocaleTotalOrd {
/// fn cmp(&self, other: &Self) -> Ordering {
/// self.0.total_cmp(&other.0)
/// }
/// }
///
/// impl PartialOrd for LocaleTotalOrd {
/// fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
/// Some(self.cmp(other))
/// }
/// }
///
/// let _: BTreeSet<LocaleTotalOrd> = unimplemented!();
/// ```
///
/// [`BTreeSet`]: alloc::collections::BTreeSet
pub fn total_cmp(&self, other: &Self) -> Ordering {
Expand Down

0 comments on commit 77632d8

Please sign in to comment.