Skip to content

Commit

Permalink
Implement Joining_Type property (#4599)
Browse files Browse the repository at this point in the history
  • Loading branch information
jedel1043 authored Feb 12, 2024
1 parent 170c971 commit 3f2a7c8
Show file tree
Hide file tree
Showing 35 changed files with 7,054 additions and 4 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
- `icu_properties`
- Add `Aran` script code (https://github.com/unicode-org/icu4x/pull/4426)
- Mark `BidiClassAdapter::new` as const (https://github.com/unicode-org/icu4x/pull/4584)
- Implement Joining_Type property (https://github.com/unicode-org/icu4x/pull/4599)
- `icu_segmenter`
- Fix Unicode 15.0 line breaking (https://github.com/unicode-org/icu4x/pull/4389)
- Data model and providers
Expand Down
3 changes: 2 additions & 1 deletion components/properties/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ mod trievalue;

pub use props::{
BidiClass, CanonicalCombiningClass, EastAsianWidth, GeneralCategory, GeneralCategoryGroup,
GraphemeClusterBreak, IndicSyllabicCategory, LineBreak, Script, SentenceBreak, WordBreak,
GraphemeClusterBreak, IndicSyllabicCategory, JoiningType, LineBreak, Script, SentenceBreak,
WordBreak,
};

/// Module for working with the names of property values
Expand Down
25 changes: 25 additions & 0 deletions components/properties/src/maps.rs
Original file line number Diff line number Diff line change
Expand Up @@ -600,3 +600,28 @@ make_map_property! {
pub const indic_syllabic_category => SINGLETON_PROPS_INSC_V1;
pub fn load_indic_syllabic_category();
}

// Declares the code point map accessors for the Joining_Type property by handing the
// property name, its marker types and its value type to `make_map_property!`.
// The `func:` section carries the rustdoc (including the doctest) and names the two
// accessors: the const `joining_type`, paired with `SINGLETON_PROPS_JT_V1`, and
// `load_joining_type`.
// NOTE(review): the macro definition is not visible in this hunk; presumably the
// expansion matches the other `make_map_property!` invocations in this file — confirm.
make_map_property! {
property: "Joining_Type";
marker: JoiningTypeProperty;
value: crate::JoiningType;
keyed_data_marker: JoiningTypeV1Marker;
func:
/// Return a [`CodePointMapDataBorrowed`] for the Joining_Type Unicode enumerated
/// property. See [`JoiningType`].
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::{maps, JoiningType};
///
/// assert_eq!(maps::joining_type().get('ؠ'), JoiningType::DualJoining); // U+0620: Arabic Letter Kashmiri Yeh
/// assert_eq!(maps::joining_type().get('𐫍'), JoiningType::LeftJoining); // U+10ACD: Manichaean Letter Heth
/// ```
pub const joining_type => SINGLETON_PROPS_JT_V1;
pub fn load_joining_type();
}
92 changes: 92 additions & 0 deletions components/properties/src/props.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2364,3 +2364,95 @@ impl_value_getter! {
pub fn get_enum_to_long_name_mapper() / enum_to_long_name_mapper() -> PropertyEnumToValueNameLinearMapper / PropertyEnumToValueNameLinearMapperBorrowed;
}
}
/// Enumerated property Joining_Type.
///
/// See Section 9.2, Arabic Cursive Joining in The Unicode Standard for the summary of
/// each property value.
///
/// This is a newtype around the raw `u8` property value. The named values are exposed
/// as associated constants on this type, for example `JoiningType::DualJoining`.
///
/// The numeric value is compatible with `UJoiningType` in ICU4C.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "datagen", derive(databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_properties))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
#[zerovec::make_ule(JoiningTypeULE)]
pub struct JoiningType(pub u8);

#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl JoiningType {
pub const NonJoining: JoiningType = JoiningType(0); // name="U"
pub const JoinCausing: JoiningType = JoiningType(1); // name="C"
pub const DualJoining: JoiningType = JoiningType(2); // name="D"
pub const LeftJoining: JoiningType = JoiningType(3); // name="L"
pub const RightJoining: JoiningType = JoiningType(4); // name="R"
pub const Transparent: JoiningType = JoiningType(5); // name="T"
}

// Declares the name <-> value lookup constructors for `JoiningType`.
// The `markers:` line pairs each data marker with its compiled-data singleton, in the
// order: name-to-value, value-to-short-name, value-to-long-name. Each entry inside the
// `impl` section supplies the rustdoc plus the `get_*` / compiled-data function names.
// NOTE(review): the macro definition is not visible in this hunk; confirm the exact
// signatures it generates against the macro itself.
impl_value_getter! {
markers: JoiningTypeNameToValueV1Marker / SINGLETON_PROPNAMES_FROM_JT_V1, JoiningTypeValueToShortNameV1Marker / SINGLETON_PROPNAMES_TO_SHORT_LINEAR_JT_V1, JoiningTypeValueToLongNameV1Marker / SINGLETON_PROPNAMES_TO_LONG_LINEAR_JT_V1;
impl JoiningType {
/// Return a [`PropertyValueNameToEnumMapper`], capable of looking up values
/// from strings for the `Joining_Type` enumerated property.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::JoiningType;
///
/// let lookup = JoiningType::name_to_enum_mapper();
/// // short name for value
/// assert_eq!(lookup.get_strict("T"), Some(JoiningType::Transparent));
/// assert_eq!(lookup.get_strict("D"), Some(JoiningType::DualJoining));
/// // long name for value
/// assert_eq!(lookup.get_strict("Join_Causing"), Some(JoiningType::JoinCausing));
/// assert_eq!(lookup.get_strict("Non_Joining"), Some(JoiningType::NonJoining));
/// // name has incorrect casing
/// assert_eq!(lookup.get_strict("LEFT_JOINING"), None);
/// // loose matching of name
/// assert_eq!(lookup.get_loose("LEFT_JOINING"), Some(JoiningType::LeftJoining));
/// // fake property value name
/// assert_eq!(lookup.get_strict("Inner_Joining"), None);
/// ```
pub fn get_name_to_enum_mapper() / name_to_enum_mapper();
/// Return a [`PropertyEnumToValueNameLinearMapper`], capable of looking up short names
/// for values of the `Joining_Type` enumerated property.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::JoiningType;
///
/// let lookup = JoiningType::enum_to_short_name_mapper();
/// assert_eq!(lookup.get(JoiningType::JoinCausing), Some("C"));
/// assert_eq!(lookup.get(JoiningType::LeftJoining), Some("L"));
/// ```
pub fn get_enum_to_short_name_mapper() / enum_to_short_name_mapper() -> PropertyEnumToValueNameLinearMapper / PropertyEnumToValueNameLinearMapperBorrowed;
/// Return a [`PropertyEnumToValueNameLinearMapper`], capable of looking up long names
/// for values of the `Joining_Type` enumerated property.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::JoiningType;
///
/// let lookup = JoiningType::enum_to_long_name_mapper();
/// assert_eq!(lookup.get(JoiningType::Transparent), Some("Transparent"));
/// assert_eq!(lookup.get(JoiningType::NonJoining), Some("Non_Joining"));
/// assert_eq!(lookup.get(JoiningType::RightJoining), Some("Right_Joining"));
/// ```
pub fn get_enum_to_long_name_mapper() / enum_to_long_name_mapper() -> PropertyEnumToValueNameLinearMapper / PropertyEnumToValueNameLinearMapperBorrowed;
}
}
14 changes: 14 additions & 0 deletions components/properties/src/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ const _: () = {
icu_properties_data::impl_propnames_from_gc_v1!(Baked);
icu_properties_data::impl_propnames_from_gcm_v1!(Baked);
icu_properties_data::impl_propnames_from_insc_v1!(Baked);
icu_properties_data::impl_propnames_from_jt_v1!(Baked);
icu_properties_data::impl_propnames_from_lb_v1!(Baked);
icu_properties_data::impl_propnames_from_sb_v1!(Baked);
icu_properties_data::impl_propnames_from_sc_v1!(Baked);
Expand All @@ -67,6 +68,7 @@ const _: () = {
icu_properties_data::impl_propnames_to_long_linear_gc_v1!(Baked);
icu_properties_data::impl_propnames_to_long_linear_gcb_v1!(Baked);
icu_properties_data::impl_propnames_to_long_linear_insc_v1!(Baked);
icu_properties_data::impl_propnames_to_long_linear_jt_v1!(Baked);
icu_properties_data::impl_propnames_to_long_linear_lb_v1!(Baked);
icu_properties_data::impl_propnames_to_long_linear_sb_v1!(Baked);
icu_properties_data::impl_propnames_to_long_linear_sc_v1!(Baked);
Expand All @@ -77,6 +79,7 @@ const _: () = {
icu_properties_data::impl_propnames_to_short_linear_gc_v1!(Baked);
icu_properties_data::impl_propnames_to_short_linear_gcb_v1!(Baked);
icu_properties_data::impl_propnames_to_short_linear_insc_v1!(Baked);
icu_properties_data::impl_propnames_to_short_linear_jt_v1!(Baked);
icu_properties_data::impl_propnames_to_short_linear_lb_v1!(Baked);
icu_properties_data::impl_propnames_to_short_linear_sb_v1!(Baked);
icu_properties_data::impl_propnames_to_short_linear_wb_v1!(Baked);
Expand Down Expand Up @@ -133,6 +136,7 @@ const _: () = {
icu_properties_data::impl_props_idst_v1!(Baked);
icu_properties_data::impl_props_insc_v1!(Baked);
icu_properties_data::impl_props_join_c_v1!(Baked);
icu_properties_data::impl_props_jt_v1!(Baked);
icu_properties_data::impl_props_lb_v1!(Baked);
icu_properties_data::impl_props_loe_v1!(Baked);
icu_properties_data::impl_props_lower_v1!(Baked);
Expand Down Expand Up @@ -896,6 +900,16 @@ expand!(
"InSC",
IndicSyllabicCategory
),
(
JoiningTypeV1Marker,
JoiningTypeNameToValueV1Marker,
(
linear: JoiningTypeValueToShortNameV1Marker,
JoiningTypeValueToLongNameV1Marker
),
"jt",
JoiningType
),
// note: the names key for the GCM mask is handled above
)
);
15 changes: 14 additions & 1 deletion components/properties/src/trievalue.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ use crate::provider::bidi_data::{
use crate::script::ScriptWithExt;
use crate::{
BidiClass, CanonicalCombiningClass, EastAsianWidth, GeneralCategory, GeneralCategoryGroup,
GraphemeClusterBreak, IndicSyllabicCategory, LineBreak, Script, SentenceBreak, WordBreak,
GraphemeClusterBreak, IndicSyllabicCategory, JoiningType, LineBreak, Script, SentenceBreak,
WordBreak,
};
use core::convert::TryInto;
use core::num::TryFromIntError;
Expand Down Expand Up @@ -246,3 +247,15 @@ impl TrieValue for MirroredPairedBracketData {
Self::try_from(i)
}
}

// Lets `JoiningType` be stored as a trie value: the wrapped `u8` is widened to `u32`
// on the way out and narrowed (fallibly) on the way in.
impl TrieValue for JoiningType {
    type TryFromU32Error = TryFromIntError;

    // Fails with `TryFromIntError` when `i` does not fit in a `u8`.
    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
        let raw = u8::try_from(i)?;
        Ok(Self(raw))
    }

    // Widening `u8` -> `u32` is lossless.
    fn to_u32(self) -> u32 {
        self.0.into()
    }
}
2 changes: 2 additions & 0 deletions ffi/capi/bindings/c/ICU4XCodePointMapData8.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions ffi/capi/bindings/cpp/ICU4XCodePointMapData8.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions ffi/capi/bindings/cpp/ICU4XCodePointMapData8.hpp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions ffi/capi/bindings/dart/CodePointMapData8.g.dart

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions ffi/capi/bindings/js/ICU4XCodePointMapData8.d.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions ffi/capi/bindings/js/ICU4XCodePointMapData8.mjs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions ffi/capi/src/properties_maps.rs
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,18 @@ pub mod ffi {
provider,
)?))
}

#[diplomat::rust_link(icu::properties::maps::joining_type, Fn)]
#[diplomat::rust_link(icu::properties::maps::load_joining_type, Fn, hidden)]
pub fn load_joining_type(
    provider: &ICU4XDataProvider,
) -> Result<Box<ICU4XCodePointMapData8>, ICU4XError> {
    // Build the Joining_Type map first (any failure propagates through `?`), then wrap
    // it in the 8-bit FFI map type.
    let joining_type_map = call_constructor_unstable!(
        maps::joining_type [r => Ok(r.static_to_owned())],
        maps::load_joining_type,
        provider,
    )?;
    Ok(convert_8(joining_type_map))
}
}

#[diplomat::opaque]
Expand Down
20 changes: 20 additions & 0 deletions provider/baked/properties/data/macros.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 3f2a7c8

Please sign in to comment.