Skip to content

Commit

Permalink
Create wrapper object for ScriptExtensions array return value unicode…
Browse files Browse the repository at this point in the history
…-org#1536 (unicode-org#1990)

* Create wrapper object for ScriptExtensions array return value unicode-org#1536

* unicode-org#1536 Create wrapper object for ScriptExtensions array return value

* Resolving a minor issue with code comments.

* Update components/properties/src/script.rs

Co-authored-by: Shane F. Carr <shane@unicode.org>

* Addressing review comments.

* Making ScriptExtensionsSet.values private

* Addressing a minor review comment.

Co-authored-by: Shane F. Carr <shane@unicode.org>
  • Loading branch information
2 people authored and yzhang1994 committed Jun 8, 2022
1 parent ded4184 commit 635e8e8
Show file tree
Hide file tree
Showing 2 changed files with 102 additions and 25 deletions.
66 changes: 58 additions & 8 deletions components/properties/src/script.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,55 @@ impl From<ScriptWithExt> for Script {
}
}

/// A borrowed view over the array of [`Script`] values produced by a
/// Script_Extensions lookup.
///
/// The wrapper holds only a reference to a [`ZeroSlice`], so it is cheap to
/// copy and pass by value. The underlying slice is not exposed; the set is
/// inspected through the methods defined on this type.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct ScriptExtensionsSet<'a> {
    /// The wrapped script values, kept private so the storage type is not
    /// part of the public API.
    values: &'a ZeroSlice<Script>,
}

impl ScriptExtensionsSet<'_> {
/// Returns whether this set contains the given script.
///
/// # Example
///
/// ```
/// use icu::properties::{script, Script};
/// let provider = icu_testdata::get_provider();
/// let payload = script::get_script_with_extensions(&provider).expect("The data should be valid");
/// let data_struct = payload.get();
/// let swe = &data_struct.data;
///
/// assert!(swe
/// .get_script_extensions_val(0x11303) // GRANTHA SIGN VISARGA
/// .contains(&Script::Grantha));
/// ```
pub fn contains(&self, x: &Script) -> bool {
ZeroSlice::binary_search(&*self.values, x).is_ok()
}

/// Gets an iterator over the elements.
///
/// # Example
///
/// ```
/// use icu::properties::{script, Script};
/// let provider = icu_testdata::get_provider();
/// let payload = script::get_script_with_extensions(&provider).expect("The data should be valid");
/// let data_struct = payload.get();
/// let swe = &data_struct.data;
///
/// assert_eq!(
/// swe.get_script_extensions_val('௫' as u32) // U+0BEB TAMIL DIGIT FIVE
/// .iter()
/// .collect::<Vec<Script>>(),
/// vec![Script::Tamil, Script::Grantha]
/// );
/// ```
pub fn iter(&self) -> impl DoubleEndedIterator<Item = Script> + '_ {
ZeroSlice::iter(&*self.values)
}
}

/// A data structure that represents the data for both Script and
/// Script_Extensions properties in an efficient way. This structure matches
/// the data and data structures that are stored in the corresponding ICU data
Expand Down Expand Up @@ -317,12 +366,12 @@ impl<'data> ScriptWithExtensions<'data> {
///
/// If `code_point` has Script_Extensions, then return the Script codes in
/// the Script_Extensions. In this case, the Script property value
/// (normally Common or Inherited) is not included in the [`ZeroSlice`].
/// (normally Common or Inherited) is not included in the [`ScriptExtensionsSet`].
///
/// If `code_point` does not have Script_Extensions, then the one Script code is put
/// into the [`ZeroSlice`] and also returned.
/// into the [`ScriptExtensionsSet`] and also returned.
///
/// If c is not a valid code point, then return an empty [`ZeroSlice`].
/// If `code_point` is not a valid code point, then return an empty [`ScriptExtensionsSet`].
///
/// # Examples
///
Expand Down Expand Up @@ -360,15 +409,16 @@ impl<'data> ScriptWithExtensions<'data> {
/// vec![Script::Tamil, Script::Grantha]
/// );
/// ```
pub fn get_script_extensions_val(&self, code_point: u32) -> &ZeroSlice<Script> {
pub fn get_script_extensions_val(&self, code_point: u32) -> ScriptExtensionsSet {
let sc_with_ext_ule = self.trie.get_ule(code_point);

match sc_with_ext_ule {
Some(ule_ref) => self.get_scx_val_using_trie_val(ule_ref),
None => ZeroSlice::from_ule_slice(&[]),
ScriptExtensionsSet {
values: match sc_with_ext_ule {
Some(ule_ref) => self.get_scx_val_using_trie_val(ule_ref),
None => ZeroSlice::from_ule_slice(&[]),
},
}
}

/// Returns whether `script` is contained in the Script_Extensions
/// property value if the code_point has Script_Extensions, otherwise
/// if the code point does not have Script_Extensions then returns
Expand Down
61 changes: 44 additions & 17 deletions provider/datagen/src/transform/uprops/script.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,6 @@ mod tests {

#[test]
fn test_scx_array_from_script_extensions() {
use zerovec::ZeroVec;

let provider = ScriptWithExtensionsPropertyProvider::from(&SourceData::for_test());

let payload: DataPayload<ScriptWithExtensionsPropertyV1Marker> = provider
Expand All @@ -131,34 +129,63 @@ mod tests {
let swe: &ScriptWithExtensions = &payload.get().data;

assert_eq!(
swe.get_script_extensions_val('𐓐' as u32).as_zerovec(), /* U+104D0 OSAGE CAPITAL LETTER KHA */
ZeroVec::<Script>::alloc_from_slice(&[Script::Osage])
swe.get_script_extensions_val('𐓐' as u32) /* U+104D0 OSAGE CAPITAL LETTER KHA */
.iter()
.collect::<Vec<Script>>(),
vec![Script::Osage]
);
assert_eq!(
swe.get_script_extensions_val('🥳' as u32) /* U+1F973 FACE WITH PARTY HORN AND PARTY HAT */
.iter()
.collect::<Vec<Script>>(),
vec![Script::Common]
);
assert_eq!(
swe.get_script_extensions_val('🥳' as u32).as_zerovec(), /* U+1F973 FACE WITH PARTY HORN AND PARTY HAT */
ZeroVec::<Script>::alloc_from_slice(&[Script::Common])
swe.get_script_extensions_val(0x200D) // ZERO WIDTH JOINER
.iter()
.collect::<Vec<Script>>(),
vec![Script::Inherited]
);
assert_eq!(
swe.get_script_extensions_val(0x200D).as_zerovec(), // ZERO WIDTH JOINER
ZeroVec::<Script>::alloc_from_slice(&[Script::Inherited])
swe.get_script_extensions_val('௫' as u32) // U+0BEB TAMIL DIGIT FIVE
.iter()
.collect::<Vec<Script>>(),
vec![Script::Tamil, Script::Grantha]
);
assert_eq!(
swe.get_script_extensions_val('௫' as u32).as_zerovec(), // U+0BEB TAMIL DIGIT FIVE
ZeroVec::<Script>::alloc_from_slice(&[Script::Tamil, Script::Grantha])
swe.get_script_extensions_val(0x11303) // GRANTHA SIGN VISARGA
.iter()
.collect::<Vec<Script>>(),
vec![Script::Tamil, Script::Grantha]
);
assert_eq!(
swe.get_script_extensions_val(0x11303).as_zerovec(), // GRANTHA SIGN VISARGA
ZeroVec::<Script>::alloc_from_slice(&[Script::Tamil, Script::Grantha])
swe.get_script_extensions_val(0x30A0) // KATAKANA-HIRAGANA DOUBLE HYPHEN
.iter()
.collect::<Vec<Script>>(),
vec![Script::Hiragana, Script::Katakana]
);

assert_eq!(
swe.get_script_extensions_val(0x30A0).as_zerovec(), // KATAKANA-HIRAGANA DOUBLE HYPHEN
ZeroVec::<Script>::alloc_from_slice(&[Script::Hiragana, Script::Katakana])
swe.get_script_extensions_val(0x200D) // ZERO WIDTH JOINER
.iter()
.next(),
Some(Script::Inherited)
);

// Invalid code point
assert!(swe
.get_script_extensions_val(0x11303) // GRANTHA SIGN VISARGA
.contains(&Script::Grantha));

assert!(!swe
.get_script_extensions_val(0x11303) // GRANTHA SIGN VISARGA
.contains(&Script::Common));

// Invalid code point
assert_eq!(
swe.get_script_extensions_val(0x11_0000).as_zerovec(), // CODE_POINT_MAX + 1 is invalid
ZeroVec::<Script>::alloc_from_slice(&[Script::Unknown])
swe.get_script_extensions_val(0x11_0000) // CODE_POINT_MAX + 1 is invalid
.iter()
.collect::<Vec<Script>>(),
vec![Script::Unknown]
);
}

Expand Down

0 comments on commit 635e8e8

Please sign in to comment.