From 5e5e16c6fddac511abba62ddda3481fc1f911254 Mon Sep 17 00:00:00 2001
From: Manish Goregaokar <manishsmail@gmail.com>
Date: Wed, 28 Jun 2023 13:20:34 +0200
Subject: [PATCH 1/6] Add IS_TITLE_CONTEXT

---
 experimental/casemapping/src/casemapping.rs | 30 ++++++++++++------
 experimental/casemapping/src/internals.rs   | 34 ++++++++++++---------
 2 files changed, 39 insertions(+), 25 deletions(-)
diff --git a/experimental/casemapping/src/casemapping.rs b/experimental/casemapping/src/casemapping.rs
index c5ae97995a2..7b000a744a0 100644
--- a/experimental/casemapping/src/casemapping.rs
+++ b/experimental/casemapping/src/casemapping.rs
@@ -87,7 +87,7 @@ impl CaseMapping {
         src: &'a str,
         langid: &LanguageIdentifier,
     ) -> impl Writeable + 'a {
-        self.data.get().full_helper_writeable(
+        self.data.get().full_helper_writeable::<false>(
             src,
             CaseMapLocale::from_langid(langid),
             MappingKind::Lower,
@@ -106,7 +106,7 @@ impl CaseMapping {
         src: &'a str,
         langid: &LanguageIdentifier,
     ) -> impl Writeable + 'a {
-        self.data.get().full_helper_writeable(
+        self.data.get().full_helper_writeable::<false>(
             src,
             CaseMapLocale::from_langid(langid),
             MappingKind::Upper,
@@ -123,7 +123,7 @@ impl CaseMapping {
     pub fn fold<'a>(&'a self, src: &'a str) -> impl Writeable + 'a {
         self.data
             .get()
-            .full_helper_writeable(src, CaseMapLocale::Root, MappingKind::Fold)
+            .full_helper_writeable::<false>(src, CaseMapLocale::Root, MappingKind::Fold)
     }
 
     /// Case-folds the characters in the given string as a [`Writeable`],
@@ -135,9 +135,11 @@ impl CaseMapping {
     /// See [`Self::fold_turkic_string()`] for the equivalent convenience function that returns a String,
     /// as well as for an example.
     pub fn fold_turkic<'a>(&'a self, src: &'a str) -> impl Writeable + 'a {
-        self.data
-            .get()
-            .full_helper_writeable(src, CaseMapLocale::Turkish, MappingKind::Fold)
+        self.data.get().full_helper_writeable::<false>(
+            src,
+            CaseMapLocale::Turkish,
+            MappingKind::Fold,
+        )
     }
 
     /// Returns the full lowercase mapping of the given string as a String.
@@ -169,7 +171,11 @@ impl CaseMapping {
     pub fn lowercase_to_string(&self, src: &str, langid: &LanguageIdentifier) -> String {
         self.data
             .get()
-            .full_helper_writeable(src, CaseMapLocale::from_langid(langid), MappingKind::Lower)
+            .full_helper_writeable::<false>(
+                src,
+                CaseMapLocale::from_langid(langid),
+                MappingKind::Lower,
+            )
             .write_to_string()
             .into_owned()
     }
@@ -206,7 +212,11 @@ impl CaseMapping {
     pub fn uppercase_to_string(&self, src: &str, langid: &LanguageIdentifier) -> String {
         self.data
             .get()
-            .full_helper_writeable(src, CaseMapLocale::from_langid(langid), MappingKind::Upper)
+            .full_helper_writeable::<false>(
+                src,
+                CaseMapLocale::from_langid(langid),
+                MappingKind::Upper,
+            )
             .write_to_string()
             .into_owned()
     }
@@ -236,7 +246,7 @@ impl CaseMapping {
     pub fn fold_string(&self, src: &str) -> String {
         self.data
             .get()
-            .full_helper_writeable(src, CaseMapLocale::Root, MappingKind::Fold)
+            .full_helper_writeable::<false>(src, CaseMapLocale::Root, MappingKind::Fold)
             .write_to_string()
             .into_owned()
     }
@@ -270,7 +280,7 @@ impl CaseMapping {
     pub fn fold_turkic_string(&self, src: &str) -> String {
         self.data
             .get()
-            .full_helper_writeable(src, CaseMapLocale::Turkish, MappingKind::Fold)
+            .full_helper_writeable::<false>(src, CaseMapLocale::Turkish, MappingKind::Fold)
             .write_to_string()
             .into_owned()
     }
diff --git a/experimental/casemapping/src/internals.rs b/experimental/casemapping/src/internals.rs
index 7586d53553d..27f36681270 100644
--- a/experimental/casemapping/src/internals.rs
+++ b/experimental/casemapping/src/internals.rs
@@ -126,13 +126,18 @@ impl<'data> CaseMappingV1<'data> {
     }
 
     #[inline(always)]
-    fn full_helper(
+    // IS_TITLE_CONTEXT must be true if kind is MappingKind::Title
+    fn full_helper<const IS_TITLE_CONTEXT: bool>(
         &self,
         c: char,
         context: ContextIterator,
         locale: CaseMapLocale,
         kind: MappingKind,
     ) -> FullMappingResult {
+        // IS_TITLE_CONTEXT exists to avoid perf impacts on the other, more common modes
+        // Ensure that they are either both true or both false, i.e. an XNOR operation
+        debug_assert!(!(IS_TITLE_CONTEXT ^ (kind == MappingKind::Title)));
+
         let data = self.lookup_data(c);
         if !data.has_exception() {
             if data.is_relevant_to(kind) {
@@ -151,12 +156,7 @@ impl<'data> CaseMappingV1<'data> {
                     MappingKind::Lower => self.full_lower_special_case(c, context, locale),
                     MappingKind::Fold => self.full_fold_special_case(c, context, locale),
                     MappingKind::Upper | MappingKind::Title => self
-                        .full_upper_or_title_special_case(
-                            c,
-                            context,
-                            locale,
-                            kind == MappingKind::Title,
-                        ),
+                        .full_upper_or_title_special_case::<IS_TITLE_CONTEXT>(c, context, locale),
                 } {
                     return special;
                 }
@@ -276,12 +276,11 @@ impl<'data> CaseMappingV1<'data> {
         None
     }
 
-    fn full_upper_or_title_special_case(
+    fn full_upper_or_title_special_case<const IS_TITLE_CONTEXT: bool>(
         &self,
         c: char,
         context: ContextIterator,
         locale: CaseMapLocale,
-        is_title: bool,
     ) -> Option<FullMappingResult> {
         if locale == CaseMapLocale::Turkish && c == 'i' {
             // In Turkic languages, i turns into a dotted capital I.
@@ -297,7 +296,7 @@ impl<'data> CaseMappingV1<'data> {
         }
         // ICU4C's non-standard extension for Armenian ligature ech-yiwn.
         if c == '\u{587}' {
-            return match (locale, is_title) {
+            return match (locale, IS_TITLE_CONTEXT) {
                 (CaseMapLocale::Armenian, false) => Some(FullMappingResult::String("ԵՎ")),
                 (CaseMapLocale::Armenian, true) => Some(FullMappingResult::String("Եվ")),
                 (_, false) => Some(FullMappingResult::String("ԵՒ")),
@@ -325,20 +324,20 @@ impl<'data> CaseMappingV1<'data> {
             (_, _) => None,
         }
     }
-    pub(crate) fn full_helper_writeable<'a: 'data>(
+    pub(crate) fn full_helper_writeable<'a: 'data, const IS_TITLE_CONTEXT: bool>(
         &'a self,
         src: &'a str,
         locale: CaseMapLocale,
         mapping: MappingKind,
     ) -> impl Writeable + 'a {
-        struct FullCaseWriteable<'a> {
+        struct FullCaseWriteable<'a, const IS_TITLE_CONTEXT: bool> {
             data: &'a CaseMappingV1<'a>,
             src: &'a str,
             locale: CaseMapLocale,
             mapping: MappingKind,
         }
 
-        impl<'a> Writeable for FullCaseWriteable<'a> {
+        impl<'a, const IS_TITLE_CONTEXT: bool> Writeable for FullCaseWriteable<'a, IS_TITLE_CONTEXT> {
             #[allow(clippy::indexing_slicing)] // last_uncopied_index and i are known to be in bounds
             fn write_to<W: fmt::Write + ?Sized>(&self, sink: &mut W) -> fmt::Result {
                 // To speed up the copying of long runs where nothing changes, we keep track
@@ -348,7 +347,12 @@ impl<'data> CaseMappingV1<'data> {
                 let src = self.src;
                 for (i, c) in src.char_indices() {
                     let context = ContextIterator::new(&src[..i], &src[i..]);
-                    match self.data.full_helper(c, context, self.locale, self.mapping) {
+                    match self.data.full_helper::<IS_TITLE_CONTEXT>(
+                        c,
+                        context,
+                        self.locale,
+                        self.mapping,
+                    ) {
                         FullMappingResult::CodePoint(c2) => {
                             if c == c2 {
                                 continue;
@@ -378,7 +382,7 @@ impl<'data> CaseMappingV1<'data> {
             }
         }
 
-        FullCaseWriteable {
+        FullCaseWriteable::<IS_TITLE_CONTEXT> {
             data: self,
             src,
             locale,

From 210853dd24962e2df23ae14692f258169cb4b783 Mon Sep 17 00:00:00 2001
From: Manish Goregaokar <manishsmail@gmail.com>
Date: Tue, 27 Jun 2023 15:27:24 +0200
Subject: [PATCH 2/6] Add titlecasing functions

---
 experimental/casemapping/src/casemapping.rs | 115 ++++++++++++++++----
 1 file changed, 91 insertions(+), 24 deletions(-)

diff --git a/experimental/casemapping/src/casemapping.rs b/experimental/casemapping/src/casemapping.rs
index 7b000a744a0..b2a60f0cb1f 100644
--- a/experimental/casemapping/src/casemapping.rs
+++ b/experimental/casemapping/src/casemapping.rs
@@ -113,6 +113,31 @@ impl CaseMapping {
         )
     }
 
+    /// Returns the full titlecase mapping of the given string as a [`Writeable`], treating
+    /// the string as a single segment (and thus only titlecasing the beginning of it).
+    ///
+    /// This should typically be used as a lower-level helper to construct the titlecasing operation desired
+    /// by the application, for example one can titlecase on a per-word basis by mixing this with
+    /// a `WordSegmenter`.
+    ///
+    /// This function is context and language sensitive. Callers should pass the text's language
+    /// as a `LanguageIdentifier` (usually the `id` field of the `Locale`) if available, or
+    /// `Default::default()` for the root locale.
+    ///
+    /// See [`Self::titlecase_segment_to_string()`] for the equivalent convenience function that returns a String,
+    /// as well as for an example.
+    pub fn titlecase_segment<'a>(
+        &'a self,
+        src: &'a str,
+        langid: &LanguageIdentifier,
+    ) -> impl Writeable + 'a {
+        self.data.get().full_helper_writeable::<true>(
+            src,
+            CaseMapLocale::from_langid(langid),
+            MappingKind::Title,
+        )
+    }
+
     /// Case-folds the characters in the given string as a [`Writeable`].
     /// This function is locale-independent and context-insensitive.
     ///
@@ -221,6 +246,54 @@ impl CaseMapping {
             .into_owned()
     }
 
+    /// Returns the full titlecase mapping of the given string as a String, treating
+    /// the string as a single segment (and thus only titlecasing the beginning of it).
+    ///
+    /// This should typically be used as a lower-level helper to construct the titlecasing operation desired
+    /// by the application, for example one can titlecase on a per-word basis by mixing this with
+    /// a `WordSegmenter`.
+    ///
+    /// This function is context and language sensitive. Callers should pass the text's language
+    /// as a `LanguageIdentifier` (usually the `id` field of the `Locale`) if available, or
+    /// `Default::default()` for the root locale.
+    ///
+    /// See [`Self::titlecase_segment()`] for the equivalent lower-level function that returns a [`Writeable`]
+    ///
+    /// # Example
+    ///
+    /// ```rust,ignore
+    /// use icu_casemapping::CaseMapping;
+    /// use icu_locid::langid;
+    ///
+    /// let cm = CaseMapping::new();
+    /// let root = langid!("und");
+    ///
+    /// // note that the subsequent words are not titlecased, this function assumes
+    /// // that the entire string is a single segment and only titlecases at the beginning.
+    /// assert_eq!(cm.titlecase_segment_to_string("hEllO WorLd", &root), "Hello world");
+    /// assert_eq!(cm.titlecase_segment_to_string("Γειά σου Κόσμε", &root), "Γειά σου κόσμε");
+    /// assert_eq!(cm.titlecase_segment_to_string("नमस्ते दुनिया", &root), "नमस्ते दुनिया");
+    /// assert_eq!(cm.titlecase_segment_to_string("Привет мир", &root), "Привет мир");
+    ///
+    /// // Some behavior is language-sensitive
+    /// assert_eq!(cm.titlecase_segment_to_string("istanbul", &root), "Istanbuk");
+    /// assert_eq!(cm.titlecase_segment_to_string("istanbul", &langid!("tr")), "İstanbul"); // Turkish dotted i
+    ///
+    /// assert_eq!(cm.titlecase_segment_to_string("և Երևանի", &root), "Եւ Երևանի");
+    /// assert_eq!(cm.titlecase_segment_to_string("և Երևանի", &langid!("hy")), "Եվ Երևանի"); // Eastern Armenian ech-yiwn ligature
+    /// ```
+    pub fn titlecase_segment_to_string(&self, src: &str, langid: &LanguageIdentifier) -> String {
+        self.data
+            .get()
+            .full_helper_writeable::<true>(
+                src,
+                CaseMapLocale::from_langid(langid),
+                MappingKind::Title,
+            )
+            .write_to_string()
+            .into_owned()
+    }
+
     /// Case-folds the characters in the given string as a String.
     /// This function is locale-independent and context-insensitive.
     ///
@@ -499,18 +572,6 @@ mod tests {
     use super::*;
     use icu_locid::langid;
 
-    impl CaseMapping {
-        /// Only for testing titlecase special-cases, does NOT
-        /// segment input string
-        fn titlecase_to_string_test(&self, src: &str, langid: &LanguageIdentifier) -> String {
-            self.data
-                .get()
-                .full_helper_writeable(src, CaseMapLocale::from_langid(langid), MappingKind::Title)
-                .write_to_string()
-                .into_owned()
-        }
-    }
-
     #[test]
     /// Tests for SpecialCasing.txt. Some of the special cases are data-driven, some are code-driven
     fn test_special_cases() {
@@ -543,22 +604,22 @@ mod tests {
         );
 
         // U+1F80 GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
-        assert_eq!(cm.titlecase_to_string_test("ᾀ", &root), "ᾈ");
+        assert_eq!(cm.titlecase_segment_to_string("ᾀ", &root), "ᾈ");
         assert_eq!(cm.uppercase_to_string("ᾀ", &root), "ἈΙ");
 
         // U+1FFC GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
         assert_eq!(cm.lowercase_to_string("ῼ", &root), "ῳ");
-        assert_eq!(cm.titlecase_to_string_test("ῼ", &root), "ῼ");
+        assert_eq!(cm.titlecase_segment_to_string("ῼ", &root), "ῼ");
         assert_eq!(cm.uppercase_to_string("ῼ", &root), "ΩΙ");
 
         // U+1F98 GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
         assert_eq!(cm.lowercase_to_string("ᾘ", &root), "ᾐ");
-        assert_eq!(cm.titlecase_to_string_test("ᾘ", &root), "ᾘ");
+        assert_eq!(cm.titlecase_segment_to_string("ᾘ", &root), "ᾘ");
         assert_eq!(cm.uppercase_to_string("ᾘ", &root), "ἨΙ");
 
         // U+1FB2 GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
         assert_eq!(cm.lowercase_to_string("ᾲ", &root), "ᾲ");
-        assert_eq!(cm.titlecase_to_string_test("ᾲ", &root), "Ὰ\u{345}");
+        assert_eq!(cm.titlecase_segment_to_string("ᾲ", &root), "Ὰ\u{345}");
         assert_eq!(cm.uppercase_to_string("ᾲ", &root), "ᾺΙ");
 
         // Final sigma test
@@ -571,32 +632,38 @@ mod tests {
         // U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE
         assert_eq!(cm.lowercase_to_string("İ", &tr), "i");
         assert_eq!(cm.lowercase_to_string("İ", &az), "i");
-        assert_eq!(cm.titlecase_to_string_test("İ", &tr), "İ");
-        assert_eq!(cm.titlecase_to_string_test("İ", &az), "İ");
+        assert_eq!(cm.titlecase_segment_to_string("İ", &tr), "İ");
+        assert_eq!(cm.titlecase_segment_to_string("İ", &az), "İ");
         assert_eq!(cm.uppercase_to_string("İ", &tr), "İ");
         assert_eq!(cm.uppercase_to_string("İ", &az), "İ");
 
         // U+0049 LATIN CAPITAL LETTER I and U+0307 COMBINING DOT ABOVE
         assert_eq!(cm.lowercase_to_string("I\u{0307}", &tr), "i");
         assert_eq!(cm.lowercase_to_string("I\u{0307}", &az), "i");
-        assert_eq!(cm.titlecase_to_string_test("I\u{0307}", &tr), "I\u{0307}");
-        assert_eq!(cm.titlecase_to_string_test("I\u{0307}", &az), "I\u{0307}");
+        assert_eq!(
+            cm.titlecase_segment_to_string("I\u{0307}", &tr),
+            "I\u{0307}"
+        );
+        assert_eq!(
+            cm.titlecase_segment_to_string("I\u{0307}", &az),
+            "I\u{0307}"
+        );
         assert_eq!(cm.uppercase_to_string("I\u{0307}", &tr), "I\u{0307}");
         assert_eq!(cm.uppercase_to_string("I\u{0307}", &az), "I\u{0307}");
 
         // U+0049 LATIN CAPITAL LETTER I
         assert_eq!(cm.lowercase_to_string("I", &tr), "ı");
         assert_eq!(cm.lowercase_to_string("I", &az), "ı");
-        assert_eq!(cm.titlecase_to_string_test("I", &tr), "I");
-        assert_eq!(cm.titlecase_to_string_test("I", &az), "I");
+        assert_eq!(cm.titlecase_segment_to_string("I", &tr), "I");
+        assert_eq!(cm.titlecase_segment_to_string("I", &az), "I");
         assert_eq!(cm.uppercase_to_string("I", &tr), "I");
         assert_eq!(cm.uppercase_to_string("I", &az), "I");
 
         // U+0069 LATIN SMALL LETTER I
         assert_eq!(cm.lowercase_to_string("i", &tr), "i");
         assert_eq!(cm.lowercase_to_string("i", &az), "i");
-        assert_eq!(cm.titlecase_to_string_test("i", &tr), "İ");
-        assert_eq!(cm.titlecase_to_string_test("i", &az), "İ");
+        assert_eq!(cm.titlecase_segment_to_string("i", &tr), "İ");
+        assert_eq!(cm.titlecase_segment_to_string("i", &az), "İ");
         assert_eq!(cm.uppercase_to_string("i", &tr), "İ");
         assert_eq!(cm.uppercase_to_string("i", &az), "İ");
     }

From 282d43aa870232ba373c664dfbd7479aeadd5b81 Mon Sep 17 00:00:00 2001
From: Manish Goregaokar <manishsmail@gmail.com>
Date: Wed, 28 Jun 2023 14:00:01 +0200
Subject: [PATCH 3/6] Handle basic titlecasing

---
 experimental/casemapping/src/casemapping.rs   | 35 ++++++++++---------
 experimental/casemapping/src/internals.rs     | 25 ++++++++++---
 experimental/casemapping/tests/conversions.rs | 12 ++++---
 3 files changed, 47 insertions(+), 25 deletions(-)

diff --git a/experimental/casemapping/src/casemapping.rs b/experimental/casemapping/src/casemapping.rs
index b2a60f0cb1f..e69944975ce 100644
--- a/experimental/casemapping/src/casemapping.rs
+++ b/experimental/casemapping/src/casemapping.rs
@@ -261,7 +261,7 @@ impl CaseMapping {
     ///
     /// # Example
     ///
-    /// ```rust,ignore
+    /// ```rust
     /// use icu_casemapping::CaseMapping;
     /// use icu_locid::langid;
     ///
@@ -276,11 +276,11 @@ impl CaseMapping {
     /// assert_eq!(cm.titlecase_segment_to_string("Привет мир", &root), "Привет мир");
     ///
     /// // Some behavior is language-sensitive
-    /// assert_eq!(cm.titlecase_segment_to_string("istanbul", &root), "Istanbuk");
+    /// assert_eq!(cm.titlecase_segment_to_string("istanbul", &root), "Istanbul");
     /// assert_eq!(cm.titlecase_segment_to_string("istanbul", &langid!("tr")), "İstanbul"); // Turkish dotted i
     ///
-    /// assert_eq!(cm.titlecase_segment_to_string("և Երևանի", &root), "Եւ Երևանի");
-    /// assert_eq!(cm.titlecase_segment_to_string("և Երևանի", &langid!("hy")), "Եվ Երևանի"); // Eastern Armenian ech-yiwn ligature
+    /// assert_eq!(cm.titlecase_segment_to_string("և Երևանի", &root), "Եւ երևանի");
+    /// assert_eq!(cm.titlecase_segment_to_string("և Երևանի", &langid!("hy")), "Եվ երևանի"); // Eastern Armenian ech-yiwn ligature
     /// ```
     pub fn titlecase_segment_to_string(&self, src: &str, langid: &LanguageIdentifier) -> String {
         self.data
@@ -300,7 +300,7 @@ impl CaseMapping {
     /// Can be used to test if two strings are case-insensitively equivalent.
     ///
     /// See [`Self::fold()`] for the equivalent lower-level function that returns a [`Writeable`]
-    ///
+    ///
     /// # Example
     ///
     /// ```rust
@@ -638,18 +638,19 @@ mod tests {
         assert_eq!(cm.uppercase_to_string("İ", &az), "İ");
 
         // U+0049 LATIN CAPITAL LETTER I and U+0307 COMBINING DOT ABOVE
-        assert_eq!(cm.lowercase_to_string("I\u{0307}", &tr), "i");
-        assert_eq!(cm.lowercase_to_string("I\u{0307}", &az), "i");
-        assert_eq!(
-            cm.titlecase_segment_to_string("I\u{0307}", &tr),
-            "I\u{0307}"
-        );
-        assert_eq!(
-            cm.titlecase_segment_to_string("I\u{0307}", &az),
-            "I\u{0307}"
-        );
-        assert_eq!(cm.uppercase_to_string("I\u{0307}", &tr), "I\u{0307}");
-        assert_eq!(cm.uppercase_to_string("I\u{0307}", &az), "I\u{0307}");
+        // TODO
+        // assert_eq!(cm.lowercase_to_string("I\u{0307}", &tr), "i");
+        // assert_eq!(cm.lowercase_to_string("I\u{0307}", &az), "i");
+        // assert_eq!(
+        //     cm.titlecase_segment_to_string("I\u{0307}", &tr),
+        //     "I\u{0307}"
+        // );
+        // assert_eq!(
+        //     cm.titlecase_segment_to_string("I\u{0307}", &az),
+        //     "I\u{0307}"
+        // );
+        // assert_eq!(cm.uppercase_to_string("I\u{0307}", &tr), "I\u{0307}");
+        // assert_eq!(cm.uppercase_to_string("I\u{0307}", &az), "I\u{0307}");
 
         // U+0049 LATIN CAPITAL LETTER I
         assert_eq!(cm.lowercase_to_string("I", &tr), "ı");
diff --git a/experimental/casemapping/src/internals.rs b/experimental/casemapping/src/internals.rs
index 27f36681270..35fd1e4e591 100644
--- a/experimental/casemapping/src/internals.rs
+++ b/experimental/casemapping/src/internals.rs
@@ -127,6 +127,8 @@ impl<'data> CaseMappingV1<'data> {
 
     #[inline(always)]
     // IS_TITLE_CONTEXT must be true if kind is MappingKind::Title
+    // The kind may be a different kind with IS_TITLE_CONTEXT still true because
+    // titlecasing a segment involves switching to lowercase later
     fn full_helper<const IS_TITLE_CONTEXT: bool>(
         &self,
         c: char,
@@ -134,9 +136,12 @@ impl<'data> CaseMappingV1<'data> {
         locale: CaseMapLocale,
         kind: MappingKind,
     ) -> FullMappingResult {
-        // IS_TITLE_CONTEXT exists to avoid perf impacts on the other, more common modes
-        // Ensure that they are either both true or both false, i.e. an XNOR operation
-        debug_assert!(!(IS_TITLE_CONTEXT ^ (kind == MappingKind::Title)));
+        // If using a title mapping IS_TITLE_CONTEXT must be true
+        debug_assert!(kind != MappingKind::Title || IS_TITLE_CONTEXT);
+        // In a title context, kind MUST be Title or Lower
+        debug_assert!(
+            !IS_TITLE_CONTEXT || kind == MappingKind::Title || kind == MappingKind::Lower
+        );
 
         let data = self.lookup_data(c);
         if !data.has_exception() {
@@ -324,12 +329,17 @@ impl<'data> CaseMappingV1<'data> {
             (_, _) => None,
         }
     }
+    /// IS_TITLE_CONTEXT is true iff the mapping is MappingKind::Title, primarily exists
+    /// to avoid perf impacts on other more common modes of operation
     pub(crate) fn full_helper_writeable<'a: 'data, const IS_TITLE_CONTEXT: bool>(
         &'a self,
         src: &'a str,
         locale: CaseMapLocale,
         mapping: MappingKind,
     ) -> impl Writeable + 'a {
+        // Ensure that they are either both true or both false, i.e. an XNOR operation
+        debug_assert!(!(IS_TITLE_CONTEXT ^ (mapping == MappingKind::Title)));
+
         struct FullCaseWriteable<'a, const IS_TITLE_CONTEXT: bool> {
             data: &'a CaseMappingV1<'a>,
             src: &'a str,
@@ -345,16 +355,20 @@ impl<'data> CaseMappingV1<'data> {
                 let mut last_uncopied_idx = 0;
 
                 let src = self.src;
+                let mut mapping = self.mapping;
                 for (i, c) in src.char_indices() {
                     let context = ContextIterator::new(&src[..i], &src[i..]);
                     match self.data.full_helper::<IS_TITLE_CONTEXT>(
                         c,
                         context,
                         self.locale,
-                        self.mapping,
+                        mapping,
                     ) {
                         FullMappingResult::CodePoint(c2) => {
                             if c == c2 {
+                                if IS_TITLE_CONTEXT {
+                                    mapping = MappingKind::Lower;
+                                }
                                 continue;
                             }
                             sink.write_str(&src[last_uncopied_idx..i])?;
@@ -371,6 +385,9 @@ impl<'data> CaseMappingV1<'data> {
                             last_uncopied_idx = i + c.len_utf8();
                         }
                     }
+                    if IS_TITLE_CONTEXT {
+                        mapping = MappingKind::Lower;
+                    }
                 }
                 if last_uncopied_idx < src.len() {
                     sink.write_str(&src[last_uncopied_idx..])?;
diff --git a/experimental/casemapping/tests/conversions.rs b/experimental/casemapping/tests/conversions.rs
index 08bfaf39bb8..3517a945759 100644
--- a/experimental/casemapping/tests/conversions.rs
+++ b/experimental/casemapping/tests/conversions.rs
@@ -205,8 +205,12 @@ fn test_armenian() {
     assert_eq!(cm.uppercase_to_string(s, &east), "ԵՎ ԵՐԵՎԱՆԻ");
     assert_eq!(cm.uppercase_to_string(s, &west), "ԵՒ ԵՐԵՒԱՆԻ");
 
-    // Titlecase doesn't work yet
-    // assert_eq!(cm.titlecase_to_string(s, &root), "Եւ Երևանի");
-    // assert_eq!(cm.uppercase_to_string(s, &east), "Եվ Երևանի");
-    // assert_eq!(cm.uppercase_to_string(s, &west), "Եւ Երևանի");
+    let ew = "և";
+    let yerevan = "Երևանի";
+    assert_eq!(cm.titlecase_segment_to_string(ew, &root), "Եւ");
+    assert_eq!(cm.titlecase_segment_to_string(yerevan, &root), "Երևանի");
+    assert_eq!(cm.titlecase_segment_to_string(ew, &east), "Եվ");
+    assert_eq!(cm.titlecase_segment_to_string(yerevan, &east), "Երևանի");
+    assert_eq!(cm.titlecase_segment_to_string(ew, &west), "Եւ");
+    assert_eq!(cm.titlecase_segment_to_string(yerevan, &west), "Երևանի");
 }

From 9babda136aae10ef76a111648a7f926709807141 Mon Sep 17 00:00:00 2001
From: Manish Goregaokar <manishsmail@gmail.com>
Date: Wed, 28 Jun 2023 14:36:34 +0200
Subject: [PATCH 4/6] Handle turkish special case

---
 experimental/casemapping/src/casemapping.rs | 25 +++++++++--------
 experimental/casemapping/src/internals.rs   | 30 ++++++++++++++++-----
 2 files changed, 36 insertions(+), 19 deletions(-)

diff --git a/experimental/casemapping/src/casemapping.rs b/experimental/casemapping/src/casemapping.rs
index e69944975ce..e0ebfb89d0d 100644
--- a/experimental/casemapping/src/casemapping.rs
+++ b/experimental/casemapping/src/casemapping.rs
@@ -638,19 +638,18 @@ mod tests {
         assert_eq!(cm.uppercase_to_string("İ", &az), "İ");
 
         // U+0049 LATIN CAPITAL LETTER I and U+0307 COMBINING DOT ABOVE
-        // TODO
-        // assert_eq!(cm.lowercase_to_string("I\u{0307}", &tr), "i");
-        // assert_eq!(cm.lowercase_to_string("I\u{0307}", &az), "i");
-        // assert_eq!(
-        //     cm.titlecase_segment_to_string("I\u{0307}", &tr),
-        //     "I\u{0307}"
-        // );
-        // assert_eq!(
-        //     cm.titlecase_segment_to_string("I\u{0307}", &az),
-        //     "I\u{0307}"
-        // );
-        // assert_eq!(cm.uppercase_to_string("I\u{0307}", &tr), "I\u{0307}");
-        // assert_eq!(cm.uppercase_to_string("I\u{0307}", &az), "I\u{0307}");
+        assert_eq!(cm.lowercase_to_string("I\u{0307}", &tr), "i");
+        assert_eq!(cm.lowercase_to_string("I\u{0307}", &az), "i");
+        assert_eq!(
+            cm.titlecase_segment_to_string("I\u{0307}", &tr),
+            "I\u{0307}"
+        );
+        assert_eq!(
+            cm.titlecase_segment_to_string("I\u{0307}", &az),
+            "I\u{0307}"
+        );
+        assert_eq!(cm.uppercase_to_string("I\u{0307}", &tr), "I\u{0307}");
+        assert_eq!(cm.uppercase_to_string("I\u{0307}", &az), "I\u{0307}");
 
         // U+0049 LATIN CAPITAL LETTER I
         assert_eq!(cm.lowercase_to_string("I", &tr), "ı");
diff --git a/experimental/casemapping/src/internals.rs b/experimental/casemapping/src/internals.rs
index 35fd1e4e591..971e66ea37d 100644
--- a/experimental/casemapping/src/internals.rs
+++ b/experimental/casemapping/src/internals.rs
@@ -158,7 +158,9 @@ impl<'data> CaseMappingV1<'data> {
             let exception = self.exceptions.get(idx);
             if exception.bits.has_conditional_special() {
                 if let Some(special) = match kind {
-                    MappingKind::Lower => self.full_lower_special_case(c, context, locale),
+                    MappingKind::Lower => {
+                        self.full_lower_special_case::<IS_TITLE_CONTEXT>(c, context, locale)
+                    }
                     MappingKind::Fold => self.full_fold_special_case(c, context, locale),
                     MappingKind::Upper | MappingKind::Title => self
                         .full_upper_or_title_special_case::<IS_TITLE_CONTEXT>(c, context, locale),
@@ -216,7 +218,7 @@ impl<'data> CaseMappingV1<'data> {
         }
     }
 
-    fn full_lower_special_case(
+    fn full_lower_special_case<const IS_TITLE_CONTEXT: bool>(
         &self,
         c: char,
         context: ContextIterator,
@@ -252,10 +254,15 @@ impl<'data> CaseMappingV1<'data> {
             if c == '\u{130}' {
                 // I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
                 return Some(FullMappingResult::CodePoint('i'));
-            } else if c == '\u{307}' && context.preceded_by_capital_i(self) {
+            } else if c == '\u{307}' && context.preceded_by_capital_i::<IS_TITLE_CONTEXT>(self) {
                 // When lowercasing, remove dot_above in the sequence I + dot_above,
                 // which will turn into i. This matches the behaviour of the
                 // canonically equivalent I-dot_above.
+                //
+                // In a titlecase context, we do not want to apply this behavior to cases where the I
+                // was at the beginning of the string, as that I and its marks should be handled by the
+                // uppercasing rules (which ignore it, see below)
+
                 return Some(FullMappingResult::Remove);
             } else if c == 'I' && !context.followed_by_dot_above(self) {
                 // When lowercasing, unless an I is before a dot_above, it turns
@@ -580,10 +587,21 @@ impl<'a> ContextIterator<'a> {
         }
         false
     }
-    fn preceded_by_capital_i(&self, mapping: &CaseMappingV1) -> bool {
-        for c in self.before.chars().rev() {
+    /// Checks if the preceding character is a capital I, allowing for non-Above combining characters in between.
+    ///
+    /// If I_MUST_NOT_START_STRING is true, additionally will require that the capital I does not start the string
+    fn preceded_by_capital_i<const I_MUST_NOT_START_STRING: bool>(
+        &self,
+        mapping: &CaseMappingV1,
+    ) -> bool {
+        let mut iter = self.before.chars().rev();
+        while let Some(c) = iter.next() {
             if c == 'I' {
-                return true;
+                if I_MUST_NOT_START_STRING {
+                    return iter.next().is_some();
+                } else {
+                    return true;
+                }
             }
             if mapping.dot_type(c) != DotType::OtherAccent {
                 break;

From eb2577c7d59be237c5419f4dc1d24e28e045f1b6 Mon Sep 17 00:00:00 2001
From: Manish Goregaokar <manishsmail@gmail.com>
Date: Wed, 28 Jun 2023 14:39:48 +0200
Subject: [PATCH 5/6] add another ypogegrammeni case

---
 experimental/casemapping/src/casemapping.rs | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/experimental/casemapping/src/casemapping.rs b/experimental/casemapping/src/casemapping.rs
index e0ebfb89d0d..4731d6dc67b 100644
--- a/experimental/casemapping/src/casemapping.rs
+++ b/experimental/casemapping/src/casemapping.rs
@@ -602,6 +602,11 @@ mod tests {
             cm.uppercase_to_string("α\u{0313}\u{0345}", &root),
             "Α\u{0313}Ι"
         );
+        // but the YPOGEGRAMMENI should not titlecase
+        assert_eq!(
+            cm.titlecase_segment_to_string("α\u{0313}\u{0345}", &root),
+            "Α\u{0313}\u{0345}"
+        );
 
         // U+1F80 GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
         assert_eq!(cm.titlecase_segment_to_string("ᾀ", &root), "ᾈ");

From f6ade85c0b221c2d79cee17b1259e55e2048d621 Mon Sep 17 00:00:00 2001
From: Manish Goregaokar <manishsmail@gmail.com>
Date: Wed, 28 Jun 2023 15:42:29 +0200
Subject: [PATCH 6/6] implement dutch titlecasing

---
 experimental/casemapping/src/casemapping.rs   |  3 +
 experimental/casemapping/src/internals.rs     | 66 +++++++++++++++++++
 experimental/casemapping/tests/conversions.rs | 52 +++++++++++++++
 3 files changed, 121 insertions(+)

diff --git a/experimental/casemapping/src/casemapping.rs b/experimental/casemapping/src/casemapping.rs
index 4731d6dc67b..e284e138baf 100644
--- a/experimental/casemapping/src/casemapping.rs
+++ b/experimental/casemapping/src/casemapping.rs
@@ -281,6 +281,9 @@ impl CaseMapping {
     ///
     /// assert_eq!(cm.titlecase_segment_to_string("և Երևանի", &root), "Եւ երևանի");
     /// assert_eq!(cm.titlecase_segment_to_string("և Երևանի", &langid!("hy")), "Եվ երևանի"); // Eastern Armenian ech-yiwn ligature
+    ///
+    /// assert_eq!(cm.titlecase_segment_to_string("ijkdijk", &root), "Ijkdijk");
+    /// assert_eq!(cm.titlecase_segment_to_string("ijkdijk", &langid!("nl")), "IJkdijk"); // Dutch IJ digraph
     /// ```
     pub fn titlecase_segment_to_string(&self, src: &str, langid: &LanguageIdentifier) -> String {
         self.data
diff --git a/experimental/casemapping/src/internals.rs b/experimental/casemapping/src/internals.rs
index 971e66ea37d..54afce90998 100644
--- a/experimental/casemapping/src/internals.rs
+++ b/experimental/casemapping/src/internals.rs
@@ -14,6 +14,8 @@ use core::fmt;
 use icu_locid::LanguageIdentifier;
 use writeable::Writeable;
 
+const ACUTE: char = '\u{301}'; // U+0301 COMBINING ACUTE ACCENT
+
 // Used to control the behavior of CaseMapping::fold.
 // Currently only used to decide whether to use Turkic (T) mappings for dotted/dotless i.
 #[derive(Default)]
@@ -143,6 +145,18 @@ impl<'data> CaseMappingV1<'data> {
             !IS_TITLE_CONTEXT || kind == MappingKind::Title || kind == MappingKind::Lower
         );
 
+        // ICU4C's non-standard extension for Dutch IJ titlecasing is
+        // handled here instead of in full_lower_special_case because J does not have conditional
+        // special casemapping.
+        if IS_TITLE_CONTEXT && locale == CaseMapLocale::Dutch && kind == MappingKind::Lower {
+            // When titlecasing, a J found immediately after an I at the beginning of the segment
+            // should also uppercase. They are both allowed to have an acute accent but it must
+            // be present on both letters or neither. They may not have any other combining marks.
+            if (c == 'j' || c == 'J') && context.is_dutch_ij_pair_at_beginning(self) {
+                return FullMappingResult::CodePoint('J');
+            }
+        }
+
         let data = self.lookup_data(c);
         if !data.has_exception() {
             if data.is_relevant_to(kind) {
@@ -648,4 +662,56 @@ impl<'a> ContextIterator<'a> {
         }
         false
     }
+
+    /// Dutch IJ check for a `j`/`J`: returns true only when `self.before` is a lone `i`/`I`
+    /// (plain, precomposed `í`/`Í`, or followed by `ACUTE`) with nothing in front of it.
+    /// `self.after` may begin with `ACUTE`, but any other accent there makes this return false.
+    /// If one letter has an acute accent, both must have it for this to return true.
+    fn is_dutch_ij_pair_at_beginning(&self, mapping: &CaseMappingV1) -> bool {
+        let mut before = self.before.chars().rev();
+        let mut i_has_acute = false;
+        loop {
+            match before.next() {
+                Some('i') | Some('I') => break,
+                Some('í') | Some('Í') => {
+                    i_has_acute = true;
+                    break;
+                }
+                Some(ACUTE) => i_has_acute = true, // acute on the i; keep walking backwards
+                _ => return false, // any other character, or nothing before the j at all
+            }
+        }
+
+        if before.next().is_some() {
+            // something precedes the i, so the pair is not at the beginning of the string
+            return false;
+        }
+        let mut j_has_acute = false;
+        for c in self.after.chars() {
+            if c == ACUTE {
+                j_has_acute = true;
+                continue;
+            }
+            // The `j` may carry an acute accent but no other combining mark. The scan
+            // stops at the first character that is not a combining mark, because that
+            // character no longer belongs to the `j`.
+            //
+            // ICU4C checks for `gc=Mn` to determine if something is a combining mark,
+            // however this requires extra data (and is the *only* point in the casemapping algorithm
+            // where there is a direct dependency on properties data not mediated by the casemapping data trie).
+            //
+            // Instead, we can check for ccc via dot_type, the same way the rest of the algorithm does.
+            //
+            // See https://unicode-org.atlassian.net/browse/ICU-22429
+            match mapping.dot_type(c) {
+                // Not a combining character; ccc = 0
+                DotType::NoDot | DotType::SoftDotted => break,
+                // some other combining character is attached to the j: not an IJ pair
+                _ => return false,
+            }
+        }
+
+        // Either both letters carry an acute accent or neither does; `!(a ^ b)` is XNOR (a == b).
+        !(j_has_acute ^ i_has_acute)
+    }
 }
diff --git a/experimental/casemapping/tests/conversions.rs b/experimental/casemapping/tests/conversions.rs
index 3517a945759..d461bc76195 100644
--- a/experimental/casemapping/tests/conversions.rs
+++ b/experimental/casemapping/tests/conversions.rs
@@ -214,3 +214,55 @@ fn test_armenian() {
     assert_eq!(cm.titlecase_segment_to_string(ew, &west), "Եւ");
     assert_eq!(cm.titlecase_segment_to_string(yerevan, &west), "Երևանի");
 }
+
+#[test]
+fn test_dutch() {
+    let cm = CaseMapping::new();
+    let nl = langid!("nl");
+    // A leading ij/IJ is titlecased as a pair; a lone leading i gets the usual single capital.
+    assert_eq!(cm.titlecase_segment_to_string("ijssel", &nl), "IJssel");
+    assert_eq!(cm.titlecase_segment_to_string("igloo", &nl), "Igloo");
+    assert_eq!(cm.titlecase_segment_to_string("IJMUIDEN", &nl), "IJmuiden");
+    // Whole-segment pairs: J is uppercased only if i and j agree on the acute and have no other mark.
+    assert_eq!(cm.titlecase_segment_to_string("ij", &nl), "IJ");
+    assert_eq!(cm.titlecase_segment_to_string("IJ", &nl), "IJ");
+    assert_eq!(cm.titlecase_segment_to_string("íj́", &nl), "ÍJ́");
+    assert_eq!(cm.titlecase_segment_to_string("ÍJ́", &nl), "ÍJ́");
+    assert_eq!(cm.titlecase_segment_to_string("íJ́", &nl), "ÍJ́");
+    assert_eq!(cm.titlecase_segment_to_string("Ij́", &nl), "Ij́");
+    assert_eq!(cm.titlecase_segment_to_string("ij́", &nl), "Ij́");
+    assert_eq!(cm.titlecase_segment_to_string("ïj́", &nl), "Ïj́");
+    assert_eq!(
+        cm.titlecase_segment_to_string("íj\u{0308}", &nl),
+        "Íj\u{0308}"
+    );
+    assert_eq!(
+        cm.titlecase_segment_to_string("íj́\u{1D16E}", &nl),
+        "Íj́\u{1D16E}"
+    );
+    assert_eq!(
+        cm.titlecase_segment_to_string("íj\u{1ABE}", &nl),
+        "Íj\u{1ABE}"
+    );
+    // Same pairs followed by "abc": a mark at the end of the segment no longer affects the pair.
+    assert_eq!(cm.titlecase_segment_to_string("ijabc", &nl), "IJabc");
+    assert_eq!(cm.titlecase_segment_to_string("IJabc", &nl), "IJabc");
+    assert_eq!(cm.titlecase_segment_to_string("íj́abc", &nl), "ÍJ́abc");
+    assert_eq!(cm.titlecase_segment_to_string("ÍJ́abc", &nl), "ÍJ́abc");
+    assert_eq!(cm.titlecase_segment_to_string("íJ́abc", &nl), "ÍJ́abc");
+    assert_eq!(cm.titlecase_segment_to_string("Ij́abc", &nl), "Ij́abc");
+    assert_eq!(cm.titlecase_segment_to_string("ij́abc", &nl), "Ij́abc");
+    assert_eq!(cm.titlecase_segment_to_string("ïj́abc", &nl), "Ïj́abc");
+    assert_eq!(
+        cm.titlecase_segment_to_string("íjabc\u{0308}", &nl),
+        "Íjabc\u{0308}"
+    );
+    assert_eq!(
+        cm.titlecase_segment_to_string("íj́abc\u{1D16E}", &nl),
+        "ÍJ́abc\u{1D16E}"
+    );
+    assert_eq!(
+        cm.titlecase_segment_to_string("íjabc\u{1ABE}", &nl),
+        "Íjabc\u{1ABE}"
+    );
+}