From e17849c0b83402e2b5361f9468e83ff8c58ad479 Mon Sep 17 00:00:00 2001
From: Tarek Mahmoud Sayed <tarekms@microsoft.com>
Date: Sun, 22 Nov 2020 12:40:28 -0800
Subject: [PATCH 1/6] Fix Full Width Chars Casing

---
 .../pal_collation.c                           | 210 ++++++++++--------
 .../CompareInfo/CompareInfoTests.Compare.cs   |  26 +++
 2 files changed, 149 insertions(+), 87 deletions(-)

diff --git a/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.c b/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.c
index a9eb1d57d74c7..a76d7919744cb 100644
--- a/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.c
+++ b/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.c
@@ -59,8 +59,6 @@ struct SortHandle
     SearchIteratorNode searchIteratorList[CompareOptionsMask + 1];
 };
 
-typedef struct { UChar* items; size_t size; } UCharList;
-
 // Hiragana character range
 static const UChar hiraganaStart = 0x3041;
 static const UChar hiraganaEnd = 0x309e;
@@ -142,99 +140,91 @@ static int IsHalfFullHigherSymbol(UChar character)
 }
 
 /*
-Gets a string of custom collation rules, if necessary.
+Fill custom collation rules for ignoreKana cases.
 
 Since the CompareOptions flags don't map 1:1 with ICU default functionality, we need to fall back to using
 custom rules in order to support IgnoreKanaType and IgnoreWidth CompareOptions correctly.
 */
-static UCharList* GetCustomRules(int32_t options, UColAttributeValue strength, int isIgnoreSymbols)
+static void FillIgnoreKanaRules(UChar* completeRules, int32_t* fillIndex, int32_t completeRulesLength, int32_t isIgnoreKanaType)
 {
-    int isIgnoreKanaType = (options & CompareOptionsIgnoreKanaType) == CompareOptionsIgnoreKanaType;
-    int isIgnoreWidth = (options & CompareOptionsIgnoreWidth) == CompareOptionsIgnoreWidth;
-
-    // kana differs at the tertiary level
-    int needsIgnoreKanaTypeCustomRule = isIgnoreKanaType && strength >= UCOL_TERTIARY;
-    int needsNotIgnoreKanaTypeCustomRule = !isIgnoreKanaType && strength < UCOL_TERTIARY;
-
-    // character width differs at the tertiary level
-    int needsIgnoreWidthCustomRule = isIgnoreWidth && strength >= UCOL_TERTIARY;
-    int needsNotIgnoreWidthCustomRule = !isIgnoreWidth && strength < UCOL_TERTIARY;
+    UChar compareChar = isIgnoreKanaType ? '=' : '<';
 
-    if (!(needsIgnoreKanaTypeCustomRule || needsNotIgnoreKanaTypeCustomRule || needsIgnoreWidthCustomRule || needsNotIgnoreWidthCustomRule))
-        return NULL;
-
-    UCharList* customRules = (UCharList*)malloc(sizeof(UCharList));
-    if (customRules == NULL)
+    for (UChar hiraganaChar = hiraganaStart; hiraganaChar <= hiraganaEnd; hiraganaChar++)
     {
-        return NULL;
+        // Hiragana is the range 3041 to 3096 & 309D & 309E
+        if (hiraganaChar <= 0x3096 || hiraganaChar >= 0x309D) // characters between 3096 and 309D are not mapped to katakana
+        {
+            assert((*fillIndex) + 4 <= completeRulesLength);
+            completeRules[*fillIndex] = '&';
+            completeRules[(*fillIndex) + 1] = hiraganaChar;
+            completeRules[(*fillIndex) + 2] = compareChar;
+            completeRules[(*fillIndex) + 3] = hiraganaChar + hiraganaToKatakanaOffset;
+            (*fillIndex) += 4;
+        }
     }
+}
 
-    // If we need to create customRules, the KanaType custom rule will be 88 kana characters * 4 = 352 chars long
-    // and the Width custom rule will be at most 212 halfwidth characters * 5 = 1060 chars long.
-    int capacity =
-        ((needsIgnoreKanaTypeCustomRule || needsNotIgnoreKanaTypeCustomRule) ? 4 * (hiraganaEnd - hiraganaStart + 1) : 0) +
-        ((needsIgnoreWidthCustomRule || needsNotIgnoreWidthCustomRule) ? 5 * g_HalfFullCharsLength : 0);
+/*
+Fill custom collation rules for ignoreWidth cases.
 
-    UChar* items;
-    customRules->items = items = (UChar*)malloc((size_t)capacity * sizeof(UChar));
-    if (customRules->items == NULL)
-    {
-        free(customRules);
-        return NULL;
-    }
+Since the CompareOptions flags don't map 1:1 with ICU default functionality, we need to fall back to using
+custom rules in order to support IgnoreKanaType and IgnoreWidth CompareOptions correctly.
+*/
+static void FillIgnoreWidthRules(UChar* completeRules, int32_t* fillIndex, int32_t completeRulesLength, int32_t isIgnoreWidth, int32_t isIgnoreCase, int32_t isIgnoreSymbols)
+{
+    UChar compareChar = isIgnoreWidth ? '=' : '<';
 
-    if (needsIgnoreKanaTypeCustomRule || needsNotIgnoreKanaTypeCustomRule)
-    {
-        UChar compareChar = needsIgnoreKanaTypeCustomRule ? '=' : '<';
+    UChar lowerChar;
+    UChar higherChar;
+    int needsEscape;
 
-        for (UChar hiraganaChar = hiraganaStart; hiraganaChar <= hiraganaEnd; hiraganaChar++)
+    for (int i = 0; i < g_HalfFullCharsLength; i++)
+    {
+        lowerChar = g_HalfFullLowerChars[i];
+        higherChar = g_HalfFullHigherChars[i];
+        // the lower chars need to be checked for escaping since they contain ASCII punctuation
+        needsEscape = NeedsEscape(lowerChar);
+
+        // when isIgnoreSymbols is true and we are not ignoring width, check to see if
+        // this character is a symbol, and if so skip it
+        if (!(isIgnoreSymbols && (!isIgnoreWidth) && (needsEscape || IsHalfFullHigherSymbol(higherChar))))
         {
-            // Hiragana is the range 3041 to 3096 & 309D & 309E
-            if (hiraganaChar <= 0x3096 || hiraganaChar >= 0x309D) // characters between 3096 and 309D are not mapped to katakana
+            assert((*fillIndex) + 4 <= completeRulesLength);
+            completeRules[*fillIndex] = '&';
+            (*fillIndex)++;
+
+            if (needsEscape)
             {
-                assert(items - customRules->items <= capacity - 4);
-                *(items++) = '&';
-                *(items++) = hiraganaChar;
-                *(items++) = compareChar;
-                *(items++) = hiraganaChar + hiraganaToKatakanaOffset;
+                completeRules[*fillIndex] = '\\';
+                (*fillIndex)++;
             }
+
+            completeRules[*fillIndex]       = lowerChar;
+            completeRules[(*fillIndex) + 1] = compareChar;
+            completeRules[(*fillIndex) + 2] = higherChar;
+            (*fillIndex) += 3;
         }
     }
 
-    if (needsIgnoreWidthCustomRule || needsNotIgnoreWidthCustomRule)
+    // When we have isIgnoreWidth is false, we sort the normal width latin alphabet charcatres before the full width latin alphabet charcatres
+    //              e.g. `a` < `ａ` (\uFF41)
+    // This break the casing of the full width latin alphabet charcatres.
+    //              e.g. `ａ` (\uFF41) == `Ａ` (\uFF21).
+    // we are fixing back this case mapping here.
+    if (isIgnoreCase && (!isIgnoreWidth))
     {
-        UChar compareChar = needsIgnoreWidthCustomRule ? '=' : '<';
+        assert((*fillIndex) + (FullWidthAlphabetRangeLength * 4) <= completeRulesLength);
+        const int UpperCaseToLowerCaseOffset = 0xFF41 - 0xFF21;
 
-        UChar lowerChar;
-        UChar higherChar;
-        int needsEscape;
-        for (int i = 0; i < g_HalfFullCharsLength; i++)
+        for (UChar ch = 0xFF21; ch <= 0xFF3A; ch++)
         {
-            lowerChar = g_HalfFullLowerChars[i];
-            higherChar = g_HalfFullHigherChars[i];
-            // the lower chars need to be checked for escaping since they contain ASCII punctuation
-            needsEscape = NeedsEscape(lowerChar);
-
-            // when isIgnoreSymbols is true and we are not ignoring width, check to see if
-            // this character is a symbol, and if so skip it
-            if (!(isIgnoreSymbols && needsNotIgnoreWidthCustomRule && (needsEscape || IsHalfFullHigherSymbol(higherChar))))
-            {
-                assert(items - customRules->items <= capacity - 5);
-                *(items++) = '&';
-                if (needsEscape)
-                {
-                    *(items++) = '\\';
-                }
-                *(items++) = lowerChar;
-                *(items++) = compareChar;
-                *(items++) = higherChar;
-            }
+            completeRules[*fillIndex] = '&';
+            completeRules[(*fillIndex) + 1] = ch + UpperCaseToLowerCaseOffset;
+            completeRules[(*fillIndex) + 2] = '=';
+            completeRules[(*fillIndex) + 3] = ch;
+            (*fillIndex) += 4;
         }
     }
-
-    customRules->size = (size_t)(items - customRules->items);
-
-    return customRules;
 }
 
 /*
@@ -247,9 +237,11 @@ static UCollator* CloneCollatorWithOptions(const UCollator* pCollator, int32_t o
 {
     UColAttributeValue strength = ucol_getStrength(pCollator);
 
-    int isIgnoreCase = (options & CompareOptionsIgnoreCase) == CompareOptionsIgnoreCase;
-    int isIgnoreNonSpace = (options & CompareOptionsIgnoreNonSpace) == CompareOptionsIgnoreNonSpace;
-    int isIgnoreSymbols = (options & CompareOptionsIgnoreSymbols) == CompareOptionsIgnoreSymbols;
+    int32_t isIgnoreCase        = (options & CompareOptionsIgnoreCase)     == CompareOptionsIgnoreCase;
+    int32_t isIgnoreNonSpace    = (options & CompareOptionsIgnoreNonSpace) == CompareOptionsIgnoreNonSpace;
+    int32_t isIgnoreSymbols     = (options & CompareOptionsIgnoreSymbols)  == CompareOptionsIgnoreSymbols;
+    int32_t isIgnoreKanaType    = (options & CompareOptionsIgnoreKanaType) == CompareOptionsIgnoreKanaType;
+    int32_t isIgnoreWidth       = (options & CompareOptionsIgnoreWidth)    == CompareOptionsIgnoreWidth;
 
     if (isIgnoreCase)
     {
@@ -262,34 +254,78 @@ static UCollator* CloneCollatorWithOptions(const UCollator* pCollator, int32_t o
     }
 
     UCollator* pClonedCollator;
-    UCharList* customRules = GetCustomRules(options, strength, isIgnoreSymbols);
-    if (customRules == NULL || customRules->size == 0)
+
+    // IgnoreWidth - it would be easy to IgnoreWidth by just setting Strength <= Secondary.
+    // For any strength under that, the width of the characters will be ignored.
+    // For strength above that, the width of the characters will be used in differentiation.
+    //      a.	However, this doesn’t play nice with IgnoreCase, since these Strength levels are overloaded.
+    //      b.	So the plan to support IgnoreWidth is to use customized rules.
+    //          i.	Since the character width is differentiated at “Tertiary” strength, we only need to use custom rules in specific cases.
+    //          ii.	If (IgnoreWidth == true && Strength > “Secondary”)
+    //              1.	Build up a custom rule set for each half-width character and say that it is equal to the corresponding full-width character.
+    //                  a.	ex:  “0x30F2 = 0xFF66 & 0x30F3 = 0xFF9D & …”
+    //          iii.	If (IgnoreWidth == false && Strength <= “Secondary”)
+    //              1.	Build up a custom rule set saying that the half-width and full-width characters have a primary level difference (which will cause it always to be unequal)
+    //                  a.	Ex. “0x30F2 < 0xFF66 & 0x30F3 < 0xFF9D & …”
+    //  IgnoreKanaType – this works the same way as IgnoreWidth, only change “Secondary” to “Tertiary” and use the set of Hiragana and Katakana characters instead of half-width vs full-width characters to build the rules.
+    int32_t applyIgnoreKanaTypeCustomRule  = isIgnoreKanaType ^ (strength < UCOL_TERTIARY); // kana differs at the tertiary level
+    int32_t applyIgnoreWidthTypeCustomRule = isIgnoreWidth    ^ (strength < UCOL_TERTIARY); // character width differs at the tertiary level
+
+    int32_t customRuleLength = 0;
+    if (applyIgnoreKanaTypeCustomRule || applyIgnoreWidthTypeCustomRule)
+    {
+        // Length of the fullwidth charcatres from 'A' to 'Z'
+        // We'll use it to map the casing of the full width 'A' to 'Z' characters
+        const int32_t FullWidthAlphabetRangeLength = 0xFF3A - 0xFF21 + 1;
+
+        // If we need to create customRules, the KanaType custom rule will be 88 kana characters * 4 = 352 chars long
+        // and the Width custom rule will be at most 212 halfwidth characters * 5 = 1060 chars long.
+        customRuleLength = (applyIgnoreKanaTypeCustomRule ? 4 * (hiraganaEnd - hiraganaStart + 1) : 0) +
+                            (applyIgnoreWidthTypeCustomRule ? ((5 * g_HalfFullCharsLength) + (isIgnoreCase ? 4 * FullWidthAlphabetRangeLength : 0)) : 0) +
+                            1; // Adding extra terminator rule at the end to force ICU apply last actual entered rule, otherwise last actual rule get ignored.
+    }
+
+    if (customRuleLength == 0)
     {
         pClonedCollator = ucol_safeClone(pCollator, NULL, NULL, pErr);
     }
     else
     {
-        int32_t customRuleLength = (int32_t)customRules->size;
-
-        int32_t localeRulesLength;
-        const UChar* localeRules = ucol_getRules(pCollator, &localeRulesLength);
-        int32_t completeRulesLength = localeRulesLength + customRuleLength + 1;
+        int32_t rulesLength;
+        const UChar* localeRules = ucol_getRules(pCollator, &rulesLength);
+        int32_t completeRulesLength = rulesLength + customRuleLength + 1;
 
         UChar* completeRules = (UChar*)calloc((size_t)completeRulesLength, sizeof(UChar));
 
-        for (int i = 0; i < localeRulesLength; i++)
+        for (int i = 0; i < rulesLength; i++)
         {
             completeRules[i] = localeRules[i];
         }
-        for (int i = 0; i < customRuleLength; i++)
+
+        if (applyIgnoreKanaTypeCustomRule)
         {
-            completeRules[localeRulesLength + i] = customRules->items[i];
+            FillIgnoreKanaRules(completeRules, &rulesLength, completeRulesLength, isIgnoreKanaType);
         }
 
-        pClonedCollator = ucol_openRules(completeRules, completeRulesLength, UCOL_DEFAULT, strength, NULL, pErr);
+        assert(rulesLength <= completeRulesLength);
+
+        if (applyIgnoreWidthTypeCustomRule)
+        {
+            FillIgnoreWidthRules(completeRules, &rulesLength, completeRulesLength, isIgnoreWidth, isIgnoreCase, isIgnoreSymbols);
+        }
+
+        assert(rulesLength + 4 <= completeRulesLength);
+
+        // Adding extra terminator rule at the end to force ICU apply last actual entered rule, otherwise last actual rule get ignored.
+        completeRules[rulesLength] = '&';
+        completeRules[rulesLength + 1] = 'a';
+        completeRules[rulesLength + 2] = '=';
+        completeRules[rulesLength + 3] = 'a';
+        rulesLength += 4;
+
+        pClonedCollator = ucol_openRules(completeRules, rulesLength, UCOL_DEFAULT, strength, NULL, pErr);
         free(completeRules);
     }
-    free(customRules);
 
     if (isIgnoreSymbols)
     {
diff --git a/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.Compare.cs b/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.Compare.cs
index 57704146c8b68..98890c0c4fdbf 100644
--- a/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.Compare.cs
+++ b/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.Compare.cs
@@ -455,5 +455,31 @@ public void Compare_Invalid()
             AssertExtensions.Throws<ArgumentOutOfRangeException>("string2", () => s_invariantCompare.Compare("Test", 0, 2, "Test", 2, 3));
             AssertExtensions.Throws<ArgumentOutOfRangeException>("string2", () => s_invariantCompare.Compare("Test", 0, 2, "Test", 2, 3, CompareOptions.None));
         }
+
+        [Fact]
+        public void TestIgnoreKanaAndWidthCases()
+        {
+            for (char c = '\uFF41'; c <= '\uFF5A'; c++) // Full width 'ａ' to `ｚ`
+            {
+                Assert.False(string.Equals(new string(c, 1), new string((char) (c - 0x20), 1), StringComparison.InvariantCulture), $"Expected '{(int)c:x4}' != '{c - 0x20:x4}'");
+                Assert.True(string.Equals(new string(c, 1), new string((char) (c - 0x20), 1), StringComparison.InvariantCultureIgnoreCase), $"Expected '{(int)c:x4}' == '{c - 0x20:x4}'");
+            }
+
+            // Edge case of the Ignore Width.
+            Assert.False(string.Compare("\u3162\u3163", "\uFFDB\uFFDC", CultureInfo.InvariantCulture, CompareOptions.None) == 0, $"Expect '0x3162 0x3163' != '0xFFDB 0xFFDC'");
+            Assert.True(string.Compare("\u3162\u3163", "\uFFDB\uFFDC", CultureInfo.InvariantCulture, CompareOptions.IgnoreWidth) == 0, "Expect '0x3162 0x3163' == '0xFFDB 0xFFDC'");
+
+            const char hiraganaStart = '\u3041';
+            const char hiraganaEnd = '\u3096';
+            const int hiraganaToKatakanaOffset = 0x30a1 - 0x3041;
+
+            for (Char hiraganaChar = hiraganaStart; hiraganaChar <= hiraganaEnd; hiraganaChar++)
+            {
+                Assert.False(string.Compare(new string(hiraganaChar, 1), new string((char)(hiraganaChar + hiraganaToKatakanaOffset), 1), CultureInfo.InvariantCulture, CompareOptions.IgnoreCase) == 0,
+                                            $"Expect '{(int)hiraganaChar:x4}'  != {(int)hiraganaChar + hiraganaToKatakanaOffset:x4} with CompareOptions.IgnoreCase");
+                Assert.True(string.Compare(new string(hiraganaChar, 1), new string((char)(hiraganaChar + hiraganaToKatakanaOffset), 1), CultureInfo.InvariantCulture, CompareOptions.IgnoreKanaType) == 0,
+                                            $"Expect '{(int)hiraganaChar:x4}'  == {(int)hiraganaChar + hiraganaToKatakanaOffset:x4} with CompareOptions.IgnoreKanaType");
+            }
+        }
     }
 }

From 2cf96a10b877e80cbd3e7485e47ff44328e9904f Mon Sep 17 00:00:00 2001
From: Tarek Mahmoud Sayed <tarekms@microsoft.com>
Date: Sun, 22 Nov 2020 12:59:46 -0800
Subject: [PATCH 2/6] Fix constant definition

---
 .../Unix/System.Globalization.Native/pal_collation.c       | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.c b/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.c
index a76d7919744cb..38cda23e005fa 100644
--- a/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.c
+++ b/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.c
@@ -63,6 +63,9 @@ struct SortHandle
 static const UChar hiraganaStart = 0x3041;
 static const UChar hiraganaEnd = 0x309e;
 static const UChar hiraganaToKatakanaOffset = 0x30a1 - 0x3041;
+// Length of the fullwidth charcatres from 'A' to 'Z'
+// We'll use it to map the casing of the full width 'A' to 'Z' characters
+const int32_t FullWidthAlphabetRangeLength = 0xFF3A - 0xFF21 + 1;
 
 // Mapping between half- and fullwidth characters.
 // LowerChars are the characters that should sort lower than HigherChars
@@ -274,10 +277,6 @@ static UCollator* CloneCollatorWithOptions(const UCollator* pCollator, int32_t o
     int32_t customRuleLength = 0;
     if (applyIgnoreKanaTypeCustomRule || applyIgnoreWidthTypeCustomRule)
     {
-        // Length of the fullwidth charcatres from 'A' to 'Z'
-        // We'll use it to map the casing of the full width 'A' to 'Z' characters
-        const int32_t FullWidthAlphabetRangeLength = 0xFF3A - 0xFF21 + 1;
-
         // If we need to create customRules, the KanaType custom rule will be 88 kana characters * 4 = 352 chars long
         // and the Width custom rule will be at most 212 halfwidth characters * 5 = 1060 chars long.
         customRuleLength = (applyIgnoreKanaTypeCustomRule ? 4 * (hiraganaEnd - hiraganaStart + 1) : 0) +

From 2cc2371e09a7abadc03d44d78668819caccd7de8 Mon Sep 17 00:00:00 2001
From: Tarek Mahmoud Sayed <tarekms@microsoft.com>
Date: Sun, 22 Nov 2020 13:42:19 -0800
Subject: [PATCH 3/6] define the const as a static

---
 .../Native/Unix/System.Globalization.Native/pal_collation.c     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.c b/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.c
index 38cda23e005fa..c7f098ee821e6 100644
--- a/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.c
+++ b/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.c
@@ -65,7 +65,7 @@ static const UChar hiraganaEnd = 0x309e;
 static const UChar hiraganaToKatakanaOffset = 0x30a1 - 0x3041;
 // Length of the fullwidth charcatres from 'A' to 'Z'
 // We'll use it to map the casing of the full width 'A' to 'Z' characters
-const int32_t FullWidthAlphabetRangeLength = 0xFF3A - 0xFF21 + 1;
+static const int32_t FullWidthAlphabetRangeLength = 0xFF3A - 0xFF21 + 1;
 
 // Mapping between half- and fullwidth characters.
 // LowerChars are the characters that should sort lower than HigherChars

From 5df7edd2f73c01fbd6958d64494e3ab6f8f2d570 Mon Sep 17 00:00:00 2001
From: Tarek Mahmoud Sayed <tarekms@microsoft.com>
Date: Sun, 22 Nov 2020 14:17:46 -0800
Subject: [PATCH 4/6] Fix unused method parameter

---
 .../Unix/System.Globalization.Native/pal_collation.c   | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.c b/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.c
index c7f098ee821e6..d223c6138effa 100644
--- a/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.c
+++ b/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.c
@@ -158,6 +158,10 @@ static void FillIgnoreKanaRules(UChar* completeRules, int32_t* fillIndex, int32_
         if (hiraganaChar <= 0x3096 || hiraganaChar >= 0x309D) // characters between 3096 and 309D are not mapped to katakana
         {
             assert((*fillIndex) + 4 <= completeRulesLength);
+            if ((*fillIndex) + 4 > completeRulesLength)
+            {
+                return;
+            }
             completeRules[*fillIndex] = '&';
             completeRules[(*fillIndex) + 1] = hiraganaChar;
             completeRules[(*fillIndex) + 2] = compareChar;
@@ -192,7 +196,11 @@ static void FillIgnoreWidthRules(UChar* completeRules, int32_t* fillIndex, int32
         // this character is a symbol, and if so skip it
         if (!(isIgnoreSymbols && (!isIgnoreWidth) && (needsEscape || IsHalfFullHigherSymbol(higherChar))))
         {
-            assert((*fillIndex) + 4 <= completeRulesLength);
+            assert((*fillIndex) + 5 <= completeRulesLength);
+            if ((*fillIndex) + 5 > completeRulesLength)
+            {
+                return;
+            }
             completeRules[*fillIndex] = '&';
             (*fillIndex)++;
 

From 4cb1845544ffa25d13ad778e16c6abee4e42a46b Mon Sep 17 00:00:00 2001
From: Tarek Mahmoud Sayed <tarekms@microsoft.com>
Date: Mon, 23 Nov 2020 11:20:33 -0800
Subject: [PATCH 5/6] Address the feedback and Move bounds checks outside the
 loops

---
 .../pal_collation.c                           | 28 ++++++++++---------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.c b/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.c
index d223c6138effa..67ee3403c5516 100644
--- a/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.c
+++ b/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.c
@@ -63,7 +63,7 @@ struct SortHandle
 static const UChar hiraganaStart = 0x3041;
 static const UChar hiraganaEnd = 0x309e;
 static const UChar hiraganaToKatakanaOffset = 0x30a1 - 0x3041;
-// Length of the fullwidth charcatres from 'A' to 'Z'
+// Length of the fullwidth characters from 'A' to 'Z'
 // We'll use it to map the casing of the full width 'A' to 'Z' characters
 static const int32_t FullWidthAlphabetRangeLength = 0xFF3A - 0xFF21 + 1;
 
@@ -152,16 +152,17 @@ static void FillIgnoreKanaRules(UChar* completeRules, int32_t* fillIndex, int32_
 {
     UChar compareChar = isIgnoreKanaType ? '=' : '<';
 
+    assert((*fillIndex) + (4 * (hiraganaEnd - hiraganaStart + 1)) <= completeRulesLength);
+    if ((*fillIndex) + (4 * (hiraganaEnd - hiraganaStart + 1)) > completeRulesLength) // check the allocated the size
+    {
+        return;
+    }
+
     for (UChar hiraganaChar = hiraganaStart; hiraganaChar <= hiraganaEnd; hiraganaChar++)
     {
         // Hiragana is the range 3041 to 3096 & 309D & 309E
         if (hiraganaChar <= 0x3096 || hiraganaChar >= 0x309D) // characters between 3096 and 309D are not mapped to katakana
         {
-            assert((*fillIndex) + 4 <= completeRulesLength);
-            if ((*fillIndex) + 4 > completeRulesLength)
-            {
-                return;
-            }
             completeRules[*fillIndex] = '&';
             completeRules[(*fillIndex) + 1] = hiraganaChar;
             completeRules[(*fillIndex) + 2] = compareChar;
@@ -185,6 +186,12 @@ static void FillIgnoreWidthRules(UChar* completeRules, int32_t* fillIndex, int32
     UChar higherChar;
     int needsEscape;
 
+    assert((*fillIndex) + (5 * g_HalfFullCharsLength) <= completeRulesLength);
+    if ((*fillIndex) + (5 * g_HalfFullCharsLength) > completeRulesLength)
+    {
+        return;
+    }
+
     for (int i = 0; i < g_HalfFullCharsLength; i++)
     {
         lowerChar = g_HalfFullLowerChars[i];
@@ -196,11 +203,6 @@ static void FillIgnoreWidthRules(UChar* completeRules, int32_t* fillIndex, int32
         // this character is a symbol, and if so skip it
         if (!(isIgnoreSymbols && (!isIgnoreWidth) && (needsEscape || IsHalfFullHigherSymbol(higherChar))))
         {
-            assert((*fillIndex) + 5 <= completeRulesLength);
-            if ((*fillIndex) + 5 > completeRulesLength)
-            {
-                return;
-            }
             completeRules[*fillIndex] = '&';
             (*fillIndex)++;
 
@@ -217,9 +219,9 @@ static void FillIgnoreWidthRules(UChar* completeRules, int32_t* fillIndex, int32
         }
     }
 
-    // When we have isIgnoreWidth is false, we sort the normal width latin alphabet charcatres before the full width latin alphabet charcatres
+    // When we have isIgnoreWidth is false, we sort the normal width latin alphabet characters before the full width latin alphabet characters
     //              e.g. `a` < `ａ` (\uFF41)
-    // This break the casing of the full width latin alphabet charcatres.
+    // This break the casing of the full width latin alphabet characters.
     //              e.g. `ａ` (\uFF41) == `Ａ` (\uFF21).
     // we are fixing back this case mapping here.
     if (isIgnoreCase && (!isIgnoreWidth))

From 85f28d0a2cb0b96de1fd1d1ca4951897090411a8 Mon Sep 17 00:00:00 2001
From: Tarek Mahmoud Sayed <tarekms@microsoft.com>
Date: Mon, 23 Nov 2020 14:12:32 -0800
Subject: [PATCH 6/6] Fix the comments according to the feedback

---
 .../Native/Unix/System.Globalization.Native/pal_collation.c     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.c b/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.c
index 67ee3403c5516..99edfd96bcf1f 100644
--- a/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.c
+++ b/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.c
@@ -280,7 +280,7 @@ static UCollator* CloneCollatorWithOptions(const UCollator* pCollator, int32_t o
     //          iii.	If (IgnoreWidth == false && Strength <= “Secondary”)
     //              1.	Build up a custom rule set saying that the half-width and full-width characters have a primary level difference (which will cause it always to be unequal)
     //                  a.	Ex. “0x30F2 < 0xFF66 & 0x30F3 < 0xFF9D & …”
-    //  IgnoreKanaType – this works the same way as IgnoreWidth, only change “Secondary” to “Tertiary” and use the set of Hiragana and Katakana characters instead of half-width vs full-width characters to build the rules.
+    //  IgnoreKanaType – this works the same way as IgnoreWidth, it uses the set of Hiragana and Katakana characters instead of half-width vs full-width characters to build the rules.
     int32_t applyIgnoreKanaTypeCustomRule  = isIgnoreKanaType ^ (strength < UCOL_TERTIARY); // kana differs at the tertiary level
     int32_t applyIgnoreWidthTypeCustomRule = isIgnoreWidth    ^ (strength < UCOL_TERTIARY); // character width differs at the tertiary level