Skip to content

Commit

Permalink
[browser] HybridGlobalization correct HashCode ranges of skipped …
Browse files Browse the repository at this point in the history
…unicodes (#102912)

* Add full tests. Behavior is correct.

* Feedback.
  • Loading branch information
ilonatommy authored May 31, 2024
1 parent 9fca0c3 commit 3750ac5
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 94 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ private unsafe int JsIndexOfCore(ReadOnlySpan<char> source, ReadOnlySpan<char> t
}
}

// there are chars that are ignored by ICU hashing algorithm but not ignored by invariant hashing
// there are chars that are considered equal by HybridGlobalization but do not have equal hashes when binary hashed
// Control: 1105 (out of 1105)
// Format: 697 (out of 731)
// OtherPunctuation: 6919 (out of 7004)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,108 +12,52 @@ namespace System.Globalization.Tests
{
public class CompareInfoHashCodeTests : CompareInfoTestsBase
{
public static IEnumerable<object[]> HashCodeLocalized_TestData()
{
yield return new object[] { s_invariantCompare, "foo", "Foo", CompareOptions.IgnoreCase };
yield return new object[] { s_invariantCompare, "igloo", "\u0130GLOO", CompareOptions.IgnoreCase };
yield return new object[] { s_invariantCompare, "igloo", "\u0130GLOO", CompareOptions.None };
yield return new object[] { s_invariantCompare, "igloo", "IGLOO", CompareOptions.IgnoreCase };
yield return new object[] { new CultureInfo("pl-PL").CompareInfo, "igloo", "\u0130GLOO", CompareOptions.IgnoreCase };
yield return new object[] { new CultureInfo("pl-PL").CompareInfo, "igloo", "IGLOO", CompareOptions.IgnoreCase };
yield return new object[] { new CultureInfo("tr-TR").CompareInfo, "igloo", "\u0130GLOO", CompareOptions.IgnoreCase };
yield return new object[] { new CultureInfo("tr-TR").CompareInfo, "igloo", "IGLOO", CompareOptions.IgnoreCase };

if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
{
if (PlatformDetection.IsNotHybridGlobalizationOnApplePlatform)
yield return new object[] { new CultureInfo("en-GB").CompareInfo, "100", "100!", CompareOptions.IgnoreSymbols }; // HG: equal: True, hashCodesEqual: False
yield return new object[] { new CultureInfo("ja-JP").CompareInfo, "\u30A2", "\u3042", CompareOptions.IgnoreKanaType }; // HG: equal: True, hashCodesEqual: False
yield return new object[] { new CultureInfo("en-GB").CompareInfo, "caf\u00E9", "cafe\u0301", CompareOptions.IgnoreNonSpace | CompareOptions.IgnoreKanaType }; // HG: equal: True, hashCodesEqual: False
}
}

[Theory]
[MemberData(nameof(HashCodeLocalized_TestData))]
public void HashCodeLocalized(CompareInfo cmpInfo, string str1, string str2, CompareOptions options)
[OuterLoop]
[ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsIcuGlobalization))]
[ActiveIssue("https://github.com/dotnet/runtime/issues/95338", typeof(PlatformDetection), nameof(PlatformDetection.IsHybridGlobalizationOnApplePlatform))]
public void CheckHashingInLineWithEqual()
{
bool areEqual = cmpInfo.Compare(str1, str2, options) == 0;
var hashCode1 = cmpInfo.GetHashCode(str1, options);
var hashCode2 = cmpInfo.GetHashCode(str2, options);
bool areHashCodesEqual = hashCode1 == hashCode2;

if (areEqual)
{
Assert.True(areHashCodesEqual);
}
else
int additionalCollisions = 0;
CultureInfo[] cultures = CultureInfo.GetCultures(CultureTypes.AllCultures);
foreach (CultureInfo culture in cultures)
{
Assert.False(areHashCodesEqual);
}

// implication of the above behavior:
StringComparer stringComparer = new CustomComparer(cmpInfo, options);
TryAddToCustomDictionary(stringComparer, str1, str2, areHashCodesEqual);
}
// Japanese does not have "None" compare option, it always ignores Kana
// HashCode is not available for options different than None or IgnoreCase
if (culture.Name.Split('-')[0] == "ja")
continue;

private void TryAddToCustomDictionary(StringComparer comparer, string str1, string str2, bool shouldFail)
{
Dictionary<string, int> customDictionary = new Dictionary<string, int>(comparer);
customDictionary.Add(str1, 0);
try
{
customDictionary.Add(str2, 1);
Assert.False(shouldFail);
}
catch (ArgumentException ex)
{
Assert.True(shouldFail);
Assert.Contains("An item with the same key has already been added.", ex.Message);
for (int i = 0; i <= 65535; i++)
CheckChar(i, culture);
}
catch (Exception ex)

void CheckChar(int charCode, CultureInfo culture)
{
Assert.Fail($"Unexpected exception thrown: {ex}");
var cmpInfo = culture.CompareInfo;
char character = (char)charCode;
string str1 = $"a{character}b";
string str2 = "ab";
CompareOptions options = CompareOptions.None;
var hashCode1 = cmpInfo.GetHashCode(str1, options);
var hashCode2 = cmpInfo.GetHashCode(str2, options);
bool areHashCodesEqual = hashCode1 == hashCode2;
StringComparer stringComparer = new CustomComparer(cmpInfo, options);
// if equal => same, then expect hash => same
if (stringComparer.Compare(str1, str2) == 0)
{
Assert.True(areHashCodesEqual, $"Expected equal hashes for equal strings. The check failed for culture {culture.Name}, character: {character}, code: {charCode}.");
}
// if equal => diff, then expect hash => diff
else
{
if (areHashCodesEqual)
{
additionalCollisions++; // this should be smallest possible, 11541466
}
}
}
}

public static IEnumerable<object[]> CheckHashingOfSkippedChars_TestData()
{
// one char from each ignored category that is skipped on ICU
yield return new object[] { '\u0008', s_invariantCompare }; // Control: BACKSPACE
yield return new object[] { '\u200B', s_invariantCompare }; // Format: ZERO WIDTH SPACE
yield return new object[] { '\u180A', s_invariantCompare }; // OtherPunctuation: MONGOLIAN NIRUGU
yield return new object[] { '\uFE73', s_invariantCompare }; // OtherLetter: THAI CHARACTER PAIYANNOI
yield return new object[] { '\u0F3E', s_invariantCompare }; // SpacingCombiningMark: "TIBETAN MARK GTER YIG MGO UM RTAGS GNYIS
yield return new object[] { '\u0640', s_invariantCompare }; // ModifierLetter: ARABIC TATWEEL
yield return new object[] { '\u0488', s_invariantCompare }; // EnclosingMark: COMBINING CYRILLIC HUNDRED THOUSANDS SIGN
yield return new object[] { '\u034F', s_invariantCompare }; // NonSpacingMark: DIAERESIS
CompareInfo thaiCmpInfo = new CultureInfo("th-TH").CompareInfo;
yield return new object[] { '\u0020', thaiCmpInfo }; // SpaceSeparator: SPACE
yield return new object[] { '\u0028', thaiCmpInfo }; // OpenPunctuation: LEFT PARENTHESIS
yield return new object[] { '\u007D', thaiCmpInfo }; // ClosePunctuation: RIGHT PARENTHESIS
yield return new object[] { '\u2013', thaiCmpInfo }; // DashPunctuation: EN DASH
yield return new object[] { '\u005F', thaiCmpInfo }; // ConnectorPunctuation: LOW LINE
yield return new object[] { '\u2018', thaiCmpInfo }; // InitialQuotePunctuation: LEFT SINGLE QUOTATION MARK
yield return new object[] { '\u2019', thaiCmpInfo }; // FinalQuotePunctuation: RIGHT SINGLE QUOTATION MARK
yield return new object[] { '\u2028', thaiCmpInfo }; // LineSeparator: LINE SEPARATOR
yield return new object[] { '\u2029', thaiCmpInfo }; // ParagraphSeparator: PARAGRAPH SEPARATOR
}

[ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsIcuGlobalization))]
[MemberData(nameof(CheckHashingOfSkippedChars_TestData))]
[ActiveIssue("https://github.com/dotnet/runtime/issues/95338", typeof(PlatformDetection), nameof(PlatformDetection.IsHybridGlobalizationOnApplePlatform))]
public void CheckHashingOfSkippedChars(char character, CompareInfo cmpInfo)
{
string str1 = $"a{character}b";
string str2 = "ab";
CompareOptions options = CompareOptions.None;
var hashCode1 = cmpInfo.GetHashCode(str1, options);
var hashCode2 = cmpInfo.GetHashCode(str2, options);
bool areHashCodesEqual = hashCode1 == hashCode2;
Assert.True(areHashCodesEqual);
StringComparer stringComparer = new CustomComparer(cmpInfo, options);
TryAddToCustomDictionary(stringComparer, str1, str2, areHashCodesEqual);
}

public static IEnumerable<object[]> GetHashCodeTestData => new[]
{
new object[] { "abc", CompareOptions.OrdinalIgnoreCase, "ABC", CompareOptions.OrdinalIgnoreCase, true },
Expand Down

0 comments on commit 3750ac5

Please sign in to comment.