Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[browser][non-icu] HybridGlobalization SortKey #84621

Merged
merged 9 commits into from
Apr 18, 2023
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion docs/design/features/hybrid-globalization.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,16 @@ Hybrid mode does not use ICU data for some functions connected with globalizatio

### WASM

For WebAssembly in Browser we are using Web API instead of some ICU data.
For WebAssembly in the browser, we use Web APIs instead of some ICU data. Ideally, we would use `System.Runtime.InteropServices.JavaScript` to call JS code from C#, but we cannot reference any assemblies from inside `System.Private.CoreLib`. That is why we use iCalls instead.

**SortKey**

Affected public APIs:
- CompareInfo.GetSortKey
- CompareInfo.GetSortKeyLength
- CompareInfo.GetHashCode

The Web API does not have an equivalent, so these APIs throw `PlatformNotSupportedException`.

**Case change**

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,7 @@ public static string GetDistroVersionString()

// Value of the m_isInvariant flag.
public static bool IsInvariantGlobalization => m_isInvariant.Value;
// True only when the m_isHybrid flag is set and the platform is Browser.
public static bool IsHybridGlobalizationOnBrowser => m_isHybrid.Value && IsBrowser;
// Negation of IsHybridGlobalizationOnBrowser, usable by name as a test condition.
public static bool IsNotHybridGlobalizationOnBrowser => !IsHybridGlobalizationOnBrowser;
// Negation of IsInvariantGlobalization.
public static bool IsNotInvariantGlobalization => !IsInvariantGlobalization;
// True when ICUVersion is greater than 0.0.0.0.
public static bool IsIcuGlobalization => ICUVersion > new Version(0, 0, 0, 0);
// True when globalization is neither invariant nor ICU-based.
public static bool IsNlsGlobalization => IsNotInvariantGlobalization && !IsIcuGlobalization;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,32 +7,8 @@

namespace System.Globalization.Tests
{
public class CompareInfoCompareTests
public class CompareInfoCompareTests : CompareInfoTestsBase
{
private static CompareInfo s_invariantCompare = CultureInfo.InvariantCulture.CompareInfo;
private static CompareInfo s_currentCompare = CultureInfo.CurrentCulture.CompareInfo;
private static CompareInfo s_hungarianCompare = new CultureInfo("hu-HU").CompareInfo;
private static CompareInfo s_turkishCompare = new CultureInfo("tr-TR").CompareInfo;
private static CompareInfo s_japaneseCompare = new CultureInfo("ja-JP").CompareInfo;
private static CompareOptions supportedIgnoreNonSpaceOption =
PlatformDetection.IsHybridGlobalizationOnBrowser ?
CompareOptions.IgnoreNonSpace | CompareOptions.IgnoreKanaType :
CompareOptions.IgnoreNonSpace;

private static CompareOptions supportedIgnoreCaseIgnoreNonSpaceOptions =
PlatformDetection.IsHybridGlobalizationOnBrowser ?
CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace | CompareOptions.IgnoreKanaType :
CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace;

// On Windows, hiragana characters sort after katakana.
// On ICU, it is the opposite
private static int s_expectedHiraganaToKatakanaCompare = PlatformDetection.IsNlsGlobalization ? 1 : -1;

// On Windows, all halfwidth characters sort before fullwidth characters.
// On ICU, half and fullwidth characters that aren't in the "Halfwidth and fullwidth forms" block U+FF00-U+FFEF
// sort before the corresponding characters that are in the block U+FF00-U+FFEF
private static int s_expectedHalfToFullFormsComparison = PlatformDetection.IsNlsGlobalization ? -1 : 1;

private const string SoftHyphen = "\u00AD";

public static IEnumerable<object[]> Compare_Kana_TestData()
Expand Down Expand Up @@ -329,11 +305,6 @@ public static IEnumerable<object[]> Compare_TestData()
yield return new object[] { s_invariantCompare, "\U00010400", "\U00010428", CompareOptions.OrdinalIgnoreCase, useNls ? -1 : 0};
}

// There is a regression in Windows 190xx version with the Kana comparison. Avoid running this test there.
public static bool IsNotWindowsKanaRegressedVersion() => !PlatformDetection.IsWindows10Version1903OrGreater ||
PlatformDetection.IsIcuGlobalization ||
s_invariantCompare.Compare("\u3060", "\uFF80\uFF9E", CompareOptions.IgnoreKanaType | CompareOptions.IgnoreWidth | CompareOptions.IgnoreCase) == 0;

[Fact]
public void CompareWithUnassignedChars()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,8 @@

namespace System.Globalization.Tests
{
public class CompareInfoIndexOfTests
public class CompareInfoIndexOfTests : CompareInfoTestsBase
{
private static CompareInfo s_invariantCompare = CultureInfo.InvariantCulture.CompareInfo;
private static CompareInfo s_currentCompare = CultureInfo.CurrentCulture.CompareInfo;
private static CompareInfo s_germanCompare = new CultureInfo("de-DE").CompareInfo;
private static CompareInfo s_hungarianCompare = new CultureInfo("hu-HU").CompareInfo;
private static CompareInfo s_turkishCompare = new CultureInfo("tr-TR").CompareInfo;
private static CompareInfo s_slovakCompare = new CultureInfo("sk-SK").CompareInfo;

public static IEnumerable<object[]> IndexOf_TestData()
{
// Empty string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,8 @@

namespace System.Globalization.Tests
{
public class CompareInfoLastIndexOfTests
public class CompareInfoLastIndexOfTests : CompareInfoTestsBase
{
private static CompareInfo s_invariantCompare = CultureInfo.InvariantCulture.CompareInfo;
private static CompareInfo s_germanCompare = new CultureInfo("de-DE").CompareInfo;
private static CompareInfo s_hungarianCompare = new CultureInfo("hu-HU").CompareInfo;
private static CompareInfo s_turkishCompare = new CultureInfo("tr-TR").CompareInfo;
private static CompareInfo s_slovakCompare = new CultureInfo("sk-SK").CompareInfo;

public static IEnumerable<object[]> LastIndexOf_TestData()
{
bool useNls = PlatformDetection.IsNlsGlobalization;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

namespace System.Globalization.Tests
{
public partial class CompareInfoTests
public class CompareInfoTests : CompareInfoTestsBase
{
[Theory]
[InlineData("")]
Expand Down Expand Up @@ -60,7 +60,7 @@ public void EqualsTest(CompareInfo compare1, object value, bool expected)
new object[] { "", CompareOptions.None, "\u200c", CompareOptions.None, true }, // see comment at bottom of SortKey_TestData
};

[Theory]
[ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsNotHybridGlobalizationOnBrowser))]
[MemberData(nameof(GetHashCodeTestData))]
public void GetHashCodeTest(string source1, CompareOptions options1, string source2, CompareOptions options2, bool expected)
{
Expand Down Expand Up @@ -97,19 +97,6 @@ public static IEnumerable<object[]> CompareInfo_TestData()
yield return new object[] { "tr-TR" , 0x041f };
}

// On NLS, hiragana characters sort after katakana.
// On ICU, it is the opposite
private static int s_expectedHiraganaToKatakanaCompare = PlatformDetection.IsNlsGlobalization ? 1 : -1;

// On NLS, all halfwidth characters sort before fullwidth characters.
// On ICU, half and fullwidth characters that aren't in the "Halfwidth and fullwidth forms" block U+FF00-U+FFEF
// sort before the corresponding characters that are in the block U+FF00-U+FFEF
private static int s_expectedHalfToFullFormsComparison = PlatformDetection.IsNlsGlobalization ? -1 : 1;

private static CompareInfo s_hungarianCompare = new CultureInfo("hu-HU").CompareInfo;
private static CompareInfo s_invariantCompare = CultureInfo.InvariantCulture.CompareInfo;
private static CompareInfo s_turkishCompare = new CultureInfo("tr-TR").CompareInfo;

public static IEnumerable<object[]> SortKey_Kana_TestData()
{
CompareOptions ignoreKanaIgnoreWidthIgnoreCase = CompareOptions.IgnoreKanaType | CompareOptions.IgnoreWidth | CompareOptions.IgnoreCase;
Expand All @@ -124,6 +111,7 @@ public static IEnumerable<object[]> SortKey_Kana_TestData()
yield return new object[] { s_invariantCompare, "\u3070\u3073\u3076\u3079\u307C", "\u30D0\u30D3\u3076\u30D9\uFF8E\uFF9E", CompareOptions.None, s_expectedHiraganaToKatakanaCompare };
yield return new object[] { s_invariantCompare, "\u3060", "\uFF80\uFF9E", CompareOptions.None, s_expectedHiraganaToKatakanaCompare };
}

public static IEnumerable<object[]> SortKey_TestData()
{
CompareOptions ignoreKanaIgnoreWidthIgnoreCase = CompareOptions.IgnoreKanaType | CompareOptions.IgnoreWidth | CompareOptions.IgnoreCase;
Expand Down Expand Up @@ -357,13 +345,48 @@ public static void LcidTest(string cultureName, int lcid)
Assert.Equal(lcid, ci.LCID);
}

[ConditionalTheory(typeof(CompareInfoCompareTests), nameof(CompareInfoCompareTests.IsNotWindowsKanaRegressedVersion))]
[ConditionalTheory(typeof(CompareInfoTests), nameof(IsNotWindowsKanaRegressedVersionAndNotHybridGlobalizationOnWasm))]
[MemberData(nameof(SortKey_Kana_TestData))]
// Runs the shared SortKeyTest checks against the kana-specific data set.
public void SortKeyKanaTest(CompareInfo compareInfo, string string1, string string2, CompareOptions options, int expected)
    => SortKeyTest(compareInfo, string1, string2, options, expected);

// Runs only with HybridGlobalization on Browser. GetSortKey, GetSortKeyLength and
// GetHashCode(string, CompareOptions) must each throw PlatformNotSupportedException
// carrying the expected message.
[ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsHybridGlobalizationOnBrowser))]
public void SortKeyTestNotSupported()
{
    AssertThrowsPNSE("SortKey", () => s_invariantCompare.GetSortKey(""));
    AssertThrowsPNSE("SortKey", () => s_invariantCompare.GetSortKeyLength(ReadOnlySpan<char>.Empty));
    AssertThrowsPNSE("HashCode", () => s_invariantCompare.GetHashCode("", CompareOptions.None));

    // Expected exception message for the given feature name.
    static string GetPNSEText(string funcName) => $"{funcName} is not supported when HybridGlobalization=true. Disable it to load larger ICU bundle, then use this option.";

    // Assert.Throws fails the test when nothing is thrown, which replaces the manual
    // try / "assert not reached" / catch pattern; the message is then compared exactly.
    static void AssertThrowsPNSE(string funcName, Action action)
    {
        PlatformNotSupportedException pnse = Assert.Throws<PlatformNotSupportedException>(action);
        Assert.Equal(GetPNSEText(funcName), pnse.Message);
    }
}

[DllImport("kernel32", CharSet = CharSet.Unicode)]
private static extern int CompareStringEx(string lpLocaleName, uint dwCmpFlags, string lpString1, int cchCount1, string lpString2, int cchCount2, IntPtr lpVersionInformation, IntPtr lpReserved, int lParam);
Expand All @@ -372,7 +395,7 @@ public void SortKeyKanaTest(CompareInfo compareInfo, string string1, string stri
// True when NLS globalization is in use and CompareStringEx, called with NORM_LINGUISTIC_CASING,
// does not return 2 for "" versus "\u200C" (2 is presumably CSTR_EQUAL; confirm against the Win32 docs).
// The IsNlsGlobalization check comes first, so the kernel32 call is only made under NLS.
private static bool WindowsVersionHasTheCompareStringRegression =>
PlatformDetection.IsNlsGlobalization && CompareStringEx("", NORM_LINGUISTIC_CASING, "", 0, "\u200C", 1, IntPtr.Zero, IntPtr.Zero, 0) != 2;

[Theory]
[ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsNotHybridGlobalizationOnBrowser))]
[MemberData(nameof(SortKey_TestData))]
public void SortKeyTest(CompareInfo compareInfo, string string1, string string2, CompareOptions options, int expectedSign)
{
Expand Down Expand Up @@ -421,7 +444,7 @@ unsafe static void RunSpanSortKeyTest(CompareInfo compareInfo, ReadOnlySpan<char
}
}

[Fact]
[ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotHybridGlobalizationOnBrowser))]
public void SortKeyMiscTest()
{
CompareInfo ci = new CultureInfo("en-US").CompareInfo;
Expand Down Expand Up @@ -506,7 +529,7 @@ public void VersionTest()
Assert.NotEqual(sv1.SortId, sv2.SortId);
}

[Theory]
[ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsNotHybridGlobalizationOnBrowser))]
[MemberData(nameof(GetHashCodeTestData))]
public void GetHashCode_Span(string source1, CompareOptions options1, string source2, CompareOptions options2, bool expectSameHashCode)
{
Expand All @@ -523,7 +546,7 @@ public void GetHashCode_Span(string source1, CompareOptions options1, string sou
Assert.Equal(expectSameHashCode, hashOfSource1AsSpan == hashOfSource2AsSpan);
}

[Fact]
[ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotHybridGlobalizationOnBrowser))]
public void GetHashCode_NullAndEmptySpan()
{
// Ensure that null spans and non-null empty spans produce the same hash code.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Buffers;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Reflection;
using System.Text;
using Xunit;

namespace System.Globalization.Tests
{
/// <summary>
/// Common base for the CompareInfo test classes. Holds the cached <see cref="CompareInfo"/>
/// instances, the platform-dependent expectations and option sets, and the predicates
/// used to conditionally skip tests.
/// </summary>
public class CompareInfoTestsBase
{
    // Hiragana vs. katakana: NLS (Windows) sorts hiragana after katakana (1);
    // ICU sorts them the other way round (-1).
    protected static int s_expectedHiraganaToKatakanaCompare = PlatformDetection.IsNlsGlobalization ? 1 : -1;

    // Halfwidth vs. fullwidth: NLS (Windows) sorts every halfwidth character before its fullwidth form (-1).
    // ICU sorts characters outside the "Halfwidth and fullwidth forms" block (U+FF00-U+FFEF)
    // before their counterparts inside that block (1).
    protected static int s_expectedHalfToFullFormsComparison = PlatformDetection.IsNlsGlobalization ? -1 : 1;

    // CompareInfo instances shared by the derived test classes.
    protected static CompareInfo s_invariantCompare = CultureInfo.InvariantCulture.CompareInfo;
    protected static CompareInfo s_currentCompare = CultureInfo.CurrentCulture.CompareInfo;
    protected static CompareInfo s_germanCompare = new CultureInfo("de-DE").CompareInfo;
    protected static CompareInfo s_hungarianCompare = new CultureInfo("hu-HU").CompareInfo;
    protected static CompareInfo s_turkishCompare = new CultureInfo("tr-TR").CompareInfo;
    protected static CompareInfo s_japaneseCompare = new CultureInfo("ja-JP").CompareInfo;
    protected static CompareInfo s_slovakCompare = new CultureInfo("sk-SK").CompareInfo;

    // IgnoreNonSpace, with IgnoreKanaType added when running HybridGlobalization on Browser.
    protected static CompareOptions supportedIgnoreNonSpaceOption =
        CompareOptions.IgnoreNonSpace |
        (PlatformDetection.IsHybridGlobalizationOnBrowser ? CompareOptions.IgnoreKanaType : CompareOptions.None);

    // IgnoreCase | IgnoreNonSpace, with IgnoreKanaType added when running HybridGlobalization on Browser.
    protected static CompareOptions supportedIgnoreCaseIgnoreNonSpaceOptions =
        CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace |
        (PlatformDetection.IsHybridGlobalizationOnBrowser ? CompareOptions.IgnoreKanaType : CompareOptions.None);

    // Windows 190xx has a Kana comparison regression; tests guarded by this predicate are skipped there.
    protected static bool IsNotWindowsKanaRegressedVersion()
    {
        if (!PlatformDetection.IsWindows10Version1903OrGreater)
        {
            return true;
        }

        if (PlatformDetection.IsIcuGlobalization)
        {
            return true;
        }

        // Probe the comparison directly: an unaffected system reports these two strings as equal.
        CompareOptions probeOptions = CompareOptions.IgnoreKanaType | CompareOptions.IgnoreWidth | CompareOptions.IgnoreCase;
        return s_invariantCompare.Compare("\u3060", "\uFF80\uFF9E", probeOptions) == 0;
    }

    // Same as IsNotWindowsKanaRegressedVersion, but also false for HybridGlobalization on Browser.
    protected static bool IsNotWindowsKanaRegressedVersionAndNotHybridGlobalizationOnWasm()
    {
        if (PlatformDetection.IsHybridGlobalizationOnBrowser)
        {
            return false;
        }

        return IsNotWindowsKanaRegressedVersion();
    }
}
}
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>$(NetCoreAppCurrent)-browser</TargetFramework>
<TargetFrameworks>$(NetCoreAppCurrent)-browser</TargetFrameworks>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
<TestRuntime>true</TestRuntime>
<HybridGlobalization>true</HybridGlobalization>
</PropertyGroup>
<ItemGroup>
<Compile Include="..\System\Globalization\TextInfoTests.cs" />
<Compile Include="..\CompareInfo\CompareInfoTests.Compare.cs" />
<Compile Include="..\CompareInfo\CompareInfoTests.cs" />
<Compile Include="..\CompareInfo\CompareInfoTestsBase.cs" />
</ItemGroup>
</Project>
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
<Compile Include="NlsSwitchTests.cs" />
<Compile Include="..\CompareInfo\CompareInfoTests.cs"
Link="CompareInfo\CompareInfoTests.cs" />
<Compile Include="..\CompareInfo\CompareInfoTestsBase.cs"
Link="CompareInfo\CompareInfoTestsBase.cs" />
<Compile Include="..\CompareInfo\CompareInfoTests.IndexOf.cs"
Link="CompareInfo\CompareInfoTests.IndexOf.cs" />
<Compile Include="..\CompareInfo\CompareInfoTests.IsPrefix.cs"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
<Compile Include="AssemblyInfo.cs" />
<Compile Include="IcuTests.cs" />
<Compile Include="CompareInfo\CompareInfoTests.cs" />
<Compile Include="CompareInfo\CompareInfoTestsBase.cs" />
<Compile Include="CompareInfo\CompareInfoTests.IndexOf.cs" />
<Compile Include="CompareInfo\CompareInfoTests.IsPrefix.cs" />
<Compile Include="CompareInfo\CompareInfoTests.Compare.cs" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1425,6 +1425,10 @@ public SortKey GetSortKey(string source)
// Picks the sort-key implementation: NLS when GlobalizationMode.UseNls is set, otherwise ICU.
// On TARGET_BROWSER builds, hybrid mode throws PlatformNotSupportedException instead of reaching ICU.
private SortKey CreateSortKeyCore(string source, CompareOptions options)
{
    if (GlobalizationMode.UseNls)
    {
        return NlsCreateSortKey(source, options);
    }

#if TARGET_BROWSER
    if (GlobalizationMode.Hybrid)
    {
        throw new PlatformNotSupportedException(GetPNSEText("SortKey"));
    }
#endif

    return IcuCreateSortKey(source, options);
}

/// <summary>
Expand Down Expand Up @@ -1462,9 +1466,13 @@ public int GetSortKey(ReadOnlySpan<char> source, Span<byte> destination, Compare
}

// Picks the sort-key implementation: NLS when GlobalizationMode.UseNls is set, otherwise ICU.
// On TARGET_BROWSER builds, hybrid mode throws PlatformNotSupportedException instead of reaching ICU.
// Only the hybrid-aware expression is kept; the stale copy without the hybrid check is removed.
private int GetSortKeyCore(ReadOnlySpan<char> source, Span<byte> destination, CompareOptions options) =>
    GlobalizationMode.UseNls ?
        NlsGetSortKey(source, destination, options) :
#if TARGET_BROWSER
    GlobalizationMode.Hybrid ?
        throw new PlatformNotSupportedException(GetPNSEText("SortKey")) :
#endif
        IcuGetSortKey(source, destination, options);

/// <summary>
/// Returns the length (in bytes) of the sort key that would be produced from the specified input.
Expand Down Expand Up @@ -1495,8 +1503,12 @@ public int GetSortKeyLength(ReadOnlySpan<char> source, CompareOptions options =
}

// Picks the sort-key-length implementation: NLS when GlobalizationMode.UseNls is set, otherwise ICU.
// On TARGET_BROWSER builds, hybrid mode throws PlatformNotSupportedException instead of reaching ICU.
// The condition head appears exactly once; the duplicated "GlobalizationMode.UseNls ?" line is removed.
private int GetSortKeyLengthCore(ReadOnlySpan<char> source, CompareOptions options) =>
    GlobalizationMode.UseNls ?
        NlsGetSortKeyLength(source, options) :
#if TARGET_BROWSER
    GlobalizationMode.Hybrid ?
        throw new PlatformNotSupportedException(GetPNSEText("SortKey")) :
#endif
        IcuGetSortKeyLength(source, options);

public override bool Equals([NotNullWhen(true)] object? value)
Expand Down Expand Up @@ -1570,6 +1582,10 @@ public int GetHashCode(ReadOnlySpan<char> source, CompareOptions options)
// Picks the hash-code implementation: NLS when GlobalizationMode.UseNls is set, otherwise ICU.
// On TARGET_BROWSER builds, hybrid mode throws PlatformNotSupportedException instead of reaching ICU.
private unsafe int GetHashCodeOfStringCore(ReadOnlySpan<char> source, CompareOptions options)
{
    if (GlobalizationMode.UseNls)
    {
        return NlsGetHashCodeOfString(source, options);
    }

#if TARGET_BROWSER
    if (GlobalizationMode.Hybrid)
    {
        throw new PlatformNotSupportedException(GetPNSEText("HashCode"));
    }
#endif

    return IcuGetHashCodeOfString(source, options);
}

// Returns "CompareInfo - " followed by Name.
public override string ToString()
{
    return "CompareInfo - " + Name;
}
Expand Down Expand Up @@ -1599,5 +1615,9 @@ public SortVersion Version
}

// LCID of the culture that CultureInfo.GetCultureInfo returns for Name.
public int LCID
{
    get { return CultureInfo.GetCultureInfo(Name).LCID; }
}

#if TARGET_BROWSER
// Builds the PlatformNotSupportedException message for a feature unavailable in hybrid globalization mode.
// funcName is the feature name placed at the start of the message.
private static string GetPNSEText(string funcName)
{
    return $"{funcName} is not supported when HybridGlobalization=true. Disable it to load larger ICU bundle, then use this option.";
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be in a resource file and localized?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea. I can see that all exception texts in PrivateCoreLib are stored like this, so you are right.

#endif
}
}