From d31b39e11acb6f7d5dcdbbffe56dce7fd11b9c89 Mon Sep 17 00:00:00 2001 From: Meri Khamoyan <96171496+mkhamoyan@users.noreply.github.com> Date: Wed, 16 Aug 2023 18:16:21 +0400 Subject: [PATCH] [iOS][non-icu] HybridGlobalization implement normalization functions (#90582) [iOS] HybridGlobalization implement normalization functions --- .../src/Interop/Interop.Normalization.iOS.cs | 17 ++++ ....Globalization.Extensions.iOS.Tests.csproj | 19 ++++ .../tests/Normalization/NormalizationAll.cs | 2 +- .../Normalization/StringNormalizationTests.cs | 2 +- .../System.Private.CoreLib.Shared.projitems | 3 + .../System/Globalization/Normalization.Icu.cs | 15 +++- src/mono/mono/mini/CMakeLists.txt | 3 +- .../CMakeLists.txt | 3 +- .../System.Globalization.Native/entrypoints.c | 2 + .../System.Globalization.Native/pal_casing.m | 31 ------- .../pal_icushim_internal.h | 32 +++++++ .../pal_normalization.h | 12 +++ .../pal_normalization.m | 87 +++++++++++++++++++ 13 files changed, 192 insertions(+), 36 deletions(-) create mode 100644 src/libraries/Common/src/Interop/Interop.Normalization.iOS.cs create mode 100644 src/libraries/System.Globalization.Extensions/tests/Hybrid/System.Globalization.Extensions.iOS.Tests.csproj create mode 100644 src/native/libs/System.Globalization.Native/pal_normalization.m diff --git a/src/libraries/Common/src/Interop/Interop.Normalization.iOS.cs b/src/libraries/Common/src/Interop/Interop.Normalization.iOS.cs new file mode 100644 index 0000000000000..4ba97148b4538 --- /dev/null +++ b/src/libraries/Common/src/Interop/Interop.Normalization.iOS.cs @@ -0,0 +1,17 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System.Runtime.InteropServices; +using System.Text; + +internal static partial class Interop +{ + internal static partial class Globalization + { + [LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_IsNormalizedNative", StringMarshalling = StringMarshalling.Utf16)] + internal static unsafe partial int IsNormalizedNative(NormalizationForm normalizationForm, char* src, int srcLen); + + [LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_NormalizeStringNative", StringMarshalling = StringMarshalling.Utf16)] + internal static unsafe partial int NormalizeStringNative(NormalizationForm normalizationForm, char* src, int srcLen, char* buffer, int bufferLength); + } +} diff --git a/src/libraries/System.Globalization.Extensions/tests/Hybrid/System.Globalization.Extensions.iOS.Tests.csproj b/src/libraries/System.Globalization.Extensions/tests/Hybrid/System.Globalization.Extensions.iOS.Tests.csproj new file mode 100644 index 0000000000000..ada1f18597f6e --- /dev/null +++ b/src/libraries/System.Globalization.Extensions/tests/Hybrid/System.Globalization.Extensions.iOS.Tests.csproj @@ -0,0 +1,19 @@ + + + $(NetCoreAppCurrent)-ios;$(NetCoreAppCurrent)-tvos;$(NetCoreAppCurrent)-maccatalyst + true + true + + + + + + + + NormalizationDataWin8 + + + NormalizationDataWin7 + + + diff --git a/src/libraries/System.Globalization.Extensions/tests/Normalization/NormalizationAll.cs b/src/libraries/System.Globalization.Extensions/tests/Normalization/NormalizationAll.cs index 48f73a4527a38..8d27652096c51 100644 --- a/src/libraries/System.Globalization.Extensions/tests/Normalization/NormalizationAll.cs +++ b/src/libraries/System.Globalization.Extensions/tests/Normalization/NormalizationAll.cs @@ -52,7 +52,7 @@ public void Normalize() VerifyConformanceInvariant(NormalizationForm.FormD, part0, part1, part2, part3, part4); // Mobile / Browser ICU doesn't support FormKC and FormKD - if (PlatformDetection.IsNotUsingLimitedCultures) + if 
(PlatformDetection.IsNotUsingLimitedCultures || PlatformDetection.IsHybridGlobalizationOnOSX) { // Form KC VerifyConformanceInvariant(NormalizationForm.FormKC, part0, part1, part2, part3, part4); diff --git a/src/libraries/System.Globalization.Extensions/tests/Normalization/StringNormalizationTests.cs b/src/libraries/System.Globalization.Extensions/tests/Normalization/StringNormalizationTests.cs index 7fe18bb314049..252d19a16f17b 100644 --- a/src/libraries/System.Globalization.Extensions/tests/Normalization/StringNormalizationTests.cs +++ b/src/libraries/System.Globalization.Extensions/tests/Normalization/StringNormalizationTests.cs @@ -49,7 +49,7 @@ public static IEnumerable NormalizeTestData() yield return new object[] { "\u1E9b\u0323", NormalizationForm.FormC, "\u1E9b\u0323" }; yield return new object[] { "\u1E9b\u0323", NormalizationForm.FormD, "\u017f\u0323\u0307" }; - if (PlatformDetection.IsNotUsingLimitedCultures) + if (PlatformDetection.IsNotUsingLimitedCultures || PlatformDetection.IsHybridGlobalizationOnOSX) { // Mobile / Browser ICU doesn't support FormKC and FormKD yield return new object[] { "\uFB01", NormalizationForm.FormKC, "fi" }; diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index 8e58e09da35d3..144f349a32145 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -1330,6 +1330,9 @@ Common\Interop\Interop.Normalization.cs + + Common\Interop\Interop.Normalization.iOS.cs + Common\Interop\Interop.ResultCode.cs diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Icu.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Icu.cs index cfe698fabcd98..c23a0cdae59f1 100644 --- 
a/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Icu.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Icu.cs @@ -20,7 +20,14 @@ private static unsafe bool IcuIsNormalized(string strInput, NormalizationForm no int ret; fixed (char* pInput = strInput) { +#if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS + if (GlobalizationMode.Hybrid) + ret = Interop.Globalization.IsNormalizedNative(normalizationForm, pInput, strInput.Length); + else + ret = Interop.Globalization.IsNormalized(normalizationForm, pInput, strInput.Length); +#else ret = Interop.Globalization.IsNormalized(normalizationForm, pInput, strInput.Length); +#endif } if (ret == -1) @@ -53,7 +60,14 @@ private static unsafe string IcuNormalize(string strInput, NormalizationForm nor fixed (char* pInput = strInput) fixed (char* pDest = &MemoryMarshal.GetReference(buffer)) { +#if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS + if (GlobalizationMode.Hybrid) + realLen = Interop.Globalization.NormalizeStringNative(normalizationForm, pInput, strInput.Length, pDest, buffer.Length); + else + realLen = Interop.Globalization.NormalizeString(normalizationForm, pInput, strInput.Length, pDest, buffer.Length); +#else realLen = Interop.Globalization.NormalizeString(normalizationForm, pInput, strInput.Length, pDest, buffer.Length); +#endif } if (realLen == -1) @@ -100,7 +114,6 @@ private static void ValidateArguments(string strInput, NormalizationForm normali { Debug.Assert(strInput != null); - if (OperatingSystem.IsBrowser() && (normalizationForm == NormalizationForm.FormKC || normalizationForm == NormalizationForm.FormKD)) { // Browser's ICU doesn't contain data needed for FormKC and FormKD diff --git a/src/mono/mono/mini/CMakeLists.txt b/src/mono/mono/mini/CMakeLists.txt index 5d6ef3dfa3c31..6b300ab2b74cf 100644 --- a/src/mono/mono/mini/CMakeLists.txt +++ b/src/mono/mono/mini/CMakeLists.txt @@ -74,7 +74,8 @@ if(HAVE_SYS_ICU) pal_locale.m pal_collation.m 
pal_casing.m - pal_calendarData.m) + pal_calendarData.m + pal_normalization.m) addprefix(icu_shim_darwin_sources "${ICU_SHIM_PATH}" "${icu_shim_darwin_sources_base}") set(icu_shim_sources ${icu_shim_sources} ${icu_shim_darwin_sources}) diff --git a/src/native/libs/System.Globalization.Native/CMakeLists.txt b/src/native/libs/System.Globalization.Native/CMakeLists.txt index 0545ace30f4dd..d6dfcb65bf328 100644 --- a/src/native/libs/System.Globalization.Native/CMakeLists.txt +++ b/src/native/libs/System.Globalization.Native/CMakeLists.txt @@ -97,7 +97,8 @@ if (CLR_CMAKE_TARGET_APPLE) pal_locale.m pal_collation.m pal_casing.m - pal_calendarData.m) + pal_calendarData.m + pal_normalization.m) set_source_files_properties(${NATIVEGLOBALIZATION_SOURCES_OBJC} PROPERTIES COMPILE_FLAGS "-fobjc-arc ${CLR_CMAKE_COMMON_OBJC_FLAGS}") set(NATIVEGLOBALIZATION_SOURCES ${NATIVEGLOBALIZATION_SOURCES} ${NATIVEGLOBALIZATION_SOURCES_OBJC}) endif() diff --git a/src/native/libs/System.Globalization.Native/entrypoints.c b/src/native/libs/System.Globalization.Native/entrypoints.c index cffad72a02372..11ab93883cb0d 100644 --- a/src/native/libs/System.Globalization.Native/entrypoints.c +++ b/src/native/libs/System.Globalization.Native/entrypoints.c @@ -71,6 +71,8 @@ static const Entry s_globalizationNative[] = DllImportEntry(GlobalizationNative_GetLocaleNameNative) DllImportEntry(GlobalizationNative_GetLocaleTimeFormatNative) DllImportEntry(GlobalizationNative_IndexOfNative) + DllImportEntry(GlobalizationNative_IsNormalizedNative) + DllImportEntry(GlobalizationNative_NormalizeStringNative) DllImportEntry(GlobalizationNative_StartsWithNative) #endif }; diff --git a/src/native/libs/System.Globalization.Native/pal_casing.m b/src/native/libs/System.Globalization.Native/pal_casing.m index 16467e6b9cb81..31fd647347daf 100644 --- a/src/native/libs/System.Globalization.Native/pal_casing.m +++ b/src/native/libs/System.Globalization.Native/pal_casing.m @@ -55,37 +55,6 @@ } \ } -/** - * Append a code point 
to a string, overwriting 1 or 2 code units. - * The offset points to the current end of the string contents - * and is advanced (post-increment). - * "Safe" macro, checks for a valid code point. - * Converts code points outside of Basic Multilingual Plane into - * corresponding surrogate pairs if sufficient space in the string. - * High surrogate range: 0xD800 - 0xDBFF - * Low surrogate range: 0xDC00 - 0xDFFF - * If the code point is not valid or a trail surrogate does not fit, - * then isError is set to true. - * - * @param buffer const uint16_t * string buffer - * @param offset string offset, must be offset<capacity - * @param capacity size of the string buffer - * @param codePoint code point to append - * @param isError output bool set to true if an error occurs, otherwise not modified - */ -#define Append(buffer, offset, capacity, codePoint, isError) { \ - if ((offset) >= (capacity)) /* insufficiently sized destination buffer */ { \ - (isError) = InsufficientBuffer; \ - } else if ((uint32_t)(codePoint) > 0x10ffff) /* invalid code point */ { \ - (isError) = InvalidCodePoint; \ - } else if ((uint32_t)(codePoint) <= 0xffff) { \ - (buffer)[(offset)++] = (uint16_t)(codePoint); \ - } else { \ - (buffer)[(offset)++] = (uint16_t)(((codePoint) >> 10) + 0xd7c0); \ - (buffer)[(offset)++] = (uint16_t)(((codePoint)&0x3ff) | 0xdc00); \ - } \ -} - /* Function: ChangeCaseNative diff --git a/src/native/libs/System.Globalization.Native/pal_icushim_internal.h b/src/native/libs/System.Globalization.Native/pal_icushim_internal.h index 9b5fd1a6c0be9..bc04acaf55b1e 100644 --- a/src/native/libs/System.Globalization.Native/pal_icushim_internal.h +++ b/src/native/libs/System.Globalization.Native/pal_icushim_internal.h @@ -346,3 +346,35 @@ const char* GlobalizationNative_GetICUDataPathFallback(void); #endif #endif // !defined(STATIC_ICU) +#if defined(TARGET_MACCATALYST) || defined(TARGET_IOS) || defined(TARGET_TVOS) +/** + * Append a code point to a string, overwriting 1 or 2 code units. + * The offset points to the current end of the string contents + * and is advanced (post-increment). + * "Safe" macro, checks for a valid code point. 
+ * Converts code points outside of Basic Multilingual Plane into + * corresponding surrogate pairs if sufficient space in the string. + * High surrogate range: 0xD800 - 0xDBFF + * Low surrogate range: 0xDC00 - 0xDFFF + * If the code point is not valid or a trail surrogate does not fit, + * then isError is set to true. + * + * @param buffer const uint16_t * string buffer + * @param offset string offset, must be offset<capacity + * @param capacity size of the string buffer + * @param codePoint code point to append + * @param isError output bool set to true if an error occurs, otherwise not modified + */ +#define Append(buffer, offset, capacity, codePoint, isError) { \ + if ((offset) >= (capacity)) /* insufficiently sized destination buffer */ { \ + (isError) = InsufficientBuffer; \ + } else if ((uint32_t)(codePoint) > 0x10ffff) /* invalid code point */ { \ + (isError) = InvalidCodePoint; \ + } else if ((uint32_t)(codePoint) <= 0xffff) { \ + (buffer)[(offset)++] = (uint16_t)(codePoint); \ + } else { \ + (buffer)[(offset)++] = (uint16_t)(((codePoint) >> 10) + 0xd7c0); \ + (buffer)[(offset)++] = (uint16_t)(((codePoint)&0x3ff) | 0xdc00); \ + } \ +} +#endif diff --git a/src/native/libs/System.Globalization.Native/pal_normalization.h b/src/native/libs/System.Globalization.Native/pal_normalization.h index b4c319f6770bd..198d3f9d518f0 100644 --- a/src/native/libs/System.Globalization.Native/pal_normalization.h +++ b/src/native/libs/System.Globalization.Native/pal_normalization.h @@ -27,3 +27,15 @@ PALEXPORT int32_t GlobalizationNative_NormalizeString(NormalizationForm normaliz int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength); + +#ifdef __APPLE__ +PALEXPORT int32_t GlobalizationNative_IsNormalizedNative(NormalizationForm normalizationForm, + const uint16_t* lpStr, + int32_t cwStrLength); + +PALEXPORT int32_t GlobalizationNative_NormalizeStringNative(NormalizationForm normalizationForm, + const uint16_t* lpSource, + int32_t cwSourceLength, + uint16_t* lpDst, + int32_t cwDstLength); +#endif diff --git a/src/native/libs/System.Globalization.Native/pal_normalization.m b/src/native/libs/System.Globalization.Native/pal_normalization.m new file mode 100644 index 0000000000000..c31242ca36ef3 --- /dev/null +++ 
b/src/native/libs/System.Globalization.Native/pal_normalization.m
@@ -0,0 +1,87 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+//
+
+#include "pal_errors.h"
+#include "pal_icushim_internal.h"
+#include "pal_normalization.h"
+#import <Foundation/Foundation.h>
+
+#if defined(TARGET_MACCATALYST) || defined(TARGET_IOS) || defined(TARGET_TVOS)
+// Returns sourceString normalized to normalizationForm, or nil when the form is not C/D/KC/KD.
+static NSString* GetNormalizedStringForForm(NormalizationForm normalizationForm, NSString* sourceString)
+{
+    switch (normalizationForm)
+    {
+        case FormC:
+            return sourceString.precomposedStringWithCanonicalMapping;
+        case FormD:
+            return sourceString.decomposedStringWithCanonicalMapping;
+        case FormKC:
+            return sourceString.precomposedStringWithCompatibilityMapping;
+        case FormKD:
+            return sourceString.decomposedStringWithCompatibilityMapping;
+        default:
+            return nil;
+    }
+}
+
+/*
+Function:
+GlobalizationNative_IsNormalizedNative
+
+Used by System.StringNormalizationExtensions.IsNormalized to detect if a string
+is in a certain Unicode Normalization Form (normalizes a copy and compares it to the input).
+
+Return values:
+0: lpStr is not normalized.
+1: lpStr is normalized.
+-1: normalization failed (normalizationForm is not FormC, FormD, FormKC or FormKD).
+*/
+int32_t GlobalizationNative_IsNormalizedNative(NormalizationForm normalizationForm, const uint16_t* lpStr, int32_t cwStrLength)
+{
+    @autoreleasepool
+    {
+        NSString *sourceString = [NSString stringWithCharacters: lpStr length: (NSUInteger)cwStrLength];
+        NSString *normalizedString = GetNormalizedStringForForm(normalizationForm, sourceString);
+
+        return normalizedString == nil ? -1 : [sourceString isEqualToString: normalizedString];
+    }
+}
+
+/*
+Function:
+GlobalizationNative_NormalizeStringNative
+
+Used by System.StringNormalizationExtensions.Normalize to normalize a string
+into a certain Unicode Normalization Form. No null terminator is written to lpDst.
+
+Return values:
+0: normalizationForm is not recognized, or the normalized string is empty.
+>0: the length of the normalized string in UTF-16 code units. A value greater than
+    cwDstLength means lpDst was too small and has been left unmodified.
+*/
+int32_t GlobalizationNative_NormalizeStringNative(NormalizationForm normalizationForm, const uint16_t* lpSource, int32_t cwSourceLength, uint16_t* lpDst, int32_t cwDstLength)
+{
+    @autoreleasepool
+    {
+        NSString *sourceString = [NSString stringWithCharacters: lpSource length: (NSUInteger)cwSourceLength];
+        NSString *normalizedString = GetNormalizedStringForForm(normalizationForm, sourceString);
+        NSUInteger normalizedLength = normalizedString.length; // messaging nil yields 0
+        if (normalizedLength == 0)
+        {
+            return 0;
+        }
+
+        // If lpDst is too small, copy nothing but still report the required length instead of 0,
+        // which reads as an empty result. NOTE(review): confirm the managed caller retries on realLen > bufferLength.
+        if (cwDstLength > 0 && normalizedLength <= (NSUInteger)cwDstLength)
+        {
+            [normalizedString getCharacters: lpDst range: NSMakeRange(0, normalizedLength)];
+        }
+
+        return (int32_t)normalizedLength;
+    }
+}
+#endif
+