Skip to content

Commit

Permalink
[iOS][non-icu] HybridGlobalization implement normalization functions (#…
Browse files Browse the repository at this point in the history
…90582)

[iOS] HybridGlobalization implement normalization functions
  • Loading branch information
mkhamoyan authored Aug 16, 2023
1 parent 9a696fd commit d31b39e
Show file tree
Hide file tree
Showing 13 changed files with 192 additions and 36 deletions.
17 changes: 17 additions & 0 deletions src/libraries/Common/src/Interop/Interop.Normalization.iOS.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Runtime.InteropServices;
using System.Text;

// P/Invoke declarations for the Apple-native (non-ICU) normalization entry points
// exported by the Libraries.GlobalizationNative library.
internal static partial class Interop
{
internal static partial class Globalization
{
// Binds to GlobalizationNative_IsNormalizedNative.
// src/srcLen describe the UTF-16 input to test against normalizationForm.
// The native implementation reports 1 when the input is already normalized,
// 0 when it is not, and -1 on an internal error.
[LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_IsNormalizedNative", StringMarshalling = StringMarshalling.Utf16)]
internal static unsafe partial int IsNormalizedNative(NormalizationForm normalizationForm, char* src, int srcLen);

// Binds to GlobalizationNative_NormalizeStringNative.
// src/srcLen describe the UTF-16 input; buffer/bufferLength describe the destination
// that receives the normalized UTF-16 text.
// The native implementation reports the length of the normalized string, or 0 on failure.
[LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_NormalizeStringNative", StringMarshalling = StringMarshalling.Utf16)]
internal static unsafe partial int NormalizeStringNative(NormalizationForm normalizationForm, char* src, int srcLen, char* buffer, int bufferLength);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Test project that runs the shared normalization tests on Apple mobile targets
     (iOS, tvOS, Mac Catalyst) with HybridGlobalization switched on. -->
<PropertyGroup>
<TargetFrameworks>$(NetCoreAppCurrent)-ios;$(NetCoreAppCurrent)-tvos;$(NetCoreAppCurrent)-maccatalyst</TargetFrameworks>
<TestRuntime>true</TestRuntime>
<HybridGlobalization>true</HybridGlobalization>
</PropertyGroup>
<!-- Test sources are linked from the sibling Normalization test directory rather than copied. -->
<ItemGroup>
<Compile Include="..\Normalization\StringNormalizationTests.cs" />
<Compile Include="..\Normalization\NormalizationAll.cs" />
</ItemGroup>
<!-- Normalization data files, embedded under fixed logical names. -->
<ItemGroup>
<EmbeddedResource Include="..\Normalization\Data\win8.txt">
<LogicalName>NormalizationDataWin8</LogicalName>
</EmbeddedResource>
<EmbeddedResource Include="..\Normalization\Data\win7.txt">
<LogicalName>NormalizationDataWin7</LogicalName>
</EmbeddedResource>
</ItemGroup>
</Project>
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ public void Normalize()
VerifyConformanceInvariant(NormalizationForm.FormD, part0, part1, part2, part3, part4);

// Mobile / Browser ICU doesn't support FormKC and FormKD
if (PlatformDetection.IsNotUsingLimitedCultures)
if (PlatformDetection.IsNotUsingLimitedCultures || PlatformDetection.IsHybridGlobalizationOnOSX)
{
// Form KC
VerifyConformanceInvariant(NormalizationForm.FormKC, part0, part1, part2, part3, part4);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ public static IEnumerable<object[]> NormalizeTestData()
yield return new object[] { "\u1E9b\u0323", NormalizationForm.FormC, "\u1E9b\u0323" };
yield return new object[] { "\u1E9b\u0323", NormalizationForm.FormD, "\u017f\u0323\u0307" };

if (PlatformDetection.IsNotUsingLimitedCultures)
if (PlatformDetection.IsNotUsingLimitedCultures || PlatformDetection.IsHybridGlobalizationOnOSX)
{
// Mobile / Browser ICU doesn't support FormKC and FormKD
yield return new object[] { "\uFB01", NormalizationForm.FormKC, "fi" };
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1330,6 +1330,9 @@
<Compile Include="$(CommonPath)Interop\Interop.Normalization.cs">
<Link>Common\Interop\Interop.Normalization.cs</Link>
</Compile>
<Compile Include="$(CommonPath)Interop\Interop.Normalization.iOS.cs" Condition="'$(IsiOSLike)' == 'true'">
<Link>Common\Interop\Interop.Normalization.iOS.cs</Link>
</Compile>
<Compile Include="$(CommonPath)Interop\Interop.ResultCode.cs">
<Link>Common\Interop\Interop.ResultCode.cs</Link>
</Compile>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,14 @@ private static unsafe bool IcuIsNormalized(string strInput, NormalizationForm no
int ret;
fixed (char* pInput = strInput)
{
#if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS
if (GlobalizationMode.Hybrid)
ret = Interop.Globalization.IsNormalizedNative(normalizationForm, pInput, strInput.Length);
else
ret = Interop.Globalization.IsNormalized(normalizationForm, pInput, strInput.Length);
#else
ret = Interop.Globalization.IsNormalized(normalizationForm, pInput, strInput.Length);
#endif
}

if (ret == -1)
Expand Down Expand Up @@ -53,7 +60,14 @@ private static unsafe string IcuNormalize(string strInput, NormalizationForm nor
fixed (char* pInput = strInput)
fixed (char* pDest = &MemoryMarshal.GetReference(buffer))
{
#if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS
if (GlobalizationMode.Hybrid)
realLen = Interop.Globalization.NormalizeStringNative(normalizationForm, pInput, strInput.Length, pDest, buffer.Length);
else
realLen = Interop.Globalization.NormalizeString(normalizationForm, pInput, strInput.Length, pDest, buffer.Length);
#else
realLen = Interop.Globalization.NormalizeString(normalizationForm, pInput, strInput.Length, pDest, buffer.Length);
#endif
}

if (realLen == -1)
Expand Down Expand Up @@ -100,7 +114,6 @@ private static void ValidateArguments(string strInput, NormalizationForm normali
{
Debug.Assert(strInput != null);


if (OperatingSystem.IsBrowser() && (normalizationForm == NormalizationForm.FormKC || normalizationForm == NormalizationForm.FormKD))
{
// Browser's ICU doesn't contain data needed for FormKC and FormKD
Expand Down
3 changes: 2 additions & 1 deletion src/mono/mono/mini/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ if(HAVE_SYS_ICU)
pal_locale.m
pal_collation.m
pal_casing.m
pal_calendarData.m)
pal_calendarData.m
pal_normalization.m)

addprefix(icu_shim_darwin_sources "${ICU_SHIM_PATH}" "${icu_shim_darwin_sources_base}")
set(icu_shim_sources ${icu_shim_sources} ${icu_shim_darwin_sources})
Expand Down
3 changes: 2 additions & 1 deletion src/native/libs/System.Globalization.Native/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,8 @@ if (CLR_CMAKE_TARGET_APPLE)
pal_locale.m
pal_collation.m
pal_casing.m
pal_calendarData.m)
pal_calendarData.m
pal_normalization.m)
set_source_files_properties(${NATIVEGLOBALIZATION_SOURCES_OBJC} PROPERTIES COMPILE_FLAGS "-fobjc-arc ${CLR_CMAKE_COMMON_OBJC_FLAGS}")
set(NATIVEGLOBALIZATION_SOURCES ${NATIVEGLOBALIZATION_SOURCES} ${NATIVEGLOBALIZATION_SOURCES_OBJC})
endif()
Expand Down
2 changes: 2 additions & 0 deletions src/native/libs/System.Globalization.Native/entrypoints.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ static const Entry s_globalizationNative[] =
DllImportEntry(GlobalizationNative_GetLocaleNameNative)
DllImportEntry(GlobalizationNative_GetLocaleTimeFormatNative)
DllImportEntry(GlobalizationNative_IndexOfNative)
DllImportEntry(GlobalizationNative_IsNormalizedNative)
DllImportEntry(GlobalizationNative_NormalizeStringNative)
DllImportEntry(GlobalizationNative_StartsWithNative)
#endif
};
Expand Down
31 changes: 0 additions & 31 deletions src/native/libs/System.Globalization.Native/pal_casing.m
Original file line number Diff line number Diff line change
Expand Up @@ -55,37 +55,6 @@
} \
}

/**
 * Append a code point to a UTF-16 string, writing 1 or 2 code units.
 * The offset points to the current end of the string contents
 * and is advanced (post-increment) by the number of code units written.
 * "Safe" macro: validates the code point and the remaining capacity before
 * writing, so nothing is stored and the offset is left untouched on error.
 * Code points outside of the Basic Multilingual Plane are converted into the
 * corresponding surrogate pair, which needs two free code units.
 * High surrogate range: 0xD800 - 0xDBFF
 * Low surrogate range: 0xDC00 - 0xDFFF
 *
 * @param buffer uint16_t * destination string buffer (written to)
 * @param offset string offset (lvalue); code units are written starting here
 * @param capacity size of the string buffer, in code units
 * @param codePoint code point to append
 * @param isError output lvalue; set to InsufficientBuffer when the code point
 *                (or its trail surrogate) does not fit, or to InvalidCodePoint
 *                when the code point is above 0x10FFFF; otherwise not modified
 */
#define Append(buffer, offset, capacity, codePoint, isError) { \
    if ((offset) >= (capacity)) /* insufficiently sized destination buffer */ { \
        (isError) = InsufficientBuffer; \
    } else if ((uint32_t)(codePoint) > 0x10ffff) /* invalid code point */ { \
        (isError) = InvalidCodePoint; \
    } else if ((uint32_t)(codePoint) <= 0xffff) { \
        (buffer)[(offset)++] = (uint16_t)(codePoint); \
    } else if ((capacity) - (offset) < 2) /* lead fits, trail surrogate would overflow */ { \
        (isError) = InsufficientBuffer; \
    } else { \
        /* 0xd7c0 == 0xd800 - (0x10000 >> 10): folds the 0x10000 bias into the lead surrogate */ \
        (buffer)[(offset)++] = (uint16_t)(((codePoint) >> 10) + 0xd7c0); \
        (buffer)[(offset)++] = (uint16_t)(((codePoint)&0x3ff) | 0xdc00); \
    } \
}

/*
Function:
ChangeCaseNative
Expand Down
32 changes: 32 additions & 0 deletions src/native/libs/System.Globalization.Native/pal_icushim_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -346,3 +346,35 @@ const char* GlobalizationNative_GetICUDataPathFallback(void);
#endif

#endif // !defined(STATIC_ICU)
#if defined(TARGET_MACCATALYST) || defined(TARGET_IOS) || defined(TARGET_TVOS)
/**
 * Append a code point to a UTF-16 string, writing 1 or 2 code units.
 * The offset points to the current end of the string contents
 * and is advanced (post-increment) by the number of code units written.
 * "Safe" macro: validates the code point and the remaining capacity before
 * writing, so nothing is stored and the offset is left untouched on error.
 * Code points outside of the Basic Multilingual Plane are converted into the
 * corresponding surrogate pair, which needs two free code units.
 * High surrogate range: 0xD800 - 0xDBFF
 * Low surrogate range: 0xDC00 - 0xDFFF
 *
 * @param buffer uint16_t * destination string buffer (written to)
 * @param offset string offset (lvalue); code units are written starting here
 * @param capacity size of the string buffer, in code units
 * @param codePoint code point to append
 * @param isError output lvalue; set to InsufficientBuffer when the code point
 *                (or its trail surrogate) does not fit, or to InvalidCodePoint
 *                when the code point is above 0x10FFFF; otherwise not modified
 */
#define Append(buffer, offset, capacity, codePoint, isError) { \
    if ((offset) >= (capacity)) /* insufficiently sized destination buffer */ { \
        (isError) = InsufficientBuffer; \
    } else if ((uint32_t)(codePoint) > 0x10ffff) /* invalid code point */ { \
        (isError) = InvalidCodePoint; \
    } else if ((uint32_t)(codePoint) <= 0xffff) { \
        (buffer)[(offset)++] = (uint16_t)(codePoint); \
    } else if ((capacity) - (offset) < 2) /* lead fits, trail surrogate would overflow */ { \
        (isError) = InsufficientBuffer; \
    } else { \
        /* 0xd7c0 == 0xd800 - (0x10000 >> 10): folds the 0x10000 bias into the lead surrogate */ \
        (buffer)[(offset)++] = (uint16_t)(((codePoint) >> 10) + 0xd7c0); \
        (buffer)[(offset)++] = (uint16_t)(((codePoint)&0x3ff) | 0xdc00); \
    } \
}
#endif
12 changes: 12 additions & 0 deletions src/native/libs/System.Globalization.Native/pal_normalization.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,15 @@ PALEXPORT int32_t GlobalizationNative_NormalizeString(NormalizationForm normaliz
int32_t cwSrcLength,
UChar* lpDst,
int32_t cwDstLength);

// Foundation-based (non-ICU) normalization entry points.
// NOTE(review): these declarations are visible on every __APPLE__ target, while the
// definitions in pal_normalization.m are compiled only when TARGET_MACCATALYST,
// TARGET_IOS or TARGET_TVOS is defined - confirm the wider guard here is intended.
#ifdef __APPLE__
// Tests whether the UTF-16 text lpStr[0..cwStrLength) is already in normalizationForm.
// Returns 1 when normalized, 0 when not, -1 on an internal error.
PALEXPORT int32_t GlobalizationNative_IsNormalizedNative(NormalizationForm normalizationForm,
const uint16_t* lpStr,
int32_t cwStrLength);

// Normalizes the UTF-16 text lpSource[0..cwSourceLength) into normalizationForm and
// stores the result in lpDst, whose capacity is cwDstLength code units.
// Returns the length of the normalized string in code units, or 0 on failure.
PALEXPORT int32_t GlobalizationNative_NormalizeStringNative(NormalizationForm normalizationForm,
const uint16_t* lpSource,
int32_t cwSourceLength,
uint16_t* lpDst,
int32_t cwDstLength);
#endif
87 changes: 87 additions & 0 deletions src/native/libs/System.Globalization.Native/pal_normalization.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
//

#include "pal_errors.h"
#include "pal_icushim_internal.h"
#include "pal_normalization.h"
#import <Foundation/Foundation.h>

#if defined(TARGET_MACCATALYST) || defined(TARGET_IOS) || defined(TARGET_TVOS)
// Maps a NormalizationForm onto the matching Foundation normalization accessor of
// sourceString and returns the normalized copy. Returns nil for any form other than
// FormC, FormD, FormKC and FormKD.
static NSString* GetNormalizedStringForForm(NormalizationForm normalizationForm, NSString* sourceString)
{
    NSString *normalized = nil;

    switch (normalizationForm)
    {
        case FormC:
            normalized = [sourceString precomposedStringWithCanonicalMapping];
            break;
        case FormD:
            normalized = [sourceString decomposedStringWithCanonicalMapping];
            break;
        case FormKC:
            normalized = [sourceString precomposedStringWithCompatibilityMapping];
            break;
        case FormKD:
            normalized = [sourceString decomposedStringWithCompatibilityMapping];
            break;
        default:
            // Unknown form: leave the result nil so callers can report an error.
            break;
    }

    return normalized;
}

/*
Function:
IsNormalizedNative

Reports whether the UTF-16 text lpStr[0..cwStrLength) is already in the requested
Unicode Normalization Form. The text is normalized with Foundation and the result
is compared against the original.

Return values:
0: lpStr is not normalized.
1: lpStr is normalized.
-1: the normalization form is not supported.
*/
int32_t GlobalizationNative_IsNormalizedNative(NormalizationForm normalizationForm, const uint16_t* lpStr, int32_t cwStrLength)
{
    @autoreleasepool
    {
        NSString *input = [NSString stringWithCharacters: lpStr length: cwStrLength];
        NSString *expected = GetNormalizedStringForForm(normalizationForm, input);

        // nil means GetNormalizedStringForForm did not recognize the form.
        if (expected == nil)
        {
            return -1;
        }

        // The input is normalized exactly when normalizing it changes nothing.
        return [input isEqualToString: expected] ? 1 : 0;
    }
}

/*
Function:
NormalizeStringNative

Normalizes the UTF-16 text lpSource[0..cwSourceLength) into the requested Unicode
Normalization Form using Foundation and copies the result into lpDst.

Return values:
0: invalid arguments (NULL source or negative length), unsupported normalization
   form, an empty result, or a result too long to be reported as int32_t.
>0: the length of the normalized string in UTF-16 code units (no null terminator
    is written). When this value is greater than cwDstLength, or lpDst is NULL,
    nothing is written and the value is the capacity required to hold the result.

NOTE(review): the previous implementation returned 0 when the destination was too
small, which a caller cannot tell apart from an empty result. Reporting the required
length instead mirrors the ICU-based GlobalizationNative_NormalizeString and assumes
the managed caller retries with a larger buffer - confirm against IcuNormalize.
*/
int32_t GlobalizationNative_NormalizeStringNative(NormalizationForm normalizationForm, const uint16_t* lpSource, int32_t cwSourceLength, uint16_t* lpDst, int32_t cwDstLength)
{
    // A negative length would be reinterpreted as a huge NSUInteger below.
    if (lpSource == NULL || cwSourceLength < 0)
    {
        return 0;
    }

    @autoreleasepool
    {
        NSString *sourceString = [NSString stringWithCharacters: lpSource length: (NSUInteger)cwSourceLength];
        NSString *normalizedString = GetNormalizedStringForForm(normalizationForm, sourceString);

        if (normalizedString == nil)
        {
            return 0;
        }

        NSUInteger normalizedLength = normalizedString.length;

        // Empty results keep the historical 0 return; lengths that do not fit
        // in the int32_t return type cannot be reported and are treated as errors.
        if (normalizedLength == 0 || normalizedLength > (NSUInteger)INT32_MAX)
        {
            return 0;
        }

        // Copy only when the whole result fits; a partial copy would be useless to
        // the caller and getCharacters:range: does no bounds checking on the buffer.
        if (lpDst != NULL && cwDstLength >= 0 && normalizedLength <= (NSUInteger)cwDstLength)
        {
            [normalizedString getCharacters: lpDst range: NSMakeRange(0, normalizedLength)];
        }

        return (int32_t)normalizedLength;
    }
}
#endif

0 comments on commit d31b39e

Please sign in to comment.