Skip to content

Commit

Permalink
Workaround for insufficiently sized dest buffer
Browse files Browse the repository at this point in the history
  • Loading branch information
mkhamoyan committed Jun 29, 2023
1 parent 3e34f7b commit 42e3269
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 44 deletions.
11 changes: 0 additions & 11 deletions docs/design/features/globalization-hybrid-mode.md
Original file line number Diff line number Diff line change
Expand Up @@ -428,14 +428,3 @@ Behavioural changes compared to ICU
- Final sigma behavior correction:

ICU-based case change does not respect final-sigma rule, but hybrid does, so "ΒΌΛΟΣ" -> "βόλος", not "βόλοσ".

- The cases below throw an exception because the destination buffer is insufficiently sized:

- Capitalizing the German letter ß (sharp S) gives SS when using Apple native functions.

- Capitalizing ligatures gives a different result on Apple platforms, e.g. "\uFB00" (ﬀ) uppercases to "FF".

- Capitalizing "\u0149" (ŉ) on Apple platforms returns a combination of "\u02BC" (ʼ) and N -> (ʼN)



Original file line number Diff line number Diff line change
Expand Up @@ -396,29 +396,24 @@ public static IEnumerable<object[]> ToUpper_TestData()
// RAINBOW (outside the BMP and does not case)
yield return new object[] { cultureName, "\U0001F308", "\U0001F308" };

if (!PlatformDetection.IsHybridGlobalizationOnOSX)
{
// Unicode defines some codepoints which expand into multiple codepoints
// when cased (see SpecialCasing.txt from UNIDATA for some examples). We have never done
// these sorts of expansions, since it would cause string lengths to change when cased,
// which is non-intuitive. In addition, there are some context sensitive mappings which
// we also don't perform.
// es-zed does not case to SS when uppercased.
// on OSX, capitalizing the German letter ß (sharp S) gives SS
yield return new object[] { cultureName, "\u00DF", "\u00DF" };
yield return new object[] { cultureName, "stra\u00DFe", "STRA\u00DFE" };
if (!PlatformDetection.IsNlsGlobalization)
yield return new object[] { cultureName, "st\uD801\uDC37ra\u00DFe", "ST\uD801\uDC0FRA\u00DFE" };

// Ligatures do not expand when cased.
// on OSX, this is uppercased to "FF"
yield return new object[] { cultureName, "\uFB00", "\uFB00" };

// Precomposed character with no uppercase variant, we don't want to "decompose" this
// as part of casing.
// on OSX, this is uppercased to "ʼN"
yield return new object[] { cultureName, "\u0149", "\u0149" };
}

// Unicode defines some codepoints which expand into multiple codepoints
// when cased (see SpecialCasing.txt from UNIDATA for some examples). We have never done
// these sorts of expansions, since it would cause string lengths to change when cased,
// which is non-intuitive. In addition, there are some context sensitive mappings which
// we also don't perform.
// es-zed does not case to SS when uppercased.
yield return new object[] { cultureName, "\u00DF", "\u00DF" };
yield return new object[] { cultureName, "stra\u00DFe", "STRA\u00DFE" };
if (!PlatformDetection.IsNlsGlobalization && !PlatformDetection.IsHybridGlobalizationOnOSX)
yield return new object[] { cultureName, "st\uD801\uDC37ra\u00DFe", "ST\uD801\uDC0FRA\u00DFE" };

// Ligatures do not expand when cased.
yield return new object[] { cultureName, "\uFB00", "\uFB00" };

// Precomposed character with no uppercase variant, we don't want to "decompose" this
// as part of casing.
yield return new object[] { cultureName, "\u0149", "\u0149" };
}

// Turkish i
Expand Down
52 changes: 42 additions & 10 deletions src/native/libs/System.Globalization.Native/pal_casing.m
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,28 @@ int32_t GlobalizationNative_ChangeCaseNative(const uint16_t* localeName, int32_t

int32_t srcIdx = 0, dstIdx = 0, isError = 0;
uint16_t dstCodepoint;
while (srcIdx < result.length)
if (result.length <= cwDstLength)
{
dstCodepoint = [result characterAtIndex:srcIdx++];
Append(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
if (isError)
return isError;
while (srcIdx < result.length)
{
dstCodepoint = [result characterAtIndex:srcIdx++];
Append(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
if (isError)
return isError;
}
}
else
{
while (srcIdx < cwSrcLength)
{
NSString *src = [NSString stringWithCharacters: lpSrc + srcIdx length: 1];
srcIdx++;
NSString *dst = bToUpper ? [src uppercaseStringWithLocale:currentLocale] : [src lowercaseStringWithLocale:currentLocale];
dstCodepoint = dst.length > 1 ? [src characterAtIndex: 0] : [dst characterAtIndex: 0];
Append(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
if (isError)
return isError;
}
}
return Success;
}
Expand All @@ -90,12 +106,28 @@ int32_t GlobalizationNative_ChangeCaseInvariantNative(const uint16_t* lpSrc, int

int32_t srcIdx = 0, dstIdx = 0, isError = 0;
uint16_t dstCodepoint;
while (srcIdx < result.length)
if (result.length <= cwDstLength)
{
while (srcIdx < result.length)
{
dstCodepoint = [result characterAtIndex:srcIdx++];
Append(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
if (isError)
return isError;
}
}
else
{
dstCodepoint = [result characterAtIndex:srcIdx++];
Append(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
if (isError)
return isError;
while (srcIdx < cwSrcLength)
{
NSString *src = [NSString stringWithCharacters: lpSrc + srcIdx length: 1];
srcIdx++;
NSString *dst = bToUpper ? src.uppercaseString : src.lowercaseString;
dstCodepoint = dst.length > 1 ? [src characterAtIndex: 0] : [dst characterAtIndex: 0];
Append(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
if (isError)
return isError;
}
}
return Success;
}
Expand Down

0 comments on commit 42e3269

Please sign in to comment.