diff --git a/icu4c/source/common/loclikely.cpp b/icu4c/source/common/loclikely.cpp
index 136081bcfaf5..d80096b588e7 100644
--- a/icu4c/source/common/loclikely.cpp
+++ b/icu4c/source/common/loclikely.cpp
@@ -1181,13 +1181,21 @@ _uloc_minimizeSubtags(const char* localeID,
     }
 }
 
+/**
+ * Canonicalizes localeID into buffer via uloc_canonicalize().
+ *
+ * @return the length reported by uloc_canonicalize(). On
+ *         U_BUFFER_OVERFLOW_ERROR / U_STRING_NOT_TERMINATED_WARNING, *err is
+ *         left untouched so the caller can retry with a larger buffer;
+ *         on any other failure, -1.
+ */
-static UBool
+static int32_t
 do_canonicalize(const char* localeID,
          char* buffer,
          int32_t bufferCapacity,
          UErrorCode* err)
 {
-    uloc_canonicalize(
+    int32_t canonicalizedSize = uloc_canonicalize(
         localeID,
         buffer,
         bufferCapacity,
@@ -1195,16 +1203,14 @@ do_canonicalize(const char* localeID,
 
     if (*err == U_STRING_NOT_TERMINATED_WARNING ||
         *err == U_BUFFER_OVERFLOW_ERROR) {
-        *err = U_ILLEGAL_ARGUMENT_ERROR;
-
-        return FALSE;
+        return canonicalizedSize;
     }
     else if (U_FAILURE(*err)) {
 
-        return FALSE;
+        return -1;
     }
     else {
-        return TRUE;
+        return canonicalizedSize;
     }
 }
 
@@ -1241,12 +1247,17 @@ static UBool
 _ulocimp_addLikelySubtags(const char* localeID,
                           icu::ByteSink& sink,
                           UErrorCode* status) {
-    char localeBuffer[ULOC_FULLNAME_CAPACITY];
-
-    if (do_canonicalize(localeID, localeBuffer, sizeof localeBuffer, status)) {
-        return _uloc_addLikelySubtags(localeBuffer, sink, status);
+    PreflightingLocaleIDBuffer localeBuffer;
+    do {
+        localeBuffer.requestedCapacity = do_canonicalize(localeID, localeBuffer.getBuffer(),
+            localeBuffer.getCapacity(), status);
+    } while (localeBuffer.needToTryAgain(status));
+
+    if (U_SUCCESS(*status)) {
+        return _uloc_addLikelySubtags(localeBuffer.getBuffer(), sink, status);
+    } else {
+        return FALSE;
     }
-    return FALSE;
 }
 
 U_CAPI void U_EXPORT2
@@ -1289,11 +1300,17 @@ U_CAPI void U_EXPORT2
 ulocimp_minimizeSubtags(const char* localeID,
                         icu::ByteSink& sink,
                         UErrorCode* status) {
-    char localeBuffer[ULOC_FULLNAME_CAPACITY];
-
-    if (do_canonicalize(localeID, localeBuffer, sizeof localeBuffer, status)) {
-        _uloc_minimizeSubtags(localeBuffer, sink, status);
-    }
+    PreflightingLocaleIDBuffer localeBuffer;
+    do {
+        localeBuffer.requestedCapacity = do_canonicalize(localeID, localeBuffer.getBuffer(),
+            localeBuffer.getCapacity(), status);
+    } while (localeBuffer.needToTryAgain(status));
+
+    // Mirror _ulocimp_addLikelySubtags(): never hand the buffer on after a
+    // failed canonicalization (e.g. U_MEMORY_ALLOCATION_ERROR on the retry).
+    if (U_SUCCESS(*status)) {
+        _uloc_minimizeSubtags(localeBuffer.getBuffer(), sink, status);
+    }
 }
 
 // Pairs of (language subtag, + or -) for finding out fast if common languages
diff --git a/icu4c/source/common/uloc.cpp b/icu4c/source/common/uloc.cpp
index d96e79b8fdd8..1b14e641422b 100644
--- a/icu4c/source/common/uloc.cpp
+++ b/icu4c/source/common/uloc.cpp
@@ -478,15 +478,19 @@ static const CanonicalizationMap CANONICALIZE_MAP[] = {
 /* Test if the locale id has BCP47 u extension and does not have '@' */
 #define _hasBCP47Extension(id) (id && uprv_strstr(id, "@") == NULL && getShortestSubtagLength(localeID) == 1)
 /* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */
-#define _ConvertBCP47(finalID, id, buffer, length,err) UPRV_BLOCK_MACRO_BEGIN { \
-    if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 ||  \
-        U_FAILURE(*err) || *err == U_STRING_NOT_TERMINATED_WARNING) { \
-        finalID=id; \
-        if (*err == U_STRING_NOT_TERMINATED_WARNING) { *err = U_BUFFER_OVERFLOW_ERROR; } \
-    } else { \
-        finalID=buffer; \
-    } \
-} UPRV_BLOCK_MACRO_END
+static int32_t _ConvertBCP47(
+    const char*& finalID, const char* id, char* buffer, int32_t length, UErrorCode* err) {
+    int32_t localeIDSize = uloc_forLanguageTag(id, buffer, length, NULL, err);
+    if (localeIDSize <= 0 || U_FAILURE(*err) || *err == U_STRING_NOT_TERMINATED_WARNING) {
+        finalID=id;
+        if (*err == U_STRING_NOT_TERMINATED_WARNING) {
+            *err = U_BUFFER_OVERFLOW_ERROR;
+        }
+    } else {
+        finalID=buffer;
+    }
+    return localeIDSize;
+}
 /* Gets the size of the shortest subtag in the given localeID. */
 static int32_t getShortestSubtagLength(const char *localeID) {
     int32_t localeIDLength = static_cast<int32_t>(uprv_strlen(localeID));
@@ -1474,7 +1478,7 @@ _canonicalize(const char* localeID,
               uint32_t options,
               UErrorCode* err) {
     int32_t j, fieldCount=0, scriptSize=0, variantSize=0;
-    char tempBuffer[ULOC_FULLNAME_CAPACITY];
+    PreflightingLocaleIDBuffer tempBuffer;
     const char* origLocaleID;
     const char* tmpLocaleID;
     const char* keywordAssign = NULL;
@@ -1485,7 +1489,10 @@ _canonicalize(const char* localeID,
     }
 
     if (_hasBCP47Extension(localeID)) {
-        _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
+        do {
+            tempBuffer.requestedCapacity = _ConvertBCP47(tmpLocaleID, localeID,
+                tempBuffer.getBuffer(), tempBuffer.getCapacity(), err);
+        } while (tempBuffer.needToTryAgain(err));
     } else {
         if (localeID==NULL) {
            localeID=uloc_getDefault();
diff --git a/icu4c/source/common/ulocimp.h b/icu4c/source/common/ulocimp.h
index 1f796aa21304..755e02c6b88e 100644
--- a/icu4c/source/common/ulocimp.h
+++ b/icu4c/source/common/ulocimp.h
@@ -307,4 +307,82 @@ U_CAPI const char* const* ulocimp_getKnownCanonicalizedLocaleForTest(int32_t* le
 // Return true if the value is already canonicalized.
 U_CAPI bool ulocimp_isCanonicalizedLocaleForTest(const char* localeName);
 
+/**
+ * A utility class for handling locale IDs that may be longer than ULOC_FULLNAME_CAPACITY.
+ * This encompasses all of the logic to allocate a temporary locale ID buffer on the stack,
+ * and then, if it's not big enough, reallocate it on the heap and try again.
+ *
+ * You use it like this:
+ * UErrorCode err = U_ZERO_ERROR;
+ *
+ * PreflightingLocaleIDBuffer tempBuffer;
+ * do {
+ *     tempBuffer.requestedCapacity = uloc_doSomething(localeID, tempBuffer.getBuffer(), tempBuffer.getCapacity(), &err);
+ * } while (tempBuffer.needToTryAgain(&err));
+ * if (U_SUCCESS(err)) {
+ *     uloc_doSomethingWithTheResult(tempBuffer.getBuffer());
+ * }
+ */
+class PreflightingLocaleIDBuffer {
+private:
+    char stackBuffer[ULOC_FULLNAME_CAPACITY];
+    char* heapBuffer = nullptr;
+    int32_t capacity = ULOC_FULLNAME_CAPACITY;
+
+public:
+    int32_t requestedCapacity = ULOC_FULLNAME_CAPACITY;
+
+    // No heap allocation. Use only on the stack.
+    static void* U_EXPORT2 operator new(size_t) U_NOEXCEPT = delete;
+    static void* U_EXPORT2 operator new[](size_t) U_NOEXCEPT = delete;
+#if U_HAVE_PLACEMENT_NEW
+    static void* U_EXPORT2 operator new(size_t, void*) U_NOEXCEPT = delete;
+#endif
+
+    PreflightingLocaleIDBuffer() {}
+
+    ~PreflightingLocaleIDBuffer() { uprv_free(heapBuffer); }
+
+    // Owns heapBuffer: a copy would free it twice, so copying is disabled.
+    PreflightingLocaleIDBuffer(const PreflightingLocaleIDBuffer&) = delete;
+    PreflightingLocaleIDBuffer& operator=(const PreflightingLocaleIDBuffer&) = delete;
+
+    // Returns the buffer currently in use: the heap one once allocated, else the stack one.
+    char* getBuffer() {
+        if (heapBuffer == nullptr) {
+            return stackBuffer;
+        } else {
+            return heapBuffer;
+        }
+    }
+
+    // Returns the size in bytes of the buffer returned by getBuffer().
+    int32_t getCapacity() {
+        return capacity;
+    }
+
+    // Call after each attempt. On the first overflow it allocates a heap buffer
+    // of requestedCapacity + 2 bytes, resets *err and returns true; in every
+    // other case (success, other error, allocation failure, or a retry already
+    // made) it returns false.
+    bool needToTryAgain(UErrorCode* err) {
+        if (heapBuffer != nullptr) {
+            return false;
+        }
+
+        if (*err == U_BUFFER_OVERFLOW_ERROR || *err == U_STRING_NOT_TERMINATED_WARNING) {
+            int32_t newCapacity = requestedCapacity + 2;    // one for the terminating null, one just for paranoia
+            heapBuffer = static_cast<char*>(uprv_malloc(newCapacity));
+            if (heapBuffer == nullptr) {
+                *err = U_MEMORY_ALLOCATION_ERROR;
+            } else {
+                *err = U_ZERO_ERROR;
+                capacity = newCapacity;
+            }
+            return U_SUCCESS(*err);
+        }
+        return false;
+    }
+};
+
 #endif
diff --git a/icu4c/source/test/cintltst/cloctst.c b/icu4c/source/test/cintltst/cloctst.c
index 16adf9a81a4a..032023730c61 100644
--- a/icu4c/source/test/cintltst/cloctst.c
+++ b/icu4c/source/test/cintltst/cloctst.c
@@ -58,6 +58,7 @@ static void TestBug20370(void);
 static void TestBug20321UnicodeLocaleKey(void);
 
 static void TestUsingDefaultWarning(void);
+static void TestExcessivelyLongIDs(void);
 
 void PrintDataTable();
 
@@ -281,6 +282,7 @@ void addLocaleTest(TestNode** root)
     TESTCASE(TestBug20321UnicodeLocaleKey);
     TESTCASE(TestUsingDefaultWarning);
     TESTCASE(TestBug21449InfiniteLoop);
+    TESTCASE(TestExcessivelyLongIDs);
 }
 
 
@@ -7009,3 +7011,47 @@ static void TestBug21449InfiniteLoop() {
     // so the test is considered passed if the call to the API below returns anything at all.
     uloc_getDisplayLanguage(invalidLocaleId, invalidLocaleId, NULL, 0, &status);
 }
+
+// rdar://79296849 and https://unicode-org.atlassian.net/browse/ICU-21639
+static void TestExcessivelyLongIDs(void) {
+    const char* reallyLongID =
+        "de-u-cu-eur-em-default-hc-h23-ks-level1-lb-strict-lw-normal-ms-metric"
+        "-nu-latn-rg-atzzzz-sd-atat1-ss-none-tz-atvie-va-posix";
+    char minimizedID[ULOC_FULLNAME_CAPACITY];
+    char maximizedID[ULOC_FULLNAME_CAPACITY];
+    int32_t actualMinimizedLength = 0;
+    int32_t actualMaximizedLength = 0;
+    UErrorCode err = U_ZERO_ERROR;
+
+    actualMinimizedLength = uloc_minimizeSubtags(reallyLongID, minimizedID, ULOC_FULLNAME_CAPACITY, &err);
+    assertTrue("uloc_minimizeSubtags() with too-small buffer didn't fail as expected",
+        U_FAILURE(err) && actualMinimizedLength > ULOC_FULLNAME_CAPACITY);
+
+    err = U_ZERO_ERROR;
+    actualMaximizedLength = uloc_addLikelySubtags(reallyLongID, maximizedID, ULOC_FULLNAME_CAPACITY, &err);
+    assertTrue("uloc_addLikelySubtags() with too-small buffer didn't fail as expected",
+        U_FAILURE(err) && actualMaximizedLength > ULOC_FULLNAME_CAPACITY);
+
+    err = U_ZERO_ERROR;
+    char* realMinimizedID = (char*)uprv_malloc(actualMinimizedLength + 1);
+    uloc_minimizeSubtags(reallyLongID, realMinimizedID, actualMinimizedLength + 1, &err);
+    if (assertSuccess("uloc_minimizeSubtags() failed", &err)) {
+        assertEquals("Wrong result from uloc_minimizeSubtags()",
+            "de__POSIX@colstrength=primary;currency=eur;em=default;hours=h23;lb=strict;"
+            "lw=normal;measure=metric;numbers=latn;rg=atzzzz;sd=atat1;ss=none;timezone=Europe/Vienna",
+            realMinimizedID);
+    }
+    uprv_free(realMinimizedID);
+
+    // Reset so a failure above cannot leak into the uloc_addLikelySubtags() check.
+    err = U_ZERO_ERROR;
+    char* realMaximizedID = (char*)uprv_malloc(actualMaximizedLength + 1);
+    uloc_addLikelySubtags(reallyLongID, realMaximizedID, actualMaximizedLength + 1, &err);
+    if (assertSuccess("uloc_addLikelySubtags() failed", &err)) {
+        assertEquals("Wrong result from uloc_addLikelySubtags()",
+            "de_Latn_DE_POSIX@colstrength=primary;currency=eur;em=default;hours=h23;lb=strict;"
+            "lw=normal;measure=metric;numbers=latn;rg=atzzzz;sd=atat1;ss=none;timezone=Europe/Vienna",
+            realMaximizedID);
+    }
+    uprv_free(realMaximizedID);
+}