Skip to content

Commit

Permalink
ICU-22661 Implement review feedback
Browse files Browse the repository at this point in the history
  • Loading branch information
FrankYFTang committed Mar 7, 2024
1 parent 9f3baf0 commit cbbc582
Show file tree
Hide file tree
Showing 7 changed files with 107 additions and 46 deletions.
66 changes: 29 additions & 37 deletions icu4c/source/common/uloc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1327,80 +1327,72 @@ _getVariant(const char* localeID,
bool needSeparator,
UErrorCode& status) {
if (U_FAILURE(status)) return;
const char* start = localeID;
bool hasVariant = false;
if (pEnd != nullptr) { *pEnd = localeID; }

// Reasonable upper limit for variants
constexpr int32_t MAX_NUMBER_OF_VARIANTS = 20;
constexpr int32_t MAX_VARIANT_LENGTH = 8;
constexpr int32_t MAX_VARIANTS_LENGTH = (MAX_VARIANT_LENGTH + 1) * MAX_NUMBER_OF_VARIANTS;
// There are no strict limitation of the syntax of variant in the legacy
// locale format. If the locale is constructed from unicode_locale_id
// as defined in UTS35, then we know each unicode_variant_subtag
// could have max length of 8 ((alphanum{5,8} | digit alphanum{3})
// 179 would allow 20 unicode_variant_subtag with sep in the
// unicode_locale_id
// 8*20 + 1*(20-1) = 179
constexpr int32_t MAX_VARIANTS_LENGTH = 179;

/* get one or more variant tags and separate them with '_' */
if(_isIDSeparator(prev)) {
int32_t index = 0;
if (_isIDSeparator(prev)) {
/* get a variant string after a '-' or '_' */
while(!_isTerminator(*localeID)) {
localeID++;
}
int32_t variantsLength = localeID - start;
if (variantsLength > MAX_VARIANTS_LENGTH) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}

localeID = start;
while(!_isTerminator(*localeID)) {
for (index=0; !_isTerminator(localeID[index]); index++) {
if (needSeparator) {
if (sink != nullptr) {
sink->Append("_", 1);
}
needSeparator = false;
}
if (sink != nullptr) {
char c = (char)uprv_toupper(*localeID);
char c = (char)uprv_toupper(localeID[index]);
if (c == '-') c = '_';
sink->Append(&c, 1);
}
hasVariant = true;
localeID++;
if (index > MAX_VARIANTS_LENGTH) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
}
if (pEnd != nullptr) { *pEnd = localeID; }
if (pEnd != nullptr) { *pEnd = localeID+index; }
}

/* if there is no variant tag after a '-' or '_' then look for '@' */
if(!hasVariant) {
if(prev=='@') {
if (index == 0) {
if (prev=='@') {
/* keep localeID */
} else if((localeID=locale_getKeywordsStart(localeID))!=nullptr) {
++localeID; /* point after the '@' */
} else {
return;
}
start = localeID;
while(!_isTerminator(*localeID)) {
localeID++;
}
int32_t variantsLength = localeID - start;
if (variantsLength > MAX_VARIANTS_LENGTH) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
localeID = start;
while(!_isTerminator(*localeID)) {
for(; !_isTerminator(localeID[index]); index++) {
if (needSeparator) {
if (sink != nullptr) {
sink->Append("_", 1);
}
needSeparator = false;
}
if (sink != nullptr) {
char c = (char)uprv_toupper(*localeID);
char c = (char)uprv_toupper(localeID[index]);
if (c == '-' || c == ',') c = '_';
sink->Append(&c, 1);
}
localeID++;
if (index > MAX_VARIANTS_LENGTH) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
}
if (pEnd != nullptr) { *pEnd = localeID; }
if (pEnd != nullptr) { *pEnd = localeID + index; }
}
if (index > MAX_VARIANTS_LENGTH) {
status = U_ILLEGAL_ARGUMENT_ERROR;
}
}

Expand Down
4 changes: 2 additions & 2 deletions icu4c/source/test/cintltst/cloctst.c
Original file line number Diff line number Diff line change
Expand Up @@ -591,7 +591,7 @@ static void TestVariantLengthLimit(void) {
"_12345678"
"_12345678"
"_12345678"
"_12345678X";
"_12345678";

static const char invalid[] =
"_"
Expand All @@ -614,7 +614,7 @@ static void TestVariantLengthLimit(void) {
"_12345678"
"_12345678"
"_12345678"
"_12345678XX"; // One character too long.
"_12345678X"; // One character too long.

const char* const variantsExpected = valid + 2; // Skip initial "__".
const int32_t reslenExpected = uprv_strlen(variantsExpected);
Expand Down
4 changes: 2 additions & 2 deletions icu4c/source/test/cintltst/creststn.c
Original file line number Diff line number Diff line change
Expand Up @@ -2803,7 +2803,7 @@ static void TestGetFunctionalEquivalentVariantLengthLimit(void) {
"_12345678"
"_12345678"
"_12345678"
"_12345678X";
"_12345678";

static const char invalid[] =
"_"
Expand All @@ -2826,7 +2826,7 @@ static void TestGetFunctionalEquivalentVariantLengthLimit(void) {
"_12345678"
"_12345678"
"_12345678"
"_12345678XX"; // One character too long.
"_12345678X"; // One character too long.

static const char localeExpected[] = "_@calendar=gregorian";
const int32_t reslenExpected = uprv_strlen(localeExpected);
Expand Down
4 changes: 2 additions & 2 deletions icu4c/source/test/intltest/loctest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,7 @@ void LocaleTest::TestVariantLengthLimit() {
"_12345678"
"_12345678"
"_12345678"
"_12345678X";
"_12345678";

static constexpr char invalid[] =
"_"
Expand All @@ -451,7 +451,7 @@ void LocaleTest::TestVariantLengthLimit() {
"_12345678"
"_12345678"
"_12345678"
"_12345678XX"; // One character too long.
"_12345678X"; // One character too long.

constexpr const char* variantsExpected = valid + 2; // Skip initial "__".

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ public void TestGetFunctionalEquivalentVariantLengthWithinLimit() {
"_12345678" +
"_12345678" +
"_12345678" +
"_12345678X";
"_12345678";

ULocale equivLocale = ICUResourceBundle.getFunctionalEquivalent(
ICUData.ICU_BASE_NAME, ICUResourceBundle.ICU_DATA_CLASS_LOADER,
Expand Down Expand Up @@ -207,7 +207,7 @@ public void TestGetFunctionalEquivalentVariantLengthOverLimit() {
"_12345678" +
"_12345678" +
"_12345678" +
"_12345678XX"; // One character too long.
"_12345678X"; // One character too long.
ULocale equivLocale2 = ICUResourceBundle.getFunctionalEquivalent(
ICUData.ICU_BASE_NAME, ICUResourceBundle.ICU_DATA_CLASS_LOADER,
"calendar", "calendar", new ULocale(invalid), new boolean[1], false);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5438,6 +5438,63 @@ public void TestCanonical() {

}


@Test
public void TestVariantLengthWithinLimit() {
String valid =
"_" +
"_12345678" +
"_12345678" +
"_12345678" +
"_12345678" +
"_12345678" +
"_12345678" +
"_12345678" +
"_12345678" +
"_12345678" +
"_12345678" +
"_12345678" +
"_12345678" +
"_12345678" +
"_12345678" +
"_12345678" +
"_12345678" +
"_12345678" +
"_12345678" +
"_12345678" +
"_12345678";

ULocale locale = new ULocale(valid);
Assert.assertEquals(valid.substring(2), locale.getVariant());
}

@Test(expected = IllegalArgumentException.class)
public void TestVariantLengthOverLimit() {
String invalid =
"_" +
"_12345678" +
"_12345678" +
"_12345678" +
"_12345678" +
"_12345678" +
"_12345678" +
"_12345678" +
"_12345678" +
"_12345678" +
"_12345678" +
"_12345678" +
"_12345678" +
"_12345678" +
"_12345678" +
"_12345678" +
"_12345678" +
"_12345678" +
"_12345678" +
"_12345678" +
"_12345678X"; // One character too long.
ULocale locale = new ULocale(invalid);
}

@Test
public void TestLocaleCanonicalizationFromFile() throws IOException {
BufferedReader testFile = TestUtil.getDataReader("cldr/localeIdentifiers/localeCanonicalization.txt");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,15 @@ private void skipCountry() {
}
}

// There are no strict limitation of the syntax of variant in the legacy
// locale format. If the locale is constructed from unicode_locale_id
// as defined in UTS35, then we know each unicode_variant_subtag
// could have max length of 8 ((alphanum{5,8} | digit alphanum{3})
// 179 would allow 20 unicode_variant_subtag with sep in the
// unicode_locale_id
// 8*20 + 1*(20-1) = 179
private static final int MAX_VARIANTS_LENGTH = 179;

/**
* Advance index past variant, and accumulate normalized variant in buffer. This ignores
* the codepage information from POSIX ids. Index must be immediately after the country
Expand Down Expand Up @@ -432,10 +441,13 @@ private int parseVariant() {
c = UNDERSCORE;
}
append(c);
if (buffer.length() - oldBlen > MAX_VARIANTS_LENGTH) {
throw new IllegalArgumentException("variants is too long");
}
}
}
--index; // unget
if (buffer.length() - oldBlen > 180) {
if (buffer.length() - oldBlen > MAX_VARIANTS_LENGTH) {
throw new IllegalArgumentException("variants is too long");
}
return oldBlen;
Expand Down

0 comments on commit cbbc582

Please sign in to comment.