Skip to content

Commit

Permalink
ICU22511 Fix infinity loop in collator compare with "vi" and "wo" locale
Browse files Browse the repository at this point in the history
  • Loading branch information
FrankYFTang committed Oct 18, 2023
1 parent 3d1dee6 commit ce0c676
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 2 deletions.
17 changes: 17 additions & 0 deletions icu4c/source/i18n/collationcompare.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ CollationCompare::compareUpToQuaternary(CollationIterator &left, CollationIterat
}
UBool anyVariable = false;

uint32_t lastPrimary = 0;
uint32_t samePrimaryCount = 0;
// Fetch CEs, compare primaries, store secondary & tertiary weights.
for(;;) {
// We fetch CEs until we get a non-ignorable primary or reach the end.
Expand Down Expand Up @@ -103,6 +105,21 @@ CollationCompare::compareUpToQuaternary(CollationIterator &left, CollationIterat
return (leftPrimary < rightPrimary) ? UCOL_LESS : UCOL_GREATER;
}
if(leftPrimary == Collation::NO_CE_PRIMARY) { break; }

// leftPrimary and rightPrimary are known to be equal at this point.
// ICU-22511: To avoid an infinite loop, terminate and return an error if
// leftPrimary stays the same too many times in a row.
constexpr uint32_t kSamePrimaryLoopLimit = 3;
if (lastPrimary == leftPrimary) {
if (++samePrimaryCount > kSamePrimaryLoopLimit) {
errorCode = U_INTERNAL_PROGRAM_ERROR;
return UCOL_EQUAL;
}
} else {
// reset samePrimaryCount
samePrimaryCount = 0;
}
lastPrimary = leftPrimary;
}
if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }

Expand Down
4 changes: 2 additions & 2 deletions icu4c/source/test/fuzzer/collator_compare_fuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
if (size > 4096) {
size = 4096;
}
std::unique_ptr<char16_t> compbuff1(new char16_t[size/4]);
std::unique_ptr<char16_t[]> compbuff1(new char16_t[size/4]);
std::memcpy(compbuff1.get(), data, (size/4)*2);
data = data + size/2;
std::unique_ptr<char16_t> compbuff2(new char16_t[size/4]);
std::unique_ptr<char16_t[]> compbuff2(new char16_t[size/4]);
std::memcpy(compbuff2.get(), data, (size/4)*2);


Expand Down
27 changes: 27 additions & 0 deletions icu4c/source/test/intltest/collationtest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ class CollationTest : public IntlTest {
void TestDataDriven();
void TestLongLocale();
void TestBuilderContextsOverflow();
void TestWoHang22511();
void TestHang22414();

private:
Expand Down Expand Up @@ -154,6 +155,7 @@ void CollationTest::runIndexedTest(int32_t index, UBool exec, const char *&name,
TESTCASE_AUTO(TestLongLocale);
TESTCASE_AUTO(TestBuilderContextsOverflow);
TESTCASE_AUTO(TestHang22414);
TESTCASE_AUTO(TestWoHang22511);
TESTCASE_AUTO_END;
}

Expand Down Expand Up @@ -1881,6 +1883,31 @@ void CollationTest::TestLongLocale() {
LocalPointer<Collator> coll(Collator::createInstance(longLocale, errorCode));
}

void CollationTest::TestWoHang22511() {
IcuTestErrorCode errorCode(*this, "TestWoHang22511");
char16_t str1[] = {
0x0000, 0x0100, 0x032a, 0x01e0, 0xd804, 0xdd00, 0x031c,
};

int32_t num_locales = 0;
const icu::Locale* locales = icu::Locale::getAvailableLocales(num_locales);
for (int32_t i = 0; i < num_locales; i++) {
errorCode.reset();
icu::Locale l = locales[i];
LocalPointer<Collator> coll(Collator::createInstance(l, errorCode));
if(errorCode.isFailure()) {
logln("cannot built the Collator");
continue;
}
coll->setStrength(icu::Collator::IDENTICAL);

coll->compare(str1,
sizeof(str1)/sizeof(char16_t),
str1+1,
(sizeof(str1)/sizeof(char16_t))-1,
errorCode);
}
}
void CollationTest::TestHang22414() {
IcuTestErrorCode errorCode(*this, "TestHang22414");
const char* cases[] = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import com.ibm.icu.text.Collator;

public final class CollationCompare /* all static */ {
// ICU-22511: compareUpToQuaternary() stops and returns Collation.EQUAL once its
// count of consecutive loop iterations with an unchanged primary exceeds this value.
// NOTE(review): not reassigned in the visible code; could likely be declared final — confirm.
private static int kSamePrimaryLoopLimit = 3;
public static int compareUpToQuaternary(CollationIterator left, CollationIterator right,
CollationSettings settings) {
int options = settings.options;
Expand All @@ -28,6 +29,8 @@ public static int compareUpToQuaternary(CollationIterator left, CollationIterato
}
boolean anyVariable = false;

long lastPrimary = 0;
int samePrimaryCount = 0;
// Fetch CEs, compare primaries, store secondary & tertiary weights.
for (;;) {
// We fetch CEs until we get a non-ignorable primary or reach the end.
Expand Down Expand Up @@ -90,6 +93,19 @@ public static int compareUpToQuaternary(CollationIterator left, CollationIterato
if (leftPrimary == Collation.NO_CE_PRIMARY) {
break;
}

// leftPrimary and rightPrimary are known to be equal at this point.
// ICU-22511: To avoid an infinite loop, terminate and return EQUAL if
// leftPrimary stays the same too many times in a row.
if (lastPrimary == leftPrimary) {
if (++samePrimaryCount > kSamePrimaryLoopLimit) {
return Collation.EQUAL;
}
} else {
// reset samePrimaryCount
samePrimaryCount = 0;
}
lastPrimary = leftPrimary;
}

// Compare the buffered secondary & tertiary weights.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import java.io.IOException;
import java.text.ParseException;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;

import org.junit.Test;
Expand Down Expand Up @@ -1761,4 +1762,14 @@ public void TestBuilderContextsOverflow() {
errln("unexpected type of exception: " + e);
}
}
/**
 * Regression test for ICU-22511: for every available locale, compares a fixed
 * string with the same string minus its first char at IDENTICAL strength.
 * The comparison result is ignored.
 */
@Test
public void TestWoHang22511() {
    final String full = "\u0000\u0100\u032a\u01e0\ud804\udd00\u031c";
    // Same text with the leading char dropped.
    final String tail = full.substring(1);
    for (Locale locale : Collator.getAvailableLocales()) {
        final Collator collator = Collator.getInstance(locale);
        collator.setStrength(Collator.IDENTICAL);
        collator.compare(full, tail);
    }
}
}

0 comments on commit ce0c676

Please sign in to comment.