Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ICU-22579 Fix Null deref while Unicode Set only has string #2739

Merged
merged 1 commit into from
Dec 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion icu4c/source/common/rbbiscan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1224,6 +1224,7 @@ void RBBIRuleScanner::scanSet() {
UErrorCode localStatus = U_ZERO_ERROR;
LocalPointer<UnicodeSet> uset(new UnicodeSet(), localStatus);
if (U_FAILURE(localStatus)) {
error(localStatus);
return;
}
uset->applyPatternIgnoreSpace(fRB->fRules, pos, fSymbolTable, localStatus);
Expand All @@ -1240,7 +1241,11 @@ void RBBIRuleScanner::scanSet() {
// Verify that the set contains at least one code point.
//
U_ASSERT(uset.isValid());
if (uset->isEmpty()) {
UnicodeSet tempSet(*uset);
// Use tempSet to handle the case that the UnicodeSet contains
// only string element, such as [{ab}] and treat it as empty set.
tempSet.removeAllStrings();
if (tempSet.isEmpty()) {
// This set is empty.
// Make it an error, because it almost certainly is not what the user wanted.
// Also, avoids having to think about corner cases in the tree manipulation code
Expand Down
9 changes: 9 additions & 0 deletions icu4c/source/test/intltest/rbbitst.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ void RBBITest::runIndexedTest( int32_t index, UBool exec, const char* &name, cha
TESTCASE_AUTO(TestRandomAccess);
TESTCASE_AUTO(TestExternalBreakEngineWithFakeTaiLe);
TESTCASE_AUTO(TestExternalBreakEngineWithFakeYue);
TESTCASE_AUTO(TestBug22579);
TESTCASE_AUTO(TestBug22581);
TESTCASE_AUTO(TestBug22584);
TESTCASE_AUTO(TestBug22585);
Expand Down Expand Up @@ -5895,6 +5896,14 @@ void RBBITest::TestBug22584() {
RuleBasedBreakIterator bi2(ruleStr, pe, ec);
}

void RBBITest::TestBug22579() {
// Test not causing null deref in cloneTree
UnicodeString ruleStr = u"[{ab}];";
UParseError pe {};
UErrorCode ec {U_ZERO_ERROR};

RuleBasedBreakIterator bi(ruleStr, pe, ec);
}
void RBBITest::TestBug22581() {
// Test duplicate variable setting will not leak the rule compilation
UnicodeString ruleStr = u"$foo=[abc]; $foo=[xyz]; $foo;";
Expand Down
1 change: 1 addition & 0 deletions icu4c/source/test/intltest/rbbitst.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ class RBBITest: public IntlTest {
void TestRandomAccess();
void TestExternalBreakEngineWithFakeTaiLe();
void TestExternalBreakEngineWithFakeYue();
void TestBug22579();
void TestBug22581();
void TestBug22584();
void TestBug22585();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1062,7 +1062,11 @@ void scanSet() {

// Verify that the set contains at least one code point.
//
if (uset.isEmpty()) {
// Use tempSet to handle the case that the UnicodeSet contains
// only string element, such as [{ab}] and treat it as empty set.
UnicodeSet tempSet = new UnicodeSet(uset);
tempSet.removeAllStrings();
if (tempSet.isEmpty()) {
// This set is empty.
// Make it an error, because it almost certainly is not what the user wanted.
// Also, avoids having to think about corner cases in the tree manipulation code
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -946,6 +946,20 @@ public void TestUnpairedSurrogate() {
assertEquals("Rules does not match", rules, bi.toString());
}

@Test
public void TestBug22579() {
try {
new RuleBasedBreakIterator("[{ab}];");
fail("TestBug22579: RuleBasedBreakIterator() failed to throw an exception with only string in an Unicode set.");
}
catch (IllegalArgumentException e) {
// expected exception with only string inside an Unicode set.
}
catch (Exception e) {
fail("TestBug22579: Unexpected exception while new RuleBasedBreakIterator() with only string in an Unicode Set: " + e);
}

}
@Test
public void TestBug22585() {
try {
Expand Down
Loading