diff --git a/icu4c/source/common/rbbiscan.cpp b/icu4c/source/common/rbbiscan.cpp index 121e5c26c8bf..e4162f0aea7f 100644 --- a/icu4c/source/common/rbbiscan.cpp +++ b/icu4c/source/common/rbbiscan.cpp @@ -1224,6 +1224,7 @@ void RBBIRuleScanner::scanSet() { UErrorCode localStatus = U_ZERO_ERROR; LocalPointer uset(new UnicodeSet(), localStatus); if (U_FAILURE(localStatus)) { + error(localStatus); return; } uset->applyPatternIgnoreSpace(fRB->fRules, pos, fSymbolTable, localStatus); @@ -1240,7 +1241,11 @@ void RBBIRuleScanner::scanSet() { // Verify that the set contains at least one code point. // U_ASSERT(uset.isValid()); - if (uset->isEmpty()) { + UnicodeSet tempSet(*uset); + // Use tempSet to handle the case where the UnicodeSet contains + // only string elements, such as [{ab}], and treat it as an empty set. + tempSet.removeAllStrings(); + if (tempSet.isEmpty()) { // This set is empty. // Make it an error, because it almost certainly is not what the user wanted. // Also, avoids having to think about corner cases in the tree manipulation code diff --git a/icu4c/source/test/intltest/rbbitst.cpp b/icu4c/source/test/intltest/rbbitst.cpp index d73488435246..e9062ce214b9 100644 --- a/icu4c/source/test/intltest/rbbitst.cpp +++ b/icu4c/source/test/intltest/rbbitst.cpp @@ -144,6 +144,7 @@ void RBBITest::runIndexedTest( int32_t index, UBool exec, const char* &name, cha TESTCASE_AUTO(TestRandomAccess); TESTCASE_AUTO(TestExternalBreakEngineWithFakeTaiLe); TESTCASE_AUTO(TestExternalBreakEngineWithFakeYue); + TESTCASE_AUTO(TestBug22579); TESTCASE_AUTO(TestBug22581); TESTCASE_AUTO(TestBug22584); TESTCASE_AUTO(TestBug22585); @@ -5895,6 +5896,14 @@ void RBBITest::TestBug22584() { RuleBasedBreakIterator bi2(ruleStr, pe, ec); } +void RBBITest::TestBug22579() { + // Test that a set holding only a string, e.g. [{ab}], does not cause a null dereference in cloneTree + UnicodeString ruleStr = u"[{ab}];"; + UParseError pe {}; + UErrorCode ec {U_ZERO_ERROR}; + + RuleBasedBreakIterator bi(ruleStr, pe, ec); +} void RBBITest::TestBug22581() { // Test 
duplicate variable setting will not leak the rule compilation UnicodeString ruleStr = u"$foo=[abc]; $foo=[xyz]; $foo;"; diff --git a/icu4c/source/test/intltest/rbbitst.h b/icu4c/source/test/intltest/rbbitst.h index 03dd2fb3a05b..224b02ec9fec 100644 --- a/icu4c/source/test/intltest/rbbitst.h +++ b/icu4c/source/test/intltest/rbbitst.h @@ -98,6 +98,7 @@ class RBBITest: public IntlTest { void TestRandomAccess(); void TestExternalBreakEngineWithFakeTaiLe(); void TestExternalBreakEngineWithFakeYue(); + void TestBug22579(); void TestBug22581(); void TestBug22584(); void TestBug22585(); diff --git a/icu4j/main/core/src/main/java/com/ibm/icu/text/RBBIRuleScanner.java b/icu4j/main/core/src/main/java/com/ibm/icu/text/RBBIRuleScanner.java index 0b90431a3aba..057adce8ea51 100644 --- a/icu4j/main/core/src/main/java/com/ibm/icu/text/RBBIRuleScanner.java +++ b/icu4j/main/core/src/main/java/com/ibm/icu/text/RBBIRuleScanner.java @@ -1062,7 +1062,11 @@ void scanSet() { // Verify that the set contains at least one code point. // - if (uset.isEmpty()) { + // Use tempSet to handle the case where the UnicodeSet contains + // only string elements, such as [{ab}], and treat it as an empty set. + UnicodeSet tempSet = new UnicodeSet(uset); + tempSet.removeAllStrings(); + if (tempSet.isEmpty()) { // This set is empty. // Make it an error, because it almost certainly is not what the user wanted. 
// Also, avoids having to think about corner cases in the tree manipulation code diff --git a/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/rbbi/RBBITest.java b/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/rbbi/RBBITest.java index 1503f208dce0..a90f0cf8a8fe 100644 --- a/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/rbbi/RBBITest.java +++ b/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/rbbi/RBBITest.java @@ -946,6 +946,20 @@ public void TestUnpairedSurrogate() { assertEquals("Rules does not match", rules, bi.toString()); } + @Test + public void TestBug22579() { + try { + new RuleBasedBreakIterator("[{ab}];"); + fail("TestBug22579: RuleBasedBreakIterator() failed to throw an exception with only string in an Unicode set."); + } + catch (IllegalArgumentException e) { + // Expected: a Unicode set containing only a string is rejected. + } + catch (Exception e) { + fail("TestBug22579: Unexpected exception while new RuleBasedBreakIterator() with only string in an Unicode Set: " + e); + } + + } @Test public void TestBug22585() { try {