From b8a5c0dd2444970ef409865d0121dc99741dfd39 Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Fri, 21 Sep 2018 13:44:21 -0700 Subject: [PATCH] ICU-13707 Make UnicodeString safe when appended or inserted into itself. (#147) --- icu4c/source/common/unicode/unistr.h | 3 + icu4c/source/common/unistr.cpp | 59 +++++++++++++---- icu4c/source/test/intltest/ustrtest.cpp | 85 ++++++++++++++++++++----- icu4c/source/test/intltest/ustrtest.h | 1 + 4 files changed, 121 insertions(+), 27 deletions(-) diff --git a/icu4c/source/common/unicode/unistr.h b/icu4c/source/common/unicode/unistr.h index b84f40bd449c..bf954b5f1d82 100644 --- a/icu4c/source/common/unicode/unistr.h +++ b/icu4c/source/common/unicode/unistr.h @@ -243,6 +243,9 @@ class UnicodeStringAppendable; // unicode/appendable.h * than other ICU APIs. In particular: * - If indexes are out of bounds for a UnicodeString object * (<0 or >length()) then they are "pinned" to the nearest boundary. + * - If the buffer passed to an insert/append/replace operation is owned by the + * target object, e.g., calling str.append(str), an extra copy may take place + * to ensure safety. * - If primitive string pointer values (e.g., const char16_t * or char *) * for input strings are NULL, then those input string parameters are treated * as if they pointed to an empty string. diff --git a/icu4c/source/common/unistr.cpp b/icu4c/source/common/unistr.cpp index 5d7cab2e155c..c8b6c0a3a463 100644 --- a/icu4c/source/common/unistr.cpp +++ b/icu4c/source/common/unistr.cpp @@ -1447,10 +1447,15 @@ UnicodeString::doReplace(int32_t start, } if(srcChars == 0) { - srcStart = srcLength = 0; - } else if(srcLength < 0) { - // get the srcLength if necessary - srcLength = u_strlen(srcChars + srcStart); + srcLength = 0; + } else { + // Perform all remaining operations relative to srcChars + srcStart. + // From this point forward, do not use srcStart. + srcChars += srcStart; + if (srcLength < 0) { + // get the srcLength if necessary + srcLength = u_strlen(srcChars); + } } // pin the indices to legal values @@ -1465,17 +1470,28 @@ UnicodeString::doReplace(int32_t start, } newLength += srcLength; + // Check for insertion into ourself + const UChar *oldArray = getArrayStart(); + if (isBufferWritable() && + oldArray < srcChars + srcLength && + srcChars < oldArray + oldLength) { + // Copy into a new UnicodeString and start over + UnicodeString copy(srcChars, srcLength); + if (copy.isBogus()) { + setToBogus(); + return *this; + } + return doReplace(start, length, copy.getArrayStart(), 0, srcLength); + } + // cloneArrayIfNeeded(doCopyArray=FALSE) may change fArray but will not copy the current contents; // therefore we need to keep the current fArray UChar oldStackBuffer[US_STACKBUF_SIZE]; - UChar *oldArray; if((fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) { // copy the stack buffer contents because it will be overwritten with // fUnion.fFields values - u_memcpy(oldStackBuffer, fUnion.fStackFields.fBuffer, oldLength); + u_memcpy(oldStackBuffer, oldArray, oldLength); oldArray = oldStackBuffer; - } else { - oldArray = getArrayStart(); } // clone our array and allocate a bigger array if needed @@ -1503,7 +1519,7 @@ UnicodeString::doReplace(int32_t start, } // now fill in the hole with the new string - us_arrayCopy(srcChars, srcStart, newArray, start, srcLength); + us_arrayCopy(srcChars, 0, newArray, start, srcLength); setLength(newLength); @@ -1536,15 +1552,34 @@ UnicodeString::doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLeng return *this; } + // Perform all remaining operations relative to srcChars + srcStart. + // From this point forward, do not use srcStart. + srcChars += srcStart; + if(srcLength < 0) { // get the srcLength if necessary - if((srcLength = u_strlen(srcChars + srcStart)) == 0) { + if((srcLength = u_strlen(srcChars)) == 0) { return *this; } } int32_t oldLength = length(); int32_t newLength = oldLength + srcLength; + + // Check for append onto ourself + const UChar* oldArray = getArrayStart(); + if (isBufferWritable() && + oldArray < srcChars + srcLength && + srcChars < oldArray + oldLength) { + // Copy into a new UnicodeString and start over + UnicodeString copy(srcChars, srcLength); + if (copy.isBogus()) { + setToBogus(); + return *this; + } + return doAppend(copy.getArrayStart(), 0, srcLength); + } + // optimize append() onto a large-enough, owned string if((newLength <= getCapacity() && isBufferWritable()) || cloneArrayIfNeeded(newLength, getGrowCapacity(newLength))) { @@ -1556,8 +1591,8 @@ UnicodeString::doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLeng // or // str.appendString(buffer, length) // or similar. - if(srcChars + srcStart != newArray + oldLength) { - us_arrayCopy(srcChars, srcStart, newArray, oldLength, srcLength); + if(srcChars != newArray + oldLength) { + us_arrayCopy(srcChars, 0, newArray, oldLength, srcLength); } setLength(newLength); } diff --git a/icu4c/source/test/intltest/ustrtest.cpp b/icu4c/source/test/intltest/ustrtest.cpp index 4b7cb7ae7c7f..57d72fb3f547 100644 --- a/icu4c/source/test/intltest/ustrtest.cpp +++ b/icu4c/source/test/intltest/ustrtest.cpp @@ -64,6 +64,7 @@ void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* & TESTCASE_AUTO(TestUInt16Pointers); TESTCASE_AUTO(TestWCharPointers); TESTCASE_AUTO(TestNullPointers); + TESTCASE_AUTO(TestUnicodeStringInsertAppendToSelf); TESTCASE_AUTO_END; } @@ -1123,27 +1124,25 @@ UnicodeStringTest::TestMiscellaneous() errln("UnicodeString(u[-1]).getTerminatedBuffer() returns a bad buffer"); } - test1=UNICODE_STRING("la", 2); - test1.append(UNICODE_STRING(" lila", 5).getTerminatedBuffer(), 0, -1); - if(test1!=UNICODE_STRING("la lila", 7)) { - errln("UnicodeString::append(const UChar *, start, length) failed"); - } + // NOTE: Some compilers will optimize u"la" to point to the same static memory + // as u" lila", offset by 3 code units + test1=UnicodeString(TRUE, u"la", 2); + test1.append(UnicodeString(TRUE, u" lila", 5).getTerminatedBuffer(), 0, -1); + assertEquals("UnicodeString::append(const UChar *, start, length) failed", + u"la lila", test1); - test1.insert(3, UNICODE_STRING("dudum ", 6), 0, INT32_MAX); - if(test1!=UNICODE_STRING("la dudum lila", 13)) { - errln("UnicodeString::insert(start, const UniStr &, start, length) failed"); - } + test1.insert(3, UnicodeString(TRUE, u"dudum ", 6), 0, INT32_MAX); + assertEquals("UnicodeString::insert(start, const UniStr &, start, length) failed", + u"la dudum lila", test1); static const UChar ucs[]={ 0x68, 0x6d, 0x20, 0 }; test1.insert(9, ucs, -1); - if(test1!=UNICODE_STRING("la dudum hm lila", 16)) { - errln("UnicodeString::insert(start, const UChar *, length) failed"); - } + assertEquals("UnicodeString::insert(start, const UChar *, length) failed", + u"la dudum hm lila", test1); test1.replace(9, 2, (UChar)0x2b); - if(test1!=UNICODE_STRING("la dudum + lila", 15)) { - errln("UnicodeString::replace(start, length, UChar) failed"); - } + assertEquals("UnicodeString::replace(start, length, UChar) failed", + u"la dudum + lila", test1); if(test1.hasMetaData() || UnicodeString().hasMetaData()) { errln("UnicodeString::hasMetaData() returns TRUE"); @@ -2248,3 +2247,59 @@ UnicodeStringTest::TestNullPointers() { UnicodeString(u"def").extract(nullptr, 0, errorCode); assertEquals("buffer overflow extracting to nullptr", U_BUFFER_OVERFLOW_ERROR, errorCode); } + +void UnicodeStringTest::TestUnicodeStringInsertAppendToSelf() { + IcuTestErrorCode status(*this, "TestUnicodeStringAppendToSelf"); + + // Test append operation + UnicodeString str(u"foo "); + str.append(str); + str.append(str); + str.append(str); + assertEquals("", u"foo foo foo foo foo foo foo foo ", str); + + // Test append operation with readonly alias to start + str = UnicodeString(TRUE, u"foo ", 4); + str.append(str); + str.append(str); + str.append(str); + assertEquals("", u"foo foo foo foo foo foo foo foo ", str); + + // Test append operation with aliased substring + str = u"abcde"; + UnicodeString sub = str.tempSubString(1, 2); + str.append(sub); + assertEquals("", u"abcdebc", str); + + // Test append operation with double-aliased substring + str = UnicodeString(TRUE, u"abcde", 5); + sub = str.tempSubString(1, 2); + str.append(sub); + assertEquals("", u"abcdebc", str); + + // Test insert operation + str = u"a-*b"; + str.insert(2, str); + str.insert(4, str); + str.insert(8, str); + assertEquals("", u"a-a-a-a-a-a-a-a-*b*b*b*b*b*b*b*b", str); + + // Test insert operation with readonly alias to start + str = UnicodeString(TRUE, u"a-*b", 4); + str.insert(2, str); + str.insert(4, str); + str.insert(8, str); + assertEquals("", u"a-a-a-a-a-a-a-a-*b*b*b*b*b*b*b*b", str); + + // Test insert operation with aliased substring + str = u"abcde"; + sub = str.tempSubString(1, 3); + str.insert(2, sub); + assertEquals("", u"abbcdcde", str); + + // Test insert operation with double-aliased substring + str = UnicodeString(TRUE, u"abcde", 5); + sub = str.tempSubString(1, 3); + str.insert(2, sub); + assertEquals("", u"abbcdcde", str); +} diff --git a/icu4c/source/test/intltest/ustrtest.h b/icu4c/source/test/intltest/ustrtest.h index 4ba348c431fd..218befdcc685 100644 --- a/icu4c/source/test/intltest/ustrtest.h +++ b/icu4c/source/test/intltest/ustrtest.h @@ -96,6 +96,7 @@ class UnicodeStringTest: public IntlTest { void TestUInt16Pointers(); void TestWCharPointers(); void TestNullPointers(); + void TestUnicodeStringInsertAppendToSelf(); }; #endif