Skip to content

Commit

Permalink
ICU-13707 Make UnicodeString safe when appended or inserted into itse…
Browse files Browse the repository at this point in the history
  • Loading branch information
sffc committed Sep 26, 2018
1 parent 8e8f4cb commit b8a5c0d
Show file tree
Hide file tree
Showing 4 changed files with 121 additions and 27 deletions.
3 changes: 3 additions & 0 deletions icu4c/source/common/unicode/unistr.h
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,9 @@ class UnicodeStringAppendable; // unicode/appendable.h
* than other ICU APIs. In particular:
* - If indexes are out of bounds for a UnicodeString object
* (<0 or >length()) then they are "pinned" to the nearest boundary.
* - If the buffer passed to an insert/append/replace operation is owned by the
* target object, e.g., calling str.append(str), an extra copy may take place
* to ensure safety.
* - If primitive string pointer values (e.g., const char16_t * or char *)
* for input strings are NULL, then those input string parameters are treated
* as if they pointed to an empty string.
Expand Down
59 changes: 47 additions & 12 deletions icu4c/source/common/unistr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1447,10 +1447,15 @@ UnicodeString::doReplace(int32_t start,
}

if(srcChars == 0) {
srcStart = srcLength = 0;
} else if(srcLength < 0) {
// get the srcLength if necessary
srcLength = u_strlen(srcChars + srcStart);
srcLength = 0;
} else {
// Perform all remaining operations relative to srcChars + srcStart.
// From this point forward, do not use srcStart.
srcChars += srcStart;
if (srcLength < 0) {
// get the srcLength if necessary
srcLength = u_strlen(srcChars);
}
}

// pin the indices to legal values
Expand All @@ -1465,17 +1470,28 @@ UnicodeString::doReplace(int32_t start,
}
newLength += srcLength;

// Check for insertion into ourself
const UChar *oldArray = getArrayStart();
if (isBufferWritable() &&
oldArray < srcChars + srcLength &&
srcChars < oldArray + oldLength) {
// Copy into a new UnicodeString and start over
UnicodeString copy(srcChars, srcLength);
if (copy.isBogus()) {
setToBogus();
return *this;
}
return doReplace(start, length, copy.getArrayStart(), 0, srcLength);
}

// cloneArrayIfNeeded(doCopyArray=FALSE) may change fArray but will not copy the current contents;
// therefore we need to keep the current fArray
UChar oldStackBuffer[US_STACKBUF_SIZE];
UChar *oldArray;
if((fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) {
// copy the stack buffer contents because it will be overwritten with
// fUnion.fFields values
u_memcpy(oldStackBuffer, fUnion.fStackFields.fBuffer, oldLength);
u_memcpy(oldStackBuffer, oldArray, oldLength);
oldArray = oldStackBuffer;
} else {
oldArray = getArrayStart();
}

// clone our array and allocate a bigger array if needed
Expand Down Expand Up @@ -1503,7 +1519,7 @@ UnicodeString::doReplace(int32_t start,
}

// now fill in the hole with the new string
us_arrayCopy(srcChars, srcStart, newArray, start, srcLength);
us_arrayCopy(srcChars, 0, newArray, start, srcLength);

setLength(newLength);

Expand Down Expand Up @@ -1536,15 +1552,34 @@ UnicodeString::doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLeng
return *this;
}

// Perform all remaining operations relative to srcChars + srcStart.
// From this point forward, do not use srcStart.
srcChars += srcStart;

if(srcLength < 0) {
// get the srcLength if necessary
if((srcLength = u_strlen(srcChars + srcStart)) == 0) {
if((srcLength = u_strlen(srcChars)) == 0) {
return *this;
}
}

int32_t oldLength = length();
int32_t newLength = oldLength + srcLength;

// Check for append onto ourself
const UChar* oldArray = getArrayStart();
if (isBufferWritable() &&
oldArray < srcChars + srcLength &&
srcChars < oldArray + oldLength) {
// Copy into a new UnicodeString and start over
UnicodeString copy(srcChars, srcLength);
if (copy.isBogus()) {
setToBogus();
return *this;
}
return doAppend(copy.getArrayStart(), 0, srcLength);
}

// optimize append() onto a large-enough, owned string
if((newLength <= getCapacity() && isBufferWritable()) ||
cloneArrayIfNeeded(newLength, getGrowCapacity(newLength))) {
Expand All @@ -1556,8 +1591,8 @@ UnicodeString::doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLeng
// or
// str.appendString(buffer, length)
// or similar.
if(srcChars + srcStart != newArray + oldLength) {
us_arrayCopy(srcChars, srcStart, newArray, oldLength, srcLength);
if(srcChars != newArray + oldLength) {
us_arrayCopy(srcChars, 0, newArray, oldLength, srcLength);
}
setLength(newLength);
}
Expand Down
85 changes: 70 additions & 15 deletions icu4c/source/test/intltest/ustrtest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* &
TESTCASE_AUTO(TestUInt16Pointers);
TESTCASE_AUTO(TestWCharPointers);
TESTCASE_AUTO(TestNullPointers);
TESTCASE_AUTO(TestUnicodeStringInsertAppendToSelf);
TESTCASE_AUTO_END;
}

Expand Down Expand Up @@ -1123,27 +1124,25 @@ UnicodeStringTest::TestMiscellaneous()
errln("UnicodeString(u[-1]).getTerminatedBuffer() returns a bad buffer");
}

test1=UNICODE_STRING("la", 2);
test1.append(UNICODE_STRING(" lila", 5).getTerminatedBuffer(), 0, -1);
if(test1!=UNICODE_STRING("la lila", 7)) {
errln("UnicodeString::append(const UChar *, start, length) failed");
}
// NOTE: Some compilers will optimize u"la" to point to the same static memory
// as u" lila", offset by 3 code units
test1=UnicodeString(TRUE, u"la", 2);
test1.append(UnicodeString(TRUE, u" lila", 5).getTerminatedBuffer(), 0, -1);
assertEquals("UnicodeString::append(const UChar *, start, length) failed",
u"la lila", test1);

test1.insert(3, UNICODE_STRING("dudum ", 6), 0, INT32_MAX);
if(test1!=UNICODE_STRING("la dudum lila", 13)) {
errln("UnicodeString::insert(start, const UniStr &, start, length) failed");
}
test1.insert(3, UnicodeString(TRUE, u"dudum ", 6), 0, INT32_MAX);
assertEquals("UnicodeString::insert(start, const UniStr &, start, length) failed",
u"la dudum lila", test1);

static const UChar ucs[]={ 0x68, 0x6d, 0x20, 0 };
test1.insert(9, ucs, -1);
if(test1!=UNICODE_STRING("la dudum hm lila", 16)) {
errln("UnicodeString::insert(start, const UChar *, length) failed");
}
assertEquals("UnicodeString::insert(start, const UChar *, length) failed",
u"la dudum hm lila", test1);

test1.replace(9, 2, (UChar)0x2b);
if(test1!=UNICODE_STRING("la dudum + lila", 15)) {
errln("UnicodeString::replace(start, length, UChar) failed");
}
assertEquals("UnicodeString::replace(start, length, UChar) failed",
u"la dudum + lila", test1);

if(test1.hasMetaData() || UnicodeString().hasMetaData()) {
errln("UnicodeString::hasMetaData() returns TRUE");
Expand Down Expand Up @@ -2248,3 +2247,59 @@ UnicodeStringTest::TestNullPointers() {
UnicodeString(u"def").extract(nullptr, 0, errorCode);
assertEquals("buffer overflow extracting to nullptr", U_BUFFER_OVERFLOW_ERROR, errorCode);
}

void UnicodeStringTest::TestUnicodeStringInsertAppendToSelf() {
IcuTestErrorCode status(*this, "TestUnicodeStringAppendToSelf");

// Test append operation
UnicodeString str(u"foo ");
str.append(str);
str.append(str);
str.append(str);
assertEquals("", u"foo foo foo foo foo foo foo foo ", str);

// Test append operation with readonly alias to start
str = UnicodeString(TRUE, u"foo ", 4);
str.append(str);
str.append(str);
str.append(str);
assertEquals("", u"foo foo foo foo foo foo foo foo ", str);

// Test append operation with aliased substring
str = u"abcde";
UnicodeString sub = str.tempSubString(1, 2);
str.append(sub);
assertEquals("", u"abcdebc", str);

// Test append operation with double-aliased substring
str = UnicodeString(TRUE, u"abcde", 5);
sub = str.tempSubString(1, 2);
str.append(sub);
assertEquals("", u"abcdebc", str);

// Test insert operation
str = u"a-*b";
str.insert(2, str);
str.insert(4, str);
str.insert(8, str);
assertEquals("", u"a-a-a-a-a-a-a-a-*b*b*b*b*b*b*b*b", str);

// Test insert operation with readonly alias to start
str = UnicodeString(TRUE, u"a-*b", 4);
str.insert(2, str);
str.insert(4, str);
str.insert(8, str);
assertEquals("", u"a-a-a-a-a-a-a-a-*b*b*b*b*b*b*b*b", str);

// Test insert operation with aliased substring
str = u"abcde";
sub = str.tempSubString(1, 3);
str.insert(2, sub);
assertEquals("", u"abbcdcde", str);

// Test insert operation with double-aliased substring
str = UnicodeString(TRUE, u"abcde", 5);
sub = str.tempSubString(1, 3);
str.insert(2, sub);
assertEquals("", u"abbcdcde", str);
}
1 change: 1 addition & 0 deletions icu4c/source/test/intltest/ustrtest.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ class UnicodeStringTest: public IntlTest {
void TestUInt16Pointers();
void TestWCharPointers();
void TestNullPointers();
void TestUnicodeStringInsertAppendToSelf();
};

#endif

0 comments on commit b8a5c0d

Please sign in to comment.