-
-
Notifications
You must be signed in to change notification settings - Fork 449
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix mbstowcs_nonfatal() that might convert string shorter than desired #1586
base: main
Are you sure you want to change the base?
Conversation
Looking at the callers I noticed, they both ( Thus, could you take another look and see if we can get this sorted out properly (I think, fully going |
f109b7d
to
a547c87
Compare
This still does not resolve the issue I mentioned above regarding these two places: static inline int RichString_writeFromWide(RichString* this, int attrs, const char* data_c, int from, int len) {
wchar_t data[len]; // <-- UB here
len = mbstowcs_nonfatal(data, data_c, len); // int underflow here
… int RichString_appendnWideColumns(RichString* this, int attrs, const char* data_c, int len, int* columns) {
wchar_t data[len]; // <-- UB here
len = mbstowcs_nonfatal(data, data_c, len); // int underflow here
… In both cases, making |
Actually I was trying to migrate the Firstly there were confusions between display width offsets and character offsets. Many of the As for VLAs, my plan was to remove both VLA buffers with a function like this. It would write to ATTR_NONNULL_N(3, 5) ATTR_ACCESS3_W(1, 4)
static size_t RichString_convertMBString(CharType* chstr, int attrs, const char** src, size_t n, int* width) {
size_t wcCount = 0;
wchar_t wc = L'\0';
mbstate_t ps = {0};
bool wasReplaced = false;
int totalWidth = 0;
while (wcCount < n) {
bool shouldReplace = false;
size_t ret = mbrtowc(&wc, *src, SIZE_MAX, &ps);
if (ret == (size_t)-1 || ret == (size_t)-2) {
// Invalid or incomplete multibyte sequence
shouldReplace = true;
if (ret == (size_t)-2) {
// There would be no bytes that form a valid sequence from
// here on. Skip to the end of string.
ret = strlen(*src);
} else {
// Unfortunately mbrtowc() doesn't tell which next byte may
// start a potentially valid sequence. Thus we have to skip
// one byte at a time.
ret = 1;
}
}
if (!iswprint(wc)) {
// A control character
shouldReplace = true;
}
if (shouldReplace) {
wc = L'\xFFFD';
} else {
wasReplaced = false;
}
if (!wasReplaced) {
int cWidth = wcwidth(wc);
assert(cWidth >= 0);
if (*width >= 0 && cWidth > *width - totalWidth) {
break;
}
// Write character to buffer
if (chstr) {
chstr[wcCount] = (CharType) {
.attr = attrs & 0xffffff,
.chars = { wch, '\0' }
};
}
wcCount++;
totalWidth = (totalWidth > INT_MAX - cWidth) ? INT_MAX : totalWidth + cWidth;
if (shouldReplace) {
wasReplaced = true;
}
}
if (ret == 0) {
break;
}
*src += ret;
}
*width = totalWidth;
return wcCount;
} |
I wonder if I can just put As I mentioned, it's more work than I expected. If I just change the APIs then I have to address the case where any string length ( |
A quick&dirty upgrade of the RichString header to diff --git a/RichString.c b/RichString.c
index 390beb07..455dc80a 100644
--- a/RichString.c
+++ b/RichString.c
@@ -21,7 +21,7 @@ in the source distribution for its full text.
#define charBytes(n) (sizeof(CharType) * (n))
-static void RichString_extendLen(RichString* this, int len) {
+static void RichString_extendLen(RichString* this, size_t len) {
if (this->chptr == this->chstr) {
// String is in internal buffer
if (len > RICHSTRING_MAXLEN) {
@@ -49,7 +49,7 @@ static void RichString_extendLen(RichString* this, int len) {
this->chlen = len;
}
-static void RichString_setLen(RichString* this, int len) {
+static void RichString_setLen(RichString* this, size_t len) {
if (len < RICHSTRING_MAXLEN && this->chlen < RICHSTRING_MAXLEN) {
RichString_setChar(this, len, 0);
this->chlen = len;
@@ -58,8 +58,8 @@ static void RichString_setLen(RichString* this, int len) {
}
}
-void RichString_rewind(RichString* this, int count) {
- RichString_setLen(this, this->chlen - count);
+void RichString_rewind(RichString* this, size_t count) {
+ RichString_setLen(this, this->chlen > count ? this->chlen - count : 0);
}
#ifdef HAVE_LIBNCURSESW
@@ -97,7 +97,7 @@ static size_t mbstowcs_nonfatal(wchar_t* restrict dest, const char* restrict src
return written;
}
-static inline int RichString_writeFromWide(RichString* this, int attrs, const char* data_c, int from, int len) {
+static inline size_t RichString_writeFromWide(RichString* this, int attrs, const char* data_c, int from, size_t len) {
wchar_t data[len];
len = mbstowcs_nonfatal(data, data_c, len);
if (len <= 0)
@@ -112,18 +112,18 @@ static inline int RichString_writeFromWide(RichString* this, int attrs, const ch
return len;
}
-int RichString_appendnWideColumns(RichString* this, int attrs, const char* data_c, int len, int* columns) {
+int RichString_appendnWideColumns(RichString* this, int attrs, const char* data_c, size_t len, int* columns) {
wchar_t data[len];
len = mbstowcs_nonfatal(data, data_c, len);
if (len <= 0)
return 0;
- int from = this->chlen;
- int newLen = from + len;
+ size_t from = this->chlen;
+ size_t newLen = from + len;
RichString_setLen(this, newLen);
int columnsWritten = 0;
- int pos = from;
- for (int j = 0; j < len; j++) {
+ size_t pos = from;
+ for (size_t j = 0; j < len; j++) {
wchar_t c = iswprint(data[j]) ? data[j] : L'\xFFFD';
int cwidth = wcwidth(c);
if (cwidth > *columns)
@@ -142,10 +142,10 @@ int RichString_appendnWideColumns(RichString* this, int attrs, const char* data_
return pos - from;
}
-static inline int RichString_writeFromAscii(RichString* this, int attrs, const char* data, int from, int len) {
- int newLen = from + len;
+static inline int RichString_writeFromAscii(RichString* this, int attrs, const char* data, size_t from, size_t len) {
+ size_t newLen = from + len;
RichString_setLen(this, newLen);
- for (int i = from, j = 0; i < newLen; i++, j++) {
+ for (size_t i = from, j = 0; i < newLen; i++, j++) {
assert((unsigned char)data[j] <= SCHAR_MAX);
this->chptr[i] = (CharType) { .attr = attrs & 0xffffff, .chars = { (isprint((unsigned char)data[j]) ? data[j] : L'\xFFFD') } };
}
@@ -153,26 +153,26 @@ static inline int RichString_writeFromAscii(RichString* this, int attrs, const c
return len;
}
-inline void RichString_setAttrn(RichString* this, int attrs, int start, int charcount) {
- int end = CLAMP(start + charcount, 0, this->chlen);
- for (int i = start; i < end; i++) {
+inline void RichString_setAttrn(RichString* this, int attrs, size_t start, size_t charcount) {
+ size_t end = CLAMP(start + charcount, 0, this->chlen);
+ for (size_t i = start; i < end; i++) {
this->chptr[i].attr = attrs;
}
}
-void RichString_appendChr(RichString* this, int attrs, char c, int count) {
- int from = this->chlen;
- int newLen = from + count;
+void RichString_appendChr(RichString* this, int attrs, char c, size_t count) {
+ size_t from = this->chlen;
+ size_t newLen = from + count;
RichString_setLen(this, newLen);
- for (int i = from; i < newLen; i++) {
+ for (size_t i = from; i < newLen; i++) {
this->chptr[i] = (CharType) { .attr = attrs, .chars = { c, 0 } };
}
}
-int RichString_findChar(const RichString* this, char c, int start) {
+int RichString_findChar(const RichString* this, char c, size_t start) {
const wchar_t wc = btowc(c);
const cchar_t* ch = this->chptr + start;
- for (int i = start; i < this->chlen; i++) {
+ for (size_t i = start; i < this->chlen; i++) {
if (ch->chars[0] == wc)
return i;
ch++;
@@ -182,10 +182,10 @@ int RichString_findChar(const RichString* this, char c, int start) {
#else /* HAVE_LIBNCURSESW */
-static inline int RichString_writeFromWide(RichString* this, int attrs, const char* data_c, int from, int len) {
- int newLen = from + len;
+static inline int RichString_writeFromWide(RichString* this, int attrs, const char* data_c, size_t from, size_t len) {
+ size_t newLen = from + len;
RichString_setLen(this, newLen);
- for (int i = from, j = 0; i < newLen; i++, j++) {
+ for (size_t i = from, j = 0; i < newLen; i++, j++) {
this->chptr[i] = (((unsigned char)data_c[j]) >= 32 ? ((unsigned char)data_c[j]) : '?') | attrs;
}
this->chptr[newLen] = 0;
@@ -193,35 +193,35 @@ static inline int RichString_writeFromWide(RichString* this, int attrs, const ch
return len;
}
-int RichString_appendnWideColumns(RichString* this, int attrs, const char* data_c, int len, int* columns) {
+int RichString_appendnWideColumns(RichString* this, int attrs, const char* data_c, size_t len, int* columns) {
int written = RichString_writeFromWide(this, attrs, data_c, this->chlen, MINIMUM(len, *columns));
*columns = written;
return written;
}
-static inline int RichString_writeFromAscii(RichString* this, int attrs, const char* data_c, int from, int len) {
+static inline int RichString_writeFromAscii(RichString* this, int attrs, const char* data_c, size_t from, size_t len) {
return RichString_writeFromWide(this, attrs, data_c, from, len);
}
-void RichString_setAttrn(RichString* this, int attrs, int start, int charcount) {
- int end = CLAMP(start + charcount, 0, this->chlen);
- for (int i = start; i < end; i++) {
+void RichString_setAttrn(RichString* this, int attrs, size_t start, size_t charcount) {
+ size_t end = CLAMP(start + charcount, 0, this->chlen);
+ for (size_t i = start; i < end; i++) {
this->chptr[i] = (this->chptr[i] & 0xff) | attrs;
}
}
-void RichString_appendChr(RichString* this, int attrs, char c, int count) {
- int from = this->chlen;
- int newLen = from + count;
+void RichString_appendChr(RichString* this, int attrs, char c, size_t count) {
+ size_t from = this->chlen;
+ size_t newLen = from + count;
RichString_setLen(this, newLen);
- for (int i = from; i < newLen; i++) {
+ for (size_t i = from; i < newLen; i++) {
this->chptr[i] = c | attrs;
}
}
-int RichString_findChar(const RichString* this, char c, int start) {
+int RichString_findChar(const RichString* this, char c, size_t start) {
const chtype* ch = this->chptr + start;
- for (int i = start; i < this->chlen; i++) {
+ for (size_t i = start; i < this->chlen; i++) {
if ((*ch & 0xff) == (chtype) c)
return i;
ch++;
@@ -246,7 +246,7 @@ int RichString_appendWide(RichString* this, int attrs, const char* data) {
return RichString_writeFromWide(this, attrs, data, this->chlen, strlen(data));
}
-int RichString_appendnWide(RichString* this, int attrs, const char* data, int len) {
+int RichString_appendnWide(RichString* this, int attrs, const char* data, size_t len) {
return RichString_writeFromWide(this, attrs, data, this->chlen, len);
}
@@ -258,7 +258,7 @@ int RichString_appendAscii(RichString* this, int attrs, const char* data) {
return RichString_writeFromAscii(this, attrs, data, this->chlen, strlen(data));
}
-int RichString_appendnAscii(RichString* this, int attrs, const char* data, int len) {
+int RichString_appendnAscii(RichString* this, int attrs, const char* data, size_t len) {
return RichString_writeFromAscii(this, attrs, data, this->chlen, len);
}
diff --git a/RichString.h b/RichString.h
index 7783378b..73177b53 100644
--- a/RichString.h
+++ b/RichString.h
@@ -40,7 +40,7 @@ in the source distribution for its full text.
#define RICHSTRING_MAXLEN 350
typedef struct RichString_ {
- int chlen;
+ size_t chlen;
CharType* chptr;
CharType chstr[RICHSTRING_MAXLEN + 1];
int highlightAttr;
@@ -48,32 +48,32 @@ typedef struct RichString_ {
void RichString_delete(RichString* this);
-void RichString_rewind(RichString* this, int count);
+void RichString_rewind(RichString* this, size_t count);
-void RichString_setAttrn(RichString* this, int attrs, int start, int charcount);
+void RichString_setAttrn(RichString* this, int attrs, size_t start, size_t charcount);
-int RichString_findChar(const RichString* this, char c, int start);
+int RichString_findChar(const RichString* this, char c, size_t start);
void RichString_setAttr(RichString* this, int attrs);
-void RichString_appendChr(RichString* this, int attrs, char c, int count);
+void RichString_appendChr(RichString* this, int attrs, char c, size_t count);
/* All appending and writing functions return the number of written characters (not columns). */
int RichString_appendWide(RichString* this, int attrs, const char* data);
ATTR_ACCESS3_R(3, 4)
-int RichString_appendnWide(RichString* this, int attrs, const char* data, int len);
+int RichString_appendnWide(RichString* this, int attrs, const char* data, size_t len);
/* columns takes the maximum number of columns to write and contains on return the number of columns written. */
-int RichString_appendnWideColumns(RichString* this, int attrs, const char* data, int len, int* columns);
+int RichString_appendnWideColumns(RichString* this, int attrs, const char* data, size_t len, int* columns);
int RichString_writeWide(RichString* this, int attrs, const char* data);
int RichString_appendAscii(RichString* this, int attrs, const char* data);
ATTR_ACCESS3_R(3, 4)
-int RichString_appendnAscii(RichString* this, int attrs, const char* data, int len);
+int RichString_appendnAscii(RichString* this, int attrs, const char* data, size_t len);
int RichString_writeAscii(RichString* this, int attrs, const char* data);
Didn't test it aside from a clean compile on my system. This does not update return types to be That aside, I'd be okay with some asserts before the VLA ( The conversion function LGTM, though AFAICS this is basically a rewritten version of |
I would choose to review the functions one by one, because the caller code would also need to reflect the type changes.
Not just that. Two features: (1.) The replacement with U+FFFD can be centralized in one place, making any future change to the character filter easier. (2.) It allows limiting the string by display width ( I plan to add a third feature that detects whether the string contains characters of different width (combining characters or East Asian characters) and let the |
Fixing this by adding the check is independent of the type change and can be done now too. With the type change it's just become necessary to avoid obvious UB.
???
This is also something that's missing now already. Just becomes more obvious with the type change.
|
It's okay to compare "<= RICHSTRING_MAXLEN". RICHSTRING_MAXLEN does not include the terminating L'\0' character and the internal buffer of a RichString instance has (RICHSTRING_MAXLEN + 1) elements. Signed-off-by: Kang-Che Sung <explorer09@gmail.com>
Prevent arithmetic overflows when computing sizes in RichString_extendLen(). Note that we are not using xMallocArray() and xReallocArray() as they would add unnecessary overflow checks in this particular case. Signed-off-by: Kang-Che Sung <explorer09@gmail.com>
The computed length could wrap to a negative integer, causing undefined behavior. (Currently it would cause out-of-bound array access due to a signed length property in RichString. The length property should be migrated to an unsigned size_t type, but that would be done in a later commit.) Signed-off-by: Kang-Che Sung <explorer09@gmail.com>
The "n" parameter of mbstowcs_nonfatal() refers to number of wide characters. But the logic seemed to be confused with "n" parameter of mbrtowc() (the number of bytes). Signed-off-by: Kang-Che Sung <explorer09@gmail.com>
a547c87
to
ffcc552
Compare
@@ -22,11 +23,17 @@ in the source distribution for its full text. | |||
#define charBytes(n) (sizeof(CharType) * (n)) | |||
|
|||
static void RichString_extendLen(RichString* this, int len) { | |||
// TODO: Remove the "len" type casts once all the length properties | |||
// of RichString have been upgraded to size_t. | |||
if ((size_t)len > (SIZE_MAX - 1) / sizeof(CharType)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Given INT_MAX <= SIZE_MAX
, the only check needs to be len < 0
…
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@BenBE
On 32-bit systems INT_MAX
can be SIZE_MAX / 2
(rounded down) and it could be larger than (SIZE_MAX - 1) / sizeof(CharType)
. That's what the check is for (although it seems useless in 64-bit systems).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Either way that's not right to check for overflow as is intended …
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I should have changed the conditional to (len < 0 && (size_t)len > (SIZE_MAX - 1) / sizeof(CharType))
...
The "n" parameter of
mbstowcs_nonfatal()
refers to number of wide characters. But the logic seemed to be confused with "n" parameter ofmbrtowc()
(the number of bytes).Note: I didn't test this patch or check whether the bug really happens with a user string. I just discovered this logic mistake when I try to rework the routines of
RichString_writeFromWide()
andRichString_appendnWideColumns()
Also I'm not sure if it's worth it to add
ATTR_ACCESS3_W(1, 3)
to this function.