Skip to content

Commit

Permalink
src: fix ParseEncoding
Browse files Browse the repository at this point in the history
"utf-16LE" was parsed as "UNKNOWN"; this fixes it to parse as "UCS2".
"utf-buffer" was parsed as "BUFFER"; this fixes it to parse as "UNKNOWN".
"utf-16leNOT" was parsed as "UCS2"; this fixes it to parse as "UNKNOWN".

PR-URL: #33957
Reviewed-By: Anna Henningsen <anna@addaleax.net>
Reviewed-By: James M Snell <jasnell@gmail.com>
  • Loading branch information
sapics authored and addaleax committed Sep 28, 2020
1 parent 6ee800f commit 63cd05b
Showing 3 changed files with 65 additions and 47 deletions.
99 changes: 58 additions & 41 deletions src/api/encoding.cc
Original file line number Diff line number Diff line change
@@ -14,74 +14,91 @@ enum encoding ParseEncoding(const char* encoding,
enum encoding default_encoding) {
switch (encoding[0]) {
case 'u':
case 'U':
// utf8, utf16le
if (encoding[1] == 't' && encoding[2] == 'f') {
// Skip `-`
encoding += encoding[3] == '-' ? 4 : 3;
if (encoding[0] == '8' && encoding[1] == '\0')
const size_t skip = encoding[3] == '-' ? 4 : 3;
if (encoding[skip] == '8' && encoding[skip + 1] == '\0')
return UTF8;
if (strncmp(encoding, "16le", 4) == 0)
if (strncmp(encoding + skip, "16le", 5) == 0)
return UCS2;

// ucs2
} else if (encoding[1] == 'c' && encoding[2] == 's') {
encoding += encoding[3] == '-' ? 4 : 3;
if (encoding[0] == '2' && encoding[1] == '\0')
const size_t skip = encoding[3] == '-' ? 4 : 3;
if (encoding[skip] == '2' && encoding[skip + 1] == '\0')
return UCS2;
}
if (StringEqualNoCase(encoding, "utf8"))
return UTF8;
if (StringEqualNoCase(encoding, "utf-8"))
return UTF8;
if (StringEqualNoCase(encoding, "ucs2"))
return UCS2;
if (StringEqualNoCase(encoding, "ucs-2"))
return UCS2;
if (StringEqualNoCase(encoding, "utf16le"))
return UCS2;
if (StringEqualNoCase(encoding, "utf-16le"))
return UCS2;
break;

case 'l':
case 'L':
// latin1
if (encoding[1] == 'a') {
if (strncmp(encoding + 2, "tin1", 4) == 0)
if (strncmp(encoding + 2, "tin1", 5) == 0)
return LATIN1;
}
if (StringEqualNoCase(encoding, "latin1"))
return LATIN1;
break;

case 'b':
// binary
case 'B':
// binary is a deprecated alias of latin1
if (encoding[1] == 'i') {
if (strncmp(encoding + 2, "nary", 4) == 0)
if (strncmp(encoding + 2, "nary", 5) == 0)
return LATIN1;

// buffer
} else if (encoding[1] == 'u') {
if (strncmp(encoding + 2, "ffer", 4) == 0)
if (strncmp(encoding + 2, "ffer", 5) == 0)
return BUFFER;
// base64
} else if (encoding[1] == 'a') {
if (strncmp(encoding + 2, "se64", 5) == 0)
return BASE64;
}
if (StringEqualNoCase(encoding, "binary"))
return LATIN1; // BINARY is a deprecated alias of LATIN1.
if (StringEqualNoCase(encoding, "buffer"))
return BUFFER;
if (StringEqualNoCase(encoding, "base64"))
return BASE64;
break;
case '\0':
return default_encoding;
default:

case 'a':
case 'A':
// ascii
if (encoding[1] == 's') {
if (strncmp(encoding + 2, "cii", 4) == 0)
return ASCII;
}
if (StringEqualNoCase(encoding, "ascii"))
return ASCII;
break;
}

if (StringEqualNoCase(encoding, "utf8")) {
return UTF8;
} else if (StringEqualNoCase(encoding, "utf-8")) {
return UTF8;
} else if (StringEqualNoCase(encoding, "ascii")) {
return ASCII;
} else if (StringEqualNoCase(encoding, "base64")) {
return BASE64;
} else if (StringEqualNoCase(encoding, "ucs2")) {
return UCS2;
} else if (StringEqualNoCase(encoding, "ucs-2")) {
return UCS2;
} else if (StringEqualNoCase(encoding, "utf16le")) {
return UCS2;
} else if (StringEqualNoCase(encoding, "utf-16le")) {
return UCS2;
} else if (StringEqualNoCase(encoding, "latin1")) {
return LATIN1;
} else if (StringEqualNoCase(encoding, "binary")) {
return LATIN1; // BINARY is a deprecated alias of LATIN1.
} else if (StringEqualNoCase(encoding, "buffer")) {
return BUFFER;
} else if (StringEqualNoCase(encoding, "hex")) {
return HEX;
} else {
return default_encoding;
case 'h':
case 'H':
// hex
if (encoding[1] == 'e')
if (encoding[2] == 'x' && encoding[3] == '\0')
return HEX;
if (StringEqualNoCase(encoding, "hex"))
return HEX;
break;
}
return default_encoding;
}


10 changes: 4 additions & 6 deletions src/util-inl.h
Original file line number Diff line number Diff line change
@@ -299,12 +299,10 @@ std::string ToUpper(const std::string& in) {
}

bool StringEqualNoCase(const char* a, const char* b) {
do {
if (*a == '\0')
return *b == '\0';
if (*b == '\0')
return *a == '\0';
} while (ToLower(*a++) == ToLower(*b++));
while (ToLower(*a) == ToLower(*b++)) {
if (*a++ == '\0')
return true;
}
return false;
}

3 changes: 3 additions & 0 deletions test/addons/parse-encoding/test.js
Original file line number Diff line number Diff line change
@@ -14,6 +14,9 @@ assert.strictEqual(parseEncoding('hex'), 'HEX');
assert.strictEqual(parseEncoding('latin1'), 'LATIN1');
assert.strictEqual(parseEncoding('ucs2'), 'UCS2');
assert.strictEqual(parseEncoding('utf8'), 'UTF8');
assert.strictEqual(parseEncoding('utf-16LE'), 'UCS2');
assert.strictEqual(parseEncoding('utf-buffer'), 'UNKNOWN');
assert.strictEqual(parseEncoding('utf-16leNOT'), 'UNKNOWN');

assert.strictEqual(parseEncoding('linary'), 'UNKNOWN');
assert.strictEqual(parseEncoding('luffer'), 'UNKNOWN');

0 comments on commit 63cd05b

Please sign in to comment.