From 4f300a5b0077de4662f9b7b419c7e90cf78cf4da Mon Sep 17 00:00:00 2001 From: Ruben Bridgewater Date: Mon, 6 Dec 2021 18:38:15 +0100 Subject: [PATCH] util: escape lone surrogate code points using .inspect() Unpaired surrogate code points have no representation in UTF8. Therefore, such code points are just "random" output that is unreadable. Instead, escape the code points similar to C0 and C1 control characters. Refs: https://unicodebook.readthedocs.io/unicode_encodings.html#utf-16-surrogate-pairs Signed-off-by: Ruben Bridgewater PR-URL: https://github.com/nodejs/node/pull/41001 Reviewed-By: James M Snell --- lib/internal/util/inspect.js | 28 ++++++++++++++------ test/parallel/test-util-inspect.js | 42 ++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 8 deletions(-) diff --git a/lib/internal/util/inspect.js b/lib/internal/util/inspect.js index 0279e5ecb155f1..a4db785042965f 100644 --- a/lib/internal/util/inspect.js +++ b/lib/internal/util/inspect.js @@ -179,10 +179,10 @@ const kArrayType = 1; const kArrayExtrasType = 2; /* eslint-disable no-control-regex */ -const strEscapeSequencesRegExp = /[\x00-\x1f\x27\x5c\x7f-\x9f]/; -const strEscapeSequencesReplacer = /[\x00-\x1f\x27\x5c\x7f-\x9f]/g; -const strEscapeSequencesRegExpSingle = /[\x00-\x1f\x5c\x7f-\x9f]/; -const strEscapeSequencesReplacerSingle = /[\x00-\x1f\x5c\x7f-\x9f]/g; +const strEscapeSequencesRegExp = /[\x00-\x1f\x27\x5c\x7f-\x9f]|[\ud800-\udbff](?![\udc00-\udfff])|(? meta[StringPrototypeCharCodeAt(str)]; +function escapeFn(str) { + const charCode = StringPrototypeCharCodeAt(str); + return meta.length > charCode ? meta[charCode] : `\\u${charCode.toString(16)}`; +} // Escape control characters, single quotes and the backslash. // This is similar to JSON stringify escaping. 
@@ -501,8 +504,7 @@ function strEscape(str) { let result = ''; let last = 0; - const lastIndex = str.length; - for (let i = 0; i < lastIndex; i++) { + for (let i = 0; i < str.length; i++) { const point = StringPrototypeCharCodeAt(str, i); if (point === singleQuote || point === 92 || @@ -514,10 +516,20 @@ function strEscape(str) { result += `${StringPrototypeSlice(str, last, i)}${meta[point]}`; } last = i + 1; + } else if (point >= 0xd800 && point <= 0xdfff) { + if (point <= 0xdbff && i + 1 < str.length) { + const point = StringPrototypeCharCodeAt(str, i + 1); + if (point >= 0xdc00 && point <= 0xdfff) { + i++; + continue; + } + } + result += `${StringPrototypeSlice(str, last, i)}${`\\u${point.toString(16)}`}`; + last = i + 1; } } - if (last !== lastIndex) { + if (last !== str.length) { result += StringPrototypeSlice(str, last); } return addQuotes(result, singleQuote); diff --git a/test/parallel/test-util-inspect.js b/test/parallel/test-util-inspect.js index ca9d029e2f62b0..aeb6359fe331b7 100644 --- a/test/parallel/test-util-inspect.js +++ b/test/parallel/test-util-inspect.js @@ -837,6 +837,48 @@ assert.strictEqual(util.inspect(Object.create(Date.prototype)), 'Date {}'); ); } +// Escape lone (unpaired) surrogate code points. 
+{ + const edgeChar = String.fromCharCode(0xd799); + + for (let charCode = 0xD800; charCode < 0xDFFF; charCode++) { + const surrogate = String.fromCharCode(charCode); + + assert.strictEqual( + util.inspect(surrogate), + `'\\u${charCode.toString(16)}'` + ); + assert.strictEqual( + util.inspect(`${'a'.repeat(200)}${surrogate}`), + `'${'a'.repeat(200)}\\u${charCode.toString(16)}'` + ); + assert.strictEqual( + util.inspect(`${surrogate}${'a'.repeat(200)}`), + `'\\u${charCode.toString(16)}${'a'.repeat(200)}'` + ); + if (charCode < 0xdc00) { + const highSurrogate = surrogate; + const lowSurrogate = String.fromCharCode(charCode + 1024); + assert( + !util.inspect( + `${edgeChar}${highSurrogate}${lowSurrogate}${edgeChar}` + ).includes('\\u') + ); + assert.strictEqual( + (util.inspect( + `${highSurrogate}${highSurrogate}${lowSurrogate}` + ).match(/\\u/g) ?? []).length, + 1 + ); + } else { + assert.strictEqual( + util.inspect(`${edgeChar}${surrogate}${edgeChar}`), + `'${edgeChar}\\u${charCode.toString(16)}${edgeChar}'` + ); + } + } +} + // Test util.inspect.styles and util.inspect.colors. { function testColorStyle(style, input, implicit) {