From 5477060491c89df636230f838b7d1f3fbbc1e460 Mon Sep 17 00:00:00 2001 From: Weijia Wang <381152119@qq.com> Date: Wed, 7 Feb 2018 11:22:51 +0800 Subject: [PATCH] url: reduce duplicated code in `autoEscapeStr` PR-URL: https://github.com/nodejs/node/pull/18613 Reviewed-By: Ruben Bridgewater --- benchmark/url/url-parse.js | 22 ++++++++ lib/url.js | 112 +++++++++---------------------------- 2 files changed, 47 insertions(+), 87 deletions(-) create mode 100644 benchmark/url/url-parse.js diff --git a/benchmark/url/url-parse.js b/benchmark/url/url-parse.js new file mode 100644 index 00000000000000..83f626ccdadfe3 --- /dev/null +++ b/benchmark/url/url-parse.js @@ -0,0 +1,22 @@ +'use strict'; +const common = require('../common.js'); +const url = require('url'); + +const inputs = { + normal: 'http://foo.com/bar', + escaped: 'https://foo.bar/{}^`/abcd' +}; + +const bench = common.createBenchmark(main, { + type: Object.keys(inputs), + n: [1e7] +}); + +function main({ type, n }) { + const input = inputs[type] || ''; + + bench.start(); + for (var i = 0; i < n; i += 1) + url.parse(input); + bench.end(n); +} diff --git a/lib/url.js b/lib/url.js index cb524fd9a87347..ab4b2b4647edd2 100644 --- a/lib/url.js +++ b/lib/url.js @@ -439,6 +439,24 @@ function validateHostname(self, rest, hostname) { } } +// Escaped characters. Use empty strings to fill up unused entries. 
+// Using Array is faster than Object/Map +const escapedCodes = [ + /*0 - 9*/ '', '', '', '', '', '', '', '', '', '%09', + /*10 - 19*/ '%0A', '', '', '%0D', '', '', '', '', '', '', + /*20 - 29*/ '', '', '', '', '', '', '', '', '', '', + /*30 - 39*/ '', '', '%20', '', '%22', '', '', '', '', '%27', + /*40 - 49*/ '', '', '', '', '', '', '', '', '', '', + /*50 - 59*/ '', '', '', '', '', '', '', '', '', '', + /*60 - 69*/ '%3C', '', '%3E', '', '', '', '', '', '', '', + /*70 - 79*/ '', '', '', '', '', '', '', '', '', '', + /*80 - 89*/ '', '', '', '', '', '', '', '', '', '', + /*90 - 99*/ '', '', '%5C', '', '%5E', '', '%60', '', '', '', + /*100 - 109*/ '', '', '', '', '', '', '', '', '', '', + /*110 - 119*/ '', '', '', '', '', '', '', '', '', '', + /*120 - 125*/ '', '', '', '%7B', '%7C', '%7D' +]; + // Automatically escape all delimiters and unwise characters from RFC 2396. // Also escape single quotes in case of an XSS attack. // Return the escaped string. @@ -446,94 +464,14 @@ function autoEscapeStr(rest) { var escaped = ''; var lastEscapedPos = 0; for (var i = 0; i < rest.length; ++i) { - // Manual switching is faster than using a Map/Object. // `escaped` contains substring up to the last escaped character. - switch (rest.charCodeAt(i)) { - case 9: // '\t' - // Concat if there are ordinary characters in the middle. 
- if (i > lastEscapedPos) - escaped += rest.slice(lastEscapedPos, i); - escaped += '%09'; - lastEscapedPos = i + 1; - break; - case 10: // '\n' - if (i > lastEscapedPos) - escaped += rest.slice(lastEscapedPos, i); - escaped += '%0A'; - lastEscapedPos = i + 1; - break; - case 13: // '\r' - if (i > lastEscapedPos) - escaped += rest.slice(lastEscapedPos, i); - escaped += '%0D'; - lastEscapedPos = i + 1; - break; - case 32: // ' ' - if (i > lastEscapedPos) - escaped += rest.slice(lastEscapedPos, i); - escaped += '%20'; - lastEscapedPos = i + 1; - break; - case 34: // '"' - if (i > lastEscapedPos) - escaped += rest.slice(lastEscapedPos, i); - escaped += '%22'; - lastEscapedPos = i + 1; - break; - case 39: // '\'' - if (i > lastEscapedPos) - escaped += rest.slice(lastEscapedPos, i); - escaped += '%27'; - lastEscapedPos = i + 1; - break; - case 60: // '<' - if (i > lastEscapedPos) - escaped += rest.slice(lastEscapedPos, i); - escaped += '%3C'; - lastEscapedPos = i + 1; - break; - case 62: // '>' - if (i > lastEscapedPos) - escaped += rest.slice(lastEscapedPos, i); - escaped += '%3E'; - lastEscapedPos = i + 1; - break; - case 92: // '\\' - if (i > lastEscapedPos) - escaped += rest.slice(lastEscapedPos, i); - escaped += '%5C'; - lastEscapedPos = i + 1; - break; - case 94: // '^' - if (i > lastEscapedPos) - escaped += rest.slice(lastEscapedPos, i); - escaped += '%5E'; - lastEscapedPos = i + 1; - break; - case 96: // '`' - if (i > lastEscapedPos) - escaped += rest.slice(lastEscapedPos, i); - escaped += '%60'; - lastEscapedPos = i + 1; - break; - case 123: // '{' - if (i > lastEscapedPos) - escaped += rest.slice(lastEscapedPos, i); - escaped += '%7B'; - lastEscapedPos = i + 1; - break; - case 124: // '|' - if (i > lastEscapedPos) - escaped += rest.slice(lastEscapedPos, i); - escaped += '%7C'; - lastEscapedPos = i + 1; - break; - case 125: // '}' - if (i > lastEscapedPos) - escaped += rest.slice(lastEscapedPos, i); - escaped += '%7D'; - lastEscapedPos = i + 1; - break; + var 
escapedChar = escapedCodes[rest.charCodeAt(i)]; + if (escapedChar) { + // Concat if there are ordinary characters in the middle. + if (i > lastEscapedPos) + escaped += rest.slice(lastEscapedPos, i); + escaped += escapedChar; + lastEscapedPos = i + 1; } } if (lastEscapedPos === 0) // Nothing has been escaped.