diff --git a/index.js b/index.js index 910c451..d78eb59 100644 --- a/index.js +++ b/index.js @@ -32,7 +32,7 @@ const deepEqual = require('fast-deep-equal') const { flattenNode, compressFlattenedNode, prettyPrintFlattenedNode, prettyPrintRoutesArray } = require('./lib/pretty-print') const { StaticNode, NODE_TYPES } = require('./custom_node') const Constrainer = require('./lib/constrainer') -const sanitizeUrl = require('./lib/url-sanitizer') +const { safeDecodeURI, safeDecodeURIComponent } = require('./lib/url-sanitizer') const httpMethods = http.METHODS const FULL_PATH_REGEXP = /^https?:\/\/.*?\// @@ -85,6 +85,7 @@ function Router (opts) { this.routes = [] this.trees = {} + this.decodeURIComponent = opts.decodeUriParameters || safeDecodeURIComponent this.constrainer = new Constrainer(opts.constraints) } @@ -167,6 +168,12 @@ Router.prototype._on = function _on (method, path, opts, handler, store) { continue } + if (path.charCodeAt(i) === 37) { + // We need to encode % char to prevent double decoding + path = path.slice(0, i + 1) + '25' + path.slice(i + 1) + continue + } + const isParametricNode = path.charCodeAt(i) === 58 const isWildcardNode = path.charCodeAt(i) === 42 @@ -329,10 +336,8 @@ Router.prototype.find = function find (method, path, derivedConstraints) { path = path.replace(FULL_PATH_REGEXP, '/') } - let sanitizedUrl try { - sanitizedUrl = sanitizeUrl(path, this.decodeUriParameters) - path = sanitizedUrl.path + path = safeDecodeURI(path) } catch (error) { return this._onBadUrl(path) } @@ -341,6 +346,8 @@ Router.prototype.find = function find (method, path, derivedConstraints) { path = trimLastSlash(path) } + const originPath = path + if (this.caseSensitive === false) { path = path.toLowerCase() } @@ -429,49 +436,55 @@ Router.prototype.find = function find (method, path, derivedConstraints) { } if (currentNode.kind === NODE_TYPES.WILDCARD) { - const decoded = sanitizedUrl.sliceParameter(pathIndex) - if (decoded === null) { - return this._onBadUrl(path.slice(pathIndex)) + let param = originPath.slice(pathIndex) + + const firstPercentIndex = param.indexOf('%') + if (firstPercentIndex !== -1) { + param = this.decodeURIComponent(param, firstPercentIndex) } - if (decoded.length > maxParamLength) { + if (param.length > maxParamLength) { return null } - params.push(decoded) + params.push(param) pathIndex = pathLen continue } if (currentNode.kind === NODE_TYPES.PARAMETRIC) { let paramEndIndex = pathIndex + let firstPercentIndex = -1 for (; paramEndIndex < pathLen; paramEndIndex++) { - if (path.charCodeAt(paramEndIndex) === 47) { + const charCode = path.charCodeAt(paramEndIndex) + if (charCode === 47) { break + } else if (firstPercentIndex === -1 && charCode === 37) { + firstPercentIndex = paramEndIndex - pathIndex } } - const decoded = sanitizedUrl.sliceParameter(pathIndex, paramEndIndex) - if (decoded === null) { - return this._onBadUrl(path.slice(pathIndex, paramEndIndex)) + let param = originPath.slice(pathIndex, paramEndIndex) + if (firstPercentIndex !== -1) { + param = this.decodeURIComponent(param, firstPercentIndex) } if (currentNode.isRegex) { - const matchedParameters = currentNode.regex.exec(decoded) + const matchedParameters = currentNode.regex.exec(param) if (matchedParameters === null) continue for (let i = 1; i < matchedParameters.length; i++) { - const param = matchedParameters[i] - if (param.length > maxParamLength) { + const matchedParam = matchedParameters[i] + if (matchedParam.length > maxParamLength) { return null } - params.push(param) + params.push(matchedParam) } } else { - if (decoded.length > maxParamLength) { + if (param.length > maxParamLength) { return null } - params.push(decoded) + params.push(param) } pathIndex = paramEndIndex diff --git a/lib/url-sanitizer.js b/lib/url-sanitizer.js index a0ef94c..368ba72 100644 --- a/lib/url-sanitizer.js +++ b/lib/url-sanitizer.js @@ -1,83 +1,87 @@ 'use strict' -const fastDecode = require('fast-decode-uri-component') - // It must spot all the chars where decodeURIComponent(x) !== decodeURI(x) // The chars are: # $ & + , / : ; = ? @ -const uriComponentsCharMap = new Array(53).fill(0x00) -uriComponentsCharMap[50] = new Array(103).fill(0x00) -uriComponentsCharMap[50][51] = true // # '%23' -uriComponentsCharMap[50][52] = true // $ '%24' -uriComponentsCharMap[50][54] = true // & '%26' -uriComponentsCharMap[50][66] = true // + '%2B' -uriComponentsCharMap[50][98] = true // + '%2b' -uriComponentsCharMap[50][67] = true // , '%2C' -uriComponentsCharMap[50][99] = true // , '%2c' -uriComponentsCharMap[50][70] = true // / '%2F' -uriComponentsCharMap[50][102] = true // / '%2f' - -uriComponentsCharMap[51] = new Array(103).fill(0x00) -uriComponentsCharMap[51][65] = true // : '%3A' -uriComponentsCharMap[51][97] = true // : '%3a' -uriComponentsCharMap[51][66] = true // ; '%3B' -uriComponentsCharMap[51][98] = true // ; '%3b' -uriComponentsCharMap[51][68] = true // = '%3D' -uriComponentsCharMap[51][100] = true // = '%3d' -uriComponentsCharMap[51][70] = true // ? '%3F' -uriComponentsCharMap[51][102] = true // ? '%3f' +function decodeComponentChar (highCharCode, lowCharCode) { + if (highCharCode === 50) { + if (lowCharCode === 53) return '%' -uriComponentsCharMap[52] = new Array(49).fill(0x00) -uriComponentsCharMap[52][48] = true // @ '%40' + if (lowCharCode === 51) return '#' + if (lowCharCode === 52) return '$' + if (lowCharCode === 54) return '&' + if (lowCharCode === 66) return '+' + if (lowCharCode === 98) return '+' + if (lowCharCode === 67) return ',' + if (lowCharCode === 99) return ',' + if (lowCharCode === 70) return '/' + if (lowCharCode === 102) return '/' + return null + } + if (highCharCode === 51) { + if (lowCharCode === 65) return ':' + if (lowCharCode === 97) return ':' + if (lowCharCode === 66) return ';' + if (lowCharCode === 98) return ';' + if (lowCharCode === 68) return '=' + if (lowCharCode === 100) return '=' + if (lowCharCode === 70) return '?' + if (lowCharCode === 102) return '?' + return null + } + if (highCharCode === 52 && lowCharCode === 48) { + return '@' + } + return null +} -function sanitizeUrl (url, customDecoder) { - let originPath = url +function safeDecodeURI (path) { let shouldDecode = false - let containsEncodedComponents = false - let highChar - let lowChar - for (var i = 0, len = url.length; i < len; i++) { - var charCode = url.charCodeAt(i) - if (shouldDecode && !containsEncodedComponents) { - if (highChar === 0 && uriComponentsCharMap[charCode]) { - highChar = charCode - lowChar = 0x00 - } else if (highChar && lowChar === 0 && uriComponentsCharMap[highChar][charCode]) { - containsEncodedComponents = true + for (let i = 1; i < path.length; i++) { + const charCode = path.charCodeAt(i) + + if (charCode === 37) { + const highCharCode = path.charCodeAt(i + 1) + const lowCharCode = path.charCodeAt(i + 2) + + if (decodeComponentChar(highCharCode, lowCharCode) === null) { + shouldDecode = true } else { - highChar = undefined - lowChar = undefined + // %25 - encoded % char. We need to encode one more time to prevent double decoding + if (highCharCode === 50 && lowCharCode === 53) { + shouldDecode = true + path = path.slice(0, i + 1) + '25' + path.slice(i + 1) + i += 2 + } + i += 2 } - } - // Some systems do not follow RFC and separate the path and query // string with a `;` character (code 59), e.g. `/foo;jsessionid=123456`. // Thus, we need to split on `;` as well as `?` and `#`. - if (charCode === 63 || charCode === 59 || charCode === 35) { - originPath = url.slice(0, i) + } else if (charCode === 63 || charCode === 59 || charCode === 35) { + path = path.slice(0, i) break - } else if (charCode === 37) { - shouldDecode = true - highChar = 0x00 } } - const decoded = shouldDecode ? decodeURI(originPath) : originPath + return shouldDecode ? decodeURI(path) : path +} - return { - path: decoded, - originPath, - sliceParameter: containsEncodedComponents - ? sliceAndDecode - : slicePath - } +function safeDecodeURIComponent (uriComponent, startIndex = 0) { + let decoded = '' + let lastIndex = startIndex - function sliceAndDecode (from, to) { - return (customDecoder || fastDecode)(this.originPath.slice(from, to)) - } -} + for (let i = startIndex; i < uriComponent.length; i++) { + if (uriComponent.charCodeAt(i) === 37) { + const highCharCode = uriComponent.charCodeAt(i + 1) + const lowCharCode = uriComponent.charCodeAt(i + 2) -module.exports = sanitizeUrl + const decodedChar = decodeComponentChar(highCharCode, lowCharCode) + decoded += uriComponent.slice(lastIndex, i) + decodedChar -function slicePath (from, to) { - return this.path.slice(from, to) + lastIndex = i + 3 + } + } + return uriComponent.slice(0, startIndex) + decoded + uriComponent.slice(lastIndex) } + +module.exports = { safeDecodeURI, safeDecodeURIComponent } diff --git a/package.json b/package.json index 65bc053..45410af 100644 --- a/package.json +++ b/package.json @@ -47,7 +47,6 @@ "tsd": "^0.13.1" }, "dependencies": { - "fast-decode-uri-component": "^1.0.1", "fast-deep-equal": "^3.1.3", "safe-regex2": "^2.0.0" }, diff --git a/test/custom-decode-uri.test.js b/test/custom-decode-uri.test.js index b00b6da..14c12ec 100644 --- a/test/custom-decode-uri.test.js +++ b/test/custom-decode-uri.test.js @@ -38,7 +38,7 @@ test('decodeUriParameters called when needed 2', t => { const results = [ 'ci%40o', 'foo%23bar', - '%23%F' + '%23🍌' ] const findMyWay = FindMyWay({ decodeUriParameters: (stringToDecode) => { diff --git a/test/issue-234.test.js b/test/issue-234.test.js new file mode 100644 index 0000000..29cafbc --- /dev/null +++ b/test/issue-234.test.js @@ -0,0 +1,95 @@ +'use strict' + +const t = require('tap') +const test = t.test +const FindMyWay = require('..') + +test('Match static url without encoding option', t => { + t.plan(2) + + const findMyWay = FindMyWay() + + const handler = () => {} + + findMyWay.on('GET', '/🍌', handler) + + t.same(findMyWay.find('GET', '/🍌').handler, handler) + t.same(findMyWay.find('GET', '/%F0%9F%8D%8C').handler, handler) +}) + +test('Match parametric url with encoding option', t => { + t.plan(2) + + const findMyWay = FindMyWay() + + findMyWay.on('GET', '/🍌/:param', () => {}) + + t.same(findMyWay.find('GET', '/🍌/@').params, { param: '@' }) + t.same(findMyWay.find('GET', '/%F0%9F%8D%8C/@').params, { param: '@' }) +}) + +test('Match encoded parametric url with encoding option', t => { + t.plan(2) + + const findMyWay = FindMyWay() + + findMyWay.on('GET', '/🍌/:param', () => {}) + + t.same(findMyWay.find('GET', '/🍌/%23').params, { param: '#' }) + t.same(findMyWay.find('GET', '/%F0%9F%8D%8C/%23').params, { param: '#' }) +}) + +test('Decode url components', t => { + t.plan(3) + + const findMyWay = FindMyWay() + + findMyWay.on('GET', '/:param1/:param2', () => {}) + + t.same(findMyWay.find('GET', '/foo%23bar/foo%23bar').params, { param1: 'foo#bar', param2: 'foo#bar' }) + t.same(findMyWay.find('GET', '/%F0%9F%8D%8C/%F0%9F%8D%8C').params, { param1: '🍌', param2: '🍌' }) + t.same(findMyWay.find('GET', '/%F0%9F%8D%8C/foo%23bar').params, { param1: '🍌', param2: 'foo#bar' }) +}) + +test('Decode url components', t => { + t.plan(5) + + const findMyWay = FindMyWay() + + findMyWay.on('GET', '/foo🍌bar/:param1/:param2', () => {}) + findMyWay.on('GET', '/user/:id', () => {}) + + t.same(findMyWay.find('GET', '/foo%F0%9F%8D%8Cbar/foo%23bar/foo%23bar').params, { param1: 'foo#bar', param2: 'foo#bar' }) + t.same(findMyWay.find('GET', '/user/maintainer+tomas').params, { id: 'maintainer+tomas' }) + t.same(findMyWay.find('GET', '/user/maintainer%2Btomas').params, { id: 'maintainer+tomas' }) + t.same(findMyWay.find('GET', '/user/maintainer%20tomas').params, { id: 'maintainer tomas' }) + t.same(findMyWay.find('GET', '/user/maintainer%252Btomas').params, { id: 'maintainer%2Btomas' }) +}) + +test('Decode url components', t => { + t.plan(18) + + const findMyWay = FindMyWay() + + findMyWay.on('GET', '/:param1', () => {}) + t.same(findMyWay.find('GET', '/foo%23bar').params, { param1: 'foo#bar' }) + t.same(findMyWay.find('GET', '/foo%24bar').params, { param1: 'foo$bar' }) + t.same(findMyWay.find('GET', '/foo%26bar').params, { param1: 'foo&bar' }) + t.same(findMyWay.find('GET', '/foo%2bbar').params, { param1: 'foo+bar' }) + t.same(findMyWay.find('GET', '/foo%2Bbar').params, { param1: 'foo+bar' }) + t.same(findMyWay.find('GET', '/foo%2cbar').params, { param1: 'foo,bar' }) + t.same(findMyWay.find('GET', '/foo%2Cbar').params, { param1: 'foo,bar' }) + t.same(findMyWay.find('GET', '/foo%2fbar').params, { param1: 'foo/bar' }) + t.same(findMyWay.find('GET', '/foo%2Fbar').params, { param1: 'foo/bar' }) + + t.same(findMyWay.find('GET', '/foo%3abar').params, { param1: 'foo:bar' }) + t.same(findMyWay.find('GET', '/foo%3Abar').params, { param1: 'foo:bar' }) + t.same(findMyWay.find('GET', '/foo%3bbar').params, { param1: 'foo;bar' }) + t.same(findMyWay.find('GET', '/foo%3Bbar').params, { param1: 'foo;bar' }) + t.same(findMyWay.find('GET', '/foo%3dbar').params, { param1: 'foo=bar' }) + t.same(findMyWay.find('GET', '/foo%3Dbar').params, { param1: 'foo=bar' }) + t.same(findMyWay.find('GET', '/foo%3fbar').params, { param1: 'foo?bar' }) + t.same(findMyWay.find('GET', '/foo%3Fbar').params, { param1: 'foo?bar' }) + + t.same(findMyWay.find('GET', '/foo%40bar').params, { param1: 'foo@bar' }) +})