From e07ade358f52f7f1c986e0410d4b30a9417cd4e0 Mon Sep 17 00:00:00 2001 From: Xvezda Date: Mon, 18 Mar 2024 23:25:03 +0900 Subject: [PATCH 1/8] fix(fetch): properly redirect non-ascii location header url --- lib/web/fetch/util.js | 3 +- test/fetch/fetch-url-after-redirect.js | 50 ++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/lib/web/fetch/util.js b/lib/web/fetch/util.js index f11ba162c51..5fb4499d876 100644 --- a/lib/web/fetch/util.js +++ b/lib/web/fetch/util.js @@ -44,7 +44,8 @@ function responseLocationURL (response, requestFragment) { // 3. If location is a header value, then set location to the result of // parsing location with response’s URL. if (location !== null && isValidHeaderValue(location)) { - location = new URL(location, responseURL(response)) + // Make sure location is properly encoded. + location = new URL(Buffer.from(location, 'ascii').toString('utf8'), responseURL(response)) } // 4. If location is a URL whose fragment is null, then set location’s diff --git a/test/fetch/fetch-url-after-redirect.js b/test/fetch/fetch-url-after-redirect.js index ecc112eb7c1..a24e3f08527 100644 --- a/test/fetch/fetch-url-after-redirect.js +++ b/test/fetch/fetch-url-after-redirect.js @@ -38,3 +38,53 @@ test('after redirecting the url of the response is set to the target url', async assert.strictEqual(response.url, `http://127.0.0.1:${port}/target`) }) + +test('location header with non-ASCII character redirects to a properly encoded url', async (t) => { + /** + * @example + * ```ts + * const server = createServer((req, res) => { + * // redirect -> %EC%95%88%EB%85%95 (안녕), not %C3%AC%C2%95%C2%88%C3%AB%C2%85%C2%95 + * if (res.req.url.endsWith('/redirect')) { + * const path = String.fromCharCode.apply( + * null, + * encodeURIComponent('안녕') + * .split('%') + * .slice(1) + * .map(n => parseInt(n, 16)) + * ) + * res.writeHead(302, { + * Location: `/${}` + * }) + * return res.end() + * } + * }) + * ``` + */ + const encodedPath = encodeURIComponent('안녕') + const nonEncodedPath = String.fromCharCode.apply( + null, + encodeURIComponent('안녕') + .split('%') + .slice(1) + .map(n => parseInt(n, 16)) + ) + + const server = createServer((req, res) => { + if (res.req.url.endsWith('/redirect')) { + res.writeHead(302, undefined, { Location: `/${nonEncodedPath}` }) + res.end() + } else { + res.writeHead(200, 'dummy', { 'Content-Type': 'text/plain' }) + res.end() + } + }) + t.after(closeServerAsPromise(server)) + + const listenAsync = promisify(server.listen.bind(server)) + await listenAsync(0) + const { port } = server.address() + const response = await fetch(`http://127.0.0.1:${port}/redirect`) + + assert.strictEqual(response.url, `http://127.0.0.1:${port}/${encodedPath}`) +}) From 48fd3c1c59a1e7dbac105bd693fa51045c33e021 Mon Sep 17 00:00:00 2001 From: Xvezda Date: Tue, 19 Mar 2024 02:00:50 +0900 Subject: [PATCH 2/8] chore: fix typo --- test/fetch/fetch-url-after-redirect.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/fetch/fetch-url-after-redirect.js b/test/fetch/fetch-url-after-redirect.js index a24e3f08527..3c5c81e0f37 100644 --- a/test/fetch/fetch-url-after-redirect.js +++ b/test/fetch/fetch-url-after-redirect.js @@ -54,7 +54,7 @@ test('location header with non-ASCII character redirects to a properly encoded u * .map(n => parseInt(n, 16)) * ) * res.writeHead(302, { - * Location: `/${}` + * Location: `/${path}` * }) * return res.end() * } From 01ceae8ad9f0ebcca56ea9c5444c39fe9177dd6d Mon Sep 17 00:00:00 2001 From: Xvezda Date: Tue, 19 Mar 2024 12:53:17 +0900 Subject: [PATCH 3/8] test: use simpler code --- test/fetch/fetch-url-after-redirect.js | 35 +++----------------------- 1 file changed, 3 insertions(+), 32 deletions(-) diff --git a/test/fetch/fetch-url-after-redirect.js b/test/fetch/fetch-url-after-redirect.js index 3c5c81e0f37..e387848f8e4 100644 --- a/test/fetch/fetch-url-after-redirect.js +++ b/test/fetch/fetch-url-after-redirect.js @@ -40,39 +40,10 @@ test('after redirecting the url of the response is set to the target url', async }) test('location header with non-ASCII character redirects to a properly encoded url', async (t) => { - /** - * @example - * ```ts - * const server = createServer((req, res) => { - * // redirect -> %EC%95%88%EB%85%95 (안녕), not %C3%AC%C2%95%C2%88%C3%AB%C2%85%C2%95 - * if (res.req.url.endsWith('/redirect')) { - * const path = String.fromCharCode.apply( - * null, - * encodeURIComponent('안녕') - * .split('%') - * .slice(1) - * .map(n => parseInt(n, 16)) - * ) - * res.writeHead(302, { - * Location: `/${path}` - * }) - * return res.end() - * } - * }) - * ``` - */ - const encodedPath = encodeURIComponent('안녕') - const nonEncodedPath = String.fromCharCode.apply( - null, - encodeURIComponent('안녕') - .split('%') - .slice(1) - .map(n => parseInt(n, 16)) - ) - + // redirect -> %EC%95%88%EB%85%95 (안녕), not %C3%AC%C2%95%C2%88%C3%AB%C2%85%C2%95 const server = createServer((req, res) => { if (res.req.url.endsWith('/redirect')) { - res.writeHead(302, undefined, { Location: `/${nonEncodedPath}` }) + res.writeHead(302, undefined, { Location: `/${Buffer.from('안녕').toString('binary')}` }) res.end() } else { res.writeHead(200, 'dummy', { 'Content-Type': 'text/plain' }) @@ -86,5 +57,5 @@ test('location header with non-ASCII character redirects to a properly encoded u const { port } = server.address() const response = await fetch(`http://127.0.0.1:${port}/redirect`) - assert.strictEqual(response.url, `http://127.0.0.1:${port}/${encodedPath}`) + assert.strictEqual(response.url, `http://127.0.0.1:${port}/${encodeURIComponent('안녕')}`) }) From 6d5f7702fa30481f378728f62b429817d2be3c71 Mon Sep 17 00:00:00 2001 From: Xvezda Date: Tue, 19 Mar 2024 17:49:49 +0900 Subject: [PATCH 4/8] chore: clarify what the code does --- lib/web/fetch/util.js | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/lib/web/fetch/util.js b/lib/web/fetch/util.js index 5fb4499d876..9c134d265ea 100644 --- a/lib/web/fetch/util.js +++ b/lib/web/fetch/util.js @@ -44,8 +44,10 @@ function responseLocationURL (response, requestFragment) { // 3. If location is a header value, then set location to the result of // parsing location with response’s URL. if (location !== null && isValidHeaderValue(location)) { - // Make sure location is properly encoded. - location = new URL(Buffer.from(location, 'ascii').toString('utf8'), responseURL(response)) + // Some websites respond location header in binary form without encoding them + // and major browsers redirect them to correctly UTF-8 encoded addresses. + // Here, we handle that behavior in the same way. + location = new URL(decodeBinaryStringToUtf8(location), responseURL(response)) } // 4. If location is a URL whose fragment is null, then set location’s @@ -58,6 +60,14 @@ function responseLocationURL (response, requestFragment) { return location } +/** + * @param {string} value + * @returns {string} + */ +function decodeBinaryStringToUtf8 (value) { + return Buffer.from(value, 'binary').toString('utf8') +} + /** @returns {URL} */ function requestCurrentURL (request) { return request.urlList[request.urlList.length - 1] From ea34bc2bfbda317c8e995ac103d2f02feeb00d0a Mon Sep 17 00:00:00 2001 From: Xvezda Date: Tue, 19 Mar 2024 20:10:54 +0900 Subject: [PATCH 5/8] chore: add comment --- lib/web/fetch/util.js | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/web/fetch/util.js b/lib/web/fetch/util.js index 9c134d265ea..9ee2649800d 100644 --- a/lib/web/fetch/util.js +++ b/lib/web/fetch/util.js @@ -47,7 +47,7 @@ function responseLocationURL (response, requestFragment) { // Some websites respond location header in binary form without encoding them // and major browsers redirect them to correctly UTF-8 encoded addresses. // Here, we handle that behavior in the same way. - location = new URL(decodeBinaryStringToUtf8(location), responseURL(response)) + location = new URL(normalizeBinaryStringToUtf8(location), responseURL(response)) } // 4. If location is a URL whose fragment is null, then set location’s @@ -61,10 +61,12 @@ function responseLocationURL (response, requestFragment) { } /** + * If string contains non-ASCII characters, assumes it's UTF-8 encoded and decodes it. + * Since UTF-8 is a superset of ASCII, this will work for ASCII strings as well. * @param {string} value * @returns {string} */ -function decodeBinaryStringToUtf8 (value) { +function normalizeBinaryStringToUtf8 (value) { return Buffer.from(value, 'binary').toString('utf8') } From efccdde924ccd5aa22f371f684ad024343b35147 Mon Sep 17 00:00:00 2001 From: Xvezda Date: Tue, 19 Mar 2024 23:14:30 +0900 Subject: [PATCH 6/8] chore: normalize location url only if it contains invalid character --- lib/web/fetch/util.js | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/lib/web/fetch/util.js b/lib/web/fetch/util.js index 9ee2649800d..0f7844dfdd8 100644 --- a/lib/web/fetch/util.js +++ b/lib/web/fetch/util.js @@ -44,10 +44,13 @@ function responseLocationURL (response, requestFragment) { // 3. If location is a header value, then set location to the result of // parsing location with response’s URL. if (location !== null && isValidHeaderValue(location)) { - // Some websites respond location header in binary form without encoding them - // and major browsers redirect them to correctly UTF-8 encoded addresses. - // Here, we handle that behavior in the same way. - location = new URL(normalizeBinaryStringToUtf8(location), responseURL(response)) + if (!isValidEncodedURL(location)) { + // Some websites respond location header in binary form without encoding them + // and major browsers redirect them to correctly UTF-8 encoded addresses. + // Here, we handle that behavior in the same way. + location = normalizeBinaryStringToUtf8(location) + } + location = new URL(location, responseURL(response)) } // 4. If location is a URL whose fragment is null, then set location’s @@ -60,6 +63,26 @@ function responseLocationURL (response, requestFragment) { return location } +/** + * @see https://www.rfc-editor.org/rfc/rfc1738#section-2.2 + * @param {string} url + * @returns {boolean} + */ +function isValidEncodedURL (url) { + for (const c of url) { + const code = c.charCodeAt(0) + // Not used in US-ASCII + if (code >= 0x80 && code <= 0xFF) { + return false + } + // Control characters + if ((code >= 0x00 && code <= 0x1F) || code === 0x7F) { + return false + } + } + return true +} + /** * If string contains non-ASCII characters, assumes it's UTF-8 encoded and decodes it. * Since UTF-8 is a superset of ASCII, this will work for ASCII strings as well. From 45f2e48a2ca318e170bcb97a27a1d0eb11ab8377 Mon Sep 17 00:00:00 2001 From: Xvezda Date: Tue, 19 Mar 2024 23:30:15 +0900 Subject: [PATCH 7/8] chore: apply suggestion See: https://github.com/nodejs/undici/pull/2971#discussion_r1530469304 --- lib/web/fetch/util.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/web/fetch/util.js b/lib/web/fetch/util.js index 0f7844dfdd8..4972498c75d 100644 --- a/lib/web/fetch/util.js +++ b/lib/web/fetch/util.js @@ -45,7 +45,7 @@ function responseLocationURL (response, requestFragment) { // parsing location with response’s URL. if (location !== null && isValidHeaderValue(location)) { if (!isValidEncodedURL(location)) { - // Some websites respond location header in binary form without encoding them + // Some websites respond location header in UTF-8 form without encoding them as ASCII // and major browsers redirect them to correctly UTF-8 encoded addresses. // Here, we handle that behavior in the same way. location = normalizeBinaryStringToUtf8(location) From 3519ca12cabb20caf424f3c3935c576c4c183b9d Mon Sep 17 00:00:00 2001 From: Xvezda Date: Tue, 19 Mar 2024 23:45:45 +0900 Subject: [PATCH 8/8] chore: remove redundant condition check --- lib/web/fetch/util.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/web/fetch/util.js b/lib/web/fetch/util.js index 4972498c75d..a1ef3f47a9d 100644 --- a/lib/web/fetch/util.js +++ b/lib/web/fetch/util.js @@ -72,7 +72,7 @@ function isValidEncodedURL (url) { for (const c of url) { const code = c.charCodeAt(0) // Not used in US-ASCII - if (code >= 0x80 && code <= 0xFF) { + if (code >= 0x80) { return false } // Control characters