From 1b625713c45d81eb6a7c683414a7b0ab6bee2159 Mon Sep 17 00:00:00 2001 From: Xvezda Date: Thu, 21 Mar 2024 21:11:31 +0900 Subject: [PATCH] fix(fetch): properly redirect non-ascii location header url (#2971) * fix(fetch): properly redirect non-ascii location header url * chore: fix typo * test: use simpler code * chore: clarify what the code does * chore: add comment * chore: normalize location url only if it contains invalid character * chore: apply suggestion See: https://github.com/nodejs/undici/pull/2971#discussion_r1530469304 * chore: remove redundant condition check --- lib/web/fetch/util.js | 36 ++++++++++++++++++++++++++ test/fetch/fetch-url-after-redirect.js | 21 +++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/lib/web/fetch/util.js b/lib/web/fetch/util.js index f11ba162c51..a1ef3f47a9d 100644 --- a/lib/web/fetch/util.js +++ b/lib/web/fetch/util.js @@ -44,6 +44,12 @@ function responseLocationURL (response, requestFragment) { // 3. If location is a header value, then set location to the result of // parsing location with response’s URL. if (location !== null && isValidHeaderValue(location)) { + if (!isValidEncodedURL(location)) { + // Some websites respond location header in UTF-8 form without encoding them as ASCII + // and major browsers redirect them to correctly UTF-8 encoded addresses. + // Here, we handle that behavior in the same way. + location = normalizeBinaryStringToUtf8(location) + } location = new URL(location, responseURL(response)) } @@ -57,6 +63,36 @@ function responseLocationURL (response, requestFragment) { return location } +/** + * @see https://www.rfc-editor.org/rfc/rfc1738#section-2.2 + * @param {string} url + * @returns {boolean} + */ +function isValidEncodedURL (url) { + for (const c of url) { + const code = c.charCodeAt(0) + // Not used in US-ASCII + if (code >= 0x80) { + return false + } + // Control characters + if ((code >= 0x00 && code <= 0x1F) || code === 0x7F) { + return false + } + } + return true +} + +/** + * If string contains non-ASCII characters, assumes it's UTF-8 encoded and decodes it. + * Since UTF-8 is a superset of ASCII, this will work for ASCII strings as well. + * @param {string} value + * @returns {string} + */ +function normalizeBinaryStringToUtf8 (value) { + return Buffer.from(value, 'binary').toString('utf8') +} + /** @returns {URL} */ function requestCurrentURL (request) { return request.urlList[request.urlList.length - 1] diff --git a/test/fetch/fetch-url-after-redirect.js b/test/fetch/fetch-url-after-redirect.js index ecc112eb7c1..e387848f8e4 100644 --- a/test/fetch/fetch-url-after-redirect.js +++ b/test/fetch/fetch-url-after-redirect.js @@ -38,3 +38,24 @@ test('after redirecting the url of the response is set to the target url', async assert.strictEqual(response.url, `http://127.0.0.1:${port}/target`) }) + +test('location header with non-ASCII character redirects to a properly encoded url', async (t) => { + // redirect -> %EC%95%88%EB%85%95 (안녕), not %C3%AC%C2%95%C2%88%C3%AB%C2%85%C2%95 + const server = createServer((req, res) => { + if (res.req.url.endsWith('/redirect')) { + res.writeHead(302, undefined, { Location: `/${Buffer.from('안녕').toString('binary')}` }) + res.end() + } else { + res.writeHead(200, 'dummy', { 'Content-Type': 'text/plain' }) + res.end() + } + }) + t.after(closeServerAsPromise(server)) + + const listenAsync = promisify(server.listen.bind(server)) + await listenAsync(0) + const { port } = server.address() + const response = await fetch(`http://127.0.0.1:${port}/redirect`) + + assert.strictEqual(response.url, `http://127.0.0.1:${port}/${encodeURIComponent('안녕')}`) +})