From 654f12ee2f6bea5309c0d4d1d5c4eda9af2be6dc Mon Sep 17 00:00:00 2001 From: Khafra Date: Tue, 30 Jan 2024 23:25:48 -0500 Subject: [PATCH 1/2] implement spec changes from a while ago --- lib/fetch/body.js | 38 +++++---- lib/fetch/dataURL.js | 112 ++++++++++++++++++++++---- lib/fetch/index.js | 9 ++- lib/fetch/util.js | 182 +++++++++++++++++++++++++++++++++++-------- 4 files changed, 275 insertions(+), 66 deletions(-) diff --git a/lib/fetch/body.js b/lib/fetch/body.js index 45aecd04c79..24aa5eff8f6 100644 --- a/lib/fetch/body.js +++ b/lib/fetch/body.js @@ -8,18 +8,19 @@ const { isReadableStreamLike, readableStreamClose, createDeferredPromise, - fullyReadBody + fullyReadBody, + extractMimeType } = require('./util') const { FormData } = require('./formdata') const { kState } = require('./symbols') const { webidl } = require('./webidl') const { Blob, File: NativeFile } = require('node:buffer') -const { kBodyUsed, kHeadersList } = require('../core/symbols') +const { kBodyUsed } = require('../core/symbols') const assert = require('node:assert') const { isErrored } = require('../core/util') const { isUint8Array, isArrayBuffer } = require('util/types') const { File: UndiciFile } = require('./file') -const { parseMIMEType, serializeAMimeType } = require('./dataURL') +const { serializeAMimeType } = require('./dataURL') /** @type {globalThis['File']} */ const File = NativeFile ?? UndiciFile @@ -330,7 +331,7 @@ function bodyMixinMethods (instance) { return specConsumeBody(this, (bytes) => { let mimeType = bodyMimeType(this) - if (mimeType === 'failure') { + if (mimeType === null) { mimeType = '' } else if (mimeType) { mimeType = serializeAMimeType(mimeType) @@ -369,9 +370,8 @@ function bodyMixinMethods (instance) { throwIfAborted(this[kState]) - const contentType = this.headers[kHeadersList].get('content-type', true) - - const mimeType = contentType !== null ? parseMIMEType(contentType) : 'failure' + // 1. Let mimeType be the result of get the MIME type with this. 
+ const mimeType = bodyMimeType(this) // If mimeType’s essence is "multipart/form-data", then: if (mimeType !== 'failure' && mimeType.essence === 'multipart/form-data') { @@ -581,17 +581,25 @@ function parseJSONFromBytes (bytes) { /** * @see https://fetch.spec.whatwg.org/#concept-body-mime-type - * @param {import('./response').Response|import('./request').Request} object + * @param {import('./response').Response|import('./request').Request} requestOrResponse */ -function bodyMimeType (object) { - const { headersList } = object[kState] - const contentType = headersList.get('content-type') - - if (contentType === null) { - return 'failure' +function bodyMimeType (requestOrResponse) { + // 1. Let headers be null. + // 2. If requestOrResponse is a Request object, then set headers to requestOrResponse’s request’s header list. + // 3. Otherwise, set headers to requestOrResponse’s response’s header list. + /** @type {import('./headers').HeadersList} */ + const headers = requestOrResponse[kState].headersList + + // 4. Let mimeType be the result of extracting a MIME type from headers. + const mimeType = extractMimeType(headers) + + // 5. If mimeType is failure, then return null. + if (mimeType === 'failure') { + return null } - return parseMIMEType(contentType) + // 6. Return mimeType. 
+ return mimeType } module.exports = { diff --git a/lib/fetch/dataURL.js b/lib/fetch/dataURL.js index 2b78efdaf27..ee7f72e9c93 100644 --- a/lib/fetch/dataURL.js +++ b/lib/fetch/dataURL.js @@ -1,5 +1,4 @@ const assert = require('node:assert') -const { isomorphicDecode } = require('./util') const encoder = new TextEncoder() @@ -604,18 +603,7 @@ function isHTTPWhiteSpace (char) { * @param {boolean} [trailing=true] */ function removeHTTPWhitespace (str, leading = true, trailing = true) { - let lead = 0 - let trail = str.length - 1 - - if (leading) { - while (lead < str.length && isHTTPWhiteSpace(str.charCodeAt(lead))) lead++ - } - - if (trailing) { - while (trail > 0 && isHTTPWhiteSpace(str.charCodeAt(trail))) trail-- - } - - return lead === 0 && trail === str.length - 1 ? str : str.slice(lead, trail + 1) + return removeChars(str, leading, trailing, isHTTPWhiteSpace) } /** @@ -634,20 +622,110 @@ function isASCIIWhitespace (char) { * @param {boolean} [trailing=true] */ function removeASCIIWhitespace (str, leading = true, trailing = true) { + return removeChars(str, leading, trailing, isASCIIWhitespace) +} + +/** + * + * @param {string} str + * @param {boolean} leading + * @param {boolean} trailing + * @param {(charCode: number) => boolean} predicate + * @returns + */ +function removeChars (str, leading, trailing, predicate) { let lead = 0 let trail = str.length - 1 if (leading) { - while (lead < str.length && isASCIIWhitespace(str.charCodeAt(lead))) lead++ + while (lead < str.length && predicate(str.charCodeAt(lead))) lead++ } if (trailing) { - while (trail > 0 && isASCIIWhitespace(str.charCodeAt(trail))) trail-- + while (trail > 0 && predicate(str.charCodeAt(trail))) trail-- } return lead === 0 && trail === str.length - 1 ? str : str.slice(lead, trail + 1) } +/** + * @see https://infra.spec.whatwg.org/#isomorphic-decode + * @param {Uint8Array} input + * @returns {string} + */ +function isomorphicDecode (input) { + // 1. 
To isomorphic decode a byte sequence input, return a string whose code point + // length is equal to input’s length and whose code points have the same values + // as the values of input’s bytes, in the same order. + const length = input.length + if ((2 << 15) - 1 > length) { + return String.fromCharCode.apply(null, input) + } + let result = ''; let i = 0 + let addition = (2 << 15) - 1 + while (i < length) { + if (i + addition > length) { + addition = length - i + } + result += String.fromCharCode.apply(null, input.subarray(i, i += addition)) + } + return result +} + +/** + * @see https://mimesniff.spec.whatwg.org/#minimize-a-supported-mime-type + * @param {Exclude<ReturnType<typeof parseMIMEType>, 'failure'>} mimeType + */ +function minimizeSupportedMimeType (mimeType) { + switch (mimeType.essence) { + case 'application/ecmascript': + case 'application/javascript': + case 'application/x-ecmascript': + case 'application/x-javascript': + case 'text/ecmascript': + case 'text/javascript': + case 'text/javascript1.0': + case 'text/javascript1.1': + case 'text/javascript1.2': + case 'text/javascript1.3': + case 'text/javascript1.4': + case 'text/javascript1.5': + case 'text/jscript': + case 'text/livescript': + case 'text/x-ecmascript': + case 'text/x-javascript': + // 1. If mimeType is a JavaScript MIME type, then return "text/javascript". + return 'text/javascript' + case 'application/json': + case 'text/json': + // 2. If mimeType is a JSON MIME type, then return "application/json". + return 'application/json' + case 'image/svg+xml': + // 3. If mimeType’s essence is "image/svg+xml", then return "image/svg+xml". + return 'image/svg+xml' + case 'text/xml': + case 'application/xml': + // 4. If mimeType is an XML MIME type, then return "application/xml". + return 'application/xml' + } + + // 2. If mimeType is a JSON MIME type, then return "application/json". + if (mimeType.subtype.endsWith('+json')) { + return 'application/json' + } + + // 4. 
If mimeType is an XML MIME type, then return "application/xml". + if (mimeType.subtype.endsWith('+xml')) { + return 'application/xml' + } + + // 5. If mimeType is supported by the user agent, then return mimeType’s essence. + // Technically, node doesn't support any mimetypes. + + // 6. Return the empty string. + return '' +} + module.exports = { dataURLProcessor, URLSerializer, @@ -656,5 +734,7 @@ module.exports = { stringPercentDecode, parseMIMEType, collectAnHTTPQuotedString, - serializeAMimeType + serializeAMimeType, + removeChars, + minimizeSupportedMimeType } diff --git a/lib/fetch/index.js b/lib/fetch/index.js index 7be223c2eca..9693782552f 100644 --- a/lib/fetch/index.js +++ b/lib/fetch/index.js @@ -44,7 +44,8 @@ const { clampAndCoarsenConnectionTimingInfo, simpleRangeHeaderValue, buildContentRange, - createInflate + createInflate, + extractMimeType } = require('./util') const { kState } = require('./symbols') const assert = require('node:assert') @@ -59,7 +60,7 @@ const { const EE = require('node:events') const { Readable, pipeline } = require('node:stream') const { addAbortListener, isErrored, isReadable, nodeMajor, nodeMinor, bufferToLowerCasedHeaderName } = require('../core/util') -const { dataURLProcessor, serializeAMimeType, parseMIMEType } = require('./dataURL') +const { dataURLProcessor, serializeAMimeType, minimizeSupportedMimeType } = require('./dataURL') const { getGlobalDispatcher } = require('../global') const { webidl } = require('./webidl') const { STATUS_CODES } = require('node:http') @@ -1027,11 +1028,11 @@ function fetchFinale (fetchParams, response) { responseStatus = response.status // 2. Let mimeType be the result of extracting a MIME type from response’s header list. - const mimeType = parseMIMEType(response.headersList.get('content-type', true)) // TODO: fix + const mimeType = extractMimeType(response.headersList) // 3. 
If mimeType is not failure, then set bodyInfo’s content type to the result of minimizing a supported MIME type given mimeType. if (mimeType !== 'failure') { - // TODO + bodyInfo.contentType = minimizeSupportedMimeType(mimeType) } } diff --git a/lib/fetch/util.js b/lib/fetch/util.js index ebc9b41601b..a306bdbbfe4 100644 --- a/lib/fetch/util.js +++ b/lib/fetch/util.js @@ -4,6 +4,7 @@ const { Transform } = require('node:stream') const zlib = require('node:zlib') const { redirectStatusSet, referrerPolicySet: referrerPolicyTokens, badPortsSet } = require('./constants') const { getGlobalOrigin } = require('./global') +const { collectASequenceOfCodePoints, collectAnHTTPQuotedString, removeChars, parseMIMEType } = require('./dataURL') const { performance } = require('node:perf_hooks') const { isBlobLike, toUSVString, ReadableStreamFrom, isValidHTTPToken } = require('../core/util') const assert = require('node:assert') @@ -889,29 +890,6 @@ function isReadableStreamLike (stream) { ) } -/** - * @see https://infra.spec.whatwg.org/#isomorphic-decode - * @param {Uint8Array} input - */ -function isomorphicDecode (input) { - // 1. To isomorphic decode a byte sequence input, return a string whose code point - // length is equal to input’s length and whose code points have the same values - // as the values of input’s bytes, in the same order. 
- const length = input.length - if ((2 << 15) - 1 > length) { - return String.fromCharCode.apply(null, input) - } - let result = ''; let i = 0 - let addition = (2 << 15) - 1 - while (i < length) { - if (i + addition > length) { - addition = length - i - } - result += String.fromCharCode.apply(null, input.subarray(i, i += addition)) - } - return result -} - /** * @param {ReadableStreamController} controller */ @@ -1009,18 +987,12 @@ function urlIsHttpHttpsScheme (url) { return protocol === 'http:' || protocol === 'https:' } -/** @type {import('./dataURL')['collectASequenceOfCodePoints']} */ -let collectASequenceOfCodePoints - /** * @see https://fetch.spec.whatwg.org/#simple-range-header-value * @param {string} value * @param {boolean} allowWhitespace */ function simpleRangeHeaderValue (value, allowWhitespace) { - // Note: avoid circular require - collectASequenceOfCodePoints ??= require('./dataURL').collectASequenceOfCodePoints - // 1. Let data be the isomorphic decoding of value. // Note: isomorphic decoding takes a sequence of bytes (ie. a Uint8Array) and turns it into a string, // nothing more. We obviously don't need to do that if value is a string already. @@ -1213,6 +1185,154 @@ function createInflate () { return new InflateStream() } +/** + * @see https://fetch.spec.whatwg.org/#concept-header-extract-mime-type + * @param {import('./headers').HeadersList} headers + */ +function extractMimeType (headers) { + // 1. Let charset be null. + let charset = null + + // 2. Let essence be null. + let essence = null + + // 3. Let mimeType be null. + let mimeType = null + + // 4. Let values be the result of getting, decoding, and splitting `Content-Type` from headers. + const values = getDecodeSplit('content-type', headers) + + // 5. If values is null, then return failure. + if (values === null) { + return 'failure' + } + + // 6. For each value of values: + for (const value of values) { + // 6.1. Let temporaryMimeType be the result of parsing value. 
+ const temporaryMimeType = parseMIMEType(value) + + // 6.2. If temporaryMimeType is failure or its essence is "*/*", then continue. + if (temporaryMimeType === 'failure' || temporaryMimeType.essence === '*/*') { + continue + } + + // 6.3. Set mimeType to temporaryMimeType. + mimeType = temporaryMimeType + + // 6.4. If mimeType’s essence is not essence, then: + if (mimeType.essence !== essence) { + // 6.4.1. Set charset to null. + charset = null + + // 6.4.2. If mimeType’s parameters["charset"] exists, then set charset to + // mimeType’s parameters["charset"]. + if (mimeType.parameters.has('charset')) { + charset = mimeType.parameters.get('charset') + } + + // 6.4.3. Set essence to mimeType’s essence. + essence = mimeType.essence + } else if (!mimeType.parameters.has('charset') && charset !== null) { + // 6.5. Otherwise, if mimeType’s parameters["charset"] does not exist, and + // charset is non-null, set mimeType’s parameters["charset"] to charset. + mimeType.parameters.set('charset', charset) + } + } + + // 7. If mimeType is null, then return failure. + if (mimeType == null) { + return 'failure' + } + + // 8. Return mimeType. + return mimeType +} + +/** + * @see https://fetch.spec.whatwg.org/#header-value-get-decode-and-split + * @param {string|null} value + */ +function gettingDecodingSplitting (value) { + // 1. Let input be the result of isomorphic decoding value. + const input = value + + // 2. Let position be a position variable for input, initially pointing at the start of input. + const position = { position: 0 } + + // 3. Let values be a list of strings, initially empty. + const values = [] + + // 4. Let temporaryValue be the empty string. + let temporaryValue = '' + + // 5. While position is not past the end of input: + while (position.position < input.length) { + // 5.1. Append the result of collecting a sequence of code points that are not U+0022 (") + // or U+002C (,) from input, given position, to temporaryValue. 
+ temporaryValue += collectASequenceOfCodePoints( + (char) => char !== '"' && char !== ',', + input, + position + ) + + // 5.2. If position is not past the end of input, then: + if (position.position < input.length) { + // 5.2.1. If the code point at position within input is U+0022 ("), then: + if (input.charCodeAt(position.position) === 0x22) { + // 5.2.1.1. Append the result of collecting an HTTP quoted string from input, given position, to temporaryValue. + temporaryValue += collectAnHTTPQuotedString( + input, + position + ) + + // 5.2.1.2. If position is not past the end of input, then continue. + if (position.position < input.length) { + continue + } + } else { + // 5.2.2. Otherwise: + + // 5.2.2.1. Assert: the code point at position within input is U+002C (,). + assert(input.charCodeAt(position.position) === 0x2C) + + // 5.2.2.2. Advance position by 1. + position.position++ + } + } + + // 5.3. Remove all HTTP tab or space from the start and end of temporaryValue. + temporaryValue = removeChars(temporaryValue, true, true, (char) => char === 0x9 || char === 0x20) + + // 5.4. Append temporaryValue to values. + values.push(temporaryValue) + + // 5.5. Set temporaryValue to the empty string. + temporaryValue = '' + } + + // 6. Return values. + return values +} + +/** + * @see https://fetch.spec.whatwg.org/#concept-header-list-get-decode-split + * @param {string} name lowercase header name + * @param {import('./headers').HeadersList} list + */ +function getDecodeSplit (name, list) { + // 1. Let value be the result of getting name from list. + const value = list.get(name, true) + + // 2. If value is null, then return null. + if (value === null) { + return null + } + + // 3. Return the result of getting, decoding, and splitting value. 
+ return gettingDecodingSplitting(value) +} + module.exports = { isAborted, isCancelled, @@ -1252,7 +1372,6 @@ module.exports = { isReadableStreamLike, readableStreamClose, isomorphicEncode, - isomorphicDecode, urlIsLocal, urlHasHttpsScheme, urlIsHttpHttpsScheme, @@ -1261,5 +1380,6 @@ module.exports = { simpleRangeHeaderValue, buildContentRange, parseMetadata, - createInflate + createInflate, + extractMimeType } From 1175f4ccadfef5c213611a8984e4a6f26b185bd4 Mon Sep 17 00:00:00 2001 From: Khafra Date: Tue, 30 Jan 2024 23:27:19 -0500 Subject: [PATCH 2/2] fixup --- lib/fetch/body.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/fetch/body.js b/lib/fetch/body.js index 24aa5eff8f6..9ceb094ef8d 100644 --- a/lib/fetch/body.js +++ b/lib/fetch/body.js @@ -374,7 +374,7 @@ function bodyMixinMethods (instance) { const mimeType = bodyMimeType(this) // If mimeType’s essence is "multipart/form-data", then: - if (mimeType !== 'failure' && mimeType.essence === 'multipart/form-data') { + if (mimeType !== null && mimeType.essence === 'multipart/form-data') { const headers = {} for (const [key, value] of this.headers) headers[key] = value @@ -432,7 +432,7 @@ function bodyMixinMethods (instance) { await busboyResolve return responseFormData - } else if (mimeType !== 'failure' && mimeType.essence === 'application/x-www-form-urlencoded') { + } else if (mimeType !== null && mimeType.essence === 'application/x-www-form-urlencoded') { // Otherwise, if mimeType’s essence is "application/x-www-form-urlencoded", then: // 1. Let entries be the result of parsing bytes.