From 416864084a651cb42f1982e4de5042c1273b1971 Mon Sep 17 00:00:00 2001 From: Khafra Date: Sat, 2 Mar 2024 23:47:22 -0500 Subject: [PATCH 01/11] implement sync formdata parsser --- lib/web/fetch/body.js | 3 +- lib/web/fetch/data-url.js | 3 +- lib/web/fetch/formdata-parser.js | 379 +++++++++++++++++++++++++++++++ lib/web/fetch/formdata.js | 2 +- 4 files changed, 384 insertions(+), 3 deletions(-) create mode 100644 lib/web/fetch/formdata-parser.js diff --git a/lib/web/fetch/body.js b/lib/web/fetch/body.js index 12377cb511d..ee350bef4e9 100644 --- a/lib/web/fetch/body.js +++ b/lib/web/fetch/body.js @@ -577,5 +577,6 @@ module.exports = { extractBody, safelyExtractBody, cloneBody, - mixinBody + mixinBody, + utf8DecodeBytes } diff --git a/lib/web/fetch/data-url.js b/lib/web/fetch/data-url.js index a966ace3b8e..0fcf262709c 100644 --- a/lib/web/fetch/data-url.js +++ b/lib/web/fetch/data-url.js @@ -738,5 +738,6 @@ module.exports = { collectAnHTTPQuotedString, serializeAMimeType, removeChars, - minimizeSupportedMimeType + minimizeSupportedMimeType, + HTTP_TOKEN_CODEPOINTS } diff --git a/lib/web/fetch/formdata-parser.js b/lib/web/fetch/formdata-parser.js new file mode 100644 index 00000000000..c0e34d59856 --- /dev/null +++ b/lib/web/fetch/formdata-parser.js @@ -0,0 +1,379 @@ +'use strict' + +const { webidl } = require('./webidl') +const { utf8DecodeBytes } = require('./body') +const { collectASequenceOfCodePoints, removeChars, HTTP_TOKEN_CODEPOINTS } = require('./data-url') +const { isFileLike, File: UndiciFile } = require('./file') +const { makeEntry } = require('./formdata') +const assert = require('node:assert') +const { isAscii } = require('node:buffer') + +const File = globalThis.File ?? UndiciFile + +/** + * @see https://andreubotella.github.io/multipart-form-data/#multipart-form-data-boundary + * @param {string} boundary + */ +function validateBoundary (boundary) { + const length = boundary.length + + // - its length is greater or equal to 27 and lesser or equal to 70, and + if (length < 27 || length > 70) { + return false + } + + // - it is composed by bytes in the ranges 0x30 to 0x39, 0x41 to 0x5A, or + // 0x61 to 0x7A, inclusive (ASCII alphanumeric), or which are 0x27 ('), + // 0x2D (-) or 0x5F (_). + for (let i = 0; i < boundary.length; i++) { + const cp = boundary.charCodeAt(i) + + if (!( + (cp >= 0x30 && cp <= 0x39) || + (cp >= 0x41 && cp <= 0x5a) || + (cp >= 0x61 && cp <= 0x7a) || + cp === 0x27 || + cp === 0x3d || + cp === 0x5f + )) { + return false + } + } + + return true +} + +/** + * @see https://andreubotella.github.io/multipart-form-data/#escape-a-multipart-form-data-name + * @param {string} name + * @param {string?} encoding + * @param {boolean?} isFilename + */ +function escapeFormDataName (name, encoding = 'utf-8', isFilename = false) { + // 1. If isFilename is true: + if (isFilename) { + // 1.1. Set name to the result of converting name into a scalar value string. + name = webidl.converters.USVString(name) + } else { + // 2. Otherwise: + + // 2.1. Assert: name is a scalar value string. + assert(name === webidl.converters.USVString(name)) + + // 2.2. Replace every occurrence of U+000D (CR) not followed by U+000A (LF), + // and every occurrence of U+000A (LF) not preceded by U+000D (CR), in + // name, by a string consisting of U+000D (CR) and U+000A (LF). + name = name.replace(/\r\n?|\r?\n/g, '\r\n') + } + + // 3. Let encoded be the result of encoding name with encoding. + assert(Buffer.isEncoding(encoding)) + + // 4. Replace every 0x0A (LF) bytes in encoded with the byte sequence `%0A`, + // 0x0D (CR) with `%0D` and 0x22 (") with `%22`. + name = name + .replace(/\n/g, '%0A') + .replace(/\r/g, '%0D') + .replace(/"/g, '%22') + + // 5. Return encoded. + return Buffer.from(name, encoding) // encoded +} + +/** + * @see https://andreubotella.github.io/multipart-form-data/#multipart-form-data-parser + * @param {string} input + * @param {ReturnType} mimeType + */ +function multipartFormDataParser (input, mimeType) { + // 1. Assert: mimeType’s essence is "multipart/form-data". + assert(mimeType !== 'failure' && mimeType.essence === 'multipart/form-data') + + // 2. If mimeType’s parameters["boundary"] does not exist, return failure. + // Otherwise, let boundary be the result of UTF-8 decoding mimeType’s + // parameters["boundary"]. + if (!mimeType.parameters.has('boundary')) { + return 'failure' + } + + const boundary = utf8DecodeBytes(Buffer.from(mimeType.parameters.get('boundary'))) + + // 3. Let entry list be an empty entry list. + const entryList = [] + + // 4. Let position be a pointer to a byte in input, initially pointing at + // the first byte. + const position = { position: 0 } + + // 5. While true: + while (true) { + // 5.1. If position points to a sequence of bytes starting with 0x2D 0x2D + // (`--`) followed by boundary, advance position by 2 + the length of + // boundary. Otherwise, return failure. + if (input.slice(position.position, position.position + 2 + boundary.length) === `--${boundary}`) { + position.position += 2 + boundary.length + } else { + return 'failure' + } + + // 5.2. If position points to the sequence of bytes 0x2D 0x2D 0x0D 0x0A + // (`--` followed by CR LF) followed by the end of input, return entry list. + // TODO: a FormData body doesn't need to end with CRLF? + if (position.position === input.length - 2 && input.endsWith('--')) { + return entryList + } + + // 5.3. If position does not point to a sequence of bytes starting with 0x0D + // 0x0A (CR LF), return failure. + if (input.charCodeAt(position.position) !== 0x0d || input.charCodeAt(position.position + 1) !== 0x0a) { + return 'failure' + } + + // 5.4. Advance position by 2. (This skips past the newline.) + position.position += 2 + + // 5.5. Let name, filename and contentType be the result of parsing + // multipart/form-data headers on input and position, if the result + // is not failure. Otherwise, return failure. + const result = parseMultipartFormDataHeaders(input, position) + + if (result === 'failure') { + return 'failure' + } + + let { name, filename, contentType } = result + + // 5.6. Advance position by 2. (This skips past the empty line that marks + // the end of the headers.) + position.position += 2 + + // 5.7. Let body be the empty byte sequence. + let body = '' + + // 5.8. Body loop: While position is not past the end of input: + // TODO: the steps here are completely wrong + { + const boundaryIndex = input.indexOf(boundary, position.position) + + if (boundaryIndex === -1) { + return 'failure' + } + + body = input.slice(position.position, boundaryIndex - 4) + + position.position += body.length + } + + // 5.9. If position does not point to a sequence of bytes starting with + // 0x0D 0x0A (CR LF), return failure. Otherwise, advance position by 2. + if (input.charCodeAt(position.position) !== 0x0d || input.charCodeAt(position.position + 1) !== 0x0a) { + return 'failure' + } else { + position.position += 2 + } + + // 5.10. If filename is not null: + let value + + if (filename !== null) { + // 5.10.1. If contentType is null, set contentType to "text/plain". + contentType ??= 'text/plain' + + // 5.10.2. If contentType is not an ASCII string, set contentType to the empty string. + if (!isAscii(Buffer.from(contentType))) { + contentType = '' + } + + // 5.10.3. Let value be a new File object with name filename, type contentType, and body body. + value = new File([body], filename, { type: contentType }) + } else { + // 5.11. Otherwise: + + // 5.11.1. Let value be the UTF-8 decoding without BOM of body. + value = utf8DecodeBytes(Buffer.from(body)) + } + + // 5.12. Assert: name is a scalar value string and value is either a scalar value string or a File object. + assert(name === webidl.converters.USVString(name)) + assert((typeof value === 'string' && value === webidl.converters.USVString(value)) || isFileLike(value)) + + // 5.13. Create an entry with name and value, and append it to entry list. + entryList.push(makeEntry(name, value, filename)) + } +} + +/** + * @see https://andreubotella.github.io/multipart-form-data/#parse-multipart-form-data-headers + * @param {string} input + * @param {{ position: number }} position + */ +function parseMultipartFormDataHeaders (input, position) { + // 1. Let name, filename and contentType be null. + let name = null + let filename = null + let contentType = null + + // 2. While true: + while (true) { + // 2.1. If position points to a sequence of bytes starting with 0x0D 0x0A (CR LF): + if (input.charCodeAt(position.position) === 0x0d && input.charCodeAt(position.position + 1) === 0x0a) { + // 2.1.1. If name is null, return failure. + if (name === null) { + return 'failure' + } + + // 2.1.2. Return name, filename and contentType. + return { name, filename, contentType } + } + + // 2.2. Let header name be the result of collecting a sequence of bytes that are + // not 0x0A (LF), 0x0D (CR) or 0x3A (:), given position. + let headerName = collectASequenceOfCodePoints( + (char) => char !== '\n' && char !== '\n' && char !== ':', + input, + position + ) + + // 2.3. Remove any HTTP tab or space bytes from the start or end of header name. + headerName = removeChars(headerName, true, true, (char) => char === 0x9 || char === 0x20) + + // 2.4. If header name does not match the field-name token production, return failure. + if (!HTTP_TOKEN_CODEPOINTS.test(headerName)) { + return 'failure' + } + + // 2.5. If the byte at position is not 0x3A (:), return failure. + if (input.charCodeAt(position.position) !== 0x3a) { + return 'failure' + } + + // 2.6. Advance position by 1. + position.position++ + + // 2.7. Collect a sequence of bytes that are HTTP tab or space bytes given position. + // (Do nothing with those bytes.) + collectASequenceOfCodePoints( + (char) => char === ' ' || char === '\t', + input, + position + ) + + // 2.8. Byte-lowercase header name and switch on the result: + switch (headerName.toLowerCase()) { + case 'content-disposition': { + // 1. Set name and filename to null. + name = filename = null + + // 2. If position does not point to a sequence of bytes starting with + // `form-data; name="`, return failure. + if (!input.slice(position.position).startsWith('form-data; name="')) { + return 'failure' + } + + // 3. Advance position so it points at the byte after the next 0x22 (") + // byte (the one in the sequence of bytes matched above). + position.position += 17 + + // 4. Set name to the result of parsing a multipart/form-data name given + // input and position, if the result is not failure. Otherwise, return + // failure. + name = parseMultipartFormDataName(input, position) + + if (name === false) { + return 'failure' + } + + // 5. If position points to a sequence of bytes starting with `; filename="`: + if (input.slice(position.position).startsWith('; filename="')) { + // 1. Advance position so it points at the byte after the next 0x22 (") byte + // (the one in the sequence of bytes matched above). + position.position += 13 + + // 2. Set filename to the result of parsing a multipart/form-data name given + // input and position, if the result is not failure. Otherwise, return failure. + filename = parseMultipartFormDataName(input, position) + + if (filename === false) { + return 'failure' + } + } + + break + } + case 'content-type': { + // 1. Let header value be the result of collecting a sequence of bytes that are + // not 0x0A (LF) or 0x0D (CR), given position. + let headerValue = collectASequenceOfCodePoints( + (char) => char !== '\r' && char !== '\n', + input, + position + ) + + // 2. Remove any HTTP tab or space bytes from the end of header value. + headerValue = removeChars(headerValue, false, true, (char) => char === 0x9 || char === 0x20) + + // 3. Set contentType to the isomorphic decoding of header value. + contentType = headerValue + + break + } + default: { + // Collect a sequence of bytes that are not 0x0A (LF) or 0x0D (CR), given position. + // (Do nothing with those bytes.) + collectASequenceOfCodePoints( + (char) => char !== '\n' && char !== '\r', + input, + position + ) + } + } + + // 2.9. If position does not point to a sequence of bytes starting with 0x0D 0x0A + // (CR LF), return failure. Otherwise, advance position by 2 (past the newline). + if (input.charCodeAt(position.position) !== 0x0d && input.charCodeAt(position.position + 1) !== 0x0a) { + return 'failure' + } else { + position.position += 2 + } + } +} + +/** + * @see https://andreubotella.github.io/multipart-form-data/#parse-a-multipart-form-data-name + * @param {string} input + * @param {{ position: number }} position + */ +function parseMultipartFormDataName (input, position) { + // 1. Assert: The byte at (position - 1) is 0x22 ("). + assert(input.charCodeAt(position.position - 1) === 0x22) + + // 2. Let name be the result of collecting a sequence of bytes that are not 0x0A (LF), 0x0D (CR) or 0x22 ("), given position. + let name = collectASequenceOfCodePoints( + (char) => char !== '\n' && char !== '\r' && char !== '"', + input, + position + ) + + // 3. If the byte at position is not 0x22 ("), return failure. Otherwise, advance position by 1. + if (input.charCodeAt(position.position) !== 0x22) { + return false // name could be 'failure' + } else { + position.position++ + } + + // 4. Replace any occurrence of the following subsequences in name with the given byte: + // - `%0A`: 0x0A (LF) + // - `%0D`: 0x0D (CR) + // - `%22`: 0x22 (") + name = name + .replace(/%0A/i, '\n') + .replace(/%0D/i, '\r') + .replace(/%22/, '"') + + // 5. Return the UTF-8 decoding without BOM of name. + return Buffer.from(name, 'utf-8').toString('utf-8') +} + +module.exports = { + multipartFormDataParser +} diff --git a/lib/web/fetch/formdata.js b/lib/web/fetch/formdata.js index e8dcd6fa614..b8c4ccfc902 100644 --- a/lib/web/fetch/formdata.js +++ b/lib/web/fetch/formdata.js @@ -216,4 +216,4 @@ function makeEntry (name, value, filename) { return { name, value } } -module.exports = { FormData } +module.exports = { FormData, makeEntry } From 353f29381a402480cc9ebbc2661be2c97720f398 Mon Sep 17 00:00:00 2001 From: Khafra Date: Sun, 3 Mar 2024 00:52:08 -0500 Subject: [PATCH 02/11] move utf8decodebytes --- lib/web/fetch/body.js | 33 +++----------------------------- lib/web/fetch/formdata-parser.js | 2 +- lib/web/fetch/util.js | 31 +++++++++++++++++++++++++++++- 3 files changed, 34 insertions(+), 32 deletions(-) diff --git a/lib/web/fetch/body.js b/lib/web/fetch/body.js index ee350bef4e9..e6e17a6d9bc 100644 --- a/lib/web/fetch/body.js +++ b/lib/web/fetch/body.js @@ -9,7 +9,8 @@ const { readableStreamClose, createDeferredPromise, fullyReadBody, - extractMimeType + extractMimeType, + utf8DecodeBytes } = require('./util') const { FormData } = require('./formdata') const { kState } = require('./symbols') @@ -25,7 +26,6 @@ const { Readable } = require('node:stream') /** @type {globalThis['File']} */ const File = NativeFile ?? UndiciFile const textEncoder = new TextEncoder() -const textDecoder = new TextDecoder() // https://fetch.spec.whatwg.org/#concept-bodyinit-extract function extractBody (object, keepalive = false) { @@ -516,32 +516,6 @@ function bodyUnusable (body) { return body != null && (body.stream.locked || util.isDisturbed(body.stream)) } -/** - * @see https://encoding.spec.whatwg.org/#utf-8-decode - * @param {Buffer} buffer - */ -function utf8DecodeBytes (buffer) { - if (buffer.length === 0) { - return '' - } - - // 1. Let buffer be the result of peeking three bytes from - // ioQueue, converted to a byte sequence. - - // 2. If buffer is 0xEF 0xBB 0xBF, then read three - // bytes from ioQueue. (Do nothing with those bytes.) - if (buffer[0] === 0xEF && buffer[1] === 0xBB && buffer[2] === 0xBF) { - buffer = buffer.subarray(3) - } - - // 3. Process a queue with an instance of UTF-8’s - // decoder, ioQueue, output, and "replacement". - const output = textDecoder.decode(buffer) - - // 4. Return output. - return output -} - /** * @see https://infra.spec.whatwg.org/#parse-json-bytes-to-a-javascript-value * @param {Uint8Array} bytes @@ -577,6 +551,5 @@ module.exports = { extractBody, safelyExtractBody, cloneBody, - mixinBody, - utf8DecodeBytes + mixinBody } diff --git a/lib/web/fetch/formdata-parser.js b/lib/web/fetch/formdata-parser.js index c0e34d59856..94722799127 100644 --- a/lib/web/fetch/formdata-parser.js +++ b/lib/web/fetch/formdata-parser.js @@ -1,7 +1,7 @@ 'use strict' const { webidl } = require('./webidl') -const { utf8DecodeBytes } = require('./body') +const { utf8DecodeBytes } = require('./util') const { collectASequenceOfCodePoints, removeChars, HTTP_TOKEN_CODEPOINTS } = require('./data-url') const { isFileLike, File: UndiciFile } = require('./file') const { makeEntry } = require('./formdata') diff --git a/lib/web/fetch/util.js b/lib/web/fetch/util.js index 6cf679b2ced..f11ba162c51 100644 --- a/lib/web/fetch/util.js +++ b/lib/web/fetch/util.js @@ -1428,6 +1428,34 @@ function getDecodeSplit (name, list) { return gettingDecodingSplitting(value) } +const textDecoder = new TextDecoder() + +/** + * @see https://encoding.spec.whatwg.org/#utf-8-decode + * @param {Buffer} buffer + */ +function utf8DecodeBytes (buffer) { + if (buffer.length === 0) { + return '' + } + + // 1. Let buffer be the result of peeking three bytes from + // ioQueue, converted to a byte sequence. + + // 2. If buffer is 0xEF 0xBB 0xBF, then read three + // bytes from ioQueue. (Do nothing with those bytes.) + if (buffer[0] === 0xEF && buffer[1] === 0xBB && buffer[2] === 0xBF) { + buffer = buffer.subarray(3) + } + + // 3. Process a queue with an instance of UTF-8’s + // decoder, ioQueue, output, and "replacement". + const output = textDecoder.decode(buffer) + + // 4. Return output. + return output +} + module.exports = { isAborted, isCancelled, @@ -1477,5 +1505,6 @@ module.exports = { parseMetadata, createInflate, extractMimeType, - getDecodeSplit + getDecodeSplit, + utf8DecodeBytes } From 125a9e298caca3540febbc925006180862b017d9 Mon Sep 17 00:00:00 2001 From: Khafra Date: Sun, 3 Mar 2024 01:56:07 -0500 Subject: [PATCH 03/11] fixup --- lib/web/fetch/body.js | 154 +++++++++---------------------- lib/web/fetch/data-url.js | 4 +- lib/web/fetch/formdata-parser.js | 125 +++++++++++++++++-------- package.json | 7 +- 4 files changed, 133 insertions(+), 157 deletions(-) diff --git a/lib/web/fetch/body.js b/lib/web/fetch/body.js index e6e17a6d9bc..04bee503677 100644 --- a/lib/web/fetch/body.js +++ b/lib/web/fetch/body.js @@ -1,6 +1,5 @@ 'use strict' -const Busboy = require('@fastify/busboy') const util = require('../../core/util') const { ReadableStreamFrom, @@ -15,17 +14,15 @@ const { const { FormData } = require('./formdata') const { kState } = require('./symbols') const { webidl } = require('./webidl') -const { Blob, File: NativeFile } = require('node:buffer') +const { Blob } = require('node:buffer') const assert = require('node:assert') const { isErrored } = require('../../core/util') const { isArrayBuffer } = require('node:util/types') -const { File: UndiciFile } = require('./file') const { serializeAMimeType } = require('./data-url') -const { Readable } = require('node:stream') +const { multipartFormDataParser } = require('./formdata-parser') -/** @type {globalThis['File']} */ -const File = NativeFile ?? UndiciFile const textEncoder = new TextEncoder() +const textDecoder = new TextDecoder() // https://fetch.spec.whatwg.org/#concept-bodyinit-extract function extractBody (object, keepalive = false) { @@ -338,116 +335,53 @@ function bodyMixinMethods (instance) { return consumeBody(this, parseJSONFromBytes, instance) }, - async formData () { - webidl.brandCheck(this, instance) - - throwIfAborted(this[kState]) - - // 1. Let mimeType be the result of get the MIME type with this. - const mimeType = bodyMimeType(this) - - // If mimeType’s essence is "multipart/form-data", then: - if (mimeType !== null && mimeType.essence === 'multipart/form-data') { - const responseFormData = new FormData() - - let busboy - - try { - busboy = new Busboy({ - headers: { - 'content-type': serializeAMimeType(mimeType) - }, - preservePath: true - }) - } catch (err) { - throw new DOMException(`${err}`, 'AbortError') - } - - busboy.on('field', (name, value) => { - responseFormData.append(name, value) - }) - busboy.on('file', (name, value, filename, encoding, mimeType) => { - const chunks = [] - - if (encoding === 'base64' || encoding.toLowerCase() === 'base64') { - let base64chunk = '' - - value.on('data', (chunk) => { - base64chunk += chunk.toString().replace(/[\r\n]/gm, '') - - const end = base64chunk.length - base64chunk.length % 4 - chunks.push(Buffer.from(base64chunk.slice(0, end), 'base64')) - - base64chunk = base64chunk.slice(end) - }) - value.on('end', () => { - chunks.push(Buffer.from(base64chunk, 'base64')) - responseFormData.append(name, new File(chunks, filename, { type: mimeType })) - }) - } else { - value.on('data', (chunk) => { - chunks.push(chunk) - }) - value.on('end', () => { - responseFormData.append(name, new File(chunks, filename, { type: mimeType })) - }) - } - }) - - const busboyResolve = new Promise((resolve, reject) => { - busboy.on('finish', resolve) - busboy.on('error', (err) => reject(new TypeError(err))) - }) - - if (this.body !== null) { - Readable.from(this[kState].body.stream).pipe(busboy) - } - - await busboyResolve + formData () { + // The formData() method steps are to return the result of running + // consume body with this and the following step given a byte sequence bytes: + return consumeBody(this, (value) => { + // 1. Let mimeType be the result of get the MIME type with this. + const mimeType = bodyMimeType(this) + + // 2. If mimeType is non-null, then switch on mimeType’s essence and run + // the corresponding steps: + if (mimeType !== null) { + switch (mimeType.essence) { + case 'multipart/form-data': { + // 1. ... [long step] + const parsed = multipartFormDataParser(value, mimeType) + + // 2. If that fails for some reason, then throw a TypeError. + if (parsed === 'failure') { + throw new TypeError('Failed to parse body as FormData.') + } + + // 3. Return a new FormData object, appending each entry, + // resulting from the parsing operation, to its entry list. + const fd = new FormData() + fd[kState] = parsed + + return fd + } + case 'application/x-www-form-urlencoded': { + // 1. Let entries be the result of parsing bytes. + const entries = new URLSearchParams(textDecoder.decode(value)) - return responseFormData - } else if (mimeType !== null && mimeType.essence === 'application/x-www-form-urlencoded') { - // Otherwise, if mimeType’s essence is "application/x-www-form-urlencoded", then: + // 2. If entries is failure, then throw a TypeError. - // 1. Let entries be the result of parsing bytes. - let entries - try { - let text = '' - // application/x-www-form-urlencoded parser will keep the BOM. - // https://url.spec.whatwg.org/#concept-urlencoded-parser - // Note that streaming decoder is stateful and cannot be reused - const stream = this[kState].body.stream.pipeThrough(new TextDecoderStream('utf-8', { ignoreBOM: true })) + // 3. Return a new FormData object whose entry list is entries. + const fd = new FormData() + fd[kState] = [...entries] - for await (const chunk of stream) { - text += chunk + return fd + } } - - entries = new URLSearchParams(text) - } catch (err) { - // istanbul ignore next: Unclear when new URLSearchParams can fail on a string. - // 2. If entries is failure, then throw a TypeError. - throw new TypeError(err) } - // 3. Return a new FormData object whose entries are entries. - const formData = new FormData() - for (const [name, value] of entries) { - formData.append(name, value) - } - return formData - } else { - // Wait a tick before checking if the request has been aborted. - // Otherwise, a TypeError can be thrown when an AbortError should. - await Promise.resolve() - - throwIfAborted(this[kState]) - - // Otherwise, throw a TypeError. - throw webidl.errors.exception({ - header: `${instance.name}.formData`, - message: 'Could not parse content as FormData.' - }) - } + // 3. Throw a TypeError. + throw new TypeError( + 'Content-Type was not one of "multipart/form-data" or "application/x-www-form-urlencoded".' + ) + }, instance) } } diff --git a/lib/web/fetch/data-url.js b/lib/web/fetch/data-url.js index 0fcf262709c..ef292011c2c 100644 --- a/lib/web/fetch/data-url.js +++ b/lib/web/fetch/data-url.js @@ -628,7 +628,6 @@ function removeASCIIWhitespace (str, leading = true, trailing = true) { } /** - * * @param {string} str * @param {boolean} leading * @param {boolean} trailing @@ -739,5 +738,6 @@ module.exports = { serializeAMimeType, removeChars, minimizeSupportedMimeType, - HTTP_TOKEN_CODEPOINTS + HTTP_TOKEN_CODEPOINTS, + isomorphicDecode } diff --git a/lib/web/fetch/formdata-parser.js b/lib/web/fetch/formdata-parser.js index 94722799127..26c5b762536 100644 --- a/lib/web/fetch/formdata-parser.js +++ b/lib/web/fetch/formdata-parser.js @@ -2,7 +2,7 @@ const { webidl } = require('./webidl') const { utf8DecodeBytes } = require('./util') -const { collectASequenceOfCodePoints, removeChars, HTTP_TOKEN_CODEPOINTS } = require('./data-url') +const { HTTP_TOKEN_CODEPOINTS, isomorphicDecode } = require('./data-url') const { isFileLike, File: UndiciFile } = require('./file') const { makeEntry } = require('./formdata') const assert = require('node:assert') @@ -10,6 +10,9 @@ const { isAscii } = require('node:buffer') const File = globalThis.File ?? UndiciFile +const formDataNameBuffer = Buffer.from('form-data; name="') +const filenameBuffer = Buffer.from('; filename="') + /** * @see https://andreubotella.github.io/multipart-form-data/#multipart-form-data-boundary * @param {string} boundary @@ -46,8 +49,8 @@ function validateBoundary (boundary) { /** * @see https://andreubotella.github.io/multipart-form-data/#escape-a-multipart-form-data-name * @param {string} name - * @param {string?} encoding - * @param {boolean?} isFilename + * @param {string} [encoding='utf-8'] + * @param {boolean} [isFilename=false] */ function escapeFormDataName (name, encoding = 'utf-8', isFilename = false) { // 1. If isFilename is true: @@ -82,7 +85,7 @@ function escapeFormDataName (name, encoding = 'utf-8', isFilename = false) { /** * @see https://andreubotella.github.io/multipart-form-data/#multipart-form-data-parser - * @param {string} input + * @param {Buffer} input * @param {ReturnType} mimeType */ function multipartFormDataParser (input, mimeType) { @@ -96,7 +99,7 @@ function multipartFormDataParser (input, mimeType) { return 'failure' } - const boundary = utf8DecodeBytes(Buffer.from(mimeType.parameters.get('boundary'))) + const boundary = Buffer.from(`--${mimeType.parameters.get('boundary')}`, 'utf8') // 3. Let entry list be an empty entry list. const entryList = [] @@ -110,8 +113,9 @@ function multipartFormDataParser (input, mimeType) { // 5.1. If position points to a sequence of bytes starting with 0x2D 0x2D // (`--`) followed by boundary, advance position by 2 + the length of // boundary. Otherwise, return failure. - if (input.slice(position.position, position.position + 2 + boundary.length) === `--${boundary}`) { - position.position += 2 + boundary.length + // Note: boundary is padded with 2 dashes already, no need to add 2. + if (input.subarray(position.position, position.position + boundary.length).equals(boundary)) { + position.position += boundary.length } else { return 'failure' } @@ -119,13 +123,13 @@ function multipartFormDataParser (input, mimeType) { // 5.2. If position points to the sequence of bytes 0x2D 0x2D 0x0D 0x0A // (`--` followed by CR LF) followed by the end of input, return entry list. // TODO: a FormData body doesn't need to end with CRLF? - if (position.position === input.length - 2 && input.endsWith('--')) { + if (position.position === input.length - 2 && input.at(-1) === 0x2d && input.at(-2) === 0x2d) { return entryList } // 5.3. If position does not point to a sequence of bytes starting with 0x0D // 0x0A (CR LF), return failure. - if (input.charCodeAt(position.position) !== 0x0d || input.charCodeAt(position.position + 1) !== 0x0a) { + if (input[position.position] !== 0x0d || input[position.position + 1] !== 0x0a) { return 'failure' } @@ -148,25 +152,25 @@ function multipartFormDataParser (input, mimeType) { position.position += 2 // 5.7. Let body be the empty byte sequence. - let body = '' + let body // 5.8. Body loop: While position is not past the end of input: // TODO: the steps here are completely wrong { - const boundaryIndex = input.indexOf(boundary, position.position) + const boundaryIndex = input.indexOf(boundary.subarray(2), position.position) if (boundaryIndex === -1) { return 'failure' } - body = input.slice(position.position, boundaryIndex - 4) + body = input.subarray(position.position, boundaryIndex - 4) position.position += body.length } // 5.9. If position does not point to a sequence of bytes starting with // 0x0D 0x0A (CR LF), return failure. Otherwise, advance position by 2. - if (input.charCodeAt(position.position) !== 0x0d || input.charCodeAt(position.position + 1) !== 0x0a) { + if (input[position.position] !== 0x0d || input[position.position + 1] !== 0x0a) { return 'failure' } else { position.position += 2 @@ -204,7 +208,7 @@ function multipartFormDataParser (input, mimeType) { /** * @see https://andreubotella.github.io/multipart-form-data/#parse-multipart-form-data-headers - * @param {string} input + * @param {Buffer} input * @param {{ position: number }} position */ function parseMultipartFormDataHeaders (input, position) { @@ -216,7 +220,7 @@ function parseMultipartFormDataHeaders (input, position) { // 2. While true: while (true) { // 2.1. If position points to a sequence of bytes starting with 0x0D 0x0A (CR LF): - if (input.charCodeAt(position.position) === 0x0d && input.charCodeAt(position.position + 1) === 0x0a) { + if (input[position.position] === 0x0d && input[position.position + 1] === 0x0a) { // 2.1.1. If name is null, return failure. if (name === null) { return 'failure' @@ -228,8 +232,8 @@ function parseMultipartFormDataHeaders (input, position) { // 2.2. Let header name be the result of collecting a sequence of bytes that are // not 0x0A (LF), 0x0D (CR) or 0x3A (:), given position. - let headerName = collectASequenceOfCodePoints( - (char) => char !== '\n' && char !== '\n' && char !== ':', + let headerName = collectASequenceOfBytes( + (char) => char !== 0x0a && char !== 0x0d && char !== 0x3a, input, position ) @@ -238,12 +242,12 @@ function parseMultipartFormDataHeaders (input, position) { headerName = removeChars(headerName, true, true, (char) => char === 0x9 || char === 0x20) // 2.4. If header name does not match the field-name token production, return failure. - if (!HTTP_TOKEN_CODEPOINTS.test(headerName)) { + if (!HTTP_TOKEN_CODEPOINTS.test(headerName.toString())) { return 'failure' } // 2.5. If the byte at position is not 0x3A (:), return failure. - if (input.charCodeAt(position.position) !== 0x3a) { + if (input[position.position] !== 0x3a) { return 'failure' } @@ -252,21 +256,21 @@ function parseMultipartFormDataHeaders (input, position) { // 2.7. Collect a sequence of bytes that are HTTP tab or space bytes given position. // (Do nothing with those bytes.) - collectASequenceOfCodePoints( - (char) => char === ' ' || char === '\t', + collectASequenceOfBytes( + (char) => char === 0x20 || char === 0x09, input, position ) // 2.8. Byte-lowercase header name and switch on the result: - switch (headerName.toLowerCase()) { + switch (new TextDecoder().decode(headerName).toLowerCase()) { case 'content-disposition': { // 1. Set name and filename to null. name = filename = null // 2. If position does not point to a sequence of bytes starting with // `form-data; name="`, return failure. - if (!input.slice(position.position).startsWith('form-data; name="')) { + if (input.indexOf(formDataNameBuffer, position.position) !== position.position) { return 'failure' } @@ -279,12 +283,12 @@ function parseMultipartFormDataHeaders (input, position) { // failure. name = parseMultipartFormDataName(input, position) - if (name === false) { + if (name === null) { return 'failure' } // 5. If position points to a sequence of bytes starting with `; filename="`: - if (input.slice(position.position).startsWith('; filename="')) { + if (input.indexOf(filenameBuffer, position.position) === position.position) { // 1. Advance position so it points at the byte after the next 0x22 (") byte // (the one in the sequence of bytes matched above). position.position += 13 @@ -293,7 +297,7 @@ function parseMultipartFormDataHeaders (input, position) { // input and position, if the result is not failure. Otherwise, return failure. filename = parseMultipartFormDataName(input, position) - if (filename === false) { + if (filename === null) { return 'failure' } } @@ -303,8 +307,8 @@ function parseMultipartFormDataHeaders (input, position) { case 'content-type': { // 1. Let header value be the result of collecting a sequence of bytes that are // not 0x0A (LF) or 0x0D (CR), given position. - let headerValue = collectASequenceOfCodePoints( - (char) => char !== '\r' && char !== '\n', + let headerValue = collectASequenceOfBytes( + (char) => char !== 0x0a && char !== 0x0d, input, position ) @@ -313,15 +317,15 @@ function parseMultipartFormDataHeaders (input, position) { headerValue = removeChars(headerValue, false, true, (char) => char === 0x9 || char === 0x20) // 3. Set contentType to the isomorphic decoding of header value. - contentType = headerValue + contentType = isomorphicDecode(headerValue) break } default: { // Collect a sequence of bytes that are not 0x0A (LF) or 0x0D (CR), given position. // (Do nothing with those bytes.) - collectASequenceOfCodePoints( - (char) => char !== '\n' && char !== '\r', + collectASequenceOfBytes( + (char) => char !== 0x0a && char !== 0x0d, input, position ) @@ -330,7 +334,7 @@ function parseMultipartFormDataHeaders (input, position) { // 2.9. If position does not point to a sequence of bytes starting with 0x0D 0x0A // (CR LF), return failure. Otherwise, advance position by 2 (past the newline). - if (input.charCodeAt(position.position) !== 0x0d && input.charCodeAt(position.position + 1) !== 0x0a) { + if (input[position.position] !== 0x0d && input[position.position + 1] !== 0x0a) { return 'failure' } else { position.position += 2 @@ -340,23 +344,24 @@ function parseMultipartFormDataHeaders (input, position) { /** * @see https://andreubotella.github.io/multipart-form-data/#parse-a-multipart-form-data-name - * @param {string} input + * @param {Buffer} input * @param {{ position: number }} position */ function parseMultipartFormDataName (input, position) { // 1. Assert: The byte at (position - 1) is 0x22 ("). - assert(input.charCodeAt(position.position - 1) === 0x22) + assert(input[position.position - 1] === 0x22) // 2. Let name be the result of collecting a sequence of bytes that are not 0x0A (LF), 0x0D (CR) or 0x22 ("), given position. - let name = collectASequenceOfCodePoints( - (char) => char !== '\n' && char !== '\r' && char !== '"', + /** @type {string | Buffer} */ + let name = collectASequenceOfBytes( + (char) => char !== 0x0a && char !== 0x0d && char !== 0x22, input, position ) // 3. If the byte at position is not 0x22 ("), return failure. Otherwise, advance position by 1. - if (input.charCodeAt(position.position) !== 0x22) { - return false // name could be 'failure' + if (input[position.position] !== 0x22) { + return null // name could be 'failure' } else { position.position++ } @@ -365,13 +370,53 @@ function parseMultipartFormDataName (input, position) { // - `%0A`: 0x0A (LF) // - `%0D`: 0x0D (CR) // - `%22`: 0x22 (") - name = name + name = new TextDecoder().decode(name) .replace(/%0A/i, '\n') .replace(/%0D/i, '\r') .replace(/%22/, '"') // 5. Return the UTF-8 decoding without BOM of name. - return Buffer.from(name, 'utf-8').toString('utf-8') + return name +} + +/** + * @param {(char: number) => boolean} condition + * @param {Buffer} input + * @param {{ position: number }} position + */ +function collectASequenceOfBytes (condition, input, position) { + const result = [] + let index = 0 + + while (position.position < input.length && condition(input[position.position])) { + result[index++] = input[position.position] + + position.position++ + } + + return Buffer.from(result, result.length) +} + +/** + * @param {Buffer} str + * @param {boolean} leading + * @param {boolean} trailing + * @param {(charCode: number) => boolean} predicate + * @returns + */ +function removeChars (str, leading, trailing, predicate) { + let lead = 0 + let trail = str.length - 1 + + if (leading) { + while (lead < str.length && predicate(str[lead])) lead++ + } + + if (trailing) { + while (trail > 0 && predicate(str[trail])) trail-- + } + + return lead === 0 && trail === str.length - 1 ? str : str.subarray(lead, trail + 1) } module.exports = { diff --git a/package.json b/package.json index bc1d2f6520c..ede94d0d99f 100644 --- a/package.json +++ b/package.json @@ -86,8 +86,8 @@ "coverage:clean": "node ./scripts/clean-coverage.js", "coverage:report": "cross-env NODE_V8_COVERAGE= c8 report", "coverage:report:ci": "c8 report", - "bench": "echo \"Error: Benchmarks have been moved to '\/benchmarks'\" && exit 1", - "serve:website": "echo \"Error: Documentation has been moved to '\/docs'\" && exit 1", + "bench": "echo \"Error: Benchmarks have been moved to '/benchmarks'\" && exit 1", + "serve:website": "echo \"Error: Documentation has been moved to '/docs'\" && exit 1", "prepare": "husky install && node ./scripts/platform-shell.js", "fuzz": "jsfuzz test/fuzzing/fuzz.js corpus" }, @@ -143,8 +143,5 @@ "testMatch": [ "/test/jest/**" ] - }, - "dependencies": { - "@fastify/busboy": "^2.0.0" } } From fe9e5ed4c6fbc19b7940d022dfe24bad90e8749c Mon Sep 17 00:00:00 2001 From: Khafra Date: Sun, 3 Mar 2024 02:08:15 -0500 Subject: [PATCH 04/11] fixup! off-by-one --- lib/web/fetch/formdata-parser.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/web/fetch/formdata-parser.js b/lib/web/fetch/formdata-parser.js index 26c5b762536..bd8657da293 100644 --- a/lib/web/fetch/formdata-parser.js +++ b/lib/web/fetch/formdata-parser.js @@ -291,7 +291,7 @@ function parseMultipartFormDataHeaders (input, position) { if (input.indexOf(filenameBuffer, position.position) === position.position) { // 1. Advance position so it points at the byte after the next 0x22 (") byte // (the one in the sequence of bytes matched above). - position.position += 13 + position.position += 12 // 2. Set filename to the result of parsing a multipart/form-data name given // input and position, if the result is not failure. Otherwise, return failure. From a4c825c1e2044eb085d12ae4d1383d38aea625aa Mon Sep 17 00:00:00 2001 From: Khafra Date: Sun, 3 Mar 2024 02:09:11 -0500 Subject: [PATCH 05/11] fixup --- lib/web/fetch/formdata-parser.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/web/fetch/formdata-parser.js b/lib/web/fetch/formdata-parser.js index bd8657da293..e1e7432ddf0 100644 --- a/lib/web/fetch/formdata-parser.js +++ b/lib/web/fetch/formdata-parser.js @@ -371,9 +371,9 @@ function parseMultipartFormDataName (input, position) { // - `%0D`: 0x0D (CR) // - `%22`: 0x22 (") name = new TextDecoder().decode(name) - .replace(/%0A/i, '\n') - .replace(/%0D/i, '\r') - .replace(/%22/, '"') + .replace(/%0A/ig, '\n') + .replace(/%0D/ig, '\r') + .replace(/%22/g, '"') // 5. Return the UTF-8 decoding without BOM of name. return name From 02b18a01443d49f30739202b87398af0fa081073 Mon Sep 17 00:00:00 2001 From: Khafra Date: Sun, 3 Mar 2024 02:13:37 -0500 Subject: [PATCH 06/11] fixup --- lib/web/fetch/body.js | 5 ++++- test/fetch/client-fetch.js | 11 ++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/lib/web/fetch/body.js b/lib/web/fetch/body.js index 04bee503677..99b804bb26f 100644 --- a/lib/web/fetch/body.js +++ b/lib/web/fetch/body.js @@ -370,7 +370,10 @@ function bodyMixinMethods (instance) { // 3. Return a new FormData object whose entry list is entries. const fd = new FormData() - fd[kState] = [...entries] + + for (const [name, value] of entries) { + fd.append(name, value) + } return fd } diff --git a/test/fetch/client-fetch.js b/test/fetch/client-fetch.js index e0fcb97eca1..a6d4c9e7814 100644 --- a/test/fetch/client-fetch.js +++ b/test/fetch/client-fetch.js @@ -213,7 +213,16 @@ test('multipart formdata base64', (t, done) => { // Example form data with base64 encoding const data = randomFillSync(Buffer.alloc(256)) - const formRaw = `------formdata-undici-0.5786922755719377\r\nContent-Disposition: form-data; name="file"; filename="test.txt"\r\nContent-Type: application/octet-stream\r\nContent-Transfer-Encoding: base64\r\n\r\n${data.toString('base64')}\r\n------formdata-undici-0.5786922755719377--` + const formRaw = + '------formdata-undici-0.5786922755719377\r\n' + + 'Content-Disposition: form-data; name="file"; filename="test.txt"\r\n' + + 'Content-Type: application/octet-stream\r\n' + + 'Content-Transfer-Encoding: base64\r\n' + + '\r\n' + + data.toString('base64') + + '\r\n' + + '------formdata-undici-0.5786922755719377--' + const server = createServer(async (req, res) => { res.setHeader('content-type', 'multipart/form-data; boundary=----formdata-undici-0.5786922755719377') From 632e1f7a1cf9e06e4d7aeb788d12231a696f8313 Mon Sep 17 00:00:00 2001 From: Khafra Date: Sun, 3 Mar 2024 10:34:44 -0500 Subject: [PATCH 07/11] fix bugs, fix lint --- lib/web/fetch/body.js | 3 +-- lib/web/fetch/formdata-parser.js | 28 +++++++++++++++++++++++++--- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/lib/web/fetch/body.js b/lib/web/fetch/body.js index 99b804bb26f..0e33d2c9057 100644 --- a/lib/web/fetch/body.js +++ b/lib/web/fetch/body.js @@ -22,7 +22,6 @@ const { serializeAMimeType } = require('./data-url') const { multipartFormDataParser } = require('./formdata-parser') const textEncoder = new TextEncoder() -const textDecoder = new TextDecoder() // https://fetch.spec.whatwg.org/#concept-bodyinit-extract function extractBody (object, keepalive = false) { @@ -364,7 +363,7 @@ function bodyMixinMethods (instance) { } case 'application/x-www-form-urlencoded': { // 1. Let entries be the result of parsing bytes. - const entries = new URLSearchParams(textDecoder.decode(value)) + const entries = new URLSearchParams(value.toString()) // 2. If entries is failure, then throw a TypeError. diff --git a/lib/web/fetch/formdata-parser.js b/lib/web/fetch/formdata-parser.js index e1e7432ddf0..6cf63bce292 100644 --- a/lib/web/fetch/formdata-parser.js +++ b/lib/web/fetch/formdata-parser.js @@ -145,7 +145,7 @@ function multipartFormDataParser (input, mimeType) { return 'failure' } - let { name, filename, contentType } = result + let { name, filename, contentType, encoding } = result // 5.6. Advance position by 2. (This skips past the empty line that marks // the end of the headers.) @@ -166,6 +166,12 @@ function multipartFormDataParser (input, mimeType) { body = input.subarray(position.position, boundaryIndex - 4) position.position += body.length + + // Note: position must be advanced by the body's length before being + // decoded, otherwise the parsing will fail. + if (encoding === 'base64') { + body = Buffer.from(body.toString(), 'base64') + } } // 5.9. If position does not point to a sequence of bytes starting with @@ -216,6 +222,7 @@ function parseMultipartFormDataHeaders (input, position) { let name = null let filename = null let contentType = null + let encoding = null // 2. While true: while (true) { @@ -227,7 +234,7 @@ function parseMultipartFormDataHeaders (input, position) { } // 2.1.2. Return name, filename and contentType. - return { name, filename, contentType } + return { name, filename, contentType, encoding } } // 2.2. Let header name be the result of collecting a sequence of bytes that are @@ -321,6 +328,19 @@ function parseMultipartFormDataHeaders (input, position) { break } + case 'content-transfer-encoding': { + let headerValue = collectASequenceOfBytes( + (char) => char !== 0x0a && char !== 0x0d, + input, + position + ) + + headerValue = removeChars(headerValue, false, true, (char) => char === 0x9 || char === 0x20) + + encoding = isomorphicDecode(headerValue) + + break + } default: { // Collect a sequence of bytes that are not 0x0A (LF) or 0x0D (CR), given position. // (Do nothing with those bytes.) @@ -420,5 +440,7 @@ function removeChars (str, leading, trailing, predicate) { } module.exports = { - multipartFormDataParser + multipartFormDataParser, + validateBoundary, + escapeFormDataName } From 94bee966a2d809dd3bafe876e6d5dd6903fcc69b Mon Sep 17 00:00:00 2001 From: Khafra Date: Sun, 3 Mar 2024 10:51:20 -0500 Subject: [PATCH 08/11] mark wpts as passing --- test/wpt/status/fetch.status.json | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/test/wpt/status/fetch.status.json b/test/wpt/status/fetch.status.json index 008baf5bd12..60be3801ab3 100644 --- a/test/wpt/status/fetch.status.json +++ b/test/wpt/status/fetch.status.json @@ -399,21 +399,6 @@ "Consume empty FormData response body as text" ] }, - "response-consume-stream.any.js": { - "fail": [ - "Read blob response's body as readableStream with mode=byob", - "Read text response's body as readableStream with mode=byob", - "Read URLSearchParams response's body as readableStream with mode=byob", - "Read array buffer response's body as readableStream with mode=byob", - "Read form data response's body as readableStream with mode=byob" - ] - }, - "response-error-from-stream.any.js": { - "fail": [ - "ReadableStream start() Error propagates to Response.formData() Promise", - "ReadableStream pull() Error propagates to Response.formData() Promise" - ] - }, "response-stream-with-broken-then.any.js": { "note": "this is a bug in webstreams, see https://github.com/nodejs/node/issues/46786", "skip": true From ee45aa8d524adaa4bbd920fc67d142714f61ab34 Mon Sep 17 00:00:00 2001 From: Khafra Date: Sun, 3 Mar 2024 11:00:30 -0500 Subject: [PATCH 09/11] this one fails on node 18 --- test/wpt/status/fetch.status.json | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/wpt/status/fetch.status.json b/test/wpt/status/fetch.status.json index 60be3801ab3..62677d62025 100644 --- a/test/wpt/status/fetch.status.json +++ b/test/wpt/status/fetch.status.json @@ -399,6 +399,15 @@ "Consume empty FormData response body as text" ] }, + "response-consume-stream.any.js": { + "fail": [ + "Read blob response's body as readableStream with mode=byob", + "Read text response's body as readableStream with mode=byob", + "Read URLSearchParams response's body as readableStream with mode=byob", + "Read array buffer response's body as readableStream with mode=byob", + "Read form data response's body as readableStream with mode=byob" + ] + }, "response-stream-with-broken-then.any.js": { "note": "this is a bug in webstreams, see https://github.com/nodejs/node/issues/46786", "skip": true From 57b68ac36ebcb7a1f6724cdffd9da8dbf45997b4 Mon Sep 17 00:00:00 2001 From: Khafra Date: Sun, 3 Mar 2024 23:01:17 -0500 Subject: [PATCH 10/11] apply suggestions --- lib/web/fetch/formdata-parser.js | 40 ++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/lib/web/fetch/formdata-parser.js b/lib/web/fetch/formdata-parser.js index 6cf63bce292..ef89c6e7b98 100644 --- a/lib/web/fetch/formdata-parser.js +++ b/lib/web/fetch/formdata-parser.js @@ -36,7 +36,7 @@ function validateBoundary (boundary) { (cp >= 0x41 && cp <= 0x5a) || (cp >= 0x61 && cp <= 0x7a) || cp === 0x27 || - cp === 0x3d || + cp === 0x2d || cp === 0x5f )) { return false @@ -277,7 +277,7 @@ function parseMultipartFormDataHeaders (input, position) { // 2. If position does not point to a sequence of bytes starting with // `form-data; name="`, return failure. - if (input.indexOf(formDataNameBuffer, position.position) !== position.position) { + if (!bufferStartsWith(input, formDataNameBuffer, position)) { return 'failure' } @@ -295,7 +295,7 @@ function parseMultipartFormDataHeaders (input, position) { } // 5. If position points to a sequence of bytes starting with `; filename="`: - if (input.indexOf(filenameBuffer, position.position) === position.position) { + if (bufferStartsWith(input, filenameBuffer, position)) { // 1. Advance position so it points at the byte after the next 0x22 (") byte // (the one in the sequence of bytes matched above). position.position += 12 @@ -418,25 +418,45 @@ function collectASequenceOfBytes (condition, input, position) { } /** - * @param {Buffer} str + * @param {Buffer} buf * @param {boolean} leading * @param {boolean} trailing * @param {(charCode: number) => boolean} predicate - * @returns + * @returns {Buffer} */ -function removeChars (str, leading, trailing, predicate) { +function removeChars (buf, leading, trailing, predicate) { let lead = 0 - let trail = str.length - 1 + let trail = buf.length - 1 if (leading) { - while (lead < str.length && predicate(str[lead])) lead++ + while (lead < buf.length && predicate(buf[lead])) lead++ } if (trailing) { - while (trail > 0 && predicate(str[trail])) trail-- + while (trail > 0 && predicate(buf[trail])) trail-- } - return lead === 0 && trail === str.length - 1 ? str : str.subarray(lead, trail + 1) + return lead === 0 && trail === buf.length - 1 ? buf : buf.subarray(lead, trail + 1) +} + +/** + * Checks if {@param buffer} starts with {@param start} + * @param {Buffer} buffer + * @param {Buffer} start + * @param {{ position: number }} position + */ +function bufferStartsWith (buffer, start, position) { + if (buffer.length < start.length) { + return false + } + + for (let i = 0; i < start.length; i++) { + if (start[i] !== buffer[position.position + i]) { + return false + } + } + + return true } module.exports = { From 2f69f5d95964ef2530f22ef6d8e311a1e2df178d Mon Sep 17 00:00:00 2001 From: Khafra Date: Sun, 3 Mar 2024 23:34:13 -0500 Subject: [PATCH 11/11] fixup! body can end with CRLF --- lib/web/fetch/formdata-parser.js | 9 +++++++-- test/fetch/formdata.js | 19 ++++++++++++++++++- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/lib/web/fetch/formdata-parser.js b/lib/web/fetch/formdata-parser.js index ef89c6e7b98..a338631fc06 100644 --- a/lib/web/fetch/formdata-parser.js +++ b/lib/web/fetch/formdata-parser.js @@ -12,6 +12,8 @@ const File = globalThis.File ?? UndiciFile const formDataNameBuffer = Buffer.from('form-data; name="') const filenameBuffer = Buffer.from('; filename="') +const dd = Buffer.from('--') +const ddcrlf = Buffer.from('--\r\n') /** * @see https://andreubotella.github.io/multipart-form-data/#multipart-form-data-boundary @@ -122,8 +124,11 @@ function multipartFormDataParser (input, mimeType) { // 5.2. If position points to the sequence of bytes 0x2D 0x2D 0x0D 0x0A // (`--` followed by CR LF) followed by the end of input, return entry list. - // TODO: a FormData body doesn't need to end with CRLF? - if (position.position === input.length - 2 && input.at(-1) === 0x2d && input.at(-2) === 0x2d) { + // Note: a body does NOT need to end with CRLF. It can end with --. + if ( + (position.position === input.length - 2 && bufferStartsWith(input, dd, position)) || + (position.position === input.length - 4 && bufferStartsWith(input, ddcrlf, position)) + ) { return entryList } diff --git a/test/fetch/formdata.js b/test/fetch/formdata.js index 3558001f10a..3cdfd397fd0 100644 --- a/test/fetch/formdata.js +++ b/test/fetch/formdata.js @@ -3,7 +3,7 @@ const { test } = require('node:test') const assert = require('node:assert') const { tspl } = require('@matteo.collina/tspl') -const { FormData, File, Response } = require('../../') +const { FormData, File, Response, Request } = require('../../') const { Blob: ThirdPartyBlob } = require('formdata-node') const { Blob } = require('node:buffer') const { isFormDataLike } = require('../../lib/core/util') @@ -371,3 +371,20 @@ test('FormData returned from bodyMixin.formData is not a clone', async () => { assert.strictEqual(fd2.get('foo'), 'baz') assert.strictEqual(fd.get('foo'), 'foo') }) + +test('.formData() with multipart/form-data body that ends with --\r\n', async (t) => { + const request = new Request('http://localhost', { + method: 'POST', + headers: { + 'Content-Type': 'multipart/form-data; boundary=----formdata-undici-0.6204674738279623' + }, + body: + '------formdata-undici-0.6204674738279623\r\n' + + 'Content-Disposition: form-data; name="fiŝo"\r\n' + + '\r\n' + + 'value1\r\n' + + '------formdata-undici-0.6204674738279623--\r\n' + }) + + await request.formData() +})