Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix filename* parsing #3768

Merged
merged 2 commits into from
Oct 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions lib/web/fetch/body.js
Original file line number Diff line number Diff line change
Expand Up @@ -364,12 +364,8 @@ function bodyMixinMethods (instance, getInternalState) {
switch (mimeType.essence) {
case 'multipart/form-data': {
// 1. ... [long step]
const parsed = multipartFormDataParser(value, mimeType)

// 2. If that fails for some reason, then throw a TypeError.
if (parsed === 'failure') {
throw new TypeError('Failed to parse body as FormData.')
}
const parsed = multipartFormDataParser(value, mimeType)

// 3. Return a new FormData object, appending each entry,
// resulting from the parsing operation, to its entry list.
Expand Down
113 changes: 70 additions & 43 deletions lib/web/fetch/formdata-parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ const { File: NodeFile } = require('node:buffer')
const File = globalThis.File ?? NodeFile

const formDataNameBuffer = Buffer.from('form-data; name="')
const filenameBuffer = Buffer.from('; filename')
const filenameBuffer = Buffer.from('filename')
const dd = Buffer.from('--')
const ddcrlf = Buffer.from('--\r\n')

Expand Down Expand Up @@ -75,7 +75,7 @@ function multipartFormDataParser (input, mimeType) {
// Otherwise, let boundary be the result of UTF-8 decoding mimeType’s
// parameters["boundary"].
if (boundaryString === undefined) {
return 'failure'
throw parsingError('missing boundary in content-type header')
}

const boundary = Buffer.from(`--${boundaryString}`, 'utf8')
Expand Down Expand Up @@ -111,7 +111,7 @@ function multipartFormDataParser (input, mimeType) {
if (input.subarray(position.position, position.position + boundary.length).equals(boundary)) {
position.position += boundary.length
} else {
return 'failure'
throw parsingError('expected a value starting with -- and the boundary')
}

// 5.2. If position points to the sequence of bytes 0x2D 0x2D 0x0D 0x0A
Expand All @@ -127,7 +127,7 @@ function multipartFormDataParser (input, mimeType) {
// 5.3. If position does not point to a sequence of bytes starting with 0x0D
// 0x0A (CR LF), return failure.
if (input[position.position] !== 0x0d || input[position.position + 1] !== 0x0a) {
return 'failure'
throw parsingError('expected CRLF')
}

// 5.4. Advance position by 2. (This skips past the newline.)
Expand All @@ -138,10 +138,6 @@ function multipartFormDataParser (input, mimeType) {
// is not failure. Otherwise, return failure.
const result = parseMultipartFormDataHeaders(input, position)

if (result === 'failure') {
return 'failure'
}

let { name, filename, contentType, encoding } = result

// 5.6. Advance position by 2. (This skips past the empty line that marks
Expand All @@ -157,7 +153,7 @@ function multipartFormDataParser (input, mimeType) {
const boundaryIndex = input.indexOf(boundary.subarray(2), position.position)

if (boundaryIndex === -1) {
return 'failure'
throw parsingError('expected boundary after body')
}

body = input.subarray(position.position, boundaryIndex - 4)
Expand All @@ -174,7 +170,7 @@ function multipartFormDataParser (input, mimeType) {
// 5.9. If position does not point to a sequence of bytes starting with
// 0x0D 0x0A (CR LF), return failure. Otherwise, advance position by 2.
if (input[position.position] !== 0x0d || input[position.position + 1] !== 0x0a) {
return 'failure'
throw parsingError('expected CRLF')
} else {
position.position += 2
}
Expand Down Expand Up @@ -230,7 +226,7 @@ function parseMultipartFormDataHeaders (input, position) {
if (input[position.position] === 0x0d && input[position.position + 1] === 0x0a) {
// 2.1.1. If name is null, return failure.
if (name === null) {
return 'failure'
throw parsingError('header name is null')
}

// 2.1.2. Return name, filename and contentType.
Expand All @@ -250,12 +246,12 @@ function parseMultipartFormDataHeaders (input, position) {

// 2.4. If header name does not match the field-name token production, return failure.
if (!HTTP_TOKEN_CODEPOINTS.test(headerName.toString())) {
return 'failure'
throw parsingError('header name does not match the field-name token production')
}

// 2.5. If the byte at position is not 0x3A (:), return failure.
if (input[position.position] !== 0x3a) {
return 'failure'
throw parsingError('expected :')
}

// 2.6. Advance position by 1.
Expand All @@ -278,7 +274,7 @@ function parseMultipartFormDataHeaders (input, position) {
// 2. If position does not point to a sequence of bytes starting with
// `form-data; name="`, return failure.
if (!bufferStartsWith(input, formDataNameBuffer, position)) {
return 'failure'
throw parsingError('expected form-data; name=" for content-disposition header')
}

// 3. Advance position so it points at the byte after the next 0x22 (")
Expand All @@ -290,34 +286,61 @@ function parseMultipartFormDataHeaders (input, position) {
// failure.
name = parseMultipartFormDataName(input, position)

if (name === null) {
return 'failure'
}

// 5. If position points to a sequence of bytes starting with `; filename="`:
if (bufferStartsWith(input, filenameBuffer, position)) {
// Note: undici also handles filename*
let check = position.position + filenameBuffer.length

if (input[check] === 0x2a) {
position.position += 1
check += 1
}

if (input[check] !== 0x3d || input[check + 1] !== 0x22) { // ="
return 'failure'
}

// 1. Advance position so it points at the byte after the next 0x22 (") byte
// (the one in the sequence of bytes matched above).
position.position += 12

// 2. Set filename to the result of parsing a multipart/form-data name given
// input and position, if the result is not failure. Otherwise, return failure.
filename = parseMultipartFormDataName(input, position)

if (filename === null) {
return 'failure'
if (input[position.position] === 0x3b /* ; */ && input[position.position + 1] === 0x20 /* ' ' */) {
const at = { position: position.position + 2 }

if (bufferStartsWith(input, filenameBuffer, at)) {
if (input[at.position + 8] === 0x2a /* '*' */) {
at.position += 10 // skip past filename*=

// Remove leading http tab and spaces. See RFC for examples.
// https://datatracker.ietf.org/doc/html/rfc6266#section-5
collectASequenceOfBytes(
(char) => char === 0x20 || char === 0x09,
input,
at
)

const headerValue = collectASequenceOfBytes(
(char) => char !== 0x20 && char !== 0x0d && char !== 0x0a, // ' ' or CRLF
input,
at
)

if (
(headerValue[0] !== 0x75 && headerValue[0] !== 0x55) || // u or U
(headerValue[1] !== 0x74 && headerValue[1] !== 0x54) || // t or T
(headerValue[2] !== 0x66 && headerValue[2] !== 0x46) || // f or F
headerValue[3] !== 0x2d || // -
headerValue[4] !== 0x38 // 8
) {
throw parsingError('unknown encoding, expected utf-8\'\'')
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have no idea if utf-8 is the only accepted value here. Could not find references to other encodings in the RFC.

}

// skip utf-8''
filename = decodeURIComponent(new TextDecoder().decode(headerValue.subarray(7)))

position.position = at.position
} else {
// 1. Advance position so it points at the byte after the next 0x22 (") byte
// (the one in the sequence of bytes matched above).
position.position += 11

// Remove leading http tab and spaces. See RFC for examples.
// https://datatracker.ietf.org/doc/html/rfc6266#section-5
collectASequenceOfBytes(
(char) => char === 0x20 || char === 0x09,
input,
position
)

position.position++ // skip past " after removing whitespace

// 2. Set filename to the result of parsing a multipart/form-data name given
// input and position, if the result is not failure. Otherwise, return failure.
filename = parseMultipartFormDataName(input, position)
}
}
}

Expand Down Expand Up @@ -367,7 +390,7 @@ function parseMultipartFormDataHeaders (input, position) {
// 2.9. If position does not point to a sequence of bytes starting with 0x0D 0x0A
// (CR LF), return failure. Otherwise, advance position by 2 (past the newline).
if (input[position.position] !== 0x0d && input[position.position + 1] !== 0x0a) {
return 'failure'
throw parsingError('expected CRLF')
} else {
position.position += 2
}
Expand All @@ -393,7 +416,7 @@ function parseMultipartFormDataName (input, position) {

// 3. If the byte at position is not 0x22 ("), return failure. Otherwise, advance position by 1.
if (input[position.position] !== 0x22) {
return null // name could be 'failure'
throw parsingError('expected "')
} else {
position.position++
}
Expand Down Expand Up @@ -468,6 +491,10 @@ function bufferStartsWith (buffer, start, position) {
return true
}

function parsingError (cause) {
return new TypeError('Failed to parse body as FormData.', { cause: new TypeError(cause) })
}

module.exports = {
multipartFormDataParser,
validateBoundary
Expand Down
59 changes: 59 additions & 0 deletions test/busboy/issue-3760.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
'use strict'

const { test } = require('node:test')
const assert = require('node:assert')
const { Response } = require('../..')

// https://github.com/nodejs/undici/issues/3760
test('filename* parameter is parsed properly', async (t) => {
const response = new Response([
'--83d82e0d-9ced-44c0-ac79-4e66a827415b\r\n' +
'Content-Type: text/plain\r\n' +
'Content-Disposition: form-data; name="file"; filename*=UTF-8\'\'%e2%82%ac%20rates\r\n' +
'\r\n' +
'testabc\r\n' +
'--83d82e0d-9ced-44c0-ac79-4e66a827415b--\r\n' +
'\r\n'
].join(''), {
headers: {
'content-type': 'multipart/form-data; boundary="83d82e0d-9ced-44c0-ac79-4e66a827415b"'
}
})

const fd = await response.formData()
assert.deepEqual(fd.get('file').name, '€ rates')
})

test('whitespace after filename[*]= is ignored', async () => {
for (const response of [
new Response([
'--83d82e0d-9ced-44c0-ac79-4e66a827415b\r\n' +
'Content-Type: text/plain\r\n' +
'Content-Disposition: form-data; name="file"; filename*= utf-8\'\'hello\r\n' +
'\r\n' +
'testabc\r\n' +
'--83d82e0d-9ced-44c0-ac79-4e66a827415b--\r\n' +
'\r\n'
].join(''), {
headers: {
'content-type': 'multipart/form-data; boundary="83d82e0d-9ced-44c0-ac79-4e66a827415b"'
}
}),
new Response([
'--83d82e0d-9ced-44c0-ac79-4e66a827415b\r\n' +
'Content-Type: text/plain\r\n' +
'Content-Disposition: form-data; name="file"; filename= "hello"\r\n' +
'\r\n' +
'testabc\r\n' +
'--83d82e0d-9ced-44c0-ac79-4e66a827415b--\r\n' +
'\r\n'
].join(''), {
headers: {
'content-type': 'multipart/form-data; boundary="83d82e0d-9ced-44c0-ac79-4e66a827415b"'
}
})
]) {
const fd = await response.formData()
assert.deepEqual(fd.get('file').name, 'hello')
}
})
Loading