Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
298 changes: 190 additions & 108 deletions lib/web/fetch/formdata-parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ const assert = require('node:assert')
const { isomorphicDecode } = require('../infra')
const { utf8DecodeBytes } = require('../../encoding')

const formDataNameBuffer = Buffer.from('form-data; name="')
const filenameBuffer = Buffer.from('filename')
const dd = Buffer.from('--')
const ddcrlf = Buffer.from('--\r\n')

const decoder = new TextDecoder()

/**
* @param {string} chars
*/
Expand Down Expand Up @@ -206,6 +206,113 @@ function multipartFormDataParser (input, mimeType) {
}
}

/**
 * Parses a single content-disposition attribute starting at `position`.
 *
 * Three value forms are understood:
 *  - quoted string:  name="value"   (`%0A`, `%0D` and `%22` are unescaped)
 *  - token:          name=value
 *  - extended:       filename*=utf-8'language'percent-encoded-value
 *    (the language tag between the two single quotes may be empty)
 *
 * An optional leading `;` and any spaces/tabs before the attribute name are
 * skipped. `position` is advanced past every byte that was consumed.
 *
 * @param {Buffer} input
 * @param {{ position: number }} position
 * @returns {{ name: string, value: string } | null} the lowercased attribute
 *   name and its decoded value, or null when no attribute name or no `=`
 *   could be read (position may already have been advanced in that case).
 * @throws {TypeError} a parsing error when a quoted value is not terminated,
 *   when an extended value does not start with `utf-8'...'`, or when its
 *   percent-encoding is malformed.
 */
function parseContentDispositionAttribute (input, position) {
  // Skip the separating semicolon, if any
  if (input[position.position] === 0x3b /* ; */) {
    position.position++
  }

  // Skip whitespace
  collectASequenceOfBytes(
    (char) => char === 0x20 || char === 0x09,
    input,
    position
  )

  // Collect attribute name (token characters)
  const attributeName = collectASequenceOfBytes(
    (char) => isToken(char) && char !== 0x3d && char !== 0x2a, // not = or *
    input,
    position
  )

  if (attributeName.length === 0) {
    return null
  }

  const attrNameStr = attributeName.toString('ascii').toLowerCase()

  // Check for extended notation (attribute*)
  const isExtended = input[position.position] === 0x2a /* * */
  if (isExtended) {
    position.position++ // skip *
  }

  // Expect = sign
  if (input[position.position] !== 0x3d /* = */) {
    return null
  }
  position.position++ // skip =

  // Skip whitespace
  collectASequenceOfBytes(
    (char) => char === 0x20 || char === 0x09,
    input,
    position
  )

  let value

  if (isExtended) {
    // Extended attribute format: charset'language'encoded-value
    const headerValue = collectASequenceOfBytes(
      (char) => char !== 0x20 && char !== 0x0d && char !== 0x0a && char !== 0x3b, // not space, CRLF, or ;
      input,
      position
    )

    // The charset must be utf-8 (case insensitive) and must be followed by
    // the single quote that opens the (possibly empty) language tag.
    if (
      (headerValue[0] !== 0x75 && headerValue[0] !== 0x55) || // u or U
      (headerValue[1] !== 0x74 && headerValue[1] !== 0x54) || // t or T
      (headerValue[2] !== 0x66 && headerValue[2] !== 0x46) || // f or F
      headerValue[3] !== 0x2d || // -
      headerValue[4] !== 0x38 || // 8
      headerValue[5] !== 0x27 // '
    ) {
      throw parsingError('unknown encoding, expected utf-8\'\'')
    }

    // Find the single quote that closes the language tag. Blindly skipping
    // 7 bytes would swallow part of the value whenever a language is given.
    const languageEnd = headerValue.indexOf(0x27, 6)

    if (languageEnd === -1) {
      throw parsingError('unknown encoding, expected utf-8\'\'')
    }

    // decodeURIComponent throws a URIError on malformed percent-encoding;
    // report it like every other parsing failure instead.
    try {
      value = decodeURIComponent(decoder.decode(headerValue.subarray(languageEnd + 1)))
    } catch {
      throw parsingError('invalid percent-encoding in extended attribute value')
    }
  } else if (input[position.position] === 0x22 /* " */) {
    // Quoted string
    position.position++ // skip opening quote

    const quotedValue = collectASequenceOfBytes(
      (char) => char !== 0x0a && char !== 0x0d && char !== 0x22, // not LF, CR, or "
      input,
      position
    )

    if (input[position.position] !== 0x22) {
      throw parsingError('Closing quote not found')
    }
    position.position++ // skip closing quote

    value = decoder.decode(quotedValue)
      .replace(/%0A/ig, '\n')
      .replace(/%0D/ig, '\r')
      .replace(/%22/g, '"')
  } else {
    // Token value (no quotes)
    const tokenValue = collectASequenceOfBytes(
      (char) => isToken(char) && char !== 0x3b, // not ;
      input,
      position
    )

    value = decoder.decode(tokenValue)
  }

  return { name: attrNameStr, value }
}

/**
* @see https://andreubotella.github.io/multipart-form-data/#parse-multipart-form-data-headers
* @param {Buffer} input
Expand Down Expand Up @@ -266,82 +373,42 @@ function parseMultipartFormDataHeaders (input, position) {
// 2.8. Byte-lowercase header name and switch on the result:
switch (bufferToLowerCasedHeaderName(headerName)) {
case 'content-disposition': {
// 1. Set name and filename to null.
name = filename = null

// 2. If position does not point to a sequence of bytes starting with
// `form-data; name="`, return failure.
if (!bufferStartsWith(input, formDataNameBuffer, position)) {
throw parsingError('expected form-data; name=" for content-disposition header')
// Collect the disposition type (should be "form-data")
const dispositionType = collectASequenceOfBytes(
(char) => isToken(char),
input,
position
)

if (dispositionType.toString('ascii').toLowerCase() !== 'form-data') {
throw parsingError('expected form-data for content-disposition header')
}

// 3. Advance position so it points at the byte after the next 0x22 (")
// byte (the one in the sequence of bytes matched above).
position.position += 17

// 4. Set name to the result of parsing a multipart/form-data name given
// input and position, if the result is not failure. Otherwise, return
// failure.
name = parseMultipartFormDataName(input, position)

// 5. If position points to a sequence of bytes starting with `; filename="`:
if (input[position.position] === 0x3b /* ; */ && input[position.position + 1] === 0x20 /* ' ' */) {
const at = { position: position.position + 2 }

if (bufferStartsWith(input, filenameBuffer, at)) {
if (input[at.position + 8] === 0x2a /* '*' */) {
at.position += 10 // skip past filename*=

// Remove leading http tab and spaces. See RFC for examples.
// https://datatracker.ietf.org/doc/html/rfc6266#section-5
collectASequenceOfBytes(
(char) => char === 0x20 || char === 0x09,
input,
at
)

const headerValue = collectASequenceOfBytes(
(char) => char !== 0x20 && char !== 0x0d && char !== 0x0a, // ' ' or CRLF
input,
at
)

if (
(headerValue[0] !== 0x75 && headerValue[0] !== 0x55) || // u or U
(headerValue[1] !== 0x74 && headerValue[1] !== 0x54) || // t or T
(headerValue[2] !== 0x66 && headerValue[2] !== 0x46) || // f or F
headerValue[3] !== 0x2d || // -
headerValue[4] !== 0x38 // 8
) {
throw parsingError('unknown encoding, expected utf-8\'\'')
}

// skip utf-8''
filename = decodeURIComponent(new TextDecoder().decode(headerValue.subarray(7)))

position.position = at.position
} else {
// 1. Advance position so it points at the byte after the next 0x22 (") byte
// (the one in the sequence of bytes matched above).
position.position += 11

// Remove leading http tab and spaces. See RFC for examples.
// https://datatracker.ietf.org/doc/html/rfc6266#section-5
collectASequenceOfBytes(
(char) => char === 0x20 || char === 0x09,
input,
position
)

position.position++ // skip past " after removing whitespace

// 2. Set filename to the result of parsing a multipart/form-data name given
// input and position, if the result is not failure. Otherwise, return failure.
filename = parseMultipartFormDataName(input, position)
}
// Parse attributes recursively until CRLF
while (
position.position < input.length &&
input[position.position] !== 0x0d &&
input[position.position + 1] !== 0x0a
) {
const attribute = parseContentDispositionAttribute(input, position)

if (!attribute) {
break
}

if (attribute.name === 'name') {
name = attribute.value
} else if (attribute.name === 'filename') {
filename = attribute.value
}
}

if (name === null) {
throw parsingError('name attribute is required in content-disposition header')
}

break
}
case 'content-type': {
Expand Down Expand Up @@ -395,43 +462,6 @@ function parseMultipartFormDataHeaders (input, position) {
}
}

/**
 * Reads a quoted multipart/form-data name whose opening quote has already
 * been consumed, leaving `position` just past the closing quote.
 *
 * @see https://andreubotella.github.io/multipart-form-data/#parse-a-multipart-form-data-name
 * @param {Buffer} input
 * @param {{ position: number }} position
 * @returns {string} the decoded name with `%0A`, `%0D` and `%22` unescaped
 * @throws {TypeError} a parsing error when the closing quote is missing
 */
function parseMultipartFormDataName (input, position) {
  // Step 1: the caller must already have stepped over the opening 0x22 (").
  assert(input[position.position - 1] === 0x22)

  // Step 2: everything up to the next 0x0A (LF), 0x0D (CR) or 0x22 (")
  // belongs to the name.
  const isNameByte = (byte) => !(byte === 0x0a || byte === 0x0d || byte === 0x22)
  const rawName = collectASequenceOfBytes(isNameByte, input, position)

  // Step 3: the name has to be terminated by a closing quote; step over it.
  const terminator = input[position.position]
  if (terminator !== 0x22) {
    throw parsingError('expected "')
  }
  position.position += 1

  // Steps 4-5: decode the bytes, then undo the three escapes in the same
  // order as the algorithm lists them: `%0A` -> LF, `%0D` -> CR, `%22` -> ".
  const decoded = new TextDecoder().decode(rawName)
  const withLineFeeds = decoded.replace(/%0A/ig, '\n')
  const withCarriageReturns = withLineFeeds.replace(/%0D/ig, '\r')

  return withCarriageReturns.replace(/%22/g, '"')
}

/**
* @param {(char: number) => boolean} condition
* @param {Buffer} input
Expand Down Expand Up @@ -493,6 +523,58 @@ function parsingError (cause) {
return new TypeError('Failed to parse body as FormData.', { cause: new TypeError(cause) })
}

/**
 * Tells whether a byte is a control character as defined by
 *   CTL = <any US-ASCII control character
 *         (octets 0 - 31) and DEL (127)>
 * @param {number} char
 * @returns {boolean}
 */
function isCTL (char) {
  // DEL sits apart from the contiguous 0-31 range
  if (char === 0x7f) {
    return true
  }

  return char <= 0x1f
}

/**
 * Tells whether a byte is one of the "tspecials":
 *   tspecials :=  "(" / ")" / "<" / ">" / "@" /
 *                 "," / ";" / ":" / "\" / <">
 *                 "/" / "[" / "]" / "?" / "="
 *                 ; Must be in quoted-string,
 *                 ; to use within parameter values
 * @param {number} char
 * @returns {boolean}
 */
function isTSpecial (char) {
  switch (char) {
    case 0x28: // (
    case 0x29: // )
    case 0x3c: // <
    case 0x3e: // >
    case 0x40: // @
    case 0x2c: // ,
    case 0x3b: // ;
    case 0x3a: // :
    case 0x5c: // \
    case 0x22: // "
    case 0x2f: // /
    case 0x5b: // [
    case 0x5d: // ]
    case 0x3f: // ?
    case 0x3d: // =
      return true
    default:
      return false
  }
}

/**
 * Tells whether a byte may appear in a token:
 *   token := 1*<any (US-ASCII) CHAR except SPACE, CTLs,
 *              or tspecials>
 * @param {number} char
 * @returns {boolean}
 */
function isToken (char) {
  // Non-ASCII bytes, SPACE and HTAB are rejected up front
  if (!(char <= 0x7f) || char === 0x20 || char === 0x09) {
    return false
  }

  // What remains is a token byte unless it is a control or a tspecial
  return !(isCTL(char) || isTSpecial(char))
}

module.exports = {
multipartFormDataParser,
validateBoundary
Expand Down
Loading
Loading