Skip to content

Commit f29471c

Browse files
authored
fix fd parsing unquoted attribute values (#4662)
1 parent 7cc4fcf commit f29471c

File tree

2 files changed

+252
-108
lines changed

2 files changed

+252
-108
lines changed

lib/web/fetch/formdata-parser.js

Lines changed: 190 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,11 @@ const assert = require('node:assert')
88
const { isomorphicDecode } = require('../infra')
99
const { utf8DecodeBytes } = require('../../encoding')
1010

11-
const formDataNameBuffer = Buffer.from('form-data; name="')
12-
const filenameBuffer = Buffer.from('filename')
1311
const dd = Buffer.from('--')
1412
const ddcrlf = Buffer.from('--\r\n')
1513

14+
const decoder = new TextDecoder()
15+
1616
/**
1717
* @param {string} chars
1818
*/
@@ -206,6 +206,113 @@ function multipartFormDataParser (input, mimeType) {
206206
}
207207
}
208208

209+
/**
 * Parses one content-disposition attribute starting at `position`, e.g.
 * `name="value"`, `name=value` or `filename*=utf-8''encoded`.
 *
 * An optional leading `;` and any spaces/tabs after it are skipped first.
 * On success `position` points at the byte following the attribute value.
 * When `null` is returned `position` may already have been advanced past the
 * bytes that were inspected.
 *
 * @param {Buffer} input
 * @param {{ position: number }} position
 * @returns {{ name: string, value: string } | null} the lowercased attribute
 *   name and its decoded value, or `null` if no attribute name or no `=` is
 *   found.
 * @throws {TypeError} a parsing error when an extended value does not start
 *   with `utf-8'<language>'`, when its percent-encoding is malformed, or when
 *   a quoted value has no closing quote.
 */
function parseContentDispositionAttribute (input, position) {
  const isSpaceOrTab = (char) => char === 0x20 || char === 0x09

  // Skip the `;` that separates this attribute from whatever preceded it.
  if (input[position.position] === 0x3b /* ; */) {
    position.position++
  }

  // Skip whitespace between the separator and the attribute name.
  collectASequenceOfBytes(isSpaceOrTab, input, position)

  // Collect the attribute name. `*` is a valid token character, so it has to
  // be excluded explicitly to leave the extended-notation marker in place.
  const attributeName = collectASequenceOfBytes(
    (char) => isToken(char) && char !== 0x3d && char !== 0x2a, // not = or *
    input,
    position
  )

  if (attributeName.length === 0) {
    return null
  }

  const attrNameStr = attributeName.toString('ascii').toLowerCase()

  // Extended notation is signalled by a `*` right after the name (attribute*=).
  const isExtended = input[position.position] === 0x2a /* * */
  if (isExtended) {
    position.position++ // skip *
  }

  // The name must be followed by `=`.
  if (input[position.position] !== 0x3d /* = */) {
    return null
  }
  position.position++ // skip =

  // Skip whitespace between `=` and the value.
  collectASequenceOfBytes(isSpaceOrTab, input, position)

  let value

  if (isExtended) {
    // Extended value format: charset'language'percent-encoded-value
    const headerValue = collectASequenceOfBytes(
      (char) => char !== 0x20 && char !== 0x0d && char !== 0x0a && char !== 0x3b, // not space, CR, LF, or ;
      input,
      position
    )

    // Only the utf-8 charset is accepted (case insensitive) and it must be
    // terminated by the first single quote.
    if (
      (headerValue[0] !== 0x75 && headerValue[0] !== 0x55) || // u or U
      (headerValue[1] !== 0x74 && headerValue[1] !== 0x54) || // t or T
      (headerValue[2] !== 0x66 && headerValue[2] !== 0x46) || // f or F
      headerValue[3] !== 0x2d || // -
      headerValue[4] !== 0x38 || // 8
      headerValue[5] !== 0x27 // '
    ) {
      throw parsingError('unknown encoding, expected utf-8\'\'')
    }

    // The language tag between the two quotes is optional and is ignored, so
    // the value starts after the second quote rather than at a fixed offset.
    const closingQuote = headerValue.indexOf(0x27, 6)
    if (closingQuote === -1) {
      throw parsingError('unknown encoding, expected utf-8\'\'')
    }

    // decodeURIComponent throws URIError on malformed escapes; report that as
    // a regular parsing failure instead.
    try {
      value = decodeURIComponent(decoder.decode(headerValue.subarray(closingQuote + 1)))
    } catch {
      throw parsingError('invalid percent-encoding in extended attribute value')
    }
  } else if (input[position.position] === 0x22 /* " */) {
    // Quoted string
    position.position++ // skip opening quote

    const quotedValue = collectASequenceOfBytes(
      (char) => char !== 0x0a && char !== 0x0d && char !== 0x22, // not LF, CR, or "
      input,
      position
    )

    if (input[position.position] !== 0x22) {
      throw parsingError('Closing quote not found')
    }
    position.position++ // skip closing quote

    // Undo the escaping of LF, CR and " inside quoted values.
    value = decoder.decode(quotedValue)
      .replace(/%0A/ig, '\n')
      .replace(/%0D/ig, '\r')
      .replace(/%22/g, '"')
  } else {
    // Token value (no quotes)
    const tokenValue = collectASequenceOfBytes(
      (char) => isToken(char) && char !== 0x3b, // not ;
      input,
      position
    )

    value = decoder.decode(tokenValue)
  }

  return { name: attrNameStr, value }
}
315+
209316
/**
210317
* @see https://andreubotella.github.io/multipart-form-data/#parse-multipart-form-data-headers
211318
* @param {Buffer} input
@@ -266,82 +373,42 @@ function parseMultipartFormDataHeaders (input, position) {
266373
// 2.8. Byte-lowercase header name and switch on the result:
267374
switch (bufferToLowerCasedHeaderName(headerName)) {
268375
case 'content-disposition': {
269-
// 1. Set name and filename to null.
270376
name = filename = null
271377

272-
// 2. If position does not point to a sequence of bytes starting with
273-
// `form-data; name="`, return failure.
274-
if (!bufferStartsWith(input, formDataNameBuffer, position)) {
275-
throw parsingError('expected form-data; name=" for content-disposition header')
378+
// Collect the disposition type (should be "form-data")
379+
const dispositionType = collectASequenceOfBytes(
380+
(char) => isToken(char),
381+
input,
382+
position
383+
)
384+
385+
if (dispositionType.toString('ascii').toLowerCase() !== 'form-data') {
386+
throw parsingError('expected form-data for content-disposition header')
276387
}
277388

278-
// 3. Advance position so it points at the byte after the next 0x22 (")
279-
// byte (the one in the sequence of bytes matched above).
280-
position.position += 17
281-
282-
// 4. Set name to the result of parsing a multipart/form-data name given
283-
// input and position, if the result is not failure. Otherwise, return
284-
// failure.
285-
name = parseMultipartFormDataName(input, position)
286-
287-
// 5. If position points to a sequence of bytes starting with `; filename="`:
288-
if (input[position.position] === 0x3b /* ; */ && input[position.position + 1] === 0x20 /* ' ' */) {
289-
const at = { position: position.position + 2 }
290-
291-
if (bufferStartsWith(input, filenameBuffer, at)) {
292-
if (input[at.position + 8] === 0x2a /* '*' */) {
293-
at.position += 10 // skip past filename*=
294-
295-
// Remove leading http tab and spaces. See RFC for examples.
296-
// https://datatracker.ietf.org/doc/html/rfc6266#section-5
297-
collectASequenceOfBytes(
298-
(char) => char === 0x20 || char === 0x09,
299-
input,
300-
at
301-
)
302-
303-
const headerValue = collectASequenceOfBytes(
304-
(char) => char !== 0x20 && char !== 0x0d && char !== 0x0a, // ' ' or CRLF
305-
input,
306-
at
307-
)
308-
309-
if (
310-
(headerValue[0] !== 0x75 && headerValue[0] !== 0x55) || // u or U
311-
(headerValue[1] !== 0x74 && headerValue[1] !== 0x54) || // t or T
312-
(headerValue[2] !== 0x66 && headerValue[2] !== 0x46) || // f or F
313-
headerValue[3] !== 0x2d || // -
314-
headerValue[4] !== 0x38 // 8
315-
) {
316-
throw parsingError('unknown encoding, expected utf-8\'\'')
317-
}
318-
319-
// skip utf-8''
320-
filename = decodeURIComponent(new TextDecoder().decode(headerValue.subarray(7)))
321-
322-
position.position = at.position
323-
} else {
324-
// 1. Advance position so it points at the byte after the next 0x22 (") byte
325-
// (the one in the sequence of bytes matched above).
326-
position.position += 11
327-
328-
// Remove leading http tab and spaces. See RFC for examples.
329-
// https://datatracker.ietf.org/doc/html/rfc6266#section-5
330-
collectASequenceOfBytes(
331-
(char) => char === 0x20 || char === 0x09,
332-
input,
333-
position
334-
)
335-
336-
position.position++ // skip past " after removing whitespace
337-
338-
// 2. Set filename to the result of parsing a multipart/form-data name given
339-
// input and position, if the result is not failure. Otherwise, return failure.
340-
filename = parseMultipartFormDataName(input, position)
341-
}
389+
// Parse attributes recursively until CRLF
390+
while (
391+
position.position < input.length &&
392+
input[position.position] !== 0x0d &&
393+
input[position.position + 1] !== 0x0a
394+
) {
395+
const attribute = parseContentDispositionAttribute(input, position)
396+
397+
if (!attribute) {
398+
break
399+
}
400+
401+
if (attribute.name === 'name') {
402+
name = attribute.value
403+
} else if (attribute.name === 'filename') {
404+
filename = attribute.value
342405
}
343406
}
344407

408+
if (name === null) {
409+
throw parsingError('name attribute is required in content-disposition header')
410+
}
411+
345412
break
346413
}
347414
case 'content-type': {
@@ -395,43 +462,6 @@ function parseMultipartFormDataHeaders (input, position) {
395462
}
396463
}
397464

398-
/**
399-
* @see https://andreubotella.github.io/multipart-form-data/#parse-a-multipart-form-data-name
400-
* @param {Buffer} input
401-
* @param {{ position: number }} position
402-
*/
403-
function parseMultipartFormDataName (input, position) {
404-
// 1. Assert: The byte at (position - 1) is 0x22 (").
405-
assert(input[position.position - 1] === 0x22)
406-
407-
// 2. Let name be the result of collecting a sequence of bytes that are not 0x0A (LF), 0x0D (CR) or 0x22 ("), given position.
408-
/** @type {string | Buffer} */
409-
let name = collectASequenceOfBytes(
410-
(char) => char !== 0x0a && char !== 0x0d && char !== 0x22,
411-
input,
412-
position
413-
)
414-
415-
// 3. If the byte at position is not 0x22 ("), return failure. Otherwise, advance position by 1.
416-
if (input[position.position] !== 0x22) {
417-
throw parsingError('expected "')
418-
} else {
419-
position.position++
420-
}
421-
422-
// 4. Replace any occurrence of the following subsequences in name with the given byte:
423-
// - `%0A`: 0x0A (LF)
424-
// - `%0D`: 0x0D (CR)
425-
// - `%22`: 0x22 (")
426-
name = new TextDecoder().decode(name)
427-
.replace(/%0A/ig, '\n')
428-
.replace(/%0D/ig, '\r')
429-
.replace(/%22/g, '"')
430-
431-
// 5. Return the UTF-8 decoding without BOM of name.
432-
return name
433-
}
434-
435465
/**
436466
* @param {(char: number) => boolean} condition
437467
* @param {Buffer} input
@@ -493,6 +523,58 @@ function parsingError (cause) {
493523
return new TypeError('Failed to parse body as FormData.', { cause: new TypeError(cause) })
494524
}
495525

526+
/**
 * Tells whether an octet is a US-ASCII control character:
 * CTL = octets 0 - 31, plus DEL (127).
 * @param {number} char
 * @returns {boolean}
 */
function isCTL (char) {
  if (char === 0x7f) {
    return true // DEL
  }

  return char <= 0x1f
}
534+
535+
/**
 * Tells whether an octet is one of the `tspecials` characters:
 *
 * tspecials :=  "(" / ")" / "<" / ">" / "@" /
 *               "," / ";" / ":" / "\" / <">
 *               "/" / "[" / "]" / "?" / "="
 *               ; Must be in quoted-string,
 *               ; to use within parameter values
 * @param {number} char
 * @returns {boolean}
 */
function isTSpecial (char) {
  switch (char) {
    case 0x28: // (
    case 0x29: // )
    case 0x3c: // <
    case 0x3e: // >
    case 0x40: // @
    case 0x2c: // ,
    case 0x3b: // ;
    case 0x3a: // :
    case 0x5c: // \
    case 0x22: // "
    case 0x2f: // /
    case 0x5b: // [
    case 0x5d: // ]
    case 0x3f: // ?
    case 0x3d: // =
      return true
    default:
      return false
  }
}
562+
563+
/**
 * Tells whether an octet may appear in a token:
 *
 * token := 1*<any (US-ASCII) CHAR except SPACE, CTLs,
 *             or tspecials>
 * @param {number} char
 * @returns {boolean}
 */
function isToken (char) {
  // Anything that is not a US-ASCII octet is rejected up front.
  if (!(char <= 0x7f)) {
    return false
  }

  // Space and horizontal tab never belong to a token.
  if (char === 0x20 || char === 0x09) {
    return false
  }

  return !isCTL(char) && !isTSpecial(char)
}
577+
496578
module.exports = {
497579
multipartFormDataParser,
498580
validateBoundary

0 commit comments

Comments
 (0)