@@ -8,11 +8,11 @@ const assert = require('node:assert')
88const { isomorphicDecode } = require ( '../infra' )
99const { utf8DecodeBytes } = require ( '../../encoding' )
1010
11- const formDataNameBuffer = Buffer . from ( 'form-data; name="' )
12- const filenameBuffer = Buffer . from ( 'filename' )
1311const dd = Buffer . from ( '--' )
1412const ddcrlf = Buffer . from ( '--\r\n' )
1513
14+ const decoder = new TextDecoder ( )
15+
1616/**
1717 * @param {string } chars
1818 */
@@ -206,6 +206,113 @@ function multipartFormDataParser (input, mimeType) {
206206 }
207207}
208208
/**
 * Parses one Content-Disposition parameter starting at `position`, e.g.
 * `; name="field"`, `; filename=token` or `; filename*=utf-8''%E2%82%AC.txt`.
 *
 * `position` is advanced past everything that was consumed, so the caller can
 * invoke this repeatedly to walk all parameters of the header.
 *
 * @see https://datatracker.ietf.org/doc/html/rfc6266#section-4.1
 * @see https://datatracker.ietf.org/doc/html/rfc8187#section-3.2.1
 * @param {Buffer} input
 * @param {{ position: number }} position
 * @returns {{ name: string, value: string } | null} the lowercased parameter
 *   name and its decoded value, or `null` when no `name=` / `name*=` prefix is
 *   found at `position`.
 * @throws {TypeError} (via `parsingError`) when an extended value is not
 *   `utf-8'[language]'...`, when its percent-encoding is malformed, or when a
 *   quoted string has no closing quote.
 */
function parseContentDispositionAttribute (input, position) {
  const isTabOrSpace = (char) => char === 0x20 || char === 0x09

  // Skip the semicolon that separates this parameter from the previous one.
  if (input[position.position] === 0x3b /* ; */) {
    position.position++
  }

  // Skip whitespace
  collectASequenceOfBytes(isTabOrSpace, input, position)

  // Collect attribute name (token characters)
  const attributeName = collectASequenceOfBytes(
    (char) => isToken(char) && char !== 0x3d && char !== 0x2a, // not = or *
    input,
    position
  )

  if (attributeName.length === 0) {
    return null
  }

  const attrNameStr = attributeName.toString('ascii').toLowerCase()

  // Check for extended notation (attribute*)
  const isExtended = input[position.position] === 0x2a /* * */
  if (isExtended) {
    position.position++ // skip *
  }

  // Expect = sign
  if (input[position.position] !== 0x3d /* = */) {
    return null
  }
  position.position++ // skip =

  // Skip whitespace
  collectASequenceOfBytes(isTabOrSpace, input, position)

  let value

  if (isExtended) {
    // Extended attribute format: charset'language'encoded-value
    const headerValue = collectASequenceOfBytes(
      (char) => char !== 0x20 && char !== 0x0d && char !== 0x0a && char !== 0x3b, // not space, CRLF, or ;
      input,
      position
    )

    // The charset must be utf-8 (case insensitive) and must be terminated by
    // a single quote; only checking the five charset bytes would let inputs
    // such as `utf-8xx...` through.
    if (
      (headerValue[0] !== 0x75 && headerValue[0] !== 0x55) || // u or U
      (headerValue[1] !== 0x74 && headerValue[1] !== 0x54) || // t or T
      (headerValue[2] !== 0x66 && headerValue[2] !== 0x46) || // f or F
      headerValue[3] !== 0x2d || // -
      headerValue[4] !== 0x38 || // 8
      headerValue[5] !== 0x27 // '
    ) {
      throw parsingError('unknown encoding, expected utf-8\'\'')
    }

    // The language tag between the two quotes is optional and may be
    // non-empty (`utf-8'en'...`), so locate the closing quote instead of
    // assuming a fixed 7 byte prefix.
    const languageEnd = headerValue.indexOf(0x27, 6)
    if (languageEnd === -1) {
      throw parsingError('unknown encoding, expected utf-8\'\'')
    }

    // decodeURIComponent throws a URIError on malformed escapes; surface it
    // as a regular parsing failure like every other error in this parser.
    try {
      value = decodeURIComponent(decoder.decode(headerValue.subarray(languageEnd + 1)))
    } catch {
      throw parsingError('invalid percent-encoding in extended attribute value')
    }
  } else if (input[position.position] === 0x22 /* " */) {
    // Quoted string
    position.position++ // skip opening quote

    const quotedValue = collectASequenceOfBytes(
      (char) => char !== 0x0a && char !== 0x0d && char !== 0x22, // not LF, CR, or "
      input,
      position
    )

    if (input[position.position] !== 0x22) {
      throw parsingError('Closing quote not found')
    }
    position.position++ // skip closing quote

    // Only these three escapes are undone inside quoted values.
    value = decoder.decode(quotedValue)
      .replace(/%0A/ig, '\n')
      .replace(/%0D/ig, '\r')
      .replace(/%22/g, '"')
  } else {
    // Token value (no quotes)
    const tokenValue = collectASequenceOfBytes(
      (char) => isToken(char) && char !== 0x3b, // not ;
      input,
      position
    )

    value = decoder.decode(tokenValue)
  }

  return { name: attrNameStr, value }
}
315+
209316/**
210317 * @see https://andreubotella.github.io/multipart-form-data/#parse-multipart-form-data-headers
211318 * @param {Buffer } input
@@ -266,82 +373,42 @@ function parseMultipartFormDataHeaders (input, position) {
266373 // 2.8. Byte-lowercase header name and switch on the result:
267374 switch ( bufferToLowerCasedHeaderName ( headerName ) ) {
268375 case 'content-disposition' : {
269- // 1. Set name and filename to null.
270376 name = filename = null
271377
272- // 2. If position does not point to a sequence of bytes starting with
273- // `form-data; name="`, return failure.
274- if ( ! bufferStartsWith ( input , formDataNameBuffer , position ) ) {
275- throw parsingError ( 'expected form-data; name=" for content-disposition header' )
378+ // Collect the disposition type (should be "form-data")
379+ const dispositionType = collectASequenceOfBytes (
380+ ( char ) => isToken ( char ) ,
381+ input ,
382+ position
383+ )
384+
385+ if ( dispositionType . toString ( 'ascii' ) . toLowerCase ( ) !== 'form-data' ) {
386+ throw parsingError ( 'expected form-data for content-disposition header' )
276387 }
277388
278- // 3. Advance position so it points at the byte after the next 0x22 (")
279- // byte (the one in the sequence of bytes matched above).
280- position . position += 17
281-
282- // 4. Set name to the result of parsing a multipart/form-data name given
283- // input and position, if the result is not failure. Otherwise, return
284- // failure.
285- name = parseMultipartFormDataName ( input , position )
286-
287- // 5. If position points to a sequence of bytes starting with `; filename="`:
288- if ( input [ position . position ] === 0x3b /* ; */ && input [ position . position + 1 ] === 0x20 /* ' ' */ ) {
289- const at = { position : position . position + 2 }
290-
291- if ( bufferStartsWith ( input , filenameBuffer , at ) ) {
292- if ( input [ at . position + 8 ] === 0x2a /* '*' */ ) {
293- at . position += 10 // skip past filename*=
294-
295- // Remove leading http tab and spaces. See RFC for examples.
296- // https://datatracker.ietf.org/doc/html/rfc6266#section-5
297- collectASequenceOfBytes (
298- ( char ) => char === 0x20 || char === 0x09 ,
299- input ,
300- at
301- )
302-
303- const headerValue = collectASequenceOfBytes (
304- ( char ) => char !== 0x20 && char !== 0x0d && char !== 0x0a , // ' ' or CRLF
305- input ,
306- at
307- )
308-
309- if (
310- ( headerValue [ 0 ] !== 0x75 && headerValue [ 0 ] !== 0x55 ) || // u or U
311- ( headerValue [ 1 ] !== 0x74 && headerValue [ 1 ] !== 0x54 ) || // t or T
312- ( headerValue [ 2 ] !== 0x66 && headerValue [ 2 ] !== 0x46 ) || // f or F
313- headerValue [ 3 ] !== 0x2d || // -
314- headerValue [ 4 ] !== 0x38 // 8
315- ) {
316- throw parsingError ( 'unknown encoding, expected utf-8\'\'' )
317- }
318-
319- // skip utf-8''
320- filename = decodeURIComponent ( new TextDecoder ( ) . decode ( headerValue . subarray ( 7 ) ) )
321-
322- position . position = at . position
323- } else {
324- // 1. Advance position so it points at the byte after the next 0x22 (") byte
325- // (the one in the sequence of bytes matched above).
326- position . position += 11
327-
328- // Remove leading http tab and spaces. See RFC for examples.
329- // https://datatracker.ietf.org/doc/html/rfc6266#section-5
330- collectASequenceOfBytes (
331- ( char ) => char === 0x20 || char === 0x09 ,
332- input ,
333- position
334- )
335-
336- position . position ++ // skip past " after removing whitespace
337-
338- // 2. Set filename to the result of parsing a multipart/form-data name given
339- // input and position, if the result is not failure. Otherwise, return failure.
340- filename = parseMultipartFormDataName ( input , position )
341- }
389+ // Parse attributes recursively until CRLF
390+ while (
391+ position . position < input . length &&
392+ input [ position . position ] !== 0x0d &&
393+ input [ position . position + 1 ] !== 0x0a
394+ ) {
395+ const attribute = parseContentDispositionAttribute ( input , position )
396+
397+ if ( ! attribute ) {
398+ break
399+ }
400+
401+ if ( attribute . name === 'name' ) {
402+ name = attribute . value
403+ } else if ( attribute . name === 'filename' ) {
404+ filename = attribute . value
342405 }
343406 }
344407
408+ if ( name === null ) {
409+ throw parsingError ( 'name attribute is required in content-disposition header' )
410+ }
411+
345412 break
346413 }
347414 case 'content-type' : {
@@ -395,43 +462,6 @@ function parseMultipartFormDataHeaders (input, position) {
395462 }
396463}
397464
398- /**
399- * @see https://andreubotella.github.io/multipart-form-data/#parse-a-multipart-form-data-name
400- * @param {Buffer } input
401- * @param {{ position: number } } position
402- */
403- function parseMultipartFormDataName ( input , position ) {
404- // 1. Assert: The byte at (position - 1) is 0x22 (").
405- assert ( input [ position . position - 1 ] === 0x22 )
406-
407- // 2. Let name be the result of collecting a sequence of bytes that are not 0x0A (LF), 0x0D (CR) or 0x22 ("), given position.
408- /** @type {string | Buffer } */
409- let name = collectASequenceOfBytes (
410- ( char ) => char !== 0x0a && char !== 0x0d && char !== 0x22 ,
411- input ,
412- position
413- )
414-
415- // 3. If the byte at position is not 0x22 ("), return failure. Otherwise, advance position by 1.
416- if ( input [ position . position ] !== 0x22 ) {
417- throw parsingError ( 'expected "' )
418- } else {
419- position . position ++
420- }
421-
422- // 4. Replace any occurrence of the following subsequences in name with the given byte:
423- // - `%0A`: 0x0A (LF)
424- // - `%0D`: 0x0D (CR)
425- // - `%22`: 0x22 (")
426- name = new TextDecoder ( ) . decode ( name )
427- . replace ( / % 0 A / ig, '\n' )
428- . replace ( / % 0 D / ig, '\r' )
429- . replace ( / % 2 2 / g, '"' )
430-
431- // 5. Return the UTF-8 decoding without BOM of name.
432- return name
433- }
434-
435465/**
436466 * @param {(char: number) => boolean } condition
437467 * @param {Buffer } input
@@ -493,6 +523,58 @@ function parsingError (cause) {
493523 return new TypeError ( 'Failed to parse body as FormData.' , { cause : new TypeError ( cause ) } )
494524}
495525
/**
 * Tells whether a byte is a US-ASCII control character:
 * CTL = octets 0 - 31, plus DEL (127).
 * @param {number} char
 * @returns {boolean}
 */
function isCTL (char) {
  if (char === 0x7f) {
    // DEL
    return true
  }

  return char <= 0x1f
}
534+
/**
 * Tells whether a byte is one of the MIME "tspecials", i.e. a character that
 * must be placed inside a quoted-string to be used in a parameter value:
 *
 * tspecials := "(" / ")" / "<" / ">" / "@" /
 *              "," / ";" / ":" / "\" / <">
 *              "/" / "[" / "]" / "?" / "="
 * @param {number} char
 * @returns {boolean}
 */
function isTSpecial (char) {
  switch (char) {
    case 0x22: // "
    case 0x28: // (
    case 0x29: // )
    case 0x2c: // ,
    case 0x2f: // /
    case 0x3a: // :
    case 0x3b: // ;
    case 0x3c: // <
    case 0x3d: // =
    case 0x3e: // >
    case 0x3f: // ?
    case 0x40: // @
    case 0x5b: // [
    case 0x5c: // \
    case 0x5d: // ]
      return true
    default:
      return false
  }
}
562+
/**
 * Tells whether a byte may appear in a MIME token:
 *
 * token := 1*<any (US-ASCII) CHAR except SPACE, CTLs,
 *          or tspecials>
 * @param {number} char
 * @returns {boolean}
 */
function isToken (char) {
  // Anything that is not a 7-bit ASCII code is rejected up front.
  if (!(char <= 0x7f)) {
    return false
  }

  // Space and horizontal tab are never part of a token.
  if (char === 0x20 || char === 0x09) {
    return false
  }

  if (isCTL(char)) {
    return false
  }

  return !isTSpecial(char)
}
577+
496578module . exports = {
497579 multipartFormDataParser,
498580 validateBoundary
0 commit comments