nodejs
diff --git a/‎lib/web/fetch/formdata-parser.js‎
Lines changed: 190 additions & 108 deletions b/‎lib/web/fetch/formdata-parser.js‎
Lines changed: 190 additions & 108 deletions
@@ -8,11 +8,11 @@ const assert = require('node:assert')
 const { isomorphicDecode } = require('../infra')
 const { utf8DecodeBytes } = require('../../encoding')
 
-const formDataNameBuffer = Buffer.from('form-data; name="')
-const filenameBuffer = Buffer.from('filename')
 const dd = Buffer.from('--')
 const ddcrlf = Buffer.from('--\r\n')
 
+const decoder = new TextDecoder()
+
 /**
  * @param {string} chars
  */
@@ -206,6 +206,113 @@ function multipartFormDataParser (input, mimeType) {
   }
 }
 
+/**
+ * Parses one content-disposition attribute (e.g., name="value" or filename*=utf-8''encoded), advancing position past it
+ * @param {Buffer} input
+ * @param {{ position: number }} position cursor into input; mutated in place as bytes are consumed
+ * @returns {{ name: string, value: string } | null} lowercased name and decoded value; null if no attribute name or no "=" is found
+ */
+function parseContentDispositionAttribute (input, position) {
+  // Skip a single leading semicolon, if present
+  if (input[position.position] === 0x3b /* ; */) {
+    position.position++
+  }
+
+  // Skip spaces and horizontal tabs
+  collectASequenceOfBytes(
+    (char) => char === 0x20 || char === 0x09,
+    input,
+    position
+  )
+
+  // Collect attribute name (token characters, stopping before = or *)
+  const attributeName = collectASequenceOfBytes(
+    (char) => isToken(char) && char !== 0x3d && char !== 0x2a, // not = or *
+    input,
+    position
+  )
+
+  if (attributeName.length === 0) {
+    return null
+  }
+
+  const attrNameStr = attributeName.toString('ascii').toLowerCase()
+
+  // Check for extended notation (attribute*)
+  const isExtended = input[position.position] === 0x2a /* * */
+  if (isExtended) {
+    position.position++ // skip *
+  }
+
+  // Expect = sign; any other byte ends attribute parsing with null
+  if (input[position.position] !== 0x3d /* = */) {
+    return null
+  }
+  position.position++ // skip =
+
+  // Skip spaces and horizontal tabs after =
+  collectASequenceOfBytes(
+    (char) => char === 0x20 || char === 0x09,
+    input,
+    position
+  )
+
+  let value
+
+  if (isExtended) {
+    // Extended attribute format: charset'language'encoded-value
+    const headerValue = collectASequenceOfBytes(
+      (char) => char !== 0x20 && char !== 0x0d && char !== 0x0a && char !== 0x3b, // not space, CR, LF, or ;
+      input,
+      position
+    )
+
+    // Check for a utf-8 charset (letters case insensitive). NOTE(review): only bytes 0-4 are tested; the two ' delimiters at [5] and [6] are not verified
+    if (
+      (headerValue[0] !== 0x75 && headerValue[0] !== 0x55) || // u or U
+      (headerValue[1] !== 0x74 && headerValue[1] !== 0x54) || // t or T
+      (headerValue[2] !== 0x66 && headerValue[2] !== 0x46) || // f or F
+      headerValue[3] !== 0x2d || // -
+      headerValue[4] !== 0x38 // 8
+    ) {
+      throw parsingError('unknown encoding, expected utf-8\'\'')
+    }
+
+    // Skip the 7-byte utf-8'' prefix and percent-decode the rest. NOTE(review): decodeURIComponent throws URIError on malformed escapes and that is not wrapped in parsingError
+    value = decodeURIComponent(decoder.decode(headerValue.subarray(7)))
+  } else if (input[position.position] === 0x22 /* " */) {
+    // Quoted string
+    position.position++ // skip opening quote
+
+    const quotedValue = collectASequenceOfBytes(
+      (char) => char !== 0x0a && char !== 0x0d && char !== 0x22, // not LF, CR, or "
+      input,
+      position
+    )
+
+    if (input[position.position] !== 0x22) {
+      throw parsingError('Closing quote not found')
+    }
+    position.position++ // skip closing quote
+
+    value = decoder.decode(quotedValue) // then turn the %0A, %0D and %22 escapes back into LF, CR and "
+      .replace(/%0A/ig, '\n')
+      .replace(/%0D/ig, '\r')
+      .replace(/%22/g, '"')
+  } else {
+    // Token value (no quotes)
+    const tokenValue = collectASequenceOfBytes(
+      (char) => isToken(char) && char !== 0x3b, // not ;
+      input,
+      position
+    )
+
+    value = decoder.decode(tokenValue)
+  }
+
+  return { name: attrNameStr, value }
+}
+
 /**
  * @see https://andreubotella.github.io/multipart-form-data/#parse-multipart-form-data-headers
  * @param {Buffer} input
@@ -266,82 +373,42 @@ function parseMultipartFormDataHeaders (input, position) {
     // 2.8. Byte-lowercase header name and switch on the result:
     switch (bufferToLowerCasedHeaderName(headerName)) {
       case 'content-disposition': {
-        // 1. Set name and filename to null.
         name = filename = null
 
-        // 2. If position does not point to a sequence of bytes starting with
-        //    `form-data; name="`, return failure.
-        if (!bufferStartsWith(input, formDataNameBuffer, position)) {
-          throw parsingError('expected form-data; name=" for content-disposition header')
+        // Collect the disposition type (should be "form-data")
+        const dispositionType = collectASequenceOfBytes(
+          (char) => isToken(char),
+          input,
+          position
+        )
+
+        if (dispositionType.toString('ascii').toLowerCase() !== 'form-data') {
+          throw parsingError('expected form-data for content-disposition header')
         }
 
-        // 3. Advance position so it points at the byte after the next 0x22 (")
-        //    byte (the one in the sequence of bytes matched above).
-        position.position += 17
-
-        // 4. Set name to the result of parsing a multipart/form-data name given
-        //    input and position, if the result is not failure. Otherwise, return
-        //    failure.
-        name = parseMultipartFormDataName(input, position)
-
-        // 5. If position points to a sequence of bytes starting with `; filename="`:
-        if (input[position.position] === 0x3b /* ; */ && input[position.position + 1] === 0x20 /* ' ' */) {
-          const at = { position: position.position + 2 }
-
-          if (bufferStartsWith(input, filenameBuffer, at)) {
-            if (input[at.position + 8] === 0x2a /* '*' */) {
-              at.position += 10 // skip past filename*=
-
-              // Remove leading http tab and spaces. See RFC for examples.
-              // https://datatracker.ietf.org/doc/html/rfc6266#section-5
-              collectASequenceOfBytes(
-                (char) => char === 0x20 || char === 0x09,
-                input,
-                at
-              )
-
-              const headerValue = collectASequenceOfBytes(
-                (char) => char !== 0x20 && char !== 0x0d && char !== 0x0a, // ' ' or CRLF
-                input,
-                at
-              )
-
-              if (
-                (headerValue[0] !== 0x75 && headerValue[0] !== 0x55) || // u or U
-                (headerValue[1] !== 0x74 && headerValue[1] !== 0x54) || // t or T
-                (headerValue[2] !== 0x66 && headerValue[2] !== 0x46) || // f or F
-                headerValue[3] !== 0x2d || // -
-                headerValue[4] !== 0x38 // 8
-              ) {
-                throw parsingError('unknown encoding, expected utf-8\'\'')
-              }
-
-              // skip utf-8''
-              filename = decodeURIComponent(new TextDecoder().decode(headerValue.subarray(7)))
-
-              position.position = at.position
-            } else {
-              // 1. Advance position so it points at the byte after the next 0x22 (") byte
-              //    (the one in the sequence of bytes matched above).
-              position.position += 11
-
-              // Remove leading http tab and spaces. See RFC for examples.
-              // https://datatracker.ietf.org/doc/html/rfc6266#section-5
-              collectASequenceOfBytes(
-                (char) => char === 0x20 || char === 0x09,
-                input,
-                position
-              )
-
-              position.position++ // skip past " after removing whitespace
-
-              // 2. Set filename to the result of parsing a multipart/form-data name given
-              //    input and position, if the result is not failure. Otherwise, return failure.
-              filename = parseMultipartFormDataName(input, position)
-            }
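+        // Parse attributes one after another (a loop, not recursion). NOTE(review): the guard below stops when the current byte is CR or the next byte is LF, not only on a CR LF pair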
+        while (
+          position.position < input.length &&
+          input[position.position] !== 0x0d &&
+          input[position.position + 1] !== 0x0a
+        ) {
+          const attribute = parseContentDispositionAttribute(input, position)
+
+          if (!attribute) {
+            break
+          }
+
+          if (attribute.name === 'name') {
+            name = attribute.value
+          } else if (attribute.name === 'filename') {
+            filename = attribute.value
           }
         }
 
+        if (name === null) {
+          throw parsingError('name attribute is required in content-disposition header')
+        }
+
         break
       }
       case 'content-type': {
@@ -395,43 +462,6 @@ function parseMultipartFormDataHeaders (input, position) {
   }
 }
 
-/**
- * @see https://andreubotella.github.io/multipart-form-data/#parse-a-multipart-form-data-name
- * @param {Buffer} input
- * @param {{ position: number }} position
- */
-function parseMultipartFormDataName (input, position) {
-  // 1. Assert: The byte at (position - 1) is 0x22 (").
-  assert(input[position.position - 1] === 0x22)
-
-  // 2. Let name be the result of collecting a sequence of bytes that are not 0x0A (LF), 0x0D (CR) or 0x22 ("), given position.
-  /** @type {string | Buffer} */
-  let name = collectASequenceOfBytes(
-    (char) => char !== 0x0a && char !== 0x0d && char !== 0x22,
-    input,
-    position
-  )
-
-  // 3. If the byte at position is not 0x22 ("), return failure. Otherwise, advance position by 1.
-  if (input[position.position] !== 0x22) {
-    throw parsingError('expected "')
-  } else {
-    position.position++
-  }
-
-  // 4. Replace any occurrence of the following subsequences in name with the given byte:
-  // - `%0A`: 0x0A (LF)
-  // - `%0D`: 0x0D (CR)
-  // - `%22`: 0x22 (")
-  name = new TextDecoder().decode(name)
-    .replace(/%0A/ig, '\n')
-    .replace(/%0D/ig, '\r')
-    .replace(/%22/g, '"')
-
-  // 5. Return the UTF-8 decoding without BOM of name.
-  return name
-}
-
 /**
  * @param {(char: number) => boolean} condition
  * @param {Buffer} input
@@ -493,6 +523,58 @@ function parsingError (cause) {
   return new TypeError('Failed to parse body as FormData.', { cause: new TypeError(cause) })
 }
 
+/**
+ * CTL            = <any US-ASCII control character
+ *                  (octets 0 - 31) and DEL (127)>
+ * @param {number} char byte value to test; true for values up to 0x1f and for 0x7f
+ */
+function isCTL (char) {
+  return char <= 0x1f || char === 0x7f
+}
+
+/**
+ * tspecials :=  "(" / ")" / "<" / ">" / "@" /
+ *                "," / ";" / ":" / "\" / <">
+ *                "/" / "[" / "]" / "?" / "="
+ *                ; Must be in quoted-string,
+ *                ; to use within parameter values
+ * @param {number} char byte value to test against the tspecials listed above
+ */
+function isTSpecial (char) {
+  return (
+    char === 0x28 || // (
+    char === 0x29 || // )
+    char === 0x3c || // <
+    char === 0x3e || // >
+    char === 0x40 || // @
+    char === 0x2c || // ,
+    char === 0x3b || // ;
+    char === 0x3a || // :
+    char === 0x5c || // \
+    char === 0x22 || // "
+    char === 0x2f || // /
+    char === 0x5b || // [
+    char === 0x5d || // ]
+    char === 0x3f || // ?
+    char === 0x3d    // =
+  )
+}
+
+/**
+ * token := 1*<any (US-ASCII) CHAR except SPACE, CTLs,
+ *          or tspecials>
+ * @param {number} char byte value to test; true only if it may appear in a token
+ */
+function isToken (char) {
+  return (
+    char <= 0x7f &&  // ascii
+    char !== 0x20 && // space
+    char !== 0x09 && // horizontal tab (isCTL below rejects it as well)
+    !isCTL(char) &&
+    !isTSpecial(char)
+  )
+}
+
 module.exports = {
   multipartFormDataParser,
   validateBoundary