| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575 |
- 'use strict'
- const { bufferToLowerCasedHeaderName } = require('../../core/util')
- const { HTTP_TOKEN_CODEPOINTS } = require('./data-url')
- const { makeEntry } = require('./formdata')
- const { webidl } = require('../webidl')
- const assert = require('node:assert')
- const { isomorphicDecode } = require('../infra')
- const { utf8DecodeBytes } = require('../../encoding')
// The two hyphens (0x2D 0x2D) that follow the boundary on the closing
// delimiter line of a multipart body.
const dd = Buffer.from('--', 'latin1')
// Shared UTF-8 decoder used for Content-Disposition attribute values.
const decoder = new TextDecoder('utf-8')
/**
 * Reports whether every UTF-16 code unit of a string is in the 7-bit ASCII
 * range (0x00-0x7F).
 *
 * @param {string} chars text to inspect
 * @returns {boolean} `true` for the empty string and for pure-ASCII text,
 *   `false` as soon as a code unit above 0x7F is found
 */
function isAsciiString (chars) {
  let index = 0
  while (index < chars.length) {
    // Code units are never negative, so "above 0x7F" is the only way out of
    // the ASCII range.
    if (chars.charCodeAt(index) > 0x7F) {
      return false
    }
    index++
  }
  return true
}
/**
 * Checks a boundary string against the multipart/form-data boundary rules:
 * 27 to 70 code units long and made up only of ASCII alphanumerics, `'`,
 * `-` and `_`.
 *
 * @see https://andreubotella.github.io/multipart-form-data/#multipart-form-data-boundary
 * @param {string} boundary candidate boundary
 * @returns {boolean} `true` when the boundary satisfies both rules
 */
function validateBoundary (boundary) {
  const { length } = boundary

  // Length must be in the inclusive range 27..70.
  if (length < 27 || length > 70) {
    return false
  }

  for (let index = 0; index < length; index++) {
    const code = boundary.charCodeAt(index)

    const isDigit = code >= 0x30 && code <= 0x39
    const isUpper = code >= 0x41 && code <= 0x5a
    const isLower = code >= 0x61 && code <= 0x7a
    // 0x27 ('), 0x2D (-) and 0x5F (_) are the only permitted non-alphanumerics.
    const isAllowedMark = code === 0x27 || code === 0x2d || code === 0x5f

    if (!isDigit && !isUpper && !isLower && !isAllowedMark) {
      return false
    }
  }

  return true
}
/**
 * Parses a multipart/form-data payload into a list of form entries.
 *
 * Bytes before the first boundary delimiter (preamble) and after the closing
 * delimiter (epilogue) are ignored, as RFC 2046 section 5.1.1 requires.
 *
 * @see https://andreubotella.github.io/multipart-form-data/#multipart-form-data-parser
 * @param {Buffer} input raw body bytes
 * @param {ReturnType<import('./data-url')['parseMIMEType']>} mimeType parsed
 *   Content-Type; its essence must be "multipart/form-data"
 * @returns {Array} the entries created with `makeEntry`, in the order the
 *   parts appear in `input`
 * @throws {TypeError} a `parsingError` when the boundary parameter is missing
 *   or the body is malformed
 */
function multipartFormDataParser (input, mimeType) {
  // 1. Assert: mimeType’s essence is "multipart/form-data".
  assert(mimeType !== 'failure' && mimeType.essence === 'multipart/form-data')

  const boundaryString = mimeType.parameters.get('boundary')

  // 2. If mimeType’s parameters["boundary"] does not exist, return failure.
  //    Otherwise, let boundary be the result of UTF-8 decoding mimeType’s
  //    parameters["boundary"].
  if (boundaryString === undefined) {
    throw parsingError('missing boundary in content-type header')
  }

  // Note: boundary already carries the leading `--` of a delimiter line.
  const boundary = Buffer.from(`--${boundaryString}`, 'utf8')

  // A part's body ends at the first CRLF that is immediately followed by a
  // delimiter line. Searching for the complete sequence (rather than the bare
  // boundary token) keeps bodies that merely mention the boundary text intact.
  const bodyTerminator = Buffer.concat([Buffer.from('\r\n'), boundary])

  // 3. Let entry list be an empty entry list.
  const entryList = []

  // 4. Let position be a pointer to a byte in input, initially pointing at
  //    the first byte.
  const position = { position: 0 }

  // Note: Per RFC 2046 Section 5.1.1, we must ignore anything before the
  // first boundary delimiter line (preamble). Search for the first boundary.
  const firstBoundaryIndex = input.indexOf(boundary)

  if (firstBoundaryIndex === -1) {
    throw parsingError('no boundary found in multipart body')
  }

  // Start parsing from the first boundary, ignoring any preamble.
  position.position = firstBoundaryIndex

  // 5. While true:
  while (true) {
    // 5.1. If position points to a sequence of bytes starting with 0x2D 0x2D
    //      (`--`) followed by boundary, advance position by 2 + the length of
    //      boundary. Otherwise, return failure.
    // Note: boundary is padded with 2 dashes already, no need to add 2.
    if (input.subarray(position.position, position.position + boundary.length).equals(boundary)) {
      position.position += boundary.length
    } else {
      throw parsingError('expected a value starting with -- and the boundary')
    }

    // 5.2. If position points to the sequence of bytes 0x2D 0x2D 0x0D 0x0A
    //      (`--` followed by CR LF) followed by the end of input, return entry list.
    // Note: Per RFC 2046 Section 5.1.1, we must ignore anything after the
    // final boundary delimiter (epilogue), so `--` alone ends the parse.
    if (bufferStartsWith(input, dd, position)) {
      return entryList
    }

    // 5.3. If position does not point to a sequence of bytes starting with 0x0D
    //      0x0A (CR LF), return failure.
    if (input[position.position] !== 0x0d || input[position.position + 1] !== 0x0a) {
      throw parsingError('expected CRLF')
    }

    // 5.4. Advance position by 2. (This skips past the newline.)
    position.position += 2

    // 5.5. Let name, filename and contentType be the result of parsing
    //      multipart/form-data headers on input and position, if the result
    //      is not failure. Otherwise, return failure.
    const result = parseMultipartFormDataHeaders(input, position)
    const { name, filename, encoding } = result
    let { contentType } = result

    // 5.6. Advance position by 2. (This skips past the empty line that marks
    //      the end of the headers.)
    position.position += 2

    // 5.7. Let body be the empty byte sequence.
    let body

    // 5.8. Body loop: collect every byte up to (but excluding) the CRLF that
    //      introduces the next delimiter line.
    {
      const terminatorIndex = input.indexOf(bodyTerminator, position.position)

      if (terminatorIndex === -1) {
        throw parsingError('expected boundary after body')
      }

      body = input.subarray(position.position, terminatorIndex)

      // Note: position must be moved past the raw body before the body is
      // decoded, because decoding changes its length.
      position.position = terminatorIndex

      if (encoding === 'base64') {
        body = Buffer.from(body.toString(), 'base64')
      }
    }

    // 5.9. If position does not point to a sequence of bytes starting with
    //      0x0D 0x0A (CR LF), return failure. Otherwise, advance position by 2.
    if (input[position.position] !== 0x0d || input[position.position + 1] !== 0x0a) {
      throw parsingError('expected CRLF')
    } else {
      position.position += 2
    }

    // 5.10. If filename is not null:
    let value

    if (filename !== null) {
      // 5.10.1. If contentType is null, set contentType to "text/plain".
      contentType ??= 'text/plain'

      // 5.10.2. If contentType is not an ASCII string, set contentType to the empty string.
      // Note: `buffer.isAscii` can be used at zero-cost, but converting a string to a buffer is a high overhead.
      // Content-Type is a relatively small string, so it is faster to use `String#charCodeAt`.
      if (!isAsciiString(contentType)) {
        contentType = ''
      }

      // 5.10.3. Let value be a new File object with name filename, type contentType, and body body.
      value = new File([body], filename, { type: contentType })
    } else {
      // 5.11. Otherwise:
      // 5.11.1. Let value be the UTF-8 decoding without BOM of body.
      value = utf8DecodeBytes(Buffer.from(body))
    }

    // 5.12. Assert: name is a scalar value string and value is either a scalar value string or a File object.
    assert(webidl.is.USVString(name))
    assert((typeof value === 'string' && webidl.is.USVString(value)) || webidl.is.File(value))

    // 5.13. Create an entry with name and value, and append it to entry list.
    entryList.push(makeEntry(name, value, filename))
  }
}
/**
 * Parses a single Content-Disposition attribute starting at `position`.
 *
 * Three value forms are understood:
 *  - quoted string:      `; name="field"` (with `%0A`, `%0D` and `%22`
 *    unescaped to LF, CR and `"`)
 *  - bare token:         `; filename=plain.txt`
 *  - extended notation:  `; filename*=utf-8'lang'percent-encoded`, where the
 *    charset must be utf-8 and the language tag may be empty
 *
 * @param {Buffer} input
 * @param {{ position: number }} position advanced past every consumed byte
 * @returns {{ name: string, value: string } | null} the lower-cased attribute
 *   name (without the `*` of the extended notation) and its decoded value, or
 *   `null` when no `name=` pair starts at `position`
 * @throws {TypeError} a `parsingError` for an unsupported charset, malformed
 *   percent-encoding, or an unterminated quoted string
 */
function parseContentDispositionAttribute (input, position) {
  const isTabOrSpace = (char) => char === 0x20 || char === 0x09

  // Skip the separating semicolon, if any, and the whitespace after it.
  if (input[position.position] === 0x3b /* ; */) {
    position.position++
  }

  collectASequenceOfBytes(isTabOrSpace, input, position)

  // Collect attribute name (token characters, stopping at `=` or `*`).
  const attributeName = collectASequenceOfBytes(
    (char) => isToken(char) && char !== 0x3d && char !== 0x2a,
    input,
    position
  )

  if (attributeName.length === 0) {
    return null
  }

  const attrNameStr = attributeName.toString('ascii').toLowerCase()

  // A `*` right after the name marks the extended notation.
  const isExtended = input[position.position] === 0x2a /* * */
  if (isExtended) {
    position.position++
  }

  // Expect = sign.
  if (input[position.position] !== 0x3d /* = */) {
    return null
  }
  position.position++

  collectASequenceOfBytes(isTabOrSpace, input, position)

  let value

  if (isExtended) {
    // Extended attribute format: charset'language'encoded-value
    const headerValue = collectASequenceOfBytes(
      (char) => char !== 0x20 && char !== 0x0d && char !== 0x0a && char !== 0x3b, // not space, CRLF, or ;
      input,
      position
    )

    // The charset is followed by `'`, an optional language tag and a second
    // `'`; the encoded value starts right after that second quote.
    const languageEnd = headerValue.indexOf(0x27, 6)

    if (
      (headerValue[0] !== 0x75 && headerValue[0] !== 0x55) || // u or U
      (headerValue[1] !== 0x74 && headerValue[1] !== 0x54) || // t or T
      (headerValue[2] !== 0x66 && headerValue[2] !== 0x46) || // f or F
      headerValue[3] !== 0x2d || // -
      headerValue[4] !== 0x38 || // 8
      headerValue[5] !== 0x27 || // '
      languageEnd === -1
    ) {
      throw parsingError('unknown encoding, expected utf-8\'\'')
    }

    try {
      value = decodeURIComponent(decoder.decode(headerValue.subarray(languageEnd + 1)))
    } catch {
      // decodeURIComponent throws URIError on malformed escapes; report it
      // like every other parse failure instead of leaking a different type.
      throw parsingError('invalid percent-encoding in extended attribute value')
    }
  } else if (input[position.position] === 0x22 /* " */) {
    // Quoted string
    position.position++ // skip opening quote

    const quotedValue = collectASequenceOfBytes(
      (char) => char !== 0x0a && char !== 0x0d && char !== 0x22, // not LF, CR, or "
      input,
      position
    )

    if (input[position.position] !== 0x22) {
      throw parsingError('Closing quote not found')
    }
    position.position++ // skip closing quote

    value = decoder.decode(quotedValue)
      .replace(/%0A/ig, '\n')
      .replace(/%0D/ig, '\r')
      .replace(/%22/g, '"')
  } else {
    // Token value (no quotes)
    const tokenValue = collectASequenceOfBytes(
      (char) => isToken(char) && char !== 0x3b, // not ;
      input,
      position
    )

    value = decoder.decode(tokenValue)
  }

  return { name: attrNameStr, value }
}
/**
 * Parses the header block of one multipart/form-data part.
 *
 * On success `position` is left on the CRLF of the empty line that ends the
 * headers (the caller skips it).
 *
 * @see https://andreubotella.github.io/multipart-form-data/#parse-multipart-form-data-headers
 * @param {Buffer} input
 * @param {{ position: number }} position advanced as bytes are consumed
 * @returns {{ name: string, filename: string | null, contentType: string | null, encoding: string | null }}
 *   values taken from the Content-Disposition, Content-Type and
 *   Content-Transfer-Encoding headers
 * @throws {TypeError} a `parsingError` when a header line is malformed or no
 *   `name` attribute was supplied
 */
function parseMultipartFormDataHeaders (input, position) {
  const isTabOrSpace = (char) => char === 0x20 || char === 0x09
  const isNotLineBreak = (char) => char !== 0x0a && char !== 0x0d
  // True only when BOTH bytes at position are CR LF.
  const atCRLF = () =>
    input[position.position] === 0x0d && input[position.position + 1] === 0x0a

  // 1. Let name, filename and contentType be null.
  let name = null
  let filename = null
  let contentType = null
  let encoding = null

  // 2. While true:
  while (true) {
    // 2.1. If position points to a sequence of bytes starting with 0x0D 0x0A (CR LF):
    if (atCRLF()) {
      // 2.1.1. If name is null, return failure.
      if (name === null) {
        throw parsingError('header name is null')
      }

      // 2.1.2. Return name, filename and contentType.
      return { name, filename, contentType, encoding }
    }

    // 2.2. Let header name be the result of collecting a sequence of bytes that are
    //      not 0x0A (LF), 0x0D (CR) or 0x3A (:), given position.
    let headerName = collectASequenceOfBytes(
      (char) => char !== 0x0a && char !== 0x0d && char !== 0x3a,
      input,
      position
    )

    // 2.3. Remove any HTTP tab or space bytes from the start or end of header name.
    headerName = removeChars(headerName, true, true, isTabOrSpace)

    // 2.4. If header name does not match the field-name token production, return failure.
    if (!HTTP_TOKEN_CODEPOINTS.test(headerName.toString())) {
      throw parsingError('header name does not match the field-name token production')
    }

    // 2.5. If the byte at position is not 0x3A (:), return failure.
    if (input[position.position] !== 0x3a) {
      throw parsingError('expected :')
    }

    // 2.6. Advance position by 1.
    position.position++

    // 2.7. Collect a sequence of bytes that are HTTP tab or space bytes given position.
    //      (Do nothing with those bytes.)
    collectASequenceOfBytes(isTabOrSpace, input, position)

    // 2.8. Byte-lowercase header name and switch on the result:
    switch (bufferToLowerCasedHeaderName(headerName)) {
      case 'content-disposition': {
        // A later Content-Disposition header replaces an earlier one.
        name = filename = null

        // Collect the disposition type (should be "form-data").
        const dispositionType = collectASequenceOfBytes(
          (char) => isToken(char),
          input,
          position
        )

        if (dispositionType.toString('ascii').toLowerCase() !== 'form-data') {
          throw parsingError('expected form-data for content-disposition header')
        }

        // Parse attributes one after another until the line's CRLF (or the
        // end of input) is reached, or no further attribute can be read.
        while (position.position < input.length && !atCRLF()) {
          const attribute = parseContentDispositionAttribute(input, position)

          if (!attribute) {
            break
          }

          if (attribute.name === 'name') {
            name = attribute.value
          } else if (attribute.name === 'filename') {
            filename = attribute.value
          }
        }

        if (name === null) {
          throw parsingError('name attribute is required in content-disposition header')
        }

        break
      }
      case 'content-type': {
        // 1. Let header value be the result of collecting a sequence of bytes that are
        //    not 0x0A (LF) or 0x0D (CR), given position.
        let headerValue = collectASequenceOfBytes(isNotLineBreak, input, position)

        // 2. Remove any HTTP tab or space bytes from the end of header value.
        headerValue = removeChars(headerValue, false, true, isTabOrSpace)

        // 3. Set contentType to the isomorphic decoding of header value.
        contentType = isomorphicDecode(headerValue)

        break
      }
      case 'content-transfer-encoding': {
        let headerValue = collectASequenceOfBytes(isNotLineBreak, input, position)

        headerValue = removeChars(headerValue, false, true, isTabOrSpace)

        encoding = isomorphicDecode(headerValue)

        break
      }
      default: {
        // Collect a sequence of bytes that are not 0x0A (LF) or 0x0D (CR), given position.
        // (Do nothing with those bytes.)
        collectASequenceOfBytes(isNotLineBreak, input, position)
      }
    }

    // 2.9. If position does not point to a sequence of bytes starting with 0x0D 0x0A
    //      (CR LF), return failure. Otherwise, advance position by 2 (past the newline).
    // Both bytes must match: a lone CR, or an LF in second place, is not a line end.
    if (!atCRLF()) {
      throw parsingError('expected CRLF')
    } else {
      position.position += 2
    }
  }
}
/**
 * Consumes the longest run of bytes, starting at `position`, for which
 * `condition` holds, and moves `position` to the first byte after that run.
 *
 * @param {(char: number) => boolean} condition predicate applied to each byte
 * @param {Buffer} input
 * @param {{ position: number }} position updated in place
 * @returns {Buffer} view over the consumed bytes (empty when nothing matched)
 */
function collectASequenceOfBytes (condition, input, position) {
  const begin = position.position
  let end = begin

  for (; end < input.length; end++) {
    if (!condition(input[end])) {
      break
    }
  }

  position.position = end
  return input.subarray(begin, end)
}
/**
 * Trims bytes matching `predicate` from the start and/or end of a buffer.
 *
 * @param {Buffer} buf
 * @param {boolean} leading trim matching bytes from the start
 * @param {boolean} trailing trim matching bytes from the end
 * @param {(charCode: number) => boolean} predicate selects the bytes to drop
 * @returns {Buffer} `buf` itself when nothing was trimmed, otherwise a view
 *   over the remaining bytes (empty when every byte matched)
 */
function removeChars (buf, leading, trailing, predicate) {
  let lead = 0
  let trail = buf.length - 1

  if (leading) {
    while (lead < buf.length && predicate(buf[lead])) lead++
  }

  if (trailing) {
    // Stop at `lead`, not at index 1: the first remaining byte has to be
    // tested too, otherwise an all-matching buffer keeps one byte.
    while (trail >= lead && predicate(buf[trail])) trail--
  }

  return lead === 0 && trail === buf.length - 1 ? buf : buf.subarray(lead, trail + 1)
}
/**
 * Tests whether the bytes of `start` occur in `buffer` beginning at the
 * offset held in `position`. `position` is not modified.
 *
 * @param {Buffer} buffer bytes to look into
 * @param {Buffer} start expected byte sequence
 * @param {{ position: number }} position offset into `buffer`
 * @returns {boolean} `true` when every byte of `start` matches
 */
function bufferStartsWith (buffer, start, position) {
  // A needle longer than the whole haystack can never match.
  if (start.length > buffer.length) {
    return false
  }

  // Reads past the end of `buffer` yield `undefined`, which never equals a byte.
  return start.every((byte, offset) => buffer[position.position + offset] === byte)
}
/**
 * Builds the error reported for every multipart parse failure: a generic
 * TypeError whose `cause` is a second TypeError carrying the specific reason.
 *
 * @param {string} cause description of what went wrong
 * @returns {TypeError}
 */
function parsingError (cause) {
  const reason = new TypeError(cause)
  return new TypeError('Failed to parse body as FormData.', { cause: reason })
}
/**
 * Tests for a US-ASCII control character:
 *
 *   CTL = <any US-ASCII control character (octets 0 - 31) and DEL (127)>
 *
 * @param {number} char byte value
 * @returns {boolean}
 */
function isCTL (char) {
  const isDelete = char === 0x7f
  return isDelete || char <= 0x1f
}
/**
 * Tests for one of the MIME "tspecials", which must be quoted to appear in a
 * parameter value:
 *
 *   tspecials := "(" / ")" / "<" / ">" / "@" /
 *                "," / ";" / ":" / "\" / <">
 *                "/" / "[" / "]" / "?" / "="
 *
 * @param {number} char byte value
 * @returns {boolean}
 */
function isTSpecial (char) {
  switch (char) {
    case 0x28: // (
    case 0x29: // )
    case 0x3c: // <
    case 0x3e: // >
    case 0x40: // @
    case 0x2c: // ,
    case 0x3b: // ;
    case 0x3a: // :
    case 0x5c: // \
    case 0x22: // "
    case 0x2f: // /
    case 0x5b: // [
    case 0x5d: // ]
    case 0x3f: // ?
    case 0x3d: // =
      return true
    default:
      return false
  }
}
/**
 * Tests whether a byte may appear in a MIME token:
 *
 *   token := 1*<any (US-ASCII) CHAR except SPACE, CTLs, or tspecials>
 *
 * @param {number} char byte value
 * @returns {boolean}
 */
function isToken (char) {
  // Only 7-bit ASCII qualifies.
  if (!(char <= 0x7f)) {
    return false
  }

  // Space and horizontal tab are excluded explicitly.
  if (char === 0x20 || char === 0x09) {
    return false
  }

  return !isCTL(char) && !isTSpecial(char)
}
// Public API: the multipart/form-data body parser and the boundary validator.
module.exports = { multipartFormDataParser, validateBoundary }
|