// data-url.js (17 KB)
  1. 'use strict'
  2. const assert = require('node:assert')
  3. const { forgivingBase64, collectASequenceOfCodePoints, collectASequenceOfCodePointsFast, isomorphicDecode, removeASCIIWhitespace, removeChars } = require('../infra')
  /**
   * Matches a non-empty string made up solely of HTTP token code points.
   * @see https://mimesniff.spec.whatwg.org/#http-token-code-point
   */
  const HTTP_TOKEN_CODEPOINTS = /^[-!#$%&'*+.^_|~A-Za-z0-9]+$/u

  /**
   * Matches when the tested string contains an HTTP whitespace code point
   * (LF, CR, TAB or SPACE). The pattern is unanchored; parseMIMEType applies
   * it to one character at a time.
   * @see https://fetch.spec.whatwg.org/#http-whitespace
   */
  const HTTP_WHITESPACE_REGEX = /[\u000A\u000D\u0009\u0020]/u // eslint-disable-line

  /**
   * Matches a non-empty string made up solely of HTTP quoted-string token
   * code points.
   * @see https://mimesniff.spec.whatwg.org/#http-quoted-string-token-code-point
   */
  const HTTP_QUOTED_STRING_TOKENS = /^[\u0009\u0020-\u007E\u0080-\u00FF]+$/u // eslint-disable-line

  // Single UTF-8 encoder instance reused by stringPercentDecode.
  const encoder = new TextEncoder()
  // https://fetch.spec.whatwg.org/#data-url-processor
  /**
   * Runs the Fetch "data: URL processor" steps on an already-parsed URL.
   *
   * @param {URL} dataURL URL whose protocol is `data:` (asserted).
   * @returns {'failure' | { mimeType: object, body: any }} `'failure'` when the
   *   URL has no `,` separator or when forgiving-base64 decoding fails;
   *   otherwise the parsed MIME type record and the decoded body. The body is
   *   whatever stringPercentDecode returns, or, for base64 URLs, whatever
   *   forgivingBase64 returns.
   */
  function dataURLProcessor (dataURL) {
    // 1. Assert: dataURL's scheme is "data".
    assert(dataURL.protocol === 'data:')

    // 2. Let input be the result of running the URL
    //    serializer on dataURL with exclude fragment
    //    set to true.
    let input = URLSerializer(dataURL, true)

    // 3. Remove the leading "data:" string from input.
    input = input.slice(5)

    // 4. Let position point at the start of input.
    const position = { position: 0 }

    // 5. Let mimeType be the result of collecting a
    //    sequence of code points that are not equal
    //    to U+002C (,), given position.
    let mimeType = collectASequenceOfCodePointsFast(
      ',',
      input,
      position
    )

    // 6. Strip leading and trailing ASCII whitespace
    //    from mimeType.
    // Undici implementation note: we need to store the
    // length because if the mimetype has spaces removed,
    // the wrong amount will be sliced from the input in
    // step #9
    const mimeTypeLength = mimeType.length
    mimeType = removeASCIIWhitespace(mimeType, true, true)

    // 7. If position is past the end of input, then
    //    return failure
    if (position.position >= input.length) {
      return 'failure'
    }

    // 8. Advance position by 1.
    position.position++

    // 9. Let encodedBody be the remainder of input.
    const encodedBody = input.slice(mimeTypeLength + 1)

    // 10. Let body be the percent-decoding of encodedBody.
    let body = stringPercentDecode(encodedBody)

    // 11. If mimeType ends with U+003B (;), followed by
    //     zero or more U+0020 SPACE, followed by an ASCII
    //     case-insensitive match for "base64", then:
    if (/;(?:\u0020*)base64$/ui.test(mimeType)) {
      // 1. Let stringBody be the isomorphic decode of body.
      const stringBody = isomorphicDecode(body)

      // 2. Set body to the forgiving-base64 decode of
      //    stringBody.
      body = forgivingBase64(stringBody)

      // 3. If body is failure, then return failure.
      if (body === 'failure') {
        return 'failure'
      }

      // 4. Remove the last 6 code points from mimeType.
      mimeType = mimeType.slice(0, -6)

      // 5. Remove trailing U+0020 SPACE code points from mimeType,
      //    if any.
      // Scan backwards from the end instead of using a `(\u0020+)$` regex:
      // that pattern retries from every space in the string and backtracks
      // quadratically on a long run of spaces that is not at the end.
      let end = mimeType.length
      while (end > 0 && mimeType.charCodeAt(end - 1) === 0x20) {
        end--
      }
      mimeType = mimeType.slice(0, end)

      // 6. Remove the last U+003B (;) code point from mimeType.
      mimeType = mimeType.slice(0, -1)
    }

    // 12. If mimeType starts with U+003B (;), then prepend
    //     "text/plain" to mimeType.
    if (mimeType.startsWith(';')) {
      mimeType = 'text/plain' + mimeType
    }

    // 13. Let mimeTypeRecord be the result of parsing
    //     mimeType.
    let mimeTypeRecord = parseMIMEType(mimeType)

    // 14. If mimeTypeRecord is failure, then set
    //     mimeTypeRecord to text/plain;charset=US-ASCII.
    if (mimeTypeRecord === 'failure') {
      mimeTypeRecord = parseMIMEType('text/plain;charset=US-ASCII')
    }

    // 15. Return a new data: URL struct whose MIME
    //     type is mimeTypeRecord and body is body.
    // https://fetch.spec.whatwg.org/#data-url-struct
    return { mimeType: mimeTypeRecord, body }
  }
  // https://url.spec.whatwg.org/#concept-url-serializer
  /**
   * Serializes a URL, optionally leaving out its fragment.
   *
   * @param {URL} url
   * @param {boolean} excludeFragment when true, the fragment and its `#`
   *   delimiter are dropped from the result
   * @returns {string} `url.href`, with the fragment removed if requested
   */
  function URLSerializer (url, excludeFragment = false) {
    const { href } = url

    if (!excludeFragment) {
      return href
    }

    const fragmentLength = url.hash.length

    // A non-empty hash already includes the leading '#', so cutting it off
    // the end removes the delimiter as well.
    if (fragmentLength !== 0) {
      return href.substring(0, href.length - fragmentLength)
    }

    // An empty fragment reports hash === '' while href still ends in '#'.
    return href.endsWith('#') ? href.slice(0, -1) : href
  }
  // https://url.spec.whatwg.org/#string-percent-decode
  /**
   * Percent-decodes a string by first encoding it as UTF-8 bytes.
   *
   * @param {string} input
   * @returns {Uint8Array} the bytes produced by percentDecode
   */
  function stringPercentDecode (input) {
    // 1. Let bytes be the UTF-8 encoding of input.
    // 2. Return the percent-decoding of bytes.
    return percentDecode(encoder.encode(input))
  }
  /**
   * Tells whether a byte is the ASCII code of a hexadecimal digit.
   *
   * @param {number} byte
   * @returns {boolean} true for 0-9, A-F and a-f; false for anything else,
   *   including `undefined`
   */
  function isHexCharByte (byte) {
    // '0'..'9'
    if (byte >= 0x30 && byte <= 0x39) {
      return true
    }
    // 'A'..'F'
    if (byte >= 0x41 && byte <= 0x46) {
      return true
    }
    // 'a'..'f'
    return byte >= 0x61 && byte <= 0x66
  }
  /**
   * Converts the ASCII code of a hexadecimal digit to its numeric value.
   * The result is only meaningful for bytes accepted by isHexCharByte.
   *
   * @param {number} byte
   * @returns {number}
   */
  function hexByteToNumber (byte) {
    // '0'..'9' map straight onto 0..9.
    if (byte >= 0x30 && byte <= 0x39) {
      return byte - 0x30
    }

    // Clearing bit 0x20 folds 'a'..'f' onto 'A'..'F'; 'A' (0x41) is 10,
    // hence the 0x37 offset.
    return (byte & 0xDF) - 0x37
  }
  // https://url.spec.whatwg.org/#percent-decode
  /**
   * Percent-decodes a byte sequence.
   *
   * @param {Uint8Array} input
   * @returns {Uint8Array} a new array of the same length when no escape was
   *   decoded, otherwise a shorter subarray view over that new array
   */
  function percentDecode (input) {
    const total = input.length

    // 1. Let output be an empty byte sequence.
    //    Decoding never grows the data, so `total` bytes always suffice.
    /** @type {Uint8Array} */
    const decoded = new Uint8Array(total)
    let written = 0

    // 2. For each byte byte in input:
    for (let read = 0; read < total; read++) {
      const current = input[read]

      // 3. A '%' followed by two hex digits becomes the single byte they
      //    spell out, and the two digits are skipped.
      if (
        current === 0x25 &&
        isHexCharByte(input[read + 1]) &&
        isHexCharByte(input[read + 2])
      ) {
        decoded[written++] = (hexByteToNumber(input[read + 1]) << 4) | hexByteToNumber(input[read + 2])
        read += 2
        continue
      }

      // 1./2. Every other byte, including a '%' that does not start a valid
      //       escape, is copied through unchanged.
      decoded[written++] = current
    }

    // 3. Return output.
    return written === total ? decoded : decoded.subarray(0, written)
  }
  // https://mimesniff.spec.whatwg.org/#parse-a-mime-type
  /**
   * Parses a MIME type string following the MIME Sniffing "parse a MIME type"
   * steps.
   *
   * @param {string} input
   * @returns {'failure' | { type: string, subtype: string, parameters: Map<string, string>, essence: string }}
   *   `'failure'` when the type or subtype is missing or is not made up of
   *   HTTP token code points; otherwise the lowercased record.
   */
  function parseMIMEType (input) {
    // 1. Remove any leading and trailing HTTP whitespace from input.
    const source = removeHTTPWhitespace(input, true, true)

    // 2. Position variable, initially pointing at the start of the input.
    const cursor = { position: 0 }

    // 3. Collect everything up to the first U+002F (/) as the type.
    const rawType = collectASequenceOfCodePointsFast('/', source, cursor)

    // 4. The type must be non-empty and consist of HTTP token code points.
    // https://mimesniff.spec.whatwg.org/#http-token-code-point
    if (rawType.length === 0 || !HTTP_TOKEN_CODEPOINTS.test(rawType)) {
      return 'failure'
    }

    // 5. No U+002F (/) was found: failure.
    if (cursor.position >= source.length) {
      return 'failure'
    }

    // 6. Skip past U+002F (/).
    cursor.position++

    // 7. Collect everything up to the first U+003B (;) as the subtype.
    // 8. Remove any trailing HTTP whitespace from it.
    const rawSubtype = removeHTTPWhitespace(
      collectASequenceOfCodePointsFast(';', source, cursor),
      false,
      true
    )

    // 9. The subtype must be non-empty and consist of HTTP token code points.
    if (rawSubtype.length === 0 || !HTTP_TOKEN_CODEPOINTS.test(rawSubtype)) {
      return 'failure'
    }

    const type = rawType.toLowerCase()
    const subtype = rawSubtype.toLowerCase()

    // 10. New MIME type record with lowercased type and subtype.
    // https://mimesniff.spec.whatwg.org/#mime-type
    const mimeType = {
      type,
      subtype,
      /** @type {Map<string, string>} */
      parameters: new Map(),
      // https://mimesniff.spec.whatwg.org/#mime-type-essence
      essence: `${type}/${subtype}`
    }

    // Predicates for the per-parameter collection steps below.
    // https://fetch.spec.whatwg.org/#http-whitespace
    const isWhitespaceChar = (char) => HTTP_WHITESPACE_REGEX.test(char)
    const isNameChar = (char) => char !== ';' && char !== '='

    // 11. While position is not past the end of input:
    while (cursor.position < source.length) {
      // 1. Skip past U+003B (;).
      cursor.position++

      // 2. Skip any HTTP whitespace.
      collectASequenceOfCodePoints(isWhitespaceChar, source, cursor)

      // 3. Collect the parameter name: everything up to U+003B (;) or
      //    U+003D (=).
      // 4. Lowercase it.
      const parameterName = collectASequenceOfCodePoints(
        isNameChar,
        source,
        cursor
      ).toLowerCase()

      // 5. If position is not past the end of input, then:
      if (cursor.position < source.length) {
        // 1. A name directly followed by U+003B (;) has no value.
        if (source[cursor.position] === ';') {
          continue
        }

        // 2. Skip past U+003D (=).
        cursor.position++
      }

      // 6. If position is past the end of input, then break.
      if (cursor.position >= source.length) {
        break
      }

      // 7. Let parameterValue be null.
      let parameterValue = null

      if (source[cursor.position] === '"') {
        // 8. Quoted value: extract the quoted string, then discard whatever
        //    follows it up to the next U+003B (;).
        parameterValue = collectAnHTTPQuotedString(source, cursor, true)
        collectASequenceOfCodePointsFast(';', source, cursor)
      } else {
        // 9. Unquoted value: everything up to the next U+003B (;), minus
        //    trailing HTTP whitespace. An empty value is skipped.
        parameterValue = removeHTTPWhitespace(
          collectASequenceOfCodePointsFast(';', source, cursor),
          false,
          true
        )

        if (parameterValue.length === 0) {
          continue
        }
      }

      // 10. Store the parameter only if all of the following hold:
      //     - parameterName is not the empty string
      //     - parameterName solely contains HTTP token code points
      //     - parameterValue solely contains HTTP quoted-string token code points
      //     - mimeType's parameters[parameterName] does not exist
      const isStorable =
        parameterName.length !== 0 &&
        HTTP_TOKEN_CODEPOINTS.test(parameterName) &&
        (parameterValue.length === 0 || HTTP_QUOTED_STRING_TOKENS.test(parameterValue)) &&
        !mimeType.parameters.has(parameterName)

      if (isStorable) {
        mimeType.parameters.set(parameterName, parameterValue)
      }
    }

    // 12. Return mimeType.
    return mimeType
  }
  // https://fetch.spec.whatwg.org/#collect-an-http-quoted-string
  // tests: https://fetch.spec.whatwg.org/#example-http-quoted-string
  /**
   * Collects an HTTP quoted string starting at `position`, advancing
   * `position.position` past what was consumed.
   *
   * @param {string} input
   * @param {{ position: number }} position must point at a U+0022 (") in
   *   `input` (asserted)
   * @param {boolean} [extractValue=false] when true, return the unescaped
   *   contents; otherwise return the raw slice of `input` that was consumed
   * @returns {string}
   */
  function collectAnHTTPQuotedString (input, position, extractValue = false) {
    // 1. Let positionStart be position.
    const startIndex = position.position

    // 2. Let value be the empty string.
    let collected = ''

    // 3. Assert: the code point at position within input is U+0022 (").
    assert(input[position.position] === '"')

    // 4. Advance position by 1.
    position.position++

    const isPlainChar = (char) => char !== '"' && char !== '\\'

    // 5. While true:
    for (;;) {
      // 1. Append everything up to the next U+0022 (") or U+005C (\).
      collected += collectASequenceOfCodePoints(isPlainChar, input, position)

      // 2. If position is past the end of input, then break.
      if (position.position >= input.length) {
        break
      }

      // 3. Let quoteOrBackslash be the code point at position.
      // 4. Advance position by 1.
      const delimiter = input[position.position++]

      // 6. A closing quote ends the string.
      if (delimiter !== '\\') {
        assert(delimiter === '"')
        break
      }

      // 5.1. A trailing backslash is kept literally.
      if (position.position >= input.length) {
        collected += '\\'
        break
      }

      // 5.2. Append the escaped code point as-is.
      // 5.3. Advance position by 1.
      collected += input[position.position++]
    }

    // 6. If the extract-value flag is set, then return value.
    // 7. Otherwise return the code points from positionStart to position.
    return extractValue ? collected : input.slice(startIndex, position.position)
  }
  /**
   * Serializes a MIME type record to a string: the essence followed by
   * `;name=value` for each parameter.
   *
   * @see https://mimesniff.spec.whatwg.org/#serialize-a-mime-type
   * @param {{ essence: string, parameters: Map<string, string> }} mimeType
   *   a parsed record; must not be `'failure'` (asserted)
   * @returns {string}
   */
  function serializeAMimeType (mimeType) {
    assert(mimeType !== 'failure')
    const { parameters, essence } = mimeType

    // 1. Start from type, U+002F (/), subtype, which is the essence.
    let serialization = essence

    // 2. For each name -> value of mimeType's parameters:
    for (const [name, rawValue] of parameters.entries()) {
      let value = rawValue

      // 4. A value that is empty or contains anything other than HTTP token
      //    code points is emitted as a quoted string, with U+0022 (") and
      //    U+005C (\) escaped by a preceding U+005C (\).
      if (!HTTP_TOKEN_CODEPOINTS.test(rawValue)) {
        value = '"' + rawValue.replace(/[\\"]/ug, '\\$&') + '"'
      }

      // 1.-3., 5. Append U+003B (;), the name, U+003D (=) and the value.
      serialization += ';' + name + '=' + value
    }

    // 3. Return serialization.
    return serialization
  }
  /**
   * Tells whether a character code is HTTP whitespace.
   *
   * @see https://fetch.spec.whatwg.org/#http-whitespace
   * @param {number} char character code to test
   * @returns {boolean} true for TAB, LF, CR and SPACE
   */
  function isHTTPWhiteSpace (char) {
    switch (char) {
      case 0x009: // "\t"
      case 0x00a: // "\n"
      case 0x00d: // "\r"
      case 0x020: // " "
        return true
      default:
        return false
    }
  }
  /**
   * Removes HTTP whitespace from a string by delegating to removeChars with
   * isHTTPWhiteSpace as the predicate.
   *
   * @see https://fetch.spec.whatwg.org/#http-whitespace
   * @param {string} str
   * @param {boolean} [leading=true] forwarded to removeChars; presumably
   *   selects stripping at the start (confirm against ../infra)
   * @param {boolean} [trailing=true] forwarded to removeChars; presumably
   *   selects stripping at the end (confirm against ../infra)
   * @returns {string} the value returned by removeChars
   */
  function removeHTTPWhitespace (str, leading = true, trailing = true) {
    return removeChars(str, leading, trailing, isHTTPWhiteSpace)
  }
  // Essences treated as JavaScript MIME types by minimizeSupportedMimeType.
  const JAVASCRIPT_MIME_ESSENCES = new Set([
    'application/ecmascript',
    'application/javascript',
    'application/x-ecmascript',
    'application/x-javascript',
    'text/ecmascript',
    'text/javascript',
    'text/javascript1.0',
    'text/javascript1.1',
    'text/javascript1.2',
    'text/javascript1.3',
    'text/javascript1.4',
    'text/javascript1.5',
    'text/jscript',
    'text/livescript',
    'text/x-ecmascript',
    'text/x-javascript'
  ])

  /**
   * Reduces a parsed MIME type to its minimized form.
   *
   * @see https://mimesniff.spec.whatwg.org/#minimize-a-supported-mime-type
   * @param {Exclude<ReturnType<typeof parseMIMEType>, 'failure'>} mimeType
   * @returns {string} `'text/javascript'`, `'application/json'`,
   *   `'image/svg+xml'`, `'application/xml'`, or the empty string
   */
  function minimizeSupportedMimeType (mimeType) {
    const { essence } = mimeType

    // 1. If mimeType is a JavaScript MIME type, then return "text/javascript".
    if (JAVASCRIPT_MIME_ESSENCES.has(essence)) {
      return 'text/javascript'
    }

    // 2. If mimeType is a JSON MIME type (by essence), then return
    //    "application/json".
    if (essence === 'application/json' || essence === 'text/json') {
      return 'application/json'
    }

    // 3. If mimeType's essence is "image/svg+xml", then return it. This is
    //    checked before the generic "+xml" suffix rule below.
    if (essence === 'image/svg+xml') {
      return 'image/svg+xml'
    }

    // 4. If mimeType is an XML MIME type (by essence), then return
    //    "application/xml".
    if (essence === 'text/xml' || essence === 'application/xml') {
      return 'application/xml'
    }

    const { subtype } = mimeType

    // 2. JSON MIME types also include any subtype ending in "+json".
    if (subtype.endsWith('+json')) {
      return 'application/json'
    }

    // 4. XML MIME types also include any subtype ending in "+xml".
    if (subtype.endsWith('+xml')) {
      return 'application/xml'
    }

    // 5. If mimeType is supported by the user agent, then return mimeType's essence.
    // Technically, node doesn't support any mimetypes.

    // 6. Return the empty string.
    return ''
  }
  // Exported API: the data: URL processor, the URL/percent-decoding and MIME
  // type helpers defined above, and the HTTP token regex.
  488. module.exports = {
  489. dataURLProcessor,
  490. URLSerializer,
  491. stringPercentDecode,
  492. parseMIMEType,
  493. collectAnHTTPQuotedString,
  494. serializeAMimeType,
  495. removeHTTPWhitespace,
  496. minimizeSupportedMimeType,
  497. HTTP_TOKEN_CODEPOINTS
  498. }