single-byte.js 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110
  1. import { asciiPrefix, decodeAscii, decodeLatin1 } from './latin1.js'
  2. import encodings from './single-byte.encodings.js'
  3. import { decode2string, nativeDecoder } from './platform.js'
  /** Message of the TypeError thrown when strict decoding finds U+FFFD in the decoded output. */
  export const E_STRICT = 'Input is not well-formed for this encoding'

  // Labels accepted by assertEncoding even when they are not keys of the imported encodings table
  const iso8i = 'iso-8859-8-i'
  const xUserDefined = 'x-user-defined'
  /**
   * Validates a single-byte encoding label.
   *
   * A label is accepted when it is an own key of the encodings table, or when it is
   * one of the two special labels (x-user-defined, iso-8859-8-i).
   *
   * @param {string} encoding - label to validate
   * @returns {void}
   * @throws {RangeError} when the label is not supported
   */
  export const assertEncoding = (encoding) => {
    const supported =
      Object.hasOwn(encodings, encoding) || encoding === xUserDefined || encoding === iso8i
    if (!supported) throw new RangeError('Unsupported encoding')
  }
  // U+FFFD REPLACEMENT CHARACTER: stored in a table wherever a byte has no mapping
  const r = 0xff_fd

  /**
   * Expands the compact table of a single-byte encoding into the list of UTF-16 code
   * units for bytes 0x80 and up (index 0 corresponds to byte 0x80).
   *
   * - x-user-defined yields 128 consecutive code units starting at 0xF780.
   * - iso-8859-8-i shares the table stored under 'iso-8859-8'.
   * - Otherwise each table entry is either a single delta, or an array [count, delta]
   *   standing for `count` repetitions of `delta` (delta defaults to 0). A delta equal
   *   to the U+FFFD marker is kept as is; any other delta resolves to delta + 128 + index.
   *
   * A fresh array is returned on every call.
   *
   * @param {string} encoding - supported encoding label
   * @returns {number[]} code units for the upper half of the byte range
   * @throws {RangeError} when assertEncoding rejects the label
   */
  export function getEncoding(encoding) {
    assertEncoding(encoding)

    if (encoding === xUserDefined) {
      const privateUse = []
      for (let i = 0; i < 128; i++) privateUse.push(0xf7_80 + i)
      return privateUse
    }

    const table = encodings[encoding === iso8i ? 'iso-8859-8' : encoding]
    const codes = []
    // The position of the code being appended is the current length of `codes`
    const append = (delta) => {
      codes.push(delta === r ? delta : delta + 128 + codes.length)
    }

    for (const entry of table) {
      if (Array.isArray(entry)) {
        const delta = entry[1] ?? 0
        for (let left = entry[0]; left > 0; left--) append(delta)
      } else {
        append(entry)
      }
    }
    return codes
  }
  // Memoized results of the factory functions in this module, keyed by encoding label
  const encmaps = new Map() // filled by encodeMap
  const decoders = new Map() // filled by encodingDecoder
  const mappers = new Map() // filled by encodingMapper
  /**
   * Returns, memoized per encoding, a function that maps bytes of a single-byte
   * encoding to UTF-16 code units, plus a flag telling whether the encoding has
   * unmapped bytes.
   *
   * Used only on Node.js, no reason to optimize for anything else.
   * E.g. avoiding .from and filling a zero-initialized array manually is faster on Hermes,
   * but this codepath is avoided on Hermes completely.
   *
   * The same result object is stored in the cache and returned, so repeated calls
   * for one encoding always yield the identical object.
   *
   * @param {string} encoding - supported encoding label
   * @returns {{ mapper: Function, incomplete: boolean }}
   *   `mapper(arr, start = 0)` returns a new Uint16Array with the mapped values of `arr`
   *   from `start` to its end and leaves `arr` untouched (`arr` must offer `subarray`
   *   when `start` is non-zero); `incomplete` is true when the table contains U+FFFD.
   * @throws {RangeError} when getEncoding rejects the label
   */
  export function encodingMapper(encoding) {
    const cached = mappers.get(encoding)
    if (cached) return cached

    const codes = getEncoding(encoding)
    const incomplete = codes.includes(r)

    let map // byte value -> code unit, built on the first mapper call
    const mapper = (arr, start = 0) => {
      if (!map) {
        map = new Uint16Array(256).map((_, i) => i) // Unicode subset
        map.set(Uint16Array.from(codes), 128)
      }

      const o = Uint16Array.from(start === 0 ? arr : arr.subarray(start)) // copy to modify in-place, also those are 16-bit now
      let i = 0
      // Manually unrolled: eight elements per iteration, remainder handled below
      for (const end7 = o.length - 7; i < end7; i += 8) {
        o[i] = map[o[i]]
        o[i + 1] = map[o[i + 1]]
        o[i + 2] = map[o[i + 2]]
        o[i + 3] = map[o[i + 3]]
        o[i + 4] = map[o[i + 4]]
        o[i + 5] = map[o[i + 5]]
        o[i + 6] = map[o[i + 6]]
        o[i + 7] = map[o[i + 7]]
      }

      for (const end = o.length; i < end; i++) o[i] = map[o[i]]
      return o
    }

    // Cache and return one and the same object
    const entry = { mapper, incomplete }
    mappers.set(encoding, entry)
    return entry
  }
  /**
   * Returns a function decoding bytes of a single-byte encoding into a string.
   *
   * For every path except the iso-8859-1 shortcut below, the decoder is memoized per
   * encoding label.
   *
   * The decoder has the shape `(arr, loose = false) => string`. Unless `loose` is set,
   * it throws a TypeError (E_STRICT) when the encoding has unmapped bytes and the
   * decoded non-ASCII part contains U+FFFD.
   *
   * @param {string} encoding - supported encoding label
   * @returns {Function} the decoder
   * @throws {RangeError} when getEncoding rejects the label
   */
  export function encodingDecoder(encoding) {
    const cached = decoders.get(encoding)
    if (cached) return cached

    const isLatin1 = encoding === 'iso-8859-1'
    // This shortcut is returned before getEncoding runs and is not stored in the cache
    if (isLatin1 && !nativeDecoder) return (arr, loose = false) => decodeLatin1(arr) // native decoder is faster for ascii below

    const codes = getEncoding(encoding)
    const incomplete = codes.includes(r)

    let strings // byte value -> one-character string, built only once a lookup is really needed
    const decoder = (arr, loose = false) => {
      // NOTE(review): asciiPrefix presumably returns the length of the leading ASCII run - confirm in latin1.js
      const prefixLen = asciiPrefix(arr)
      if (prefixLen === arr.length) return decodeAscii(arr)
      if (isLatin1) return decodeLatin1(arr)

      // Neither early return above reads the table, so it is created after them
      if (!strings) {
        const allCodes = Array.from({ length: 128 }, (_, i) => i).concat(codes)
        while (allCodes.length < 256) allCodes.push(allCodes.length) // identity for bytes the table does not cover
        strings = allCodes.map((c) => String.fromCharCode(c))
      }

      const prefix = decodeLatin1(arr, 0, prefixLen) // TODO: check if decodeAscii with subarray is faster for small prefixes too
      const suffix = decode2string(arr, prefix.length, arr.length, strings)
      if (!loose && incomplete && suffix.includes('\uFFFD')) throw new TypeError(E_STRICT)
      return prefix + suffix
    }

    decoders.set(encoding, decoder)
    return decoder
  }
  /**
   * Returns, memoized per encoding, the reverse lookup table of a single-byte encoding:
   * a Uint8Array indexed by UTF-16 code unit that holds the byte for that code unit.
   *
   * Code units below 128 map to themselves. Slots never written stay 0, and code units
   * beyond the highest mapped one are outside the table.
   *
   * @param {string} encoding - supported encoding label
   * @returns {Uint8Array} table of length (highest mapped code unit + 1)
   * @throws {RangeError} when getEncoding rejects the label
   */
  export function encodeMap(encoding) {
    const hit = encmaps.get(encoding)
    if (hit) return hit

    const codes = getEncoding(encoding)
    // Bytes the table does not cover map to the code unit of the same value
    for (let next = codes.length; next < 128; next++) codes.push(128 + next)

    // The highest mapped code unit (never below 128, U+FFFD ignored) fixes the table size
    const highest = codes.reduce((top, code) => (code !== r && code > top ? code : top), 128)
    const table = new Uint8Array(highest + 1) // < 10 KiB for all except macintosh, 63 KiB for macintosh

    for (let low = 0; low < 128; low++) {
      table[low] = low
      const code = codes[low]
      if (code !== r) table[code] = 128 + low
    }

    encmaps.set(encoding, table)
    return table
  }
  93. }