utf8.node.js 2.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667
  1. import { typedView } from './array.js'
  2. import { assertU8, E_STRING, E_STRICT_UNICODE } from './fallback/_utils.js'
  3. import { E_STRICT } from './fallback/utf8.js'
  4. import { isAscii } from 'node:buffer'
  5. if (Buffer.TYPED_ARRAY_SUPPORT) throw new Error('Unexpected Buffer polyfill')
  6. let decoderFatal
  7. const decoderLoose = new TextDecoder('utf-8', { ignoreBOM: true })
  8. const { isWellFormed } = String.prototype
  9. const isDeno = !!globalThis.Deno
  10. try {
  11. decoderFatal = new TextDecoder('utf-8', { ignoreBOM: true, fatal: true })
  12. } catch {
  13. // Without ICU, Node.js doesn't support fatal option for utf-8
  14. }
  15. function encode(str, loose = false) {
  16. if (typeof str !== 'string') throw new TypeError(E_STRING)
  17. const strLength = str.length
  18. if (strLength === 0) return new Uint8Array() // faster than Uint8Array.of
  19. let res
  20. if (strLength > 0x4_00 && !isDeno) {
  21. // Faster for large strings
  22. const byteLength = Buffer.byteLength(str)
  23. res = Buffer.allocUnsafe(byteLength)
  24. const ascii = byteLength === strLength
  25. const written = ascii ? res.latin1Write(str) : res.utf8Write(str)
  26. if (written !== byteLength) throw new Error('Failed to write all bytes') // safeguard just in case
  27. if (ascii || loose) return res // no further checks needed
  28. } else {
  29. res = Buffer.from(str)
  30. if (res.length === strLength || loose) return res
  31. }
  32. if (!isWellFormed.call(str)) throw new TypeError(E_STRICT_UNICODE)
  33. return res
  34. }
  35. function decode(arr, loose = false) {
  36. assertU8(arr)
  37. const byteLength = arr.byteLength
  38. if (byteLength === 0) return ''
  39. if (byteLength > 0x6_00 && !(isDeno && loose) && isAscii(arr)) {
  40. // On non-ascii strings, this loses ~10% * [relative position of the first non-ascii byte] (up to 10% total)
  41. // On ascii strings, this wins 1.5x on loose = false and 1.3x on loose = true
  42. // Only makes sense for large enough strings
  43. const buf = Buffer.from(arr.buffer, arr.byteOffset, arr.byteLength)
  44. if (isDeno) return buf.toString() // Deno suffers from .latin1Slice
  45. return buf.latin1Slice(0, arr.byteLength) // .latin1Slice is faster than .asciiSlice
  46. }
  47. if (loose) return decoderLoose.decode(arr)
  48. if (decoderFatal) return decoderFatal.decode(arr)
  49. // We are in an env without native fatal decoder support (non-fixed Node.js without ICU)
  50. // Well, just recheck against encode if it contains replacement then, this is still faster than js impl
  51. const str = decoderLoose.decode(arr)
  52. if (str.includes('\uFFFD') && !Buffer.from(str).equals(arr)) throw new TypeError(E_STRICT)
  53. return str
  54. }
  55. export const utf8fromString = (str, format = 'uint8') => typedView(encode(str, false), format)
  56. export const utf8fromStringLoose = (str, format = 'uint8') => typedView(encode(str, true), format)
  57. export const utf8toString = (arr) => decode(arr, false)
  58. export const utf8toStringLoose = (arr) => decode(arr, true)