utf16.node.js 2.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970
  1. import { assertU8, E_STRING, E_STRICT_UNICODE } from './fallback/_utils.js'
  2. import { isDeno, isLE } from './fallback/platform.js'
  3. import { E_STRICT, decodeApiDecoders } from './fallback/utf16.js'
  // Bail out at load time when Buffer carries TYPED_ARRAY_SUPPORT: this module
  // treats that property as the sign of a polyfilled Buffer and refuses to run on it
  if (Buffer.TYPED_ARRAY_SUPPORT) {
    throw new Error('Unexpected Buffer polyfill')
  }

  // Grab the String.prototype methods once; they are invoked later through .call()
  const isWellFormed = String.prototype.isWellFormed
  const toWellFormed = String.prototype.toWellFormed

  /**
   * Expose the bytes behind a typed array (or Buffer) as a Uint8Array.
   * The result is a view over the same ArrayBuffer region, not a copy.
   */
  const to8 = (view) => {
    const { buffer, byteOffset, byteLength } = view
    return new Uint8Array(buffer, byteOffset, byteLength)
  }
  /**
   * Encode a string into UTF-16 code units via Buffer.
   * Unlike utf8, operates on Uint16Arrays by default.
   *
   * @param {string} str - text to encode
   * @param {boolean} [loose=false] - when true the string is first passed through
   *   toWellFormed; when false a string that is not well-formed is rejected
   * @param {'uint16'|'uint8-le'|'uint8-be'} [format='uint16'] - output layout
   * @returns {Uint16Array|Uint8Array} Uint16Array for 'uint16', Uint8Array for the uint8 formats
   * @throws {TypeError} for a non-string `str`, an unknown `format`, or (strict mode)
   *   a string that fails isWellFormed
   */
  function encode(str, loose = false, format = 'uint16') {
    if (typeof str !== 'string') throw new TypeError(E_STRING)

    const knownFormat = format === 'uint16' || format === 'uint8-le' || format === 'uint8-be'
    if (!knownFormat) throw new TypeError('Unknown format')

    let text = str
    if (loose) {
      // Buffer doesn't do this with utf16 encoding, so it is done up front
      text = toWellFormed.call(str)
    } else if (!isWellFormed.call(str)) {
      throw new TypeError(E_STRICT_UNICODE)
    }

    // Always produce little-endian bytes first, then adapt to the requested layout
    const le = Buffer.from(text, 'utf-16le')

    switch (format) {
      case 'uint8-le':
        return to8(le)
      case 'uint8-be':
        // swap16 exchanges every byte pair in place and returns the same Buffer
        return to8(le.swap16())
      case 'uint16': {
        // It should be already aligned, but copy just in case it is not
        const aligned = le.byteOffset % 2 === 0 ? le : Buffer.from(le)
        // Uint16Array uses platform byte order, so flip the bytes when the platform is not LE
        if (!isLE) aligned.swap16()
        return new Uint16Array(aligned.buffer, aligned.byteOffset, aligned.byteLength / 2)
      }
      default:
        /* c8 ignore next */
        throw new Error('Unreachable')
    }
  }
  /**
   * Wrap the bytes of a typed array in a Buffer.
   *
   * With a falsy `swap` the result is a Buffer view over the same memory as `x`.
   * With a truthy `swap` the bytes are copied first and the copy has every byte
   * pair exchanged (swap16), so `x` itself is left untouched.
   */
  const swapped = (x, swap) => {
    const view = Buffer.from(x.buffer, x.byteOffset, x.byteLength)
    if (!swap) return view
    const copy = Buffer.from(view)
    return copy.swap16()
  }
  // We skip TextDecoder on Node.js, as it is, for some reason, significantly slower than Buffer for utf16
  // Also, it incorrectly misses replacements when Node.js is built without ICU; we fix that
  /**
   * Decode UTF-16 input into a string using Buffer instead of TextDecoder.
   *
   * @param {Uint16Array|Uint8Array} input - a Uint16Array for 'uint16'; for the uint8
   *   formats the value is checked by assertU8 (presumably a Uint8Array check - verify
   *   against ./fallback/_utils.js) and must have an even byte length
   * @param {boolean} [loose=false] - when true the decoded string is returned after
   *   toWellFormed; when false a decoded string that is not well-formed is rejected
   * @param {'uint16'|'uint8-le'|'uint8-be'} [format='uint16'] - layout of `input`
   * @returns {string} the decoded text
   * @throws {TypeError} for a wrong input type, odd byte length, unknown format, or
   *   (strict mode) a decoded string that fails isWellFormed
   */
  function decodeNode(input, loose = false, format = 'uint16') {
    // Work out whether the bytes must be flipped to little-endian before decoding
    let needsSwap
    switch (format) {
      case 'uint16':
        if (!(input instanceof Uint16Array)) throw new TypeError('Expected an Uint16Array')
        needsSwap = !isLE // Uint16Array holds platform-order units
        break
      case 'uint8-le':
      case 'uint8-be':
        assertU8(input)
        if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
        needsSwap = format === 'uint8-be'
        break
      default:
        throw new TypeError('Unknown format')
    }

    const le = swapped(input, needsSwap)
    const decoded = le.ucs2Slice(0, le.byteLength)

    if (loose) return toWellFormed.call(decoded)
    if (!isWellFormed.call(decoded)) throw new TypeError(E_STRICT)
    return decoded
  }
  // When isDeno is set the decoder comes from ./fallback/utf16.js; otherwise the
  // Buffer-based decodeNode is used
  const decode = isDeno ? decodeApiDecoders : decodeNode

  /** Strict encode: forwards to encode() with loose = false. */
  export const utf16fromString = (str, format = 'uint16') => {
    return encode(str, false, format)
  }

  /** Loose encode: forwards to encode() with loose = true. */
  export const utf16fromStringLoose = (str, format = 'uint16') => {
    return encode(str, true, format)
  }

  /** Strict decode: forwards to the selected decoder with loose = false. */
  export const utf16toString = (arr, format = 'uint16') => {
    return decode(arr, false, format)
  }

  /** Loose decode: forwards to the selected decoder with loose = true. */
  export const utf16toStringLoose = (arr, format = 'uint16') => {
    return decode(arr, true, format)
  }