single-byte.d.ts 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159
  1. /**
  2. * Decode / encode the legacy single-byte encodings according to the
  3. * [Encoding standard](https://encoding.spec.whatwg.org/)
  4. * ([§9](https://encoding.spec.whatwg.org/#legacy-single-byte-encodings),
  5. * [§14.5](https://encoding.spec.whatwg.org/#x-user-defined)),
  6. * and [unicode.org](https://unicode.org/Public/MAPPINGS/ISO8859) `iso-8859-*` mappings.
  7. *
  8. * ```js
  9. * import { createSinglebyteDecoder, createSinglebyteEncoder } from '@exodus/bytes/single-byte.js'
  10. * import { windows1252toString, windows1252fromString } from '@exodus/bytes/single-byte.js'
  11. * import { latin1toString, latin1fromString } from '@exodus/bytes/single-byte.js'
  12. * ```
  13. *
  14. * > [!WARNING]
  15. * > This is a lower-level API for single-byte encodings.
  16. * > It might not match what you expect, as it supports both WHATWG and unicode.org encodings under
  17. * > different names, with the main intended use case for the latter being either non-web or legacy contexts.
  18. * >
  19. * > For a safe WHATWG Encoding-compatible API, see `@exodus/bytes/encoding.js` import (and variants of it).
  20. * >
  21. * > Make sure you know what you are doing and check the documentation when directly using encodings from this file.
  22. *
  23. * Supports all single-byte encodings listed in the WHATWG Encoding standard:
  24. * `ibm866`, `iso-8859-2`, `iso-8859-3`, `iso-8859-4`, `iso-8859-5`, `iso-8859-6`, `iso-8859-7`, `iso-8859-8`,
  25. * `iso-8859-8-i`, `iso-8859-10`, `iso-8859-13`, `iso-8859-14`, `iso-8859-15`, `iso-8859-16`, `koi8-r`, `koi8-u`,
  26. * `macintosh`, `windows-874`, `windows-1250`, `windows-1251`, `windows-1252`, `windows-1253`, `windows-1254`,
  27. * `windows-1255`, `windows-1256`, `windows-1257`, `windows-1258`, `x-mac-cyrillic` and `x-user-defined`.
  28. *
  29. * Also supports `iso-8859-1`, `iso-8859-9`, `iso-8859-11` as defined at
  30. * [unicode.org](https://unicode.org/Public/MAPPINGS/ISO8859)
  31. * (and all other `iso-8859-*` encodings there as they match WHATWG).
  32. *
  33. * > [!NOTE]
  34. * > While all `iso-8859-*` encodings supported by the [WHATWG Encoding standard](https://encoding.spec.whatwg.org/) match
  35. * > [unicode.org](https://unicode.org/Public/MAPPINGS/ISO8859), the WHATWG Encoding spec doesn't support
  36. * > `iso-8859-1`, `iso-8859-9`, `iso-8859-11`, and instead maps them as labels to `windows-1252`, `windows-1254`, `windows-874`.\
  37. * > `createSinglebyteDecoder()` (unlike `TextDecoder` or `legacyHookDecode()`) does not do such mapping,
  38. * > so its results will differ from `TextDecoder` for those encoding names.
  39. *
  40. * ```js
  41. * > new TextDecoder('iso-8859-1').encoding
  42. * 'windows-1252'
  43. * > new TextDecoder('iso-8859-9').encoding
  44. * 'windows-1254'
  45. * > new TextDecoder('iso-8859-11').encoding
  46. * 'windows-874'
  47. * > new TextDecoder('iso-8859-9').decode(Uint8Array.of(0x80, 0x81, 0xd0))
  48. * '€\x81Ğ' // this is actually decoded according to windows-1254 per TextDecoder spec
  49. * > createSinglebyteDecoder('iso-8859-9')(Uint8Array.of(0x80, 0x81, 0xd0))
  50. * '\x80\x81Ğ' // this is iso-8859-9 as defined at https://unicode.org/Public/MAPPINGS/ISO8859/8859-9.txt
  51. * ```
  52. *
  53. * All WHATWG Encoding spec [`windows-*` encodings](https://encoding.spec.whatwg.org/#windows-874) are supersets of
  54. * corresponding [unicode.org encodings](https://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/), meaning that
  55. * they encode/decode all the old valid (non-replacement) strings / byte sequences identically, but can also support
  56. * a wider range of inputs.
  57. *
  58. * @module @exodus/bytes/single-byte.js
  59. */
  60. /// <reference types="node" />
  61. import type { Uint8ArrayBuffer } from './array.js';
  /**
   * Build a decoding function for one of the supported single-byte encodings.
   *
   * `encoding` is the lowercased name of the encoding. The result is a function
   * `decode(arr)` that turns bytes into a string.
   *
   * @param encoding - Lowercased encoding name (e.g., 'iso-8859-1', 'windows-1252')
   * @param loose - When true, unmapped bytes are replaced with the replacement character
   *   instead of causing a throw (default: false)
   * @returns A function that decodes bytes to string
   */
  export function createSinglebyteDecoder(
    encoding: string,
    loose?: boolean
  ): (arr: Uint8Array) => string;
  /**
   * Build an encoding function for one of the supported single-byte encodings.
   *
   * `encoding` is the lowercased name of the encoding. The result is a function
   * `encode(string)` that turns a string into bytes.
   *
   * The default `'fatal'` mode throws when the string is not well-formed or contains
   * any codepoint that cannot be encoded in the target encoding.
   *
   * @param encoding - Lowercased encoding name (e.g., 'iso-8859-1', 'windows-1252')
   * @param options - Encoding options
   * @param options.mode - Encoding mode (default: 'fatal'). 'fatal' is currently the only supported mode.
   * @returns A function that encodes string to bytes
   */
  export function createSinglebyteEncoder(
    encoding: string,
    options?: { mode?: 'fatal' }
  ): (string: string) => Uint8ArrayBuffer;
  /**
   * Convert `iso-8859-1` encoded bytes into a string.
   *
   * Every byte can be decoded in this encoding, so no loose variant exists.
   *
   * Equivalent to:
   * ```js
   * const latin1toString = createSinglebyteDecoder('iso-8859-1')
   * ```
   *
   * > [!NOTE]
   * > `new TextDecoder('iso-8859-1')` and `new TextDecoder('latin1')` behave differently from
   * > this function, as both alias to `new TextDecoder('windows-1252')`.
   *
   * @param arr - Bytes to decode
   * @returns The decoded string
   */
  export function latin1toString(arr: Uint8Array): string;
  /**
   * Convert a string into `iso-8859-1` encoded bytes.
   *
   * Throws if the string is not well-formed or contains any codepoint that
   * cannot be encoded in `iso-8859-1`.
   *
   * Equivalent to:
   * ```js
   * const latin1fromString = createSinglebyteEncoder('iso-8859-1', { mode: 'fatal' })
   * ```
   *
   * @param string - String to encode
   * @returns The encoded bytes
   */
  export function latin1fromString(string: string): Uint8ArrayBuffer;
  /**
   * Convert `windows-1252` encoded bytes into a string.
   *
   * Every byte can be decoded in this encoding, so no loose variant exists.
   *
   * Equivalent to:
   * ```js
   * const windows1252toString = createSinglebyteDecoder('windows-1252')
   * ```
   *
   * @param arr - Bytes to decode
   * @returns The decoded string
   */
  export function windows1252toString(arr: Uint8Array): string;
  /**
   * Convert a string into `windows-1252` encoded bytes.
   *
   * Throws if the string is not well-formed or contains any codepoint that
   * cannot be encoded in `windows-1252`.
   *
   * Equivalent to:
   * ```js
   * const windows1252fromString = createSinglebyteEncoder('windows-1252', { mode: 'fatal' })
   * ```
   *
   * @param string - String to encode
   * @returns The encoded bytes
   */
  export function windows1252fromString(string: string): Uint8ArrayBuffer;