utf8.d.ts 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596
  1. /**
  2. * UTF-8 encoding/decoding
  3. *
  4. * ```js
  5. * import { utf8fromString, utf8toString } from '@exodus/bytes/utf8.js'
  6. *
  7. * // loose
  8. * import { utf8fromStringLoose, utf8toStringLoose } from '@exodus/bytes/utf8.js'
  9. * ```
  10. *
  11. * _These methods by design encode/decode BOM (codepoint `U+FEFF` Byte Order Mark) as-is._\
  12. * _If you need BOM handling or detection, use `@exodus/bytes/encoding.js`_
  13. *
  14. * @module @exodus/bytes/utf8.js
  15. */
  16. /// <reference types="node" />
  17. import type { OutputFormat, Uint8ArrayBuffer } from './array.js';
  18. /**
  19. * Encode a string to UTF-8 bytes (strict mode)
  20. *
  21. * Throws on invalid Unicode (unpaired surrogates) instead of replacing it; see `utf8fromStringLoose` for the replacing variant
  22. *
  23. * This is similar to the following snippet (but works on all engines):
  24. * ```js
  25. * // Strict encode, requiring Unicode codepoints to be valid
  26. * if (typeof string !== 'string' || !string.isWellFormed()) throw new TypeError()
  27. * return new TextEncoder().encode(string)
  28. * ```
  29. *
  30. * @param string - The string to encode; must be well-formed (no unpaired surrogates)
  31. * @param format - Output format (default: 'uint8'): `'uint8'` yields a `Uint8ArrayBuffer`, `'buffer'` yields a `Buffer`
  32. * @returns The UTF-8 encoded bytes, typed according to `format`
  33. */
  34. export function utf8fromString(string: string, format?: 'uint8'): Uint8ArrayBuffer;
  35. export function utf8fromString(string: string, format: 'buffer'): Buffer;
  36. export function utf8fromString(string: string, format?: OutputFormat): Uint8ArrayBuffer | Buffer;
  37. /**
  38. * Encode a string to UTF-8 bytes (loose mode)
  39. *
  40. * Replaces invalid Unicode (unpaired surrogates) with replacement codepoints `U+FFFD`
  41. * per [WHATWG Encoding](https://encoding.spec.whatwg.org/) specification.
  42. *
  43. * _Such replacement is a non-injective function: it is irreversible and causes collisions.\
  44. * Prefer using strict throwing methods for cryptography applications._
  45. *
  46. * This is similar to the following snippet (but works on all engines):
  47. * ```js
  48. * // Loose encode, replacing invalid Unicode codepoints with U+FFFD
  49. * if (typeof string !== 'string') throw new TypeError()
  50. * return new TextEncoder().encode(string)
  51. * ```
  52. *
  53. * @param string - The string to encode; unpaired surrogates are replaced rather than rejected
  54. * @param format - Output format (default: 'uint8'): `'uint8'` yields a `Uint8ArrayBuffer`, `'buffer'` yields a `Buffer`
  55. * @returns The UTF-8 encoded bytes, typed according to `format`
  56. */
  57. export function utf8fromStringLoose(string: string, format?: 'uint8'): Uint8ArrayBuffer;
  58. export function utf8fromStringLoose(string: string, format: 'buffer'): Buffer;
  59. export function utf8fromStringLoose(
  60. string: string,
  61. format?: OutputFormat
  62. ): Uint8ArrayBuffer | Buffer;
  63. /**
  64. * Decode UTF-8 bytes to a string (strict mode)
  65. *
  66. * Throws on invalid UTF-8 byte sequences instead of replacing them; see `utf8toStringLoose` for the replacing variant
  67. *
  68. * This is similar to `new TextDecoder('utf-8', { fatal: true, ignoreBOM: true }).decode(arr)`,
  69. * but works on all engines.
  70. *
  71. * @param arr - The UTF-8 bytes to decode
  72. * @returns The decoded string (a BOM `U+FEFF`, if present, is decoded as-is rather than stripped)
  73. */
  74. export function utf8toString(arr: Uint8Array): string;
  75. /**
  76. * Decode UTF-8 bytes to a string (loose mode)
  77. *
  78. * Replaces invalid UTF-8 byte sequences with replacement codepoints `U+FFFD`
  79. * per [WHATWG Encoding](https://encoding.spec.whatwg.org/) specification.
  80. *
  81. * _Such replacement is a non-injective function: it is irreversible and causes collisions.\
  82. * Prefer using strict throwing methods for cryptography applications._
  83. *
  84. * This is similar to `new TextDecoder('utf-8', { ignoreBOM: true }).decode(arr)`,
  85. * but works on all engines.
  86. *
  87. * @param arr - The UTF-8 bytes to decode
  88. * @returns The decoded string (a BOM `U+FEFF`, if present, is decoded as-is rather than stripped)
  89. */
  90. export function utf8toStringLoose(arr: Uint8Array): string;