decode.d.ts 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209
  1. export declare enum BinTrieFlags { // Three disjoint bit masks that together cover 16 bits (0xFFFF); presumably applied to the Uint16 decode-tree nodes — confirm against the implementation.
  2. VALUE_LENGTH = 49152, // 0xC000 — bits 14-15
  3. BRANCH_LENGTH = 16256, // 0x3F80 — bits 7-13
  4. JUMP_TABLE = 127 // 0x007F — bits 0-6
  5. }
  6. export declare enum DecodingMode { // Decoding mode accepted by `EntityDecoder.startEntity` and, optionally, by `decodeHTML`.
  7. /** Entities in text nodes; these can end with any character. */
  8. Legacy = 0,
  9. /** Only allow entities terminated with a semicolon. */
  10. Strict = 1,
  11. /** Entities in attributes have limitations on ending characters. */
  12. Attribute = 2
  13. }
  14. /**
  15. * Callbacks that report character reference errors as defined in the HTML spec. An instance may optionally be passed to the `EntityDecoder` constructor.
  16. */
  17. export interface EntityErrorProducer {
  18. missingSemicolonAfterCharacterReference(): void;
  19. absenceOfDigitsInNumericCharacterReference(consumedCharacters: number): void;
  20. validateNumericCharacterReference(code: number): void;
  21. }
  22. /**
  23. * Token decoder with support of writing partial entities.
  24. */
  25. export declare class EntityDecoder {
  26. /** The tree used to decode entities. */
  27. private readonly decodeTree;
  28. /**
  29. * The function that is called when a codepoint is decoded.
  30. *
  31. * For named entities that decode to more than one codepoint, this will be
  32. * called once per codepoint, each time with the same `consumed` value.
  33. *
  34. * @param codepoint The decoded codepoint.
  35. * @param consumed The number of bytes consumed by the decoder.
  36. */
  37. private readonly emitCodePoint;
  38. /** An object that is used to produce errors. */
  39. private readonly errors?;
  40. constructor(
  41. /** The tree used to decode entities. */
  42. decodeTree: Uint16Array,
  43. /**
  44. * The function that is called when a codepoint is decoded.
  45. *
  46. * For named entities that decode to more than one codepoint, this will be
  47. * called once per codepoint, each time with the same `consumed` value.
  48. *
  49. * @param codepoint The decoded codepoint.
  50. * @param consumed The number of bytes consumed by the decoder.
  51. */
  52. emitCodePoint: (cp: number, consumed: number) => void,
  53. /** An object that is used to produce errors. */
  54. errors?: EntityErrorProducer | undefined);
  55. /** The current state of the decoder. */
  56. private state;
  57. /** Characters that were consumed while parsing an entity. */
  58. private consumed;
  59. /**
  60. * The result of the entity.
  61. *
  62. * Either the result index of a named entity, or the codepoint of a
  63. * numeric entity.
  64. */
  65. private result;
  66. /** The current index in the decode tree. */
  67. private treeIndex;
  68. /** The number of characters that were consumed in excess. */
  69. private excess;
  70. /** The mode in which the decoder is operating. */
  71. private decodeMode;
  72. /** Resets the instance to make it reusable; `decodeMode` is the mode to use for the next entity. */
  73. startEntity(decodeMode: DecodingMode): void;
  74. /**
  75. * Write an entity to the decoder. This can be called multiple times with partial entities.
  76. * If the entity is incomplete, the decoder will return -1.
  77. *
  78. * Mirrors the implementation of `getDecoder`, but with the ability to stop decoding if the
  79. * entity is incomplete, and resume when the next string is written.
  80. *
  81. * @param input The string containing the entity (or a continuation of the entity).
  82. * @param offset The offset at which the entity begins. Should be 0 if this is not the first call.
  83. * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
  84. */
  85. write(input: string, offset: number): number;
  86. /**
  87. * Switches between the numeric decimal and hexadecimal states.
  88. *
  89. * Equivalent to the `Numeric character reference state` in the HTML spec.
  90. *
  91. * @param input The string containing the entity (or a continuation of the entity).
  92. * @param offset The current offset.
  93. * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
  94. */
  95. private stateNumericStart;
  96. private addToNumericResult;
  97. /**
  98. * Parses a hexadecimal numeric entity.
  99. *
  100. * Equivalent to the `Hexadecimal character reference state` in the HTML spec.
  101. *
  102. * @param input The string containing the entity (or a continuation of the entity).
  103. * @param offset The current offset.
  104. * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
  105. */
  106. private stateNumericHex;
  107. /**
  108. * Parses a decimal numeric entity.
  109. *
  110. * Equivalent to the `Decimal character reference state` in the HTML spec.
  111. *
  112. * @param input The string containing the entity (or a continuation of the entity).
  113. * @param offset The current offset.
  114. * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
  115. */
  116. private stateNumericDecimal;
  117. /**
  118. * Validate and emit a numeric entity.
  119. *
  120. * Implements the logic from the `Hexadecimal character reference start
  121. * state` and `Numeric character reference end state` in the HTML spec.
  122. *
  123. * @param lastCp The last code point of the entity. Used to see if the
  124. * entity was terminated with a semicolon.
  125. * @param expectedLength The minimum number of characters that should be
  126. * consumed. Used to validate that at least one digit
  127. * was consumed.
  128. * @returns The number of characters that were consumed.
  129. */
  130. private emitNumericEntity;
  131. /**
  132. * Parses a named entity.
  133. *
  134. * Equivalent to the `Named character reference state` in the HTML spec.
  135. *
  136. * @param input The string containing the entity (or a continuation of the entity).
  137. * @param offset The current offset.
  138. * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
  139. */
  140. private stateNamedEntity;
  141. /**
  142. * Emit a named entity that was not terminated with a semicolon.
  143. *
  144. * @returns The number of characters consumed.
  145. */
  146. private emitNotTerminatedNamedEntity;
  147. /**
  148. * Emit a named entity.
  149. *
  150. * @param result The index of the entity in the decode tree.
  151. * @param valueLength The number of bytes in the entity.
  152. * @param consumed The number of characters consumed.
  153. *
  154. * @returns The number of characters consumed.
  155. */
  156. private emitNamedEntityData;
  157. /**
  158. * Signal to the parser that the end of the input was reached.
  159. *
  160. * Remaining data will be emitted and relevant errors will be produced.
  161. *
  162. * @returns The number of characters consumed.
  163. */
  164. end(): number;
  165. }
  166. /**
  167. * Determines the branch of the current node that is taken given the current
  168. * character. This function is used to traverse the trie.
  169. *
  170. * @param decodeTree The trie.
  171. * @param current The current node.
  172. * @param nodeIndex The index right after the current node and its value.
  173. * @param char The current character.
  174. * @returns The index of the next node, or -1 if no branch is taken.
  175. */
  176. export declare function determineBranch(decodeTree: Uint16Array, current: number, nodeIndex: number, char: number): number;
  177. /**
  178. * Decodes an HTML string.
  179. *
  180. * @param htmlString The string to decode.
  181. * @param mode The decoding mode (optional; one of the `DecodingMode` values).
  182. * @returns The decoded string.
  183. */
  184. export declare function decodeHTML(htmlString: string, mode?: DecodingMode): string;
  185. /**
  186. * Decodes an HTML string that appears in an attribute. NOTE(review): presumably uses `DecodingMode.Attribute` — confirm against the implementation.
  187. *
  188. * @param htmlAttribute The string to decode.
  189. * @returns The decoded string.
  190. */
  191. export declare function decodeHTMLAttribute(htmlAttribute: string): string;
  192. /**
  193. * Decodes an HTML string, requiring all entities to be terminated by a semicolon. NOTE(review): presumably uses `DecodingMode.Strict` — confirm against the implementation.
  194. *
  195. * @param htmlString The string to decode.
  196. * @returns The decoded string.
  197. */
  198. export declare function decodeHTMLStrict(htmlString: string): string;
  199. /**
  200. * Decodes an XML string, requiring all entities to be terminated by a semicolon. NOTE(review): presumably decodes with the re-exported `xmlDecodeTree` — confirm against the implementation.
  201. *
  202. * @param xmlString The string to decode.
  203. * @returns The decoded string.
  204. */
  205. export declare function decodeXML(xmlString: string): string;
  206. export { htmlDecodeTree } from "./generated/decode-data-html.js";
  207. export { xmlDecodeTree } from "./generated/decode-data-xml.js";
  208. export { decodeCodePoint, replaceCodePoint, fromCodePoint, } from "./decode-codepoint.js";
  209. //# sourceMappingURL=decode.d.ts.map