encode.js 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
  1. import { htmlTrie } from "./generated/encode-html.js";
  2. import { xmlReplacer, getCodePoint } from "./escape.js";
  // Selects the characters that `encodeHTML` replaces: tab, newline, form feed,
  // the ASCII punctuation ranges `!`..`,`, `.`, `/`, `:`..`@`, `[`..backtick and
  // `{`..`}`, plus every UTF-16 code unit from U+0080 through U+FFFF.
  // ASCII letters, digits, space, `-` and `~` are not matched.
  // The `g` flag makes `exec` advance `lastIndex`, so this regex carries state
  // between `exec` calls.
  3. const htmlReplacer = /[\t\n\f!-,./:-@[-`{-}\u0080-\uFFFF]/g;
  /**
   * Encodes `input` with HTML entities, replacing every character selected by
   * `htmlReplacer`. That set deliberately includes ASCII characters that are
   * perfectly valid in HTML documents, such as `#`.
   *
   * When a more compact result is wanted, use `encodeNonAsciiHTML` instead: it
   * only encodes characters that are not valid in HTML documents, together
   * with non-ASCII characters.
   *
   * A character with no equivalent entity is written as a hexadecimal numeric
   * reference (e.g. `&#xfc;`).
   *
   * @param {string} input Text to encode.
   * @returns {string} The encoded text.
   */
  export function encodeHTML(input) {
    const encoded = encodeHTMLTrieRe(htmlReplacer, input);
    return encoded;
  }
  /**
   * Encodes the characters selected by `xmlReplacer` with HTML entities: all
   * non-ASCII characters, plus the characters that are not valid in HTML
   * documents. Characters that are valid in HTML documents, such as `#`, are
   * left untouched.
   *
   * A character with no equivalent entity is written as a hexadecimal numeric
   * reference (e.g. `&#xfc;`).
   *
   * @param {string} input Text to encode.
   * @returns {string} The encoded text.
   */
  export function encodeNonAsciiHTML(input) {
    const encoded = encodeHTMLTrieRe(xmlReplacer, input);
    return encoded;
  }
  /**
   * Replaces every match of `regExp` in `input` with an entity.
   *
   * For each match the UTF-16 code unit at the match index is looked up in
   * `htmlTrie`:
   * - a non-object result is used directly as the replacement;
   * - an object result is a branch node. Its `n` field is either a single
   *   follow-up code unit (with `o` as the replacement for the two-unit
   *   sequence) or a lookup with a `get` method keyed by the follow-up code
   *   unit. If the following code unit matches, both units are consumed;
   *   otherwise the node's own value `v` is used;
   * - if no replacement is found, a lowercase hexadecimal numeric reference is
   *   emitted for the code point returned by `getCodePoint`.
   *
   * @param {RegExp} regExp Regex selecting the characters to encode. The loop
   *   relies on `exec` advancing `lastIndex`, so it must carry the `g` flag.
   *   Its `lastIndex` is reset on entry and modified while scanning.
   * @param {string} input Text to encode.
   * @returns {string} `input` with every matched character replaced.
   */
  function encodeHTMLTrieRe(regExp, input) {
    // The replacers are shared, stateful regexes. If an earlier call threw
    // part-way through a scan, `lastIndex` would still point into that old
    // input and this scan would silently skip the start of `input`, leaving
    // it unescaped. Always start from the beginning.
    regExp.lastIndex = 0;
    let returnValue = "";
    let lastIndex = 0;
    let match;
    while ((match = regExp.exec(input)) !== null) {
      const { index } = match;
      // Copy the untouched text between the previous replacement and this match.
      returnValue += input.substring(lastIndex, index);
      const char = input.charCodeAt(index);
      let next = htmlTrie.get(char);
      if (typeof next === "object") {
        // We are in a branch. Try to match the next char.
        if (index + 1 < input.length) {
          const nextChar = input.charCodeAt(index + 1);
          const value =
            typeof next.n === "number"
              ? next.n === nextChar
                ? next.o
                : undefined
              : next.n.get(nextChar);
          if (value !== undefined) {
            returnValue += value;
            // Two code units were consumed: step the regex past the second one.
            lastIndex = regExp.lastIndex += 1;
            continue;
          }
        }
        // No two-unit match; fall back to the branch node's own value.
        next = next.v;
      }
      // We might have a tree node without a value; skip and use a numeric entity.
      if (next === undefined) {
        const cp = getCodePoint(input, index);
        returnValue += `&#x${cp.toString(16)};`;
        // Increase by 1 if we have a surrogate pair
        lastIndex = regExp.lastIndex += Number(cp !== char);
      } else {
        returnValue += next;
        lastIndex = index + 1;
      }
    }
    // Append whatever follows the last replacement.
    return returnValue + input.slice(lastIndex);
  }
  69. //# sourceMappingURL=encode.js.map