encode.js 2.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
  1. "use strict";
  2. Object.defineProperty(exports, "__esModule", { value: true });
  3. exports.encodeHTML = encodeHTML;
  4. exports.encodeNonAsciiHTML = encodeNonAsciiHTML;
  5. const encode_html_js_1 = require("./generated/encode-html.js");
  6. const escape_js_1 = require("./escape.js");
  // Global regex handed to `encodeHTMLTrieRe` by `encodeHTML`. Its character
  // class matches tab, line feed, form feed, the ASCII punctuation ranges
  // `!`-`,`, `.`, `/`, `:`-`@`, `[`-`` ` `` and `{`-`}`, plus every UTF-16 code
  // unit from U+0080 through U+FFFF. The `g` flag makes it stateful through
  // `lastIndex`.
  7. const htmlReplacer = /[\t\n\f!-,./:-@[-`{-}\u0080-\uFFFF]/g;
  8. /**
  9. * Encodes all characters in the input using HTML entities. This includes
  10. * characters that are valid ASCII characters in HTML documents, such as `#`.
  11. *
  12. * To get a more compact output, consider using the `encodeNonAsciiHTML`
  13. * function, which will only encode characters that are not valid in HTML
  14. * documents, as well as non-ASCII characters.
  15. *
  16. * If a character has no equivalent entity, a numeric hexadecimal reference
  17. * (eg. `ü`) will be used.
  18. */
  19. function encodeHTML(input) {
  20. return encodeHTMLTrieRe(htmlReplacer, input);
  21. }
  22. /**
  23. * Encodes all non-ASCII characters, as well as characters not valid in HTML
  24. * documents using HTML entities. This function will not encode characters that
  25. * are valid in HTML documents, such as `#`.
  26. *
  27. * If a character has no equivalent entity, a numeric hexadecimal reference
  28. * (eg. `ü`) will be used.
  29. */
  30. function encodeNonAsciiHTML(input) {
  31. return encodeHTMLTrieRe(escape_js_1.xmlReplacer, input);
  32. }
  33. function encodeHTMLTrieRe(regExp, input) {
  34. let returnValue = "";
  35. let lastIndex = 0;
  36. let match;
  37. while ((match = regExp.exec(input)) !== null) {
  38. const { index } = match;
  39. returnValue += input.substring(lastIndex, index);
  40. const char = input.charCodeAt(index);
  41. let next = encode_html_js_1.htmlTrie.get(char);
  42. if (typeof next === "object") {
  43. // We are in a branch. Try to match the next char.
  44. if (index + 1 < input.length) {
  45. const nextChar = input.charCodeAt(index + 1);
  46. const value = typeof next.n === "number"
  47. ? next.n === nextChar
  48. ? next.o
  49. : undefined
  50. : next.n.get(nextChar);
  51. if (value !== undefined) {
  52. returnValue += value;
  53. lastIndex = regExp.lastIndex += 1;
  54. continue;
  55. }
  56. }
  57. next = next.v;
  58. }
  59. // We might have a tree node without a value; skip and use a numeric entity.
  60. if (next === undefined) {
  61. const cp = (0, escape_js_1.getCodePoint)(input, index);
  62. returnValue += `&#x${cp.toString(16)};`;
  63. // Increase by 1 if we have a surrogate pair
  64. lastIndex = regExp.lastIndex += Number(cp !== char);
  65. }
  66. else {
  67. returnValue += next;
  68. lastIndex = index + 1;
  69. }
  70. }
  71. return returnValue + input.substr(lastIndex);
  72. }
  73. //# sourceMappingURL=encode.js.map