percent-encoding.js 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147
  1. "use strict";
  2. const { isASCIIHex } = require("./infra");
  3. const { utf8Encode } = require("./encoding");
  4. function p(char) {
  5. return char.codePointAt(0);
  6. }
  7. // https://url.spec.whatwg.org/#percent-encode
  8. function percentEncode(c) {
  9. let hex = c.toString(16).toUpperCase();
  10. if (hex.length === 1) {
  11. hex = `0${hex}`;
  12. }
  13. return `%${hex}`;
  14. }
  15. // https://url.spec.whatwg.org/#percent-decode
  16. function percentDecodeBytes(input) {
  17. const output = new Uint8Array(input.byteLength);
  18. let outputIndex = 0;
  19. for (let i = 0; i < input.byteLength; ++i) {
  20. const byte = input[i];
  21. if (byte !== 0x25) {
  22. output[outputIndex++] = byte;
  23. } else if (byte === 0x25 && (!isASCIIHex(input[i + 1]) || !isASCIIHex(input[i + 2]))) {
  24. output[outputIndex++] = byte;
  25. } else {
  26. const bytePoint = parseInt(String.fromCodePoint(input[i + 1], input[i + 2]), 16);
  27. output[outputIndex++] = bytePoint;
  28. i += 2;
  29. }
  30. }
  31. return output.slice(0, outputIndex);
  32. }
  33. // https://url.spec.whatwg.org/#string-percent-decode
  34. function percentDecodeString(input) {
  35. const bytes = utf8Encode(input);
  36. return percentDecodeBytes(bytes);
  37. }
  38. // https://url.spec.whatwg.org/#c0-control-percent-encode-set
  39. function isC0ControlPercentEncode(c) {
  40. return c <= 0x1F || c > 0x7E;
  41. }
  42. // https://url.spec.whatwg.org/#fragment-percent-encode-set
  43. const extraFragmentPercentEncodeSet = new Set([p(" "), p("\""), p("<"), p(">"), p("`")]);
  44. function isFragmentPercentEncode(c) {
  45. return isC0ControlPercentEncode(c) || extraFragmentPercentEncodeSet.has(c);
  46. }
  47. // https://url.spec.whatwg.org/#query-percent-encode-set
  48. const extraQueryPercentEncodeSet = new Set([p(" "), p("\""), p("#"), p("<"), p(">")]);
  49. const extraQueryPercentEncodeChars = String.fromCodePoint(...[...extraQueryPercentEncodeSet].sort());
  50. function isQueryPercentEncode(c) {
  51. return isC0ControlPercentEncode(c) || extraQueryPercentEncodeSet.has(c);
  52. }
  53. // https://url.spec.whatwg.org/#special-query-percent-encode-set
  54. const extraSpecialQueryPercentEncodeSet = new Set([...extraQueryPercentEncodeSet, p("'")]);
  55. const extraSpecialQueryPercentEncodeChars = String.fromCodePoint(...[...extraSpecialQueryPercentEncodeSet].sort());
  56. function isSpecialQueryPercentEncode(c) {
  57. return isC0ControlPercentEncode(c) || extraSpecialQueryPercentEncodeSet.has(c);
  58. }
  59. // https://url.spec.whatwg.org/#path-percent-encode-set
  60. const extraPathPercentEncodeSet = new Set([p("?"), p("`"), p("{"), p("}"), p("^")]);
  61. function isPathPercentEncode(c) {
  62. return isQueryPercentEncode(c) || extraPathPercentEncodeSet.has(c);
  63. }
  64. // https://url.spec.whatwg.org/#userinfo-percent-encode-set
  65. const extraUserinfoPercentEncodeSet =
  66. new Set([p("/"), p(":"), p(";"), p("="), p("@"), p("["), p("\\"), p("]"), p("|")]);
  67. function isUserinfoPercentEncode(c) {
  68. return isPathPercentEncode(c) || extraUserinfoPercentEncodeSet.has(c);
  69. }
  70. // https://url.spec.whatwg.org/#component-percent-encode-set
  71. const extraComponentPercentEncodeSet = new Set([p("$"), p("%"), p("&"), p("+"), p(",")]);
  72. function isComponentPercentEncode(c) {
  73. return isUserinfoPercentEncode(c) || extraComponentPercentEncodeSet.has(c);
  74. }
  75. // https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set
  76. const extraURLEncodedPercentEncodeSet = new Set([p("!"), p("'"), p("("), p(")"), p("~")]);
  77. function isURLEncodedPercentEncode(c) {
  78. return isComponentPercentEncode(c) || extraURLEncodedPercentEncodeSet.has(c);
  79. }
  80. // https://url.spec.whatwg.org/#code-point-percent-encode-after-encoding
  81. // https://url.spec.whatwg.org/#utf-8-percent-encode
  82. // Assuming encoding is always utf-8 allows us to trim one of the logic branches. TODO: support encoding.
  83. // The "-Internal" variant here has code points as JS strings. The external version used by other files has code points
  84. // as JS numbers, like the rest of the codebase.
  85. function utf8PercentEncodeCodePointInternal(codePoint, percentEncodePredicate) {
  86. const bytes = utf8Encode(codePoint);
  87. let output = "";
  88. for (const byte of bytes) {
  89. // Our percentEncodePredicate operates on bytes, not code points, so this is slightly different from the spec.
  90. if (!percentEncodePredicate(byte)) {
  91. output += String.fromCharCode(byte);
  92. } else {
  93. output += percentEncode(byte);
  94. }
  95. }
  96. return output;
  97. }
  98. function utf8PercentEncodeCodePoint(codePoint, percentEncodePredicate) {
  99. return utf8PercentEncodeCodePointInternal(String.fromCodePoint(codePoint), percentEncodePredicate);
  100. }
  101. // https://url.spec.whatwg.org/#string-percent-encode-after-encoding
  102. // https://url.spec.whatwg.org/#string-utf-8-percent-encode
  103. function utf8PercentEncodeString(input, percentEncodePredicate, spaceAsPlus = false) {
  104. let output = "";
  105. for (const codePoint of input) {
  106. if (spaceAsPlus && codePoint === " ") {
  107. output += "+";
  108. } else {
  109. output += utf8PercentEncodeCodePointInternal(codePoint, percentEncodePredicate);
  110. }
  111. }
  112. return output;
  113. }
  114. module.exports = {
  115. isC0ControlPercentEncode,
  116. isFragmentPercentEncode,
  117. isQueryPercentEncode,
  118. isSpecialQueryPercentEncode,
  119. isPathPercentEncode,
  120. isUserinfoPercentEncode,
  121. isURLEncodedPercentEncode,
  122. extraQueryPercentEncodeChars,
  123. extraSpecialQueryPercentEncodeChars,
  124. percentDecodeString,
  125. percentDecodeBytes,
  126. utf8PercentEncodeString,
  127. utf8PercentEncodeCodePoint
  128. };