// extract-hostname.ts
  1. /**
  2. * @param url - URL we want to extract a hostname from.
  3. * @param urlIsValidHostname - hint from caller; true if `url` is already a valid hostname.
  4. */
  5. export default function extractHostname(
  6. url: string,
  7. urlIsValidHostname: boolean,
  8. ): string | null {
  9. let start = 0;
  10. let end: number = url.length;
  11. let hasUpper = false;
  12. // If url is not already a valid hostname, then try to extract hostname.
  13. if (!urlIsValidHostname) {
  14. // Special handling of data URLs
  15. if (url.startsWith('data:')) {
  16. return null;
  17. }
  18. // Trim leading spaces
  19. while (start < url.length && url.charCodeAt(start) <= 32) {
  20. start += 1;
  21. }
  22. // Trim trailing spaces
  23. while (end > start + 1 && url.charCodeAt(end - 1) <= 32) {
  24. end -= 1;
  25. }
  26. // Skip scheme.
  27. if (
  28. url.charCodeAt(start) === 47 /* '/' */ &&
  29. url.charCodeAt(start + 1) === 47 /* '/' */
  30. ) {
  31. start += 2;
  32. } else {
  33. const indexOfProtocol = url.indexOf(':/', start);
  34. if (indexOfProtocol !== -1) {
  35. // Implement fast-path for common protocols. We expect most protocols
  36. // should be one of these 4 and thus we will not need to perform the
  37. // more expansive validity check most of the time.
  38. const protocolSize = indexOfProtocol - start;
  39. const c0 = url.charCodeAt(start);
  40. const c1 = url.charCodeAt(start + 1);
  41. const c2 = url.charCodeAt(start + 2);
  42. const c3 = url.charCodeAt(start + 3);
  43. const c4 = url.charCodeAt(start + 4);
  44. if (
  45. protocolSize === 5 &&
  46. c0 === 104 /* 'h' */ &&
  47. c1 === 116 /* 't' */ &&
  48. c2 === 116 /* 't' */ &&
  49. c3 === 112 /* 'p' */ &&
  50. c4 === 115 /* 's' */
  51. ) {
  52. // https
  53. } else if (
  54. protocolSize === 4 &&
  55. c0 === 104 /* 'h' */ &&
  56. c1 === 116 /* 't' */ &&
  57. c2 === 116 /* 't' */ &&
  58. c3 === 112 /* 'p' */
  59. ) {
  60. // http
  61. } else if (
  62. protocolSize === 3 &&
  63. c0 === 119 /* 'w' */ &&
  64. c1 === 115 /* 's' */ &&
  65. c2 === 115 /* 's' */
  66. ) {
  67. // wss
  68. } else if (
  69. protocolSize === 2 &&
  70. c0 === 119 /* 'w' */ &&
  71. c1 === 115 /* 's' */
  72. ) {
  73. // ws
  74. } else {
  75. // Check that scheme is valid
  76. for (let i = start; i < indexOfProtocol; i += 1) {
  77. const lowerCaseCode = url.charCodeAt(i) | 32;
  78. if (
  79. !(
  80. (
  81. (lowerCaseCode >= 97 && lowerCaseCode <= 122) || // [a, z]
  82. (lowerCaseCode >= 48 && lowerCaseCode <= 57) || // [0, 9]
  83. lowerCaseCode === 46 || // '.'
  84. lowerCaseCode === 45 || // '-'
  85. lowerCaseCode === 43
  86. ) // '+'
  87. )
  88. ) {
  89. return null;
  90. }
  91. }
  92. }
  93. // Skip 0, 1 or more '/' after ':/'
  94. start = indexOfProtocol + 2;
  95. while (url.charCodeAt(start) === 47 /* '/' */) {
  96. start += 1;
  97. }
  98. }
  99. }
  100. // Detect first occurrence of '/', '?' or '#'. We also keep track of the
  101. // last occurrence of '@', ']' or ':' to speed-up subsequent parsing of
  102. // (respectively), identifier, ipv6 or port.
  103. let indexOfIdentifier = -1;
  104. let indexOfClosingBracket = -1;
  105. let indexOfPort = -1;
  106. for (let i = start; i < end; i += 1) {
  107. const code: number = url.charCodeAt(i);
  108. if (
  109. code === 35 || // '#'
  110. code === 47 || // '/'
  111. code === 63 // '?'
  112. ) {
  113. end = i;
  114. break;
  115. } else if (code === 64) {
  116. // '@'
  117. indexOfIdentifier = i;
  118. } else if (code === 93) {
  119. // ']'
  120. indexOfClosingBracket = i;
  121. } else if (code === 58) {
  122. // ':'
  123. indexOfPort = i;
  124. } else if (code >= 65 && code <= 90) {
  125. hasUpper = true;
  126. }
  127. }
  128. // Detect identifier: '@'
  129. if (
  130. indexOfIdentifier !== -1 &&
  131. indexOfIdentifier > start &&
  132. indexOfIdentifier < end
  133. ) {
  134. start = indexOfIdentifier + 1;
  135. }
  136. // Handle ipv6 addresses
  137. if (url.charCodeAt(start) === 91 /* '[' */) {
  138. if (indexOfClosingBracket !== -1) {
  139. return url.slice(start + 1, indexOfClosingBracket).toLowerCase();
  140. }
  141. return null;
  142. } else if (indexOfPort !== -1 && indexOfPort > start && indexOfPort < end) {
  143. // Detect port: ':'
  144. end = indexOfPort;
  145. }
  146. }
  147. // Trim trailing dots
  148. while (end > start + 1 && url.charCodeAt(end - 1) === 46 /* '.' */) {
  149. end -= 1;
  150. }
  151. const hostname: string =
  152. start !== 0 || end !== url.length ? url.slice(start, end) : url;
  153. if (hasUpper) {
  154. return hostname.toLowerCase();
  155. }
  156. return hostname;
  157. }