ClawLab
/
RobotDaily


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170
							/**
 * Extracts the hostname from `url` using a hand-written scan over character
 * codes (no `URL` object is constructed).
 *
 * When `urlIsValidHostname` is `false` the function:
 *   1. trims leading/trailing characters with code <= 32 (spaces, controls);
 *   2. rejects `data:` URLs;
 *   3. skips a leading `//` or a `scheme:/` prefix (plus any extra slashes);
 *   4. cuts the input at the first '/', '?' or '#';
 *   5. drops `user:password@` credentials and a trailing `:port`;
 *   6. unwraps bracketed IPv6 literals.
 * In every case trailing dots are removed from the result, and the result is
 * lower-cased if an ASCII upper-case letter was seen during the scan.
 *
 * @param url - URL we want to extract a hostname from.
 * @param urlIsValidHostname - hint from caller; true if `url` is already a
 * valid hostname. In that case the extraction steps above are skipped and
 * only trailing dots are trimmed.
 * @returns The extracted hostname (possibly an empty string when the input
 * has no host part, e.g. `http://`), or `null` when `url` is a data URL, has
 * an invalid scheme before `:/`, or starts an IPv6 literal that is never
 * closed.
 */
export default function extractHostname(
  url: string,
  urlIsValidHostname: boolean,
): string | null {
  let start = 0;
  let end: number = url.length;
  let hasUpper = false;

  // If url is not already a valid hostname, then try to extract hostname.
  if (!urlIsValidHostname) {
    // Trim leading spaces (any char code <= 32).
    while (start < url.length && url.charCodeAt(start) <= 32) {
      start += 1;
    }

    // Trim trailing spaces, always keeping at least one character.
    while (end > start + 1 && url.charCodeAt(end - 1) <= 32) {
      end -= 1;
    }

    // Special handling of data URLs: they have no hostname. The check runs
    // after trimming and ignores case so that '  data:...' and 'DATA:...'
    // are not mistaken for a host named 'data' followed by a port.
    if (startsWithDataScheme(url, start)) {
      return null;
    }

    // Skip scheme.
    if (
      url.charCodeAt(start) === 47 /* '/' */ &&
      url.charCodeAt(start + 1) === 47 /* '/' */
    ) {
      // Protocol-relative URL: '//host/...'
      start += 2;
    } else {
      const indexOfProtocol = url.indexOf(':/', start);
      if (indexOfProtocol !== -1) {
        // `false` when the ':/' we found lives in the path, query or
        // fragment of a scheme-less input such as 'host/?next=http://x'.
        let hasScheme = true;

        // Fast-path for common protocols. We expect most protocols to be one
        // of https/http/wss/ws and thus we will not need to perform the more
        // expensive validity check most of the time.
        if (!isCommonScheme(url, start, indexOfProtocol - start)) {
          let hasInvalidChar = false;
          for (let i = start; i < indexOfProtocol; i += 1) {
            const code = url.charCodeAt(i);
            if (
              code === 35 || // '#'
              code === 47 || // '/'
              code === 63 // '?'
            ) {
              // The host part ended before ':/', so there is no scheme.
              hasScheme = false;
              break;
            }
            if (!isSchemeChar(code)) {
              // Keep scanning: a later delimiter would still mean that this
              // text was never a scheme in the first place.
              hasInvalidChar = true;
            }
          }

          if (hasScheme && hasInvalidChar) {
            return null;
          }
        }

        if (hasScheme) {
          // Skip 0, 1 or more '/' after ':/'
          start = indexOfProtocol + 2;
          while (url.charCodeAt(start) === 47 /* '/' */) {
            start += 1;
          }
        }
      }
    }

    // Detect first occurrence of '/', '?' or '#'. We also keep track of the
    // last occurrence of '@', ']' or ':' to speed-up subsequent parsing of
    // (respectively), identifier, ipv6 or port.
    let indexOfIdentifier = -1;
    let indexOfClosingBracket = -1;
    let indexOfPort = -1;
    for (let i = start; i < end; i += 1) {
      const code: number = url.charCodeAt(i);
      if (
        code === 35 || // '#'
        code === 47 || // '/'
        code === 63 // '?'
      ) {
        end = i;
        break;
      } else if (code === 64) {
        // '@'
        indexOfIdentifier = i;
      } else if (code === 93) {
        // ']'
        indexOfClosingBracket = i;
      } else if (code === 58) {
        // ':'
        indexOfPort = i;
      } else if (code >= 65 && code <= 90) {
        // [A, Z]: remember that the result needs lower-casing.
        hasUpper = true;
      }
    }

    // Detect identifier: '@' (skip 'user:password@')
    if (
      indexOfIdentifier !== -1 &&
      indexOfIdentifier > start &&
      indexOfIdentifier < end
    ) {
      start = indexOfIdentifier + 1;
    }

    // Handle ipv6 addresses
    if (url.charCodeAt(start) === 91 /* '[' */) {
      // The closing bracket must come after the opening one; a ']' that was
      // only seen inside the credentials does not terminate the literal.
      if (indexOfClosingBracket > start) {
        return url.slice(start + 1, indexOfClosingBracket).toLowerCase();
      }
      return null;
    } else if (indexOfPort !== -1 && indexOfPort > start && indexOfPort < end) {
      // Detect port: ':' (a ':' located inside the credentials is ignored
      // because it sits before `start`).
      end = indexOfPort;
    }
  }

  // Trim trailing dots, always keeping at least one character.
  while (end > start + 1 && url.charCodeAt(end - 1) === 46 /* '.' */) {
    end -= 1;
  }

  // Avoid allocating a new string when nothing was trimmed.
  const hostname: string =
    start !== 0 || end !== url.length ? url.slice(start, end) : url;

  if (hasUpper) {
    return hostname.toLowerCase();
  }

  return hostname;
}

/** Returns true if `url` has the text `data:` (in any letter case) at `start`. */
function startsWithDataScheme(url: string, start: number): boolean {
  // `| 32` folds an ASCII upper-case letter onto its lower-case form; for the
  // letters compared here no other character code folds onto the same value.
  return (
    (url.charCodeAt(start) | 32) === 100 /* 'd' */ &&
    (url.charCodeAt(start + 1) | 32) === 97 /* 'a' */ &&
    (url.charCodeAt(start + 2) | 32) === 116 /* 't' */ &&
    (url.charCodeAt(start + 3) | 32) === 97 /* 'a' */ &&
    url.charCodeAt(start + 4) === 58 /* ':' */
  );
}

/**
 * Returns true if the `size` characters of `url` beginning at `start` are
 * exactly one of the lower-case schemes `https`, `http`, `wss` or `ws`.
 */
function isCommonScheme(url: string, start: number, size: number): boolean {
  switch (size) {
    case 5:
      return url.startsWith('https', start);
    case 4:
      return url.startsWith('http', start);
    case 3:
      return url.startsWith('wss', start);
    case 2:
      return url.startsWith('ws', start);
    default:
      return false;
  }
}

/** Returns true if `code` is an ASCII letter, a digit, '.', '-' or '+'. */
function isSchemeChar(code: number): boolean {
  return (
    (code >= 97 && code <= 122) || // [a, z]
    (code >= 65 && code <= 90) || // [A, Z]
    (code >= 48 && code <= 57) || // [0, 9]
    code === 46 || // '.'
    code === 45 || // '-'
    code === 43 // '+'
  );
}