// url-state-machine.js
  "use strict";
  require("@exodus/bytes/encoding.js"); // for legacy multi-byte encodings
  const { percentEncodeAfterEncoding } = require("@exodus/bytes/whatwg.js");
  const tr46 = require("tr46");
  const infra = require("./infra");
  const { utf8DecodeWithoutBOM } = require("./encoding");
  const { percentDecodeString, utf8PercentEncodeCodePoint, utf8PercentEncodeString,
    isC0ControlPercentEncode, isFragmentPercentEncode,
    extraQueryPercentEncodeChars, extraSpecialQueryPercentEncodeChars,
    isPathPercentEncode, isUserinfoPercentEncode } = require("./percent-encoding");
  /**
   * Returns the code point of the first character of `char`.
   * Used throughout this file to compare parsed code points against literals.
   *
   * @param {string} char - String whose first code point is wanted.
   * @returns {number|undefined} The code point at index 0 (`undefined` for "").
   */
  function p(char) {
    return char.codePointAt(0);
  }
  /**
   * Schemes treated as "special" by this parser, mapped to their default port.
   * `file` is special but has no default port, hence `null`.
   */
  const specialSchemes = {
    ftp: 21,
    file: null,
    http: 80,
    https: 443,
    ws: 80,
    wss: 443
  };
  // Unique sentinel returned by the parsers in this file to signal a parse failure.
  const failure = Symbol("failure");
  /**
   * Counts the code points (not UTF-16 code units) in a string.
   *
   * @param {string} str - String to measure.
   * @returns {number} Number of code points produced by iterating `str`.
   */
  function countSymbols(str) {
    return [...str].length;
  }
  /**
   * Returns the character for the code point stored at `input[idx]`.
   *
   * @param {number[]} input - Array of code points.
   * @param {number} idx - Index to read.
   * @returns {string|undefined} The character, or `undefined` when the slot
   *   does not hold a number (for example when `idx` is out of range).
   */
  function at(input, idx) {
    const c = input[idx];
    // The coercing global isNaN is intentional: it is true for `undefined`,
    // which is what an out-of-range read yields.
    return isNaN(c) ? undefined : String.fromCodePoint(c);
  }
  /**
   * Tells whether a path segment is a single-dot segment: "." or its
   * percent-encoded form "%2e" (case-insensitive).
   *
   * @param {string} buffer - Path segment to test.
   * @returns {boolean} True for a single-dot segment.
   */
  function isSingleDot(buffer) {
    return buffer === "." || buffer.toLowerCase() === "%2e";
  }
  /**
   * Tells whether a path segment is a double-dot segment: ".." with either
   * dot optionally written as "%2e" (case-insensitive).
   *
   * @param {string} buffer - Path segment to test.
   * @returns {boolean} True for a double-dot segment.
   */
  function isDoubleDot(buffer) {
    const lowered = buffer.toLowerCase();
    return lowered === ".." || lowered === "%2e." || lowered === ".%2e" || lowered === "%2e%2e";
  }
  /**
   * Tells whether two code points form a Windows drive letter: an ASCII
   * letter followed by ":" or "|".
   *
   * @param {number} cp1 - First code point.
   * @param {number} cp2 - Second code point.
   * @returns {boolean} True when the pair is a drive letter.
   */
  function isWindowsDriveLetterCodePoints(cp1, cp2) {
    return infra.isASCIIAlpha(cp1) && (cp2 === p(":") || cp2 === p("|"));
  }
  /**
   * Tells whether a string is exactly a Windows drive letter: one ASCII
   * letter followed by ":" or "|".
   *
   * @param {string} string - Candidate string.
   * @returns {boolean} True when `string` is a two-character drive letter.
   */
  function isWindowsDriveLetterString(string) {
    return string.length === 2 && infra.isASCIIAlpha(string.codePointAt(0)) && (string[1] === ":" || string[1] === "|");
  }
  /**
   * Tells whether a string is a normalized Windows drive letter: one ASCII
   * letter followed by ":" (the "|" form is not accepted here).
   *
   * @param {string} string - Candidate string.
   * @returns {boolean} True when `string` is a normalized drive letter.
   */
  function isNormalizedWindowsDriveLetterString(string) {
    return string.length === 2 && infra.isASCIIAlpha(string.codePointAt(0)) && string[1] === ":";
  }
  /**
   * Tells whether a string contains any code point that is not allowed in a
   * host: NUL, tab, LF, CR, space, or one of  # / : < > ? @ [ \ ] ^ |
   *
   * @param {string} string - Host candidate.
   * @returns {boolean} True when at least one forbidden code point is present.
   */
  function containsForbiddenHostCodePoint(string) {
    return string.search(/\u0000|\u0009|\u000A|\u000D|\u0020|#|\/|:|<|>|\?|@|\[|\\|\]|\^|\|/u) !== -1;
  }
  /**
   * Tells whether a string contains a code point that is not allowed in a
   * domain: any forbidden host code point, any C0 control (U+0000-U+001F),
   * "%", or U+007F.
   *
   * @param {string} string - Domain candidate.
   * @returns {boolean} True when at least one forbidden code point is present.
   */
  function containsForbiddenDomainCodePoint(string) {
    return containsForbiddenHostCodePoint(string) || string.search(/[\u0000-\u001F]|%|\u007F/u) !== -1;
  }
  /**
   * Tells whether `scheme` is one of the special schemes listed in
   * `specialSchemes`.
   *
   * Uses an own-property check: a plain lookup would also see properties
   * inherited from Object.prototype, so the syntactically valid scheme
   * "constructor" would wrongly be reported as special.
   *
   * @param {string} scheme - Lower-cased scheme without the trailing ":".
   * @returns {boolean} True when the scheme is special.
   */
  function isSpecialScheme(scheme) {
    return Object.prototype.hasOwnProperty.call(specialSchemes, scheme);
  }
  /**
   * Tells whether a URL record has a special scheme.
   *
   * @param {{scheme: string}} url - URL record.
   * @returns {boolean} True when `url.scheme` is special.
   */
  function isSpecial(url) {
    return isSpecialScheme(url.scheme);
  }
  /**
   * Tells whether a URL record does NOT have a special scheme.
   *
   * @param {{scheme: string}} url - URL record.
   * @returns {boolean} True when `url.scheme` is not special.
   */
  function isNotSpecial(url) {
    return !isSpecialScheme(url.scheme);
  }
  /**
   * Returns the default port registered for `scheme` in `specialSchemes`.
   *
   * Only own properties are consulted so that scheme names colliding with
   * Object.prototype members (e.g. "constructor") yield `undefined` instead
   * of an inherited value.
   *
   * @param {string} scheme - Lower-cased scheme without the trailing ":".
   * @returns {number|null|undefined} The default port, `null` for "file",
   *   or `undefined` when the scheme is not special.
   */
  function defaultPort(scheme) {
    if (!Object.prototype.hasOwnProperty.call(specialSchemes, scheme)) {
      return undefined;
    }
    return specialSchemes[scheme];
  }
  /**
   * Parses one dot-separated part of an IPv4 address.
   *
   * A "0x"/"0X" prefix selects hexadecimal, any other leading "0" (on input of
   * length >= 2) selects octal, otherwise the part is decimal.
   *
   * @param {string} input - The part to parse.
   * @returns {number|symbol} The parsed number; `failure` when `input` is
   *   empty or contains a character that is not a digit of the selected radix.
   */
  function parseIPv4Number(input) {
    if (input === "") {
      return failure;
    }

    let R = 10;

    if (input.length >= 2 && input.charAt(0) === "0" && input.charAt(1).toLowerCase() === "x") {
      input = input.substring(2);
      R = 16;
    } else if (input.length >= 2 && input.charAt(0) === "0") {
      input = input.substring(1);
      R = 8;
    }

    // Nothing left after stripping the radix prefix (e.g. "0x") means zero.
    if (input === "") {
      return 0;
    }

    let regex = /[^0-7]/u;
    if (R === 10) {
      regex = /[^0-9]/u;
    }
    if (R === 16) {
      regex = /[^0-9A-Fa-f]/u;
    }

    if (regex.test(input)) {
      return failure;
    }

    return parseInt(input, R);
  }
  /**
   * Parses a dotted IPv4 address into a single number.
   *
   * One trailing empty part (a trailing ".") is ignored. Up to four parts are
   * allowed; every part but the last must be <= 255, and the last part may
   * fill all remaining bytes.
   *
   * @param {string} input - Candidate IPv4 address.
   * @returns {number|symbol} The address as a number, or `failure`.
   */
  function parseIPv4(input) {
    const parts = input.split(".");
    if (parts[parts.length - 1] === "") {
      if (parts.length > 1) {
        parts.pop();
      }
    }

    if (parts.length > 4) {
      return failure;
    }

    const numbers = [];
    for (const part of parts) {
      const n = parseIPv4Number(part);
      if (n === failure) {
        return failure;
      }
      numbers.push(n);
    }

    for (let i = 0; i < numbers.length - 1; ++i) {
      if (numbers[i] > 255) {
        return failure;
      }
    }
    // The last number must fit in the bytes not taken by the earlier parts.
    if (numbers[numbers.length - 1] >= 256 ** (5 - numbers.length)) {
      return failure;
    }

    let ipv4 = numbers.pop();
    let counter = 0;
    for (const n of numbers) {
      ipv4 += n * 256 ** (3 - counter);
      ++counter;
    }

    return ipv4;
  }
  /**
   * Serializes a numeric IPv4 address as dotted decimal.
   *
   * @param {number} address - The address as a number.
   * @returns {string} Four decimal bytes joined by ".".
   */
  function serializeIPv4(address) {
    let output = "";
    let n = address;

    // Peel off the lowest byte each round and prepend it.
    for (let i = 1; i <= 4; ++i) {
      output = String(n % 256) + output;
      if (i !== 4) {
        output = `.${output}`;
      }
      n = Math.floor(n / 256);
    }

    return output;
  }
  /**
   * Parses the text of an IPv6 address (without the surrounding brackets).
   *
   * Supports a single "::" compression and a trailing embedded dotted IPv4
   * address.
   *
   * @param {string} input - Address text.
   * @returns {number[]|symbol} Array of eight 16-bit pieces, or `failure`.
   */
  function parseIPv6(input) {
    const address = [0, 0, 0, 0, 0, 0, 0, 0];
    let pieceIndex = 0;
    let compress = null; // piece index where "::" was seen, if any
    let pointer = 0;

    // Work on code points so that indexing is per character.
    input = Array.from(input, c => c.codePointAt(0));

    // A leading ":" is only valid as the start of "::".
    if (input[pointer] === p(":")) {
      if (input[pointer + 1] !== p(":")) {
        return failure;
      }

      pointer += 2;
      ++pieceIndex;
      compress = pieceIndex;
    }

    while (pointer < input.length) {
      if (pieceIndex === 8) {
        return failure;
      }

      if (input[pointer] === p(":")) {
        // Only one "::" is allowed.
        if (compress !== null) {
          return failure;
        }
        ++pointer;
        ++pieceIndex;
        compress = pieceIndex;
        continue;
      }

      let value = 0;
      let length = 0;

      // Read up to four hex digits of the current piece.
      while (length < 4 && infra.isASCIIHex(input[pointer])) {
        value = value * 0x10 + parseInt(at(input, pointer), 16);
        ++pointer;
        ++length;
      }

      if (input[pointer] === p(".")) {
        // Embedded IPv4: rewind and re-read the digits just consumed as decimal.
        if (length === 0) {
          return failure;
        }

        pointer -= length;

        if (pieceIndex > 6) {
          return failure;
        }

        let numbersSeen = 0;

        while (input[pointer] !== undefined) {
          let ipv4Piece = null;

          if (numbersSeen > 0) {
            if (input[pointer] === p(".") && numbersSeen < 4) {
              ++pointer;
            } else {
              return failure;
            }
          }

          if (!infra.isASCIIDigit(input[pointer])) {
            return failure;
          }

          while (infra.isASCIIDigit(input[pointer])) {
            const number = parseInt(at(input, pointer), 10);
            if (ipv4Piece === null) {
              ipv4Piece = number;
            } else if (ipv4Piece === 0) {
              // Leading zeros are not allowed in an IPv4 piece.
              return failure;
            } else {
              ipv4Piece = ipv4Piece * 10 + number;
            }
            if (ipv4Piece > 255) {
              return failure;
            }
            ++pointer;
          }

          // Two IPv4 bytes are packed into each 16-bit piece.
          address[pieceIndex] = address[pieceIndex] * 0x100 + ipv4Piece;

          ++numbersSeen;

          if (numbersSeen === 2 || numbersSeen === 4) {
            ++pieceIndex;
          }
        }

        if (numbersSeen !== 4) {
          return failure;
        }

        break;
      } else if (input[pointer] === p(":")) {
        ++pointer;
        // A single trailing ":" is invalid.
        if (input[pointer] === undefined) {
          return failure;
        }
      } else if (input[pointer] !== undefined) {
        return failure;
      }

      address[pieceIndex] = value;
      ++pieceIndex;
    }

    if (compress !== null) {
      // Move the pieces parsed after "::" to the end of the address.
      let swaps = pieceIndex - compress;
      pieceIndex = 7;
      while (pieceIndex !== 0 && swaps > 0) {
        const temp = address[compress + swaps - 1];
        address[compress + swaps - 1] = address[pieceIndex];
        address[pieceIndex] = temp;
        --pieceIndex;
        --swaps;
      }
    } else if (pieceIndex !== 8) {
      // Without compression all eight pieces must be present.
      return failure;
    }

    return address;
  }
  /**
   * Serializes an IPv6 address (array of eight pieces) as lower-case hex,
   * replacing the run of zero pieces chosen by
   * `findTheIPv6AddressCompressedPieceIndex` with "::".
   *
   * @param {number[]} address - Eight 16-bit pieces.
   * @returns {string} Serialized address, without brackets.
   */
  function serializeIPv6(address) {
    let output = "";
    const compress = findTheIPv6AddressCompressedPieceIndex(address);
    let ignore0 = false;

    for (let pieceIndex = 0; pieceIndex <= 7; ++pieceIndex) {
      // While inside the compressed run, skip its zero pieces.
      if (ignore0 && address[pieceIndex] === 0) {
        continue;
      } else if (ignore0) {
        ignore0 = false;
      }

      if (compress === pieceIndex) {
        // The previous piece already emitted one ":" unless we are at the start.
        const separator = pieceIndex === 0 ? "::" : ":";
        output += separator;
        ignore0 = true;
        continue;
      }

      output += address[pieceIndex].toString(16);

      if (pieceIndex !== 7) {
        output += ":";
      }
    }

    return output;
  }
  /**
   * Parses a host string.
   *
   * - "[...]" input is parsed as IPv6.
   * - When `isOpaque` is true the input is parsed as an opaque host.
   * - Otherwise the input is percent-decoded, UTF-8 decoded, converted with
   *   `domainToASCII`, and parsed as IPv4 when it ends in a number.
   *
   * @param {string} input - Host text.
   * @param {boolean} [isOpaque=false] - Parse as an opaque host.
   * @returns {string|number|number[]|symbol} Domain or opaque host string,
   *   IPv4 number, IPv6 piece array, or `failure`.
   */
  function parseHost(input, isOpaque = false) {
    if (input[0] === "[") {
      if (input[input.length - 1] !== "]") {
        return failure;
      }

      return parseIPv6(input.substring(1, input.length - 1));
    }

    if (isOpaque) {
      return parseOpaqueHost(input);
    }

    const domain = utf8DecodeWithoutBOM(percentDecodeString(input));
    const asciiDomain = domainToASCII(domain);
    if (asciiDomain === failure) {
      return failure;
    }

    if (endsInANumber(asciiDomain)) {
      return parseIPv4(asciiDomain);
    }

    return asciiDomain;
  }
  /**
   * Tells whether the last dot-separated label of `input` (ignoring one
   * trailing empty label) looks like a number: either it parses with
   * `parseIPv4Number` or it consists solely of ASCII digits.
   *
   * @param {string} input - Domain text.
   * @returns {boolean} True when the host should be treated as IPv4.
   */
  function endsInANumber(input) {
    const parts = input.split(".");
    if (parts[parts.length - 1] === "") {
      if (parts.length === 1) {
        return false;
      }
      parts.pop();
    }

    const last = parts[parts.length - 1];
    if (parseIPv4Number(last) !== failure) {
      return true;
    }

    // All-digit labels such as "09" fail the number parser but still count.
    if (/^[0-9]+$/u.test(last)) {
      return true;
    }

    return false;
  }
  /**
   * Parses an opaque host: rejects forbidden host code points, then
   * percent-encodes the input using the C0-control percent-encode set.
   *
   * @param {string} input - Host text.
   * @returns {string|symbol} The percent-encoded host, or `failure`.
   */
  function parseOpaqueHost(input) {
    if (containsForbiddenHostCodePoint(input)) {
      return failure;
    }

    return utf8PercentEncodeString(input, isC0ControlPercentEncode);
  }
  /**
   * Finds the start index of the longest run of zero pieces of length >= 2.
   * When several runs share the longest length, the first one wins.
   *
   * @param {number[]} address - IPv6 pieces.
   * @returns {number|null} Start index of the run, or `null` when no run of
   *   at least two zero pieces exists.
   */
  function findTheIPv6AddressCompressedPieceIndex(address) {
    let longestIndex = null;
    let longestSize = 1; // only find elements > 1
    let foundIndex = null;
    let foundSize = 0;

    for (let pieceIndex = 0; pieceIndex < address.length; ++pieceIndex) {
      if (address[pieceIndex] !== 0) {
        // A run just ended: keep it if it is strictly longer than the best so far.
        if (foundSize > longestSize) {
          longestIndex = foundIndex;
          longestSize = foundSize;
        }

        foundIndex = null;
        foundSize = 0;
      } else {
        if (foundIndex === null) {
          foundIndex = pieceIndex;
        }
        ++foundSize;
      }
    }

    // A run that reaches the end of the address is still pending here.
    if (foundSize > longestSize) {
      return foundIndex;
    }

    return longestIndex;
  }
  /**
   * Serializes a parsed host.
   *
   * @param {number|number[]|string} host - IPv4 number, IPv6 piece array, or
   *   an already-serialized domain / opaque host string.
   * @returns {string} Dotted IPv4, bracketed IPv6, or the string unchanged.
   */
  function serializeHost(host) {
    if (typeof host === "number") {
      return serializeIPv4(host);
    }

    // IPv6 serializer
    if (Array.isArray(host)) {
      return `[${serializeIPv6(host)}]`;
    }

    return host;
  }
  /**
   * Converts a domain to ASCII via `tr46.toASCII`.
   *
   * `beStrict` switches on hyphen checks, STD3 ASCII rules and DNS length
   * verification. In non-strict mode the result is additionally rejected when
   * it is empty or contains a forbidden domain code point.
   *
   * @param {string} domain - Domain to convert.
   * @param {boolean} [beStrict=false] - Use the strict tr46 options.
   * @returns {string|symbol} The ASCII domain, or `failure`.
   */
  function domainToASCII(domain, beStrict = false) {
    const result = tr46.toASCII(domain, {
      checkHyphens: beStrict,
      checkBidi: true,
      checkJoiners: true,
      useSTD3ASCIIRules: beStrict,
      transitionalProcessing: false,
      verifyDNSLength: beStrict,
      ignoreInvalidPunycode: false
    });

    if (result === null) {
      return failure;
    }

    if (!beStrict) {
      if (result === "") {
        return failure;
      }
      if (containsForbiddenDomainCodePoint(result)) {
        return failure;
      }
    }

    return result;
  }
  /**
   * Strips leading and trailing code units <= U+0020 (C0 controls and space).
   *
   * @param {string} string - String to trim.
   * @returns {string} The trimmed string.
   */
  function trimControlChars(string) {
    // Avoid using regexp because of this V8 bug: https://issues.chromium.org/issues/42204424
    let start = 0;
    let end = string.length;

    for (; start < end; ++start) {
      if (string.charCodeAt(start) > 0x20) {
        break;
      }
    }

    for (; end > start; --end) {
      if (string.charCodeAt(end - 1) > 0x20) {
        break;
      }
    }

    return string.substring(start, end);
  }
  /**
   * Removes every tab, LF and CR from a string, wherever they occur.
   *
   * @param {string} url - Input string.
   * @returns {string} The string without U+0009, U+000A and U+000D.
   */
  function trimTabAndNewline(url) {
    return url.replace(/\u0009|\u000A|\u000D/ug, "");
  }
  /**
   * Removes the last segment of `url.path` in place.
   *
   * Does nothing when the path is empty, or when the URL is a "file" URL
   * whose only segment is a normalized Windows drive letter.
   *
   * @param {{scheme: string, path: string[]}} url - URL record to mutate.
   * @returns {void}
   */
  function shortenPath(url) {
    const { path } = url;
    if (path.length === 0) {
      return;
    }
    // Never pop the drive letter off a file URL.
    if (url.scheme === "file" && path.length === 1 && isNormalizedWindowsDriveLetter(path[0])) {
      return;
    }

    path.pop();
  }
  /**
   * Tells whether a URL record has a non-empty username or password.
   *
   * @param {{username: string, password: string}} url - URL record.
   * @returns {boolean} True when either credential is non-empty.
   */
  function includesCredentials(url) {
    return url.username !== "" || url.password !== "";
  }
  /**
   * Tells whether a URL record cannot carry a username, password or port:
   * its host is null or empty, or its scheme is "file".
   *
   * @param {{host: *, scheme: string}} url - URL record.
   * @returns {boolean} True when credentials and port are not allowed.
   */
  function cannotHaveAUsernamePasswordPort(url) {
    return url.host === null || url.host === "" || url.scheme === "file";
  }
  /**
   * Tells whether a URL record has an opaque path, which this file represents
   * as a string (list paths are arrays).
   *
   * @param {{path: string|string[]}} url - URL record.
   * @returns {boolean} True when `url.path` is a string.
   */
  function hasAnOpaquePath(url) {
    return typeof url.path === "string";
  }
  /**
   * Tells whether a value is a normalized Windows drive letter: exactly one
   * ASCII letter followed by ":".
   *
   * @param {string} string - Candidate value (coerced to string by the regex).
   * @returns {boolean} True for e.g. "C:".
   */
  function isNormalizedWindowsDriveLetter(string) {
    return /^[A-Za-z]:$/u.test(string);
  }
  /**
   * Runs the URL parsing state machine over `input`.
   *
   * Construction performs the whole parse. Afterwards `this.url` holds the
   * (possibly partially) populated URL record, `this.failure` tells whether
   * parsing failed, and `this.parseError` whether a validation error was seen.
   *
   * @param {string} input - Text to parse.
   * @param {object} [base] - Base URL record for relative input; falsy for none.
   * @param {string} [encoding] - Encoding name; defaults to "utf-8".
   * @param {object} [url] - Existing URL record to modify. When omitted a fresh
   *   record is created and leading/trailing C0 controls and spaces are trimmed.
   * @param {string} [stateOverride] - State name to start in instead of
   *   "scheme start".
   */
  function URLStateMachine(input, base, encoding, url, stateOverride) {
    this.pointer = 0;
    this.input = input;
    this.base = base || null;
    this.encoding = encoding || "utf-8";
    this.stateOverride = stateOverride;
    this.url = url;
    this.failure = false;
    this.parseError = false;

    if (!this.url) {
      this.url = {
        scheme: "",
        username: "",
        password: "",
        host: null,
        port: null,
        path: [],
        query: null,
        fragment: null
      };

      const res = trimControlChars(this.input);
      if (res !== this.input) {
        this.parseError = true;
      }
      this.input = res;
    }

    const res = trimTabAndNewline(this.input);
    if (res !== this.input) {
      this.parseError = true;
    }
    this.input = res;

    this.state = stateOverride || "scheme start";

    this.buffer = "";
    this.atSignSeen = false;
    this.insideBrackets = false;
    this.passwordTokenSeen = false;

    // From here on `this.input` is an array of code points.
    this.input = Array.from(this.input, c => c.codePointAt(0));

    // `<=` on purpose: the final iteration feeds `undefined` (end of input)
    // to the current state.
    for (; this.pointer <= this.input.length; ++this.pointer) {
      const c = this.input[this.pointer];
      const cStr = isNaN(c) ? undefined : String.fromCodePoint(c);

      // exec state machine
      const ret = this[`parse ${this.state}`](c, cStr);
      if (!ret) {
        break; // terminate algorithm
      } else if (ret === failure) {
        this.failure = true;
        break;
      }
    }
  }
  /**
   * "scheme start" state: an ASCII letter starts a scheme; otherwise fall back
   * to "no scheme" (re-reading the same code point), or fail when a state
   * override is active.
   *
   * @param {number|undefined} c - Current code point (`undefined` at end of input).
   * @param {string|undefined} cStr - `c` as a string.
   * @returns {boolean|symbol} `true` to continue, or `failure`.
   */
  URLStateMachine.prototype["parse scheme start"] = function parseSchemeStart(c, cStr) {
    if (infra.isASCIIAlpha(c)) {
      this.buffer += cStr.toLowerCase();
      this.state = "scheme";
    } else if (!this.stateOverride) {
      this.state = "no scheme";
      --this.pointer;
    } else {
      this.parseError = true;
      return failure;
    }

    return true;
  };
  /**
   * "scheme" state: accumulates scheme characters until ":"; then stores the
   * scheme and selects the next state from the scheme kind and the input that
   * follows. Without ":" the input is re-parsed from the start as scheme-less.
   *
   * @param {number|undefined} c - Current code point (`undefined` at end of input).
   * @param {string|undefined} cStr - `c` as a string.
   * @returns {boolean|symbol} `true` to continue, `false` to stop, or `failure`.
   */
  URLStateMachine.prototype["parse scheme"] = function parseScheme(c, cStr) {
    if (infra.isASCIIAlphanumeric(c) || c === p("+") || c === p("-") || c === p(".")) {
      this.buffer += cStr.toLowerCase();
    } else if (c === p(":")) {
      if (this.stateOverride) {
        // With a state override, refuse scheme changes that would switch
        // between special and non-special, or that "file" cannot represent.
        if (isSpecial(this.url) && !isSpecialScheme(this.buffer)) {
          return false;
        }

        if (!isSpecial(this.url) && isSpecialScheme(this.buffer)) {
          return false;
        }

        if ((includesCredentials(this.url) || this.url.port !== null) && this.buffer === "file") {
          return false;
        }

        if (this.url.scheme === "file" && this.url.host === "") {
          return false;
        }
      }
      this.url.scheme = this.buffer;
      if (this.stateOverride) {
        // Drop a port that is the default for the new scheme.
        if (this.url.port === defaultPort(this.url.scheme)) {
          this.url.port = null;
        }
        return false;
      }
      this.buffer = "";
      if (this.url.scheme === "file") {
        if (this.input[this.pointer + 1] !== p("/") || this.input[this.pointer + 2] !== p("/")) {
          this.parseError = true;
        }
        this.state = "file";
      } else if (isSpecial(this.url) && this.base !== null && this.base.scheme === this.url.scheme) {
        this.state = "special relative or authority";
      } else if (isSpecial(this.url)) {
        this.state = "special authority slashes";
      } else if (this.input[this.pointer + 1] === p("/")) {
        this.state = "path or authority";
        ++this.pointer;
      } else {
        // Opaque paths are stored as a string rather than an array.
        this.url.path = "";
        this.state = "opaque path";
      }
    } else if (!this.stateOverride) {
      // Not a scheme after all: restart from the first code point
      // (the loop increment brings the pointer back to 0).
      this.buffer = "";
      this.state = "no scheme";
      this.pointer = -1;
    } else {
      this.parseError = true;
      return failure;
    }

    return true;
  };
  /**
   * "no scheme" state: resolves scheme-less input against the base URL.
   * Fails without a base, or when the base has an opaque path and the input
   * is not a fragment-only reference.
   *
   * @param {number|undefined} c - Current code point (`undefined` at end of input).
   * @returns {boolean|symbol} `true` to continue, or `failure`.
   */
  URLStateMachine.prototype["parse no scheme"] = function parseNoScheme(c) {
    if (this.base === null || (hasAnOpaquePath(this.base) && c !== p("#"))) {
      return failure;
    } else if (hasAnOpaquePath(this.base) && c === p("#")) {
      this.url.scheme = this.base.scheme;
      this.url.path = this.base.path;
      this.url.query = this.base.query;
      this.url.fragment = "";
      this.state = "fragment";
    } else if (this.base.scheme === "file") {
      this.state = "file";
      --this.pointer;
    } else {
      this.state = "relative";
      --this.pointer;
    }

    return true;
  };
  /**
   * "special relative or authority" state: "//" introduces an authority;
   * anything else is flagged and handled as a relative reference.
   *
   * @param {number|undefined} c - Current code point (`undefined` at end of input).
   * @returns {boolean} Always `true` (continue).
   */
  URLStateMachine.prototype["parse special relative or authority"] = function parseSpecialRelativeOrAuthority(c) {
    if (c === p("/") && this.input[this.pointer + 1] === p("/")) {
      this.state = "special authority ignore slashes";
      ++this.pointer;
    } else {
      this.parseError = true;
      this.state = "relative";
      --this.pointer;
    }

    return true;
  };
  /**
   * "path or authority" state: a "/" switches to "authority"; any other code
   * point is re-read in the "path" state.
   *
   * @param {number|undefined} c - Current code point (`undefined` at end of input).
   * @returns {boolean} Always `true` (continue).
   */
  URLStateMachine.prototype["parse path or authority"] = function parsePathOrAuthority(c) {
    if (c === p("/")) {
      this.state = "authority";
    } else {
      this.state = "path";
      --this.pointer;
    }

    return true;
  };
  /**
   * "relative" state: adopts the base URL's scheme and, unless the input
   * starts with a slash, its credentials, host, port, path and query, then
   * dispatches on "?", "#" or a path code point.
   *
   * @param {number|undefined} c - Current code point (`undefined` at end of input).
   * @returns {boolean} Always `true` (continue).
   */
  URLStateMachine.prototype["parse relative"] = function parseRelative(c) {
    this.url.scheme = this.base.scheme;
    if (c === p("/")) {
      this.state = "relative slash";
    } else if (isSpecial(this.url) && c === p("\\")) {
      this.parseError = true;
      this.state = "relative slash";
    } else {
      this.url.username = this.base.username;
      this.url.password = this.base.password;
      this.url.host = this.base.host;
      this.url.port = this.base.port;
      // Copy so that later path edits do not touch the base URL.
      this.url.path = this.base.path.slice();
      this.url.query = this.base.query;
      if (c === p("?")) {
        this.url.query = "";
        this.state = "query";
      } else if (c === p("#")) {
        this.url.fragment = "";
        this.state = "fragment";
      } else if (!isNaN(c)) {
        // A relative path replaces the base's last segment and drops its query.
        this.url.query = null;
        this.url.path.pop();
        this.state = "path";
        --this.pointer;
      }
    }

    return true;
  };
  /**
   * "relative slash" state: a second slash introduces an authority;
   * otherwise the base URL's credentials, host and port are kept and the
   * code point is re-read in the "path" state.
   *
   * @param {number|undefined} c - Current code point (`undefined` at end of input).
   * @returns {boolean} Always `true` (continue).
   */
  URLStateMachine.prototype["parse relative slash"] = function parseRelativeSlash(c) {
    if (isSpecial(this.url) && (c === p("/") || c === p("\\"))) {
      if (c === p("\\")) {
        this.parseError = true;
      }
      this.state = "special authority ignore slashes";
    } else if (c === p("/")) {
      this.state = "authority";
    } else {
      this.url.username = this.base.username;
      this.url.password = this.base.password;
      this.url.host = this.base.host;
      this.url.port = this.base.port;
      this.state = "path";
      --this.pointer;
    }

    return true;
  };
  /**
   * "special authority slashes" state: consumes "//" when present; otherwise
   * flags a parse error and re-reads the code point. Either way the next
   * state is "special authority ignore slashes".
   *
   * @param {number|undefined} c - Current code point (`undefined` at end of input).
   * @returns {boolean} Always `true` (continue).
   */
  URLStateMachine.prototype["parse special authority slashes"] = function parseSpecialAuthoritySlashes(c) {
    if (c === p("/") && this.input[this.pointer + 1] === p("/")) {
      this.state = "special authority ignore slashes";
      ++this.pointer;
    } else {
      this.parseError = true;
      this.state = "special authority ignore slashes";
      --this.pointer;
    }

    return true;
  };
  /**
   * "special authority ignore slashes" state: skips any further "/" or "\"
   * (flagging each as a parse error) and hands the first other code point to
   * the "authority" state.
   *
   * @param {number|undefined} c - Current code point (`undefined` at end of input).
   * @returns {boolean} Always `true` (continue).
   */
  URLStateMachine.prototype["parse special authority ignore slashes"] = function parseSpecialAuthorityIgnoreSlashes(c) {
    if (c !== p("/") && c !== p("\\")) {
      this.state = "authority";
      --this.pointer;
    } else {
      this.parseError = true;
    }

    return true;
  };
  /**
   * "authority" state: buffers code points until "@" or the end of the
   * authority.
   *
   * On "@" the buffer is userinfo: everything up to the first ":" is
   * percent-encoded into the username, the rest into the password. On the
   * end of the authority the pointer is rewound to the start of the buffered
   * text so that it is re-read in the "host" state.
   *
   * @param {number|undefined} c - Current code point (`undefined` at end of input).
   * @param {string|undefined} cStr - `c` as a string.
   * @returns {boolean|symbol} `true` to continue, or `failure`.
   */
  URLStateMachine.prototype["parse authority"] = function parseAuthority(c, cStr) {
    if (c === p("@")) {
      this.parseError = true;
      if (this.atSignSeen) {
        // An earlier "@" belonged to the userinfo; keep it, percent-encoded.
        this.buffer = `%40${this.buffer}`;
      }
      this.atSignSeen = true;

      // Iterate the buffer by code point. Indexing with codePointAt(i) for
      // i < countSymbols(buffer) mixes code-point counts with UTF-16 indices
      // and corrupts userinfo that contains astral characters.
      for (const symbol of this.buffer) {
        const codePoint = symbol.codePointAt(0);

        if (codePoint === p(":") && !this.passwordTokenSeen) {
          this.passwordTokenSeen = true;
          continue;
        }
        const encodedCodePoints = utf8PercentEncodeCodePoint(codePoint, isUserinfoPercentEncode);
        if (this.passwordTokenSeen) {
          this.url.password += encodedCodePoints;
        } else {
          this.url.username += encodedCodePoints;
        }
      }
      this.buffer = "";
    } else if (isNaN(c) || c === p("/") || c === p("?") || c === p("#") ||
               (isSpecial(this.url) && c === p("\\"))) {
      if (this.atSignSeen && this.buffer === "") {
        this.parseError = true;
        return failure;
      }
      // `this.input` holds code points, so rewind by the buffer's code-point
      // count (plus one for the loop increment).
      this.pointer -= countSymbols(this.buffer) + 1;
      this.buffer = "";
      this.state = "host";
    } else {
      this.buffer += cStr;
    }

    return true;
  };
  /**
   * "host" / "hostname" state (both names share this handler): buffers the
   * host text, tracking whether the pointer is inside "[...]" so that the
   * colons of an IPv6 literal are not taken for the port separator. The host
   * is parsed when ":" (outside brackets) or the end of the authority is
   * reached.
   *
   * @param {number|undefined} c - Current code point (`undefined` at end of input).
   * @param {string|undefined} cStr - `c` as a string.
   * @returns {boolean|symbol} `true` to continue, `false` to stop, or `failure`.
   */
  URLStateMachine.prototype["parse hostname"] =
  URLStateMachine.prototype["parse host"] = function parseHostName(c, cStr) {
    if (this.stateOverride && this.url.scheme === "file") {
      --this.pointer;
      this.state = "file host";
    } else if (c === p(":") && !this.insideBrackets) {
      if (this.buffer === "") {
        this.parseError = true;
        return failure;
      }

      // A hostname override must not carry a port.
      if (this.stateOverride === "hostname") {
        return failure;
      }

      const host = parseHost(this.buffer, isNotSpecial(this.url));
      if (host === failure) {
        return failure;
      }

      this.url.host = host;
      this.buffer = "";
      this.state = "port";
    } else if (isNaN(c) || c === p("/") || c === p("?") || c === p("#") ||
               (isSpecial(this.url) && c === p("\\"))) {
      --this.pointer;
      if (isSpecial(this.url) && this.buffer === "") {
        this.parseError = true;
        return failure;
      } else if (this.stateOverride && this.buffer === "" &&
                 (includesCredentials(this.url) || this.url.port !== null)) {
        this.parseError = true;
        return failure;
      }

      const host = parseHost(this.buffer, isNotSpecial(this.url));
      if (host === failure) {
        return failure;
      }

      this.url.host = host;
      this.buffer = "";
      this.state = "path start";
      if (this.stateOverride) {
        return false;
      }
    } else {
      if (c === p("[")) {
        this.insideBrackets = true;
      } else if (c === p("]")) {
        this.insideBrackets = false;
      }
      this.buffer += cStr;
    }

    return true;
  };
  /**
   * "port" state: buffers ASCII digits; at the end of the authority (or on
   * any other code point while a state override is active) stores the port,
   * using `null` when it equals the scheme's default port.
   *
   * @param {number|undefined} c - Current code point (`undefined` at end of input).
   * @param {string|undefined} cStr - `c` as a string.
   * @returns {boolean|symbol} `true` to continue, `false` to stop, or
   *   `failure` (port above 65535, unexpected code point, or an override with
   *   no digits).
   */
  URLStateMachine.prototype["parse port"] = function parsePort(c, cStr) {
    if (infra.isASCIIDigit(c)) {
      this.buffer += cStr;
    } else if (isNaN(c) || c === p("/") || c === p("?") || c === p("#") ||
               (isSpecial(this.url) && c === p("\\")) ||
               this.stateOverride) {
      if (this.buffer !== "") {
        // The buffer only ever holds ASCII digits, so base 10 is explicit here.
        const port = Number.parseInt(this.buffer, 10);
        if (port > 2 ** 16 - 1) {
          this.parseError = true;
          return failure;
        }
        this.url.port = port === defaultPort(this.url.scheme) ? null : port;
        this.buffer = "";
        if (this.stateOverride) {
          return false;
        }
      }
      if (this.stateOverride) {
        return failure;
      }
      this.state = "path start";
      --this.pointer;
    } else {
      this.parseError = true;
      return failure;
    }

    return true;
  };
  // Code points that may directly follow a Windows drive letter: / \ ? #
  const fileOtherwiseCodePoints = new Set([p("/"), p("\\"), p("?"), p("#")]);

  /**
   * Tells whether the code points of `input` starting at `pointer` begin with
   * a Windows drive letter that is either the end of the input or followed
   * by "/", "\", "?" or "#".
   *
   * @param {number[]} input - Array of code points.
   * @param {number} pointer - Index at which to look.
   * @returns {boolean} True when a drive letter starts at `pointer`.
   */
  function startsWithWindowsDriveLetter(input, pointer) {
    const length = input.length - pointer;
    return length >= 2 &&
      isWindowsDriveLetterCodePoints(input[pointer], input[pointer + 1]) &&
      (length === 2 || fileOtherwiseCodePoints.has(input[pointer + 2]));
  }
  /**
   * "file" state: sets the scheme to "file" with an empty host, then either
   * follows a leading slash, inherits host/path/query from a "file" base
   * URL, or falls through to the "path" state.
   *
   * @param {number|undefined} c - Current code point (`undefined` at end of input).
   * @returns {boolean} Always `true` (continue).
   */
  URLStateMachine.prototype["parse file"] = function parseFile(c) {
    this.url.scheme = "file";
    this.url.host = "";

    if (c === p("/") || c === p("\\")) {
      if (c === p("\\")) {
        this.parseError = true;
      }
      this.state = "file slash";
    } else if (this.base !== null && this.base.scheme === "file") {
      this.url.host = this.base.host;
      // Copy so that later path edits do not touch the base URL.
      this.url.path = this.base.path.slice();
      this.url.query = this.base.query;
      if (c === p("?")) {
        this.url.query = "";
        this.state = "query";
      } else if (c === p("#")) {
        this.url.fragment = "";
        this.state = "fragment";
      } else if (!isNaN(c)) {
        this.url.query = null;
        if (!startsWithWindowsDriveLetter(this.input, this.pointer)) {
          shortenPath(this.url);
        } else {
          // A drive letter in the input discards the inherited path entirely.
          this.parseError = true;
          this.url.path = [];
        }

        this.state = "path";
        --this.pointer;
      }
    } else {
      this.state = "path";
      --this.pointer;
    }

    return true;
  };
  /**
   * "file slash" state: a second slash leads to "file host"; otherwise the
   * host (and, when the input has no drive letter of its own, the drive
   * letter) of a "file" base URL is inherited before moving to "path".
   *
   * @param {number|undefined} c - Current code point (`undefined` at end of input).
   * @returns {boolean} Always `true` (continue).
   */
  URLStateMachine.prototype["parse file slash"] = function parseFileSlash(c) {
    if (c === p("/") || c === p("\\")) {
      if (c === p("\\")) {
        this.parseError = true;
      }
      this.state = "file host";
    } else {
      if (this.base !== null && this.base.scheme === "file") {
        if (!startsWithWindowsDriveLetter(this.input, this.pointer) &&
            isNormalizedWindowsDriveLetterString(this.base.path[0])) {
          this.url.path.push(this.base.path[0]);
        }
        this.url.host = this.base.host;
      }
      this.state = "path";
      --this.pointer;
    }

    return true;
  };
  /**
   * "file host" state: buffers the host of a file URL. At the end of the host
   * a buffered Windows drive letter is treated as the start of the path
   * (unless a state override is active), an empty buffer yields an empty
   * host, and "localhost" is normalized to the empty host.
   *
   * @param {number|undefined} c - Current code point (`undefined` at end of input).
   * @param {string|undefined} cStr - `c` as a string.
   * @returns {boolean|symbol} `true` to continue, `false` to stop, or `failure`.
   */
  URLStateMachine.prototype["parse file host"] = function parseFileHost(c, cStr) {
    if (isNaN(c) || c === p("/") || c === p("\\") || c === p("?") || c === p("#")) {
      --this.pointer;
      if (!this.stateOverride && isWindowsDriveLetterString(this.buffer)) {
        // The buffer is kept on purpose: the "path" state consumes it as the
        // first path segment.
        this.parseError = true;
        this.state = "path";
      } else if (this.buffer === "") {
        this.url.host = "";
        if (this.stateOverride) {
          return false;
        }
        this.state = "path start";
      } else {
        let host = parseHost(this.buffer, isNotSpecial(this.url));
        if (host === failure) {
          return failure;
        }
        if (host === "localhost") {
          host = "";
        }
        this.url.host = host;

        if (this.stateOverride) {
          return false;
        }

        this.buffer = "";
        this.state = "path start";
      }
    } else {
      this.buffer += cStr;
    }

    return true;
  };
  /**
   * "path start" state: consumes one leading slash (or backslash for special
   * URLs) and enters "path"; for non-special URLs without a state override it
   * may jump straight to "query" or "fragment".
   *
   * @param {number|undefined} c - Current code point (`undefined` at end of input).
   * @returns {boolean} Always `true` (continue).
   */
  URLStateMachine.prototype["parse path start"] = function parsePathStart(c) {
    if (isSpecial(this.url)) {
      if (c === p("\\")) {
        this.parseError = true;
      }
      this.state = "path";

      // Anything other than a slash belongs to the path and must be re-read.
      if (c !== p("/") && c !== p("\\")) {
        --this.pointer;
      }
    } else if (!this.stateOverride && c === p("?")) {
      this.url.query = "";
      this.state = "query";
    } else if (!this.stateOverride && c === p("#")) {
      this.url.fragment = "";
      this.state = "fragment";
    } else if (c !== undefined) {
      this.state = "path";
      if (c !== p("/")) {
        --this.pointer;
      }
    } else if (this.stateOverride && this.url.host === null) {
      this.url.path.push("");
    }

    return true;
  };
  /**
   * "path" state: buffers one percent-encoded path segment and commits it at
   * each segment boundary, resolving "." and ".." segments and normalizing a
   * leading Windows drive letter in file URLs.
   *
   * @param {number|undefined} c - Current code point (`undefined` at end of input).
   * @returns {boolean} Always `true` (continue).
   */
  URLStateMachine.prototype["parse path"] = function parsePath(c) {
    if (isNaN(c) || c === p("/") || (isSpecial(this.url) && c === p("\\")) ||
        (!this.stateOverride && (c === p("?") || c === p("#")))) {
      if (isSpecial(this.url) && c === p("\\")) {
        this.parseError = true;
      }

      if (isDoubleDot(this.buffer)) {
        shortenPath(this.url);
        // A trailing ".." (not followed by a slash) leaves an empty last segment.
        if (c !== p("/") && !(isSpecial(this.url) && c === p("\\"))) {
          this.url.path.push("");
        }
      } else if (isSingleDot(this.buffer) && c !== p("/") &&
                 !(isSpecial(this.url) && c === p("\\"))) {
        this.url.path.push("");
      } else if (!isSingleDot(this.buffer)) {
        if (this.url.scheme === "file" && this.url.path.length === 0 && isWindowsDriveLetterString(this.buffer)) {
          // Normalize "C|" to "C:".
          this.buffer = `${this.buffer[0]}:`;
        }
        this.url.path.push(this.buffer);
      }
      this.buffer = "";
      if (c === p("?")) {
        this.url.query = "";
        this.state = "query";
      }
      if (c === p("#")) {
        this.url.fragment = "";
        this.state = "fragment";
      }
    } else {
      // TODO: If c is not a URL code point and not "%", parse error.

      // "%" not followed by two hex digits is a validation error.
      if (c === p("%") &&
        (!infra.isASCIIHex(this.input[this.pointer + 1]) ||
          !infra.isASCIIHex(this.input[this.pointer + 2]))) {
        this.parseError = true;
      }

      this.buffer += utf8PercentEncodeCodePoint(c, isPathPercentEncode);
    }

    return true;
  };
  // Handler for the "opaque path" state, where this.url.path is built up as a single string.
  // "?" and "#" switch to the "query" / "fragment" states; everything else is appended to the path.
  // Always returns true.
  URLStateMachine.prototype["parse opaque path"] = function parseOpaquePath(c) {
    if (c === p("?")) {
      this.url.query = "";
      this.state = "query";
      return true;
    }

    if (c === p("#")) {
      this.url.fragment = "";
      this.state = "fragment";
      return true;
    }

    if (c === p(" ")) {
      // A space directly before "?" or "#" is stored as "%20"; any other space is kept literally.
      const next = this.input[this.pointer + 1];
      const precedesQueryOrFragment = next === p("?") || next === p("#");
      this.url.path += precedesQueryOrFragment ? "%20" : " ";
      return true;
    }

    // A NaN-like c (e.g. undefined) means there is nothing to append.
    if (isNaN(c)) {
      return true;
    }

    // TODO: Add: not a URL code point
    if (c !== p("%")) {
      // Until the TODO above is resolved, every code point other than "%" is flagged here.
      this.parseError = true;
    } else {
      // "%" must be followed by two ASCII hex digits.
      const validEscape = infra.isASCIIHex(this.input[this.pointer + 1]) &&
        infra.isASCIIHex(this.input[this.pointer + 2]);
      if (!validEscape) {
        this.parseError = true;
      }
    }

    this.url.path += utf8PercentEncodeCodePoint(c, isC0ControlPercentEncode);
    return true;
  };
  // Handler for the "query" state. Raw characters (cStr) are collected in this.buffer; when the
  // query ends, the buffer is percent-encoded with the active encoding and appended to
  // this.url.query. Always returns true.
  URLStateMachine.prototype["parse query"] = function parseQuery(c, cStr) {
    const special = isSpecial(this.url);
    const { scheme } = this.url;

    // Non-special URLs and ws/wss URLs always use UTF-8.
    if (!special || scheme === "ws" || scheme === "wss") {
      this.encoding = "utf-8";
    }

    // The query ends on a NaN-like c, or on "#" when no state override is active.
    const queryEnds = (!this.stateOverride && c === p("#")) || isNaN(c);

    if (!queryEnds) {
      // TODO: If c is not a URL code point and not "%", parse error.

      // A "%" that is not followed by two ASCII hex digits is a parse error.
      if (c === p("%")) {
        const validEscape = infra.isASCIIHex(this.input[this.pointer + 1]) &&
          infra.isASCIIHex(this.input[this.pointer + 2]);
        if (!validEscape) {
          this.parseError = true;
        }
      }

      this.buffer += cStr;
      return true;
    }

    // Special URLs use a different extra percent-encode set than non-special ones.
    const extraChars = special ? extraSpecialQueryPercentEncodeChars : extraQueryPercentEncodeChars;
    this.url.query += percentEncodeAfterEncoding(this.encoding, this.buffer, extraChars);
    this.buffer = "";

    if (c === p("#")) {
      this.url.fragment = "";
      this.state = "fragment";
    }
    return true;
  };
  // Handler for the "fragment" state: percent-encodes each code point into this.url.fragment.
  // A NaN-like c (e.g. undefined) appends nothing. Always returns true.
  URLStateMachine.prototype["parse fragment"] = function parseFragment(c) {
    if (isNaN(c)) {
      return true;
    }

    // TODO: If c is not a URL code point and not "%", parse error.

    // A "%" that is not followed by two ASCII hex digits is a parse error.
    if (c === p("%")) {
      const validEscape = infra.isASCIIHex(this.input[this.pointer + 1]) &&
        infra.isASCIIHex(this.input[this.pointer + 2]);
      if (!validEscape) {
        this.parseError = true;
      }
    }

    this.url.fragment += utf8PercentEncodeCodePoint(c, isFragmentPercentEncode);
    return true;
  };
  // Serializes a URL record to a string: scheme, optional "//" authority (credentials, host, port),
  // path, optional "?query" and, unless excludeFragment is truthy, optional "#fragment".
  function serializeURL(url, excludeFragment) {
    const { host, port, username, password, query, fragment } = url;

    let authority = "";
    if (host !== null) {
      // Credentials are emitted only when at least one of username/password is non-empty.
      let credentials = "";
      if (username !== "" || password !== "") {
        const passwordPart = password !== "" ? `:${password}` : "";
        credentials = `${username}${passwordPart}@`;
      }

      const hostPart = serializeHost(host);
      const portPart = port !== null ? `:${port}` : "";
      authority = `//${credentials}${hostPart}${portPart}`;
    }

    // With a null host, a list path of more than one segment whose first segment is empty would
    // serialize starting with "//"; "/." is emitted before the path in that case.
    const needsDotPrefix = host === null && !hasAnOpaquePath(url) &&
      url.path.length > 1 && url.path[0] === "";
    const pathPart = `${needsDotPrefix ? "/." : ""}${serializePath(url)}`;

    const queryPart = query !== null ? `?${query}` : "";
    const fragmentPart = !excludeFragment && fragment !== null ? `#${fragment}` : "";

    return `${url.scheme}:${authority}${pathPart}${queryPart}${fragmentPart}`;
  }
  // Serializes a { scheme, host, port } tuple as "scheme://host", plus ":port" when port is not null.
  function serializeOrigin(tuple) {
    const { scheme, host, port } = tuple;
    const hostPart = serializeHost(host);
    const portPart = port === null ? "" : `:${port}`;
    return `${scheme}://${hostPart}${portPart}`;
  }
  // Serializes a URL's path: an opaque path is returned unchanged; otherwise every segment of the
  // path list is prefixed with "/" and the results are concatenated.
  function serializePath(url) {
    if (hasAnOpaquePath(url)) {
      return url.path;
    }
    return url.path.map(segment => `/${segment}`).join("");
  }
  // Public API: expose the URL and path serializers.
  Object.assign(module.exports, { serializeURL, serializePath });
  // Returns the serialized origin of a URL record, or the string "null" for an opaque origin.
  module.exports.serializeURLOrigin = function (url) {
    // https://url.spec.whatwg.org/#concept-url-origin
    const { scheme } = url;

    if (scheme === "blob") {
      // A blob: URL takes the origin of the URL stored in its path, but only for http(s).
      const pathURL = module.exports.parseURL(serializePath(url));
      const isHTTPFamily = pathURL !== null &&
        (pathURL.scheme === "http" || pathURL.scheme === "https");
      return isHTTPFamily ? module.exports.serializeURLOrigin(pathURL) : "null";
    }

    const tupleOriginSchemes = ["ftp", "http", "https", "ws", "wss"];
    if (tupleOriginSchemes.includes(scheme)) {
      return serializeOrigin({
        scheme,
        host: url.host,
        port: url.port
      });
    }

    // For "file", the spec says:
    // > Unfortunate as it is, this is left as an exercise to the reader. When in doubt, return a new opaque origin.
    // Browsers tested so far:
    // - Chrome says "file://", but treats file: URLs as cross-origin for most (all?) purposes; see e.g.
    //   https://bugs.chromium.org/p/chromium/issues/detail?id=37586
    // - Firefox says "null", but treats file: URLs as same-origin sometimes based on directory stuff; see
    //   https://developer.mozilla.org/en-US/docs/Archive/Misc_top_level/Same-origin_policy_for_file:_URIs
    //
    // Every remaining scheme has an opaque origin; serializing an opaque origin returns "null".
    return "null";
  };
  // Runs the URL state machine over `input` and returns the resulting URL record, or null when the
  // machine reports failure. `options` may be omitted; its baseURL, encoding, url and stateOverride
  // properties are passed to the state machine.
  module.exports.basicURLParse = function (input, options) {
    const { baseURL, encoding, url, stateOverride } = options === undefined ? {} : options;
    const machine = new URLStateMachine(input, baseURL, encoding, url, stateOverride);
    return machine.failure ? null : machine.url;
  };
  // Stores `username` on the URL record after percent-encoding it with the userinfo encode set.
  module.exports.setTheUsername = function (url, username) {
    const encodedUsername = utf8PercentEncodeString(username, isUserinfoPercentEncode);
    url.username = encodedUsername;
  };
  // Stores `password` on the URL record after percent-encoding it with the userinfo encode set.
  module.exports.setThePassword = function (url, password) {
    const encodedPassword = utf8PercentEncodeString(password, isUserinfoPercentEncode);
    url.password = encodedPassword;
  };
  // Public API: expose the host serializer and the URL-record predicates.
  Object.assign(module.exports, {
    serializeHost,
    cannotHaveAUsernamePasswordPort,
    hasAnOpaquePath
  });
  // Returns the string form of `integer`, as produced by String().
  module.exports.serializeInteger = function (integer) {
    const text = String(integer);
    return text;
  };
  // Parses `input` into a URL record (or null on failure). `options` may be omitted; only its
  // baseURL and encoding properties are forwarded.
  module.exports.parseURL = function (input, options) {
    const { baseURL, encoding } = options === undefined ? {} : options;

    // We don't handle blobs, so this just delegates:
    return module.exports.basicURLParse(input, { baseURL, encoding });
  };