match.js 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630
  1. import { MATCH, MISMATCH, DISALLOW_EMPTY } from './match-graph.js';
  2. import * as TYPE from '../tokenizer/types.js';
  3. const { hasOwnProperty } = Object.prototype;
  4. const STUB = 0;
  5. const TOKEN = 1;
  6. const OPEN_SYNTAX = 2;
  7. const CLOSE_SYNTAX = 3;
  8. const EXIT_REASON_MATCH = 'Match';
  9. const EXIT_REASON_MISMATCH = 'Mismatch';
  10. const EXIT_REASON_ITERATION_LIMIT = 'Maximum iteration number exceeded (please fill an issue on https://github.com/csstree/csstree/issues)';
  11. const ITERATION_LIMIT = 15000;
  12. export let totalIterationCount = 0;
  13. function reverseList(list) {
  14. let prev = null;
  15. let next = null;
  16. let item = list;
  17. while (item !== null) {
  18. next = item.prev;
  19. item.prev = prev;
  20. prev = item;
  21. item = next;
  22. }
  23. return prev;
  24. }
  25. function areStringsEqualCaseInsensitive(testStr, referenceStr) {
  26. if (testStr.length !== referenceStr.length) {
  27. return false;
  28. }
  29. for (let i = 0; i < testStr.length; i++) {
  30. const referenceCode = referenceStr.charCodeAt(i);
  31. let testCode = testStr.charCodeAt(i);
  32. // testCode.toLowerCase() for U+0041 LATIN CAPITAL LETTER A (A) .. U+005A LATIN CAPITAL LETTER Z (Z).
  33. if (testCode >= 0x0041 && testCode <= 0x005A) {
  34. testCode = testCode | 32;
  35. }
  36. if (testCode !== referenceCode) {
  37. return false;
  38. }
  39. }
  40. return true;
  41. }
  42. function isContextEdgeDelim(token) {
  43. if (token.type !== TYPE.Delim) {
  44. return false;
  45. }
  46. // Fix matching for unicode-range: U+30??, U+FF00-FF9F
  47. // Probably we need to check out previous match instead
  48. return token.value !== '?';
  49. }
  50. function isCommaContextStart(token) {
  51. if (token === null) {
  52. return true;
  53. }
  54. return (
  55. token.type === TYPE.Comma ||
  56. token.type === TYPE.Function ||
  57. token.type === TYPE.LeftParenthesis ||
  58. token.type === TYPE.LeftSquareBracket ||
  59. token.type === TYPE.LeftCurlyBracket ||
  60. isContextEdgeDelim(token)
  61. );
  62. }
  63. function isCommaContextEnd(token) {
  64. if (token === null) {
  65. return true;
  66. }
  67. return (
  68. token.type === TYPE.RightParenthesis ||
  69. token.type === TYPE.RightSquareBracket ||
  70. token.type === TYPE.RightCurlyBracket ||
  71. (token.type === TYPE.Delim && token.value === '/')
  72. );
  73. }
  74. function internalMatch(tokens, state, syntaxes) {
  75. function moveToNextToken() {
  76. do {
  77. tokenIndex++;
  78. token = tokenIndex < tokens.length ? tokens[tokenIndex] : null;
  79. } while (token !== null && (token.type === TYPE.WhiteSpace || token.type === TYPE.Comment));
  80. }
  81. function getNextToken(offset) {
  82. const nextIndex = tokenIndex + offset;
  83. return nextIndex < tokens.length ? tokens[nextIndex] : null;
  84. }
  85. function stateSnapshotFromSyntax(nextState, prev) {
  86. return {
  87. nextState,
  88. matchStack,
  89. syntaxStack,
  90. thenStack,
  91. tokenIndex,
  92. prev
  93. };
  94. }
  95. function pushThenStack(nextState) {
  96. thenStack = {
  97. nextState,
  98. matchStack,
  99. syntaxStack,
  100. prev: thenStack
  101. };
  102. }
  103. function pushElseStack(nextState) {
  104. elseStack = stateSnapshotFromSyntax(nextState, elseStack);
  105. }
  106. function addTokenToMatch() {
  107. matchStack = {
  108. type: TOKEN,
  109. syntax: state.syntax,
  110. token,
  111. prev: matchStack
  112. };
  113. moveToNextToken();
  114. syntaxStash = null;
  115. if (tokenIndex > longestMatch) {
  116. longestMatch = tokenIndex;
  117. }
  118. }
  119. function openSyntax() {
  120. syntaxStack = {
  121. syntax: state.syntax,
  122. opts: state.syntax.opts || (syntaxStack !== null && syntaxStack.opts) || null,
  123. prev: syntaxStack
  124. };
  125. matchStack = {
  126. type: OPEN_SYNTAX,
  127. syntax: state.syntax,
  128. token: matchStack.token,
  129. prev: matchStack
  130. };
  131. }
  132. function closeSyntax() {
  133. if (matchStack.type === OPEN_SYNTAX) {
  134. matchStack = matchStack.prev;
  135. } else {
  136. matchStack = {
  137. type: CLOSE_SYNTAX,
  138. syntax: syntaxStack.syntax,
  139. token: matchStack.token,
  140. prev: matchStack
  141. };
  142. }
  143. syntaxStack = syntaxStack.prev;
  144. }
  145. let syntaxStack = null;
  146. let thenStack = null;
  147. let elseStack = null;
  148. // null – stashing allowed, nothing stashed
  149. // false – stashing disabled, nothing stashed
  150. // anithing else – fail stashable syntaxes, some syntax stashed
  151. let syntaxStash = null;
  152. let iterationCount = 0; // count iterations and prevent infinite loop
  153. let exitReason = null;
  154. let token = null;
  155. let tokenIndex = -1;
  156. let longestMatch = 0;
  157. let matchStack = {
  158. type: STUB,
  159. syntax: null,
  160. token: null,
  161. prev: null
  162. };
  163. moveToNextToken();
  164. while (exitReason === null && ++iterationCount < ITERATION_LIMIT) {
  165. // function mapList(list, fn) {
  166. // const result = [];
  167. // while (list) {
  168. // result.unshift(fn(list));
  169. // list = list.prev;
  170. // }
  171. // return result;
  172. // }
  173. // console.log('--\n',
  174. // '#' + iterationCount,
  175. // require('util').inspect({
  176. // match: mapList(matchStack, x => x.type === TOKEN ? x.token && x.token.value : x.syntax ? ({ [OPEN_SYNTAX]: '<', [CLOSE_SYNTAX]: '</' }[x.type] || x.type) + '!' + x.syntax.name : null),
  177. // token: token && token.value,
  178. // tokenIndex,
  179. // syntax: syntax.type + (syntax.id ? ' #' + syntax.id : '')
  180. // }, { depth: null })
  181. // );
  182. switch (state.type) {
  183. case 'Match':
  184. if (thenStack === null) {
  185. // turn to MISMATCH when some tokens left unmatched
  186. if (token !== null) {
  187. // doesn't mismatch if just one token left and it's an IE hack
  188. if (tokenIndex !== tokens.length - 1 || (token.value !== '\\0' && token.value !== '\\9')) {
  189. state = MISMATCH;
  190. break;
  191. }
  192. }
  193. // break the main loop, return a result - MATCH
  194. exitReason = EXIT_REASON_MATCH;
  195. break;
  196. }
  197. // go to next syntax (`then` branch)
  198. state = thenStack.nextState;
  199. // check match is not empty
  200. if (state === DISALLOW_EMPTY) {
  201. if (thenStack.matchStack === matchStack) {
  202. state = MISMATCH;
  203. break;
  204. } else {
  205. state = MATCH;
  206. }
  207. }
  208. // close syntax if needed
  209. while (thenStack.syntaxStack !== syntaxStack) {
  210. closeSyntax();
  211. }
  212. // pop stack
  213. thenStack = thenStack.prev;
  214. break;
  215. case 'Mismatch':
  216. // when some syntax is stashed
  217. if (syntaxStash !== null && syntaxStash !== false) {
  218. // there is no else branches or a branch reduce match stack
  219. if (elseStack === null || tokenIndex > elseStack.tokenIndex) {
  220. // restore state from the stash
  221. elseStack = syntaxStash;
  222. syntaxStash = false; // disable stashing
  223. }
  224. } else if (elseStack === null) {
  225. // no else branches -> break the main loop
  226. // return a result - MISMATCH
  227. exitReason = EXIT_REASON_MISMATCH;
  228. break;
  229. }
  230. // go to next syntax (`else` branch)
  231. state = elseStack.nextState;
  232. // restore all the rest stack states
  233. thenStack = elseStack.thenStack;
  234. syntaxStack = elseStack.syntaxStack;
  235. matchStack = elseStack.matchStack;
  236. tokenIndex = elseStack.tokenIndex;
  237. token = tokenIndex < tokens.length ? tokens[tokenIndex] : null;
  238. // pop stack
  239. elseStack = elseStack.prev;
  240. break;
  241. case 'MatchGraph':
  242. state = state.match;
  243. break;
  244. case 'If':
  245. // IMPORTANT: else stack push must go first,
  246. // since it stores the state of thenStack before changes
  247. if (state.else !== MISMATCH) {
  248. pushElseStack(state.else);
  249. }
  250. if (state.then !== MATCH) {
  251. pushThenStack(state.then);
  252. }
  253. state = state.match;
  254. break;
  255. case 'MatchOnce':
  256. state = {
  257. type: 'MatchOnceBuffer',
  258. syntax: state,
  259. index: 0,
  260. mask: 0
  261. };
  262. break;
  263. case 'MatchOnceBuffer': {
  264. const terms = state.syntax.terms;
  265. if (state.index === terms.length) {
  266. // no matches at all or it's required all terms to be matched
  267. if (state.mask === 0 || state.syntax.all) {
  268. state = MISMATCH;
  269. break;
  270. }
  271. // a partial match is ok
  272. state = MATCH;
  273. break;
  274. }
  275. // all terms are matched
  276. if (state.mask === (1 << terms.length) - 1) {
  277. state = MATCH;
  278. break;
  279. }
  280. for (; state.index < terms.length; state.index++) {
  281. const matchFlag = 1 << state.index;
  282. if ((state.mask & matchFlag) === 0) {
  283. // IMPORTANT: else stack push must go first,
  284. // since it stores the state of thenStack before changes
  285. pushElseStack(state);
  286. pushThenStack({
  287. type: 'AddMatchOnce',
  288. syntax: state.syntax,
  289. mask: state.mask | matchFlag
  290. });
  291. // match
  292. state = terms[state.index++];
  293. break;
  294. }
  295. }
  296. break;
  297. }
  298. case 'AddMatchOnce':
  299. state = {
  300. type: 'MatchOnceBuffer',
  301. syntax: state.syntax,
  302. index: 0,
  303. mask: state.mask
  304. };
  305. break;
  306. case 'Enum':
  307. if (token !== null) {
  308. let name = token.value.toLowerCase();
  309. // drop \0 and \9 hack from keyword name
  310. if (name.indexOf('\\') !== -1) {
  311. name = name.replace(/\\[09].*$/, '');
  312. }
  313. if (hasOwnProperty.call(state.map, name)) {
  314. state = state.map[name];
  315. break;
  316. }
  317. }
  318. state = MISMATCH;
  319. break;
  320. case 'Generic': {
  321. const opts = syntaxStack !== null ? syntaxStack.opts : null;
  322. const lastTokenIndex = tokenIndex + Math.floor(state.fn(token, getNextToken, opts));
  323. if (!isNaN(lastTokenIndex) && lastTokenIndex > tokenIndex) {
  324. while (tokenIndex < lastTokenIndex) {
  325. addTokenToMatch();
  326. }
  327. state = MATCH;
  328. } else {
  329. state = MISMATCH;
  330. }
  331. break;
  332. }
  333. case 'Type':
  334. case 'Property': {
  335. const syntaxDict = state.type === 'Type' ? 'types' : 'properties';
  336. const dictSyntax = hasOwnProperty.call(syntaxes, syntaxDict) ? syntaxes[syntaxDict][state.name] : null;
  337. if (!dictSyntax || !dictSyntax.match) {
  338. throw new Error(
  339. 'Bad syntax reference: ' +
  340. (state.type === 'Type'
  341. ? '<' + state.name + '>'
  342. : '<\'' + state.name + '\'>')
  343. );
  344. }
  345. // stash a syntax for types with low priority
  346. if (syntaxStash !== false && token !== null && state.type === 'Type') {
  347. const lowPriorityMatching =
  348. // https://drafts.csswg.org/css-values-4/#custom-idents
  349. // When parsing positionally-ambiguous keywords in a property value, a <custom-ident> production
  350. // can only claim the keyword if no other unfulfilled production can claim it.
  351. (state.name === 'custom-ident' && token.type === TYPE.Ident) ||
  352. // https://drafts.csswg.org/css-values-4/#lengths
  353. // ... if a `0` could be parsed as either a <number> or a <length> in a property (such as line-height),
  354. // it must parse as a <number>
  355. (state.name === 'length' && token.value === '0');
  356. if (lowPriorityMatching) {
  357. if (syntaxStash === null) {
  358. syntaxStash = stateSnapshotFromSyntax(state, elseStack);
  359. }
  360. state = MISMATCH;
  361. break;
  362. }
  363. }
  364. openSyntax();
  365. state = dictSyntax.matchRef || dictSyntax.match;
  366. break;
  367. }
  368. case 'Keyword': {
  369. const name = state.name;
  370. if (token !== null) {
  371. let keywordName = token.value;
  372. // drop \0 and \9 hack from keyword name
  373. if (keywordName.indexOf('\\') !== -1) {
  374. keywordName = keywordName.replace(/\\[09].*$/, '');
  375. }
  376. if (areStringsEqualCaseInsensitive(keywordName, name)) {
  377. addTokenToMatch();
  378. state = MATCH;
  379. break;
  380. }
  381. }
  382. state = MISMATCH;
  383. break;
  384. }
  385. case 'AtKeyword':
  386. case 'Function':
  387. if (token !== null && areStringsEqualCaseInsensitive(token.value, state.name)) {
  388. addTokenToMatch();
  389. state = MATCH;
  390. break;
  391. }
  392. state = MISMATCH;
  393. break;
  394. case 'Token':
  395. if (token !== null && token.value === state.value) {
  396. addTokenToMatch();
  397. state = MATCH;
  398. break;
  399. }
  400. state = MISMATCH;
  401. break;
  402. case 'Comma':
  403. if (token !== null && token.type === TYPE.Comma) {
  404. if (isCommaContextStart(matchStack.token)) {
  405. state = MISMATCH;
  406. } else {
  407. addTokenToMatch();
  408. state = isCommaContextEnd(token) ? MISMATCH : MATCH;
  409. }
  410. } else {
  411. state = isCommaContextStart(matchStack.token) || isCommaContextEnd(token) ? MATCH : MISMATCH;
  412. }
  413. break;
  414. case 'String':
  415. let string = '';
  416. let lastTokenIndex = tokenIndex;
  417. for (; lastTokenIndex < tokens.length && string.length < state.value.length; lastTokenIndex++) {
  418. string += tokens[lastTokenIndex].value;
  419. }
  420. if (areStringsEqualCaseInsensitive(string, state.value)) {
  421. while (tokenIndex < lastTokenIndex) {
  422. addTokenToMatch();
  423. }
  424. state = MATCH;
  425. } else {
  426. state = MISMATCH;
  427. }
  428. break;
  429. default:
  430. throw new Error('Unknown node type: ' + state.type);
  431. }
  432. }
  433. totalIterationCount += iterationCount;
  434. switch (exitReason) {
  435. case null:
  436. console.warn('[csstree-match] BREAK after ' + ITERATION_LIMIT + ' iterations');
  437. exitReason = EXIT_REASON_ITERATION_LIMIT;
  438. matchStack = null;
  439. break;
  440. case EXIT_REASON_MATCH:
  441. while (syntaxStack !== null) {
  442. closeSyntax();
  443. }
  444. break;
  445. default:
  446. matchStack = null;
  447. }
  448. return {
  449. tokens,
  450. reason: exitReason,
  451. iterations: iterationCount,
  452. match: matchStack,
  453. longestMatch
  454. };
  455. }
  456. export function matchAsList(tokens, matchGraph, syntaxes) {
  457. const matchResult = internalMatch(tokens, matchGraph, syntaxes || {});
  458. if (matchResult.match !== null) {
  459. let item = reverseList(matchResult.match).prev;
  460. matchResult.match = [];
  461. while (item !== null) {
  462. switch (item.type) {
  463. case OPEN_SYNTAX:
  464. case CLOSE_SYNTAX:
  465. matchResult.match.push({
  466. type: item.type,
  467. syntax: item.syntax
  468. });
  469. break;
  470. default:
  471. matchResult.match.push({
  472. token: item.token.value,
  473. node: item.token.node
  474. });
  475. break;
  476. }
  477. item = item.prev;
  478. }
  479. }
  480. return matchResult;
  481. }
  482. export function matchAsTree(tokens, matchGraph, syntaxes) {
  483. const matchResult = internalMatch(tokens, matchGraph, syntaxes || {});
  484. if (matchResult.match === null) {
  485. return matchResult;
  486. }
  487. let item = matchResult.match;
  488. let host = matchResult.match = {
  489. syntax: matchGraph.syntax || null,
  490. match: []
  491. };
  492. const hostStack = [host];
  493. // revert a list and start with 2nd item since 1st is a stub item
  494. item = reverseList(item).prev;
  495. // build a tree
  496. while (item !== null) {
  497. switch (item.type) {
  498. case OPEN_SYNTAX:
  499. host.match.push(host = {
  500. syntax: item.syntax,
  501. match: []
  502. });
  503. hostStack.push(host);
  504. break;
  505. case CLOSE_SYNTAX:
  506. hostStack.pop();
  507. host = hostStack[hostStack.length - 1];
  508. break;
  509. default:
  510. host.match.push({
  511. syntax: item.syntax || null,
  512. token: item.token.value,
  513. node: item.token.node
  514. });
  515. }
  516. item = item.prev;
  517. }
  518. return matchResult;
  519. }