// TokenStream.cjs (9.4 KB)
  1. 'use strict';
  2. const adoptBuffer = require('./adopt-buffer.cjs');
  3. const utils = require('./utils.cjs');
  4. const names = require('./names.cjs');
  5. const types = require('./types.cjs');
  // Layout of a packed token descriptor: the low 24 bits carry the token end
  // offset, the bits above them carry the token type.
  const TYPE_SHIFT = 24;
  const OFFSET_MASK = 0x00FFFFFF;

  // Lookup table "block opener type -> matching closer type";
  // a zero entry means the token type doesn't open a block.
  const balancePair = new Uint8Array(32); // 32b of memory ought to be enough for anyone (any number of tokens)

  for (const [openerType, closerType] of [
      [types.Function, types.RightParenthesis],
      [types.LeftParenthesis, types.RightParenthesis],
      [types.LeftSquareBracket, types.RightSquareBracket],
      [types.LeftCurlyBracket, types.RightCurlyBracket]
  ]) {
      balancePair[openerType] = closerType;
  }
  /**
   * Tells whether a token type opens a balanced block, i.e. has a closer type
   * registered in `balancePair` (function-token, `(`, `[` and `{`).
   *
   * @param {number} tokenType Numeric token type.
   * @returns {boolean} `true` only when `balancePair` holds a non-zero entry for the type.
   */
  function isBlockOpenerToken(tokenType) {
      // `> 0` rather than `!== 0`: an index outside of the table reads as
      // `undefined`, which must not be reported as a block opener
      return balancePair[tokenType] > 0;
  }
  /**
   * Cursor over the tokens a tokenizer produces for a source string.
   *
   * Tokens are stored in two parallel numeric buffers indexed by token index:
   *  - `offsetAndType[i]` packs the token type (bits above `TYPE_SHIFT`) with
   *    the token end offset (`OFFSET_MASK` bits); a token starts where the
   *    previous one ends, the first one starts at `firstCharOffset`;
   *  - `balance[i]` links a token with its bracket pair (see `setSource()`).
   *
   * The current token is exposed through `tokenIndex`, `tokenType`,
   * `tokenStart`, `tokenEnd` and `eof`.
   */
  class TokenStream {
      /**
       * @param {string} source Text to tokenize.
       * @param {function} tokenize Tokenizer, see `setSource()`.
       */
      constructor(source, tokenize) {
          this.setSource(source, tokenize);
      }

      /**
       * Puts the cursor before the first token (`tokenIndex` is -1);
       * call `next()` to move onto the first token.
       */
      reset() {
          this.eof = false;
          this.tokenIndex = -1;
          this.tokenType = 0;
          this.tokenStart = this.firstCharOffset;
          this.tokenEnd = this.firstCharOffset;
      }

      /**
       * Tokenizes `source`, rebuilds the token and balance buffers and moves
       * the cursor onto the first token.
       *
       * Resulting `balance[i]` values:
       *  - block opener -> index of its closer;
       *  - block closer -> index of its opener;
       *  - any other token -> index of the closer of the enclosing block;
       *  - `tokenCount` when there is no such closer.
       *
       * @param {string} [source=''] Text to tokenize; falsy values become `''`.
       * @param {function} [tokenize] Called as `tokenize(source, onToken)`; it is
       *   expected to call `onToken(type, start, end)` once per token, in order.
       */
      setSource(source = '', tokenize = () => {}) {
          source = String(source || '');

          const sourceLength = source.length;
          const offsetAndType = adoptBuffer.adoptBuffer(this.offsetAndType, sourceLength + 1); // +1 because of eof-token
          const balance = adoptBuffer.adoptBuffer(this.balance, sourceLength + 1);
          let tokenCount = 0;
          let firstCharOffset = -1;
          let balanceCloseType = 0;
          // `sourceLength` is used as the "no enclosing block" marker while tokenizing
          let balanceStart = sourceLength;

          // capture buffers
          this.offsetAndType = null;
          this.balance = null;
          balance.fill(0);

          tokenize(source, (type, start, end) => {
              const index = tokenCount++;

              // type & offset
              offsetAndType[index] = (type << TYPE_SHIFT) | end;

              if (firstCharOffset === -1) {
                  firstCharOffset = start;
              }

              // balance
              balance[index] = balanceStart;

              if (type === balanceCloseType) {
                  const prevBalanceStart = balance[balanceStart];

                  // set reference to balance end for a block opener
                  balance[balanceStart] = index;

                  // pop state
                  balanceStart = prevBalanceStart;
                  // Back at the top level there is no opener to take a closer from.
                  // `offsetAndType[sourceLength]` must not be read here: it is not
                  // written yet and may hold a token of a previous source when
                  // the buffer is reused.
                  balanceCloseType = prevBalanceStart === sourceLength
                      ? 0
                      : balancePair[offsetAndType[prevBalanceStart] >> TYPE_SHIFT];
              } else if (isBlockOpenerToken(type)) { // check for FunctionToken, <(-token>, <[-token> and <{-token>
                  // push state
                  balanceStart = index;
                  balanceCloseType = balancePair[type];
              }
          });

          // finalize buffers
          offsetAndType[tokenCount] = (types.EOF << TYPE_SHIFT) | sourceLength; // <EOF-token>
          balance[tokenCount] = tokenCount; // prevents false positive balance match with any token

          // reverse references from balance start to end
          // (the example assumes one char per token, i.e. source.length is 10)
          //   token:    a  (  [  b  c  ]  d  e  )  {
          //   index:    0  1  2  3  4  5  6  7  8  9
          // before
          //   balance: 10  8  5  2  2  2  1  1  1 10
          // after
          //   balance: 10  8  5  5  5  2  8  8  1 10
          for (let i = 0; i < tokenCount; i++) {
              const ref = balance[i];

              if (ref <= i) {
                  // a reference to the opener of the enclosing block
                  const balanceEnd = balance[ref];

                  // the closer itself keeps the reference to its opener
                  if (balanceEnd !== i) {
                      balance[i] = balanceEnd;
                  }
              } else if (ref > tokenCount) {
                  // the "no enclosing block" marker points beyond the tokens
                  balance[i] = tokenCount;
              }
          }

          this.source = source;
          this.firstCharOffset = firstCharOffset === -1 ? 0 : firstCharOffset;
          this.tokenCount = tokenCount;
          this.offsetAndType = offsetAndType;
          this.balance = balance;

          this.reset();
          this.next();
      }

      /**
       * @param {number} offset Token distance from the current token.
       * @returns {number} Type of that token, `types.EOF` beyond the last token.
       */
      lookupType(offset) {
          const index = offset + this.tokenIndex;

          if (index < this.tokenCount) {
              return this.offsetAndType[index] >> TYPE_SHIFT;
          }

          return types.EOF;
      }

      /**
       * Same as `lookupType()`, but whitespace and comment tokens are not counted.
       *
       * @param {number} idx Zero-based ordinal of the wanted token, counted
       *   from the current one.
       * @returns {number} Token type, `types.EOF` when there is no such token.
       */
      lookupTypeNonSC(idx) {
          for (let offset = this.tokenIndex; offset < this.tokenCount; offset++) {
              const tokenType = this.offsetAndType[offset] >> TYPE_SHIFT;

              if (tokenType !== types.WhiteSpace && tokenType !== types.Comment) {
                  if (idx-- === 0) {
                      return tokenType;
                  }
              }
          }

          return types.EOF;
      }

      /**
       * @param {number} offset Token distance from the current token.
       * @returns {number} Source offset the token starts at, `source.length`
       *   beyond the last token.
       */
      lookupOffset(offset) {
          const index = offset + this.tokenIndex;

          if (index < this.tokenCount) {
              // the first token has no predecessor to take an end offset from
              return index > 0
                  ? this.offsetAndType[index - 1] & OFFSET_MASK
                  : this.firstCharOffset;
          }

          return this.source.length;
      }

      /**
       * Finds a token ignoring whitespace and comment tokens.
       *
       * @param {number} idx Zero-based ordinal of the wanted token, counted
       *   from the current one.
       * @returns {number} Token distance from the current token.
       */
      lookupOffsetNonSC(idx) {
          for (let offset = this.tokenIndex; offset < this.tokenCount; offset++) {
              const tokenType = this.offsetAndType[offset] >> TYPE_SHIFT;

              if (tokenType !== types.WhiteSpace && tokenType !== types.Comment) {
                  if (idx-- === 0) {
                      return offset - this.tokenIndex;
                  }
              }
          }

          // NOTE(review): a token type is returned where callers get a token
          // distance otherwise; kept as is, confirm what callers expect
          return types.EOF;
      }

      /**
       * Compares the text of a token with a reference string by means of
       * `utils.cmpStr()`.
       *
       * @param {number} offset Token distance from the current token.
       * @param {string} referenceStr String to compare with.
       * @returns {boolean} Result of `utils.cmpStr()`, `false` beyond the last token.
       */
      lookupValue(offset, referenceStr) {
          const index = offset + this.tokenIndex;

          if (index < this.tokenCount) {
              return utils.cmpStr(
                  this.source,
                  // the first token has no predecessor to take an end offset from
                  index > 0
                      ? this.offsetAndType[index - 1] & OFFSET_MASK
                      : this.firstCharOffset,
                  this.offsetAndType[index] & OFFSET_MASK,
                  referenceStr
              );
          }

          return false;
      }

      /**
       * @param {number} tokenIndex Absolute token index.
       * @returns {number} Source offset the token starts at.
       */
      getTokenStart(tokenIndex) {
          if (tokenIndex === this.tokenIndex) {
              return this.tokenStart;
          }

          if (tokenIndex > 0) {
              return tokenIndex < this.tokenCount
                  ? this.offsetAndType[tokenIndex - 1] & OFFSET_MASK
                  : this.offsetAndType[this.tokenCount] & OFFSET_MASK;
          }

          return this.firstCharOffset;
      }

      /**
       * @param {number} start Source offset.
       * @returns {string} Source text between `start` and the current token start.
       */
      substrToCursor(start) {
          return this.source.substring(start, this.tokenStart);
      }

      /**
       * @param {number} pos Token index.
       * @returns {boolean} `true` when the balance reference of the current
       *   token points to a token before `pos`.
       */
      isBalanceEdge(pos) {
          return this.balance[this.tokenIndex] < pos;
      }

      /**
       * Checks for a delim-token that starts with the given char code.
       *
       * @param {number} code Expected char code.
       * @param {number} [offset] Token distance from the current token; the
       *   current token is tested when omitted or zero.
       * @returns {boolean}
       */
      isDelim(code, offset) {
          if (offset) {
              return (
                  this.lookupType(offset) === types.Delim &&
                  this.source.charCodeAt(this.lookupOffset(offset)) === code
              );
          }

          return (
              this.tokenType === types.Delim &&
              this.source.charCodeAt(this.tokenStart) === code
          );
      }

      /**
       * Moves the cursor by `tokenCount` tokens; the stream gets into the
       * EOF state when the target is beyond the last token.
       *
       * @param {number} tokenCount Number of tokens to skip.
       */
      skip(tokenCount) {
          let next = this.tokenIndex + tokenCount;

          if (next < this.tokenCount) {
              this.tokenIndex = next;
              // the first token has no predecessor to take an end offset from
              this.tokenStart = next > 0
                  ? this.offsetAndType[next - 1] & OFFSET_MASK
                  : this.firstCharOffset;
              next = this.offsetAndType[next];
              this.tokenType = next >> TYPE_SHIFT;
              this.tokenEnd = next & OFFSET_MASK;
          } else {
              this.tokenIndex = this.tokenCount;
              this.next();
          }
      }

      /**
       * Moves the cursor to the next token, or switches the stream into the
       * EOF state when there are no more tokens.
       */
      next() {
          let next = this.tokenIndex + 1;

          if (next < this.tokenCount) {
              this.tokenIndex = next;
              this.tokenStart = this.tokenEnd;
              next = this.offsetAndType[next];
              this.tokenType = next >> TYPE_SHIFT;
              this.tokenEnd = next & OFFSET_MASK;
          } else {
              this.eof = true;
              this.tokenIndex = this.tokenCount;
              this.tokenType = types.EOF;
              this.tokenStart = this.tokenEnd = this.source.length;
          }
      }

      /**
       * Skips whitespace and comment tokens starting from the current one.
       */
      skipSC() {
          while (this.tokenType === types.WhiteSpace || this.tokenType === types.Comment) {
              this.next();
          }
      }

      /**
       * Scans tokens from `startToken` and moves the cursor to the token the
       * scan stops at. The scan stops at a token whose balance reference points
       * before `startToken`, or when `stopConsume` demands it. Blocks opened on
       * the way are jumped over up to their closer.
       *
       * @param {number} startToken Index of the token to start from.
       * @param {function} stopConsume Gets the char code at the start of a
       *   token; return 1 to stop at the token, 2 to stop right after it,
       *   anything else to go on.
       */
      skipUntilBalanced(startToken, stopConsume) {
          let cursor = startToken;
          let balanceEnd = 0;
          let offset = 0;

          loop:
          for (; cursor < this.tokenCount; cursor++) {
              balanceEnd = this.balance[cursor];

              // stop scanning on balance edge that points to offset before start token
              if (balanceEnd < startToken) {
                  break loop;
              }

              offset = cursor > 0 ? this.offsetAndType[cursor - 1] & OFFSET_MASK : this.firstCharOffset;

              // check stop condition
              switch (stopConsume(this.source.charCodeAt(offset))) {
                  case 1: // just stop
                      break loop;

                  case 2: // stop & included
                      cursor++;
                      break loop;

                  default:
                      // fast forward to the end of balanced block for an open block tokens
                      if (isBlockOpenerToken(this.offsetAndType[cursor] >> TYPE_SHIFT)) {
                          cursor = balanceEnd;
                      }
              }
          }

          this.skip(cursor - this.tokenIndex);
      }

      /**
       * Calls `fn(type, start, end, index)` for every token in order.
       *
       * @param {function} fn Token visitor.
       */
      forEachToken(fn) {
          for (let i = 0, offset = this.firstCharOffset; i < this.tokenCount; i++) {
              const start = offset;
              const item = this.offsetAndType[i];
              const end = item & OFFSET_MASK;
              const type = item >> TYPE_SHIFT;

              offset = end;

              fn(type, start, end, i);
          }
      }

      /**
       * @returns {Array<{idx: number, type: *, chunk: string, balance: number}>}
       *   One entry per token: its index, `names[type]`, source text and
       *   balance reference.
       */
      dump() {
          const tokens = new Array(this.tokenCount);

          this.forEachToken((type, start, end, index) => {
              tokens[index] = {
                  idx: index,
                  type: names[type],
                  chunk: this.source.substring(start, end),
                  balance: this.balance[index]
              };
          });

          return tokens;
      }
  }
  // Expose the class as the `TokenStream` property of the CommonJS exports object.
  exports.TokenStream = TokenStream;