sax.js 45 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704
  1. ;(function (sax) {
  // wrapper for non-node envs
  // Public API hung off the `sax` object handed to this closure.
  // `sax.parser(strict, opt)` is a factory equivalent to
  // `new SAXParser(strict, opt)`.
  sax.parser = function (strict, opt) {
    var instance = new SAXParser(strict, opt)
    return instance
  }
  sax.SAXParser = SAXParser
  sax.SAXStream = SAXStream
  sax.createStream = createStream
  // Buffer-overrun policy:
  //  - Checking starts once the parse position passes MAX_BUFFER_LENGTH.
  //  - After each check, the next one is scheduled at
  //    MAX_BUFFER_LENGTH - (length of the longest buffer), the earliest point
  //    at which an overrun could happen. Checks are therefore as rare as
  //    possible while still never letting a buffer cross the bound unnoticed.
  //  - Buffers are tested at most once per write(), so a single very large
  //    string passed to write() may be copied once in full. That is under the
  //    caller's control and is treated as acceptable.
  // Set to Infinity to have unlimited buffers.
  sax.MAX_BUFFER_LENGTH = 1024 * 64
  // Names of the parser properties used as string accumulators. They are
  // reset together by clearBuffers() and size-checked by checkBufferLength().
  var buffers = [
    'comment', 'sgmlDecl', 'textNode', 'tagName',
    'doctype', 'procInstName', 'procInstBody', 'entity',
    'attribName', 'attribValue', 'cdata', 'script'
  ]
  // Event names the parser can report. A handler for event `x` is stored on
  // the parser as `onx` (see emit()).
  sax.EVENTS = [
    'text', 'processinginstruction', 'sgmldeclaration', 'doctype', 'comment',
    'opentagstart', 'attribute', 'opentag', 'closetag',
    'opencdata', 'cdata', 'closecdata',
    'error', 'end', 'ready', 'script',
    'opennamespace', 'closenamespace'
  ]
  /**
   * Parser constructor. Also works when called without `new`.
   *
   * Resets every buffer and state field on the instance, normalises the
   * options object (note: `opt` itself is stored and mutated, not copied),
   * and finally emits `onready`. end() re-invokes this constructor on an
   * existing instance to reset it.
   *
   * @param {boolean} strict - truthy to enable strict mode
   * @param {Object} [opt] - options; fields read here: lowercase,
   *   lowercasetags, noscript, strictEntities, xmlns,
   *   unquotedAttributeValues, position
   */
  function SAXParser(strict, opt) {
    if (!(this instanceof SAXParser)) {
      return new SAXParser(strict, opt)
    }

    var parser = this
    clearBuffers(parser)

    // Assignment order below mirrors the property creation order of the
    // chained assignments this code replaces.
    parser.c = ''
    parser.q = ''
    parser.bufferCheckPosition = sax.MAX_BUFFER_LENGTH

    var options = opt || {}
    parser.opt = options
    options.lowercase = options.lowercase || options.lowercasetags
    parser.looseCase = options.lowercase ? 'toLowerCase' : 'toUpperCase'

    parser.tags = []
    parser.sawRoot = false
    parser.closedRoot = false
    parser.closed = false
    parser.error = null
    parser.tag = null
    parser.strict = !!strict
    parser.noscript = !!(strict || options.noscript)
    parser.state = S.BEGIN

    // Each parser gets its own entity table inheriting from the shared one.
    parser.strictEntities = options.strictEntities
    parser.ENTITIES = Object.create(
      parser.strictEntities ? sax.XML_ENTITIES : sax.ENTITIES
    )
    parser.attribList = []

    // Namespaces form a prototype chain: the current tag's `ns` inherits
    // from its parent tag's `ns`, rooted at rootNS.
    if (options.xmlns) {
      parser.ns = Object.create(rootNS)
    }

    // Unless configured explicitly, unquoted attribute values are allowed
    // only when not in strict mode.
    if (options.unquotedAttributeValues === undefined) {
      options.unquotedAttributeValues = !strict
    }

    // Position tracking (mostly for error reporting) is on unless
    // `position: false` is passed.
    parser.trackPosition = options.position !== false
    if (parser.trackPosition) {
      parser.column = 0
      parser.line = 0
      parser.position = 0
    }

    emit(parser, 'onready')
  }
  // Minimal fallback for environments without Object.create: builds an
  // object whose prototype is `o` (single-argument form only).
  if (!Object.create) {
    Object.create = function (o) {
      function Surrogate() {}
      Surrogate.prototype = o
      return new Surrogate()
    }
  }
  // Minimal fallback for environments without Object.keys: collects the
  // object's own enumerable property names via for-in + hasOwnProperty.
  if (!Object.keys) {
    Object.keys = function (o) {
      var names = []
      for (var key in o) {
        if (o.hasOwnProperty(key)) {
          names.push(key)
        }
      }
      return names
    }
  }
  /**
   * Inspect every accumulator named in `buffers` and react to any that has
   * grown past the limit (MAX_BUFFER_LENGTH, but never less than 10).
   *
   * textNode, cdata and script can legitimately grow large, so they are
   * emitted early and emptied; any other oversized buffer is reported via
   * error(). Afterwards the next check is scheduled by setting
   * `parser.bufferCheckPosition`.
   *
   * @param {Object} parser - parser instance whose buffers are checked
   */
  function checkBufferLength(parser) {
    var limit = Math.max(sax.MAX_BUFFER_LENGTH, 10)
    var longest = 0

    buffers.forEach(function (name) {
      // Measured before any flush below, so a flushed buffer still counts
      // with its pre-flush size when scheduling the next check.
      var size = parser[name].length
      if (size > limit) {
        if (name === 'textNode') {
          closeText(parser)
        } else if (name === 'cdata') {
          emitNode(parser, 'oncdata', parser.cdata)
          parser.cdata = ''
        } else if (name === 'script') {
          emitNode(parser, 'onscript', parser.script)
          parser.script = ''
        } else {
          error(parser, 'Max buffer length exceeded: ' + name)
        }
      }
      longest = Math.max(longest, size)
    })

    // Earliest position at which some buffer could overrun.
    var headroom = sax.MAX_BUFFER_LENGTH - longest
    parser.bufferCheckPosition = headroom + parser.position
  }
  /**
   * Reset every accumulator listed in `buffers` to the empty string.
   * @param {Object} parser - object whose buffer properties are reset
   */
  function clearBuffers(parser) {
    buffers.forEach(function (name) {
      parser[name] = ''
    })
  }
  /**
   * Emit whatever is pending in the text, cdata and script buffers and
   * empty them. Text is always passed through closeText(); cdata and
   * script are emitted only when non-empty.
   * @param {Object} parser - parser instance to flush
   */
  function flushBuffers(parser) {
    closeText(parser)

    var pendingCdata = parser.cdata
    if (pendingCdata !== '') {
      emitNode(parser, 'oncdata', pendingCdata)
      parser.cdata = ''
    }

    var pendingScript = parser.script
    if (pendingScript !== '') {
      emitNode(parser, 'onscript', pendingScript)
      parser.script = ''
    }
  }
  // Instance methods; each one delegates to a function defined in this file.
  SAXParser.prototype = {
    // Finish parsing (see end()).
    end: function () {
      end(this)
    },

    // Feed a chunk of input to the parser.
    write: write,

    // Clear a stored error so write() can be called again.
    resume: function () {
      var parser = this
      parser.error = null
      return parser
    },

    // Writing `null` is the end-of-input signal.
    close: function () {
      var result = this.write(null)
      return result
    },

    // Emit any pending text/cdata/script without ending the parse.
    flush: function () {
      flushBuffers(this)
    }
  }
  // Base class for SAXStream: the `Stream` export of the 'stream' module when
  // it can be loaded, otherwise an empty constructor.
  var Stream
  try {
    Stream = require('stream').Stream
  } catch (ex) {
    Stream = null
  }
  if (!Stream) {
    Stream = function () {}
  }
  // Every parser event except 'error' and 'end', which SAXStream wires up
  // itself in its constructor.
  var streamWraps = sax.EVENTS.filter(function (ev) {
    return !(ev === 'error' || ev === 'end')
  })
  /**
   * Factory for SAXStream.
   * @param {boolean} strict - forwarded to the SAXStream constructor
   * @param {Object} [opt] - forwarded to the SAXStream constructor
   * @returns {SAXStream} a new stream instance
   */
  function createStream(strict, opt) {
    var stream = new SAXStream(strict, opt)
    return stream
  }
  /**
   * Stream wrapper around a SAXParser. Also works when called without `new`.
   *
   * The parser's `onend`/`onerror` hooks are forwarded as stream 'end' and
   * 'error' events. For every name in `streamWraps`, an `on<name>` accessor
   * is defined on the stream: reading it returns the parser's handler, and
   * assigning a function subscribes it through `this.on(name, fn)`.
   *
   * @param {boolean} strict - forwarded to SAXParser
   * @param {Object} [opt] - forwarded to SAXParser
   */
  function SAXStream(strict, opt) {
    if (!(this instanceof SAXStream)) {
      return new SAXStream(strict, opt)
    }
    Stream.apply(this)

    var me = this
    var parser = new SAXParser(strict, opt)
    me._parser = parser
    me.writable = true
    me.readable = true

    parser.onend = function () {
      me.emit('end')
    }
    parser.onerror = function (er) {
      me.emit('error', er)
      // Reaching this line means emit() did not throw, i.e. the error was
      // handled; clear it so further writes are accepted.
      me._parser.error = null
    }

    // Text decoder, created lazily by write() when binary chunks arrive.
    me._decoder = null

    streamWraps.forEach(function (ev) {
      var hook = 'on' + ev
      Object.defineProperty(me, hook, {
        get: function () {
          return me._parser[hook]
        },
        set: function (h) {
          if (h) {
            me.on(ev, h)
            return
          }
          // A falsy value unsubscribes everything for this event.
          me.removeAllListeners(ev)
          me._parser[hook] = h
          return h
        },
        enumerable: true,
        configurable: false
      })
    })
  }
  // Inherit from Stream and point `constructor` back at SAXStream. Only
  // `value` is given, so the property is non-enumerable, non-writable and
  // non-configurable.
  SAXStream.prototype = Object.create(Stream.prototype)
  Object.defineProperty(SAXStream.prototype, 'constructor', {
    value: SAXStream
  })
  /**
   * Feed a chunk to the underlying parser and re-emit it as a 'data' event.
   *
   * When a global `Buffer` with `isBuffer` exists and `data` is a Buffer, it
   * is decoded with a lazily created streaming TextDecoder('utf8') first; the
   * decoded string is what gets emitted.
   *
   * @param {*} data - chunk to parse; `data.toString()` is handed to the parser
   * @returns {boolean} always true
   */
  SAXStream.prototype.write = function (data) {
    var bufferApiPresent =
      typeof Buffer === 'function' && typeof Buffer.isBuffer === 'function'

    if (bufferApiPresent && Buffer.isBuffer(data)) {
      if (!this._decoder) {
        this._decoder = new TextDecoder('utf8')
      }
      // stream: true keeps an incomplete trailing byte sequence buffered in
      // the decoder until the next chunk arrives.
      data = this._decoder.decode(data, { stream: true })
    }

    var text = data.toString()
    this._parser.write(text)
    this.emit('data', data)
    return true
  }
  /**
   * Finish the stream: optionally write a last chunk, drain anything the
   * text decoder still holds, then end the underlying parser.
   *
   * @param {*} [chunk] - final chunk; written only if it has a non-zero length
   * @returns {boolean} always true
   */
  SAXStream.prototype.end = function (chunk) {
    var hasFinalChunk = chunk && chunk.length
    if (hasFinalChunk) {
      this.write(chunk)
    }

    // A decoder exists only if binary chunks were written; calling decode()
    // with no arguments returns whatever it was still holding back.
    if (this._decoder) {
      var tail = this._decoder.decode()
      if (tail) {
        this._parser.write(tail)
        this.emit('data', tail)
      }
    }

    this._parser.end()
    return true
  }
  /**
   * Subscribe to a stream event. The first time a wrapped parser event (one
   * listed in `streamWraps`) is subscribed to and the parser has no handler
   * for it yet, a forwarding handler is installed on the parser that
   * re-emits the event on the stream with the same arguments.
   *
   * @param {string} ev - event name
   * @param {Function} handler - listener
   * @returns {*} whatever Stream.prototype.on returns
   */
  SAXStream.prototype.on = function (ev, handler) {
    var me = this
    var hook = 'on' + ev

    if (!me._parser[hook] && streamWraps.indexOf(ev) !== -1) {
      me._parser[hook] = function () {
        // Build [ev, ...arguments] for emit().
        var args = [ev]
        for (var i = 0; i < arguments.length; i++) {
          args.push(arguments[i])
        }
        me.emit.apply(me, args)
      }
    }

    return Stream.prototype.on.call(me, ev, handler)
  }
  // this really needs to be replaced with character classes.
  // XML allows all manner of ridiculous numbers and digits.

  // Literal markers and the two namespace URIs checked by attrib().
  var CDATA = '[CDATA['
  var DOCTYPE = 'DOCTYPE'
  var XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'
  var XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/'

  // Root of the namespace prototype chain: binds the `xml` and `xmlns`
  // prefixes.
  var rootNS = {}
  rootNS.xml = XML_NAMESPACE
  rootNS.xmlns = XMLNS_NAMESPACE
  // Single-character classes for XML names, following
  // http://www.w3.org/TR/REC-xml/#NT-NameStartChar
  //
  // The parser looks at one UTF-16 code unit at a time, so astral-plane
  // characters (10000-EFFFF) cannot be supported without a significant
  // breaking change to this parser or to the JavaScript language.
  //
  //   nameStart   - characters allowed to begin a name
  //   nameBody    - nameStart plus the extra characters allowed after the
  //                 first one (digits, '.', '-', U+00B7, combining marks, ...)
  //   entityStart - nameStart plus '#'
  //   entityBody  - nameBody plus '#'
  var nameStart =
      /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/,
    nameBody =
      /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/,
    entityStart =
      /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/,
    entityBody =
      /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/
  /**
   * @param {string} c - a single character
   * @returns {boolean} true for space, newline, carriage return or tab
   */
  function isWhitespace(c) {
    switch (c) {
      case ' ':
      case '\n':
      case '\r':
      case '\t':
        return true
      default:
        return false
    }
  }
  /**
   * @param {string} c - a single character
   * @returns {boolean} true for a double or single quote character
   */
  function isQuote(c) {
    if (c === '"') return true
    return c === "'"
  }
  /**
   * @param {string} c - a single character
   * @returns {boolean} true for '>' or one of the four whitespace characters
   *   (space, newline, carriage return, tab)
   */
  function isAttribEnd(c) {
    if (c === '>') return true
    return c === ' ' || c === '\n' || c === '\r' || c === '\t'
  }
  /**
   * @param {RegExp} regex - pattern to test with
   * @param {string} c - text to test (a single character in this file)
   * @returns {boolean} result of `regex.test(c)`
   */
  function isMatch(regex, c) {
    var matched = regex.test(c)
    return matched
  }
  /**
   * Negation of isMatch().
   * @param {RegExp} regex - pattern to test with
   * @param {string} c - text to test (a single character in this file)
   * @returns {boolean} true when `regex` does not match `c`
   */
  function notMatch(regex, c) {
    return regex.test(c) === false
  }
  // Parser states, numbered consecutively from 0 in the order listed.
  // `S` serves as the counter here and is left at the number of states; it is
  // re-pointed at sax.STATE further down.
  var S = 0
  sax.STATE = {}
  ;[
    'BEGIN', // leading byte order mark or whitespace
    'BEGIN_WHITESPACE', // leading whitespace
    'TEXT', // general stuff
    'TEXT_ENTITY', // &amp and such.
    'OPEN_WAKA', // <
    'SGML_DECL', // <!BLARG
    'SGML_DECL_QUOTED', // <!BLARG foo "bar
    'DOCTYPE', // <!DOCTYPE
    'DOCTYPE_QUOTED', // <!DOCTYPE "//blah
    'DOCTYPE_DTD', // <!DOCTYPE "//blah" [ ...
    'DOCTYPE_DTD_QUOTED', // <!DOCTYPE "//blah" [ "foo
    'COMMENT_STARTING', // <!-
    'COMMENT', // <!--
    'COMMENT_ENDING', // <!-- blah -
    'COMMENT_ENDED', // <!-- blah --
    'CDATA', // <![CDATA[ something
    'CDATA_ENDING', // ]
    'CDATA_ENDING_2', // ]]
    'PROC_INST', // <?hi
    'PROC_INST_BODY', // <?hi there
    'PROC_INST_ENDING', // <?hi "there" ?
    'OPEN_TAG', // <strong
    'OPEN_TAG_SLASH', // <strong /
    'ATTRIB', // <a
    'ATTRIB_NAME', // <a foo
    'ATTRIB_NAME_SAW_WHITE', // <a foo _
    'ATTRIB_VALUE', // <a foo=
    'ATTRIB_VALUE_QUOTED', // <a foo="bar
    'ATTRIB_VALUE_CLOSED', // <a foo="bar"
    'ATTRIB_VALUE_UNQUOTED', // <a foo=bar
    'ATTRIB_VALUE_ENTITY_Q', // <foo bar="&quot;"
    'ATTRIB_VALUE_ENTITY_U', // <foo bar=&quot
    'CLOSE_TAG', // </a
    'CLOSE_TAG_SAW_WHITE', // </a >
    'SCRIPT', // <script> ...
    'SCRIPT_ENDING' // <script> ... <
  ].forEach(function (stateName) {
    sax.STATE[stateName] = S++
  })
  // The five entity names available when the `strictEntities` option is set
  // (see the SAXParser constructor).
  sax.XML_ENTITIES = { amp: '&', gt: '>', lt: '<', quot: '"', apos: "'" }
  // Named entities available when `strictEntities` is not set.
  // Values are either the replacement string itself or a numeric character
  // code; the numeric ones are converted to strings right after this table.
  /* prettier-ignore */
  sax.ENTITIES = {
    amp: '&', gt: '>', lt: '<', quot: '"', apos: "'",

    AElig: 198, Aacute: 193, Acirc: 194, Agrave: 192, Aring: 197,
    Atilde: 195, Auml: 196, Ccedil: 199, ETH: 208, Eacute: 201,
    Ecirc: 202, Egrave: 200, Euml: 203, Iacute: 205, Icirc: 206,
    Igrave: 204, Iuml: 207, Ntilde: 209, Oacute: 211, Ocirc: 212,
    Ograve: 210, Oslash: 216, Otilde: 213, Ouml: 214, THORN: 222,
    Uacute: 218, Ucirc: 219, Ugrave: 217, Uuml: 220, Yacute: 221,

    aacute: 225, acirc: 226, aelig: 230, agrave: 224, aring: 229,
    atilde: 227, auml: 228, ccedil: 231, eacute: 233, ecirc: 234,
    egrave: 232, eth: 240, euml: 235, iacute: 237, icirc: 238,
    igrave: 236, iuml: 239, ntilde: 241, oacute: 243, ocirc: 244,
    ograve: 242, oslash: 248, otilde: 245, ouml: 246, szlig: 223,
    thorn: 254, uacute: 250, ucirc: 251, ugrave: 249, uuml: 252,
    yacute: 253, yuml: 255,

    copy: 169, reg: 174, nbsp: 160, iexcl: 161, cent: 162,
    pound: 163, curren: 164, yen: 165, brvbar: 166, sect: 167,
    uml: 168, ordf: 170, laquo: 171, not: 172, shy: 173,
    macr: 175, deg: 176, plusmn: 177, sup1: 185, sup2: 178,
    sup3: 179, acute: 180, micro: 181, para: 182, middot: 183,
    cedil: 184, ordm: 186, raquo: 187, frac14: 188, frac12: 189,
    frac34: 190, iquest: 191, times: 215, divide: 247,

    OElig: 338, oelig: 339, Scaron: 352, scaron: 353, Yuml: 376,
    fnof: 402, circ: 710, tilde: 732,

    Alpha: 913, Beta: 914, Gamma: 915, Delta: 916, Epsilon: 917,
    Zeta: 918, Eta: 919, Theta: 920, Iota: 921, Kappa: 922,
    Lambda: 923, Mu: 924, Nu: 925, Xi: 926, Omicron: 927,
    Pi: 928, Rho: 929, Sigma: 931, Tau: 932, Upsilon: 933,
    Phi: 934, Chi: 935, Psi: 936, Omega: 937,

    alpha: 945, beta: 946, gamma: 947, delta: 948, epsilon: 949,
    zeta: 950, eta: 951, theta: 952, iota: 953, kappa: 954,
    lambda: 955, mu: 956, nu: 957, xi: 958, omicron: 959,
    pi: 960, rho: 961, sigmaf: 962, sigma: 963, tau: 964,
    upsilon: 965, phi: 966, chi: 967, psi: 968, omega: 969,
    thetasym: 977, upsih: 978, piv: 982,

    ensp: 8194, emsp: 8195, thinsp: 8201, zwnj: 8204, zwj: 8205,
    lrm: 8206, rlm: 8207, ndash: 8211, mdash: 8212, lsquo: 8216,
    rsquo: 8217, sbquo: 8218, ldquo: 8220, rdquo: 8221, bdquo: 8222,
    dagger: 8224, Dagger: 8225, bull: 8226, hellip: 8230, permil: 8240,
    prime: 8242, Prime: 8243, lsaquo: 8249, rsaquo: 8250, oline: 8254,
    frasl: 8260, euro: 8364, image: 8465, weierp: 8472, real: 8476,
    trade: 8482, alefsym: 8501,

    larr: 8592, uarr: 8593, rarr: 8594, darr: 8595, harr: 8596,
    crarr: 8629, lArr: 8656, uArr: 8657, rArr: 8658, dArr: 8659,
    hArr: 8660,

    forall: 8704, part: 8706, exist: 8707, empty: 8709, nabla: 8711,
    isin: 8712, notin: 8713, ni: 8715, prod: 8719, sum: 8721,
    minus: 8722, lowast: 8727, radic: 8730, prop: 8733, infin: 8734,
    ang: 8736, and: 8743, or: 8744, cap: 8745, cup: 8746,
    int: 8747, there4: 8756, sim: 8764, cong: 8773, asymp: 8776,
    ne: 8800, equiv: 8801, le: 8804, ge: 8805, sub: 8834,
    sup: 8835, nsub: 8836, sube: 8838, supe: 8839, oplus: 8853,
    otimes: 8855, perp: 8869, sdot: 8901, lceil: 8968, rceil: 8969,
    lfloor: 8970, rfloor: 8971, lang: 9001, rang: 9002, loz: 9674,
    spades: 9824, clubs: 9827, hearts: 9829, diams: 9830
  }
  // Replace every numeric entry of sax.ENTITIES with the one-character
  // string for that character code; string entries are left untouched.
  Object.keys(sax.ENTITIES).forEach(function (name) {
    var value = sax.ENTITIES[name]
    if (typeof value === 'number') {
      value = String.fromCharCode(value)
    }
    sax.ENTITIES[name] = value
  })

  // Add the reverse mapping (state number -> state name) to sax.STATE.
  for (var s in sax.STATE) {
    sax.STATE[sax.STATE[s]] = s
  }

  // From here on `S` is shorthand for the state table.
  S = sax.STATE
  /**
   * Invoke the handler stored on `parser` under `event`, if there is one.
   * The handler is called as a method of the parser.
   *
   * @param {Object} parser - object carrying the handler properties
   * @param {string} event - handler property name, e.g. 'ontext'
   * @param {*} [data] - single argument passed to the handler
   */
  function emit(parser, event, data) {
    if (parser[event]) {
      parser[event](data)
    }
  }
  /**
   * Emit a node event, first flushing any pending text so the text event
   * is delivered before the node event.
   *
   * @param {Object} parser - parser instance
   * @param {string} nodeType - handler property name, e.g. 'onopentag'
   * @param {*} [data] - payload for the handler
   */
  function emitNode(parser, nodeType, data) {
    var hasPendingText = Boolean(parser.textNode)
    if (hasPendingText) {
      closeText(parser)
    }
    emit(parser, nodeType, data)
  }
  /**
   * Flush the text buffer: apply the trim/normalize options, emit `ontext`
   * if anything is left, then empty the buffer.
   *
   * @param {Object} parser - parser instance
   */
  function closeText(parser) {
    var text = textopts(parser.opt, parser.textNode)
    // Stored back before emitting so a handler reading parser.textNode sees
    // the processed text.
    parser.textNode = text
    if (text) {
      emit(parser, 'ontext', text)
    }
    parser.textNode = ''
  }
  /**
   * Apply the text-related options to a string.
   *
   * @param {Object} opt - options; `trim` strips surrounding whitespace,
   *   `normalize` collapses each whitespace run to a single space
   * @param {string} text - raw text
   * @returns {string} processed text
   */
  function textopts(opt, text) {
    var result = text
    if (opt.trim) {
      result = result.trim()
    }
    if (opt.normalize) {
      result = result.replace(/\s+/g, ' ')
    }
    return result
  }
  /**
   * Record and report a parse error.
   *
   * Pending text is flushed first. When position tracking is on, the current
   * line, column and character are appended to the message. The resulting
   * Error is stored on `parser.error` and passed to the `onerror` handler.
   *
   * @param {Object} parser - parser instance
   * @param {string} er - error message
   * @returns {Object} the parser
   */
  function error(parser, er) {
    closeText(parser)

    var message = er
    if (parser.trackPosition) {
      message += '\nLine: ' + parser.line
        + '\nColumn: ' + parser.column
        + '\nChar: ' + parser.c
    }

    var failure = new Error(message)
    parser.error = failure
    emit(parser, 'onerror', failure)
    return parser
  }
  /**
   * Finish parsing.
   *
   * Reports an unclosed root tag (strict mode only) and an unexpected end
   * if the parser is in any state other than BEGIN, BEGIN_WHITESPACE or
   * TEXT. Then flushes text, marks the parser closed, emits `onend`, and
   * re-runs the SAXParser constructor on the same instance to reset it.
   *
   * @param {Object} parser - parser instance
   * @returns {Object} the parser
   */
  function end(parser) {
    if (parser.sawRoot && !parser.closedRoot) {
      strictFail(parser, 'Unclosed root tag')
    }

    var endedCleanly =
      parser.state === S.BEGIN ||
      parser.state === S.BEGIN_WHITESPACE ||
      parser.state === S.TEXT
    if (!endedCleanly) {
      error(parser, 'Unexpected end')
    }

    closeText(parser)
    parser.c = ''
    parser.closed = true
    emit(parser, 'onend')

    // Reset in place, keeping the same strictness and options object.
    SAXParser.call(parser, parser.strict, parser.opt)
    return parser
  }
  /**
   * Report `message` through error(), but only when the parser is in strict
   * mode; in non-strict mode this does nothing.
   *
   * @param {SAXParser} parser - parser instance
   * @param {string} message - error message
   * @throws {Error} if `parser` is not a SAXParser instance
   */
  function strictFail(parser, message) {
    var isParser = typeof parser === 'object' && parser instanceof SAXParser
    if (!isParser) {
      throw new Error('bad call to strictFail')
    }
    if (!parser.strict) {
      return
    }
    error(parser, message)
  }
  /**
   * Start a new tag record from the accumulated tag name and emit
   * `onopentagstart`.
   *
   * In non-strict mode the tag name is first case-folded with the method
   * named by `parser.looseCase`. In xmlns mode the tag initially shares its
   * parent's namespace object.
   *
   * @param {Object} parser - parser instance
   */
  function newTag(parser) {
    if (!parser.strict) {
      parser.tagName = parser.tagName[parser.looseCase]()
    }

    var enclosing = parser.tags[parser.tags.length - 1] || parser
    var tag = { name: parser.tagName, attributes: {} }
    parser.tag = tag

    // Overridden later if the tag carries an xmlns="foo" or xmlns:foo="bar"
    // attribute.
    if (parser.opt.xmlns) {
      tag.ns = enclosing.ns
    }

    parser.attribList.length = 0
    emitNode(parser, 'onopentagstart', tag)
  }
  /**
   * Split a possibly prefixed name into prefix and local part.
   *
   * Without a colon the prefix is ''. With colons, the name is split on ':'
   * and the first two pieces are used. An attribute named exactly `xmlns`
   * is reported as prefix 'xmlns' with an empty local part.
   *
   * @param {string} name - tag or attribute name
   * @param {boolean} [attribute] - truthy when `name` is an attribute name
   * @returns {{prefix: string, local: string}}
   */
  function qname(name, attribute) {
    // <x "xmlns"="http://foo">
    if (attribute && name === 'xmlns') {
      return { prefix: 'xmlns', local: '' }
    }
    if (name.indexOf(':') < 0) {
      return { prefix: '', local: name }
    }
    var pieces = name.split(':')
    return { prefix: pieces[0], local: pieces[1] }
  }
  /**
   * Commit the attribute currently held in `parser.attribName` /
   * `parser.attribValue` to the tag being opened, then clear both buffers.
   *
   * - In non-strict mode the name is case-folded first.
   * - A repeated attribute name on the same tag is ignored (first one wins).
   *   This applies both to attributes already stored on the tag and, in
   *   xmlns mode, to attributes still waiting in `parser.attribList`.
   * - In xmlns mode, `xmlns` / `xmlns:*` attributes update the tag's
   *   namespace bindings and the attribute event is deferred to openTag();
   *   otherwise the attribute is stored and `onattribute` fires immediately.
   *
   * @param {Object} parser - parser instance
   */
  function attrib(parser) {
    if (!parser.strict) {
      parser.attribName = parser.attribName[parser.looseCase]()
    }

    var attribName = parser.attribName

    // attribList holds [name, value] pairs, so the name has to be compared
    // against the first element of each pair.
    var isDeferredDuplicate = parser.attribList.some(function (pair) {
      return pair[0] === attribName
    })
    // Called through Object.prototype so that an attribute literally named
    // "hasOwnProperty" stored on the tag cannot shadow the method.
    var isStoredDuplicate = Object.prototype.hasOwnProperty.call(
      parser.tag.attributes,
      attribName
    )
    if (isDeferredDuplicate || isStoredDuplicate) {
      parser.attribName = parser.attribValue = ''
      return
    }

    if (parser.opt.xmlns) {
      var qn = qname(parser.attribName, true)
      var prefix = qn.prefix
      var local = qn.local

      if (prefix === 'xmlns') {
        // namespace binding attribute. push the binding into scope
        if (local === 'xml' && parser.attribValue !== XML_NAMESPACE) {
          strictFail(
            parser,
            'xml: prefix must be bound to ' +
              XML_NAMESPACE +
              '\n' +
              'Actual: ' +
              parser.attribValue
          )
        } else if (
          local === 'xmlns' &&
          parser.attribValue !== XMLNS_NAMESPACE
        ) {
          strictFail(
            parser,
            'xmlns: prefix must be bound to ' +
              XMLNS_NAMESPACE +
              '\n' +
              'Actual: ' +
              parser.attribValue
          )
        } else {
          var tag = parser.tag
          var parent = parser.tags[parser.tags.length - 1] || parser
          // Fork the namespace object on first write so the parent's
          // bindings are not modified.
          if (tag.ns === parent.ns) {
            tag.ns = Object.create(parent.ns)
          }
          tag.ns[local] = parser.attribValue
        }
      }

      // defer onattribute events until all attributes have been seen
      // so any new bindings can take effect. preserve attribute order
      // so deferred events can be emitted in document order
      parser.attribList.push([parser.attribName, parser.attribValue])
    } else {
      // in non-xmlns mode, we can emit the event right away
      parser.tag.attributes[parser.attribName] = parser.attribValue
      emitNode(parser, 'onattribute', {
        name: parser.attribName,
        value: parser.attribValue
      })
    }

    parser.attribName = parser.attribValue = ''
  }
  /**
   * Finish an opening tag: resolve namespaces (xmlns mode), emit the
   * deferred attribute events, push the tag on the stack and emit
   * `onopentag`.
   *
   * For a tag that is not self-closing, the parser moves to the SCRIPT state
   * if script handling is enabled and the tag name is "script" (any case),
   * otherwise to TEXT, and the current-tag fields are cleared.
   *
   * @param {Object} parser - parser instance
   * @param {boolean} [selfClosing] - truthy for `<tag/>`
   */
  function openTag(parser, selfClosing) {
    if (parser.opt.xmlns) {
      var tag = parser.tag

      // add namespace info to tag
      var tagQName = qname(parser.tagName)
      tag.prefix = tagQName.prefix
      tag.local = tagQName.local
      tag.uri = tag.ns[tagQName.prefix] || ''

      if (tag.prefix && !tag.uri) {
        strictFail(
          parser,
          'Unbound namespace prefix: ' + JSON.stringify(parser.tagName)
        )
        // Fall back to using the prefix itself as the URI.
        tag.uri = tagQName.prefix
      }

      // emit namespace binding events for bindings introduced by this tag
      var parent = parser.tags[parser.tags.length - 1] || parser
      if (tag.ns && parent.ns !== tag.ns) {
        Object.keys(tag.ns).forEach(function (p) {
          emitNode(parser, 'onopennamespace', {
            prefix: p,
            uri: tag.ns[p]
          })
        })
      }

      // handle deferred onattribute events
      // Note: do not apply default ns to attributes:
      // http://www.w3.org/TR/REC-xml-names/#defaulting
      var pending = parser.attribList.length
      for (var i = 0; i < pending; i++) {
        var pair = parser.attribList[i]
        var name = pair[0]
        var value = pair[1]
        var attrQName = qname(name, true)
        var prefix = attrQName.prefix
        var local = attrQName.local

        var uri = ''
        if (prefix !== '') {
          uri = tag.ns[prefix] || ''
        }

        var a = {
          name: name,
          value: value,
          prefix: prefix,
          local: local,
          uri: uri
        }

        // if there's any attributes with an undefined namespace,
        // then fail on them now.
        if (prefix && prefix !== 'xmlns' && !uri) {
          strictFail(
            parser,
            'Unbound namespace prefix: ' + JSON.stringify(prefix)
          )
          a.uri = prefix
        }

        parser.tag.attributes[name] = a
        emitNode(parser, 'onattribute', a)
      }
      parser.attribList.length = 0
    }

    parser.tag.isSelfClosing = !!selfClosing

    // process the tag
    parser.sawRoot = true
    parser.tags.push(parser.tag)
    emitNode(parser, 'onopentag', parser.tag)

    if (!selfClosing) {
      // special case for <script> in non-strict mode.
      var entersScript =
        !parser.noscript && parser.tagName.toLowerCase() === 'script'
      parser.state = entersScript ? S.SCRIPT : S.TEXT
      parser.tag = null
      parser.tagName = ''
    }

    parser.attribName = parser.attribValue = ''
    parser.attribList.length = 0
  }
  /**
   * Handle a closing tag whose name is in `parser.tagName`.
   *
   * - An empty name is reported (strict mode) and re-inserted as the text
   *   `</>`.
   * - While script content is buffered, any closing tag other than `script`
   *   is appended to the script text; `</script>` emits the script.
   * - Otherwise the tag stack is searched from the top for a matching name.
   *   With no match, the closing tag is reported (strict mode) and
   *   re-inserted as text. With a match, every tag above it and the match
   *   itself are popped, emitting `onclosetag` and, in xmlns mode,
   *   `onclosenamespace` for each binding a popped tag introduced.
   *
   * The parser always ends up in the TEXT state, except when the tag is
   * swallowed into script content (SCRIPT state).
   *
   * @param {Object} parser - parser instance
   */
  function closeTag(parser) {
    if (!parser.tagName) {
      strictFail(parser, 'Weird empty close tag.')
      parser.textNode += '</>'
      parser.state = S.TEXT
      return
    }

    if (parser.script) {
      if (parser.tagName !== 'script') {
        parser.script += '</' + parser.tagName + '>'
        parser.tagName = ''
        parser.state = S.SCRIPT
        return
      }
      emitNode(parser, 'onscript', parser.script)
      parser.script = ''
    }

    // first make sure that the closing tag actually exists.
    // <a><b></c></b></a> will close everything, otherwise.
    var t = parser.tags.length
    var tagName = parser.tagName
    if (!parser.strict) {
      tagName = tagName[parser.looseCase]()
    }
    var closeTo = tagName
    while (t--) {
      var close = parser.tags[t]
      if (close.name !== closeTo) {
        // Each skipped open tag is reported in strict mode.
        strictFail(parser, 'Unexpected close tag')
      } else {
        break
      }
    }

    // didn't find it. we already failed for strict, so just abort.
    if (t < 0) {
      strictFail(parser, 'Unmatched closing tag: ' + parser.tagName)
      parser.textNode += '</' + parser.tagName + '>'
      parser.state = S.TEXT
      return
    }

    parser.tagName = tagName
    var s = parser.tags.length
    while (s-- > t) {
      var tag = (parser.tag = parser.tags.pop())
      parser.tagName = parser.tag.name
      emitNode(parser, 'onclosetag', parser.tagName)

      var parent = parser.tags[parser.tags.length - 1] || parser
      if (parser.opt.xmlns && tag.ns !== parent.ns) {
        // remove namespace bindings introduced by tag
        Object.keys(tag.ns).forEach(function (p) {
          var n = tag.ns[p]
          emitNode(parser, 'onclosenamespace', { prefix: p, uri: n })
        })
      }
    }

    // Index 0 is the root element.
    if (t === 0) parser.closedRoot = true
    parser.tagName = parser.attribValue = parser.attribName = ''
    parser.attribList.length = 0
    parser.state = S.TEXT
  }
  /**
   * Resolve the entity whose name (the text between `&` and `;`) has been
   * accumulated in `parser.entity`.
   *
   * Lookup order:
   *   1. a named entity in `parser.ENTITIES`, exact case;
   *   2. a named entity in `parser.ENTITIES`, lower-cased;
   *   3. a numeric character reference (`#123` or `#x7b`, case-insensitive).
   *
   * Anything else is reported through `strictFail` and echoed back verbatim
   * as `&name;`.
   *
   * @param {Object} parser - parser state; reads `entity` and `ENTITIES`.
   * @returns {string} the replacement text, or the original `&name;` source
   *   when the entity cannot be resolved.
   */
  function parseEntity(parser) {
    var entity = parser.entity
    var entityLC = entity.toLowerCase()
    var num
    var numStr = ''

    // Only non-empty *string* values count as named entities. A plain bracket
    // lookup also finds inherited properties, so when the table is (or
    // inherits from) an ordinary object, names such as `constructor`,
    // `toString` or `__proto__` would otherwise resolve to a function or an
    // object and end up stringified into the document text.
    var named = parser.ENTITIES[entity]
    if (typeof named === 'string' && named) {
      return named
    }
    named = parser.ENTITIES[entityLC]
    if (typeof named === 'string' && named) {
      return named
    }

    entity = entityLC
    if (entity.charAt(0) === '#') {
      if (entity.charAt(1) === 'x') {
        entity = entity.slice(2)
        num = parseInt(entity, 16)
        numStr = num.toString(16)
      } else {
        entity = entity.slice(1)
        num = parseInt(entity, 10)
        numStr = num.toString(10)
      }
    }

    // Strip leading zeros so that the digits can be compared with the
    // re-serialised number; any mismatch means parseInt() stopped early on
    // trailing garbage (or that the value was zero / empty).
    entity = entity.replace(/^0+/, '')
    if (
      isNaN(num) ||
      numStr.toLowerCase() !== entity ||
      num < 0 ||
      num > 0x10ffff
    ) {
      strictFail(parser, 'Invalid character entity')
      return '&' + parser.entity + ';'
    }

    return String.fromCodePoint(num)
  }
  /**
   * Handle one character seen before the first tag of the document.
   *
   * `<` starts a tag, whitespace is ignored, and anything else is reported
   * via `strictFail` and then becomes the start of a text node.
   *
   * @param {Object} parser - parser state to update.
   * @param {string} c - the current character.
   */
  function beginWhiteSpace(parser, c) {
    if (c === '<') {
      parser.startTagPosition = parser.position
      parser.state = S.OPEN_WAKA
      return
    }

    if (isWhitespace(c)) {
      return
    }

    // have to process this as a text node.
    // weird, but happens.
    // (report first: the failure is raised before the text buffer is reset)
    strictFail(parser, 'Non-whitespace before first tag.')
    parser.textNode = c
    parser.state = S.TEXT
  }
  /**
   * Bounds-checked character access: the character of `chunk` at index `i`,
   * or the empty string once `i` has reached `chunk.length`.
   *
   * @param {string} chunk - text being scanned.
   * @param {number} i - index to read.
   * @returns {string} a single character, or '' past the end.
   */
  function charAt(chunk, i) {
    return i < chunk.length ? chunk.charAt(i) : ''
  }
  /**
   * Feed a chunk of text to the parser and run the state machine over it,
   * one character at a time. Must be called with the parser as `this`.
   *
   * - Rethrows `this.error` if an error is already recorded on the parser.
   * - Reports an error (via `error()`) if the parser has been closed.
   * - `null` ends the parse (via `end()`).
   * - Objects are converted with `toString()` before scanning.
   *
   * Parser state (`state`, the text/attribute/comment buffers, position
   * counters, ...) persists between calls, so a construct may be split
   * across any number of chunks.
   *
   * @param {?(string|Object)} chunk - text to parse, or `null` to finish.
   * @returns {*} the parser itself after a normal write; otherwise whatever
   *   `error()` / `end()` returned.
   * @throws {*} the previously recorded `this.error`, or an `Error` if the
   *   parser is found in an unknown state.
   */
  function write(chunk) {
    var parser = this
    if (this.error) {
      throw this.error
    }
    if (parser.closed) {
      return error(
        parser,
        'Cannot write after close. Assign an onready handler.'
      )
    }
    if (chunk === null) {
      return end(parser)
    }
    if (typeof chunk === 'object') {
      chunk = chunk.toString()
    }
    var i = 0
    var c = ''
    while (true) {
      c = charAt(chunk, i++)
      parser.c = c

      // charAt() yields '' once the chunk is exhausted.
      if (!c) {
        break
      }

      if (parser.trackPosition) {
        parser.position++
        if (c === '\n') {
          parser.line++
          parser.column = 0
        } else {
          parser.column++
        }
      }

      switch (parser.state) {
        case S.BEGIN:
          parser.state = S.BEGIN_WHITESPACE
          // skip a leading byte-order mark
          if (c === '\uFEFF') {
            continue
          }
          beginWhiteSpace(parser, c)
          continue

        case S.BEGIN_WHITESPACE:
          beginWhiteSpace(parser, c)
          continue

        case S.TEXT:
          if (parser.sawRoot && !parser.closedRoot) {
            // fast path: consume a run of plain text in one go
            var starti = i - 1
            while (c && c !== '<' && c !== '&') {
              c = charAt(chunk, i++)
              if (c && parser.trackPosition) {
                parser.position++
                if (c === '\n') {
                  parser.line++
                  parser.column = 0
                } else {
                  parser.column++
                }
              }
            }
            parser.textNode += chunk.substring(starti, i - 1)
          }
          if (
            c === '<' &&
            !(parser.sawRoot && parser.closedRoot && !parser.strict)
          ) {
            parser.state = S.OPEN_WAKA
            parser.startTagPosition = parser.position
          } else {
            if (
              !isWhitespace(c) &&
              (!parser.sawRoot || parser.closedRoot)
            ) {
              strictFail(parser, 'Text data outside of root node.')
            }
            if (c === '&') {
              parser.state = S.TEXT_ENTITY
            } else {
              parser.textNode += c
            }
          }
          continue

        case S.SCRIPT:
          // only non-strict
          if (c === '<') {
            parser.state = S.SCRIPT_ENDING
          } else {
            parser.script += c
          }
          continue

        case S.SCRIPT_ENDING:
          if (c === '/') {
            parser.state = S.CLOSE_TAG
          } else {
            parser.script += '<' + c
            parser.state = S.SCRIPT
          }
          continue

        case S.OPEN_WAKA:
          // either a /, ?, !, or text is coming next.
          if (c === '!') {
            parser.state = S.SGML_DECL
            parser.sgmlDecl = ''
          } else if (isWhitespace(c)) {
            // wait for it...
          } else if (isMatch(nameStart, c)) {
            parser.state = S.OPEN_TAG
            parser.tagName = c
          } else if (c === '/') {
            parser.state = S.CLOSE_TAG
            parser.tagName = ''
          } else if (c === '?') {
            parser.state = S.PROC_INST
            parser.procInstName = parser.procInstBody = ''
          } else {
            strictFail(parser, 'Unencoded <')
            // if there was some whitespace, then add that in.
            if (parser.startTagPosition + 1 < parser.position) {
              var pad = parser.position - parser.startTagPosition
              c = new Array(pad).join(' ') + c
            }
            parser.textNode += '<' + c
            parser.state = S.TEXT
          }
          continue

        case S.SGML_DECL:
          if (parser.sgmlDecl + c === '--') {
            parser.state = S.COMMENT
            parser.comment = ''
            parser.sgmlDecl = ''
            continue
          }

          if (
            parser.doctype &&
            parser.doctype !== true &&
            parser.sgmlDecl
          ) {
            // a declaration inside an open doctype belongs to the DTD text
            parser.state = S.DOCTYPE_DTD
            parser.doctype += '<!' + parser.sgmlDecl + c
            parser.sgmlDecl = ''
          } else if ((parser.sgmlDecl + c).toUpperCase() === CDATA) {
            emitNode(parser, 'onopencdata')
            parser.state = S.CDATA
            parser.sgmlDecl = ''
            parser.cdata = ''
          } else if ((parser.sgmlDecl + c).toUpperCase() === DOCTYPE) {
            parser.state = S.DOCTYPE
            if (parser.doctype || parser.sawRoot) {
              strictFail(
                parser,
                'Inappropriately located doctype declaration'
              )
            }
            parser.doctype = ''
            parser.sgmlDecl = ''
          } else if (c === '>') {
            emitNode(parser, 'onsgmldeclaration', parser.sgmlDecl)
            parser.sgmlDecl = ''
            parser.state = S.TEXT
          } else if (isQuote(c)) {
            parser.state = S.SGML_DECL_QUOTED
            // remember which quote opened the literal; SGML_DECL_QUOTED
            // compares against parser.q to find the matching close quote.
            parser.q = c
            parser.sgmlDecl += c
          } else {
            parser.sgmlDecl += c
          }
          continue

        case S.SGML_DECL_QUOTED:
          if (c === parser.q) {
            parser.state = S.SGML_DECL
            parser.q = ''
          }
          parser.sgmlDecl += c
          continue

        case S.DOCTYPE:
          if (c === '>') {
            parser.state = S.TEXT
            emitNode(parser, 'ondoctype', parser.doctype)
            parser.doctype = true // just remember that we saw it.
          } else {
            parser.doctype += c
            if (c === '[') {
              parser.state = S.DOCTYPE_DTD
            } else if (isQuote(c)) {
              parser.state = S.DOCTYPE_QUOTED
              parser.q = c
            }
          }
          continue

        case S.DOCTYPE_QUOTED:
          parser.doctype += c
          if (c === parser.q) {
            parser.q = ''
            parser.state = S.DOCTYPE
          }
          continue

        case S.DOCTYPE_DTD:
          if (c === ']') {
            parser.doctype += c
            parser.state = S.DOCTYPE
          } else if (c === '<') {
            parser.state = S.OPEN_WAKA
            parser.startTagPosition = parser.position
          } else if (isQuote(c)) {
            parser.doctype += c
            parser.state = S.DOCTYPE_DTD_QUOTED
            parser.q = c
          } else {
            parser.doctype += c
          }
          continue

        case S.DOCTYPE_DTD_QUOTED:
          parser.doctype += c
          if (c === parser.q) {
            parser.state = S.DOCTYPE_DTD
            parser.q = ''
          }
          continue

        case S.COMMENT:
          if (c === '-') {
            parser.state = S.COMMENT_ENDING
          } else {
            parser.comment += c
          }
          continue

        case S.COMMENT_ENDING:
          if (c === '-') {
            parser.state = S.COMMENT_ENDED
            parser.comment = textopts(parser.opt, parser.comment)
            if (parser.comment) {
              emitNode(parser, 'oncomment', parser.comment)
            }
            parser.comment = ''
          } else {
            parser.comment += '-' + c
            parser.state = S.COMMENT
          }
          continue

        case S.COMMENT_ENDED:
          if (c !== '>') {
            strictFail(parser, 'Malformed comment')
            // allow <!-- blah -- bloo --> in non-strict mode,
            // which is a comment of " blah -- bloo "
            parser.comment += '--' + c
            parser.state = S.COMMENT
          } else if (parser.doctype && parser.doctype !== true) {
            parser.state = S.DOCTYPE_DTD
          } else {
            parser.state = S.TEXT
          }
          continue

        case S.CDATA:
          // fast path: consume everything up to the next ']' in one go
          var starti = i - 1
          while (c && c !== ']') {
            c = charAt(chunk, i++)
            if (c && parser.trackPosition) {
              parser.position++
              if (c === '\n') {
                parser.line++
                parser.column = 0
              } else {
                parser.column++
              }
            }
          }
          parser.cdata += chunk.substring(starti, i - 1)
          if (c === ']') {
            parser.state = S.CDATA_ENDING
          }
          continue

        case S.CDATA_ENDING:
          if (c === ']') {
            parser.state = S.CDATA_ENDING_2
          } else {
            parser.cdata += ']' + c
            parser.state = S.CDATA
          }
          continue

        case S.CDATA_ENDING_2:
          if (c === '>') {
            if (parser.cdata) {
              emitNode(parser, 'oncdata', parser.cdata)
            }
            emitNode(parser, 'onclosecdata')
            parser.cdata = ''
            parser.state = S.TEXT
          } else if (c === ']') {
            // "]]]": the first ']' was data, keep waiting for '>'
            parser.cdata += ']'
          } else {
            parser.cdata += ']]' + c
            parser.state = S.CDATA
          }
          continue

        case S.PROC_INST:
          if (c === '?') {
            parser.state = S.PROC_INST_ENDING
          } else if (isWhitespace(c)) {
            parser.state = S.PROC_INST_BODY
          } else {
            parser.procInstName += c
          }
          continue

        case S.PROC_INST_BODY:
          if (!parser.procInstBody && isWhitespace(c)) {
            continue
          } else if (c === '?') {
            parser.state = S.PROC_INST_ENDING
          } else {
            parser.procInstBody += c
          }
          continue

        case S.PROC_INST_ENDING:
          if (c === '>') {
            emitNode(parser, 'onprocessinginstruction', {
              name: parser.procInstName,
              body: parser.procInstBody,
            })
            parser.procInstName = parser.procInstBody = ''
            parser.state = S.TEXT
          } else {
            parser.procInstBody += '?' + c
            parser.state = S.PROC_INST_BODY
          }
          continue

        case S.OPEN_TAG:
          if (isMatch(nameBody, c)) {
            parser.tagName += c
          } else {
            newTag(parser)
            if (c === '>') {
              openTag(parser)
            } else if (c === '/') {
              parser.state = S.OPEN_TAG_SLASH
            } else {
              if (!isWhitespace(c)) {
                strictFail(parser, 'Invalid character in tag name')
              }
              parser.state = S.ATTRIB
            }
          }
          continue

        case S.OPEN_TAG_SLASH:
          if (c === '>') {
            openTag(parser, true)
            closeTag(parser)
          } else {
            strictFail(
              parser,
              'Forward-slash in opening tag not followed by >'
            )
            parser.state = S.ATTRIB
          }
          continue

        case S.ATTRIB:
          // haven't read the attribute name yet.
          if (isWhitespace(c)) {
            continue
          } else if (c === '>') {
            openTag(parser)
          } else if (c === '/') {
            parser.state = S.OPEN_TAG_SLASH
          } else if (isMatch(nameStart, c)) {
            parser.attribName = c
            parser.attribValue = ''
            parser.state = S.ATTRIB_NAME
          } else {
            strictFail(parser, 'Invalid attribute name')
          }
          continue

        case S.ATTRIB_NAME:
          if (c === '=') {
            parser.state = S.ATTRIB_VALUE
          } else if (c === '>') {
            strictFail(parser, 'Attribute without value')
            parser.attribValue = parser.attribName
            attrib(parser)
            openTag(parser)
          } else if (isWhitespace(c)) {
            parser.state = S.ATTRIB_NAME_SAW_WHITE
          } else if (isMatch(nameBody, c)) {
            parser.attribName += c
          } else {
            strictFail(parser, 'Invalid attribute name')
          }
          continue

        case S.ATTRIB_NAME_SAW_WHITE:
          if (c === '=') {
            parser.state = S.ATTRIB_VALUE
          } else if (isWhitespace(c)) {
            continue
          } else {
            strictFail(parser, 'Attribute without value')
            parser.tag.attributes[parser.attribName] = ''
            parser.attribValue = ''
            emitNode(parser, 'onattribute', {
              name: parser.attribName,
              value: '',
            })
            parser.attribName = ''
            if (c === '>') {
              openTag(parser)
            } else if (isMatch(nameStart, c)) {
              parser.attribName = c
              parser.state = S.ATTRIB_NAME
            } else {
              strictFail(parser, 'Invalid attribute name')
              parser.state = S.ATTRIB
            }
          }
          continue

        case S.ATTRIB_VALUE:
          if (isWhitespace(c)) {
            continue
          } else if (isQuote(c)) {
            parser.q = c
            parser.state = S.ATTRIB_VALUE_QUOTED
          } else {
            if (!parser.opt.unquotedAttributeValues) {
              error(parser, 'Unquoted attribute value')
            }
            parser.state = S.ATTRIB_VALUE_UNQUOTED
            parser.attribValue = c
          }
          continue

        case S.ATTRIB_VALUE_QUOTED:
          if (c !== parser.q) {
            if (c === '&') {
              parser.state = S.ATTRIB_VALUE_ENTITY_Q
            } else {
              parser.attribValue += c
            }
            continue
          }
          attrib(parser)
          parser.q = ''
          parser.state = S.ATTRIB_VALUE_CLOSED
          continue

        case S.ATTRIB_VALUE_CLOSED:
          if (isWhitespace(c)) {
            parser.state = S.ATTRIB
          } else if (c === '>') {
            openTag(parser)
          } else if (c === '/') {
            parser.state = S.OPEN_TAG_SLASH
          } else if (isMatch(nameStart, c)) {
            strictFail(parser, 'No whitespace between attributes')
            parser.attribName = c
            parser.attribValue = ''
            parser.state = S.ATTRIB_NAME
          } else {
            strictFail(parser, 'Invalid attribute name')
          }
          continue

        case S.ATTRIB_VALUE_UNQUOTED:
          if (!isAttribEnd(c)) {
            if (c === '&') {
              parser.state = S.ATTRIB_VALUE_ENTITY_U
            } else {
              parser.attribValue += c
            }
            continue
          }
          attrib(parser)
          if (c === '>') {
            openTag(parser)
          } else {
            parser.state = S.ATTRIB
          }
          continue

        case S.CLOSE_TAG:
          if (!parser.tagName) {
            if (isWhitespace(c)) {
              continue
            } else if (notMatch(nameStart, c)) {
              if (parser.script) {
                parser.script += '</' + c
                parser.state = S.SCRIPT
              } else {
                strictFail(parser, 'Invalid tagname in closing tag.')
              }
            } else {
              parser.tagName = c
            }
          } else if (c === '>') {
            closeTag(parser)
          } else if (isMatch(nameBody, c)) {
            parser.tagName += c
          } else if (parser.script) {
            parser.script += '</' + parser.tagName + c
            parser.tagName = ''
            parser.state = S.SCRIPT
          } else {
            if (!isWhitespace(c)) {
              strictFail(parser, 'Invalid tagname in closing tag')
            }
            parser.state = S.CLOSE_TAG_SAW_WHITE
          }
          continue

        case S.CLOSE_TAG_SAW_WHITE:
          if (isWhitespace(c)) {
            continue
          }
          if (c === '>') {
            closeTag(parser)
          } else {
            strictFail(parser, 'Invalid characters in closing tag')
          }
          continue

        case S.TEXT_ENTITY:
        case S.ATTRIB_VALUE_ENTITY_Q:
        case S.ATTRIB_VALUE_ENTITY_U:
          // The three entity states share one implementation; they differ
          // only in the state to return to and the buffer that receives
          // the decoded text.
          var returnState
          var buffer
          switch (parser.state) {
            case S.TEXT_ENTITY:
              returnState = S.TEXT
              buffer = 'textNode'
              break

            case S.ATTRIB_VALUE_ENTITY_Q:
              returnState = S.ATTRIB_VALUE_QUOTED
              buffer = 'attribValue'
              break

            case S.ATTRIB_VALUE_ENTITY_U:
              returnState = S.ATTRIB_VALUE_UNQUOTED
              buffer = 'attribValue'
              break
          }

          if (c === ';') {
            var parsedEntity = parseEntity(parser)
            if (
              parser.opt.unparsedEntities &&
              !Object.values(sax.XML_ENTITIES).includes(parsedEntity)
            ) {
              // re-parse the replacement text as markup
              parser.entity = ''
              parser.state = returnState
              parser.write(parsedEntity)
            } else {
              parser[buffer] += parsedEntity
              parser.entity = ''
              parser.state = returnState
            }
          } else if (
            isMatch(parser.entity.length ? entityBody : entityStart, c)
          ) {
            parser.entity += c
          } else {
            strictFail(parser, 'Invalid character in entity name')
            parser[buffer] += '&' + parser.entity + c
            parser.entity = ''
            parser.state = returnState
          }
          continue

        default: /* istanbul ignore next */ {
          // Error() takes the message as its first argument.
          throw new Error('Unknown state: ' + parser.state)
        }
      }
    } // while

    if (parser.position >= parser.bufferCheckPosition) {
      checkBufferLength(parser)
    }
    return parser
  }
  /*! http://mths.be/fromcodepoint v0.1.0 by @mathias */
  /* istanbul ignore next */
  if (!String.fromCodePoint) {
    // Fallback for runtimes without String.fromCodePoint: builds the string
    // from UTF-16 code units, splitting astral code points into surrogates.
    ;(function () {
      var charsFromUnits = String.fromCharCode
      var roundDown = Math.floor
      var fromCodePoint = function () {
        // flush the pending code units once this many have accumulated
        var FLUSH_LIMIT = 0x4000
        var pendingUnits = []
        var total = arguments.length
        if (!total) {
          return ''
        }
        var output = ''
        for (var k = 0; k < total; k++) {
          var cp = Number(arguments[k])
          // valid means: finite, an integer, and within 0..0x10FFFF
          var isValid =
            isFinite(cp) &&
            cp >= 0 &&
            cp <= 0x10ffff &&
            roundDown(cp) === cp
          if (!isValid) {
            throw RangeError('Invalid code point: ' + cp)
          }
          if (cp > 0xffff) {
            // Astral code point; split in surrogate halves
            // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
            cp -= 0x10000
            var lead = (cp >> 10) + 0xd800
            var trail = (cp % 0x400) + 0xdc00
            pendingUnits.push(lead, trail)
          } else {
            // BMP code point
            pendingUnits.push(cp)
          }
          var isLast = k + 1 === total
          if (isLast || pendingUnits.length > FLUSH_LIMIT) {
            output += charsFromUnits.apply(null, pendingUnits)
            pendingUnits.length = 0
          }
        }
        return output
      }
      /* istanbul ignore next */
      if (Object.defineProperty) {
        Object.defineProperty(String, 'fromCodePoint', {
          value: fromCodePoint,
          configurable: true,
          writable: true,
        })
      } else {
        String.fromCodePoint = fromCodePoint
      }
    })()
  }
  1573. })(typeof exports === 'undefined' ? (this.sax = {}) : exports)