UNPKG

@desertnet/html-parser

Version:

HTML parser and non-strict validator

493 lines (420 loc) 45.8 kB
'use strict'; Object.defineProperty(exports, "__esModule", { value: true }); var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; }(); var _Instr = require('./Instr'); var _Instr2 = _interopRequireDefault(_Instr); var _Op = require('./Op'); var _Op2 = _interopRequireDefault(_Op); var _scanner = require('@desertnet/scanner'); var _scanner2 = _interopRequireDefault(_scanner); var _HTMLNode = require('./HTMLNode'); function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } var Compiler = function () { function Compiler() { _classCallCheck(this, Compiler); var entityStart = /&(?=[a-z0-9#]+;)/i; var attributeStart = /[^>=\s\/]+/i; /** * The tokenizing scanner for the input string. HTML contains * many contexts where the scanner needs to accept differnt * sets of tokens. The @desertnet/scanner module calls these * sets of token definitions "dialects". Below, we're initializing * our scanner with all these different dialects. * @private * @type {Scanner} */ this._scanner = new _scanner2.default({ // Starting dialect, for content "outside of a tag". "content": [{ "text": /[^<>&]+/ }, { "commentStart": /<!--/ }, { "entityStart": entityStart }, { "tagStart": /<[a-z][^\t\n\ \/\>\0\xff]*/i }, { "closeTagStart": /<\/[a-z][^\t\n\ \/\>\0\xff]*/i }, { "error": /[<>&]/ }], // Dialect for the inside of comment tags. "comment": [{ "commentEnd": /-->/ }, { "dash": /-/ }, { "text": /[^-]+/ }], // Dialect for the inside of HTML entities. "entity": [{ "entityEnd": /;/ }, { "hex": /#x[a-f0-9]+/i }, { "dec": /#\d+/ }, { "named": /[a-z][a-z0-9]*/i }, { "error": /[^]/ }], // Dialect for the inside of tags. "tag": [{ "tagEnd": />/ }, { "whitespace": /\s+/ }, { "selfClose": /\// }, { "error": /['"<=]/ }, { "attributeStart": attributeStart }], // Initial dialect for attributes. "attribute": [{ "whitespace": /\s+/ }, { "attributeValueQuotedStart": /=['"]/ }, { "attributeValueStart": /=/ }, { "tagEnd": />/ }, { "selfClose": /\// }, { "error": /['"<]/ }, { "attributeStart": attributeStart }], // Dialect for unquoted attribute values. "attributeValue": [{ "whitespace": /\s+/ }, { "entityStart": entityStart }, { "tagEnd": />/ }, { "error": /['"<=`&]/ }, { "text": /[^'"<>=`&\s]+/ }], // Dialect for quoted attribute values. "attributeValueQuoted": [{ "dquo": /"/ }, { "squo": /'/ }, { "entityStart": entityStart }, { "error": /&/ }, { "text": /[^"'&]+/ }], // Dialect for closing tags. "closeTag": [{ "tagEnd": />/ }, { "whitespace": /\s+/ }, { "error": /[^\s>]+/ }], // Dialect for inside of script, style, and xmp tags "rawtext": [{ "closeTag": /<\/[a-z]+\s*>/i }, { "text": /[^<]+/ }, { "lt": /</ }] }); /** * @private * @type {string} */ this._expectedAttributeValueEndTokenType; /** * @private * @type {string} */ this._expectedRawtextClosingTagName; } /** * @param {string} html */ _createClass(Compiler, [{ key: 'setInput', value: function setInput(html) { this._scanner.setSource(html); this._scanner.pushDialect("content"); } /** * @private * @param {string} dialect */ }, { key: 'pushDialect', value: function pushDialect(dialect) { this._scanner.pushDialect(dialect); } /** * @private */ }, { key: 'popDialect', value: function popDialect() { this._scanner.popDialect(); } /** * @private * @return {string} */ }, { key: 'currentDialect', value: function currentDialect() { var dialect = this._scanner.currentDialect(); if (dialect === null) { throw new Error("Scanner dialect unexpectedly null."); } return dialect; } /** * @private * @param {string} tokenType */ }, { key: 'setExpectedAttributeValueEndTokenType', value: function setExpectedAttributeValueEndTokenType(tokenType) { this._expectedAttributeValueEndTokenType = tokenType; } /** * @private * @return {string} */ }, { key: 'expectedAttributeValueEndTokenType', value: function expectedAttributeValueEndTokenType() { return this._expectedAttributeValueEndTokenType; } /** * @private * @param {string} name */ }, { key: 'setExpectedRawtextClosingTagName', value: function setExpectedRawtextClosingTagName(name) { this._expectedRawtextClosingTagName = name.toLowerCase(); } /** * @private * @return {string} */ }, { key: 'expectedRawtextClosingTagName', value: function expectedRawtextClosingTagName() { return this._expectedRawtextClosingTagName; } /** * @param {string} tagName */ }, { key: 'setRawtextModeForTag', value: function setRawtextModeForTag(tagName) { this.pushDialect("rawtext"); this.setExpectedRawtextClosingTagName(tagName); } /** * @return {Array.<Op>} */ }, { key: 'generateNextCodeFragment', value: function generateNextCodeFragment() { var token = this._scanner.nextToken(); if (token === null) { return null; } return this.generateCodeForTokenInDialect(token, this.currentDialect()); } /** * Take a node and the dialect in which it was found, and tell the * parser what to do next. * @private * @param {Foundation.Scanner.Token} token * @param {string} dialect * @return {Array.<Op>} */ }, { key: 'generateCodeForTokenInDialect', value: function generateCodeForTokenInDialect(token, dialect) { switch (dialect) { case "content": return this.generateCodeForContentToken(token); case "comment": return this.generateCodeForCommentToken(token); case "entity": return this.generateCodeForEntityToken(token); case "tag": return this.generateCodeForTagToken(token); case "attribute": return this.generateCodeForAttributeToken(token); case "attributeValue": return this.generateCodeForAttributeValueToken(token); case "attributeValueQuoted": return this.generateCodeForAttributeValueQuotedToken(token); case "closeTag": return this.generateCodeForCloseTagToken(token); case "rawtext": return this.generateCodeForRawtextToken(token); default: throw new Error("Called compileTokenForDialect on unsuppoted dialect."); } } /** * @private * @param {Foundation.Scanner.Token} token * @return {Array.<Op>} */ }, { key: 'generateCodeForContentToken', value: function generateCodeForContentToken(token) { switch (token.type) { case "text": case "error": return [new _Op2.default(_Instr2.default.PUSH_NODE, new _HTMLNode.TextNode()), new _Op2.default(_Instr2.default.ADD_TOKEN, token), new _Op2.default(_Instr2.default.POP_NODE)]; case "commentStart": this.pushDialect("comment"); return [new _Op2.default(_Instr2.default.PUSH_NODE, new _HTMLNode.CommentNode()), new _Op2.default(_Instr2.default.ADD_TOKEN, token)]; case "entityStart": this.pushDialect("entity"); return [new _Op2.default(_Instr2.default.PUSH_NODE, new _HTMLNode.EntityNode()), new _Op2.default(_Instr2.default.ADD_TOKEN, token)]; case "tagStart": this.pushDialect("tag"); return [new _Op2.default(_Instr2.default.PUSH_NODE, new _HTMLNode.TagNode()), new _Op2.default(_Instr2.default.ADD_TOKEN, token)]; case "closeTagStart": this.pushDialect("closeTag"); return [new _Op2.default(_Instr2.default.PUSH_NODE, new _HTMLNode.CloseTagNode()), new _Op2.default(_Instr2.default.ADD_TOKEN, token)]; default: throw unknownTokenAssertion(token); } } /** * @private * @param {Foundation.Scanner.Token} token * @return {Array.<Op>} */ }, { key: 'generateCodeForCommentToken', value: function generateCodeForCommentToken(token) { switch (token.type) { case "text": case "dash": return [new _Op2.default(_Instr2.default.ADD_TOKEN, token)]; case "commentEnd": this.popDialect(); return [new _Op2.default(_Instr2.default.ADD_TOKEN, token), new _Op2.default(_Instr2.default.POP_NODE)]; default: throw unknownTokenAssertion(token); } } /** * @private * @param {Foundation.Scanner.Token} token */ }, { key: 'generateCodeForEntityToken', value: function generateCodeForEntityToken(token) { switch (token.type) { case "hex": case "dec": case "named": return [new _Op2.default(_Instr2.default.ADD_TOKEN, token)]; case "error": case "entityEnd": this.popDialect(); return [new _Op2.default(_Instr2.default.ADD_TOKEN, token), new _Op2.default(_Instr2.default.POP_NODE)]; default: throw unknownTokenAssertion(token); } } /** * @private * @param {Foundation.Scanner.Token} token * @return {Array.<Op>} */ }, { key: 'generateCodeForTagToken', value: function generateCodeForTagToken(token) { switch (token.type) { case "tagEnd": this.popDialect(); return [new _Op2.default(_Instr2.default.ADD_TOKEN, token), new _Op2.default(_Instr2.default.POP_NODE)]; case "whitespace": case "selfClose": return [new _Op2.default(_Instr2.default.ADD_TOKEN, token)]; case "attributeStart": case "error": this.pushDialect("attribute"); return [new _Op2.default(_Instr2.default.PUSH_NODE, new _HTMLNode.AttrNode()), new _Op2.default(_Instr2.default.ADD_TOKEN, token)]; default: throw unknownTokenAssertion(token); } } /** * @private * @param {Foundation.Scanner.Token} token * @return {Array.<Op>} */ }, { key: 'generateCodeForAttributeToken', value: function generateCodeForAttributeToken(token) { switch (token.type) { case "attributeValueQuotedStart": var isDquo = !!token.value.match(/"$/); this.setExpectedAttributeValueEndTokenType(isDquo ? "dquo" : "squo"); // continue into next case... case "attributeValueStart": this.popDialect(); this.pushDialect(token.type.replace(/Start$/, "")); // continue into next case... case "whitespace": return [new _Op2.default(_Instr2.default.ADD_TOKEN, token)]; case "tagEnd": this.popDialect(); // pop out of attribute dialect this.popDialect(); // pop out of tag dialect return [new _Op2.default(_Instr2.default.POP_NODE), new _Op2.default(_Instr2.default.ADD_TOKEN, token), new _Op2.default(_Instr2.default.POP_NODE)]; case "selfClose": this.popDialect(); return [new _Op2.default(_Instr2.default.POP_NODE), new _Op2.default(_Instr2.default.ADD_TOKEN, token)]; case "attributeStart": case "error": return [new _Op2.default(_Instr2.default.POP_NODE), new _Op2.default(_Instr2.default.PUSH_NODE, new _HTMLNode.AttrNode()), new _Op2.default(_Instr2.default.ADD_TOKEN, token)]; default: throw unknownTokenAssertion(token); } } /** * @private * @param {Foundation.Scanner.Token} token * @return {Array.<Op>} */ }, { key: 'generateCodeForAttributeValueToken', value: function generateCodeForAttributeValueToken(token) { switch (token.type) { case "whitespace": this.popDialect(); return [new _Op2.default(_Instr2.default.POP_NODE), new _Op2.default(_Instr2.default.ADD_TOKEN, token)]; case "entityStart": this.pushDialect("entity"); return [new _Op2.default(_Instr2.default.PUSH_NODE, new _HTMLNode.EntityNode()), new _Op2.default(_Instr2.default.ADD_TOKEN, token)]; case "tagEnd": this.popDialect(); // pop out of attributeValue dialect this.popDialect(); // pop out of tag dialect return [new _Op2.default(_Instr2.default.POP_NODE), new _Op2.default(_Instr2.default.ADD_TOKEN, token), new _Op2.default(_Instr2.default.POP_NODE)]; case "text": case "error": return [new _Op2.default(_Instr2.default.ADD_TOKEN, token)]; default: throw unknownTokenAssertion(token); } } /** * @private * @param {Foundation.Scanner.Token} token * @return {Array.<Op>} */ }, { key: 'generateCodeForAttributeValueQuotedToken', value: function generateCodeForAttributeValueQuotedToken(token) { switch (token.type) { case "dquo": case "squo": if (token.type === this.expectedAttributeValueEndTokenType()) { this.popDialect(); return [new _Op2.default(_Instr2.default.ADD_TOKEN, token), new _Op2.default(_Instr2.default.POP_NODE)]; } else { token.type = "text"; return [new _Op2.default(_Instr2.default.ADD_TOKEN, token)]; } case "entityStart": this.pushDialect("entity"); return [new _Op2.default(_Instr2.default.PUSH_NODE, new _HTMLNode.EntityNode()), new _Op2.default(_Instr2.default.ADD_TOKEN, token)]; case "error": case "text": return [new _Op2.default(_Instr2.default.ADD_TOKEN, token)]; default: throw unknownTokenAssertion(token); } } /** * @private * @param {Foundation.Scanner.Token} token * @return {Array.<Op>} */ }, { key: 'generateCodeForCloseTagToken', value: function generateCodeForCloseTagToken(token) { switch (token.type) { case "whitespace": case "error": return [new _Op2.default(_Instr2.default.ADD_TOKEN, token)]; case "tagEnd": this.popDialect(); return [new _Op2.default(_Instr2.default.ADD_TOKEN, token), new _Op2.default(_Instr2.default.POP_NODE)]; default: throw unknownTokenAssertion(token); } } /** * @private * @param {Foundation.Scanner.Token} token * @return {Array.<Op>} */ }, { key: 'generateCodeForRawtextToken', value: function generateCodeForRawtextToken(token) { switch (token.type) { case "closeTag": var closeTagName = token.value.toLowerCase().replace(/\W/g, ""); if (closeTagName === this.expectedRawtextClosingTagName()) { this.popDialect(); return [new _Op2.default(_Instr2.default.PUSH_NODE, new _HTMLNode.CloseTagNode()), new _Op2.default(_Instr2.default.ADD_TOKEN, token), new _Op2.default(_Instr2.default.POP_NODE)]; } case "text": case "lt": return [new _Op2.default(_Instr2.default.PUSH_NODE, new _HTMLNode.TextNode()), new _Op2.default(_Instr2.default.ADD_TOKEN, token), new _Op2.default(_Instr2.default.POP_NODE)]; default: throw unknownTokenAssertion(token); } } }]); return Compiler; }(); /** * @private * @param {Foundation.Scanner.Token} token * @return {Error} */ exports.default = Compiler; function unknownTokenAssertion(token) { return new Error("failed assertion: unkown token type: " + token.type); } //# sourceMappingURL=data:application/json;base64,{"version":3,"sources":["../../lib/Compiler.js"],"names":[],"mappings":";;;;;;;;AAAA;;;;AACA;;;;AACA;;;;AACA;;;;;;IAEqB,Q;AACnB,sBAAe;AAAA;;AACb,QAAI,cAAc,mBAAlB;AACA,QAAI,iBAAiB,aAArB;;AAEA;;;;;;;;;AASA,SAAK,QAAL,GAAgB,sBAAY;AAC1B;AACA,iBAAW,CACT,EAAC,QAAQ,SAAT,EADS,EAET,EAAC,gBAAgB,MAAjB,EAFS,EAGT,EAAC,eAAe,WAAhB,EAHS,EAIT,EAAC,YAAY,6BAAb,EAJS,EAKT,EAAC,iBAAiB,+BAAlB,EALS,EAMT,EAAC,SAAS,OAAV,EANS,CAFe;;AAW1B;AACA,iBAAW,CACT,EAAC,cAAc,KAAf,EADS,EAET,EAAC,QAAQ,GAAT,EAFS,EAGT,EAAC,QAAQ,OAAT,EAHS,CAZe;;AAkB1B;AACA,gBAAU,CACR,EAAC,aAAa,GAAd,EADQ,EAER,EAAC,OAAO,cAAR,EAFQ,EAGR,EAAC,OAAO,MAAR,EAHQ,EAIR,EAAC,SAAS,iBAAV,EAJQ,EAKR,EAAC,SAAS,KAAV,EALQ,CAnBgB;;AA2B1B;AACA,aAAO,CACL,EAAC,UAAU,GAAX,EADK,EAEL,EAAC,cAAc,KAAf,EAFK,EAGL,EAAC,aAAa,IAAd,EAHK,EAIL,EAAC,SAAS,QAAV,EAJK,EAKL,EAAC,kBAAkB,cAAnB,EALK,CA5BmB;;AAoC1B;AACA,mBAAa,CACX,EAAC,cAAc,KAAf,EADW,EAEX,EAAC,6BAA6B,OAA9B,EAFW,EAGX,EAAC,uBAAuB,GAAxB,EAHW,EAIX,EAAC,UAAU,GAAX,EAJW,EAKX,EAAC,aAAa,IAAd,EALW,EAMX,EAAC,SAAS,OAAV,EANW,EAOX,EAAC,kBAAkB,cAAnB,EAPW,CArCa;;AA+C1B;AACA,wBAAkB,CAChB,EAAC,cAAc,KAAf,EADgB,EAEhB,EAAC,eAAe,WAAhB,EAFgB,EAGhB,EAAC,UAAU,GAAX,EAHgB,EAIhB,EAAC,SAAS,UAAV,EAJgB,EAKhB,EAAC,QAAQ,eAAT,EALgB,CAhDQ;;AAwD1B;AACA,8BAAwB,CACtB,EAAC,QAAQ,GAAT,EADsB,EAEtB,EAAC,QAAQ,GAAT,EAFsB,EAGtB,EAAC,eAAe,WAAhB,EAHsB,EAItB,EAAC,SAAS,GAAV,EAJsB,EAKtB,EAAC,QAAQ,SAAT,EALsB,CAzDE;;AAiE1B;AACA,kBAAY,CACV,EAAC,UAAU,GAAX,EADU,EAEV,EAAC,cAAc,KAAf,EAFU,EAGV,EAAC,SAAS,SAAV,EAHU,CAlEc;;AAwE1B;AACA,iBAAW,CACT,EAAC,YAAY,gBAAb,EADS,EAET,EAAC,QAAQ,OAAT,EAFS,EAGT,EAAC,MAAM,GAAP,EAHS;AAzEe,KAAZ,CAAhB;;AAgFA;;;;AAIA,SAAK,mCAAL;;AAEA;;;;AAIA,SAAK,8BAAL;AACD;;AAGD;;;;;;;6BAGU,I,EAAM;AACd,WAAK,QAAL,CAAc,SAAd,CAAwB,IAAxB;AACA,WAAK,QAAL,CAAc,WAAd,CAA0B,SAA1B;AACD;;AAED;;;;;;;gCAIa,O,EAAS;AACpB,WAAK,QAAL,CAAc,WAAd,CAA0B,OAA1B;AACD;;AAED;;;;;;iCAGc;AACZ,WAAK,QAAL,CAAc,UAAd;AACD;;AAED;;;;;;;qCAIkB;AAChB,UAAI,UAAU,KAAK,QAAL,CAAc,cAAd,EAAd;;AAEA,UAAI,YAAY,IAAhB,EAAsB;AACpB,cAAM,IAAI,KAAJ,CAAU,oCAAV,CAAN;AACD;;AAED,aAAO,OAAP;AACD;;AAED;;;;;;;0DAIuC,S,EAAW;AAChD,WAAK,mCAAL,GAA2C,SAA3C;AACD;;AAED;;;;;;;yDAIsC;AACpC,aAAO,KAAK,mCAAZ;AACD;;AAED;;;;;;;qDAIkC,I,EAAM;AACtC,WAAK,8BAAL,GAAsC,KAAK,WAAL,EAAtC;AACD;;AAED;;;;;;;oDAIiC;AAC/B,aAAO,KAAK,8BAAZ;AACD;;AAED;;;;;;yCAGsB,O,EAAS;AAC7B,WAAK,WAAL,CAAiB,SAAjB;AACA,WAAK,gCAAL,CAAsC,OAAtC;AACD;;AAED;;;;;;+CAG4B;AAC1B,UAAI,QAAQ,KAAK,QAAL,CAAc,SAAd,EAAZ;;AAEA,UAAI,UAAU,IAAd,EAAoB;AAClB,eAAO,IAAP;AACD;;AAED,aAAO,KAAK,6BAAL,CAAmC,KAAnC,EAA0C,KAAK,cAAL,EAA1C,CAAP;AACD;;AAED;;;;;;;;;;;kDAQ+B,K,EAAO,O,EAAS;AAC7C,cAAQ,OAAR;AACE,aAAK,SAAL;AACE,iBAAO,KAAK,2BAAL,CAAiC,KAAjC,CAAP;AACF,aAAK,SAAL;AACE,iBAAO,KAAK,2BAAL,CAAiC,KAAjC,CAAP;AACF,aAAK,QAAL;AACE,iBAAO,KAAK,0BAAL,CAAgC,KAAhC,CAAP;AACF,aAAK,KAAL;AACE,iBAAO,KAAK,uBAAL,CAA6B,KAA7B,CAAP;AACF,aAAK,WAAL;AACE,iBAAO,KAAK,6BAAL,CAAmC,KAAnC,CAAP;AACF,aAAK,gBAAL;AACE,iBAAO,KAAK,kCAAL,CAAwC,KAAxC,CAAP;AACF,aAAK,sBAAL;AACE,iBAAO,KAAK,wCAAL,CAA8C,KAA9C,CAAP;AACF,aAAK,UAAL;AACE,iBAAO,KAAK,4BAAL,CAAkC,KAAlC,CAAP;AACF,aAAK,SAAL;AACE,iBAAO,KAAK,2BAAL,CAAiC,KAAjC,CAAP;AACF;AACE,gBAAM,IAAI,KAAJ,CAAU,sDAAV,CAAN;AApBJ;AAsBD;;AAED;;;;;;;;gDAK6B,K,EAAO;AAClC,cAAQ,MAAM,IAAd;AACE,aAAK,MAAL;AACA,aAAK,OAAL;AACE,iBAAO,CACL,iBAAO,gBAAM,SAAb,EAAwB,wBAAxB,CADK,EAEL,iBAAO,gBAAM,SAAb,EAAwB,KAAxB,CAFK,EAGL,iBAAO,gBAAM,QAAb,CAHK,CAAP;AAKF,aAAK,cAAL;AACE,eAAK,WAAL,CAAiB,SAAjB;AACA,iBAAO,CACL,iBAAO,gBAAM,SAAb,EAAwB,2BAAxB,CADK,EAEL,iBAAO,gBAAM,SAAb,EAAwB,KAAxB,CAFK,CAAP;AAIF,aAAK,aAAL;AACE,eAAK,WAAL,CAAiB,QAAjB;AACA,iBAAO,CACL,iBAAO,gBAAM,SAAb,EAAwB,0BAAxB,CADK,EAEL,iBAAO,gBAAM,SAAb,EAAwB,KAAxB,CAFK,CAAP;AAIF,aAAK,UAAL;AACE,eAAK,WAAL,CAAiB,KAAjB;AACA,iBAAO,CACL,iBAAO,gBAAM,SAAb,EAAwB,uBAAxB,CADK,EAEL,iBAAO,gBAAM,SAAb,EAAwB,KAAxB,CAFK,CAAP;AAIF,aAAK,eAAL;AACE,eAAK,WAAL,CAAiB,UAAjB;AACA,iBAAO,CACL,iBAAO,gBAAM,SAAb,EAAwB,4BAAxB,CADK,EAEL,iBAAO,gBAAM,SAAb,EAAwB,KAAxB,CAFK,CAAP;AAIF;AACE,gBAAM,sBAAsB,KAAtB,CAAN;AAjCJ;AAmCD;;AAED;;;;;;;;gDAK6B,K,EAAO;AAClC,cAAQ,MAAM,IAAd;AACE,aAAK,MAAL;AACA,aAAK,MAAL;AACE,iBAAO,CACL,iBAAO,gBAAM,SAAb,EAAwB,KAAxB,CADK,CAAP;AAGF,aAAK,YAAL;AACE,eAAK,UAAL;AACA,iBAAO,CACL,iBAAO,gBAAM,SAAb,EAAwB,KAAxB,CADK,EAEL,iBAAO,gBAAM,QAAb,CAFK,CAAP;AAIF;AACG,gBAAM,sBAAsB,KAAtB,CAAN;AAbL;AAeD;;AAED;;;;;;;+CAI4B,K,EAAO;AACjC,cAAQ,MAAM,IAAd;AACE,aAAK,KAAL;AACA,aAAK,KAAL;AACA,aAAK,OAAL;AACE,iBAAO,CACL,iBAAO,gBAAM,SAAb,EAAwB,KAAxB,CADK,CAAP;AAGF,aAAK,OAAL;AACA,aAAK,WAAL;AACE,eAAK,UAAL;AACA,iBAAO,CACL,iBAAO,gBAAM,SAAb,EAAwB,KAAxB,CADK,EAEL,iBAAO,gBAAM,QAAb,CAFK,CAAP;AAIF;AACE,gBAAM,sBAAsB,KAAtB,CAAN;AAfJ;AAiBD;;AAED;;;;;;;;4CAKyB,K,EAAO;AAC9B,cAAQ,MAAM,IAAd;AACE,aAAK,QAAL;AACE,eAAK,UAAL;AACA,iBAAO,CACL,iBAAO,gBAAM,SAAb,EAAwB,KAAxB,CADK,EAEL,iBAAO,gBAAM,QAAb,CAFK,CAAP;AAIF,aAAK,YAAL;AACA,aAAK,WAAL;AACE,iBAAO,CACL,iBAAO,gBAAM,SAAb,EAAwB,KAAxB,CADK,CAAP;AAGF,aAAK,gBAAL;AACA,aAAK,OAAL;AACE,eAAK,WAAL,CAAiB,WAAjB;AACA,iBAAO,CACL,iBAAO,gBAAM,SAAb,EAAwB,wBAAxB,CADK,EAEL,iBAAO,gBAAM,SAAb,EAAwB,KAAxB,CAFK,CAAP;AAIF;AACE,gBAAM,sBAAsB,KAAtB,CAAN;AApBJ;AAsBD;;AAED;;;;;;;;kDAK+B,K,EAAO;AACpC,cAAQ,MAAM,IAAd;AACE,aAAK,2BAAL;AACE,cAAI,SAAS,CAAC,CAAC,MAAM,KAAN,CAAY,KAAZ,CAAkB,IAAlB,CAAf;AACA,eAAK,qCAAL,CAA2C,SAAS,MAAT,GAAkB,MAA7D;AACA;AACF,aAAK,qBAAL;AACE,eAAK,UAAL;AACA,eAAK,WAAL,CAAiB,MAAM,IAAN,CAAW,OAAX,CAAmB,QAAnB,EAA6B,EAA7B,CAAjB;AACA;AACF,aAAK,YAAL;AACE,iBAAO,CACL,iBAAO,gBAAM,SAAb,EAAwB,KAAxB,CADK,CAAP;AAGF,aAAK,QAAL;AACE,eAAK,UAAL,GADF,CACsB;AACpB,eAAK,UAAL,GAFF,CAEsB;AACpB,iBAAO,CACL,iBAAO,gBAAM,QAAb,CADK,EAEL,iBAAO,gBAAM,SAAb,EAAwB,KAAxB,CAFK,EAGL,iBAAO,gBAAM,QAAb,CAHK,CAAP;AAKF,aAAK,WAAL;AACE,eAAK,UAAL;AACA,iBAAO,CACL,iBAAO,gBAAM,QAAb,CADK,EAEL,iBAAO,gBAAM,SAAb,EAAwB,KAAxB,CAFK,CAAP;AAIF,aAAK,gBAAL;AACA,aAAK,OAAL;AACE,iBAAO,CACL,iBAAO,gBAAM,QAAb,CADK,EAEL,iBAAO,gBAAM,SAAb,EAAwB,wBAAxB,CAFK,EAGL,iBAAO,gBAAM,SAAb,EAAwB,KAAxB,CAHK,CAAP;AAKF;AACE,gBAAM,sBAAsB,KAAtB,CAAN;AAnCJ;AAqCD;;AAED;;;;;;;;uDAKoC,K,EAAO;AACzC,cAAQ,MAAM,IAAd;AACE,aAAK,YAAL;AACE,eAAK,UAAL;AACA,iBAAO,CACL,iBAAO,gBAAM,QAAb,CADK,EAEL,iBAAO,gBAAM,SAAb,EAAwB,KAAxB,CAFK,CAAP;AAIF,aAAK,aAAL;AACE,eAAK,WAAL,CAAiB,QAAjB;AACA,iBAAO,CACL,iBAAO,gBAAM,SAAb,EAAwB,0BAAxB,CADK,EAEL,iBAAO,gBAAM,SAAb,EAAwB,KAAxB,CAFK,CAAP;AAIF,aAAK,QAAL;AACE,eAAK,UAAL,GADF,CACsB;AACpB,eAAK,UAAL,GAFF,CAEsB;AACpB,iBAAO,CACL,iBAAO,gBAAM,QAAb,CADK,EAEL,iBAAO,gBAAM,SAAb,EAAwB,KAAxB,CAFK,EAGL,iBAAO,gBAAM,QAAb,CAHK,CAAP;AAKF,aAAK,MAAL;AACA,aAAK,OAAL;AACE,iBAAO,CACL,iBAAO,gBAAM,SAAb,EAAwB,KAAxB,CADK,CAAP;AAGF;AACE,gBAAM,sBAAsB,KAAtB,CAAN;AA3BJ;AA6BD;;AAED;;;;;;;;6DAK0C,K,EAAO;AAC/C,cAAQ,MAAM,IAAd;AACE,aAAK,MAAL;AACA,aAAK,MAAL;AACE,cAAI,MAAM,IAAN,KAAe,KAAK,kCAAL,EAAnB,EAA8D;AAC5D,iBAAK,UAAL;AACA,mBAAO,CACL,iBAAO,gBAAM,SAAb,EAAwB,KAAxB,CADK,EAEL,iBAAO,gBAAM,QAAb,CAFK,CAAP;AAID,WAND,MAOK;AACH,kBAAM,IAAN,GAAa,MAAb;AACA,mBAAO,CACL,iBAAO,gBAAM,SAAb,EAAwB,KAAxB,CADK,CAAP;AAGD;AACH,aAAK,aAAL;AACE,eAAK,WAAL,CAAiB,QAAjB;AACA,iBAAO,CACL,iBAAO,gBAAM,SAAb,EAAwB,0BAAxB,CADK,EAEL,iBAAO,gBAAM,SAAb,EAAwB,KAAxB,CAFK,CAAP;AAIF,aAAK,OAAL;AACA,aAAK,MAAL;AACE,iBAAO,CACL,iBAAO,gBAAM,SAAb,EAAwB,KAAxB,CADK,CAAP;AAGF;AACE,gBAAM,sBAAsB,KAAtB,CAAN;AA5BJ;AA8BD;;AAED;;;;;;;;iDAK8B,K,EAAO;AACnC,cAAQ,MAAM,IAAd;AACE,aAAK,YAAL;AACA,aAAK,OAAL;AACE,iBAAO,CACL,iBAAO,gBAAM,SAAb,EAAwB,KAAxB,CADK,CAAP;AAGF,aAAK,QAAL;AACE,eAAK,UAAL;AACA,iBAAO,CACL,iBAAO,gBAAM,SAAb,EAAwB,KAAxB,CADK,EAEL,iBAAO,gBAAM,QAAb,CAFK,CAAP;AAIF;AACE,gBAAM,sBAAsB,KAAtB,CAAN;AAbJ;AAeD;;AAED;;;;;;;;gDAK6B,K,EAAO;AAClC,cAAQ,MAAM,IAAd;AACE,aAAK,UAAL;AACE,cAAI,eAAe,MAAM,KAAN,CAAY,WAAZ,GAA0B,OAA1B,CAAkC,KAAlC,EAAyC,EAAzC,CAAnB;AACA,cAAI,iBAAiB,KAAK,6BAAL,EAArB,EAA2D;AACzD,iBAAK,UAAL;AACA,mBAAO,CACL,iBAAO,gBAAM,SAAb,EAAwB,4BAAxB,CADK,EAEL,iBAAO,gBAAM,SAAb,EAAwB,KAAxB,CAFK,EAGL,iBAAO,gBAAM,QAAb,CAHK,CAAP;AAKD;AACH,aAAK,MAAL;AACA,aAAK,IAAL;AACE,iBAAO,CACL,iBAAO,gBAAM,SAAb,EAAwB,wBAAxB,CADK,EAEL,iBAAO,gBAAM,SAAb,EAAwB,KAAxB,CAFK,EAGL,iBAAO,gBAAM,QAAb,CAHK,CAAP;AAKF;AACE,gBAAM,sBAAsB,KAAtB,CAAN;AAnBJ;AAqBD;;;;;;AAGH;;;;;;;kBA5gBqB,Q;AAihBrB,SAAS,qBAAT,CAAgC,KAAhC,EAAuC;AACrC,SAAO,IAAI,KAAJ,CAAU,0CAA0C,MAAM,IAA1D,CAAP;AACD","file":"Compiler.js","sourcesContent":["import Instr from './Instr'\nimport Op from './Op'\nimport Scanner from '@desertnet/scanner'\nimport {TagNode, TextNode, AttrNode, CloseTagNode, CommentNode, EntityNode} from './HTMLNode'\n\nexport default class Compiler {\n  constructor () {\n    var entityStart = /&(?=[a-z0-9#]+;)/i;\n    var attributeStart = /[^>=\\s\\/]+/i;\n\n    /**\n     * The tokenizing scanner for the input string. HTML contains\n     * many contexts where the scanner needs to accept differnt\n     * sets of tokens. The @desertnet/scanner module calls these\n     * sets of token definitions \"dialects\". Below, we're initializing\n     * our scanner with all these different dialects.\n     * @private\n     * @type {Scanner}\n     */\n    this._scanner = new Scanner({\n      // Starting dialect, for content \"outside of a tag\".\n      \"content\": [\n        {\"text\": /[^<>&]+/},\n        {\"commentStart\": /<!--/},\n        {\"entityStart\": entityStart},\n        {\"tagStart\": /<[a-z][^\\t\\n\\ \\/\\>\\0\\xff]*/i},\n        {\"closeTagStart\": /<\\/[a-z][^\\t\\n\\ \\/\\>\\0\\xff]*/i},\n        {\"error\": /[<>&]/}\n      ],\n\n      // Dialect for the inside of comment tags.\n      \"comment\": [\n        {\"commentEnd\": /-->/},\n        {\"dash\": /-/},\n        {\"text\": /[^-]+/}\n      ],\n\n      // Dialect for the inside of HTML entities.\n      \"entity\": [\n        {\"entityEnd\": /;/},\n        {\"hex\": /#x[a-f0-9]+/i},\n        {\"dec\": /#\\d+/},\n        {\"named\": /[a-z][a-z0-9]*/i},\n        {\"error\": /[^]/}\n      ],\n\n      // Dialect for the inside of tags.\n      \"tag\": [\n        {\"tagEnd\": />/},\n        {\"whitespace\": /\\s+/},\n        {\"selfClose\": /\\//},\n        {\"error\": /['\"<=]/},\n        {\"attributeStart\": attributeStart}\n      ],\n\n      // Initial dialect for attributes.\n      \"attribute\": [\n        {\"whitespace\": /\\s+/},\n        {\"attributeValueQuotedStart\": /=['\"]/},\n        {\"attributeValueStart\": /=/},\n        {\"tagEnd\": />/},\n        {\"selfClose\": /\\//},\n        {\"error\": /['\"<]/},\n        {\"attributeStart\": attributeStart}\n      ],\n\n      // Dialect for unquoted attribute values.\n      \"attributeValue\": [\n        {\"whitespace\": /\\s+/},\n        {\"entityStart\": entityStart},\n        {\"tagEnd\": />/},\n        {\"error\": /['\"<=`&]/},\n        {\"text\": /[^'\"<>=`&\\s]+/}\n      ],\n\n      // Dialect for quoted attribute values.\n      \"attributeValueQuoted\": [\n        {\"dquo\": /\"/},\n        {\"squo\": /'/},\n        {\"entityStart\": entityStart},\n        {\"error\": /&/},\n        {\"text\": /[^\"'&]+/}\n      ],\n\n      // Dialect for closing tags.\n      \"closeTag\": [\n        {\"tagEnd\": />/},\n        {\"whitespace\": /\\s+/},\n        {\"error\": /[^\\s>]+/}\n      ],\n\n      // Dialect for inside of script, style, and xmp tags\n      \"rawtext\": [\n        {\"closeTag\": /<\\/[a-z]+\\s*>/i},\n        {\"text\": /[^<]+/},\n        {\"lt\": /</}\n      ]\n    });\n\n    /**\n     * @private\n     * @type {string}\n     */\n    this._expectedAttributeValueEndTokenType;\n\n    /**\n     * @private\n     * @type {string}\n     */\n    this._expectedRawtextClosingTagName;\n  }\n\n\n  /**\n   * @param {string} html\n   */\n  setInput (html) {\n    this._scanner.setSource(html);\n    this._scanner.pushDialect(\"content\");\n  }\n\n  /**\n   * @private\n   * @param {string} dialect\n   */\n  pushDialect (dialect) {\n    this._scanner.pushDialect(dialect);\n  }\n\n  /**\n   * @private\n   */\n  popDialect () {\n    this._scanner.popDialect();\n  }\n\n  /**\n   * @private\n   * @return {string}\n   */\n  currentDialect () {\n    var dialect = this._scanner.currentDialect();\n\n    if (dialect === null) {\n      throw new Error(\"Scanner dialect unexpectedly null.\");\n    }\n\n    return dialect;\n  }\n\n  /**\n   * @private\n   * @param {string} tokenType\n   */\n  setExpectedAttributeValueEndTokenType (tokenType) {\n    this._expectedAttributeValueEndTokenType = tokenType;\n  }\n\n  /**\n   * @private\n   * @return {string}\n   */\n  expectedAttributeValueEndTokenType () {\n    return this._expectedAttributeValueEndTokenType;\n  }\n\n  /**\n   * @private\n   * @param {string} name\n   */\n  setExpectedRawtextClosingTagName (name) {\n    this._expectedRawtextClosingTagName = name.toLowerCase();\n  }\n\n  /**\n   * @private\n   * @return {string}\n   */\n  expectedRawtextClosingTagName () {\n    return this._expectedRawtextClosingTagName;\n  }\n\n  /**\n   * @param {string} tagName\n   */\n  setRawtextModeForTag (tagName) {\n    this.pushDialect(\"rawtext\");\n    this.setExpectedRawtextClosingTagName(tagName);\n  }\n\n  /**\n   * @return {Array.<Op>}\n   */\n  generateNextCodeFragment () {\n    var token = this._scanner.nextToken();\n\n    if (token === null) {\n      return null;\n    }\n\n    return this.generateCodeForTokenInDialect(token, this.currentDialect());\n  }\n\n  /**\n   * Take a node and the dialect in which it was found, and tell the\n   * parser what to do next.\n   * @private\n   * @param {Foundation.Scanner.Token} token\n   * @param {string} dialect\n   * @return {Array.<Op>}\n   */\n  generateCodeForTokenInDialect (token, dialect) {\n    switch (dialect) {\n      case \"content\":\n        return this.generateCodeForContentToken(token);\n      case \"comment\":\n        return this.generateCodeForCommentToken(token);\n      case \"entity\":\n        return this.generateCodeForEntityToken(token);\n      case \"tag\":\n        return this.generateCodeForTagToken(token);\n      case \"attribute\":\n        return this.generateCodeForAttributeToken(token);\n      case \"attributeValue\":\n        return this.generateCodeForAttributeValueToken(token);\n      case \"attributeValueQuoted\":\n        return this.generateCodeForAttributeValueQuotedToken(token);\n      case \"closeTag\":\n        return this.generateCodeForCloseTagToken(token);\n      case \"rawtext\":\n        return this.generateCodeForRawtextToken(token);\n      default:\n        throw new Error(\"Called compileTokenForDialect on unsuppoted dialect.\");\n    }\n  }\n\n  /**\n   * @private\n   * @param {Foundation.Scanner.Token} token\n   * @return {Array.<Op>}\n   */\n  generateCodeForContentToken (token) {\n    switch (token.type) {\n      case \"text\":\n      case \"error\":\n        return [\n          new Op(Instr.PUSH_NODE, new TextNode()),\n          new Op(Instr.ADD_TOKEN, token),\n          new Op(Instr.POP_NODE)\n        ];\n      case \"commentStart\":\n        this.pushDialect(\"comment\");\n        return [\n          new Op(Instr.PUSH_NODE, new CommentNode()),\n          new Op(Instr.ADD_TOKEN, token)\n        ];\n      case \"entityStart\":\n        this.pushDialect(\"entity\");\n        return [\n          new Op(Instr.PUSH_NODE, new EntityNode()),\n          new Op(Instr.ADD_TOKEN, token)\n        ];\n      case \"tagStart\":\n        this.pushDialect(\"tag\");\n        return [\n          new Op(Instr.PUSH_NODE, new TagNode()),\n          new Op(Instr.ADD_TOKEN, token)\n        ];\n      case \"closeTagStart\":\n        this.pushDialect(\"closeTag\");\n        return [\n          new Op(Instr.PUSH_NODE, new CloseTagNode()),\n          new Op(Instr.ADD_TOKEN, token)\n        ];\n      default:\n        throw unknownTokenAssertion(token);\n    }\n  }\n\n  /**\n   * @private\n   * @param {Foundation.Scanner.Token} token\n   * @return {Array.<Op>}\n   */\n  generateCodeForCommentToken (token) {\n    switch (token.type) {\n      case \"text\":\n      case \"dash\":\n        return [\n          new Op(Instr.ADD_TOKEN, token)\n        ];\n      case \"commentEnd\":\n        this.popDialect();\n        return [\n          new Op(Instr.ADD_TOKEN, token),\n          new Op(Instr.POP_NODE)\n        ];\n      default:\n         throw unknownTokenAssertion(token);\n    }\n  }\n\n  /**\n   * @private\n   * @param {Foundation.Scanner.Token} token\n   */\n  generateCodeForEntityToken (token) {\n    switch (token.type) {\n      case \"hex\":\n      case \"dec\":\n      case \"named\":\n        return [\n          new Op(Instr.ADD_TOKEN, token)\n        ];\n      case \"error\":\n      case \"entityEnd\":\n        this.popDialect();\n        return [\n          new Op(Instr.ADD_TOKEN, token),\n          new Op(Instr.POP_NODE)\n        ];\n      default:\n        throw unknownTokenAssertion(token);\n    }\n  }\n\n  /**\n   * @private\n   * @param {Foundation.Scanner.Token} token\n   * @return {Array.<Op>}\n   */\n  generateCodeForTagToken (token) {\n    switch (token.type) {\n      case \"tagEnd\":\n        this.popDialect();\n        return [\n          new Op(Instr.ADD_TOKEN, token),\n          new Op(Instr.POP_NODE)\n        ];\n      case \"whitespace\":\n      case \"selfClose\":\n        return [\n          new Op(Instr.ADD_TOKEN, token)\n        ];\n      case \"attributeStart\":\n      case \"error\":\n        this.pushDialect(\"attribute\");\n        return [\n          new Op(Instr.PUSH_NODE, new AttrNode()),\n          new Op(Instr.ADD_TOKEN, token)\n        ];\n      default:\n        throw unknownTokenAssertion(token);\n    }\n  }\n\n  /**\n   * @private\n   * @param {Foundation.Scanner.Token} token\n   * @return {Array.<Op>}\n   */\n  generateCodeForAttributeToken (token) {\n    switch (token.type) {\n      case \"attributeValueQuotedStart\":\n        var isDquo = !!token.value.match(/\"$/);\n        this.setExpectedAttributeValueEndTokenType(isDquo ? \"dquo\" : \"squo\");\n        // continue into next case...\n      case \"attributeValueStart\":\n        this.popDialect();\n        this.pushDialect(token.type.replace(/Start$/, \"\"));\n        // continue into next case...\n      case \"whitespace\":\n        return [\n          new Op(Instr.ADD_TOKEN, token)\n        ];\n      case \"tagEnd\":\n        this.popDialect();  // pop out of attribute dialect\n        this.popDialect();  // pop out of tag dialect\n        return [\n          new Op(Instr.POP_NODE),\n          new Op(Instr.ADD_TOKEN, token),\n          new Op(Instr.POP_NODE)\n        ];\n      case \"selfClose\":\n        this.popDialect();\n        return [\n          new Op(Instr.POP_NODE),\n          new Op(Instr.ADD_TOKEN, token)\n        ];\n      case \"attributeStart\":\n      case \"error\":\n        return [\n          new Op(Instr.POP_NODE),\n          new Op(Instr.PUSH_NODE, new AttrNode()),\n          new Op(Instr.ADD_TOKEN, token)\n        ];\n      default:\n        throw unknownTokenAssertion(token);\n    }\n  }\n\n  /**\n   * @private\n   * @param {Foundation.Scanner.Token} token\n   * @return {Array.<Op>}\n   */\n  generateCodeForAttributeValueToken (token) {\n    switch (token.type) {\n      case \"whitespace\":\n        this.popDialect();\n        return [\n          new Op(Instr.POP_NODE),\n          new Op(Instr.ADD_TOKEN, token)\n        ];\n      case \"entityStart\":\n        this.pushDialect(\"entity\");\n        return [\n          new Op(Instr.PUSH_NODE, new EntityNode()),\n          new Op(Instr.ADD_TOKEN, token)\n        ];\n      case \"tagEnd\":\n        this.popDialect();  // pop out of attributeValue dialect\n        this.popDialect();  // pop out of tag dialect\n        return [\n          new Op(Instr.POP_NODE),\n          new Op(Instr.ADD_TOKEN, token),\n          new Op(Instr.POP_NODE)\n        ];\n      case \"text\":\n      case \"error\":\n        return [\n          new Op(Instr.ADD_TOKEN, token)\n        ];\n      default:\n        throw unknownTokenAssertion(token);\n    }\n  }\n\n  /**\n   * @private\n   * @param {Foundation.Scanner.Token} token\n   * @return {Array.<Op>}\n   */\n  generateCodeForAttributeValueQuotedToken (token) {\n    switch (token.type) {\n      case \"dquo\":\n      case \"squo\":\n        if (token.type === this.expectedAttributeValueEndTokenType()) {\n          this.popDialect();\n          return [\n            new Op(Instr.ADD_TOKEN, token),\n            new Op(Instr.POP_NODE)\n          ];\n        }\n        else {\n          token.type = \"text\";\n          return [\n            new Op(Instr.ADD_TOKEN, token)\n          ];\n        }\n      case \"entityStart\":\n        this.pushDialect(\"entity\");\n        return [\n          new Op(Instr.PUSH_NODE, new EntityNode()),\n          new Op(Instr.ADD_TOKEN, token)\n        ];\n      case \"error\":\n      case \"text\":\n        return [\n          new Op(Instr.ADD_TOKEN, token)\n        ];\n      default:\n        throw unknownTokenAssertion(token);\n    }\n  }\n\n  /**\n   * @private\n   * @param {Foundation.Scanner.Token} token\n   * @return {Array.<Op>}\n   */\n  generateCodeForCloseTagToken (token) {\n    switch (token.type) {\n      case \"whitespace\":\n      case \"error\":\n        return [\n          new Op(Instr.ADD_TOKEN, token)\n        ];\n      case \"tagEnd\":\n        this.popDialect();\n        return [\n          new Op(Instr.ADD_TOKEN, token),\n          new Op(Instr.POP_NODE)\n        ];\n      default:\n        throw unknownTokenAssertion(token);\n    }\n  }\n\n  /**\n   * @private\n   * @param {Foundation.Scanner.Token} token\n   * @return {Array.<Op>}\n   */\n  generateCodeForRawtextToken (token) {\n    switch (token.type) {\n      case \"closeTag\":\n        var closeTagName = token.value.toLowerCase().replace(/\\W/g, \"\");\n        if (closeTagName === this.expectedRawtextClosingTagName()) {\n          this.popDialect();\n          return [\n            new Op(Instr.PUSH_NODE, new CloseTagNode()),\n            new Op(Instr.ADD_TOKEN, token),\n            new Op(Instr.POP_NODE)\n          ];\n        }\n      case \"text\":\n      case \"lt\":\n        return [\n          new Op(Instr.PUSH_NODE, new TextNode()),\n          new Op(Instr.ADD_TOKEN, token),\n          new Op(Instr.POP_NODE)\n        ];\n      default:\n        throw unknownTokenAssertion(token);\n    }\n  }\n}\n\n/**\n * @private\n * @param {Foundation.Scanner.Token} token\n * @return {Error}\n */\nfunction unknownTokenAssertion (token) {\n  return new Error(\"failed assertion: unkown token type: \" + token.type);\n}\n"]}