UNPKG

@desertnet/html-parser

Version:

HTML parser and non-strict validator

266 lines (210 loc) 15.7 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.NodeError = exports.NodeType = undefined; var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; }(); var _HTMLParseError = require("../HTMLParseError"); var _HTMLParseError2 = _interopRequireDefault(_HTMLParseError); function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } function _possibleConstructorReturn(self, call) { if (!self) { throw new ReferenceError("this hasn't been initialised - super() hasn't been called"); } return call && (typeof call === "object" || typeof call === "function") ? call : self; } function _inherits(subClass, superClass) { if (typeof superClass !== "function" && superClass !== null) { throw new TypeError("Super expression must either be null or a function, not " + typeof superClass); } subClass.prototype = Object.create(superClass && superClass.prototype, { constructor: { value: subClass, enumerable: false, writable: true, configurable: true } }); if (superClass) Object.setPrototypeOf ? Object.setPrototypeOf(subClass, superClass) : subClass.__proto__ = superClass; } function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } /** * HTML node types. * @enum {string} */ var NodeType = exports.NodeType = { ROOT: "ROOT", TAG: "TAG", ATTRIBUTE: "ATTRIBUTE", TEXT: "TEXT", ENTITY: "ENTITY", COMMENT: "COMMENT", CLOSETAG: "CLOSETAG" }; /** * HTML tree nodes base class. Should never be called outside of * subclass constructors! */ var HTMLNode = function () { function HTMLNode() { _classCallCheck(this, HTMLNode); /** * @private * @type {Array.<Foundation.HTML.Parser.HTMLNode>?} */ this._children = null; /** * @private * @type {Array.<Foundation.Scanner.Token>} */ this._sourceTokens = []; /** * @private * @type {Array.<HTMLParseError>?} */ this._errors = null; } /** * Subclasses must override this. * @return {boolean} */ _createClass(HTMLNode, [{ key: "toString", /** * Subclasses must override this. * @return {string} */ value: function toString() { throw new NodeError("toString not overriden in subclass", this); } /** * @return {NodeType} */ }, { key: "addToken", /** * @param {Foundation.Scanner.Token} token */ value: function addToken(token) { this._sourceTokens.push(token); // Create error objects when adding a token of type error. if (token.type === "error") { var error = new _HTMLParseError2.default(); error.message = "Invalid token: \"" + token.value + "\""; error.addToken(token); this.addError(error); } } /** * @return {Array.<Foundation.Scanner.Token>} */ }, { key: "appendChild", /** * @param {Foundation.HTML.Parser.HTMLNode} childNode * @return {Foundation.HTML.Parser.HTMLNode} */ value: function appendChild(childNode) { if (!this.canHaveChildren) { throw new NodeError("attempted to call appendChild on node that can't have children", this); } if (!this._children) { this._children = []; } this._children.push(childNode); return this; } /** * @public * @return {Array.<HTMLParseError>} */ }, { key: "addError", /** * @param {HTMLParseError} error */ value: function addError(error) { if (this._errors === null) { this._errors = []; } this._errors.push(error); } /** * @private * @return {Array.<HTMLParseError>} */ }, { key: "canHaveChildren", get: function get() { throw new NodeError("canHaveChildren not overriden in subclass", this); } }, { key: "type", get: function get() { throw new NodeError("type getter not overriden in subclass", this); } /** * @return {Array.<Foundation.HTML.Parser.HTMLNode>?} */ }, { key: "children", get: function get() { if (this._children === null || this._children.length === 0) { return null; } return this._children.slice(0); } /** * @return {Foundation.HTML.Parser.HTMLNode} */ }, { key: "lastChild", get: function get() { var children = this.children; if (children === null || children.length === 0) { return null; } return children[children.length - 1]; } }, { key: "tokens", get: function get() { return this._sourceTokens.slice(0); } /** * @return {Array.<number>?} */ }, { key: "indexRange", get: function get() { var tokens = this.tokens; if (tokens.length === 0) { return null; } var firstToken = tokens[0]; var lastToken = tokens[tokens.length - 1]; return [firstToken.index, lastToken.index + lastToken.value.length - 1]; } }, { key: "errors", get: function get() { if (this.canHaveChildren && this.children) { return this.children.reduce(function (accumulator, child) { return accumulator.concat(child.errors); }, this.ownErrors); } return this.ownErrors; } }, { key: "ownErrors", get: function get() { return this._errors === null ? [] : this._errors.slice(0); } }]); return HTMLNode; }(); /** * HTML node error class. This is a throwable Error object, not to be * confused with the HTMLParseError class which deals * with reporting errors found in the HTML source. */ exports.default = HTMLNode; var NodeError = exports.NodeError = function (_Error) { _inherits(NodeError, _Error); /** * @param {string} message * @param {Foundation.HTML.Parser.HTMLNode} node */ function NodeError(message, node) { _classCallCheck(this, NodeError); var _this = _possibleConstructorReturn(this, Object.getPrototypeOf(NodeError).call(this, message)); _this.name = "HTMLParserNodeError"; _this.node = node; return _this; } return NodeError; }(Error); ; //# sourceMappingURL=data:application/json;base64,{"version":3,"sources":["../../../lib/HTMLNode/HTMLNode.js"],"names":[],"mappings":";;;;;;;;;AAAA;;;;;;;;;;;;AAEA;;;;AAIO,IAAM,8BAAW;AACtB,QAAM,MADgB;AAEtB,OAAM,KAFgB;AAGtB,aAAW,WAHW;AAItB,QAAM,MAJgB;AAKtB,UAAS,QALa;AAMtB,WAAS,SANa;AAOtB,YAAU;AAPY,CAAjB;;AAUP;;;;;IAIqB,Q;AACnB,sBAAe;AAAA;;AACb;;;;AAIA,SAAK,SAAL,GAAiB,IAAjB;;AAEA;;;;AAIA,SAAK,aAAL,GAAqB,EAArB;;AAEA;;;;AAIA,SAAK,OAAL,GAAe,IAAf;AACD;;AAED;;;;;;;;;;AAQA;;;;+BAIY;AACV,YAAM,IAAI,SAAJ,CAAc,oCAAd,EAAoD,IAApD,CAAN;AACD;;AAED;;;;;;;;AA6BA;;;6BAGU,K,EAAO;AACf,WAAK,aAAL,CAAmB,IAAnB,CAAwB,KAAxB;;AAEA;AACA,UAAI,MAAM,IAAN,KAAe,OAAnB,EAA4B;AAC1B,YAAI,QAAQ,8BAAZ;AACA,cAAM,OAAN,GAAgB,sBAAsB,MAAM,KAA5B,GAAoC,IAApD;AACA,cAAM,QAAN,CAAe,KAAf;AACA,aAAK,QAAL,CAAc,KAAd;AACD;AACF;;AAED;;;;;;;;AAwBA;;;;gCAIa,S,EAAW;AACtB,UAAI,CAAE,KAAK,eAAX,EAA4B;AAC1B,cAAM,IAAI,SAAJ,CACJ,gEADI,EAC8D,IAD9D,CAAN;AAGD;;AAED,UAAI,CAAE,KAAK,SAAX,EAAsB;AACpB,aAAK,SAAL,GAAiB,EAAjB;AACD;;AAED,WAAK,SAAL,CAAe,IAAf,CAAoB,SAApB;;AAEA,aAAO,IAAP;AACD;;AAED;;;;;;;;;AAcA;;;6BAGU,K,EAAO;AACf,UAAI,KAAK,OAAL,KAAiB,IAArB,EAA2B;AACzB,aAAK,OAAL,GAAe,EAAf;AACD;AACD,WAAK,OAAL,CAAa,IAAb,CAAkB,KAAlB;AACD;;AAED;;;;;;;wBA5HuB;AACrB,YAAM,IAAI,SAAJ,CAAc,2CAAd,EAA2D,IAA3D,CAAN;AACD;;;wBAaW;AACV,YAAM,IAAI,SAAJ,CAAc,uCAAd,EAAuD,IAAvD,CAAN;AACD;;AAED;;;;;;wBAGgB;AACd,UAAI,KAAK,SAAL,KAAmB,IAAnB,IAA2B,KAAK,SAAL,CAAe,MAAf,KAA0B,CAAzD,EAA4D;AAC1D,eAAO,IAAP;AACD;;AAED,aAAO,KAAK,SAAL,CAAe,KAAf,CAAqB,CAArB,CAAP;AACD;;AAED;;;;;;wBAGiB;AACf,UAAI,WAAW,KAAK,QAApB;AACA,UAAI,aAAa,IAAb,IAAqB,SAAS,MAAT,KAAoB,CAA7C,EAAgD;AAC9C,eAAO,IAAP;AACD;AACD,aAAO,SAAS,SAAS,MAAT,GAAkB,CAA3B,CAAP;AACD;;;wBAoBa;AACZ,aAAO,KAAK,aAAL,CAAmB,KAAnB,CAAyB,CAAzB,CAAP;AACD;;AAED;;;;;;wBAGkB;AAChB,UAAI,SAAS,KAAK,MAAlB;AACA,UAAI,OAAO,MAAP,KAAkB,CAAtB,EAAyB;AACvB,eAAO,IAAP;AACD;;AAED,UAAI,aAAa,OAAO,CAAP,CAAjB;AACA,UAAI,YAAY,OAAO,OAAO,MAAP,GAAgB,CAAvB,CAAhB;AACA,aAAO,CACL,WAAW,KADN,EAEL,UAAU,KAAV,GAAkB,UAAU,KAAV,CAAgB,MAAlC,GAA2C,CAFtC,CAAP;AAID;;;wBA0Ba;AACZ,UAAI,KAAK,eAAL,IAAwB,KAAK,QAAjC,EAA2C;AACzC,eAAO,KAAK,QAAL,CAAc,MAAd,CAAqB,UAAC,WAAD,EAAc,KAAd,EAAwB;AAClD,iBAAO,YAAY,MAAZ,CAAmB,MAAM,MAAzB,CAAP;AACD,SAFM,EAEJ,KAAK,SAFD,CAAP;AAGD;;AAED,aAAO,KAAK,SAAZ;AACD;;;wBAgBgB;AACf,aAAO,KAAK,OAAL,KAAiB,IAAjB,GAAwB,EAAxB,GAA6B,KAAK,OAAL,CAAa,KAAb,CAAmB,CAAnB,CAApC;AACD;;;;;;AAGH;;;;;;;kBA9JqB,Q;;IAmKR,S,WAAA,S;;;AACX;;;;AAIA,qBAAa,OAAb,EAAsB,IAAtB,EAA4B;AAAA;;AAAA,6FACpB,OADoB;;AAE1B,UAAK,IAAL,GAAY,qBAAZ;AACA,UAAK,IAAL,GAAY,IAAZ;AAH0B;AAI3B;;;EAT4B,K;;AAU9B","file":"HTMLNode.js","sourcesContent":["import HTMLParseError from '../HTMLParseError'\n\n/**\n * HTML node types.\n * @enum {string}\n */\nexport const NodeType = {\n  ROOT: \"ROOT\",\n  TAG:  \"TAG\",\n  ATTRIBUTE: \"ATTRIBUTE\",\n  TEXT: \"TEXT\",\n  ENTITY:  \"ENTITY\",\n  COMMENT: \"COMMENT\",\n  CLOSETAG: \"CLOSETAG\"\n};\n\n/**\n * HTML tree nodes base class. Should never be called outside of\n * subclass constructors!\n */\nexport default class HTMLNode {\n  constructor () {\n    /**\n     * @private\n     * @type {Array.<Foundation.HTML.Parser.HTMLNode>?}\n     */\n    this._children = null;\n\n    /**\n     * @private\n     * @type {Array.<Foundation.Scanner.Token>}\n     */\n    this._sourceTokens = [];\n\n    /**\n     * @private\n     * @type {Array.<HTMLParseError>?}\n     */\n    this._errors = null;\n  }\n\n  /**\n   * Subclasses must override this.\n   * @return {boolean}\n   */\n  get canHaveChildren () {\n    throw new NodeError(\"canHaveChildren not overriden in subclass\", this)\n  }\n\n  /**\n   * Subclasses must override this.\n   * @return {string}\n   */\n  toString () {\n    throw new NodeError(\"toString not overriden in subclass\", this)\n  }\n\n  /**\n   * @return {NodeType}\n   */\n  get type () {\n    throw new NodeError(\"type getter not overriden in subclass\", this)\n  }\n\n  /**\n   * @return {Array.<Foundation.HTML.Parser.HTMLNode>?}\n   */\n  get children () {\n    if (this._children === null || this._children.length === 0) {\n      return null;\n    }\n\n    return this._children.slice(0);\n  }\n\n  /**\n   * @return {Foundation.HTML.Parser.HTMLNode}\n   */\n  get lastChild () {\n    var children = this.children;\n    if (children === null || children.length === 0) {\n      return null;\n    }\n    return children[children.length - 1];\n  }\n\n  /**\n   * @param {Foundation.Scanner.Token} token\n   */\n  addToken (token) {\n    this._sourceTokens.push(token);\n\n    // Create error objects when adding a token of type error.\n    if (token.type === \"error\") {\n      var error = new HTMLParseError();\n      error.message = \"Invalid token: \\\"\" + token.value + \"\\\"\";\n      error.addToken(token);\n      this.addError(error);\n    }\n  }\n\n  /**\n   * @return {Array.<Foundation.Scanner.Token>}\n   */\n  get tokens () {\n    return this._sourceTokens.slice(0);\n  }\n\n  /**\n   * @return {Array.<number>?}\n   */\n  get indexRange () {\n    var tokens = this.tokens;\n    if (tokens.length === 0) {\n      return null;\n    }\n\n    var firstToken = tokens[0];\n    var lastToken = tokens[tokens.length - 1];\n    return [\n      firstToken.index,\n      lastToken.index + lastToken.value.length - 1\n    ];\n  }\n\n  /**\n   * @param {Foundation.HTML.Parser.HTMLNode} childNode\n   * @return {Foundation.HTML.Parser.HTMLNode}\n   */\n  appendChild (childNode) {\n    if (! this.canHaveChildren) {\n      throw new NodeError(\n        \"attempted to call appendChild on node that can't have children\", this\n      );\n    }\n\n    if (! this._children) {\n      this._children = [];\n    }\n\n    this._children.push(childNode);\n\n    return this;\n  }\n\n  /**\n   * @public\n   * @return {Array.<HTMLParseError>}\n   */\n  get errors () {\n    if (this.canHaveChildren && this.children) {\n      return this.children.reduce((accumulator, child) => {\n        return accumulator.concat(child.errors)\n      }, this.ownErrors)\n    }\n\n    return this.ownErrors\n  }\n\n  /**\n   * @param {HTMLParseError} error\n   */\n  addError (error) {\n    if (this._errors === null) {\n      this._errors = [];\n    }\n    this._errors.push(error);\n  }\n\n  /**\n   * @private\n   * @return {Array.<HTMLParseError>}\n   */\n  get ownErrors () {\n    return this._errors === null ? [] : this._errors.slice(0);\n  }\n}\n\n/**\n * HTML node error class. This is a throwable Error object, not to be\n * confused with the HTMLParseError class which deals\n * with reporting errors found in the HTML source.\n */\nexport class NodeError extends Error {\n  /**\n   * @param {string} message\n   * @param {Foundation.HTML.Parser.HTMLNode} node\n   */\n  constructor (message, node) {\n    super(message);\n    this.name = \"HTMLParserNodeError\";\n    this.node = node;\n  }\n};\n"]}