// @desertnet/html-parser
// Version:
// HTML parser and non-strict validator
// 266 lines (210 loc) • 15.7 kB
// JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", {
value: true
});
exports.NodeError = exports.NodeType = undefined;
/**
 * Compiler-emitted helper that installs members on a constructor.
 *
 * Each entry of `protoProps` / `staticProps` is a property descriptor that
 * additionally carries a `key` naming the property. Descriptors are
 * normalised in place (and therefore mutated) before being defined.
 *
 * @param {Function} Constructor  receives the members
 * @param {Array.<Object>=} protoProps  defined on Constructor.prototype
 * @param {Array.<Object>=} staticProps  defined on Constructor itself
 * @return {Function} the same Constructor
 */
var _createClass = function () {
  // Defines every descriptor in `descriptors` on `owner`.
  function defineProperties(owner, descriptors) {
    for (var index = 0; index < descriptors.length; index++) {
      var spec = descriptors[index];

      // Members are hidden from enumeration unless explicitly flagged.
      spec.enumerable = spec.enumerable || false;
      spec.configurable = true;

      // `writable` is only legal on data descriptors; accessor
      // descriptors (get/set) must not carry it.
      if ("value" in spec) {
        spec.writable = true;
      }

      Object.defineProperty(owner, spec.key, spec);
    }
  }

  return function (Constructor, protoProps, staticProps) {
    if (protoProps) {
      defineProperties(Constructor.prototype, protoProps);
    }
    if (staticProps) {
      defineProperties(Constructor, staticProps);
    }
    return Constructor;
  };
}();
var _HTMLParseError = require("../HTMLParseError");
var _HTMLParseError2 = _interopRequireDefault(_HTMLParseError);
/**
 * Compiler-emitted helper that normalises the result of `require()` so the
 * default export can always be read from `.default`.
 *
 * @param {*} obj  value returned by require()
 * @return {Object} `obj` itself when it is flagged `__esModule`, otherwise
 *   a new wrapper object `{ default: obj }`
 */
function _interopRequireDefault(obj) {
  if (obj && obj.__esModule) {
    return obj;
  }
  return { default: obj };
}
/**
 * Compiler-emitted helper that picks the object a derived constructor
 * should treat as `this` after calling its super constructor.
 *
 * @param {Object} self  the `this` of the derived constructor
 * @param {*} call  whatever the super constructor call returned
 * @return {*} `call` when it is an object or a function, otherwise `self`
 * @throws {ReferenceError} when `self` is falsy
 */
function _possibleConstructorReturn(self, call) {
  if (!self) {
    throw new ReferenceError("this hasn't been initialised - super() hasn't been called");
  }

  // Falsy results (undefined, null, 0, "") never replace `this`.
  if (!call) {
    return self;
  }

  var kind = typeof call;
  if (kind === "object" || kind === "function") {
    return call;
  }
  return self;
}
/**
 * Compiler-emitted helper that wires `subClass` to inherit from
 * `superClass`, both for instances (prototype chain) and for statics
 * (the constructor's own prototype).
 *
 * @param {Function} subClass  the derived constructor
 * @param {?Function} superClass  the base constructor, or null
 * @throws {TypeError} when `superClass` is neither a function nor null
 */
function _inherits(subClass, superClass) {
  var isConstructor = typeof superClass === "function";
  if (!isConstructor && superClass !== null) {
    throw new TypeError("Super expression must either be null or a function, not " + typeof superClass);
  }

  // With a null superclass the new prototype has no parent at all.
  var parentPrototype = superClass && superClass.prototype;
  subClass.prototype = Object.create(parentPrototype, {
    constructor: {
      value: subClass,
      enumerable: false,
      writable: true,
      configurable: true
    }
  });

  if (!superClass) {
    return;
  }

  // Static members are inherited through the constructor's prototype.
  if (Object.setPrototypeOf) {
    Object.setPrototypeOf(subClass, superClass);
  } else {
    subClass.__proto__ = superClass;
  }
}
/**
 * Compiler-emitted guard that rejects calling a class constructor without
 * `new`.
 *
 * @param {*} instance  the `this` value seen by the constructor
 * @param {Function} Constructor  the class being constructed
 * @throws {TypeError} when `instance` is not an instance of `Constructor`
 */
function _classCallCheck(instance, Constructor) {
  if (instance instanceof Constructor) {
    return;
  }
  throw new TypeError("Cannot call a class as a function");
}
/**
 * Names of the HTML node kinds. Every member's value is identical to its
 * key, so the values can be compared or printed directly.
 * @enum {string}
 */
var NodeType = {
  ROOT: "ROOT",
  TAG: "TAG",
  ATTRIBUTE: "ATTRIBUTE",
  TEXT: "TEXT",
  ENTITY: "ENTITY",
  COMMENT: "COMMENT",
  CLOSETAG: "CLOSETAG"
};

exports.NodeType = NodeType;
/**
 * Base class for the nodes of the parsed HTML tree. It stores the source
 * tokens a node was built from, the node's children, and the parse errors
 * attached to it.
 *
 * The class is abstract: `canHaveChildren`, `type` and `toString` throw a
 * NodeError here and must be overridden. Only subclass constructors should
 * invoke this constructor.
 */
var HTMLNode = function () {
  function HTMLNode() {
    _classCallCheck(this, HTMLNode);

    /**
     * Child nodes; stays null until the first appendChild() call.
     * @private
     * @type {Array.<Foundation.HTML.Parser.HTMLNode>?}
     */
    this._children = null;

    /**
     * Tokens this node was built from, in the order they were added.
     * @private
     * @type {Array.<Foundation.Scanner.Token>}
     */
    this._sourceTokens = [];

    /**
     * Errors attached directly to this node; stays null until the first
     * addError() call.
     * @private
     * @type {Array.<HTMLParseError>?}
     */
    this._errors = null;
  }

  _createClass(HTMLNode, [{
    /**
     * Subclasses must override this; the base implementation always throws.
     * @return {string}
     * @throws {NodeError}
     */
    key: "toString",
    value: function toString() {
      throw new NodeError("toString not overriden in subclass", this);
    }
  }, {
    /**
     * Records a source token on this node. A token whose type is "error"
     * additionally produces an HTMLParseError attached to this node.
     * @param {Foundation.Scanner.Token} token
     */
    key: "addToken",
    value: function addToken(token) {
      this._sourceTokens.push(token);

      if (token.type !== "error") {
        return;
      }

      var parseError = new _HTMLParseError2.default();
      parseError.message = "Invalid token: \"" + token.value + "\"";
      parseError.addToken(token);
      this.addError(parseError);
    }
  }, {
    /**
     * Appends a child node.
     * @param {Foundation.HTML.Parser.HTMLNode} childNode
     * @return {Foundation.HTML.Parser.HTMLNode} this node (not the child)
     * @throws {NodeError} when this node cannot have children
     */
    key: "appendChild",
    value: function appendChild(childNode) {
      if (!this.canHaveChildren) {
        throw new NodeError("attempted to call appendChild on node that can't have children", this);
      }

      // The child list is created lazily on first use.
      var kids = this._children || (this._children = []);
      kids.push(childNode);
      return this;
    }
  }, {
    /**
     * Attaches a parse error directly to this node.
     * @param {HTMLParseError} error
     */
    key: "addError",
    value: function addError(error) {
      var own = this._errors;
      if (own === null) {
        own = this._errors = [];
      }
      own.push(error);
    }
  }, {
    /**
     * Subclasses must override this; the base implementation always throws.
     * @return {boolean}
     * @throws {NodeError}
     */
    key: "canHaveChildren",
    get: function get() {
      throw new NodeError("canHaveChildren not overriden in subclass", this);
    }
  }, {
    /**
     * Subclasses must override this; the base implementation always throws.
     * @return {NodeType}
     * @throws {NodeError}
     */
    key: "type",
    get: function get() {
      throw new NodeError("type getter not overriden in subclass", this);
    }
  }, {
    /**
     * A copy of the child list, or null when there are no children.
     * @return {Array.<Foundation.HTML.Parser.HTMLNode>?}
     */
    key: "children",
    get: function get() {
      var kids = this._children;
      var isEmpty = kids === null || kids.length === 0;
      return isEmpty ? null : kids.slice(0);
    }
  }, {
    /**
     * The most recently appended child, or null when there are none.
     * @return {Foundation.HTML.Parser.HTMLNode}
     */
    key: "lastChild",
    get: function get() {
      var kids = this.children;
      if (kids !== null && kids.length !== 0) {
        return kids[kids.length - 1];
      }
      return null;
    }
  }, {
    /**
     * A copy of the tokens recorded through addToken().
     * @return {Array.<Foundation.Scanner.Token>}
     */
    key: "tokens",
    get: function get() {
      return this._sourceTokens.slice();
    }
  }, {
    /**
     * A two-element array built from the first and last recorded tokens:
     * the first token's `index`, and the last token's `index` plus its
     * value length minus one. Null when no tokens were recorded.
     * NOTE(review): this reads as an inclusive [start, end] character range
     * if `token.index` is the token's start offset; confirm against the
     * scanner.
     * @return {Array.<number>?}
     */
    key: "indexRange",
    get: function get() {
      var sourceTokens = this.tokens;
      var count = sourceTokens.length;
      if (count === 0) {
        return null;
      }

      var first = sourceTokens[0];
      var last = sourceTokens[count - 1];
      var start = first.index;
      var end = last.index + last.value.length - 1;
      return [start, end];
    }
  }, {
    /**
     * This node's own errors followed by the errors of each child, where
     * every child contributes through its own `errors` getter.
     * @public
     * @return {Array.<HTMLParseError>}
     */
    key: "errors",
    get: function get() {
      if (this.canHaveChildren && this.children) {
        var kids = this.children;
        var collected = this.ownErrors;
        kids.forEach(function (child) {
          collected = collected.concat(child.errors);
        });
        return collected;
      }
      return this.ownErrors;
    }
  }, {
    /**
     * A copy of the errors attached directly to this node; an empty array
     * when none were added.
     * @private
     * @return {Array.<HTMLParseError>}
     */
    key: "ownErrors",
    get: function get() {
      if (this._errors === null) {
        return [];
      }
      return this._errors.slice(0);
    }
  }]);

  return HTMLNode;
}();
exports.default = HTMLNode;

/**
 * HTML node error class. This is a throwable Error object, not to be
 * confused with the HTMLParseError class which deals
 * with reporting errors found in the HTML source.
 *
 * Instances carry `name` ("HTMLParserNodeError"), the given `message`, and
 * the offending `node`.
 */
var NodeError = exports.NodeError = function (_Error) {
  _inherits(NodeError, _Error);

  /**
   * @param {string} message
   * @param {Foundation.HTML.Parser.HTMLNode} node
   */
  function NodeError(message, node) {
    _classCallCheck(this, NodeError);

    var _this = _possibleConstructorReturn(this, Object.getPrototypeOf(NodeError).call(this, message));

    // Calling the built-in Error as a plain function ignores `this` and
    // returns a brand-new Error, so `_this` ends up with Error.prototype as
    // its prototype. Re-point it at NodeError.prototype so that
    // `err instanceof NodeError` and `err.constructor === NodeError` hold.
    if (!(_this instanceof NodeError)) {
      if (Object.setPrototypeOf) {
        Object.setPrototypeOf(_this, NodeError.prototype);
      } else {
        _this.__proto__ = NodeError.prototype;
      }
    }

    _this.name = "HTMLParserNodeError";
    _this.node = node;
    return _this;
  }

  return NodeError;
}(Error);
//# sourceMappingURL=data:application/json;base64,{"version":3,"sources":["../../../lib/HTMLNode/HTMLNode.js"],"names":[],"mappings":";;;;;;;;;AAAA;;;;;;;;;;;;AAEA;;;;AAIO,IAAM,8BAAW;AACtB,QAAM,MADgB;AAEtB,OAAM,KAFgB;AAGtB,aAAW,WAHW;AAItB,QAAM,MAJgB;AAKtB,UAAS,QALa;AAMtB,WAAS,SANa;AAOtB,YAAU;AAPY,CAAjB;;AAUP;;;;;IAIqB,Q;AACnB,sBAAe;AAAA;;AACb;;;;AAIA,SAAK,SAAL,GAAiB,IAAjB;;AAEA;;;;AAIA,SAAK,aAAL,GAAqB,EAArB;;AAEA;;;;AAIA,SAAK,OAAL,GAAe,IAAf;AACD;;AAED;;;;;;;;;;AAQA;;;;+BAIY;AACV,YAAM,IAAI,SAAJ,CAAc,oCAAd,EAAoD,IAApD,CAAN;AACD;;AAED;;;;;;;;AA6BA;;;6BAGU,K,EAAO;AACf,WAAK,aAAL,CAAmB,IAAnB,CAAwB,KAAxB;;AAEA;AACA,UAAI,MAAM,IAAN,KAAe,OAAnB,EAA4B;AAC1B,YAAI,QAAQ,8BAAZ;AACA,cAAM,OAAN,GAAgB,sBAAsB,MAAM,KAA5B,GAAoC,IAApD;AACA,cAAM,QAAN,CAAe,KAAf;AACA,aAAK,QAAL,CAAc,KAAd;AACD;AACF;;AAED;;;;;;;;AAwBA;;;;gCAIa,S,EAAW;AACtB,UAAI,CAAE,KAAK,eAAX,EAA4B;AAC1B,cAAM,IAAI,SAAJ,CACJ,gEADI,EAC8D,IAD9D,CAAN;AAGD;;AAED,UAAI,CAAE,KAAK,SAAX,EAAsB;AACpB,aAAK,SAAL,GAAiB,EAAjB;AACD;;AAED,WAAK,SAAL,CAAe,IAAf,CAAoB,SAApB;;AAEA,aAAO,IAAP;AACD;;AAED;;;;;;;;;AAcA;;;6BAGU,K,EAAO;AACf,UAAI,KAAK,OAAL,KAAiB,IAArB,EAA2B;AACzB,aAAK,OAAL,GAAe,EAAf;AACD;AACD,WAAK,OAAL,CAAa,IAAb,CAAkB,KAAlB;AACD;;AAED;;;;;;;wBA5HuB;AACrB,YAAM,IAAI,SAAJ,CAAc,2CAAd,EAA2D,IAA3D,CAAN;AACD;;;wBAaW;AACV,YAAM,IAAI,SAAJ,CAAc,uCAAd,EAAuD,IAAvD,CAAN;AACD;;AAED;;;;;;wBAGgB;AACd,UAAI,KAAK,SAAL,KAAmB,IAAnB,IAA2B,KAAK,SAAL,CAAe,MAAf,KAA0B,CAAzD,EAA4D;AAC1D,eAAO,IAAP;AACD;;AAED,aAAO,KAAK,SAAL,CAAe,KAAf,CAAqB,CAArB,CAAP;AACD;;AAED;;;;;;wBAGiB;AACf,UAAI,WAAW,KAAK,QAApB;AACA,UAAI,aAAa,IAAb,IAAqB,SAAS,MAAT,KAAoB,CAA7C,EAAgD;AAC9C,eAAO,IAAP;AACD;AACD,aAAO,SAAS,SAAS,MAAT,GAAkB,CAA3B,CAAP;AACD;;;wBAoBa;AACZ,aAAO,KAAK,aAAL,CAAmB,KAAnB,CAAyB,CAAzB,CAAP;AACD;;AAED;;;;;;wBAGkB;AAChB,UAAI,SAAS,KAAK,MAAlB;AACA,UAAI,OAAO,MAAP,KAAkB,CAAtB,EAAyB;AACvB,eAAO,IAAP;AACD;;AAED,UAAI,aAAa,OAAO,CAAP,CAAjB;AACA,UAAI,YAAY,OAAO,OAAO,MAAP,GAAgB,CAAvB,CAAhB;AACA,aAAO,CACL,WAAW,KADN,EAEL,UAAU,KAAV,GAAkB,UAAU,KAAV,CAAgB,MAAlC,GAA2C,CAFtC,CAAP;AAID;;;wBA0Ba;AACZ,UAAI,KAAK
,eAAL,IAAwB,KAAK,QAAjC,EAA2C;AACzC,eAAO,KAAK,QAAL,CAAc,MAAd,CAAqB,UAAC,WAAD,EAAc,KAAd,EAAwB;AAClD,iBAAO,YAAY,MAAZ,CAAmB,MAAM,MAAzB,CAAP;AACD,SAFM,EAEJ,KAAK,SAFD,CAAP;AAGD;;AAED,aAAO,KAAK,SAAZ;AACD;;;wBAgBgB;AACf,aAAO,KAAK,OAAL,KAAiB,IAAjB,GAAwB,EAAxB,GAA6B,KAAK,OAAL,CAAa,KAAb,CAAmB,CAAnB,CAApC;AACD;;;;;;AAGH;;;;;;;kBA9JqB,Q;;IAmKR,S,WAAA,S;;;AACX;;;;AAIA,qBAAa,OAAb,EAAsB,IAAtB,EAA4B;AAAA;;AAAA,6FACpB,OADoB;;AAE1B,UAAK,IAAL,GAAY,qBAAZ;AACA,UAAK,IAAL,GAAY,IAAZ;AAH0B;AAI3B;;;EAT4B,K;;AAU9B","file":"HTMLNode.js","sourcesContent":["import HTMLParseError from '../HTMLParseError'\n\n/**\n * HTML node types.\n * @enum {string}\n */\nexport const NodeType = {\n  ROOT: \"ROOT\",\n  TAG:  \"TAG\",\n  ATTRIBUTE: \"ATTRIBUTE\",\n  TEXT: \"TEXT\",\n  ENTITY:  \"ENTITY\",\n  COMMENT: \"COMMENT\",\n  CLOSETAG: \"CLOSETAG\"\n};\n\n/**\n * HTML tree nodes base class. Should never be called outside of\n * subclass constructors!\n */\nexport default class HTMLNode {\n  constructor () {\n    /**\n     * @private\n     * @type {Array.<Foundation.HTML.Parser.HTMLNode>?}\n     */\n    this._children = null;\n\n    /**\n     * @private\n     * @type {Array.<Foundation.Scanner.Token>}\n     */\n    this._sourceTokens = [];\n\n    /**\n     * @private\n     * @type {Array.<HTMLParseError>?}\n     */\n    this._errors = null;\n  }\n\n  /**\n   * Subclasses must override this.\n   * @return {boolean}\n   */\n  get canHaveChildren () {\n    throw new NodeError(\"canHaveChildren not overriden in subclass\", this)\n  }\n\n  /**\n   * Subclasses must override this.\n   * @return {string}\n   */\n  toString () {\n    throw new NodeError(\"toString not overriden in subclass\", this)\n  }\n\n  /**\n   * @return {NodeType}\n   */\n  get type () {\n    throw new NodeError(\"type getter not overriden in subclass\", this)\n  }\n\n  /**\n   * @return {Array.<Foundation.HTML.Parser.HTMLNode>?}\n   */\n  get children () {\n    if (this._children === null || this._children.length === 0) {\n      return 
null;\n    }\n\n    return this._children.slice(0);\n  }\n\n  /**\n   * @return {Foundation.HTML.Parser.HTMLNode}\n   */\n  get lastChild () {\n    var children = this.children;\n    if (children === null || children.length === 0) {\n      return null;\n    }\n    return children[children.length - 1];\n  }\n\n  /**\n   * @param {Foundation.Scanner.Token} token\n   */\n  addToken (token) {\n    this._sourceTokens.push(token);\n\n    // Create error objects when adding a token of type error.\n    if (token.type === \"error\") {\n      var error = new HTMLParseError();\n      error.message = \"Invalid token: \\\"\" + token.value + \"\\\"\";\n      error.addToken(token);\n      this.addError(error);\n    }\n  }\n\n  /**\n   * @return {Array.<Foundation.Scanner.Token>}\n   */\n  get tokens () {\n    return this._sourceTokens.slice(0);\n  }\n\n  /**\n   * @return {Array.<number>?}\n   */\n  get indexRange () {\n    var tokens = this.tokens;\n    if (tokens.length === 0) {\n      return null;\n    }\n\n    var firstToken = tokens[0];\n    var lastToken = tokens[tokens.length - 1];\n    return [\n      firstToken.index,\n      lastToken.index + lastToken.value.length - 1\n    ];\n  }\n\n  /**\n   * @param {Foundation.HTML.Parser.HTMLNode} childNode\n   * @return {Foundation.HTML.Parser.HTMLNode}\n   */\n  appendChild (childNode) {\n    if (! this.canHaveChildren) {\n      throw new NodeError(\n        \"attempted to call appendChild on node that can't have children\", this\n      );\n    }\n\n    if (! 
this._children) {\n      this._children = [];\n    }\n\n    this._children.push(childNode);\n\n    return this;\n  }\n\n  /**\n   * @public\n   * @return {Array.<HTMLParseError>}\n   */\n  get errors () {\n    if (this.canHaveChildren && this.children) {\n      return this.children.reduce((accumulator, child) => {\n        return accumulator.concat(child.errors)\n      }, this.ownErrors)\n    }\n\n    return this.ownErrors\n  }\n\n  /**\n   * @param {HTMLParseError} error\n   */\n  addError (error) {\n    if (this._errors === null) {\n      this._errors = [];\n    }\n    this._errors.push(error);\n  }\n\n  /**\n   * @private\n   * @return {Array.<HTMLParseError>}\n   */\n  get ownErrors () {\n    return this._errors === null ? [] : this._errors.slice(0);\n  }\n}\n\n/**\n * HTML node error class. This is a throwable Error object, not to be\n * confused with the HTMLParseError class which deals\n * with reporting errors found in the HTML source.\n */\nexport class NodeError extends Error {\n  /**\n   * @param {string} message\n   * @param {Foundation.HTML.Parser.HTMLNode} node\n   */\n  constructor (message, node) {\n    super(message);\n    this.name = \"HTMLParserNodeError\";\n    this.node = node;\n  }\n};\n"]}