UNPKG

fortissimo-html

Version:

Fortissimo HTML - Flexible, Forgiving, Formatting HTML Parser

549 lines 20.6 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.DomModel = exports.DomNode = exports.UnmatchedClosingTag = exports.TextElement = exports.ProcessingElement = exports.DocType = exports.DeclarationElement = exports.CommentElement = exports.CData = exports.DomElement = exports.isCommentLike = exports.CQ = exports.OQ = exports.ClosureState = void 0; const elements_1 = require("./elements"); const characters_1 = require("./characters"); const util_1 = require("@tubular/util"); function last(array) { if (array && array.length > 0) return array[array.length - 1]; else return undefined; } var ClosureState; (function (ClosureState) { ClosureState[ClosureState["UNCLOSED"] = 0] = "UNCLOSED"; ClosureState[ClosureState["SELF_CLOSED"] = 1] = "SELF_CLOSED"; ClosureState[ClosureState["VOID_CLOSED"] = 2] = "VOID_CLOSED"; ClosureState[ClosureState["EXPLICITLY_CLOSED"] = 3] = "EXPLICITLY_CLOSED"; ClosureState[ClosureState["IMPLICITLY_CLOSED"] = 4] = "IMPLICITLY_CLOSED"; })(ClosureState = exports.ClosureState || (exports.ClosureState = {})); // Opening quotation mark function OQ(quote) { return quote.length < 2 ? quote : quote.substr(1); } exports.OQ = OQ; // Closing quotation mark function CQ(quote) { return quote.length < 2 ? quote : ''; } exports.CQ = CQ; function isCommentLike(obj) { return (obj instanceof CommentElement || obj instanceof DeclarationElement || obj instanceof ProcessingElement) && !(obj instanceof DocType); } exports.isCommentLike = isCommentLike; function stringToSelector(s) { const selector = {}; const $ = /(.*)\.(.+)/.exec(s); if ($) { s = $[1]; selector.qlass = $[2]; } if (s) { if (s.startsWith('#')) selector.id = s.substr(1); else if (s === '*') selector.element = ''; else selector.element = s.toLowerCase(); } else selector.element = ''; return selector; } class DomElement { constructor(content, line, column, terminated) { this.content = content; this.line = line; this.column = column; this.terminated = terminated; this.blockContext = false; // Used by formatter.ts } get depth() { let depth = -1; let node = this.parent; while (node) { depth += (node.synthetic && node.parent ? 0 : 1); node = node.parent; } return depth; } get syntheticDepth() { let depth = -1; let node = this.parent; while (node) { ++depth; node = node.parent; } return depth; } // noinspection JSUnusedGlobalSymbols toJSON() { return this.toString() + ' (' + this.depth + (this.line ? `; ${this.line}, ${this.column}` : '') + (this.parent ? '; ' + this.parent.tag : '') + ')' + (this.terminated ? '' : '!'); } } exports.DomElement = DomElement; class CData extends DomElement { toString() { return '<![CDATA[' + this.content + (this.terminated ? ']]>' : ''); } } exports.CData = CData; class CommentElement extends DomElement { toString() { return '<!--' + this.content + (this.terminated ? '-->' : ''); } } exports.CommentElement = CommentElement; class DeclarationElement extends DomElement { toString() { return '<!' + this.content + (this.terminated ? '>' : ''); } } exports.DeclarationElement = DeclarationElement; class DocType extends DeclarationElement { constructor(content, line, column, terminated) { super(content, line, column, terminated); this.type = /\bxhtml\b/i.test(content) ? 'xhtml' : 'html'; this.variety = (/\b(frameset|strict|transitional)\b/i.exec(content.toLowerCase()) || [])[1]; this.version = (/\bx?html[ \n\r\t\f]*([.\d]+)\b/i.exec(content) || [])[1]; if (!this.version && /^doctype[ \n\r\t\f]+html[ \n\r\t\f]*$/i.test(content)) this.version = '5'; } } exports.DocType = DocType; class ProcessingElement extends DomElement { toString() { return '<?' + this.content + (this.terminated ? '>' : ''); } } exports.ProcessingElement = ProcessingElement; class TextElement extends DomElement { constructor(content, line, column, possibleEntities) { super(content, line, column, true); this.possibleEntities = possibleEntities; } toString() { return this.content; } } exports.TextElement = TextElement; class UnmatchedClosingTag extends DomElement { constructor(content, line, column) { super(content, line, column, true); } toString() { return this.content; } } exports.UnmatchedClosingTag = UnmatchedClosingTag; class DomNode extends DomElement { constructor(tag, line = 0, column = 0, caseSensitive = false, synthetic = false) { super(null, line, column, true); this.tag = tag; this.attributes = []; this.badTerminator = null; this.closureState = ClosureState.UNCLOSED; this.endTagLine = 0; this.endTagColumn = 0; this.endTagText = ''; this.equals = []; this.innerWhitespace = ''; this.quotes = []; this.spacing = []; this.values = []; this.valuesLookup = {}; this.tagLc = caseSensitive ? tag : tag.toLowerCase(); if (synthetic) this.synthetic = true; } static createNode(tag) { const node = new DomNode(tag); node.setEndTag(`</${tag}>`); node.closureState = ClosureState.EXPLICITLY_CLOSED; return node; } get attributeCount() { return this.attributes.length; } getAttribute(nameOrIndex) { if ((0, util_1.isString)(nameOrIndex)) nameOrIndex = this.attributes.indexOf(nameOrIndex); return [this.attributes[nameOrIndex], this.values[nameOrIndex]]; } deleteAttribute(nameOrIndex) { if ((0, util_1.isString)(nameOrIndex)) nameOrIndex = this.attributes.indexOf(nameOrIndex); if (this.attributes[nameOrIndex] === undefined) return false; delete this.valuesLookup[this.attributes[nameOrIndex]]; this.attributes.splice(nameOrIndex, 1); this.values.splice(nameOrIndex, 1); this.equals.splice(nameOrIndex, 1); this.quotes.splice(nameOrIndex, 1); this.spacing.splice(nameOrIndex, 1); return true; } clearAttributes() { this.valuesLookup = {}; this.attributes = []; this.values = []; this.equals = []; this.quotes = []; this.spacing = []; } // TODO: Needs more work about how raw attribute values vs. actual values will be handled. setAttribute(name, value = '', leadingSpace, equals, quote) { const index = this.attributes.indexOf(name); if (index < 0) this.addAttribute(name, value, leadingSpace, equals, quote); else if (value === null) this.deleteAttribute(index); else { this.values[index] = value; this.spacing[index] = leadingSpace !== null && leadingSpace !== void 0 ? leadingSpace : this.spacing[index]; this.equals[index] = equals !== null && equals !== void 0 ? equals : this.equals[index]; this.quotes[index] = quote !== null && quote !== void 0 ? quote : this.quotes[index]; this.valuesLookup[name] = value; if (value) { if (!this.equals[index]) this.equals[index] = '='; if (!this.quotes[index]) this.quotes[index] = '"'; } } } addAttribute(name, value = '', leadingSpace = ' ', equals, quote) { if (value === null) return; if (value === '') { equals = equals !== null && equals !== void 0 ? equals : ''; quote = quote !== null && quote !== void 0 ? quote : ''; } else { equals = equals !== null && equals !== void 0 ? equals : '='; quote = quote !== null && quote !== void 0 ? quote : '"'; } this.attributes.push(name); this.values.push(value); this.spacing.push(leadingSpace); this.equals.push(equals); this.quotes.push(quote); this.valuesLookup[name] = value; } addChild(child) { this.children = this.children || []; child.parent = this; this.children.push(child); } remove(child) { if ((0, util_1.isNumber)(child)) this.children.splice(child, 1); else if (child) { const index = this.children.indexOf(child); if (index >= 0) this.children.splice(index, 1); } else if (this.parent) { const index = this.parent.children.indexOf(this); if (index >= 0) this.parent.children.splice(index, 1); } } setEndTag(text, line = 0, column = 0) { this.endTagText = text; this.endTagLine = line; this.endTagColumn = column; } querySelector(selector) { const results = []; this.querySelectorImpl(selector, results, 1); if (results.length === 0) return null; else return results[0]; } querySelectorAll(selector) { const results = []; this.querySelectorImpl(selector, results); return results; } querySelectorImpl(selector, results, limit = Number.MAX_SAFE_INTEGER) { if (typeof selector === 'string') selector = stringToSelector(selector); if ((!selector.element || this.tagLc === selector.element) && (!selector.qlass || (this.valuesLookup.class || '').split(/\s+/).indexOf(selector.qlass) >= 0) && (!selector.id || this.valuesLookup.id === selector.id)) results.push(this); if (this.children) { for (let i = 0; i < this.children.length && results.length < limit; ++i) { if (this.children[i] instanceof DomNode) this.children[i].querySelectorImpl(selector, results); } } } get textContent() { const text = []; if (this.children) { for (const child of this.children) { if (child instanceof CData) text.push(child.content); else if (child instanceof TextElement) text.push(child.possibleEntities ? (0, characters_1.unescapeEntities)(child.content) : child.content); else if (child instanceof DomNode) text.push(child.textContent); } } return text.join(''); } get innerHTML() { return this.toString(false); } countUnclosed() { let unclosed = 0; let implicitlyClosed = 0; if (!this.synthetic) { if (this.closureState === ClosureState.UNCLOSED) ++unclosed; else if (this.closureState === ClosureState.IMPLICITLY_CLOSED) ++implicitlyClosed; } if (this.children) { this.children.forEach(child => { if (child instanceof DomNode) { const [childUnclosed, childImplicit] = child.countUnclosed(); unclosed += childUnclosed; implicitlyClosed += childImplicit; } }); } return [unclosed, implicitlyClosed]; } toJSON() { const json = { tag: this.tag }; if (this.line) json.line = this.line; if (this.column) json.column = this.column; if (this.synthetic) json.synthetic = true; if (this.badTerminator !== null) json.badTerminator = this.badTerminator; json.depth = this.depth; if (json.depth !== this.syntheticDepth) json.syntheticDepth = this.syntheticDepth; json.closureState = this.closureState; if (this.attributes.length > 0) json.values = this.attributes.reduce((values, attrib, index) => { values[attrib] = this.values[index]; return values; }, {}); if (this.parent) json.parentTag = this.parent.tag; if (this.children) json.children = this.children; if (this.closureState === ClosureState.EXPLICITLY_CLOSED && this.endTagText) json.endTagText = `${this.endTagText} (${this.endTagLine}, ${this.endTagColumn})`; return json; } toString(includeSelf = true) { const parts = []; if (includeSelf && !this.synthetic) { parts.push('<', this.tag); if (this.attributes) { this.attributes.forEach((attrib, index) => { parts.push(this.spacing[index], attrib, this.equals[index], OQ(this.quotes[index]), this.values[index], CQ(this.quotes[index])); }); } if (this.innerWhitespace) parts.push(this.innerWhitespace); if (this.badTerminator !== null) parts.push(this.badTerminator); else if (this.closureState === ClosureState.SELF_CLOSED) parts.push('/>'); else parts.push('>'); } if (this.children) this.children.forEach(child => parts.push(child.toString())); if (includeSelf && !this.synthetic && this.closureState === ClosureState.EXPLICITLY_CLOSED && this.endTagText) parts.push(this.endTagText); return parts.join(''); } } exports.DomNode = DomNode; class DomModel { constructor() { this.root = new DomNode('/', 0, 0, false, true); this.currentNode = this.root; this.inMathOrSvg = 0; this.openStack = [this.root]; this.xmlMode = false; } getRoot() { return this.root; } getCurrentNode() { return this.currentNode; } addAttribute(name, value, leadingSpace = '', equals = '=', quote = '"') { this.currentNode.addAttribute(name, value, leadingSpace, equals, quote); } addInnerWhitespace(whitespace) { if (this.currentNode) this.currentNode.innerWhitespace = whitespace || ''; } canDoXmlMode() { return this.openStack.length === 1 && this.root.children.length === 0 || (this.root.children.length === 1 && this.root.children[0].toString().trim() === ''); } getDepth() { return this.openStack.length - 2; } setXmlMode(mode) { this.xmlMode = mode; } prePush(node) { if (!this.xmlMode && node.tagLc in elements_1.OPEN_IMPLIES_CLOSE) { while (elements_1.OPEN_IMPLIES_CLOSE[node.tagLc].has(this.currentNode.tagLc)) { this.currentNode.closureState = ClosureState.IMPLICITLY_CLOSED; this.openStack.pop(); this.updateCurrentNode(); } } } addChild(child) { this.currentNode.addChild(child); } examineTable(table) { const children = table.children; if (!children || this.xmlMode) return; DomModel.insertRowsWhereNeeded(table); const sections = new Set(); for (const elem of children) { if (elem instanceof DomNode) { if (/^(thead|tbody|tfoot)$/.test(elem.tagLc)) { sections.add(elem.tagLc); DomModel.insertRowsWhereNeeded(elem); } } } if (sections.size === 0) return; let section; for (let i = 0; i < children.length; ++i) { const elem = children[i]; if (elem instanceof DomNode) { if (/^(thead|tbody|tfoot)$/.test(elem.tagLc)) section = elem.closureState === ClosureState.EXPLICITLY_CLOSED ? undefined : elem; else if (elem.tagLc === 'tr') { const hasTh = !!elem.querySelector('th'); if (!section || (hasTh && (section === null || section === void 0 ? void 0 : section.tagLc) !== 'thead')) { section = new DomNode(hasTh ? 'thead' : 'tbody', 0, 0, false, true); section.parent = table; children[i] = section; } section.addChild(elem); } } } } static insertRowsWhereNeeded(node) { const children = node.children; if (!children) return; let row; for (let i = 0; i < children.length; ++i) { const elem = children[i]; if (elem instanceof DomNode) { if (elem.tagLc === 'th' || elem.tagLc === 'td') { if (!row) { row = new DomNode('tr', 0, 0, false, true); row.parent = node; children[i] = row; } row.addChild(elem); } else if (elem.tagLc === 'tr') row = elem.closureState === ClosureState.EXPLICITLY_CLOSED ? undefined : elem; } } } push(node) { this.openStack.push(node); this.currentNode = node; if (node.tagLc === 'math' || node.tagLc === 'svg') ++this.inMathOrSvg; } pop(tagLc, endTagText = '</' + tagLc + '>', line = 0, column = 0) { let popped = false; let parseError = false; if (!tagLc || this.currentNode.tagLc === tagLc) { popped = true; this.openStack.pop(); if (tagLc === null) this.currentNode.closureState = ClosureState.SELF_CLOSED; else if (tagLc === undefined) this.currentNode.closureState = ClosureState.VOID_CLOSED; else { this.currentNode.closureState = ClosureState.EXPLICITLY_CLOSED; this.currentNode.setEndTag(endTagText, line, column); } if (this.currentNode.tagLc === 'table') this.examineTable(this.currentNode); } if (!popped && !this.xmlMode) { let nodeIndex = this.openStack.map(node => node.tagLc).lastIndexOf(tagLc); if (nodeIndex > 0) { // No, I really don't want >= 0. if (elements_1.FORMATTING_ELEMENTS.has(tagLc)) { for (let i = nodeIndex + 1; i < this.openStack.length; ++i) { if (elements_1.MARKER_ELEMENTS.has(this.openStack[i].tagLc)) { nodeIndex = -1; break; } } } while (this.openStack.length > nodeIndex) { if (!this.currentNode.closureState) { if (this.openStack.length - 1 === nodeIndex) { popped = true; this.currentNode.closureState = ClosureState.EXPLICITLY_CLOSED; this.currentNode.setEndTag(endTagText, line, column); } else this.currentNode.closureState = ClosureState.IMPLICITLY_CLOSED; if (this.currentNode.tagLc === 'table') this.examineTable(this.currentNode); } this.openStack.pop(); this.updateCurrentNode(); } } } if (!popped) { this.addChild(new UnmatchedClosingTag(endTagText, line, column)); parseError = true; } if (this.openStack.length === 0) this.openStack.push(this.root); this.updateCurrentNode(); this.inMathOrSvg = 0; this.openStack.forEach((node, index) => { this.inMathOrSvg += (node.tagLc === 'math' || node.tagLc === 'svg' ? 1 : 0); if (index > 0) node.parent = this.openStack[index - 1]; }); return !parseError; } shouldParseCData() { return this.xmlMode || this.inMathOrSvg > 0; } updateCurrentNode() { this.currentNode = last(this.openStack) || this.root; } } exports.DomModel = DomModel; //# sourceMappingURL=dom.js.map