// fortissimo-html
// Version:
// Fortissimo HTML - Flexible, Forgiving, Formatting HTML Parser
// 549 lines • 20.6 kB
// JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.DomModel = exports.DomNode = exports.UnmatchedClosingTag = exports.TextElement = exports.ProcessingElement = exports.DocType = exports.DeclarationElement = exports.CommentElement = exports.CData = exports.DomElement = exports.isCommentLike = exports.CQ = exports.OQ = exports.ClosureState = void 0;
const elements_1 = require("./elements");
const characters_1 = require("./characters");
const util_1 = require("@tubular/util");
/**
 * Returns the final element of an array-like value.
 *
 * @param {Array|null|undefined} array - The list to inspect.
 * @returns {*} The last element, or `undefined` when `array` is falsy or empty.
 */
function last(array) {
  const hasItems = Boolean(array) && array.length > 0;

  return hasItems ? array[array.length - 1] : undefined;
}
// Enumeration of the ways a DomNode's tag can be recorded as closed.
// This is the compiled form of an enum: every member is mapped in both directions
// (name -> number and number -> name) on the same object, which is also published
// as exports.ClosureState.
var ClosureState;
(function (ClosureState) {
// 0: nothing has closed the node yet; the DomNode constructor starts every node here.
ClosureState[ClosureState["UNCLOSED"] = 0] = "UNCLOSED";
// 1: assigned by DomModel.pop() when called with a null tag; DomNode.toString() then ends the tag with '/>'.
ClosureState[ClosureState["SELF_CLOSED"] = 1] = "SELF_CLOSED";
// 2: assigned by DomModel.pop() when called with an undefined tag.
ClosureState[ClosureState["VOID_CLOSED"] = 2] = "VOID_CLOSED";
// 3: a matching end tag was seen; only in this state does DomNode.toString() emit the stored end tag text.
ClosureState[ClosureState["EXPLICITLY_CLOSED"] = 3] = "EXPLICITLY_CLOSED";
// 4: closed as a side effect of another tag (see DomModel.prePush() and the unwinding loop in DomModel.pop()).
ClosureState[ClosureState["IMPLICITLY_CLOSED"] = 4] = "IMPLICITLY_CLOSED";
})(ClosureState = exports.ClosureState || (exports.ClosureState = {}));
/**
 * Opening quotation mark: the text DomNode.toString() writes in front of an
 * attribute value.
 *
 * @param {string} quote - The quote string stored for an attribute.
 * @returns {string} `quote` unchanged when it is shorter than two characters;
 *   otherwise `quote` without its first character.
 */
function OQ(quote) {
  if (quote.length < 2)
    return quote;

  return quote.substring(1);
}
exports.OQ = OQ;
/**
 * Closing quotation mark: the text DomNode.toString() writes after an
 * attribute value.
 *
 * @param {string} quote - The quote string stored for an attribute.
 * @returns {string} `quote` unchanged when it is shorter than two characters;
 *   otherwise the empty string.
 */
function CQ(quote) {
  const isShort = quote.length < 2;

  return isShort ? quote : '';
}
exports.CQ = CQ;
/**
 * Tells whether an element is a comment, declaration or processing element.
 * DocType instances are excluded even though DocType derives from DeclarationElement.
 *
 * @param {*} obj - Value to classify.
 * @returns {boolean} True for CommentElement, DeclarationElement (other than DocType)
 *   and ProcessingElement instances.
 */
function isCommentLike(obj) {
  // DocType is a DeclarationElement subclass, so it has to be ruled out first.
  if (obj instanceof DocType)
    return false;

  return obj instanceof CommentElement ||
    obj instanceof DeclarationElement ||
    obj instanceof ProcessingElement;
}
exports.isCommentLike = isCommentLike;
/**
 * Parses a minimal selector string into a selector object.
 *
 * Recognised forms: `tag`, `#id`, `*`, each optionally followed by `.class`.
 * Everything after the LAST dot is taken as the class name.
 *
 * @param {string} s - Selector text.
 * @returns {{element?: string, id?: string, qlass?: string}} `element` is the
 *   lower-cased tag name ('' meaning "any element"), `id` the text after '#',
 *   `qlass` the class name.
 */
function stringToSelector(s) {
  const selector = {};
  const classSplit = /(.*)\.(.+)/.exec(s);
  let target = s;

  if (classSplit) {
    target = classSplit[1];
    selector.qlass = classSplit[2];
  }

  if (!target) {
    // Nothing before the class part (or nothing at all): match any element.
    selector.element = '';
    return selector;
  }

  if (target.startsWith('#'))
    selector.id = target.substring(1);
  else
    selector.element = (target === '*' ? '' : target.toLowerCase());

  return selector;
}
/**
 * Base class for everything stored in the DOM tree.
 *
 * Holds the raw `content`, the source position (`line`, `column`) and whether the
 * construct was `terminated`. A `parent` property is attached later by
 * DomNode.addChild(); the depth getters walk that chain.
 */
class DomElement {
  /**
   * @param {string|null} content - Raw content of the element.
   * @param {number} line - Source line of the element.
   * @param {number} column - Source column of the element.
   * @param {boolean} terminated - Whether the closing delimiter was found.
   */
  constructor(content, line, column, terminated) {
    Object.assign(this, { content, line, column, terminated });
    this.blockContext = false; // Used by formatter.ts
  }

  /**
   * Number of ancestors minus one, where a synthetic ancestor that itself has a
   * parent is not counted.
   *
   * @returns {number}
   */
  get depth() {
    let count = -1;

    for (let ancestor = this.parent; ancestor; ancestor = ancestor.parent) {
      const skipped = ancestor.synthetic && ancestor.parent;

      if (!skipped)
        ++count;
    }

    return count;
  }

  /**
   * Number of ancestors minus one, counting synthetic ancestors too.
   *
   * @returns {number}
   */
  get syntheticDepth() {
    let count = -1;

    for (let ancestor = this.parent; ancestor; ancestor = ancestor.parent)
      ++count;

    return count;
  }

  // noinspection JSUnusedGlobalSymbols
  /**
   * Compact debugging form: text, depth, optional position, optional parent tag,
   * followed by '!' when the element was not terminated.
   *
   * @returns {string}
   */
  toJSON() {
    const position = this.line ? `; ${this.line}, ${this.column}` : '';
    const parentTag = this.parent ? `; ${this.parent.tag}` : '';
    const unterminatedMark = this.terminated ? '' : '!';

    return `${this.toString()} (${this.depth}${position}${parentTag})${unterminatedMark}`;
  }
}
exports.DomElement = DomElement;
/** A CDATA section; `content` is the text between the delimiters. */
class CData extends DomElement {
  /**
   * @returns {string} The section as source text; the closing `]]>` is only
   *   included when the section was terminated.
   */
  toString() {
    const closer = this.terminated ? ']]>' : '';

    return `<![CDATA[${this.content}${closer}`;
  }
}
exports.CData = CData;
/** A comment; `content` is the text between the delimiters. */
class CommentElement extends DomElement {
  /**
   * @returns {string} The comment as source text; the closing `-->` is only
   *   included when the comment was terminated.
   */
  toString() {
    const closer = this.terminated ? '-->' : '';

    return `<!--${this.content}${closer}`;
  }
}
exports.CommentElement = CommentElement;
/** A `<!...>` declaration; `content` is the text after `<!`. */
class DeclarationElement extends DomElement {
  /**
   * @returns {string} The declaration as source text; the closing `>` is only
   *   included when the declaration was terminated.
   */
  toString() {
    const closer = this.terminated ? '>' : '';

    return `<!${this.content}${closer}`;
  }
}
exports.DeclarationElement = DeclarationElement;
/**
 * A DOCTYPE declaration. On construction the content is scanned to derive:
 *  - `type`: 'xhtml' when the word "xhtml" occurs, otherwise 'html';
 *  - `variety`: 'frameset', 'strict' or 'transitional' (lower case) when present;
 *  - `version`: the digits/dots following "html"/"xhtml", or '5' for a bare
 *    `doctype html`.
 */
class DocType extends DeclarationElement {
  /**
   * @param {string} content - Text of the declaration after `<!`.
   * @param {number} line - Source line.
   * @param {number} column - Source column.
   * @param {boolean} terminated - Whether the closing `>` was found.
   */
  constructor(content, line, column, terminated) {
    super(content, line, column, terminated);

    // Lower-casing the input makes the captured variety lower case as well.
    const varietyMatch = /\b(frameset|strict|transitional)\b/i.exec(content.toLowerCase());
    const versionMatch = /\bx?html[ \n\r\t\f]*([.\d]+)\b/i.exec(content);
    const isBareHtmlDoctype = /^doctype[ \n\r\t\f]+html[ \n\r\t\f]*$/i.test(content);

    this.type = /\bxhtml\b/i.test(content) ? 'xhtml' : 'html';
    this.variety = varietyMatch ? varietyMatch[1] : undefined;
    this.version = versionMatch ? versionMatch[1] : undefined;

    if (!this.version && isBareHtmlDoctype)
      this.version = '5';
  }
}
exports.DocType = DocType;
/** A `<?...>` processing element; `content` is the text after `<?`. */
class ProcessingElement extends DomElement {
  /**
   * @returns {string} The element as source text; the closing `>` is only
   *   included when the element was terminated.
   */
  toString() {
    const closer = this.terminated ? '>' : '';

    return `<?${this.content}${closer}`;
  }
}
exports.ProcessingElement = ProcessingElement;
/** A run of text. Text elements are always created as terminated. */
class TextElement extends DomElement {
  /**
   * @param {string} content - The text exactly as it appeared.
   * @param {number} line - Source line.
   * @param {number} column - Source column.
   * @param {boolean} possibleEntities - When truthy, DomNode#textContent runs the
   *   content through entity unescaping.
   */
  constructor(content, line, column, possibleEntities) {
    const terminated = true;

    super(content, line, column, terminated);
    this.possibleEntities = possibleEntities;
  }

  /** @returns {string} The raw content, unchanged. */
  toString() {
    const { content } = this;

    return content;
  }
}
exports.TextElement = TextElement;
/**
 * An end tag that DomModel.pop() could not pair with an open element.
 * Always created as terminated; `content` is the end tag text.
 */
class UnmatchedClosingTag extends DomElement {
  /**
   * @param {string} content - The end tag text.
   * @param {number} line - Source line.
   * @param {number} column - Source column.
   */
  constructor(content, line, column) {
    const terminated = true;

    super(content, line, column, terminated);
  }

  /** @returns {string} The end tag text, unchanged. */
  toString() {
    const { content } = this;

    return content;
  }
}
exports.UnmatchedClosingTag = UnmatchedClosingTag;
/**
 * An element node: a tag, its attributes and (optionally) child elements.
 *
 * Attribute data is kept in parallel arrays that share one index per attribute:
 * `attributes` (names), `values`, `equals` (the text of the equals sign),
 * `quotes` (quote marker, rendered through OQ()/CQ()) and `spacing` (text written
 * before the name). `valuesLookup` additionally maps attribute name to value.
 * `children` is created lazily by addChild() and is undefined until then.
 */
class DomNode extends DomElement {
  /**
   * @param {string} tag - Tag name as written.
   * @param {number} [line=0] - Source line of the start tag.
   * @param {number} [column=0] - Source column of the start tag.
   * @param {boolean} [caseSensitive=false] - When true, `tagLc` keeps the case of
   *   `tag`; otherwise `tagLc` is the lower-cased tag.
   * @param {boolean} [synthetic=false] - When true the node is flagged `synthetic`:
   *   toString() omits its own tags and countUnclosed() does not count it.
   */
  constructor(tag, line = 0, column = 0, caseSensitive = false, synthetic = false) {
    super(null, line, column, true);
    this.tag = tag;
    this.attributes = [];
    this.badTerminator = null;
    this.closureState = ClosureState.UNCLOSED;
    this.endTagLine = 0;
    this.endTagColumn = 0;
    this.endTagText = '';
    this.equals = [];
    this.innerWhitespace = '';
    this.quotes = [];
    this.spacing = [];
    this.values = [];
    this.valuesLookup = {};
    this.tagLc = caseSensitive ? tag : tag.toLowerCase();
    if (synthetic)
      this.synthetic = true;
  }

  /**
   * Creates a stand-alone node that is already explicitly closed with `</tag>`.
   *
   * @param {string} tag - Tag name.
   * @returns {DomNode}
   */
  static createNode(tag) {
    const node = new DomNode(tag);
    node.setEndTag(`</${tag}>`);
    node.closureState = ClosureState.EXPLICITLY_CLOSED;
    return node;
  }

  /** @returns {number} Number of attributes on this node. */
  get attributeCount() {
    return this.attributes.length;
  }

  /**
   * Looks up an attribute by name or by position.
   *
   * @param {string|number} nameOrIndex - Attribute name or index.
   * @returns {Array} `[name, value]`; both entries are undefined when there is
   *   no such attribute.
   */
  getAttribute(nameOrIndex) {
    if ((0, util_1.isString)(nameOrIndex))
      nameOrIndex = this.attributes.indexOf(nameOrIndex);
    return [this.attributes[nameOrIndex], this.values[nameOrIndex]];
  }

  /**
   * Removes an attribute, keeping all parallel arrays and the lookup in step.
   *
   * @param {string|number} nameOrIndex - Attribute name or index.
   * @returns {boolean} True when an attribute was removed.
   */
  deleteAttribute(nameOrIndex) {
    if ((0, util_1.isString)(nameOrIndex))
      nameOrIndex = this.attributes.indexOf(nameOrIndex);
    if (this.attributes[nameOrIndex] === undefined)
      return false;
    delete this.valuesLookup[this.attributes[nameOrIndex]];
    this.attributes.splice(nameOrIndex, 1);
    this.values.splice(nameOrIndex, 1);
    this.equals.splice(nameOrIndex, 1);
    this.quotes.splice(nameOrIndex, 1);
    this.spacing.splice(nameOrIndex, 1);
    return true;
  }

  /** Removes every attribute. */
  clearAttributes() {
    this.valuesLookup = {};
    this.attributes = [];
    this.values = [];
    this.equals = [];
    this.quotes = [];
    this.spacing = [];
  }

  // TODO: Needs more work about how raw attribute values vs. actual values will be handled.
  /**
   * Adds, updates or deletes an attribute by name.
   *
   * - Unknown name: delegates to addAttribute() (which ignores a null value).
   * - Known name and `value === null`: the attribute is deleted.
   * - Otherwise the value is replaced; `leadingSpace`, `equals` and `quote`
   *   replace the stored formatting only when they are not null/undefined.
   *   A non-empty value is guaranteed an equals sign ('=') and a quote ('"').
   *
   * @param {string} name - Attribute name.
   * @param {string|null} [value=''] - New value, or null to delete.
   * @param {string} [leadingSpace] - Text written before the attribute name.
   * @param {string} [equals] - Text of the equals sign.
   * @param {string} [quote] - Quote marker.
   */
  setAttribute(name, value = '', leadingSpace, equals, quote) {
    const index = this.attributes.indexOf(name);
    if (index < 0)
      this.addAttribute(name, value, leadingSpace, equals, quote);
    else if (value === null)
      this.deleteAttribute(index);
    else {
      this.values[index] = value;
      this.spacing[index] = leadingSpace != null ? leadingSpace : this.spacing[index];
      this.equals[index] = equals != null ? equals : this.equals[index];
      this.quotes[index] = quote != null ? quote : this.quotes[index];
      this.valuesLookup[name] = value;
      if (value) {
        if (!this.equals[index])
          this.equals[index] = '=';
        if (!this.quotes[index])
          this.quotes[index] = '"';
      }
    }
  }

  /**
   * Appends an attribute without checking for an existing one of the same name.
   * A null value is ignored. When `equals`/`quote` are not supplied they default
   * to '' for an empty value and to '=' / '"' otherwise.
   *
   * @param {string} name - Attribute name.
   * @param {string|null} [value=''] - Attribute value.
   * @param {string} [leadingSpace=' '] - Text written before the attribute name.
   * @param {string} [equals] - Text of the equals sign.
   * @param {string} [quote] - Quote marker.
   */
  addAttribute(name, value = '', leadingSpace = ' ', equals, quote) {
    if (value === null)
      return;
    if (value === '') {
      equals = equals != null ? equals : '';
      quote = quote != null ? quote : '';
    }
    else {
      equals = equals != null ? equals : '=';
      quote = quote != null ? quote : '"';
    }
    this.attributes.push(name);
    this.values.push(value);
    this.spacing.push(leadingSpace);
    this.equals.push(equals);
    this.quotes.push(quote);
    this.valuesLookup[name] = value;
  }

  /**
   * Appends a child and sets its `parent` to this node.
   *
   * @param {DomElement} child - Element to append.
   */
  addChild(child) {
    this.children = this.children || [];
    child.parent = this;
    this.children.push(child);
  }

  /**
   * Removes a child, or this node itself.
   *
   * @param {number|DomElement} [child] - A child index, a child element, or
   *   nothing (falsy) to detach this node from its parent's child list.
   *   Requests that cannot be satisfied (no child list yet, element not found)
   *   are ignored.
   */
  remove(child) {
    if ((0, util_1.isNumber)(child)) {
      // `children` is created lazily, so it may not exist yet.
      if (this.children)
        this.children.splice(child, 1);
    }
    else if (child) {
      const index = this.children ? this.children.indexOf(child) : -1;
      if (index >= 0)
        this.children.splice(index, 1);
    }
    else if (this.parent && this.parent.children) {
      const index = this.parent.children.indexOf(this);
      if (index >= 0)
        this.parent.children.splice(index, 1);
    }
  }

  /**
   * Records the text and position of this node's end tag.
   *
   * @param {string} text - End tag text.
   * @param {number} [line=0] - Source line of the end tag.
   * @param {number} [column=0] - Source column of the end tag.
   */
  setEndTag(text, line = 0, column = 0) {
    this.endTagText = text;
    this.endTagLine = line;
    this.endTagColumn = column;
  }

  /**
   * Finds the first node, in document order and INCLUDING this node itself,
   * that matches the selector.
   *
   * @param {string|object} selector - Selector string (see stringToSelector) or
   *   an already parsed selector object.
   * @returns {DomNode|null}
   */
  querySelector(selector) {
    const results = [];
    this.querySelectorImpl(selector, results, 1);
    if (results.length === 0)
      return null;
    else
      return results[0];
  }

  /**
   * Finds every matching node, in document order and including this node itself.
   *
   * @param {string|object} selector - Selector string or parsed selector object.
   * @returns {DomNode[]}
   */
  querySelectorAll(selector) {
    const results = [];
    this.querySelectorImpl(selector, results);
    return results;
  }

  /**
   * Recursive worker behind querySelector()/querySelectorAll().
   *
   * @param {string|object} selector - Selector string or parsed selector object.
   * @param {DomNode[]} results - Accumulator; matches are appended in document order.
   * @param {number} [limit=Number.MAX_SAFE_INTEGER] - Stop once `results` holds
   *   this many nodes.
   */
  querySelectorImpl(selector, results, limit = Number.MAX_SAFE_INTEGER) {
    if (typeof selector === 'string')
      selector = stringToSelector(selector);
    if (results.length >= limit)
      return;
    if ((!selector.element || this.tagLc === selector.element) &&
        (!selector.qlass || (this.valuesLookup.class || '').split(/\s+/).indexOf(selector.qlass) >= 0) &&
        (!selector.id || this.valuesLookup.id === selector.id))
      results.push(this);
    if (this.children) {
      for (let i = 0; i < this.children.length && results.length < limit; ++i) {
        // The limit is handed down so a first-match search stops inside the
        // subtree as soon as it succeeds instead of collecting every match there.
        if (this.children[i] instanceof DomNode)
          this.children[i].querySelectorImpl(selector, results, limit);
      }
    }
  }

  /**
   * Concatenated text of all descendants: CDATA content verbatim, text elements
   * with entities unescaped when flagged `possibleEntities`, and nested nodes
   * recursively. Other element kinds contribute nothing.
   *
   * @returns {string}
   */
  get textContent() {
    const text = [];
    if (this.children) {
      for (const child of this.children) {
        if (child instanceof CData)
          text.push(child.content);
        else if (child instanceof TextElement)
          text.push(child.possibleEntities ? (0, characters_1.unescapeEntities)(child.content) : child.content);
        else if (child instanceof DomNode)
          text.push(child.textContent);
      }
    }
    return text.join('');
  }

  /** @returns {string} Source text of the children only (no start/end tag). */
  get innerHTML() {
    return this.toString(false);
  }

  /**
   * Counts non-synthetic nodes in this subtree (this node included) by closure state.
   *
   * @returns {number[]} `[unclosed, implicitlyClosed]`.
   */
  countUnclosed() {
    let unclosed = 0;
    let implicitlyClosed = 0;
    if (!this.synthetic) {
      if (this.closureState === ClosureState.UNCLOSED)
        ++unclosed;
      else if (this.closureState === ClosureState.IMPLICITLY_CLOSED)
        ++implicitlyClosed;
    }
    if (this.children) {
      this.children.forEach(child => {
        if (child instanceof DomNode) {
          const [childUnclosed, childImplicit] = child.countUnclosed();
          unclosed += childUnclosed;
          implicitlyClosed += childImplicit;
        }
      });
    }
    return [unclosed, implicitlyClosed];
  }

  /**
   * Plain-object form for JSON.stringify(). Optional fields are only present
   * when they carry information (non-zero position, attributes, parent, ...).
   *
   * @returns {object}
   */
  toJSON() {
    const json = { tag: this.tag };
    if (this.line)
      json.line = this.line;
    if (this.column)
      json.column = this.column;
    if (this.synthetic)
      json.synthetic = true;
    if (this.badTerminator !== null)
      json.badTerminator = this.badTerminator;
    json.depth = this.depth;
    if (json.depth !== this.syntheticDepth)
      json.syntheticDepth = this.syntheticDepth;
    json.closureState = this.closureState;
    if (this.attributes.length > 0)
      json.values = this.attributes.reduce((values, attrib, index) => { values[attrib] = this.values[index]; return values; }, {});
    if (this.parent)
      json.parentTag = this.parent.tag;
    if (this.children)
      json.children = this.children;
    if (this.closureState === ClosureState.EXPLICITLY_CLOSED && this.endTagText)
      json.endTagText = `${this.endTagText} (${this.endTagLine}, ${this.endTagColumn})`;
    return json;
  }

  /**
   * Rebuilds source text from the stored pieces.
   *
   * @param {boolean} [includeSelf=true] - When false only the children are
   *   rendered. Synthetic nodes never render their own tags.
   * @returns {string}
   */
  toString(includeSelf = true) {
    const parts = [];
    const showOwnTags = includeSelf && !this.synthetic;
    if (showOwnTags) {
      parts.push('<', this.tag);
      if (this.attributes) {
        this.attributes.forEach((attrib, index) => {
          parts.push(this.spacing[index], attrib, this.equals[index], OQ(this.quotes[index]), this.values[index], CQ(this.quotes[index]));
        });
      }
      if (this.innerWhitespace)
        parts.push(this.innerWhitespace);
      // A recorded bad terminator replaces the normal '>' or '/>'.
      if (this.badTerminator !== null)
        parts.push(this.badTerminator);
      else if (this.closureState === ClosureState.SELF_CLOSED)
        parts.push('/>');
      else
        parts.push('>');
    }
    if (this.children)
      this.children.forEach(child => parts.push(child.toString()));
    if (showOwnTags && this.closureState === ClosureState.EXPLICITLY_CLOSED && this.endTagText)
      parts.push(this.endTagText);
    return parts.join('');
  }
}
exports.DomNode = DomNode;
/**
 * Builds a DOM tree from parser events.
 *
 * Keeps a synthetic root node, a stack of currently open nodes (`openStack`,
 * whose first entry is the root) and `currentNode`, the top of that stack.
 */
class DomModel {
  constructor() {
    this.root = new DomNode('/', 0, 0, false, true);
    this.currentNode = this.root;
    this.inMathOrSvg = 0;
    this.openStack = [this.root];
    this.xmlMode = false;
  }

  /** @returns {DomNode} The synthetic root node. */
  getRoot() {
    return this.root;
  }

  /** @returns {DomNode} The innermost open node. */
  getCurrentNode() {
    return this.currentNode;
  }

  /**
   * Appends an attribute to the current node.
   *
   * @param {string} name - Attribute name.
   * @param {string} value - Attribute value.
   * @param {string} [leadingSpace=''] - Text written before the attribute name.
   * @param {string} [equals='='] - Text of the equals sign.
   * @param {string} [quote='"'] - Quote marker.
   */
  addAttribute(name, value, leadingSpace = '', equals = '=', quote = '"') {
    this.currentNode.addAttribute(name, value, leadingSpace, equals, quote);
  }

  /**
   * Stores the whitespace found before the end of the current node's start tag.
   *
   * @param {string} whitespace - The whitespace text; falsy values are stored as ''.
   */
  addInnerWhitespace(whitespace) {
    if (this.currentNode)
      this.currentNode.innerWhitespace = whitespace || '';
  }

  /**
   * Tells whether switching to XML mode is still possible: nothing but the root
   * may be open, and the root may hold at most one child whose text is blank.
   *
   * @returns {boolean}
   */
  canDoXmlMode() {
    if (this.openStack.length !== 1)
      return false;
    // The root's child list is created lazily and is absent on a fresh model.
    const children = this.root.children || [];
    return children.length === 0 ||
      (children.length === 1 && children[0].toString().trim() === '');
  }

  /** @returns {number} Size of the open stack minus two. */
  getDepth() {
    return this.openStack.length - 2;
  }

  /** @param {boolean} mode - Turns XML mode on or off. */
  setXmlMode(mode) {
    this.xmlMode = mode;
  }

  /**
   * Outside XML mode, implicitly closes open nodes that the given node's start
   * tag ends, according to OPEN_IMPLIES_CLOSE.
   *
   * @param {DomNode} node - The node about to be opened.
   */
  prePush(node) {
    if (!this.xmlMode && node.tagLc in elements_1.OPEN_IMPLIES_CLOSE) {
      while (elements_1.OPEN_IMPLIES_CLOSE[node.tagLc].has(this.currentNode.tagLc)) {
        this.currentNode.closureState = ClosureState.IMPLICITLY_CLOSED;
        this.openStack.pop();
        this.updateCurrentNode();
      }
    }
  }

  /** @param {DomElement} child - Element to append to the current node. */
  addChild(child) {
    this.currentNode.addChild(child);
  }

  /**
   * Normalises a table's structure (no-op in XML mode or for an empty table):
   * loose cells are wrapped in synthetic `tr` rows and, when the table uses at
   * least one thead/tbody/tfoot, loose rows are moved into a section.
   *
   * @param {DomNode} table - The table node to examine.
   */
  examineTable(table) {
    const children = table.children;
    if (!children || this.xmlMode)
      return;
    const SECTION_TAG = /^(thead|tbody|tfoot)$/;
    DomModel.insertRowsWhereNeeded(table);
    const sections = new Set();
    for (const elem of children) {
      if (elem instanceof DomNode && SECTION_TAG.test(elem.tagLc)) {
        sections.add(elem.tagLc);
        DomModel.insertRowsWhereNeeded(elem);
      }
    }
    if (sections.size === 0)
      return;
    let section;
    for (let i = 0; i < children.length; ++i) {
      const elem = children[i];
      if (!(elem instanceof DomNode))
        continue;
      if (SECTION_TAG.test(elem.tagLc)) {
        // An explicitly closed section cannot adopt the rows that follow it.
        section = elem.closureState === ClosureState.EXPLICITLY_CLOSED ? undefined : elem;
      }
      else if (elem.tagLc === 'tr') {
        const hasTh = !!elem.querySelector('th');
        if (!section || (hasTh && section.tagLc !== 'thead')) {
          // The new synthetic section takes the row's slot in the table.
          section = new DomNode(hasTh ? 'thead' : 'tbody', 0, 0, false, true);
          section.parent = table;
          children[i] = section;
        }
        else {
          // The row moves into an existing section, so drop it from the
          // table's own list; otherwise it would be rendered twice.
          children.splice(i--, 1);
        }
        section.addChild(elem);
      }
    }
  }

  /**
   * Wraps `td`/`th` children that are not inside a row: consecutive loose cells
   * are moved into a synthetic `tr`, or into a preceding `tr` that was not
   * explicitly closed.
   *
   * @param {DomNode} node - A table or table section.
   */
  static insertRowsWhereNeeded(node) {
    const children = node.children;
    if (!children)
      return;
    let row;
    for (let i = 0; i < children.length; ++i) {
      const elem = children[i];
      if (!(elem instanceof DomNode))
        continue;
      if (elem.tagLc === 'th' || elem.tagLc === 'td') {
        if (!row) {
          // The new synthetic row takes the cell's slot in the parent.
          row = new DomNode('tr', 0, 0, false, true);
          row.parent = node;
          children[i] = row;
        }
        else {
          // The cell moves into an existing row, so drop it from the parent's
          // list; otherwise it would be rendered twice.
          children.splice(i--, 1);
        }
        row.addChild(elem);
      }
      else if (elem.tagLc === 'tr')
        row = elem.closureState === ClosureState.EXPLICITLY_CLOSED ? undefined : elem;
    }
  }

  /**
   * Opens a node: it becomes the current node and the top of the open stack.
   *
   * @param {DomNode} node - The node to open.
   */
  push(node) {
    this.openStack.push(node);
    this.currentNode = node;
    if (node.tagLc === 'math' || node.tagLc === 'svg')
      ++this.inMathOrSvg;
  }

  /**
   * Closes the current node, or unwinds the open stack to a matching outer node.
   *
   * @param {string|null|undefined} tagLc - Lower-case tag of the end tag.
   *   `null` marks the current node SELF_CLOSED, `undefined` marks it
   *   VOID_CLOSED; a tag name marks the matching node EXPLICITLY_CLOSED.
   * @param {string} [endTagText] - End tag text; defaults to `</tagLc>`.
   * @param {number} [line=0] - Source line of the end tag.
   * @param {number} [column=0] - Source column of the end tag.
   * @returns {boolean} False when no matching open node was found; the end tag
   *   is then stored as an UnmatchedClosingTag child of the current node.
   */
  pop(tagLc, endTagText = '</' + tagLc + '>', line = 0, column = 0) {
    let popped = false;
    let parseError = false;
    if (!tagLc || this.currentNode.tagLc === tagLc) {
      popped = true;
      this.openStack.pop();
      if (tagLc === null)
        this.currentNode.closureState = ClosureState.SELF_CLOSED;
      else if (tagLc === undefined)
        this.currentNode.closureState = ClosureState.VOID_CLOSED;
      else {
        this.currentNode.closureState = ClosureState.EXPLICITLY_CLOSED;
        this.currentNode.setEndTag(endTagText, line, column);
      }
      if (this.currentNode.tagLc === 'table')
        this.examineTable(this.currentNode);
    }
    if (!popped && !this.xmlMode) {
      let nodeIndex = this.openStack.map(node => node.tagLc).lastIndexOf(tagLc);
      if (nodeIndex > 0 && elements_1.FORMATTING_ELEMENTS.has(tagLc)) {
        // A marker element opened after the formatting element blocks the match.
        for (let i = nodeIndex + 1; i < this.openStack.length; ++i) {
          if (elements_1.MARKER_ELEMENTS.has(this.openStack[i].tagLc)) {
            nodeIndex = -1;
            break;
          }
        }
      }
      // Re-checked after the marker scan: unwinding towards index -1 could never
      // terminate. Index 0 (the root) is deliberately excluded as well.
      if (nodeIndex > 0) {
        while (this.openStack.length > nodeIndex) {
          if (!this.currentNode.closureState) {
            if (this.openStack.length - 1 === nodeIndex) {
              popped = true;
              this.currentNode.closureState = ClosureState.EXPLICITLY_CLOSED;
              this.currentNode.setEndTag(endTagText, line, column);
            }
            else
              this.currentNode.closureState = ClosureState.IMPLICITLY_CLOSED;
            if (this.currentNode.tagLc === 'table')
              this.examineTable(this.currentNode);
          }
          this.openStack.pop();
          this.updateCurrentNode();
        }
      }
    }
    if (!popped) {
      this.addChild(new UnmatchedClosingTag(endTagText, line, column));
      parseError = true;
    }
    // The root must always remain on the stack.
    if (this.openStack.length === 0)
      this.openStack.push(this.root);
    this.updateCurrentNode();
    // Recount math/svg nesting and re-link parents along the remaining stack.
    this.inMathOrSvg = 0;
    this.openStack.forEach((node, index) => {
      this.inMathOrSvg += (node.tagLc === 'math' || node.tagLc === 'svg' ? 1 : 0);
      if (index > 0)
        node.parent = this.openStack[index - 1];
    });
    return !parseError;
  }

  /** @returns {boolean} True in XML mode or while inside a math/svg element. */
  shouldParseCData() {
    return this.xmlMode || this.inMathOrSvg > 0;
  }

  /** Re-points `currentNode` at the top of the open stack (or the root if empty). */
  updateCurrentNode() {
    this.currentNode = last(this.openStack) || this.root;
  }
}
exports.DomModel = DomModel;
//# sourceMappingURL=dom.js.map