domhandler
Version:
Handler for htmlparser2 that turns pages into a dom
156 lines (155 loc) • 4.8 kB
JavaScript
import { ElementType } from "domelementtype";
import { CDATA, Comment, Document, Element, ProcessingInstruction, Text, } from "./node.js";
export * from "./node.js";
// Default options
const defaultOptions = {
withStartIndices: false,
withEndIndices: false,
xmlMode: false,
};
/**
* Event-based handler that builds a DOM tree from parser callbacks.
*/
export class DomHandler {
/** The elements of the DOM */
dom = [];
/** The root element for the DOM */
root = new Document(this.dom);
/** Called once parsing has completed. */
callback;
/** Settings for the handler. */
options;
/** Callback whenever a tag is closed. */
elementCB;
/** Indicated whether parsing has been completed. */
done = false;
/** Stack of open tags. */
tagStack = [this.root];
/** A data node that is still being written to. */
lastNode = null;
/** Reference to the parser instance. Used for location information. */
parser = null;
/**
* @param callback Called once parsing has completed.
* @param options Settings for the handler.
* @param elementCB Callback whenever a tag is closed.
*/
constructor(callback, options, elementCB) {
// Make it possible to skip arguments, for backwards-compatibility
if (typeof options === "function") {
elementCB = options;
options = defaultOptions;
}
if (typeof callback === "object") {
options = callback;
callback = undefined;
}
this.callback = callback ?? null;
this.options = options ?? defaultOptions;
this.elementCB = elementCB ?? null;
}
onparserinit(parser) {
this.parser = parser;
}
// Resets the handler back to starting state
onreset() {
this.dom = [];
this.root = new Document(this.dom);
this.done = false;
this.tagStack = [this.root];
this.lastNode = null;
this.parser = null;
}
// Signals the handler that parsing is done
onend() {
if (this.done)
return;
this.done = true;
this.parser = null;
this.handleCallback(null);
}
onerror(error) {
this.handleCallback(error);
}
onclosetag() {
this.lastNode = null;
const element = this.tagStack.pop();
if (this.options.withEndIndices && this.parser) {
element.endIndex = this.parser.endIndex;
}
if (this.elementCB)
this.elementCB(element);
}
onopentag(name, attribs) {
const type = this.options.xmlMode ? ElementType.Tag : undefined;
const element = new Element(name, attribs, undefined, type);
this.addNode(element);
this.tagStack.push(element);
}
ontext(data) {
const { lastNode } = this;
if (lastNode && lastNode.type === ElementType.Text) {
lastNode.data += data;
if (this.options.withEndIndices && this.parser) {
lastNode.endIndex = this.parser.endIndex;
}
}
else {
const node = new Text(data);
this.addNode(node);
this.lastNode = node;
}
}
oncomment(data) {
if (this.lastNode && this.lastNode.type === ElementType.Comment) {
this.lastNode.data += data;
return;
}
const node = new Comment(data);
this.addNode(node);
this.lastNode = node;
}
oncommentend() {
this.lastNode = null;
}
oncdatastart() {
const text = new Text("");
const node = new CDATA([text]);
this.addNode(node);
text.parent = node;
this.lastNode = text;
}
oncdataend() {
this.lastNode = null;
}
onprocessinginstruction(name, data) {
const node = new ProcessingInstruction(name, data);
this.addNode(node);
}
handleCallback(error) {
if (typeof this.callback === "function") {
this.callback(error, this.dom);
}
else if (error) {
throw error;
}
}
addNode(node) {
const parent = this.tagStack[this.tagStack.length - 1];
const previousSibling = parent.children[parent.children.length - 1];
if (this.options.withStartIndices && this.parser) {
node.startIndex = this.parser.startIndex;
}
if (this.options.withEndIndices && this.parser) {
node.endIndex = this.parser.endIndex;
}
parent.children.push(node);
if (previousSibling) {
node.prev = previousSibling;
previousSibling.next = node;
}
node.parent = parent;
this.lastNode = null;
}
}
export default DomHandler;