UNPKG

tag-soup

Version:

The fastest pure JS SAX/DOM XML/HTML parser.

125 lines (124 loc) 5.12 kB
import { __assign } from "tslib";
import { tokenize } from './tokenize';
import { ObjectPool } from '@smikhalevski/object-pool';
import { createAttributeToken, createDataToken, createEndTagToken, createStartTagToken } from './tokens';

/**
 * Creates a new stateful SAX parser.
 *
 * The returned object exposes three functions:
 * - `write(sourceChunk)` appends a chunk to the internal buffer and tokenizes it in streaming mode. The part of the
 *   buffer before the index returned by `tokenize` is dropped, the rest is kept for the next call.
 * - `parse(source)` appends the source to whatever is still buffered, tokenizes it in non-streaming mode, notifies
 *   the handler via `sourceEnd` and then resets the parser.
 * - `reset()` clears the buffer and the chunk offset and notifies the handler via `reset`.
 *
 * @param handler The parsing handler.
 * @param options Parsing options.
 */
export function createSaxParser(handler, options) {
  // Shallow copy, so later changes of the caller's options object don't affect this parser.
  const opts = __assign({}, options);

  // Source that was written but not yet consumed by the tokenizer.
  let buffer = '';

  // Total of the indices returned by `tokenize` for previous `write` calls; passed back to `tokenize` on each call.
  let chunkOffset = 0;

  const tokenizerOptions = {
    startTagTokenPool: new ObjectPool(createStartTagToken),
    attributeTokenPool: new ObjectPool(createAttributeToken),
    endTagToken: createEndTagToken(),
    dataToken: createDataToken(),
  };

  const forgivingHandler = createForgivingHandler(handler, tokenizerOptions, opts);

  const reset = function () {
    buffer = '';
    chunkOffset = 0;
    if (forgivingHandler.reset != null) {
      forgivingHandler.reset();
    }
  };

  const write = function (sourceChunk) {
    // A falsy chunk (undefined, null, '') contributes nothing to the buffer.
    buffer += sourceChunk || '';
    const index = tokenize(buffer, true, chunkOffset, tokenizerOptions, opts, forgivingHandler);
    buffer = buffer.slice(index);
    chunkOffset += index;
  };

  const parse = function (source) {
    buffer += source || '';
    const index = tokenize(buffer, false, chunkOffset, tokenizerOptions, opts, forgivingHandler);
    if (forgivingHandler.sourceEnd != null) {
      forgivingHandler.sourceEnd(chunkOffset + index);
    }
    reset();
  };

  return { write, parse, reset };
}

/**
 * Wraps a handler so that it keeps track of the currently opened tags, synthesizes end tags for the ones that are
 * closed implicitly, and returns start tag and attribute tokens to their pools once they are no longer needed.
 *
 * @param handler The handler provided by the user. It is copied, the original object is not modified.
 * @param tokenizerOptions The tokenizer options that hold the start tag and attribute token pools.
 * @param options Parsing options; `checkVoidTag` and `endsAncestorAt` are read from here.
 * @returns The handler to pass to the tokenizer.
 */
function createForgivingHandler(handler, tokenizerOptions, options) {
  const startTagCallback = handler.startTag;
  const endTagCallback = handler.endTag;
  const resetCallback = handler.reset;
  const sourceEndCallback = handler.sourceEnd;

  const startTagTokenPool = tokenizerOptions.startTagTokenPool;
  const attributeTokenPool = tokenizerOptions.attributeTokenPool;

  const checkVoidTag = options.checkVoidTag;
  const endsAncestorAt = options.endsAncestorAt;

  // A single token instance that is re-populated for every synthesized (implicit) end tag.
  const endTagToken = createEndTagToken();

  const forgivingHandler = __assign({}, handler);

  // Array-like stack of start tag tokens of the tags that are currently open.
  const ancestors = { length: 0 };

  // Returns the start tag token and all of its attribute tokens to their pools.
  const releaseStartTag = function (token) {
    startTagTokenPool.release(token);
    for (let i = 0; i < token.attributes.length; ++i) {
      attributeTokenPool.release(token.attributes[i]);
    }
  };

  // Nobody listens to tags, so there is no need to track ancestors: tokens are released as soon as they are emitted.
  if (!startTagCallback && !endTagCallback) {
    forgivingHandler.startTag = releaseStartTag;
    return forgivingHandler;
  }

  // Releases every ancestor starting at ancestorIndex and truncates the stack to that length.
  const releaseAncestors = function (ancestorIndex) {
    for (let i = ancestorIndex; i < ancestors.length; ++i) {
      releaseStartTag(ancestors[i]);
      // Clear the slot so that a released token isn't reachable through the stack anymore.
      ancestors[i] = undefined;
    }
    ancestors.length = ancestorIndex;
  };

  // Emits end tags for the ancestors at [ancestorIndex, ancestors.length), innermost first, and then releases them.
  // `end` is used as both the start and the end offset of the synthesized end tag tokens.
  const triggerImplicitEnd = function (ancestorIndex, end) {
    // The index may come from the user-provided endsAncestorAt callback, so it is validated strictly.
    // Number.isInteger doesn't coerce: null, booleans and strings are rejected instead of being treated as an index.
    if (!Number.isInteger(ancestorIndex) || ancestorIndex < 0 || ancestorIndex >= ancestors.length) {
      return;
    }
    if (endTagCallback) {
      for (let i = ancestors.length - 1; i >= ancestorIndex; --i) {
        const ancestor = ancestors[i];
        endTagToken.rawName = ancestor.rawName;
        endTagToken.name = ancestor.name;
        endTagToken.start = endTagToken.end = end;
        // The synthesized tag has no name in the source, hence -1.
        endTagToken.nameStart = endTagToken.nameEnd = -1;
        endTagCallback(endTagToken);
      }
    }
    releaseAncestors(ancestorIndex);
  };

  forgivingHandler.startTag = function (token) {
    // A tag that isn't self-closing in the source may still be treated as such if checkVoidTag says so.
    if (!token.selfClosing) {
      token.selfClosing = (checkVoidTag != null && checkVoidTag(token)) || false;
    }

    // Let the user decide whether this start tag implicitly ends some of the opened ancestors.
    if (endsAncestorAt != null && ancestors.length !== 0) {
      triggerImplicitEnd(endsAncestorAt(ancestors, token), token.start);
    }

    if (startTagCallback != null) {
      startTagCallback(token);
    }

    if (token.selfClosing) {
      releaseStartTag(token);
    } else {
      // The token is kept until the corresponding end tag (explicit or implicit) is met.
      ancestors[ancestors.length++] = token;
    }
  };

  forgivingHandler.endTag = function (token) {
    // Look for the closest opened tag with the same name. If there's none, the end tag is ignored.
    for (let i = ancestors.length - 1; i >= 0; --i) {
      if (ancestors[i].name !== token.name) {
        continue;
      }
      // Implicitly end everything that was opened inside the matched tag.
      triggerImplicitEnd(i + 1, token.start);
      if (endTagCallback != null) {
        endTagCallback(token);
      }
      // Release the matched start tag and clear its slot.
      releaseAncestors(i);
      break;
    }
  };

  forgivingHandler.sourceEnd = function (sourceLength) {
    // Tags that are still open at the end of the source are ended implicitly.
    triggerImplicitEnd(0, sourceLength);
    if (sourceEndCallback != null) {
      sourceEndCallback(sourceLength);
    }
  };

  forgivingHandler.reset = function () {
    // No end tags are emitted on reset, tokens are just returned to the pools.
    releaseAncestors(0);
    if (resetCallback != null) {
      resetCallback();
    }
  };

  return forgivingHandler;
}