/*
 * tag-soup
 * The fastest pure JS SAX/DOM XML/HTML parser.
 */
import { __assign } from "tslib";
import { tokenize } from './tokenize';
import { ObjectPool } from '@smikhalevski/object-pool';
import { createAttributeToken, createDataToken, createEndTagToken, createStartTagToken } from './tokens';
/**
 * Creates a new stateful SAX parser.
 *
 * The returned object exposes three functions:
 *
 * - `write(sourceChunk)` appends the chunk to an internal buffer and tokenizes the buffer in streaming mode. The
 *   part of the buffer before the index returned by `tokenize` is dropped and the rest is kept for the next call.
 * - `parse(source)` appends `source` to whatever `write` left in the buffer, tokenizes it in non-streaming mode,
 *   notifies the handler about the end of the source and then resets the parser. The parser is reset even when
 *   tokenization or a handler callback throws, so a failed `parse` cannot leak stale input into the next call.
 * - `reset()` clears the buffer and the offset and lets the handler release its own state.
 *
 * @param handler The parsing handler.
 * @param options Parsing options.
 */
export function createSaxParser(handler, options) {
    // Work on a private copy so later changes to the caller's options object do not affect this parser.
    var opts = __assign({}, options);

    // Source text that was written but not yet consumed by the tokenizer.
    var buffer = '';

    // Number of characters already dropped from the front of `buffer`; passed to `tokenize` as the chunk offset.
    var chunkOffset = 0;

    var tokenizerOptions = {
        startTagTokenPool: new ObjectPool(createStartTagToken),
        attributeTokenPool: new ObjectPool(createAttributeToken),
        endTagToken: createEndTagToken(),
        dataToken: createDataToken(),
    };

    var forgivingHandler = createForgivingHandler(handler, tokenizerOptions, opts);

    var reset = function () {
        buffer = '';
        chunkOffset = 0;
        if (forgivingHandler.reset != null) {
            forgivingHandler.reset();
        }
    };

    var write = function (sourceChunk) {
        // A missing or empty chunk contributes nothing, but the buffer is still re-tokenized.
        buffer += sourceChunk || '';
        var index = tokenize(buffer, true, chunkOffset, tokenizerOptions, opts, forgivingHandler);
        // `slice` replaces the deprecated `substr`; with a single argument both behave the same.
        buffer = buffer.slice(index);
        chunkOffset += index;
    };

    var parse = function (source) {
        buffer += source || '';
        try {
            var index = tokenize(buffer, false, chunkOffset, tokenizerOptions, opts, forgivingHandler);
            if (forgivingHandler.sourceEnd != null) {
                forgivingHandler.sourceEnd(chunkOffset + index);
            }
        }
        finally {
            // Always return to a clean state, otherwise input left over from a failed parse would be
            // prepended to the next one.
            reset();
        }
    };

    return {
        write: write,
        parse: parse,
        reset: reset,
    };
}
/**
 * Wraps a SAX handler so that start and end tags reach the user callbacks balanced.
 *
 * The wrapper keeps a stack of the start tag tokens that are currently open. It uses the stack to
 *
 * - mark a start tag as self-closing when `options.checkVoidTag` says so;
 * - emit implicit end tags for ancestors that `options.endsAncestorAt` asks to close, for descendants that are
 *   still open when an end tag of their ancestor arrives, and for everything still open at the end of the source;
 * - ignore end tags that match no open start tag;
 * - release start tag tokens and their attribute tokens back to the pools of `tokenizerOptions` once the tag is
 *   closed.
 *
 * @param handler The user handler. It is shallow-copied and never mutated.
 * @param tokenizerOptions Provides `startTagTokenPool` and `attributeTokenPool` that tokens are released to.
 * @param options Parsing options; only `checkVoidTag` and `endsAncestorAt` are read here.
 * @returns A copy of `handler` with the tag-related callbacks replaced by the balancing ones.
 */
function createForgivingHandler(handler, tokenizerOptions, options) {
    var startTagCallback = handler.startTag;
    var endTagCallback = handler.endTag;
    var resetCallback = handler.reset;
    var sourceEndCallback = handler.sourceEnd;

    var startTagTokenPool = tokenizerOptions.startTagTokenPool;
    var attributeTokenPool = tokenizerOptions.attributeTokenPool;

    var checkVoidTag = options.checkVoidTag;
    var endsAncestorAt = options.endsAncestorAt;

    // A dedicated token that is re-used for every implicit end tag.
    var endTagToken = createEndTagToken();

    var forgivingHandler = __assign({}, handler);

    // Array-like stack of the start tag tokens that are currently open; `length` is maintained by hand.
    var ancestors = { length: 0 };

    // Returns a start tag token and all of its attribute tokens to their pools.
    var releaseStartTag = function (token) {
        startTagTokenPool.release(token);
        for (var i = 0; i < token.attributes.length; ++i) {
            attributeTokenPool.release(token.attributes[i]);
        }
    };

    // Nobody listens to tags, so there is nothing to balance: just recycle start tag tokens right away.
    if (!startTagCallback && !endTagCallback) {
        forgivingHandler.startTag = releaseStartTag;
        return forgivingHandler;
    }

    // Releases every open ancestor from `ancestorIndex` up and truncates the stack to that index.
    var releaseAncestors = function (ancestorIndex) {
        for (var i = ancestorIndex; i < ancestors.length; ++i) {
            releaseStartTag(ancestors[i]);
            // Drop the reference so a pooled token is never reachable through the stack.
            ancestors[i] = undefined;
        }
        ancestors.length = ancestorIndex;
    };

    // Emits end tags (innermost first) for the ancestors from `ancestorIndex` up and removes them from the stack.
    var triggerImplicitEnd = function (ancestorIndex, end) {
        // Only an integer that points at an open ancestor is acted upon. The explicit type check matters:
        // `null`, booleans and numeric strings would otherwise be coerced by the arithmetic checks and be
        // treated as a valid index.
        if (typeof ancestorIndex !== 'number'
            || ancestorIndex % 1 !== 0
            || ancestorIndex < 0
            || ancestorIndex >= ancestors.length) {
            return;
        }
        if (!endTagCallback) {
            releaseAncestors(ancestorIndex);
            return;
        }
        for (var i = ancestors.length - 1; i >= ancestorIndex; --i) {
            endTagToken.rawName = ancestors[i].rawName;
            endTagToken.name = ancestors[i].name;
            // An implicit end tag has no text in the source: it is zero-width and has no name range.
            endTagToken.start = endTagToken.end = end;
            endTagToken.nameStart = endTagToken.nameEnd = -1;
            endTagCallback(endTagToken);
        }
        releaseAncestors(ancestorIndex);
    };

    forgivingHandler.startTag = function (token) {
        if (!token.selfClosing) {
            token.selfClosing = (checkVoidTag != null && checkVoidTag(token)) || false;
        }
        if (endsAncestorAt != null && ancestors.length !== 0) {
            triggerImplicitEnd(endsAncestorAt(ancestors, token), token.start);
        }
        if (startTagCallback != null) {
            startTagCallback(token);
        }
        if (token.selfClosing) {
            // Nothing can be nested in a self-closing tag, so its token can be recycled immediately.
            releaseStartTag(token);
        }
        else {
            ancestors[ancestors.length++] = token;
        }
    };

    forgivingHandler.endTag = function (token) {
        // Look for the closest open start tag with the same name; an end tag without one is ignored.
        for (var i = ancestors.length - 1; i >= 0; --i) {
            if (ancestors[i].name !== token.name) {
                continue;
            }
            // Close the descendants that were left open before closing the matched tag itself.
            triggerImplicitEnd(i + 1, token.start);
            if (endTagCallback != null) {
                endTagCallback(token);
            }
            // At this point the matched tag is the top of the stack; release it and clear its slot.
            releaseAncestors(i);
            break;
        }
    };

    forgivingHandler.sourceEnd = function (sourceLength) {
        // Everything that is still open is closed at the very end of the source.
        triggerImplicitEnd(0, sourceLength);
        if (sourceEndCallback != null) {
            sourceEndCallback(sourceLength);
        }
    };

    forgivingHandler.reset = function () {
        // Recycle the open tokens without emitting end tags.
        releaseAncestors(0);
        if (resetCallback != null) {
            resetCallback();
        }
    };

    return forgivingHandler;
}