tag-soup
Version:
The fastest pure JS SAX/DOM XML/HTML parser.
136 lines (135 loc) • 3.56 kB
JavaScript
import { decodeHTML, escapeXML } from 'speedy-entities';
import { createDOMParser } from './createDOMParser.js';
import { createSAXParser } from './createSAXParser.js';
import { createSerializer } from './createSerializer.js';
import { createTokenizer } from './createTokenizer.js';
/**
 * Form-related tags. Shared as the close-list of the `select`, `input`, `output`, `button`,
 * `datalist` and `textarea` entries of `implicitlyClosedTags`.
 */
const formTags = [
  'input',
  'option',
  'optgroup',
  'select',
  'button',
  'datalist',
  'textarea',
];

/**
 * Close-list that contains only the paragraph tag. Shared by `p` itself and by every
 * block-level entry of `implicitlyClosedTags`.
 */
const pTag = ['p'];

/**
 * Close-list shared by the `h1`–`h6` entries of `implicitlyClosedTags`: all six heading levels
 * followed by the paragraph tag.
 */
const headingTags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', ...pTag];

/** Close-list shared by the `dd` and `dt` entries of `implicitlyClosedTags`. */
const ddtTags = ['dd', 'dt'];

/** Close-list shared by the `rt` and `rp` entries of `implicitlyClosedTags`. */
const rtpTags = ['rt', 'rp'];

/** Close-list shared by the `tbody` and `tfoot` entries of `implicitlyClosedTags`. */
const tableSectionTags = ['thead', 'tbody'];
/**
 * Parser options describing HTML tag semantics.
 *
 * The same object is handed to `createTokenizer`, `createDOMParser` and `createSAXParser` to
 * build the exported HTML tokenizer and parsers. Its `foreignTags` property is attached by a
 * separate assignment right after this declaration, because the `svg` entry has to refer back
 * to this very object.
 */
const htmlParserOptions = {
  // HTML void elements, including legacy ones (`basefont`, `command`, `frame`, `isindex`,
  // `keygen`). The serializer options reuse this exact array instance.
  voidTags: [
    'area',
    'base',
    'basefont',
    'br',
    'col',
    'command',
    'embed',
    'frame',
    'hr',
    'img',
    'input',
    'isindex',
    'keygen',
    'link',
    'meta',
    'param',
    'source',
    'track',
    'wbr',
  ],

  // Tags whose content is raw text — presumably not scanned for nested markup; confirm the
  // exact handling in the tokenizer.
  rawTextTags: ['script', 'style', 'textarea', 'iframe', 'noembed', 'noframes', 'plaintext'],

  // Presumably keyed by the start tag being opened, with the value listing the still-open tags
  // that this start tag implicitly closes (e.g. `<p>a<div>` behaves like `<p>a</p><div>`).
  // TODO confirm the exact stack-walking semantics in the tokenizer.
  implicitlyClosedTags: {
    // Table cells and rows.
    tr: ['tr', 'th', 'td'],
    th: ['th', 'td'],
    // NOTE(review): the HTML Standard only lets a cell's end tag be omitted before `td`/`th`;
    // the 'thead' entry looks unusual — confirm it is intentional before relying on it.
    td: ['thead', 'th', 'td'],

    body: ['head', 'link', 'script'],
    a: ['a'],
    li: ['li'],

    // Paragraphs and headings.
    p: pTag,
    h1: headingTags,
    h2: headingTags,
    h3: headingTags,
    h4: headingTags,
    h5: headingTags,
    h6: headingTags,

    // Form controls.
    select: formTags,
    input: formTags,
    output: formTags,
    button: formTags,
    datalist: formTags,
    textarea: formTags,
    option: ['option'],
    optgroup: ['optgroup', 'option'],

    // Definition list items.
    dd: ddtTags,
    dt: ddtTags,

    // Start tags that close an open `<p>`. Per the HTML Standard a `p` end tag may be omitted
    // before any of these elements; `dialog`, `hgroup`, `menu` and `search` are part of that
    // list and were previously missing here.
    address: pTag,
    article: pTag,
    aside: pTag,
    blockquote: pTag,
    details: pTag,
    dialog: pTag,
    div: pTag,
    dl: pTag,
    fieldset: pTag,
    figcaption: pTag,
    figure: pTag,
    footer: pTag,
    form: pTag,
    header: pTag,
    hgroup: pTag,
    hr: pTag,
    main: pTag,
    menu: pTag,
    nav: pTag,
    ol: pTag,
    pre: pTag,
    search: pTag,
    section: pTag,
    table: pTag,
    ul: pTag,

    // Ruby annotations.
    rt: rtpTags,
    rp: rtpTags,

    // Table sections.
    tbody: tableSectionTags,
    tfoot: tableSectionTags,
  },

  // Presumably the tags for which a stray end tag (`</p>`, `</br>`) still produces an element,
  // as browsers do — confirm against the tokenizer.
  implicitlyOpenedTags: ['p', 'br'],

  areTagNamesCaseInsensitive: true,
  areUnbalancedStartTagsImplicitlyClosed: true,
  areUnbalancedEndTagsIgnored: true,

  // Text is decoded with the HTML entity decoder imported from 'speedy-entities'.
  decodeText: decodeHTML,
};
/**
 * Syntax switches shared by the `svg` and `math` foreign-content entries: CDATA sections,
 * processing instructions and self-closing tags are all recognized there.
 */
const foreignContentSyntaxOptions = {
  areCDATASectionsRecognized: true,
  areProcessingInstructionsRecognized: true,
  areSelfClosingTagsRecognized: true,
};

// Attached after `htmlParserOptions` exists because `svg.foreignTags.foreignObject` refers back
// to the HTML options object itself (a circular reference that an object literal cannot express).
htmlParserOptions.foreignTags = {
  svg: {
    ...foreignContentSyntaxOptions,
    foreignTags: { foreignObject: htmlParserOptions },
  },
  math: { ...foreignContentSyntaxOptions },
};
/**
 * Options passed to `createSerializer` to build the exported `toHTML` serializer.
 */
const htmlSerializerOptions = {
// Reuses the parser's void tag array (same instance, not a copy).
voidTags: htmlParserOptions.voidTags,
areTagNamesCaseInsensitive: true,
// Self-closing tag syntax is switched off for HTML output.
areSelfClosingTagsSupported: false,
// Text is encoded with the XML escaper imported from 'speedy-entities'.
encodeText: escapeXML,
};
/**
 * Tokenizes HTML markup as a stream of tokens.
 *
 * Created by `createTokenizer` from `htmlParserOptions`, the same options object that is used
 * to build `HTMLDOMParser` and `HTMLSAXParser`.
 *
 * @group Tokenizer
 */
export const HTMLTokenizer = createTokenizer(htmlParserOptions);
/**
 * Parses HTML markup as DOM.
 *
 * Created by `createDOMParser` from `htmlParserOptions`, the same options object that is used
 * to build `HTMLTokenizer` and `HTMLSAXParser`.
 *
 * @group DOM
 */
export const HTMLDOMParser = createDOMParser(htmlParserOptions);
/**
 * Parses HTML markup as a stream of tokens.
 *
 * Created by `createSAXParser` from `htmlParserOptions`, the same options object that is used
 * to build `HTMLTokenizer` and `HTMLDOMParser`.
 *
 * @group SAX
 */
export const HTMLSAXParser = createSAXParser(htmlParserOptions);
/**
 * Serializes a DOM node as an HTML string.
 *
 * Created by `createSerializer` from `htmlSerializerOptions`: the void tag list is shared with
 * the HTML parser options, self-closing tags are not supported, and text is encoded with
 * `escapeXML`.
 *
 * @group DOM
 */
export const toHTML = createSerializer(htmlSerializerOptions);