html-minifier-next
Version:
Super-configurable and well-tested web page minifier (enhanced successor of HTML Minifier)
1,278 lines (1,098 loc) • 176 kB
JavaScript
'use strict';
Object.defineProperty(exports, '__esModule', { value: true });
/**
 * Serialize a value to a JSON-like string with object keys emitted in sorted
 * order, so that two objects with the same entries produce the same text
 * regardless of insertion order. Intended for building option signatures.
 *
 * Primitives (and `null`/`undefined`) are delegated to `JSON.stringify`;
 * arrays and plain objects are walked recursively.
 *
 * @param {*} obj - Value to serialize
 * @returns {string|undefined} Stable string form (whatever `JSON.stringify` yields for non-objects)
 */
function stableStringify(obj) {
  const isObjectLike = obj !== null && obj !== undefined && typeof obj === 'object';
  if (!isObjectLike) {
    return JSON.stringify(obj);
  }
  if (Array.isArray(obj)) {
    return `[${obj.map(stableStringify).join(',')}]`;
  }
  const members = Object.keys(obj)
    .sort()
    .map((key) => `${JSON.stringify(key)}:${stableStringify(obj[key])}`);
  return `{${members.join(',')}}`;
}
/**
 * Small least-recently-used cache built on `Map` insertion order:
 * the first key in the map is always the least recently used one.
 */
class LRU {
  /**
   * @param {number} [limit=200] - Maximum number of entries kept
   */
  constructor(limit = 200) {
    this.limit = limit;
    this.map = new Map();
  }

  /**
   * Look up a key and mark it as most recently used.
   * @param {*} key
   * @returns {*} Stored value, or `undefined` when the key is absent
   */
  get(key) {
    if (!this.map.has(key)) {
      return undefined;
    }
    const value = this.map.get(key);
    // Re-insert so the key moves to the "most recent" end of the map
    this.map.delete(key);
    this.map.set(key, value);
    return value;
  }

  /**
   * Store a value as most recently used, evicting the oldest entry on overflow.
   * @param {*} key
   * @param {*} value
   */
  set(key, value) {
    this.map.delete(key);
    this.map.set(key, value);
    if (this.map.size <= this.limit) {
      return;
    }
    const [oldestKey] = this.map.keys();
    this.map.delete(oldestKey);
  }

  /**
   * Remove a key if present.
   * @param {*} key
   */
  delete(key) {
    this.map.delete(key);
  }
}
/**
 * Generate an identifier of the form `u` + 32 hex digits that does not occur
 * inside `value`, retrying with a fresh UUID until that holds.
 *
 * @param {string} value - Text (anything exposing `indexOf`) the ID must not collide with
 * @returns {string} Identifier not found in `value`
 */
function uniqueId(value) {
  for (;;) {
    const candidate = 'u' + crypto.randomUUID().replace(/-/g, '');
    if (value.indexOf(candidate) === -1) {
      return candidate;
    }
  }
}
// Identity and transform functions
/**
 * No-op transform: hands back its argument untouched.
 * @param {*} value
 * @returns {*} The same `value`
 */
function identity(value) {
  return value;
}
/**
 * Tell whether a value is a non-null object exposing a callable `then`.
 * Functions with a `then` property are not treated as thenables here.
 * @param {*} value
 * @returns {boolean}
 */
function isThenable(value) {
  if (value === null || typeof value !== 'object') {
    return false;
  }
  return typeof value.then === 'function';
}
/**
 * Lowercase transform: delegates to the value's own `toLowerCase` method.
 * @param {string} value
 * @returns {string} Lowercased copy
 */
function lowercase(value) {
  const lowered = value.toLowerCase();
  return lowered;
}
// Replace async helper
/**
 * `String.prototype.replace` with an asynchronous replacer.
 *
 * Works in two passes over the same regex: the first pass only collects the
 * promise produced for every match, the second pass substitutes the resolved
 * values in match order.
 *
 * @param {string} str - Input string
 * @param {RegExp} regex - Pattern to replace (normally with the global flag)
 * @param {Function} asyncFn - Called like a `replace` callback; may return a promise
 * @returns {Promise<string>} String with every match replaced by its resolved value
 */
async function replaceAsync(str, regex, asyncFn) {
  const pending = [];
  // Pass 1: run the replacer for each match; the produced string is discarded
  str.replace(regex, (...matchArgs) => {
    pending.push(asyncFn(...matchArgs));
  });
  const resolved = await Promise.all(pending);
  // Pass 2: matches occur in the same order, so consume results sequentially
  let cursor = 0;
  return str.replace(regex, () => {
    const replacement = resolved[cursor];
    cursor += 1;
    return replacement;
  });
}
/**
 * Turn a string pattern (e.g. from a JSON config) into a `RegExp`.
 *
 * - Non-strings are returned unchanged.
 * - The empty string means "not configured" and yields `undefined`.
 * - `/source/flags` notation is split into source and flags.
 * - Any other string is used verbatim as the pattern source.
 *
 * @param {*} value - String pattern or an already-constructed value
 * @returns {RegExp|undefined|*} Compiled pattern, `undefined`, or the original non-string value
 */
function parseRegExp(value) {
  if (typeof value !== 'string') {
    return value;
  }
  if (value === '') {
    return undefined;
  }
  const literal = /^\/(.+)\/([dgimsuvy]*)$/.exec(value);
  if (literal === null) {
    return new RegExp(value);
  }
  const [, source, flags] = literal;
  return new RegExp(source, flags);
}
/*
* HTML Parser By John Resig (ejohn.org)
* Modified by Juriy “kangax” Zaytsev
* Original code by Erik Arvidsson, Mozilla Public License
* http://erik.eae.net/simplehtmlparser/simplehtmlparser.js
*/
/*
* Use like so:
*
* HTMLParser(htmlString, {
* start: function(tag, attrs, unary) {},
* end: function(tag) {},
* chars: function(text) {},
* comment: function(text) {}
* });
*/
/**
 * `Set` whose membership test lowercases the probe before looking it up.
 * Stored members are NOT normalized, so they should be added in lowercase.
 */
class CaseInsensitiveSet extends Set {
  /**
   * @param {string} str - Name to look up in any letter case
   * @returns {boolean} Whether the lowercased name is a member
   */
  has(str) {
    const normalized = str.toLowerCase();
    return super.has(normalized);
  }
}
// Regular expressions for parsing tags and attributes
// These pieces are concatenated by `buildAttrRegex` into one attribute matcher
// Attribute name: one capture of characters other than whitespace, quotes, `<`, `>`, `/`, `=`
const singleAttrIdentifier = /([^\s"'<>/=]+)/;
// Built-in assignment operator; `joinSingleAttrAssigns` appends `handler.customAttrAssign` to it
const singleAttrAssigns = [/=/];
// Sources of the three value forms; each contributes exactly one capture group
const singleAttrValues = [
// Attr value double quotes
/"([^"]*)"+/.source,
// Attr value, single quotes
/'([^']*)'+/.source,
// Attr value, no quotes
/([^ \t\n\f\r"'`=<>]+)/.source
];
// https://www.w3.org/TR/1999/REC-xml-names-19990114/#NT-QName
// Source text of a capturing group that matches an XML qualified name
// (`prefix:local` or just `local`); used to build the start/end tag regexes.
const qnameCapture = (function () {
  // Character classes below are written as regex character-class content
  // https://www.npmjs.com/package/ncname
  const combiningChar = '\u0300-\u0345\u0360\u0361\u0483-\u0486\u0591-\u05A1\u05A3-\u05B9\u05BB-\u05BD\u05BF\u05C1\u05C2\u05C4\u064B-\u0652\u0670\u06D6-\u06E4\u06E7\u06E8\u06EA-\u06ED\u0901-\u0903\u093C\u093E-\u094D\u0951-\u0954\u0962\u0963\u0981-\u0983\u09BC\u09BE-\u09C4\u09C7\u09C8\u09CB-\u09CD\u09D7\u09E2\u09E3\u0A02\u0A3C\u0A3E-\u0A42\u0A47\u0A48\u0A4B-\u0A4D\u0A70\u0A71\u0A81-\u0A83\u0ABC\u0ABE-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0B01-\u0B03\u0B3C\u0B3E-\u0B43\u0B47\u0B48\u0B4B-\u0B4D\u0B56\u0B57\u0B82\u0B83\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0C01-\u0C03\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55\u0C56\u0C82\u0C83\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5\u0CD6\u0D02\u0D03\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0E31\u0E34-\u0E3A\u0E47-\u0E4E\u0EB1\u0EB4-\u0EB9\u0EBB\u0EBC\u0EC8-\u0ECD\u0F18\u0F19\u0F35\u0F37\u0F39\u0F3E\u0F3F\u0F71-\u0F84\u0F86-\u0F8B\u0F90-\u0F95\u0F97\u0F99-\u0FAD\u0FB1-\u0FB7\u0FB9\u20D0-\u20DC\u20E1\u302A-\u302F\u3099\u309A';
  const digit = '0-9\u0660-\u0669\u06F0-\u06F9\u0966-\u096F\u09E6-\u09EF\u0A66-\u0A6F\u0AE6-\u0AEF\u0B66-\u0B6F\u0BE7-\u0BEF\u0C66-\u0C6F\u0CE6-\u0CEF\u0D66-\u0D6F\u0E50-\u0E59\u0ED0-\u0ED9\u0F20-\u0F29';
  const extender = '\xB7\u02D0\u02D1\u0387\u0640\u0E46\u0EC6\u3005\u3031-\u3035\u309D\u309E\u30FC-\u30FE';
  // NOTE: this literal must stay on a single line; a raw line break inside a
  // quoted string is a syntax error
  const letter = 'A-Za-z\xC0-\xD6\xD8-\xF6\xF8-\u0131\u0134-\u013E\u0141-\u0148\u014A-\u017E\u0180-\u01C3\u01CD-\u01F0\u01F4\u01F5\u01FA-\u0217\u0250-\u02A8\u02BB-\u02C1\u0386\u0388-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D6\u03DA\u03DC\u03DE\u03E0\u03E2-\u03F3\u0401-\u040C\u040E-\u044F\u0451-\u045C\u045E-\u0481\u0490-\u04C4\u04C7\u04C8\u04CB\u04CC\u04D0-\u04EB\u04EE-\u04F5\u04F8\u04F9\u0531-\u0556\u0559\u0561-\u0586\u05D0-\u05EA\u05F0-\u05F2\u0621-\u063A\u0641-\u064A\u0671-\u06B7\u06BA-\u06BE\u06C0-\u06CE\u06D0-\u06D3\u06D5\u06E5\u06E6\u0905-\u0939\u093D\u0958-\u0961\u0985-\u098C\u098F\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09DC\u09DD\u09DF-\u09E1\u09F0\u09F1\u0A05-\u0A0A\u0A0F\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32\u0A33\u0A35\u0A36\u0A38\u0A39\u0A59-\u0A5C\u0A5E\u0A72-\u0A74\u0A85-\u0A8B\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2\u0AB3\u0AB5-\u0AB9\u0ABD\u0AE0\u0B05-\u0B0C\u0B0F\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32\u0B33\u0B36-\u0B39\u0B3D\u0B5C\u0B5D\u0B5F-\u0B61\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99\u0B9A\u0B9C\u0B9E\u0B9F\u0BA3\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C60\u0C61\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CDE\u0CE0\u0CE1\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D60\u0D61\u0E01-\u0E2E\u0E30\u0E32\u0E33\u0E40-\u0E45\u0E81\u0E82\u0E84\u0E87\u0E88\u0E8A\u0E8D\u0E94-\u0E97\u0E99-\u0E9F\u0EA1-\u0EA3\u0EA5\u0EA7\u0EAA\u0EAB\u0EAD\u0EAE\u0EB0\u0EB2\u0EB3\u0EBD\u0EC0-\u0EC4\u0F40-\u0F47\u0F49-\u0F69\u10A0-\u10C5\u10D0-\u10F6\u1100\u1102\u1103\u1105-\u1107\u1109\u110B\u110C\u110E-\u1112\u113C\u113E\u1140\u114C\u114E\u1150\u1154\u1155\u1159\u115F-\u1161\u1163\u1165\u1167\u1169\u116D\u116E\u1172\u1173\u1175\u119E\u11A8\u11AB\u11AE\u11AF\u11B7\u11B8\u11BA\u11BC-\u11C2\u11EB\u11F0\u11F9\u1E00-\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC2-\u1FC4\u1FC6-\u1FCC\u1FD0-\u1FD3\u1FD6-\u1FDB\u1FE0-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126\u212A\u212B\u212E\u2180-\u2182\u3007\u3021-\u3029\u3041-\u3094\u30A1-\u30FA\u3105-\u312C\u4E00-\u9FA5\uAC00-\uD7A3';
  // NCName: a letter or `_`, followed by any run of name characters
  const ncname = '[' + letter + '_][' + letter + digit + '\\.\\-_' + combiningChar + extender + ']*';
  // QName: optional `prefix:` followed by the local name, captured as one group
  return '((?:' + ncname + '\\:)?' + ncname + ')';
})();
// Matches `<` plus a qualified tag name at the start of the input (name in capture 1)
const startTagOpen = new RegExp('^<' + qnameCapture);
// Matches a complete end tag `</name …>` at the start of the input (name in capture 1)
const endTag = new RegExp('^</' + qnameCapture + '[^>]*>');
// Feature probe: becomes `true` when the engine reports a non-participating
// optional capture group as an empty string rather than `undefined`
let IS_REGEX_CAPTURING_BROKEN = false;
'x'.replace(/x(.)?/g, function (m, g) {
IS_REGEX_CAPTURING_BROKEN = g === '';
});
// Lookup tables used by `HTMLParser`; all element sets are matched case-insensitively
// Empty elements
const empty = new CaseInsensitiveSet(['area', 'base', 'basefont', 'br', 'col', 'embed', 'frame', 'hr', 'img', 'input', 'isindex', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']);
// Elements that you can, intentionally, leave open (and which close themselves)
const closeSelf = new CaseInsensitiveSet(['colgroup', 'dd', 'dt', 'li', 'option', 'p', 'td', 'tfoot', 'th', 'thead', 'tr', 'source']);
// Attributes that have their values filled in `disabled='disabled'`
const fillAttrs = new CaseInsensitiveSet(['checked', 'compact', 'declare', 'defer', 'disabled', 'ismap', 'multiple', 'nohref', 'noresize', 'noshade', 'nowrap', 'readonly', 'selected']);
// Special elements (can contain anything)
const special = new CaseInsensitiveSet(['script', 'style']);
// HTML elements, https://html.spec.whatwg.org/multipage/indices.html#elements-3
// Phrasing content, https://html.spec.whatwg.org/multipage/dom.html#phrasing-content
// A start tag from this set implicitly closes an open `<p>` (see `handleStartTag`)
const nonPhrasing = new CaseInsensitiveSet(['address', 'article', 'aside', 'base', 'blockquote', 'body', 'caption', 'col', 'colgroup', 'dd', 'details', 'dialog', 'div', 'dl', 'dt', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hgroup', 'hr', 'html', 'legend', 'li', 'menuitem', 'meta', 'ol', 'optgroup', 'option', 'param', 'rp', 'rt', 'source', 'style', 'summary', 'tbody', 'td', 'tfoot', 'th', 'thead', 'title', 'tr', 'track', 'ul']);
// Lazily filled cache of "content up to closing tag" regexes, keyed by lowercased tag name
const reCache = {};
// Pre-compiled regexes for common special elements (`script`, `style`, `noscript`)
// These are used frequently, and pre-compiling them avoids regex creation overhead
// Capture 1 is the raw content before the closing tag
const preCompiledStackedTags = {
'script': /([\s\S]*?)<\/script[^>]*>/i,
'style': /([\s\S]*?)<\/style[^>]*>/i,
'noscript': /([\s\S]*?)<\/noscript[^>]*>/i
};
// Cache for compiled attribute regexes per handler configuration
// Keyed by the handler object itself, so entries do not keep handlers alive
const attrRegexCache = new WeakMap();
/**
 * Remove every `open…close` delimiter pair from a string while keeping the
 * text between them. Implemented with `indexOf` scans instead of a regex
 * replace to avoid quadratic behavior on adversarial inputs.
 *
 * An `open` without a matching `close` is left in place together with
 * everything after it.
 *
 * @param {string} str - Text to process
 * @param {string} open - Opening delimiter
 * @param {string} close - Closing delimiter
 * @returns {string} Text with the delimiters removed
 */
function stripDelimited(str, open, close) {
  const pieces = [];
  let cursor = 0;
  while (cursor < str.length) {
    const openAt = str.indexOf(open, cursor);
    if (openAt === -1) {
      // No further delimiters: keep the tail verbatim
      pieces.push(str.slice(cursor));
      break;
    }
    pieces.push(str.slice(cursor, openAt));
    const innerStart = openAt + open.length;
    const closeAt = str.indexOf(close, innerStart);
    if (closeAt === -1) {
      // Unterminated pair: keep the opener and the rest as-is
      pieces.push(str.slice(openAt));
      break;
    }
    pieces.push(str.slice(innerStart, closeAt));
    cursor = closeAt + close.length;
  }
  return pieces.join('');
}
/**
 * Compile the start-anchored regex that matches one attribute (with leading
 * whitespace) for a given handler configuration.
 *
 * The base pattern captures: name, assignment operator, then the
 * double-quoted, single-quoted and unquoted value forms. When
 * `handler.customAttrSurround` is set, one alternative per `[open, close]`
 * pair is placed before the plain alternative, each wrapping the base
 * pattern in captures for the surrounding markers.
 *
 * @param {object} handler - Parser handler (`customAttrAssign`, `customAttrSurround` are read)
 * @returns {RegExp} Attribute regex anchored with `^\s*`
 */
function buildAttrRegex(handler) {
  const basePattern = singleAttrIdentifier.source +
    '(?:\\s*(' + joinSingleAttrAssigns(handler) + ')' +
    '[ \\t\\n\\f\\r]*(?:' + singleAttrValues.join('|') + '))?';
  const surrounds = handler.customAttrSurround;
  if (!surrounds) {
    return new RegExp('^\\s*' + basePattern);
  }
  const alternatives = surrounds.map(
    (pair) => `(?:(${pair[0].source})\\s*${basePattern}\\s*(${pair[1].source}))`
  );
  // Plain (unsurrounded) attribute is tried last
  alternatives.push(`(?:${basePattern})`);
  return new RegExp(`^\\s*(?:${alternatives.join('|')})`);
}
/**
 * Memoized access to the attribute regex of a handler: the regex is built
 * once per handler object and then served from `attrRegexCache`.
 *
 * @param {object} handler - Parser handler used as the cache key
 * @returns {RegExp} Start-anchored attribute regex
 */
function getAttrRegexForHandler(handler) {
  let regex = attrRegexCache.get(handler);
  if (!regex) {
    regex = buildAttrRegex(handler);
    attrRegexCache.set(handler, regex);
  }
  return regex;
}
// Per-handler cache of sticky (`y` flag) attribute regexes, used for
// position-based matching directly on the full input string
const attrRegexStickyCache = new WeakMap();
/**
 * Memoized sticky variant of the handler's attribute regex. It is derived
 * from the anchored regex by dropping the leading `^` from its source and
 * compiling the remainder with the `y` flag.
 *
 * @param {object} handler - Parser handler used as the cache key
 * @returns {RegExp} Sticky attribute regex
 */
function getAttrRegexStickyForHandler(handler) {
  let sticky = attrRegexStickyCache.get(handler);
  if (!sticky) {
    const anchoredSource = getAttrRegexForHandler(handler).source;
    sticky = new RegExp(anchoredSource.slice(1), 'y');
    attrRegexStickyCache.set(handler, sticky);
  }
  return sticky;
}
/**
 * Build the regex alternation of all accepted attribute assignment
 * operators: the built-in ones followed by `handler.customAttrAssign`.
 *
 * @param {object} handler - Parser handler (`customAttrAssign` is optional)
 * @returns {string} `|`-joined non-capturing groups, one per operator pattern
 */
function joinSingleAttrAssigns(handler) {
  const customAssigns = handler.customAttrAssign || [];
  const allAssigns = singleAttrAssigns.concat(customAssigns);
  return allAssigns
    .map((assign) => `(?:${assign.source})`)
    .join('|');
}
// Number of captured parts per `customAttrSurround` pattern
// Each surround alternative built by `buildAttrRegex` captures: open marker,
// attribute name, assignment, three value forms, close marker
const NCP = 7;
/**
 * Index-based HTML tokenizer that reports what it finds through callbacks on
 * a handler object.
 *
 * Handler callbacks used by the parser (all optional):
 * - `start(tag, attrs, unary, unarySlash, autoGenerated)`
 * - `end(tag, attrs, autoGenerated)`
 * - `chars(text, prevTag, nextTag, prevAttrs, nextAttrs)`
 * - `comment(text, nonStandard)`
 * - `doctype(text)`
 *
 * Handler options read by the parser: `continueOnParseError`,
 * `partialMarkup`, `wantsNextTag`, `customAttrSurround`.
 *
 * Every callback may return a promise; the parser waits for it before moving
 * on, so handlers observe events strictly in document order.
 */
class HTMLParser {
  /**
   * @param {string} html - Markup to parse
   * @param {object} handler - Callbacks and options (see class description)
   */
  constructor(html, handler) {
    this.html = html;
    this.handler = handler;
  }

  /**
   * Parse the markup from start to end, invoking the handler callbacks.
   *
   * @returns {Promise<void>}
   * @throws {Error} "Parse error at line …" when no progress can be made and
   *   `handler.continueOnParseError` is not set
   */
  async parse() {
    const handler = this.handler;
    const fullHtml = this.html;
    const fullLength = fullHtml.length;
    // Stack of currently open elements; `lastTag` mirrors the top entry's tag
    const stack = [];
    let lastTag;
    // Use cached attribute regex for this handler configuration
    const attribute = getAttrRegexForHandler(handler);
    const attributeY = getAttrRegexStickyForHandler(handler);
    let prevTag = undefined, nextTag = undefined;
    let prevAttrs = [], nextAttrs = [];
    // Sticky regex versions for position-based matching (avoids string slicing)
    const startTagOpenY = new RegExp(startTagOpen.source.slice(1), 'y');
    // `\s*` with sticky flag is O(n) at worst—no retry from different positions possible
    const startTagCloseY = /\s*(\/?)>/y;
    const endTagY = new RegExp(endTag.source.slice(1), 'y');
    const doctypeY = /<!DOCTYPE[^<>]+>/iy;
    const commentTestY = /<!--/y;
    const conditionalTestY = /<!\[/y;
    // Cached next-tag from lookahead (avoids re-parsing the same tag)
    let cachedNextStartTag = null;
    let cachedNextEndTag = null;
    // Index-based parsing
    let pos = 0;
    let lastPos;
    // Helper to advance position
    const advance = (n) => { pos += n; };
    // Lazy line/column calculation—only compute on actual errors
    const getLineColumn = (position) => {
      let line = 1;
      let column = 1;
      for (let i = 0; i < position; i++) {
        if (fullHtml[i] === '\n') {
          line++;
          column = 1;
        } else {
          column++;
        }
      }
      return { line, column };
    };
    // Helper to safely extract substring when needed for stacked tag content
    const sliceFromPos = (startPos) => {
      return fullHtml.slice(startPos);
    };
    while (pos < fullLength) {
      lastPos = pos;
      // Make sure we’re not in a `script` or `style` element
      if (!lastTag || !special.has(lastTag)) {
        const textEnd = fullHtml.indexOf('<', pos);
        if (textEnd === pos) {
          // We found a tag at current position
          // Check cache from previous lookahead (avoids re-parsing the same tag)
          if (cachedNextStartTag && cachedNextStartTag.pos === pos) {
            const startTagMatch = cachedNextStartTag.match;
            cachedNextStartTag = null;
            cachedNextEndTag = null;
            advance(startTagMatch.advance);
            await handleStartTag(startTagMatch);
            prevTag = startTagMatch.tagName.toLowerCase();
            continue;
          }
          if (cachedNextEndTag && cachedNextEndTag.pos === pos) {
            const endTagMatch = cachedNextEndTag.match;
            cachedNextStartTag = null;
            cachedNextEndTag = null;
            advance(endTagMatch[0].length);
            await parseEndTag(endTagMatch[0], endTagMatch[1]);
            prevTag = '/' + endTagMatch[1].toLowerCase();
            prevAttrs = [];
            continue;
          }
          cachedNextStartTag = null;
          cachedNextEndTag = null;
          // Comment
          commentTestY.lastIndex = pos;
          if (commentTestY.test(fullHtml)) {
            const commentEnd = fullHtml.indexOf('-->', pos + 4);
            if (commentEnd >= 0) {
              if (handler.comment) {
                const result = handler.comment(fullHtml.substring(pos + 4, commentEnd));
                if (isThenable(result)) await result;
              }
              advance(commentEnd + 3 - pos);
              prevTag = '';
              prevAttrs = [];
              continue;
            }
          }
          // https://web.archive.org/web/20241201212701/https://en.wikipedia.org/wiki/Conditional_comment#Downlevel-revealed_conditional_comment
          conditionalTestY.lastIndex = pos;
          if (conditionalTestY.test(fullHtml)) {
            const conditionalEnd = fullHtml.indexOf(']>', pos + 3);
            if (conditionalEnd >= 0) {
              if (handler.comment) {
                const result = handler.comment(fullHtml.substring(pos + 2, conditionalEnd + 1), true /* Non-standard */);
                if (isThenable(result)) await result;
              }
              advance(conditionalEnd + 2 - pos);
              prevTag = '';
              prevAttrs = [];
              continue;
            }
          }
          // Doctype
          doctypeY.lastIndex = pos;
          const doctypeMatch = doctypeY.exec(fullHtml);
          if (doctypeMatch) {
            if (handler.doctype) {
              // Wait for async handlers, like the other callbacks do
              const result = handler.doctype(doctypeMatch[0]);
              if (isThenable(result)) await result;
            }
            advance(doctypeMatch[0].length);
            prevTag = '';
            prevAttrs = [];
            continue;
          }
          // End tag
          endTagY.lastIndex = pos;
          const endTagMatch = endTagY.exec(fullHtml);
          if (endTagMatch) {
            advance(endTagMatch[0].length);
            await parseEndTag(endTagMatch[0], endTagMatch[1]);
            prevTag = '/' + endTagMatch[1].toLowerCase();
            prevAttrs = [];
            continue;
          }
          // Start tag
          const startTagMatch = parseStartTag(pos);
          if (startTagMatch) {
            advance(startTagMatch.advance);
            await handleStartTag(startTagMatch);
            prevTag = startTagMatch.tagName.toLowerCase();
            continue;
          }
          // Nothing matched: fall through and treat `<` as text. The empty text
          // run below makes no progress, so the `pos === lastPos` check at the
          // end of the loop either skips the character or reports a parse error.
        }
        let text;
        if (textEnd >= 0) {
          text = fullHtml.substring(pos, textEnd);
          advance(textEnd - pos);
        } else {
          text = fullHtml.substring(pos);
          advance(fullLength - pos);
        }
        // Next tag for whitespace processing context
        if (handler.wantsNextTag) {
          const nextStartTagMatch = parseStartTag(pos);
          if (nextStartTagMatch) {
            nextTag = nextStartTagMatch.tagName;
            // Extract minimal attribute info for whitespace logic (just name/value pairs)
            nextAttrs = extractAttrInfo(nextStartTagMatch.attrs);
            cachedNextStartTag = { match: nextStartTagMatch, pos };
          } else {
            endTagY.lastIndex = pos;
            const nextEndTagMatch = endTagY.exec(fullHtml);
            if (nextEndTagMatch) {
              nextTag = '/' + nextEndTagMatch[1];
              nextAttrs = [];
              cachedNextEndTag = { match: nextEndTagMatch, pos };
            } else {
              nextTag = '';
              nextAttrs = [];
            }
          }
        }
        if (handler.chars) {
          const result = handler.chars(text, prevTag, nextTag, prevAttrs, nextAttrs);
          if (isThenable(result)) await result;
        }
        prevTag = '';
        prevAttrs = [];
      } else {
        const stackedTag = lastTag.toLowerCase();
        // Use pre-compiled regex for common tags (`script`, `style`, `noscript`) to avoid regex creation overhead
        const reStackedTag = preCompiledStackedTags[stackedTag] || reCache[stackedTag] || (reCache[stackedTag] = new RegExp('([\\s\\S]*?)\\x3c/' + stackedTag + '[^>]*>', 'i'));
        const remaining = sliceFromPos(pos);
        const m = reStackedTag.exec(remaining);
        if (m && m.index === 0) {
          let text = m[1];
          if (stackedTag !== 'script' && stackedTag !== 'style' && stackedTag !== 'noscript') {
            text = stripDelimited(stripDelimited(text, '<!--', '-->'), '<![CDATA[', ']]>');
          }
          if (handler.chars) {
            const result = handler.chars(text);
            if (isThenable(result)) await result;
          }
          // Advance HTML past the matched special tag content and its closing tag
          advance(m[0].length);
          await parseEndTag('</' + stackedTag + '>', stackedTag);
        } else {
          // No closing tag found; to avoid infinite loop, break similarly to previous behavior
          if (handler.continueOnParseError && handler.chars && pos < fullLength) {
            const result = handler.chars(fullHtml[pos], prevTag, '', prevAttrs, []);
            if (isThenable(result)) await result;
            advance(1);
          } else {
            break;
          }
        }
      }
      if (pos === lastPos) {
        if (handler.continueOnParseError) {
          // Skip the problematic character and continue
          if (handler.chars) {
            const result = handler.chars(fullHtml[pos], prevTag, '', prevAttrs, []);
            if (isThenable(result)) await result;
          }
          advance(1);
          prevTag = '';
          prevAttrs = [];
          continue;
        }
        const loc = getLineColumn(pos);
        // Include some context before the error position so the snippet contains the offending markup plus preceding characters (e.g., `invalid<tag`)
        const CONTEXT_BEFORE = 50;
        const startPos = Math.max(0, pos - CONTEXT_BEFORE);
        const snippet = fullHtml.slice(startPos, startPos + 200).replace(/\n/g, ' ');
        throw new Error(
          `Parse error at line ${loc.line}, column ${loc.column}:\n${snippet}${fullHtml.length > startPos + 200 ? '…' : ''}`
        );
      }
    }
    if (!handler.partialMarkup) {
      // Clean up any remaining tags
      await parseEndTag();
    }

    // Helper to extract minimal attribute info (name/value pairs) from raw attribute matches
    // Used for whitespace collapsing logic—doesn’t need full processing
    function extractAttrInfo(rawAttrs) {
      if (!rawAttrs || !rawAttrs.length) return [];
      const numCustomParts = handler.customAttrSurround ? handler.customAttrSurround.length * NCP : 0;
      const baseIndex = 1 + numCustomParts;
      return rawAttrs.map(args => {
        // Extract attribute name (always at `baseIndex`)
        const name = args[baseIndex];
        // Extract value from double-quoted (`baseIndex + 2`), single-quoted (`baseIndex + 3`), or unquoted (`baseIndex + 4`)
        const value = args[baseIndex + 2] ?? args[baseIndex + 3] ?? args[baseIndex + 4];
        return { name: name?.toLowerCase(), value };
      }).filter(attr => attr.name); // Filter out invalid entries
    }

    // Try to read a complete start tag at `startPos` without consuming input.
    // Returns `{ tagName, attrs, advance, unarySlash }` or `undefined` when the
    // text at `startPos` is not a well-terminated start tag.
    function parseStartTag(startPos) {
      startTagOpenY.lastIndex = startPos;
      const start = startTagOpenY.exec(fullHtml);
      if (start) {
        const match = {
          tagName: start[1],
          attrs: [],
          advance: 0
        };
        let consumed = start[0].length;
        let currentPos = startPos + consumed;
        let end, attr;
        // Safety limit: Max length of input to check for attributes
        // Protects against catastrophic backtracking on massive attribute values
        const MAX_ATTR_PARSE_LENGTH = 20000; // 20 KB should be enough for any reasonable tag
        while (true) {
          // Check for closing tag first (sticky regex—no slicing)
          startTagCloseY.lastIndex = currentPos;
          end = startTagCloseY.exec(fullHtml);
          if (end) {
            break;
          }
          // Limit the input length we pass to the regex to prevent catastrophic backtracking
          const remainingLen = fullLength - currentPos;
          const isLimited = remainingLen > MAX_ATTR_PARSE_LENGTH;
          if (!isLimited) {
            // Common case: Use sticky regex directly on full string (no slicing)
            attributeY.lastIndex = currentPos;
            attr = attributeY.exec(fullHtml);
          } else {
            const extractEndPos = currentPos + MAX_ATTR_PARSE_LENGTH;
            // Create a temporary substring only for attribute parsing (limited for safety)
            const searchStr = fullHtml.substring(currentPos, extractEndPos);
            attr = searchStr.match(attribute);
            // If we limited the input and got a match, check if the value might be truncated
            if (attr) {
              // Check if the attribute value extends beyond our search window
              const attrEnd = attr[0].length;
              // If the match ends near the limit, the value might be truncated
              if (attrEnd > MAX_ATTR_PARSE_LENGTH - 100) {
                // Manually extract this attribute to handle potentially huge value
                const manualMatch = searchStr.match(/^\s*([^\s"'<>/=]+)\s*=\s*/);
                if (manualMatch) {
                  const quoteChar = searchStr[manualMatch[0].length];
                  if (quoteChar === '"' || quoteChar === "'") {
                    const closeQuote = searchStr.indexOf(quoteChar, manualMatch[0].length + 1);
                    if (closeQuote !== -1) {
                      const fullAttrLen = closeQuote + 1;
                      const numCustomParts = handler.customAttrSurround
                        ? handler.customAttrSurround.length * NCP
                        : 0;
                      const baseIndex = 1 + numCustomParts;
                      attr = [];
                      attr[0] = searchStr.substring(0, fullAttrLen);
                      attr[baseIndex] = manualMatch[1]; // Attribute name
                      attr[baseIndex + 1] = '='; // `customAssign` (falls back to "=" for huge attributes)
                      const value = searchStr.substring(manualMatch[0].length + 1, closeQuote);
                      // Place value at correct index based on quote type
                      if (quoteChar === '"') {
                        attr[baseIndex + 2] = value; // Double-quoted value
                      } else {
                        attr[baseIndex + 3] = value; // Single-quoted value
                      }
                      currentPos += fullAttrLen;
                      consumed += fullAttrLen;
                      match.attrs.push(attr);
                      continue;
                    }
                  }
                  // Note: Unquoted attribute values are intentionally not handled here.
                  // Per HTML spec, unquoted values cannot contain spaces or special chars,
                  // making a 20 KB+ unquoted value practically impossible. If encountered,
                  // it's malformed HTML and using the truncated regex match is acceptable.
                }
              }
            }
            if (!attr) {
              // If we limited the input and got no match, try manual extraction
              // This handles cases where quoted attributes exceed `MAX_ATTR_PARSE_LENGTH`
              const manualMatch = searchStr.match(/^\s*([^\s"'<>/=]+)\s*=\s*/);
              if (manualMatch) {
                const quoteChar = searchStr[manualMatch[0].length];
                if (quoteChar === '"' || quoteChar === "'") {
                  // Search in the full HTML (not limited substring) for closing quote
                  const closeQuote = fullHtml.indexOf(quoteChar, currentPos + manualMatch[0].length + 1);
                  if (closeQuote !== -1) {
                    const fullAttrLen = closeQuote - currentPos + 1;
                    const numCustomParts = handler.customAttrSurround
                      ? handler.customAttrSurround.length * NCP
                      : 0;
                    const baseIndex = 1 + numCustomParts;
                    attr = [];
                    attr[0] = fullHtml.substring(currentPos, closeQuote + 1);
                    attr[baseIndex] = manualMatch[1]; // Attribute name
                    attr[baseIndex + 1] = '='; // customAssign
                    const value = fullHtml.substring(currentPos + manualMatch[0].length + 1, closeQuote);
                    // Place value at correct index based on quote type
                    if (quoteChar === '"') {
                      attr[baseIndex + 2] = value; // Double-quoted value
                    } else {
                      attr[baseIndex + 3] = value; // Single-quoted value
                    }
                    currentPos += fullAttrLen;
                    consumed += fullAttrLen;
                    match.attrs.push(attr);
                    continue;
                  }
                }
              }
            }
          }
          if (!attr) {
            break;
          }
          const attrLen = attr[0].length;
          currentPos += attrLen;
          consumed += attrLen;
          match.attrs.push(attr);
        }
        // Check for closing tag (sticky regex—no slicing)
        startTagCloseY.lastIndex = currentPos;
        end = startTagCloseY.exec(fullHtml);
        if (end) {
          match.unarySlash = end[1];
          consumed += end[0].length;
          match.advance = consumed;
          return match;
        }
      }
    }

    // Index of the nearest open `tagName` on the stack, searching no further
    // than the innermost open `table`; -1 when not found
    function findTagInCurrentTable(tagName) {
      let pos;
      const needle = tagName.toLowerCase();
      for (pos = stack.length - 1; pos >= 0; pos--) {
        const currentTag = stack[pos].lowerTag;
        if (currentTag === needle) {
          return pos;
        }
        // Stop searching if we hit a table boundary
        if (currentTag === 'table') {
          break;
        }
      }
      return -1;
    }

    async function parseEndTagAt(pos) {
      // Close all open elements up to `pos` (mirrors `parseEndTag`’s core branch)
      for (let i = stack.length - 1; i >= pos; i--) {
        if (handler.end) {
          await handler.end(stack[i].tag, stack[i].attrs, true);
        }
      }
      stack.length = pos;
      lastTag = pos && stack[pos - 1].tag;
    }

    // Close `tagName` (and everything nested in it) if it is open in the
    // current table; resolves to whether anything was closed
    async function closeIfFoundInCurrentTable(tagName) {
      const pos = findTagInCurrentTable(tagName);
      if (pos >= 0) {
        // Close at the specific index to avoid re-searching
        await parseEndTagAt(pos);
        return true;
      }
      return false;
    }

    // Apply implied end tags for the new element, normalize its raw attribute
    // matches, push it on the stack (unless unary) and notify `handler.start`
    async function handleStartTag(match) {
      const tagName = match.tagName;
      let unarySlash = match.unarySlash;
      if (lastTag === 'p' && nonPhrasing.has(tagName)) {
        await parseEndTag('', lastTag);
      } else if (tagName === 'tbody') {
        if (!await closeIfFoundInCurrentTable('tfoot')) {
          await closeIfFoundInCurrentTable('thead');
        }
      } else if (tagName === 'tfoot') {
        if (!await closeIfFoundInCurrentTable('tbody')) {
          await closeIfFoundInCurrentTable('thead');
        }
      } else if (tagName === 'thead') {
        // If a `tbody` or `tfoot` is open in the current table, close it
        if (!await closeIfFoundInCurrentTable('tbody')) {
          await closeIfFoundInCurrentTable('tfoot');
        }
      }
      if (tagName === 'col' && findTagInCurrentTable('colgroup') < 0) {
        // Synthesize the `<colgroup>` that a bare `<col>` implies
        lastTag = 'colgroup';
        stack.push({ tag: lastTag, lowerTag: 'colgroup', attrs: [] });
        if (handler.start) {
          await handler.start(lastTag, [], false, '', true);
        }
      } else if (tagName !== 'col' && lastTag === 'colgroup') {
        // Auto-close synthetic `<colgroup>` when a non-`col` element starts
        await parseEndTag('', 'colgroup');
      }
      if (closeSelf.has(tagName) && lastTag === tagName) {
        await parseEndTag('', tagName);
      }
      // Handle `dt`/`dd` cross-closing: `dt` followed by `dd`, or `dd` followed by `dt`
      if ((tagName === 'dt' || tagName === 'dd') && (lastTag === 'dt' || lastTag === 'dd')) {
        await parseEndTag('', lastTag);
      }
      const unary = empty.has(tagName) || (tagName === 'html' && lastTag === 'head') || !!unarySlash;
      const attrs = match.attrs.map(function (args) {
        let name, value, customOpen, customClose, customAssign, quote;
        // Hackish workaround for Firefox bug, https://bugzilla.mozilla.org/show_bug.cgi?id=369778
        if (IS_REGEX_CAPTURING_BROKEN && args[0].indexOf('""') === -1) {
          if (args[3] === '') { delete args[3]; }
          if (args[4] === '') { delete args[4]; }
          if (args[5] === '') { delete args[5]; }
        }
        // Read assignment and value starting at capture `index`; returns the quote character used
        function populate(index) {
          customAssign = args[index];
          value = args[index + 1];
          if (typeof value !== 'undefined') {
            return '"';
          }
          value = args[index + 2];
          if (typeof value !== 'undefined') {
            return '\'';
          }
          value = args[index + 3];
          if (typeof value === 'undefined' && fillAttrs.has(name)) {
            value = name;
          }
          return '';
        }
        let j = 1;
        if (handler.customAttrSurround) {
          // Each surround alternative owns `NCP` consecutive captures
          for (let i = 0, l = handler.customAttrSurround.length; i < l; i++, j += NCP) {
            name = args[j + 1];
            if (name) {
              quote = populate(j + 2);
              customOpen = args[j];
              customClose = args[j + 6];
              break;
            }
          }
        }
        if (!name && (name = args[j])) {
          quote = populate(j + 1);
        }
        return {
          name,
          value,
          customAssign: customAssign || '=',
          customOpen: customOpen || '',
          customClose: customClose || '',
          quote: quote || ''
        };
      });
      if (!unary) {
        stack.push({ tag: tagName, lowerTag: tagName.toLowerCase(), attrs });
        lastTag = tagName;
        unarySlash = '';
      }
      // Store attributes for `prevAttrs` tracking (used in whitespace collapsing)
      prevAttrs = attrs;
      if (handler.start) {
        await handler.start(tagName, attrs, unary, unarySlash);
      }
    }

    // Index of the nearest open `tagName` on the stack, or -1
    function findTag(tagName) {
      let pos;
      const needle = tagName.toLowerCase();
      for (pos = stack.length - 1; pos >= 0; pos--) {
        if (stack[pos].lowerTag === needle) {
          break;
        }
      }
      return pos;
    }

    // Close `tagName` and everything opened after it. Called without a tag
    // name it closes every open element. `tag` is the literal end tag text;
    // an empty `tag` marks the close as implied rather than written out.
    async function parseEndTag(tag, tagName) {
      let pos;
      // Find the closest opened tag of the same type
      if (tagName) {
        pos = findTag(tagName);
      } else { // If no tag name is provided, clean shop
        pos = 0;
      }
      if (pos >= 0) {
        // Close all the open elements, up the stack
        for (let i = stack.length - 1; i >= pos; i--) {
          if (handler.end) {
            // Awaited (as in `parseEndTagAt`) so async handlers run in order
            await handler.end(stack[i].tag, stack[i].attrs, i > pos || !tag);
          }
        }
        // Remove the open elements from the stack
        stack.length = pos;
        lastTag = pos && stack[pos - 1].tag;
      } else if (handler.partialMarkup && tagName) {
        // In partial markup mode, preserve stray end tags
        if (handler.end) {
          await handler.end(tagName, [], false);
        }
      } else if (tagName && tagName.toLowerCase() === 'br') {
        // A stray `</br>` is reported as a `<br>` start tag
        if (handler.start) {
          await handler.start(tagName, [], true, '');
        }
      } else if (tagName && tagName.toLowerCase() === 'p') {
        // A stray `</p>` is reported as an empty `<p></p>` pair
        if (handler.start) {
          await handler.start(tagName, [], false, '', true);
        }
        if (handler.end) {
          await handler.end(tagName, []);
        }
      }
    }
  }
}
/**
 * Node of a token-ordering tree. `keys` (ordered tokens) and `sorterMap`
 * (token → child `Sorter`) are assigned by whoever builds the tree.
 */
class Sorter {
  /**
   * Reorder `tokens` in place from `fromIndex` onwards: the first key that
   * occurs in that range is moved to the front of it (all occurrences), the
   * remaining tokens keep their relative order, and the tail after the moved
   * tokens is handed to that key's child sorter.
   *
   * @param {Array} tokens - Tokens to reorder (mutated)
   * @param {number} [fromIndex=0] - Start of the range still to be sorted
   * @returns {Array} The same `tokens` array
   */
  sort(tokens, fromIndex = 0) {
    for (const token of this.keys) {
      // One pass over the range: count hits and remember everything else
      const rest = [];
      let hits = 0;
      for (let idx = fromIndex; idx < tokens.length; idx++) {
        const current = tokens[idx];
        if (current === token) {
          hits += 1;
        } else {
          rest.push(current);
        }
      }
      if (hits === 0) {
        continue;
      }
      // Rewrite the range as: `hits` copies of the token, then the others
      const boundary = fromIndex + hits;
      for (let idx = fromIndex; idx < boundary; idx++) {
        tokens[idx] = token;
      }
      rest.forEach((other, offset) => {
        tokens[boundary + offset] = other;
      });
      return this.sorterMap.get(token).sort(tokens, boundary);
    }
    return tokens;
  }
}
/**
 * Collects token lists and derives a `Sorter` tree from them, ordering
 * tokens by how many lists they appear in.
 */
class TokenChain {
  constructor() {
    // token → { arrays: lists containing the token, processed: counter used by `createSorter` }
    this.map = new Map();
  }

  /**
   * Register a token list under each of its tokens.
   * @param {string[]} tokens
   */
  add(tokens) {
    tokens.forEach((token) => {
      let entry = this.map.get(token);
      if (!entry) {
        entry = { arrays: [], processed: 0 };
        this.map.set(token, entry);
      }
      entry.arrays.push(tokens);
    });
  }

  /**
   * Build the sorter for the lists added so far.
   *
   * Tokens are visited by descending list count, ties broken by
   * `localeCompare` on the token. A token becomes a key unless its
   * `processed` counter has already reached its list count; for each key a
   * child chain is built from its lists with the key itself removed.
   *
   * @returns {Sorter}
   */
  createSorter() {
    const sorter = new Sorter();
    sorter.sorterMap = new Map();
    sorter.keys = [];
    // Most frequent tokens first, alphabetical among equals
    const ranked = [...this.map.entries()].sort(([tokenA, dataA], [tokenB, dataB]) => {
      const byFrequency = dataB.arrays.length - dataA.arrays.length;
      return byFrequency !== 0 ? byFrequency : tokenA.localeCompare(tokenB);
    });
    for (const [token, data] of ranked) {
      if (data.processed >= data.arrays.length) {
        continue;
      }
      const chain = new TokenChain();
      for (const tokens of data.arrays) {
        // Copy of the list without the current token (the original is left intact)
        const others = tokens.filter((candidate) => candidate !== token);
        // Mark remaining tokens as processed
        for (const other of others) {
          const otherData = this.map.get(other);
          if (otherData) {
            otherData.processed++;
          }
        }
        if (others.length > 0) {
          chain.add(others);
        }
      }
      sorter.keys.push(token);
      sorter.sorterMap.set(token, chain.createSorter());
    }
    return sorter;
  }
}
/**
 * Preset configurations
 *
 * Presets provide curated option sets for common use cases:
 * - `conservative`: Safe minification suitable for most projects
 * - `comprehensive`: Aggressive minification for maximum file size reduction
 *
 * Keys are the lowercase preset names that `getPreset` looks up; each value
 * is a plain object of minifier option flags.
 */
const presets = {
// Keeps line breaks and case (`preserveLineBreaks`, `caseSensitive`); no CSS/JS minification flags
conservative: {
caseSensitive: true,
collapseBooleanAttributes: true,
collapseWhitespace: true,
conservativeCollapse: true,
preserveLineBreaks: true,
processConditionalComments: true,
removeComments: true,
removeScriptTypeAttributes: true,
removeStyleLinkTypeAttributes: true,
useShortDoctype: true
},
// Additionally enables CSS/JS/SVG/URL minification and removal of quotes, optional tags and redundant attributes
comprehensive: {
collapseAttributeWhitespace: true,
collapseBooleanAttributes: true,
collapseWhitespace: true,
continueOnParseError: true,
decodeEntities: true,
mergeScripts: true,
minifyCSS: true,
minifyJS: true,
minifySVG: true,
minifyURLs: true,
processConditionalComments: true,
removeAttributeQuotes: true,
removeComments: true,
removeEmptyAttributes: true,
removeOptionalTags: true,
removeRedundantAttributes: true,
removeScriptTypeAttributes: true,
removeStyleLinkTypeAttributes: true,
useShortDoctype: true
}
};
/**
 * Get preset configuration by name
 *
 * The lookup is case-insensitive and restricted to the preset table's own
 * keys, so names that merely exist on `Object.prototype` (e.g.
 * “constructor”, “__proto__”) are reported as not found.
 *
 * @param {string} name - Preset name (“conservative” or “comprehensive”)
 * @returns {object|null} Preset options object or null if not found (also for empty or non-string names)
 */
function getPreset(name) {
  if (typeof name !== 'string' || name === '') return null;
  const normalizedName = name.toLowerCase();
  // Own-property check: a plain `presets[key]` would also hit inherited members
  if (!Object.prototype.hasOwnProperty.call(presets, normalizedName)) return null;
  return presets[normalizedName] || null;
}
/**
 * List the names of all available presets
 * @returns {string[]} Fresh array holding the own enumerable keys of `presets`
 */
function getPresetNames() {
  return Object.entries(presets).map(([presetName]) => presetName);
}
// Regex patterns (to avoid repeated allocations in hot paths)
// Note: Patterns carrying the `g` flag keep `lastIndex` state when used with `.test()`/`.exec()`;
// `String.prototype.replace` resets it, which is how the functions visible in this file use them
// “Whitespace” below means space, LF, CR, tab, and form feed (no-break space is handled separately)
// Run of whitespace at the start of the string
const RE_WS_START = /^[ \n\r\t\f]+/;
// Run of whitespace at the end of the string
const RE_WS_END = /[ \n\r\t\f]+$/;
// Every run of whitespace and/or no-break spaces
const RE_ALL_WS_NBSP = /[ \n\r\t\f\xA0]+/g;
// Run of non-no-break-space characters preceded by the string start or by a run of no-break spaces (captured as group 1)
const RE_NBSP_LEADING_GROUP = /(^|\xA0+)[^\xA0]+/g;
// Same as above, but a preceding run of no-break spaces (group 1) is required
const RE_NBSP_LEAD_GROUP = /(\xA0+)[^\xA0]+/g;
// Run of non-no-break-space characters followed by a run of no-break spaces (captured as group 1)
const RE_NBSP_TRAILING_GROUP = /[^\xA0]+(\xA0+)/g;
// Run of non-no-break-space characters at the end of the string
const RE_NBSP_TRAILING_STRIP = /[^\xA0]+$/;
// Text starting with `[if …]` or ending with `[endif]`
const RE_CONDITIONAL_COMMENT = /^\[if\s[^\]]+]|\[endif]$/;
// `on` followed by at least three lowercase ASCII letters (e.g., `onclick`)
const RE_EVENT_ATTR_DEFAULT = /^on[a-z]{3,}$/;
// Non-empty value free of whitespace, quotes, backticks, `=`, `<`, and `>`
const RE_CAN_REMOVE_ATTR_QUOTES = /^[^ \t\n\f\r"'`=<>]+$/;
// Single semicolon at the end of the string
const RE_TRAILING_SEMICOLON = /;$/;
// `&` followed by a semicolon-terminated reference (`#` optional); group 1 is everything after the `&`
const RE_AMP_ENTITY = /&(#?[0-9a-zA-Z]+;)/g;
// `&` followed by either one of the listed entity names _not_ followed by `;`, or a semicolon-terminated
// reference; group 1 is everything after the `&` (names appear ordered longest first—presumably so that
// longer names win the alternation; confirm before reordering)
const RE_LEGACY_ENTITIES = /&((?:Iacute|aacute|uacute|plusmn|Otilde|otilde|agrave|Agrave|Yacute|yacute|Oslash|oslash|atilde|Atilde|brvbar|ccedil|Ccedil|Ograve|curren|divide|eacute|Eacute|ograve|Oacute|egrave|Egrave|Ugrave|frac12|frac14|frac34|ugrave|oacute|iacute|Ntilde|ntilde|Uacute|middot|igrave|Igrave|iquest|Aacute|cedil|laquo|micro|iexcl|Icirc|icirc|acirc|Ucirc|Ecirc|ocirc|Ocirc|ecirc|ucirc|Aring|aring|AElig|aelig|acute|pound|raquo|Acirc|times|THORN|szlig|thorn|COPY|auml|ordf|ordm|Uuml|macr|uuml|Auml|ouml|Ouml|para|nbsp|euml|quot|QUOT|Euml|yuml|cent|sect|copy|sup1|sup2|sup3|iuml|Iuml|ETH|shy|reg|not|yen|amp|AMP|REG|uml|eth|deg|gt|GT|LT|lt)(?!;)|(?:#?[0-9a-zA-Z]+;))/g;
// Every `<` character
const RE_ESCAPE_LT = /</g;
// Any single whitespace character (no `g` flag, so safe for repeated `.test()` calls)
const RE_ATTR_WS_CHECK = /[ \n\r\t\f]/;
// Every run of whitespace
const RE_ATTR_WS_COLLAPSE = /[ \n\r\t\f]+/g;
// Runs of whitespace at the start and at the end of the string
const RE_ATTR_WS_TRIM = /^[ \n\r\t\f]+|[ \n\r\t\f]+$/g;
// Inline element sets for whitespace handling
// All sets hold lowercase tag names and are meant to be queried with `.has()`
// Non-empty elements that will maintain whitespace around them
const inlineElementsToKeepWhitespaceAround = new Set(['a', 'abbr', 'acronym', 'b', 'bdi', 'bdo', 'big', 'button', 'cite', 'code', 'del', 'dfn', 'em', 'font', 'i', 'img', 'input', 'ins', 'kbd', 'label', 'mark', 'math', 'meter', 'nobr', 'object', 'output', 'progress', 'q', 'rb', 'rp', 'rt', 'rtc', 'ruby', 's', 'samp', 'select', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'svg', 'textarea', 'time', 'tt', 'u', 'var', 'wbr']);
// Non-empty elements that will maintain whitespace within them
// (every entry is also part of `inlineElementsToKeepWhitespaceAround`)
const inlineElementsToKeepWhitespaceWithin = new Set(['a', 'abbr', 'acronym', 'b', 'big', 'del', 'em', 'font', 'i', 'ins', 'kbd', 'mark', 'nobr', 's', 'samp', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'time', 'tt', 'u', 'var']);
// Elements that will always maintain whitespace around them
// (`comment` is not an HTML tag name—presumably a pseudo-tag used for comment nodes; confirm against the caller)
const inlineElementsToKeepWhitespace = new Set(['comment', 'img', 'input', 'wbr']);
// Form control elements (for conditional whitespace collapsing)
// (every entry is also part of `inlineElementsToKeepWhitespaceAround`)
const formControlElements = new Set(['input', 'button', 'select', 'textarea', 'output', 'meter', 'progress']);
// Default attribute values
// (presumably consulted to detect attributes whose value merely restates the default; confirm against the caller)
// General defaults, keyed by attribute name (could apply to any element)
const generalDefaults = {
autocorrect: 'on',
fetchpriority: 'auto',
loading: 'eager',
popovertargetaction: 'toggle'
};
// Tag-specific default attribute values, keyed by tag name and then by attribute name
const tagDefaults = {
area: { shape: 'rect' },
button: { type: 'submit' },
form: {
enctype: 'application/x-www-form-urlencoded',
method: 'get'
},
html: { dir: 'ltr' },
img: { decoding: 'auto' },
input: {
colorspace: 'limited-srgb',
type: 'text'
},
link: { media: 'all' },
marquee: {
behavior: 'scroll',
direction: 'left'
},
meta: { media: 'all' },
source: { media: 'all' },
style: { media: 'all' },
textarea: { wrap: 'soft' },
track: { kind: 'subtitles' }
};
// Script MIME types
// https://mathiasbynens.be/demo/javascript-mime-type
// https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/script
// `type` values treated as executable JavaScript; all entries are lowercase, and `module` is a
// `type` keyword rather than a MIME type
const executableScriptsMimetypes = new Set([
'text/javascript',
'text/x-javascript',
'text/ecmascript',
'text/x-ecmascript',
'text/jscript',
'application/javascript',
'application/x-javascript',
'application/ecmascript',
'application/x-ecmascript',
'module'
]);
// Subset of the executable types above (currently only `module`)—presumably the `type` values
// that must be kept on the element rather than stripped; confirm against the caller
const keepScriptsMimetypes = new Set([
'module'
]);
// Boolean attribute sets
// Despite the `is…` names, both are Sets (not predicates) and are queried with `.has()`
// Lowercase names of attributes treated as plain boolean attributes
const isSimpleBoolean = new Set(['allowfullscreen', 'async', 'autofocus', 'autoplay', 'checked', 'compact', 'controls', 'declare', 'default', 'defaultchecked', 'defaultmuted', 'defaultselected', 'defer', 'disabled', 'enabled', 'formnovalidate', 'hidden', 'indeterminate', 'inert', 'ismap', 'itemscope', 'loop', 'multiple', 'muted', 'nohref', 'noresize', 'noshade', 'novalidate', 'nowrap', 'open', 'pauseonexit', 'readonly', 'required', 'reversed', 'scoped', 'seamless', 'selected', 'sortable', 'truespeed', 'typemustmatch', 'visible']);
// The literal strings `true` and `false`
const isBooleanValue = new Set(['true', 'false']);
// Attributes where certain values can be collapsed to just the attribute name;
// maps each attribute name to the set of values that collapse to the bare attribute:
// - `crossorigin=""` and `crossorigin="anonymous"` → `crossorigin` (anonymous is the default)
// - `contenteditable=""` → `contenteditable` (empty string means inherit/true)
const collapsibleValues = new Map([
['crossorigin', new Set(['', 'anonymous'])],
['contenteditable', new Set([''])]
]);
// `srcset` elements
// Lowercase tag names (presumably the elements whose `srcset` attribute gets processed; confirm against the caller)
const srcsetElements = new Set(['img', 'source']);
// JSON script types
// Lowercase `type` values of `<script>` elements holding JSON; `importmap` and
// `speculationrules` are `type` keywords rather than MIME types
const jsonScriptTypes = new Set([
'application/json',
'application/ld+json',
'application/manifest+json',
'application/vnd.geo+json',
'application/problem+json',
'application/merge-patch+json',
'application/json-patch+json',
'importmap',
'speculationrules',
]);
// Tag omission rules and element sets
// Tag omission rules from https://html.spec.whatwg.org/multipage/syntax.html#optional-tags with the following extensions:
// - retain `<body>` if followed by `<noscript>`
// - `<rb>`, `<rt>`, `<rtc>`, `<rp>` follow HTML Ruby Markup Extensions draft (https://www.w3.org/TR/html-ruby-extensions/)
// - retain all tags which are adjacent to non-standard HTML tags
// All sets below hold lowercase tag names and are meant to be queried with `.has()`
// Elements whose start tag is a candidate for omission
const optionalStartTags = new Set(['html', 'head', 'body', 'colgroup', 'tbody']);
// Elements whose end tag is a candidate for omission
const optionalEndTags = new Set(['html', 'head', 'body', 'li', 'dt', 'dd', 'p', 'rb', 'rt', 'rtc', 'rp', 'optgroup', 'option', 'colgroup', 'caption', 'thead', 'tbody', 'tfoot', 'tr', 'td', 'th']);
// Helper groups referenced by the omission logic (their exact role is defined by the callers, not visible here)
const headerElements = new Set(['meta', 'link', 'script', 'style', 'template', 'noscript']);
const descriptionElements = new Set(['dt', 'dd']);
const pBlockElements = new Set(['address', 'article', 'aside', 'blockquote', 'details', 'dialog', 'div', 'dl', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'main', 'menu', 'nav', 'ol', 'p', 'pre', 'search', 'section', 'table', 'ul']);
const pInlineElements = new Set(['a', 'audio', 'del', 'ins', 'map', 'noscript', 'video']);
const rubyEndTagOmission = new Set(['rb', 'rt', 'rtc', 'rp']); // `</rb>`, `</rt>`, `</rp>` can be omitted if followed by `<rb>`, `<rt>`, `<rtc>`, or `<rp>`
const rubyRtcEndTagOmission = new Set(['rb', 'rtc']); // `</rtc>` can be omitted if followed by `<rb>` or `<rtc>` (not `<rt>` or `<rp>`)
const optionElements = new Set(['option', 'optgroup']);
// `tableContentElements` equals `tableSectionElements` without `thead`
const tableContentElements = new Set(['tbody', 'tfoot']);
const tableSectionElements = new Set(['thead', 'tbody', 'tfoot']);
const cellElements = new Set(['td', 'th']);
// `compactElements` equals `topLevelElements` without `head`
const topLevelElements = new Set(['html', 'head', 'body']);
const compactElements = new Set(['html', 'body']);
const looseElements = new Set(['head', 'colgroup', 'caption']);
const trailingElements = new Set(['dt', 'thead']);
// Known HTML tag names, current and obsolete (presumably what “non-standard HTML tags” above is checked against; confirm against the caller)
const htmlElements = new Set(['a', 'abbr', 'acronym', 'address', 'applet', 'area', 'article', 'aside', 'audio', 'b', 'base', 'basefont', 'bdi', 'bdo', 'bgsound', 'big', 'blink', 'blockquote', 'body', 'br', 'button', 'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup', 'command', 'content', 'data', 'datalist', 'dd', 'del', 'details', 'dfn', 'dialog', 'dir', 'div', 'dl', 'dt', 'element', 'em', 'embed', 'fieldset', 'figcaption', 'figure', 'font', 'footer', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hgroup', 'hr', 'html', 'i', 'iframe', 'image', 'img', 'input', 'ins', 'isindex', 'kbd', 'keygen', 'label', 'legend', 'li', 'link', 'listing', 'main', 'map', 'mark', 'marquee', 'menu', 'menuitem', 'meta', 'meter', 'multicol', 'nav', 'nobr', 'noembed', 'noframes', 'noscript', 'object', 'ol', 'optgroup', 'option', 'output', 'p', 'param', 'picture', 'plaintext', 'pre', 'progress', 'q', 'rb', 'rp', 'rt', 'rtc', 'ruby', 's', 'samp', 'script', 'search', 'section', 'select', 'selectedcontent', 'shadow', 'small', 'source', 'spacer', 'span', 'strike', 'strong', 'style', 'sub', 'summary', 'sup', 'table', 'tbody', 'td', 'template', 'textarea', 'tfoot', 'th', 'thead', 'time', 'title', 'tr', 'track', 'tt', 'u', 'ul', 'var', 'video', 'wbr', 'xmp']);
// Empty attribute regex
// Matches, as a whole string, one of: `class`, `id`, `style`, `title`, `lang`, `dir`, or
// one of the listed `on…` handlers (focus, blur, change, click, dblclick, mouse*, key*)
const reEmptyAttribute = /^(?:class|id|style|title|lang|dir|on(?:focus|blur|change|click|dblclick|mouse(?:down|up|over|move|out)|key(?:press|down|up)))$/;

// Special content elements
const specialContentElements = new Set(['script', 'style']);
// Imports
// Trim whitespace
/**
 * Strip leading and trailing whitespace (space, LF, CR, tab, form feed) from a string;
 * no-break spaces are left alone
 * @param {string} str - Input string
 * @returns {string} Trimmed string; falsy input is returned unchanged
 */
const trimWhitespace = (str) => {
  if (!str) {
    return str;
  }
  // Cheap single-character probes first: most strings have nothing to trim
  const hasEdgeWhitespace = /^[ \n\r\t\f]/.test(str) || /[ \n\r\t\f]$/.test(str);
  if (!hasEdgeWhitespace) {
    return str;
  }
  const withoutLeading = str.replace(RE_WS_START, '');
  return withoutLeading.replace(RE_WS_END, '');
};
// Collapse all whitespace
/**
 * Collapse every run of whitespace in a string
 *
 * A run made of a single tab is kept as is; any other run without no-break spaces becomes
 * one space. Within runs containing no-break spaces, the no-break spaces are kept and each
 * stretch of ordinary whitespace between or around them is reduced to one space.
 * @param {string} str - Input string
 * @returns {string} Collapsed string; falsy input is returned unchanged
 */
function collapseWhitespaceAll(str) {
  // Nothing to do for falsy input or input without any whitespace/no-break space
  if (!str || !/[ \n\r\t\f\xA0]/.test(str)) {
    return str;
  }
  return str.replace(RE_ALL_WS_NBSP, (run) => {
    // A lone tab survives untouched
    if (run === '\t') {
      return '\t';
    }
    // Only runs that actually contain a no-break space need the nested replace
    return run.includes('\xA0')
      ? run.replace(RE_NBSP_LEADING_GROUP, '$1 ')
      : ' ';
  });
}
// Collapse whitespace with options
function collapseWhitespace(str, options, trimLeft, trimRight, collapseAll) {
let lineBreakBefore = ''; let lineBreakAfter = '';
if (!str) return str;
// Fast path: Nothing to do
if (!trimLeft && !trimRight && !collapseAll && !options.preserveLineBreaks) {
return str;
}
// Fast path: No whitespace at all
if (!/[ \n\r\t\f\xA0]/.test(str)) {
return str;
}
if (options.preserveLineBreaks) {
// Find leading/trailing whitespace containing line breaks manually
// (avoids polynomial backtracking with end-anchored lazy quantifiers)
const WS_CHARS = ' \n\r\t\f';
let leadEnd = 0;
while (leadEnd < str.length && WS_CHARS.includes(str[leadEnd])) {
leadEnd++;
}
if (leadEnd > 0) {
const leading = str.slice(0, leadEnd);
if (/[\n\r]/.test(leading)) {
lineBreakBefore = '\n';
str = str.slice(leadEnd);