/*
 * html-minifier-next
 * Super-configurable and well-tested web page minifier (enhanced successor of HTML Minifier)
 * Source listing: 1,270 lines (1,178 loc), 70.9 kB, JavaScript
 */
// Imports
import { HTMLParser, endTag } from './htmlparser.js';
import TokenChain from './tokenchain.js';
import { presets, getPreset, getPresetNames } from './presets.js';
import { LRU, identity, isThenable, lowercase, uniqueId } from './lib/utils.js';
import {
RE_LEGACY_ENTITIES,
RE_ESCAPE_LT,
inlineElementsToKeepWhitespaceAround,
inlineElementsToKeepWhitespaceWithin,
specialContentElements,
htmlElements,
optionalStartTags,
optionalEndTags,
topLevelElements,
compactElements,
looseElements,
trailingElements,
pInlineElements
} from './lib/constants.js';
import {
trimWhitespace,
collapseWhitespaceAll,
collapseWhitespace,
collapseWhitespaceSmart,
canCollapseWhitespace as defaultCanCollapseWhitespace,
canTrimWhitespace as defaultCanTrimWhitespace
} from './lib/whitespace.js';
import {
isConditionalComment,
isIgnoredComment,
isExecutableScript,
isStyleElement,
normalizeAttr,
buildAttr,
deduplicateAttributes
} from './lib/attributes.js';
import {
canRemoveParentTag,
isStartTagMandatory,
canRemovePrecedingTag,
canRemoveElement,
parseRemoveEmptyElementsExcept,
shouldPreserveEmptyElement
} from './lib/elements.js';
import {
cleanConditionalComment,
hasJsonScriptType,
processScript
} from './lib/content.js';
import { processOptions } from './lib/options.js';
// Lazy-load heavy dependencies only when needed
let lightningCSSPromise;

/**
 * Resolve the `transform` export of `lightningcss`.
 * The package is imported on the first call only; every later call hands
 * back the promise memoised in `lightningCSSPromise`.
 *
 * @returns {Promise<Function>} Promise for the module's `transform` export
 */
async function getLightningCSS() {
  lightningCSSPromise = lightningCSSPromise || import('lightningcss').then(({ transform }) => transform);
  return lightningCSSPromise;
}
let terserPromise;

/**
 * Resolve the `minify` export of `terser`.
 * The dynamic import is started once and its promise is reused afterwards.
 *
 * @returns {Promise<Function>} Promise for the module's `minify` export
 */
async function getTerser() {
  if (terserPromise) {
    return terserPromise;
  }
  terserPromise = import('terser').then(({ minify }) => minify);
  return terserPromise;
}
let swcPromise;

/**
 * Resolve the `@swc/core` module (its `default` export when present,
 * otherwise the module namespace itself).
 * The dynamic import is started once and its promise is reused afterwards.
 *
 * @returns {Promise<object>} Promise for the swc module
 * @throws {Error} When the import fails; the message explains how to install
 *   `@swc/core` and the underlying failure is attached as `cause`
 */
async function getSwc() {
  if (!swcPromise) {
    swcPromise = import('@swc/core')
      .then(m => m.default || m)
      .catch((err) => {
        // Keep the real failure reachable: the import can also fail for reasons
        // other than a missing package, and the generic hint alone would hide that
        throw new Error(
          'The swc minifier requires @swc/core to be installed.\n' +
          'Install it with: npm install @swc/core',
          { cause: err }
        );
      });
  }
  return swcPromise;
}
let svgoPromise;

/**
 * Resolve the `optimize` export of `svgo`.
 * The first call kicks off the dynamic import; subsequent calls reuse the
 * promise stored in `svgoPromise`.
 *
 * @returns {Promise<Function>} Promise for the module's `optimize` export
 */
async function getSvgo() {
  const pickOptimize = (mod) => mod.optimize;
  svgoPromise = svgoPromise ? svgoPromise : import('svgo').then(pickOptimize);
  return svgoPromise;
}
let decodeHTMLPromise;

/**
 * Resolve the `decodeHTML` export of `entities`.
 * Imports the package lazily and memoises the resulting promise in
 * `decodeHTMLPromise`.
 *
 * @returns {Promise<Function>} Promise for the module's `decodeHTML` export
 */
async function getDecodeHTML() {
  if (decodeHTMLPromise) {
    return decodeHTMLPromise;
  }
  const loading = import('entities').then(({ decodeHTML }) => decodeHTML);
  decodeHTMLPromise = loading;
  return loading;
}
// Minification caches (initialized on first use with configurable sizes)
// They start as `null`; the code that creates them is not part of this section.
let cssMinifyCache = null;
let jsMinifyCache = null;
let svgMinifyCache = null;
// Pre-compiled patterns for script merging (avoid repeated allocation in hot path)
// `RE_SCRIPT_ATTRS` matches one attribute per iteration:
//   1. attribute name
//   2. double-quoted value, 3. single-quoted value, 4. unquoted value (all optional)
// It carries the `g` flag, so it is stateful: reset `lastIndex` before each new input string.
const RE_SCRIPT_ATTRS = /([^\s=]+)(?:=(?:"([^"]*)"|'([^']*)'|([^\s>]+)))?/g;
// Attributes whose presence must agree on both `<script>` tags before the tags may be merged
const SCRIPT_BOOL_ATTRS = new Set(['async', 'defer', 'nomodule']);
// `type` values treated as plain JavaScript; the empty string also stands for a missing `type`
const DEFAULT_JS_TYPES = new Set(['', 'text/javascript', 'application/javascript']);
// Pre-compiled patterns for buffer scanning
// `RE_START_TAG`: string begins with `<` followed by anything except `/` or `!`
// `RE_END_TAG`: string begins with `</`
const RE_START_TAG = /^<[^/!]/;
const RE_END_TAG = /^<\//;
// HTML encoding types for annotation-xml (MathML)
// Case-insensitive, whole-string match of `text/html` or `application/xhtml+xml`
const RE_HTML_ENCODING = /^(text\/html|application\/xhtml\+xml)$/i;
// Script merging
/**
 * Merge runs of adjacent inline `<script>` elements into one (`mergeScripts`).
 *
 * Two script elements are merged only when all of the following hold:
 * - Nothing but whitespace separates them
 * - Both are inline (no `src` attribute)
 * - Both are classic JavaScript (no `type`, an empty `type`, or a JavaScript
 *   MIME type). Data blocks such as `application/ld+json` and module scripts
 *   are never merged: joining them with `;` would corrupt the data or make
 *   duplicate imports/declarations collide
 * - `async`, `defer`, and `nomodule` are present on both or on neither
 * - `nonce` is identical or absent on both
 *
 * The merged element keeps the attributes of the first script. Scripts that
 * are not merged are emitted byte-for-byte unchanged.
 *
 * The input is scanned once, one script element at a time, so the content of
 * an element always ends at its first closing tag (as in an HTML parser) and
 * an incompatible neighbor never prevents the following pair from merging.
 *
 * Limitations: Tags are located textually, not with a full HTML parser. A `>`
 * inside a quoted attribute value of the opening tag, or script-like text in
 * comments, is not recognized as such.
 *
 * @param {string} html - The HTML string to process
 * @returns {string} HTML with consecutive scripts merged
 */
function mergeConsecutiveScripts(html) {
  // Local patterns keep `lastIndex` state private to this call
  const openTagStart = /<script(?=[\s\/>])/gi;
  const closeTagStart = /<\/script(?=[\s\/>])/gi;
  // JavaScript MIME type essences as listed by the HTML/MIME Sniffing standards
  const jsMimeType = /^(?:(?:application|text)\/(?:x-)?(?:ecma|java)script|text\/(?:javascript1\.[0-5]|jscript|livescript))$/;
  // Tokens that may start a single-line comment in classic scripts (`<!--` and `-->` are legacy HTML-like comments)
  const lineCommentMarker = /\/\/|<!--|-->/;
  const whitespaceOnly = /^\s*$/;
  const openTagNameLength = '<script'.length;

  // Parse the raw attribute text of an opening tag into a name → value dictionary
  const parseAttrs = (attrStr) => {
    const attrs = Object.create(null);
    RE_SCRIPT_ATTRS.lastIndex = 0; // Shared global regex—reset before reuse
    let m;
    while ((m = RE_SCRIPT_ATTRS.exec(attrStr)) !== null) {
      attrs[m[1].toLowerCase()] = m[2] ?? m[3] ?? m[4] ?? '';
    }
    return attrs;
  };

  // A missing or empty `type` means JavaScript; otherwise the trimmed, lowercased value must be a JavaScript MIME type
  const isClassicJavaScript = (attrs) => {
    const rawType = attrs.type ?? '';
    if (rawType === '') {
      return true;
    }
    const type = rawType.trim().toLowerCase();
    return type !== '' && (DEFAULT_JS_TYPES.has(type) || jsMimeType.test(type));
  };

  const canMerge = (a1, a2) => {
    // External scripts ignore their content—never merge into or out of them
    if ('src' in a1 || 'src' in a2) {
      return false;
    }
    if (!isClassicJavaScript(a1) || !isClassicJavaScript(a2)) {
      return false;
    }
    for (const attr of SCRIPT_BOOL_ATTRS) {
      if ((attr in a1) !== (attr in a2)) {
        return false;
      }
    }
    return a1.nonce === a2.nonce;
  };

  const joinContents = (first, second) => {
    const c1 = first.trim();
    const c2 = second.trim();
    if (!c1 || !c2) {
      return c1 || c2;
    }
    const lastLine = c1.slice(c1.lastIndexOf('\n') + 1);
    if (lineCommentMarker.test(lastLine)) {
      // The newline ends a possible trailing comment; the semicolon then ends the
      // statement so that `c2` cannot continue it (e.g., when it starts with `(`)
      return c1 + '\n;' + c2;
    }
    return c1.endsWith(';') ? c1 + c2 : c1 + ';' + c2;
  };

  let output = '';
  let cursor = 0; // Start of the part of `html` not yet copied to `output`
  let pending = null; // Most recent script element; accumulates merged content
  let searchFrom = 0;

  // Emit the pending element if it was rewritten; untouched elements are copied verbatim later
  const flush = () => {
    if (pending && pending.merged) {
      output += html.slice(cursor, pending.start) + `<script${pending.rawAttrs}>${pending.content}</script>`;
      cursor = pending.end;
    }
  };

  for (;;) {
    openTagStart.lastIndex = searchFrom;
    const open = openTagStart.exec(html);
    if (!open) {
      break;
    }
    const openEnd = html.indexOf('>', openTagStart.lastIndex);
    if (openEnd === -1) {
      break;
    }
    closeTagStart.lastIndex = openEnd + 1;
    const close = closeTagStart.exec(html);
    if (!close) {
      // No closing tag ahead means no further complete script element either
      break;
    }
    const closeEnd = html.indexOf('>', closeTagStart.lastIndex);
    if (closeEnd === -1) {
      break;
    }

    const rawAttrs = html.slice(open.index + openTagNameLength, openEnd);
    const element = {
      start: open.index,
      end: closeEnd + 1,
      rawAttrs,
      attrs: parseAttrs(rawAttrs),
      content: html.slice(openEnd + 1, close.index),
      merged: false
    };
    searchFrom = element.end;

    if (pending &&
        whitespaceOnly.test(html.slice(pending.end, element.start)) &&
        canMerge(pending.attrs, element.attrs)) {
      pending.content = joinContents(pending.content, element.content);
      pending.end = element.end;
      pending.merged = true;
    } else {
      flush();
      pending = element;
    }
  }

  flush();
  return output + html.slice(cursor);
}
// Type definitions
/**
* @typedef {Object} HTMLAttribute
* Representation of an attribute from the HTML parser.
*
* @prop {string} name
* @prop {string} [value]
* @prop {string} [quote]
* @prop {string} [customAssign]
* @prop {string} [customOpen]
* @prop {string} [customClose]
*/
/**
* @typedef {Object} MinifierOptions
* Options that control how HTML is minified. All of these are optional
* and usually default to a disabled/safe value unless noted.
*
* @prop {(tag: string, attrs: HTMLAttribute[], canCollapseWhitespace: (tag: string) => boolean) => boolean} [canCollapseWhitespace]
* Predicate that determines whether whitespace inside a given element
* can be collapsed.
*
* Default: Built-in `canCollapseWhitespace` function
*
* @prop {(tag: string | null, attrs: HTMLAttribute[] | undefined, canTrimWhitespace: (tag: string) => boolean) => boolean} [canTrimWhitespace]
* Predicate that determines whether leading/trailing whitespace around
* the element may be trimmed.
*
* Default: Built-in `canTrimWhitespace` function
*
* @prop {number} [cacheCSS]
* The maximum number of entries for the CSS minification cache. Higher values
* improve performance for inputs with repeated CSS (e.g., batch processing).
* - Cache is created on first `minify()` call and persists for the process lifetime
* - Cache size is locked after first call—subsequent calls reuse the same cache
* - Explicit `0` values are coerced to `1` (minimum functional cache size)
*
* Default: `500`
*
* @prop {number} [cacheJS]
* The maximum number of entries for the JavaScript minification cache. Higher
* values improve performance for inputs with repeated JavaScript.
* - Cache is created on first `minify()` call and persists for the process lifetime
* - Cache size is locked after first call—subsequent calls reuse the same cache
* - Explicit `0` values are coerced to `1` (minimum functional cache size)
*
* Default: `500`
*
* @prop {number} [cacheSVG]
* The maximum number of entries for the SVG minification cache. Higher
* values improve performance for inputs with repeated SVG content.
* - Cache is created on first `minify()` call and persists for the process lifetime
* - Cache size is locked after first call—subsequent calls reuse the same cache
* - Explicit `0` values are coerced to `1` (minimum functional cache size)
*
* Default: `500`
*
* @prop {boolean} [caseSensitive]
* When true, tag and attribute names are treated as case-sensitive.
* Useful for custom HTML tags.
* If false (default) names are lower-cased via the `name` function.
*
* Default: `false`
*
* @prop {boolean} [collapseAttributeWhitespace]
* Collapse multiple whitespace characters within attribute values into a
* single space. Also trims leading and trailing whitespace from attribute
* values. Applied as an early normalization step before special attribute
* handlers (CSS minification, class sorting, etc.) run.
*
* Default: `false`
*
* @prop {boolean} [collapseBooleanAttributes]
* Collapse boolean attributes to their name only (for example
* `disabled="disabled"` → `disabled`).
* See also: https://perfectionkills.com/experimenting-with-html-minifier/#collapse_boolean_attributes
*
* Default: `false`
*
* @prop {boolean} [collapseInlineTagWhitespace]
* When false (default) whitespace around `inline` tags is preserved in
* more cases. When true, whitespace around inline tags may be collapsed.
* Must also enable `collapseWhitespace` to have effect.
*
* Default: `false`
*
* @prop {boolean} [collapseWhitespace]
* Collapse multiple whitespace characters into one where allowed. Also
* controls trimming behaviour in several code paths.
* See also: https://perfectionkills.com/experimenting-with-html-minifier/#collapse_whitespace
*
* Default: `false`
*
* @prop {boolean} [conservativeCollapse]
* If true, be conservative when collapsing whitespace (preserve more
* whitespace in edge cases). Affects collapse algorithms.
* Must also enable `collapseWhitespace` to have effect.
*
* Default: `false`
*
* @prop {boolean} [continueOnMinifyError]
* When set to `false`, minification errors may throw.
* By default, the minifier will attempt to recover from minification
* errors, or ignore them and preserve the original content.
*
* Default: `true`
*
* @prop {boolean} [continueOnParseError]
* When true, the parser will attempt to continue on recoverable parse
* errors. Otherwise, parsing errors may throw.
*
* Default: `false`
*
* @prop {RegExp[]} [customAttrAssign]
* Array of regexes used to recognise custom attribute assignment
* operators (e.g. `'<div flex?="{{mode != cover}}"></div>'`).
* These are concatenated with the built-in assignment patterns.
*
* Default: `[]`
*
* @prop {RegExp} [customAttrCollapse]
* Regex matching attribute names whose values should be collapsed.
* Basically used to remove newlines and excess spaces inside attribute values,
* e.g. `/ng-class/`.
*
* @prop {[RegExp, RegExp][]} [customAttrSurround]
* Array of `[openRegExp, closeRegExp]` pairs used by the parser to
* detect custom attribute surround patterns (for non-standard syntaxes,
* e.g. `<input {{#if value}}checked="checked"{{/if}}>`).
*
* @prop {RegExp[]} [customEventAttributes]
* Array of regexes used to detect event handler attributes for `minifyJS`
* (e.g. `ng-click`). The default matches standard `on…` event attributes.
*
* Default: `[/^on[a-z]{3,}$/]`
*
* @prop {number} [customFragmentQuantifierLimit]
* Limits the quantifier used when building a safe regex for custom
* fragments to avoid ReDoS. See source use for details.
*
* Default: `200`
*
* @prop {boolean} [decodeEntities]
* When true, decodes HTML entities in text and attributes before
* processing, and re-encodes ambiguous ampersands when outputting.
*
* Default: `false`
*
*
* @prop {RegExp[]} [ignoreCustomComments]
* Comments matching any pattern in this array of regexes will be
* preserved when `removeComments` is enabled. The default preserves
* “bang” comments and comments starting with `#`.
*
* Default: `[/^!/, /^\s*#/]`
*
* @prop {RegExp[]} [ignoreCustomFragments]
* Array of regexes used to identify fragments that should be
* preserved (for example server templates). These fragments are temporarily
* replaced during minification to avoid corrupting template code.
* The default preserves ASP/PHP-style tags.
*
* Default: `[/<%[\s\S]*?%>/, /<\?[\s\S]*?\?>/]`
*
* @prop {boolean} [includeAutoGeneratedTags]
* If false, tags marked as auto-generated by the parser will be omitted
* from output. Useful to skip injected tags.
*
* Default: `false`
*
* @prop {ArrayLike<string>} [inlineCustomElements]
* Collection of custom element tag names that should be treated as inline
* elements for white-space handling, alongside the built-in inline elements.
*
* Default: `[]`
*
* @prop {boolean} [keepClosingSlash]
* Preserve the trailing slash in self-closing tags when present.
*
* Default: `false`
*
* @prop {(message: unknown) => void} [log]
* Logging function used by the minifier for warnings/errors/info.
* You can directly provide `console.log`, but `message` may also be an `Error`
* object or other non-string value.
*
* Default: `() => {}` (no-op function)
*
* @prop {number} [maxInputLength]
* The maximum allowed input length. Used as a guard against ReDoS via
* pathological inputs. If the input exceeds this length an error is
* thrown.
*
* Default: No limit
*
* @prop {number} [maxLineLength]
* Maximum line length for the output. When set the minifier will wrap
* output to the given number of characters where possible.
*
* Default: No limit
*
* @prop {boolean} [mergeScripts]
* When true, consecutive inline `<script>` elements are merged into one.
* Only merges compatible scripts (same `type`, matching `async`/`defer`/
* `nomodule`/`nonce` attributes). Does not merge external scripts (with `src`).
*
* Default: `false`
*
* @prop {boolean | Partial<import("lightningcss").TransformOptions<import("lightningcss").CustomAtRules>> | ((text: string, type?: string) => Promise<string> | string)} [minifyCSS]
* When true, enables CSS minification for inline `<style>` tags or
* `style` attributes. If an object is provided, it is passed to
* [Lightning CSS](https://www.npmjs.com/package/lightningcss)
* as transform options. If a function is provided, it will be used to perform
* custom CSS minification. If disabled, CSS is not minified.
*
* Default: `false`
*
* @prop {boolean | import("terser").MinifyOptions | {engine?: 'terser' | 'swc', [key: string]: any} | ((text: string, inline?: boolean) => Promise<string> | string)} [minifyJS]
* When true, enables JS minification for `<script>` contents and
* event handler attributes. If an object is provided, it can include:
* - `engine`: The minifier to use (`terser` or `swc`). Default: `terser`.
* Note: Inline event handlers (e.g., `onclick="…"`) always use Terser
* regardless of engine setting, as swc doesn’t support bare return statements.
* - Engine-specific options (e.g., Terser options if `engine: 'terser'`,
* SWC options if `engine: 'swc'`).
* If a function is provided, it will be used to perform
* custom JS minification. If disabled, JS is not minified.
*
* Default: `false`
*
* @prop {boolean | string | {site?: string} | ((text: string) => Promise<string> | string)} [minifyURLs]
* When true, enables URL rewriting/minification. If an object is provided,
* the `site` property sets the base URL for computing relative paths.
* If a string is provided, it is treated as an `{ site: string }` options
* object. If a function is provided, it will be used to perform custom URL
* minification. If disabled, URLs are not minified.
*
* Default: `false`
*
* @prop {boolean | Object} [minifySVG]
* When true, enables SVG minification using [SVGO](https://github.com/svg/svgo).
* Complete SVG subtrees are extracted and optimized as a block.
* If an object is provided, it is passed to SVGO as configuration options.
* If disabled, SVG content is minified using standard HTML rules only.
*
* Default: `false`
*
* @prop {(name: string) => string} [name]
* Function used to normalise tag/attribute names. By default, this lowercases
* names, unless `caseSensitive` is enabled.
*
* Default: `(name) => name.toLowerCase()`,
* or `(name) => name` (no-op function) if `caseSensitive` is enabled.
*
* @prop {boolean} [noNewlinesBeforeTagClose]
* When wrapping lines, prevent inserting a newline directly before a
* closing tag (useful to keep tags like `</a>` on the same line).
*
* Default: `false`
*
* @prop {boolean} [partialMarkup]
* When true, treat input as a partial HTML fragment rather than a complete
* document. This preserves stray end tags (closing tags without corresponding
* opening tags) and prevents auto-closing of unclosed tags at the end of input.
* Useful for minifying template fragments, SSI includes, or other partial HTML
* that will be combined with other fragments.
*
* Default: `false`
*
* @prop {boolean} [preserveLineBreaks]
* Preserve a single line break at the start/end of text nodes when
* collapsing/trimming whitespace.
* Must also enable `collapseWhitespace` to have effect.
*
* Default: `false`
*
* @prop {boolean} [preventAttributesEscaping]
* When true, attribute values will not be HTML-escaped (dangerous for
* untrusted input). By default, attributes are escaped.
*
* Default: `false`
*
* @prop {boolean} [processConditionalComments]
* When true, conditional comments (for example `<!--[if IE]> … <![endif]-->`)
* will have their inner content processed by the minifier.
* Useful to minify HTML that appears inside conditional comments.
*
* Default: `false`
*
* @prop {string[]} [processScripts]
* Array of `type` attribute values for `<script>` elements whose contents
* should be processed as HTML
* (e.g. `text/ng-template`, `text/x-handlebars-template`, etc.).
* When present, the contents of matching script tags are recursively minified,
* like normal HTML content.
*
* Default: `[]`
*
* @prop {"\"" | "'"} [quoteCharacter]
* Preferred quote character for attribute values. If unspecified the
* minifier picks the safest quote based on the attribute value.
*
* Default: Auto-detected
*
* @prop {boolean} [removeAttributeQuotes]
* Remove quotes around attribute values where it is safe to do so.
* See also: https://perfectionkills.com/experimenting-with-html-minifier/#remove_attribute_quotes
*
* Default: `false`
*
* @prop {boolean} [removeComments]
* Remove HTML comments. Comments that match `ignoreCustomComments` will
* still be preserved.
* See also: https://perfectionkills.com/experimenting-with-html-minifier/#remove_comments
*
* Default: `false`
*
* @prop {boolean | ((attrName: string, tag: string) => boolean)} [removeEmptyAttributes]
* If true, removes attributes whose values are empty (some attributes
* are excluded by name). Can also be a function to customise which empty
* attributes are removed.
* See also: https://perfectionkills.com/experimenting-with-html-minifier/#remove_empty_or_blank_attributes
*
* Default: `false`
*
* @prop {boolean} [removeEmptyElements]
* Remove elements that are empty and safe to remove (for example
* `<script />` without `src`).
* See also: https://perfectionkills.com/experimenting-with-html-minifier/#remove_empty_elements
*
* Default: `false`
*
* @prop {string[]} [removeEmptyElementsExcept]
* Specifies empty elements to preserve when `removeEmptyElements` is enabled.
* Has no effect unless `removeEmptyElements: true`.
*
* Accepts tag names or HTML-like element specifications:
*
* * Tag name only: `["td", "span"]`—preserves all empty elements of these types
* * With valued attributes: `["<span aria-hidden='true'>"]`—preserves only when attribute values match
* * With boolean attributes: `["<input disabled>"]`—preserves only when boolean attribute is present
* * Mixed: `["<button type='button' disabled>"]`—all specified attributes must match
*
* Attribute matching:
*
* * All specified attributes must be present and match (valued attributes must have exact values)
* * Additional attributes on the element are allowed
* * Attribute name matching respects the `caseSensitive` option
* * Supports double quotes, single quotes, and unquoted attribute values in specifications
*
* Limitations:
*
* * Self-closing syntax (e.g., `["<span/>"]`) is not supported; use `["span"]` instead
* * Definitions containing `>` within quoted attribute values (e.g., `["<span title='a>b'>"]`) are not supported
*
* Default: `[]`
*
* @prop {boolean} [removeOptionalTags]
* Drop optional start/end tags where the HTML specification permits it
* (for example `</li>`, optional `<html>` etc.).
* See also: https://perfectionkills.com/experimenting-with-html-minifier/#remove_optional_tags
*
* Default: `false`
*
* @prop {boolean} [removeRedundantAttributes]
* Remove attributes that are redundant because they match the element’s
* default values (for example `<button type="submit">`).
* See also: https://perfectionkills.com/experimenting-with-html-minifier/#remove_redundant_attributes
*
* Default: `false`
*
* @prop {boolean} [removeScriptTypeAttributes]
* Remove `type` attributes from `<script>` when they are unnecessary
* (e.g. `type="text/javascript"`).
*
* Default: `false`
*
* @prop {boolean} [removeStyleLinkTypeAttributes]
* Remove `type` attributes from `<style>` and `<link>` elements when
* they are unnecessary (e.g. `type="text/css"`).
*
* Default: `false`
*
* @prop {boolean} [removeTagWhitespace]
* **Note that this will result in invalid HTML!**
*
* When true, extra whitespace between tag name and attributes (or before
* the closing bracket) will be removed where possible. Affects output spacing
* such as the space used in the short doctype representation.
*
* Default: `false`
*
* @prop {boolean | ((tag: string, attrs: HTMLAttribute[]) => void)} [sortAttributes]
* When true, enables sorting of attributes. If a function is provided it
* will be used as a custom attribute sorter, which should mutate `attrs`
* in-place to the desired order. If disabled, the minifier will attempt to
* preserve the order from the input.
*
* Default: `false`
*
* @prop {boolean | ((value: string) => string)} [sortClassNames]
* When true, enables sorting of class names inside `class` attributes.
* If a function is provided, it will be used to transform/sort the class
* name string. If disabled, the minifier will attempt to preserve the
* class-name order from the input.
*
* Default: `false`
*
* @prop {boolean} [trimCustomFragments]
* When true, whitespace around ignored custom fragments may be trimmed
* more aggressively. This affects how preserved fragments interact with
* surrounding whitespace collapse.
*
* Default: `false`
*
* @prop {boolean} [useShortDoctype]
* Replace the HTML doctype with the short `<!doctype html>` form.
* See also: https://perfectionkills.com/experimenting-with-html-minifier/#use_short_doctype
*
* Default: `false`
*/
/**
 * Analyse how often attribute names and class names occur in `value` and
 * install frequency-based sorters on `options`.
 *
 * When `options.sortAttributes` / `options.sortClassNames` are truthy but not
 * already functions, they are replaced in place by generated sorter functions.
 * Options that are falsy or already functions are left untouched.
 *
 * The analysis runs a first `minifyHTML` pass with sorting and aggressive
 * minification switched off, then scans the result with `HTMLParser`.
 * `options.continueOnParseError` is forced to `true` only for the duration of
 * that analysis and is always restored afterwards.
 *
 * @param {string} value - HTML to analyse
 * @param {MinifierOptions} options - Normalized options; mutated as described above
 * @param {string} [uidIgnore] - UID used in `<!--UID N-->` placeholders for ignored markup
 * @param {string} [uidAttr] - UID marking custom-fragment placeholders; tokens containing it are not counted
 * @param {string[]} [ignoredMarkupChunks] - Original markup, indexed by the placeholder number
 * @returns {Promise<void>}
 */
async function createSortFns(value, options, uidIgnore, uidAttr, ignoredMarkupChunks) {
  const attrChains = options.sortAttributes && typeof options.sortAttributes !== 'function' && Object.create(null);
  const classChain = options.sortClassNames && typeof options.sortClassNames !== 'function' && new TokenChain();

  function attrNames(attrs) {
    return attrs.map(function (attr) {
      return options.name(attr.name);
    });
  }

  // True when `token` does not contain `uid` (or when there is no `uid` to look for)
  function shouldSkipUID(token, uid) {
    return !uid || token.indexOf(uid) === -1;
  }

  function shouldKeepToken(token) {
    // Filter out any HTML comment tokens (UID placeholders)
    // These are temporary markers created by `htmlmin:ignore` and `ignoreCustomFragments`
    if (token.startsWith('<!--') && token.endsWith('-->')) {
      return false;
    }
    return shouldSkipUID(token, uidIgnore) && shouldSkipUID(token, uidAttr);
  }

  // Class names are separated by ASCII whitespace (space, TAB, LF, FF, CR).
  // The same pattern is used for analysis and for sorting so both see identical tokens.
  // Declared before `scan()` since scan uses it; not global, so it carries no `lastIndex` state.
  const asciiWhitespacePattern = /[ \t\n\f\r]+/;

  // Replace `<!--UID N-->` placeholders with the markup they stand for
  function expandIgnoredChunks(text, pattern) {
    return text.replace(pattern, function (match, index) {
      return ignoredMarkupChunks[+index] || '';
    });
  }

  async function scan(input) {
    let currentTag, currentType;
    const parser = new HTMLParser(input, {
      start: function (tag, attrs) {
        if (attrChains) {
          if (!attrChains[tag]) {
            attrChains[tag] = new TokenChain();
          }
          const attrNamesList = attrNames(attrs).filter(shouldKeepToken);
          attrChains[tag].add(attrNamesList);
        }
        for (let i = 0, len = attrs.length; i < len; i++) {
          const attr = attrs[i];
          if (classChain && attr.value && options.name(attr.name) === 'class') {
            const classes = trimWhitespace(attr.value).split(asciiWhitespacePattern).filter(shouldKeepToken);
            classChain.add(classes);
          } else if (options.processScripts && attr.name.toLowerCase() === 'type') {
            currentTag = tag;
            currentType = attr.value;
          }
        }
      },
      end: function () {
        currentTag = '';
      },
      chars: async function (text) {
        // Only recursively scan HTML content, not JSON-LD or other non-HTML script types
        // `scan()` is for analyzing HTML attribute order, not for parsing JSON
        if (options.processScripts && specialContentElements.has(currentTag) &&
          options.processScripts.indexOf(currentType) > -1 &&
          currentType === 'text/html') {
          await scan(text);
        }
      },
      // We never need `nextTag` information in this scan
      wantsNextTag: false,
      // Continue on parse errors during analysis pass
      continueOnParseError: options.continueOnParseError
    });

    try {
      await parser.parse();
    } catch (err) {
      // If parsing fails during analysis pass, just skip it—we’ll still have partial frequency data from what we could parse
      if (!options.continueOnParseError) {
        throw err;
      }
    }
  }

  // For the first pass, create a copy of options and disable aggressive minification.
  // Keep attribute transformations (like `removeStyleLinkTypeAttributes`) for accurate analysis.
  // This is safe because `createSortFns` is called before custom fragment UID markers (`uidAttr`) are added.
  // Note: `htmlmin:ignore` UID markers (`uidIgnore`) already exist and are expanded for analysis.
  const firstPassOptions = Object.assign({}, options, {
    // Disable sorting for the analysis pass
    sortAttributes: false,
    sortClassNames: false,
    // Disable aggressive minification that doesn’t affect attribute analysis
    collapseWhitespace: false,
    removeAttributeQuotes: false,
    removeTagWhitespace: false,
    decodeEntities: false,
    processScripts: false,
    // Keep `ignoreCustomFragments` to handle template syntax correctly
    // Continue on parse errors during analysis (e.g., template syntax)
    continueOnParseError: true,
    log: identity
  });

  // Pre-compile regex patterns for UID replacement and custom fragments.
  // This happens before `options` is touched: `new RegExp` can throw on bad
  // user-supplied patterns, and the caller’s options must not be left modified then.
  const uidReplacePattern = uidIgnore && ignoredMarkupChunks
    ? new RegExp('<!--' + uidIgnore + '(\\d+)-->', 'g')
    : null;
  const customFragmentPattern = options.ignoreCustomFragments && options.ignoreCustomFragments.length > 0
    ? new RegExp('(' + options.ignoreCustomFragments.map(re => re.source).join('|') + ')', 'g')
    : null;

  // Temporarily enable `continueOnParseError` for the `scan()` call below.
  // Note: `firstPassOptions` already has `continueOnParseError: true` for the `minifyHTML` call.
  const originalContinueOnParseError = options.continueOnParseError;
  options.continueOnParseError = true;
  try {
    // Expand UID tokens back to the original content for frequency analysis
    const expandedValue = uidReplacePattern ? expandIgnoredChunks(value, uidReplacePattern) : value;

    // First pass minification applies attribute transformations like `removeStyleLinkTypeAttributes` for accurate frequency analysis
    const firstPassOutput = await minifyHTML(expandedValue, firstPassOptions);

    // For frequency analysis, we need to remove custom fragments temporarily
    // because HTML comments in opening tags prevent proper attribute parsing.
    // We remove them with a space to preserve attribute boundaries.
    const scanValue = customFragmentPattern
      ? firstPassOutput.replace(customFragmentPattern, ' ')
      : firstPassOutput;

    await scan(scanValue);
  } finally {
    // Restore original option
    options.continueOnParseError = originalContinueOnParseError;
  }

  if (attrChains) {
    const attrSorters = Object.create(null);
    for (const tag in attrChains) {
      attrSorters[tag] = attrChains[tag].createSorter();
    }
    // Memoize sorted attribute orders—attribute sets often repeat in templates
    const attrOrderCache = new LRU(500);

    options.sortAttributes = function (tag, attrs) {
      const sorter = attrSorters[tag];
      if (sorter) {
        const names = attrNames(attrs);
        // Create order-independent cache key from tag and sorted attribute names
        const cacheKey = tag + ':' + names.slice().sort().join(',');
        let sortedNames = attrOrderCache.get(cacheKey);
        if (sortedNames === undefined) {
          // Only sort if not in cache—need to clone names since sort mutates in place
          sortedNames = sorter.sort(names.slice());
          attrOrderCache.set(cacheKey, sortedNames);
        }
        // Apply the sorted order to `attrs`
        const attrMap = Object.create(null);
        names.forEach(function (name, index) {
          (attrMap[name] || (attrMap[name] = [])).push(attrs[index]);
        });
        sortedNames.forEach(function (name, index) {
          attrs[index] = attrMap[name].shift();
        });
      }
    };
  }

  if (classChain) {
    const sorter = classChain.createSorter();
    // Memoize `sortClassNames` results—class lists often repeat in templates
    const classNameCache = new LRU(500);

    options.sortClassNames = function (value) {
      // Fast path: Single class (no ASCII whitespace at all) needs no sorting
      if (!asciiWhitespacePattern.test(value)) {
        return value;
      }
      // Check cache first
      const cached = classNameCache.get(value);
      if (cached !== undefined) {
        return cached;
      }
      // Expand UID tokens back to original content before sorting
      // Fast path: Skip if no HTML comments (UID markers) present
      const expandedValue = uidReplacePattern && value.indexOf('<!--') !== -1
        ? expandIgnoredChunks(value, uidReplacePattern)
        : value;
      const classes = expandedValue.split(asciiWhitespacePattern).filter(function (cls) {
        return cls !== '';
      });
      const sorted = sorter.sort(classes);
      const result = sorted.join(' ');
      // Cache the result
      classNameCache.set(value, result);
      return result;
    };
  }
}
/**
* @param {string} value - HTML content to minify
* @param {MinifierOptions} options - Normalized minification options
* @param {boolean} [partialMarkup] - Whether treating input as partial markup
* @returns {Promise<string>} Minified HTML
*/
async function minifyHTML(value, options, partialMarkup) {
// Check input length limitation to prevent ReDoS attacks
if (options.maxInputLength && value.length > options.maxInputLength) {
throw new Error(`Input length (${value.length}) exceeds maximum allowed length (${options.maxInputLength})`);
}
if (options.collapseWhitespace) {
value = collapseWhitespace(value, options, true, true);
}
const buffer = [];
let charsPrevTag;
let currentChars = '';
let hasChars;
let currentTag = '';
let currentAttrs = [];
const stackNoTrimWhitespace = [];
const stackNoCollapseWhitespace = [];
let preTextareaDepth = 0; // Count of `pre`/`textarea` entries in `stackNoTrimWhitespace`
let optionalStartTag = '';
let optionalEndTag = '';
let optionalEndTagEmitted = false;
const ignoredMarkupChunks = [];
const ignoredCustomMarkupChunks = [];
let uidIgnore;
let uidIgnorePlaceholderPattern;
let uidAttr;
let uidPattern;
// Build the inline tag/text sets, folding in any custom elements named in `options.inlineCustomElements`
const customElementsInput = options.inlineCustomElements ?? [];
const customElementsArr = Array.isArray(customElementsInput)
  ? customElementsInput
  : Array.from(customElementsInput);
// Names go through `options.name` so they compare equal to the tag names produced while parsing
const normalizedCustomElements = customElementsArr.map(function (elementName) {
  return options.name(elementName);
});
// Fast path: With no custom elements the shared base sets are reused as-is (no copy is made)
const inlineTextSet = normalizedCustomElements.length === 0
  ? inlineElementsToKeepWhitespaceWithin
  : new Set([...inlineElementsToKeepWhitespaceWithin, ...normalizedCustomElements]);
const inlineElements = normalizedCustomElements.length === 0
  ? inlineElementsToKeepWhitespaceAround
  : new Set([...inlineElementsToKeepWhitespaceAround, ...normalizedCustomElements]);
// Resolve the `removeEmptyElementsExcept` option: An unset value or an array is parsed,
// anything else is reported through `options.log` (when present) and treated as an empty list
let removeEmptyElementsExcept = [];
if (!options.removeEmptyElementsExcept || Array.isArray(options.removeEmptyElementsExcept)) {
  removeEmptyElementsExcept = parseRemoveEmptyElementsExcept(options.removeEmptyElementsExcept, options) || [];
} else if (options.log) {
  options.log('Warning: `removeEmptyElementsExcept` option must be an array, received: ' + typeof options.removeEmptyElementsExcept);
}
// Swap every region enclosed by a pair of `<!-- htmlmin:ignore -->` markers for a placeholder
// comment, so the parser never has to look at whatever (possibly broken, non-HTML) content is inside;
// the original text of each region is kept in `ignoredMarkupChunks` under the placeholder’s index
if (value.includes('<!-- htmlmin:ignore -->')) {
  // Plain `indexOf` scanning keeps this linear; a global regex with [\s\S]*? could backtrack
  // quadratically on adversarial input with many opening markers and no closing one
  const marker = '<!-- htmlmin:ignore -->';
  const pieces = [];
  let cursor = 0;
  while (cursor < value.length) {
    const openAt = value.indexOf(marker, cursor);
    if (openAt === -1) {
      // No further marker: Keep the remainder verbatim
      pieces.push(value.slice(cursor));
      break;
    }
    pieces.push(value.slice(cursor, openAt));
    const closeAt = value.indexOf(marker, openAt + marker.length);
    if (closeAt === -1) {
      // Unpaired opening marker: Keep it and everything after it verbatim
      pieces.push(value.slice(openAt));
      break;
    }
    if (!uidIgnore) {
      // First ignored region: Create the UID and register its comment pattern in a copy of
      // `ignoreCustomComments`, leaving any array that was passed in unmodified
      uidIgnore = uniqueId(value);
      uidIgnorePlaceholderPattern = new RegExp('^<!--' + uidIgnore + '(\\d+)-->$');
      const placeholderComment = new RegExp('^' + uidIgnore + '([0-9]+)$');
      options.ignoreCustomComments = options.ignoreCustomComments
        ? options.ignoreCustomComments.slice()
        : [];
      options.ignoreCustomComments.push(placeholderComment);
    }
    // The placeholder carries the index the chunk is about to receive
    pieces.push('<!--' + uidIgnore + ignoredMarkupChunks.length + '-->');
    ignoredMarkupChunks.push(value.slice(openAt + marker.length, closeAt));
    cursor = closeAt + marker.length;
  }
  value = pieces.join('');
}
// Sort functions are created at this point—after `htmlmin:ignore` processing, before custom fragment
// UID markers are inserted—so frequency analysis can still reach ignored content via the UID tokens.
// This is only needed when a sort option is enabled but is not already a function
if ([options.sortAttributes, options.sortClassNames].some(setting => setting && typeof setting !== 'function')) {
  await createSortFns(value, options, uidIgnore, null, ignoredMarkupChunks);
}
// Sources of all `ignoreCustomFragments` patterns, combined below into a single matcher
const customFragments = options.ignoreCustomFragments.map(function (re) {
  return re.source;
});
if (customFragments.length) {
  // Warn once about potential ReDoS if any custom fragment uses unlimited quantifiers.
  // The logger is checked first—like the guarded `options.log` call for the
  // `removeEmptyElementsExcept` warning—so a missing logger cannot abort minification
  if (options.log && customFragments.some(source => /[*+]/.test(source))) {
    options.log('Warning: Custom fragment contains unlimited quantifiers (“*” or “+”) which may cause ReDoS vulnerability');
  }
  // Safe approach: Use bounded quantifiers instead of unlimited ones to prevent ReDoS
  const maxQuantifier = options.customFragmentQuantifierLimit || 200;
  const whitespacePattern = `\\s{0,${maxQuantifier}}`;
  // One or more adjacent fragments, together with the (bounded) whitespace around them
  const reCustomIgnore = new RegExp(
    whitespacePattern + '(?:' + customFragments.join('|') + '){1,' + maxQuantifier + '}' + whitespacePattern,
    'g'
  );
  // Temporarily replace custom ignored fragments with unique attributes
  value = value.replace(reCustomIgnore, function (match) {
    if (!uidAttr) {
      // First fragment found: Create the UID and its matcher, then wrap the CSS/JS minifiers
      uidAttr = uniqueId(value);
      uidPattern = new RegExp('(\\s*)' + uidAttr + '([0-9]+)' + uidAttr + '(\\s*)', 'g');
      // Replaces whatever whitespace surrounds each placeholder (tabs are inserted around the
      // token below) with the leading/trailing whitespace captured with that fragment
      const restoreFragmentWhitespace = function (text) {
        return text.replace(uidPattern, function (placeholder, prefix, index) {
          const chunks = ignoredCustomMarkupChunks[+index];
          return chunks[1] + uidAttr + index + uidAttr + chunks[2];
        });
      };
      // Wraps a minifier so it receives text with the fragment whitespace restored
      const withFragmentWhitespace = function (minifier) {
        return function (text, type) {
          return minifier(restoreFragmentWhitespace(text), type);
        };
      };
      if (options.minifyCSS) {
        options.minifyCSS = withFragmentWhitespace(options.minifyCSS);
      }
      if (options.minifyJS) {
        options.minifyJS = withFragmentWhitespace(options.minifyJS);
      }
    }
    const token = uidAttr + ignoredCustomMarkupChunks.length + uidAttr;
    // Stored as [whole match, leading whitespace, trailing whitespace]
    ignoredCustomMarkupChunks.push(/^(\s*)[\s\S]*?(\s*)$/.exec(match));
    return '\t' + token + '\t';
  });
}
/**
 * Asks the `canCollapseWhitespace` option (read from the current `options` at call time)
 * whether whitespace may be collapsed for the given element.
 *
 * @param {string} tag - Element name
 * @param {Array} attrs - Attributes of the element
 * @returns {*} Whatever the option callback returns; the built-in rule is passed to it as third argument
 */
function canCollapseWhitespace(tag, attrs) {
  const verdict = options.canCollapseWhitespace(tag, attrs, defaultCanCollapseWhitespace);
  return verdict;
}
/**
 * Asks the `canTrimWhitespace` option (read from the current `options` at call time)
 * whether whitespace may be trimmed for the given element.
 *
 * @param {string} tag - Element name
 * @param {Array} attrs - Attributes of the element
 * @returns {*} Whatever the option callback returns; the built-in rule is passed to it as third argument
 */
function canTrimWhitespace(tag, attrs) {
  const verdict = options.canTrimWhitespace(tag, attrs, defaultCanTrimWhitespace);
  return verdict;
}
/**
 * Drops the most recently written start tag from `buffer`, together with everything after it.
 * Scans backwards for an entry matching `RE_START_TAG`; index 0 is never tested, so when no
 * later entry matches the buffer is emptied.
 */
function removeStartTag() {
  let cut = buffer.length - 1;
  for (; cut > 0; cut--) {
    if (RE_START_TAG.test(buffer[cut])) {
      break;
    }
  }
  // `cut` is -1 for an empty buffer
  buffer.length = cut < 0 ? 0 : cut;
}
/**
 * Drops the most recently written end tag from `buffer`, together with everything after it.
 * Scans backwards for an entry matching `RE_END_TAG`; index 0 is never tested, so when no
 * later entry matches the buffer is emptied.
 */
function removeEndTag() {
  let position = buffer.length;
  do {
    position--;
  } while (position > 0 && !RE_END_TAG.test(buffer[position]));
  // `position` is -1 for an empty buffer
  buffer.length = Math.max(position, 0);
}
/**
 * Walks `buffer` backwards from `index` and collapses the trailing whitespace of the nearest
 * text entry. End tags are stepped over for as long as `canTrimWhitespace` accepts them;
 * the walk stops at any other entry ending in `>` or once a text entry is non-empty after collapsing.
 *
 * @param {number} index - Buffer position to start from
 * @param {string} nextTag - Forwarded to `collapseWhitespaceSmart` as the following tag
 */
function trimTrailingWhitespace(index, nextTag) {
  let closingTag = null;
  while (index >= 0 && canTrimWhitespace(closingTag)) {
    const piece = buffer[index];
    const closing = piece.match(/^<\/([\w:-]+)>$/);
    if (closing) {
      // Remember the tag so the loop condition can veto trimming inside it
      closingTag = closing[1];
    } else {
      if (piece.endsWith('>')) {
        return;
      }
      const squashed = collapseWhitespaceSmart(piece, null, nextTag, [], [], options, inlineElements, inlineTextSet);
      buffer[index] = squashed;
      // An entry that collapsed to nothing is skipped; keep looking further back
      if (squashed) {
        return;
      }
    }
    index--;
  }
}
/**
 * Trims trailing whitespace left behind by previously processed text—whitespace that could
 * not be trimmed at the time because a comment or an (now removed) empty element followed it.
 * When the buffer has more than one entry and the last one is empty or starts with `<!`
 * (and is not one of the `uidIgnore` placeholders), trimming starts one entry earlier.
 *
 * @param {string} nextTag - Forwarded to `trimTrailingWhitespace`
 */
function squashTrailingWhitespace(nextTag) {
  const lastIndex = buffer.length - 1;
  const last = buffer[lastIndex];
  const skipLast = buffer.length > 1 &&
    (last === '' || last.startsWith('<!')) &&
    !(uidIgnore && last.includes(uidIgnore));
  trimTrailingWhitespace(skipLast ? lastIndex - 1 : lastIndex, nextTag);
}
// SVG subtree capture: When SVGO is active, record buffer positions for post-processing
const svgBlocks = []; // Array of { start, end } buffer indices
let svgBufferStartIndex = -1;
let svgDepth = 0;
const parser = new HTMLParser(value, {
partialMarkup: partialMarkup ?? options.partialMarkup,
continueOnParseError: options.continueOnParseError,
customAttrAssign: options.customAttrAssign,
customAttrSurround: options.customAttrSurround,
// Compute `nextTag` only when whitespace collapse features require it
wantsNextTag: !!(options.collapseWhitespace || options.collapseInlineTagWhitespace || options.conservativeCollapse),
/**
 * Start-tag handler passed to `HTMLParser`. For the given tag it switches the option context
 * when entering SVG/MathML (or HTML embedded in them), applies optional-tag omission,
 * maintains the whitespace stacks, and writes the tag with its normalized attributes to `buffer`.
 *
 * @param {string} tag - Tag name; reassigned below to the name produced by the active name function
 * @param {Array<Object>} attrs - Attribute objects (at least `name` and `value` are read here);
 *   deduplicated and optionally sorted in place
 * @param {boolean} unary - When truthy the tag is not pushed onto the whitespace stacks
 *   (presumably a void/self-closing element—confirm against the parser)
 * @param {*} unarySlash - When truthy and `keepClosingSlash` is enabled, `/` is emitted before `>`
 * @param {boolean} autoGenerated - When truthy and `includeAutoGeneratedTags` is off, the tag is removed again
 * @returns {Promise<void>}
 */
start: async function (tag, attrs, unary, unarySlash, autoGenerated) {
const lowerTag = tag.toLowerCase();
if (lowerTag === 'svg' || lowerTag === 'math') {
// Derive a child object so the overrides below shadow the inherited options;
// the `end` handler steps back to the parent with `Object.getPrototypeOf`
options = Object.create(options);
options.caseSensitive = true;
options.keepClosingSlash = true;
options.name = identity;
options.insideSVG = lowerTag === 'svg';
options.insideForeignContent = true;
// Disable HTML-specific options that produce invalid XML
options.removeAttributeQuotes = false;
options.removeTagWhitespace = false;
options.decodeEntities = false;
}
// `foreignObject` in SVG and `annotation-xml` in MathML contain HTML content
// Note: The element itself is in SVG/MathML namespace, only its children are HTML
let useParentNameForTag = false;
if (options.insideForeignContent && (lowerTag === 'foreignobject' ||
(lowerTag === 'annotation-xml' && attrs.some(a => a.name.toLowerCase() === 'encoding' &&
RE_HTML_ENCODING.test(a.value))))) {
const parentName = options.name;
// Another child level: HTML rules apply again from here on
options = Object.create(options);
options.caseSensitive = false;
options.keepClosingSlash = false;
options.parentName = parentName; // Preserve for the element tag itself
options.name = options.htmlName || lowercase;
options.insideForeignContent = false;
// Note: `removeAttributeQuotes`, `removeTagWhitespace`, and `decodeEntities`
// stay disabled (inherited from SVG context) because the entire SVG block
// must be valid XML for SVGO processing
useParentNameForTag = true;
}
// The element that opens the HTML island is still named with the enclosing (foreign) name function
tag = (useParentNameForTag ? options.parentName : options.name)(tag);
currentTag = tag;
charsPrevTag = tag;
// Accumulated text is kept across tags listed in `inlineTextSet` and reset for all others
if (!inlineTextSet.has(tag)) {
currentChars = '';
}
hasChars = false;
currentAttrs = attrs;
// `optional` may be switched off below when this start tag has become mandatory
let optional = options.removeOptionalTags;
if (optional) {
const htmlTag = htmlElements.has(tag);
// `<html>` may be omitted if first thing inside is not a comment
// `<head>` may be omitted if first thing inside is an element
// `<body>` may be omitted if first thing inside is not space, comment, `<meta>`, `<link>`, `<script>`, `<style>`, or `<template>`
// `<colgroup>` may be omitted if first thing inside is `<col>`
// `<tbody>` may be omitted if first thing inside is `<tr>`
if (htmlTag && canRemoveParentTag(optionalStartTag, tag)) {
removeStartTag();
}
optionalStartTag = '';
// End-tag-followed-by-start-tag omission rules
if (htmlTag && canRemovePrecedingTag(optionalEndTag, tag)) {
// Only touch the buffer if the pending end tag was actually written
if (optionalEndTagEmitted) {
removeEndTag();
}
// `<colgroup>` cannot be omitted if preceding `</colgroup>` is omitted
// `<tbody>` cannot be omitted if preceding `</tbody>`, `</thead>`, or `</tfoot>` is omitted
optional = !isStartTagMandatory(optionalEndTag, tag);
}
optionalEndTag = '';
optionalEndTagEmitted = false;
}
// Set whitespace flags for nested tags (e.g., `<code>` within a `<pre>`)
if (options.collapseWhitespace) {
// Trailing whitespace of preceding text is only squashed outside no-trim elements
if (!stackNoTrimWhitespace.length) {
squashTrailingWhitespace(tag);
}
if (!unary) {
// Once a stack is non-empty, every nested non-unary tag is pushed onto it as well
if (!canTrimWhitespace(tag, attrs) || stackNoTrimWhitespace.length) {
stackNoTrimWhitespace.push(tag);
if (tag === 'pre' || tag === 'textarea') preTextareaDepth++;
}
if (!canCollapseWhitespace(tag, attrs) || stackNoCollapseWhitespace.length) {
stackNoCollapseWhitespace.push(tag);
}
}
}
// Track SVG subtree for SVGO block processing
if (lowerTag === 'svg' && options.minifySVG) {
// Only the outermost `<svg>` records a start index; nested ones just deepen the counter
if (svgDepth === 0) {
svgBufferStartIndex = buffer.length; // Record position before <svg> is pushed
}
svgDepth++;
}
const openTag = '<' + tag;
const hasUnarySlash = unarySlash && options.keepClosingSlash;
buffer.push(openTag);
// Remove duplicate attributes (per HTML spec, first occurrence wins)
// Duplicate attributes result in invalid HTML
// https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state
deduplicateAttributes(attrs, options.caseSensitive);
if (options.sortAttributes) {
options.sortAttributes(tag, attrs);
}
const attrResults = attrs.map(attr => normalizeAttr(attr, attrs, tag, options, minifyHTML));
// Await only when at least one attribute result is thenable; otherwise use the results directly
const normalizedAttrs = attrResults.some(isThenable) ? await Promise.all(attrResults) : attrResults;
const parts = [];
// Walk the attributes back to front so `isLast` is true exactly for the last one that
// survived normalization (falsy results are skipped); the order is restored afterwards
let isLast = true;
for (let i = normalizedAttrs.length - 1; i >= 0; i--) {
if (normalizedAttrs[i]) {
parts.push(buildAttr(normalizedAttrs[i], hasUnarySlash, options, isLast, uidAttr));
isLast = false;
}
}
parts.reverse();
if (parts.length > 0) {
buffer.push(' ');
buffer.push.apply(buffer, parts);
} else if (optional && optionalStartTags.has(tag)) {
// Start tag must never be omitted if it has any attributes
optionalStartTag = tag;
}
// Close the tag by appending to the last buffer entry (the tag name or its final attribute)
buffer.push(buffer.pop() + (hasUnarySlash ? '/' : '') + '>');
// Tags flagged as auto-generated are taken out again unless explicitly requested
if (autoGenerated && !options.includeAutoGeneratedTags) {
removeStartTag();
optionalStartTag = '';
currentTag = '';
}
},
end: function (tag, attrs, autoGenerated) {
const lowerTag = tag.toLowerCase();
// Restore parent context when exiting SVG/MathML or HTML-in-foreign-content elements
if (lowerTag === 'svg' || lowerTag === 'math') {
options = Object.getPrototypeOf(options);
} else if ((lowerTag === 'foreignobject' || lowerTag === 'annotation-xml') &&
!options.insideForeignContent && Object.getPrototypeOf(options).insideForeignContent) {
options = Object.getPrototypeOf(options);
}
tag = options.name(tag);
// Check if current tag is in a whitespace stack
if (options.collapseWhitespace) {
if (stackNoTrimWhitespace.length) {
if (tag === stackNoTrimWhitespace[stackNoTrimWhitespace.length - 1]) {
if (tag === 'pre' || tag === 'textarea') preTextareaDepth--;
stackNoTrimWhitespace.pop();
}
} else {
squashTrailingWhitespace('/' + tag);
}
if (stackNoCollapseWhitespace.length &&
tag === stackNoCollapseWhitespace[stackNoCollapseWhitespace.length - 1]) {
stackNoCollapseWhitespace.pop();
}
}
let isElementEmpty = false;
if (tag === currentTag) {
currentTag = '';
isElementEmpty = !hasChars;
}
if (options.removeOptionalTags) {
// `<html>`, `<head>` or `<body>` may be omitted if the element is empty
if (isElementEmpty && topLevelElements.has(optionalStartTag)) {
removeStartTag();
}
optionalStartTag = '';
// `</html>` or `</body>` may be omitted if not followed by comment
// `</head>` may be omitted if not followed by space or comment
// `</p>` may be omitted if no more content in parent, unless parent is in `pInlineElements` or is a custom element
// https://html.spec.whatwg.org/multipage/syntax.html#optional-tags
// except for `</dt>` or `</thead>`, end tags may be omitted if no more content in parent element
if (tag && optionalEndTag && optionalEndTagEmitted && !trailingElements.has(optionalEndTag) && (optionalEn