html-minifier-next
Version:
Super-configurable and well-tested web page minifier (enhanced successor of HTML Minifier)
704 lines (627 loc) • 24 kB
JavaScript
// Imports
import {
RE_CONDITIONAL_COMMENT,
RE_EVENT_ATTR_DEFAULT,
RE_CAN_REMOVE_ATTR_QUOTES,
RE_AMP_ENTITY,
RE_ATTR_WS_CHECK,
RE_ATTR_WS_COLLAPSE,
RE_ATTR_WS_TRIM,
generalDefaults,
tagDefaults,
executableScriptsMimetypes,
keepScriptsMimetypes,
isSimpleBoolean,
isBooleanValue,
collapsibleValues,
srcsetElements,
reEmptyAttribute
} from './constants.js';
import { trimWhitespace, collapseWhitespaceAll } from './whitespace.js';
import { shouldMinifyInnerHTML } from './options.js';
import { isThenable } from './utils.js';
// Lazy-load entities only when `decodeEntities` is enabled
let decodeHTMLStrictPromise;
async function getDecodeHTMLStrict() {
if (!decodeHTMLStrictPromise) {
decodeHTMLStrictPromise = import('entities').then(m => m.decodeHTMLStrict);
}
return decodeHTMLStrictPromise;
}
// Validators
function isConditionalComment(text) {
return RE_CONDITIONAL_COMMENT.test(text);
}
function isIgnoredComment(text, options) {
for (let i = 0, len = options.ignoreCustomComments.length; i < len; i++) {
if (options.ignoreCustomComments[i].test(text)) {
return true;
}
}
return false;
}
function isEventAttribute(attrName, options) {
const patterns = options.customEventAttributes;
if (patterns) {
for (let i = patterns.length; i--;) {
if (patterns[i].test(attrName)) {
return true;
}
}
return false;
}
return RE_EVENT_ATTR_DEFAULT.test(attrName);
}
function canRemoveAttributeQuotes(value) {
// https://mathiasbynens.be/notes/unquoted-attribute-values
return RE_CAN_REMOVE_ATTR_QUOTES.test(value);
}
function attributesInclude(attributes, attribute) {
for (let i = attributes.length; i--;) {
if (attributes[i].name.toLowerCase() === attribute) {
return true;
}
}
return false;
}
/**
* Remove duplicate attributes from an attribute list.
* Per HTML spec, when an attribute appears multiple times, the first occurrence wins.
* Duplicate attributes result in invalid HTML, so we keep only the first.
* @param {Array} attrs - Array of attribute objects with `name` property
* @param {boolean} caseSensitive - Whether to compare names case-sensitively (for XML/SVG)
* @returns {Array} Deduplicated attribute array (modifies in place and returns)
*/
function deduplicateAttributes(attrs, caseSensitive) {
if (attrs.length < 2) {
return attrs;
}
const seen = new Set();
let writeIndex = 0;
for (let i = 0; i < attrs.length; i++) {
const attr = attrs[i];
const key = caseSensitive ? attr.name : attr.name.toLowerCase();
if (!seen.has(key)) {
seen.add(key);
attrs[writeIndex++] = attr;
}
}
attrs.length = writeIndex;
return attrs;
}
function isAttributeRedundant(tag, attrName, attrValue, attrs) {
// Fast-path: Check if this element–attribute combination can possibly be redundant
// before doing expensive string operations
// Check if attribute name is in general defaults
const hasGeneralDefault = attrName in generalDefaults;
// Check if element has any default attributes
const tagHasDefaults = tag in tagDefaults;
// Check for legacy attribute rules (element- and attribute-specific)
const isLegacyAttr = (tag === 'script' && (attrName === 'language' || attrName === 'charset')) || (tag === 'a' && attrName === 'name');
// If none of these conditions apply, attribute cannot be redundant
if (!hasGeneralDefault && !tagHasDefaults && !isLegacyAttr) {
return false;
}
// Now we know we need to check the value, so normalize it
attrValue = attrValue ? trimWhitespace(attrValue.toLowerCase()) : '';
// Legacy attribute checks
if (tag === 'script' && attrName === 'language' && attrValue === 'javascript') {
return true;
}
if (tag === 'script' && attrName === 'charset' && !attributesInclude(attrs, 'src')) {
return true;
}
if (tag === 'a' && attrName === 'name' && attributesInclude(attrs, 'id')) {
return true;
}
// Check general defaults
if (hasGeneralDefault && generalDefaults[attrName] === attrValue) {
return true;
}
// Check tag-specific defaults
return tagHasDefaults && tagDefaults[tag][attrName] === attrValue;
}
function isScriptTypeAttribute(attrValue = '') {
attrValue = trimWhitespace(attrValue.split(/;/, 2)[0]).toLowerCase();
return attrValue === '' || executableScriptsMimetypes.has(attrValue);
}
function keepScriptTypeAttribute(attrValue = '') {
attrValue = trimWhitespace(attrValue.split(/;/, 2)[0]).toLowerCase();
return keepScriptsMimetypes.has(attrValue);
}
function isExecutableScript(tag, attrs) {
if (tag !== 'script') {
return false;
}
for (let i = 0, len = attrs.length; i < len; i++) {
const attrName = attrs[i].name.toLowerCase();
if (attrName === 'type') {
return isScriptTypeAttribute(attrs[i].value);
}
}
return true;
}
function isStyleLinkTypeAttribute(attrValue = '') {
attrValue = trimWhitespace(attrValue).toLowerCase();
return attrValue === '' || attrValue === 'text/css';
}
function isStyleElement(tag, attrs) {
if (tag !== 'style') {
return false;
}
for (let i = 0, len = attrs.length; i < len; i++) {
const attrName = attrs[i].name.toLowerCase();
if (attrName === 'type') {
return isStyleLinkTypeAttribute(attrs[i].value);
}
}
return true;
}
function isBooleanAttribute(attrName, attrValue) {
return isSimpleBoolean.has(attrName) ||
(attrName === 'draggable' && !isBooleanValue.has(attrValue)) ||
(collapsibleValues.has(attrName) && collapsibleValues.get(attrName).has(attrValue));
}
const uriTypeAttributes = new Map([
['a', new Set(['href'])],
['area', new Set(['href'])],
['link', new Set(['href'])],
['base', new Set(['href'])],
['img', new Set(['src', 'longdesc', 'usemap'])],
['object', new Set(['classid', 'codebase', 'data', 'usemap'])],
['q', new Set(['cite'])],
['blockquote', new Set(['cite'])],
['ins', new Set(['cite'])],
['del', new Set(['cite'])],
['form', new Set(['action'])],
['input', new Set(['src', 'usemap'])],
['head', new Set(['profile'])],
['script', new Set(['src', 'for'])]
]);
function isUriTypeAttribute(attrName, tag) {
const set = uriTypeAttributes.get(tag);
return set ? set.has(attrName) : false;
}
const numberTypeAttributes = new Map([
['a', new Set(['tabindex'])],
['area', new Set(['tabindex'])],
['object', new Set(['tabindex'])],
['button', new Set(['tabindex'])],
['input', new Set(['maxlength', 'tabindex'])],
['select', new Set(['size', 'tabindex'])],
['textarea', new Set(['rows', 'cols', 'tabindex'])],
['colgroup', new Set(['span'])],
['col', new Set(['span'])],
['th', new Set(['rowspan', 'colspan'])],
['td', new Set(['rowspan', 'colspan'])]
]);
function isNumberTypeAttribute(attrName, tag) {
const set = numberTypeAttributes.get(tag);
return set ? set.has(attrName) : false;
}
function isLinkType(tag, attrs, value) {
if (tag !== 'link') return false;
const needle = String(value).toLowerCase();
for (let i = 0; i < attrs.length; i++) {
if (attrs[i].name.toLowerCase() === 'rel') {
const tokens = String(attrs[i].value).toLowerCase().split(/\s+/);
if (tokens.includes(needle)) return true;
}
}
return false;
}
function isMediaQuery(tag, attrs, attrName) {
return attrName === 'media' && (isLinkType(tag, attrs, 'stylesheet') || isStyleElement(tag, attrs));
}
function isSrcset(attrName, tag) {
return attrName === 'srcset' && srcsetElements.has(tag);
}
function isMetaViewport(tag, attrs) {
if (tag !== 'meta') {
return false;
}
for (let i = 0, len = attrs.length; i < len; i++) {
if (attrs[i].name.toLowerCase() === 'name' && attrs[i].value.toLowerCase() === 'viewport') {
return true;
}
}
return false;
}
function isContentSecurityPolicy(tag, attrs) {
if (tag !== 'meta') {
return false;
}
for (let i = 0, len = attrs.length; i < len; i++) {
if (attrs[i].name.toLowerCase() === 'http-equiv' && attrs[i].value.toLowerCase() === 'content-security-policy') {
return true;
}
}
return false;
}
function canDeleteEmptyAttribute(tag, attrName, attrValue, options) {
const isValueEmpty = !attrValue || attrValue.trim() === '';
if (!isValueEmpty) {
return false;
}
if (typeof options.removeEmptyAttributes === 'function') {
return options.removeEmptyAttributes(attrName, tag);
}
return (tag === 'input' && attrName === 'value') || reEmptyAttribute.test(attrName);
}
function hasAttrName(name, attrs) {
for (let i = attrs.length - 1; i >= 0; i--) {
if (attrs[i].name === name) {
return true;
}
}
return false;
}
// Cleaners
// Returns the cleaned attribute value directly (sync) or as a Promise (async);
// callers must handle both cases—use `isThenable()` to distinguish
function cleanAttributeValue(tag, attrName, attrValue, options, attrs, minifyHTMLSelf) {
// Apply early whitespace normalization if enabled
// Preserves special spaces (no-break space, hair space, etc.) for consistency with `collapseWhitespace`
if (options.collapseAttributeWhitespace) {
// Fast path: Only process if whitespace exists (avoids regex overhead on clean values)
if (RE_ATTR_WS_CHECK.test(attrValue)) {
// Two-pass approach (faster than single-pass with callback)
// First: Collapse internal whitespace sequences to single space
// Second: Trim leading/trailing whitespace
attrValue = attrValue.replace(RE_ATTR_WS_COLLAPSE, ' ').replace(RE_ATTR_WS_TRIM, '');
}
}
if (isEventAttribute(attrName, options)) {
attrValue = trimWhitespace(attrValue).replace(/^javascript:\s*/i, '');
const result = options.minifyJS(attrValue, true);
if (isThenable(result)) {
return result.catch(err => {
if (!options.continueOnMinifyError) throw err;
options.log && options.log(err);
return attrValue;
});
}
return result;
}
if (attrName === 'class') {
attrValue = trimWhitespace(attrValue);
if (options.sortClassNames) {
attrValue = options.sortClassNames(attrValue);
} else {
attrValue = collapseWhitespaceAll(attrValue);
}
return attrValue;
}
if (isUriTypeAttribute(attrName, tag)) {
attrValue = trimWhitespace(attrValue);
if (isLinkType(tag, attrs, 'canonical')) {
return attrValue;
}
const result = options.minifyURLs(attrValue);
if (isThenable(result)) {
return result
.then(out => typeof out === 'string' ? out : attrValue)
.catch(err => {
if (!options.continueOnMinifyError) throw err;
options.log && options.log(err);
return attrValue;
});
}
return typeof result === 'string' ? result : attrValue;
}
if (isNumberTypeAttribute(attrName, tag)) {
return trimWhitespace(attrValue);
}
if (attrName === 'style') {
attrValue = trimWhitespace(attrValue);
if (attrValue) {
if (attrValue.endsWith(';') && !/&#?[0-9a-zA-Z]+;$/.test(attrValue)) {
attrValue = attrValue.replace(/\s*;$/, ';');
}
const originalAttrValue = attrValue;
const cssResult = options.minifyCSS(attrValue, 'inline');
if (isThenable(cssResult)) {
return cssResult
.then(minified => {
// After minification, check if CSS consists entirely of invalid properties (no values)
// I.e., `color:` or `margin:;padding:` should be treated as empty
if (minified && /^(?:[a-z-]+:[;\s]*)+$/i.test(minified)) return '';
return minified;
})
.catch(err => {
if (!options.continueOnMinifyError) throw err;
options.log && options.log(err);
return originalAttrValue;
});
}
// Sync path (`minifyCSS` disabled—identity function)
if (cssResult && /^(?:[a-z-]+:[;\s]*)+$/i.test(cssResult)) return '';
return cssResult != null ? cssResult : attrValue;
}
return attrValue;
}
if (isSrcset(attrName, tag)) {
// https://html.spec.whatwg.org/multipage/embedded-content.html#attr-img-srcset
const candidates = trimWhitespace(attrValue).split(/\s*,\s*/);
const processed = candidates.map(candidate => {
let url = candidate;
let descriptor = '';
const match = candidate.match(/\s+([1-9][0-9]*w|[0-9]+(?:\.[0-9]+)?x)$/);
if (match) {
url = url.slice(0, -match[0].length);
const num = +match[1].slice(0, -1);
const suffix = match[1].slice(-1);
if (num !== 1 || suffix !== 'x') {
descriptor = ' ' + num + suffix;
}
}
const out = options.minifyURLs(url);
if (isThenable(out)) {
return out
.then(result => (typeof result === 'string' ? result : url) + descriptor)
.catch(err => {
if (!options.continueOnMinifyError) throw err;
options.log && options.log(err);
return url + descriptor;
});
}
return (typeof out === 'string' ? out : url) + descriptor;
});
if (processed.some(isThenable)) {
return Promise.all(processed).then(results => results.join(', '));
}
return processed.join(', ');
}
if (isMetaViewport(tag, attrs) && attrName === 'content') {
return attrValue.replace(/\s+/g, '').replace(/[0-9]+\.[0-9]+/g, function (numString) {
// 0.90000 → 0.9
// 1.0 → 1
// 1.0001 → 1.0001 (unchanged)
return (+numString).toString();
});
}
if (isContentSecurityPolicy(tag, attrs) && attrName.toLowerCase() === 'content') {
return collapseWhitespaceAll(attrValue);
}
if (options.customAttrCollapse && options.customAttrCollapse.test(attrName)) {
return trimWhitespace(attrValue.replace(/ ?[\n\r]+ ?/g, '').replace(/\s{2,}/g, options.conservativeCollapse ? ' ' : ''));
}
if (tag === 'script' && attrName === 'type') {
return trimWhitespace(attrValue.replace(/\s*;\s*/g, ';'));
}
if (isMediaQuery(tag, attrs, attrName)) {
attrValue = trimWhitespace(attrValue);
// Only minify actual media queries (those with features in parentheses)
// Skip simple media types like `all`, `screen`, `print` which are already minimal
if (!/[()]/.test(attrValue)) {
return attrValue;
}
const originalAttrValue = attrValue;
const cssResult = options.minifyCSS(attrValue, 'media');
if (isThenable(cssResult)) {
return cssResult.catch(err => {
if (!options.continueOnMinifyError) throw err;
options.log && options.log(err);
return originalAttrValue;
});
}
return cssResult != null ? cssResult : attrValue;
}
if (tag === 'iframe' && attrName === 'srcdoc') {
// Recursively minify HTML content within `srcdoc` attribute
// Fast-path: Skip if nothing would change
if (!shouldMinifyInnerHTML(options)) {
return attrValue;
}
return minifyHTMLSelf(attrValue, options, true);
}
return attrValue;
}
/**
* Choose appropriate quote character for an attribute value
* @param {string} attrValue - The attribute value
* @param {Object} options - Minifier options
* @returns {string} The chosen quote character (`"` or `'`)
*/
function chooseAttributeQuote(attrValue, options) {
if (typeof options.quoteCharacter !== 'undefined') {
return options.quoteCharacter === '\'' ? '\'' : '"';
}
// Count quotes in a single pass
let apos = 0, quot = 0;
for (let i = 0; i < attrValue.length; i++) {
if (attrValue[i] === "'") apos++;
else if (attrValue[i] === '"') quot++;
}
return apos < quot ? '\'' : '"';
}
// Returns the normalized attribute object directly (sync) or as a Promise (async);
// callers must handle both cases—use `isThenable()` to distinguish
function normalizeAttr(attr, attrs, tag, options, minifyHTML) {
const attrName = options.name(attr.name);
let attrValue = attr.value;
// Entity decoding requires a lazy import—async only when `&` is present
if (options.decodeEntities && attrValue && attrValue.indexOf('&') !== -1) {
return getDecodeHTMLStrict().then(decode => {
return normalizeAttrContinue(attrName, decode(attrValue), attr, attrs, tag, options, minifyHTML);
});
}
return normalizeAttrContinue(attrName, attrValue, attr, attrs, tag, options, minifyHTML);
}
// Internal: Handles attribute normalization after entity decoding (if any)
function normalizeAttrContinue(attrName, attrValue, attr, attrs, tag, options, minifyHTML) {
if ((options.removeRedundantAttributes &&
isAttributeRedundant(tag, attrName, attrValue, attrs)) ||
(options.removeScriptTypeAttributes && tag === 'script' &&
attrName === 'type' && isScriptTypeAttribute(attrValue) && !keepScriptTypeAttribute(attrValue)) ||
(options.removeStyleLinkTypeAttributes && (tag === 'style' || tag === 'link') &&
attrName === 'type' && isStyleLinkTypeAttribute(attrValue))) {
return;
}
if (attrValue) {
const cleaned = cleanAttributeValue(tag, attrName, attrValue, options, attrs, minifyHTML);
if (isThenable(cleaned)) {
return cleaned.then(v => normalizeAttrFinish(attrName, v, attr, tag, options));
}
return normalizeAttrFinish(attrName, cleaned, attr, tag, options);
}
return normalizeAttrFinish(attrName, attrValue, attr, tag, options);
}
// Internal: Final checks and result assembly after value cleaning
function normalizeAttrFinish(attrName, attrValue, attr, tag, options) {
if (options.removeEmptyAttributes &&
canDeleteEmptyAttribute(tag, attrName, attrValue, options)) {
return;
}
if (options.decodeEntities && attrValue && attrValue.indexOf('&') !== -1) {
attrValue = attrValue.replace(RE_AMP_ENTITY, '&$1');
}
return {
attr,
name: attrName,
value: attrValue
};
}
function buildAttr(normalized, hasUnarySlash, options, isLast, uidAttr) {
const attrName = normalized.name;
let attrValue = normalized.value;
const attr = normalized.attr;
let attrQuote = attr.quote;
let attrFragment;
let emittedAttrValue;
// Determine if we need to add/keep quotes
const shouldAddQuotes = typeof attrValue !== 'undefined' && (
// If `removeAttributeQuotes` is enabled, add quotes only if they can’t be removed
(options.removeAttributeQuotes && (attrValue.indexOf(uidAttr) !== -1 || !canRemoveAttributeQuotes(attrValue))) ||
// If `removeAttributeQuotes` is not enabled, preserve original quote style or add quotes if value requires them
(!options.removeAttributeQuotes && (attrQuote !== '' || !canRemoveAttributeQuotes(attrValue) ||
// Special case: With `removeTagWhitespace`, unquoted values that aren’t last will have space added,
// which can create ambiguous/invalid HTML—add quotes to be safe
(options.removeTagWhitespace && attrQuote === '' && !isLast)))
);
if (shouldAddQuotes) {
// Determine the appropriate quote character
if (!options.preventAttributesEscaping) {
// Normal mode: Choose optimal quote type to minimize escaping
// unless we’re preserving original quotes and they don’t need escaping
const needsEscaping = (attrQuote === '"' && attrValue.indexOf('"') !== -1) || (attrQuote === "'" && attrValue.indexOf("'") !== -1);
if (options.removeAttributeQuotes || typeof options.quoteCharacter !== 'undefined' || needsEscaping || attrQuote === '') {
attrQuote = chooseAttributeQuote(attrValue, options);
}
if (attrQuote === '"') {
attrValue = attrValue.replace(/"/g, '"');
} else {
attrValue = attrValue.replace(/'/g, ''');
}
} else {
// `preventAttributesEscaping` mode: Choose safe quotes but don't escape
// except when both quote types are present—then escape to prevent invalid HTML
const hasDoubleQuote = attrValue.indexOf('"') !== -1;
const hasSingleQuote = attrValue.indexOf("'") !== -1;
// Both quote types present: Escaping is required to guarantee valid HTML delimiter matching
if (hasDoubleQuote && hasSingleQuote) {
attrQuote = chooseAttributeQuote(attrValue, options);
if (attrQuote === '"') {
attrValue = attrValue.replace(/"/g, '"');
} else {
attrValue = attrValue.replace(/'/g, ''');
}
// Auto quote selection: Prefer the opposite quote type when value contains one quote type, default to double quotes when none present
} else if (typeof options.quoteCharacter === 'undefined') {
if (attrQuote === '"' && hasDoubleQuote && !hasSingleQuote) {
attrQuote = "'";
} else if (attrQuote === "'" && hasSingleQuote && !hasDoubleQuote) {
attrQuote = '"';
// If no quote character yet (empty string), choose based on content
} else if (attrQuote === '') {
if (hasSingleQuote && !hasDoubleQuote) {
attrQuote = '"';
} else if (hasDoubleQuote && !hasSingleQuote) {
attrQuote = "'";
} else {
attrQuote = '"';
}
// Fallback for invalid/unsupported attrQuote values (not `"`, `'`, or empty string):
// Choose safe default based on value content
} else if (attrQuote !== '"' && attrQuote !== "'") {
if (hasSingleQuote && !hasDoubleQuote) {
attrQuote = '"';
} else if (hasDoubleQuote && !hasSingleQuote) {
attrQuote = "'";
} else {
attrQuote = '"';
}
}
} else {
// `quoteCharacter` is explicitly set
const preferredQuote = options.quoteCharacter === '\'' ? '\'' : '"';
// Safety check: If the preferred quote conflicts with value content, switch to the opposite quote
if ((preferredQuote === '"' && hasDoubleQuote && !hasSingleQuote) || (preferredQuote === "'" && hasSingleQuote && !hasDoubleQuote)) {
attrQuote = preferredQuote === '"' ? "'" : '"';
} else if ((preferredQuote === '"' && hasDoubleQuote && hasSingleQuote) || (preferredQuote === "'" && hasSingleQuote && hasDoubleQuote)) {
// Both quote types present: Fall back to escaping despite `preventAttributesEscaping`
attrQuote = preferredQuote;
if (attrQuote === '"') {
attrValue = attrValue.replace(/"/g, '"');
} else {
attrValue = attrValue.replace(/'/g, ''');
}
} else {
attrQuote = preferredQuote;
}
}
}
emittedAttrValue = attrQuote + attrValue + attrQuote;
if (!isLast && !options.removeTagWhitespace) {
emittedAttrValue += ' ';
}
} else if (isLast && !hasUnarySlash) {
// Last attribute in a non-self-closing tag:
// No space needed
emittedAttrValue = attrValue;
} else {
// Not last attribute, or is a self-closing tag:
// Unquoted values must have space after them to delimit from next attribute
emittedAttrValue = attrValue + ' ';
}
if (typeof attrValue === 'undefined' || (options.collapseBooleanAttributes &&
isBooleanAttribute(attrName.toLowerCase(), (attrValue || '').toLowerCase()))) {
attrFragment = attrName;
if (!isLast) {
attrFragment += ' ';
}
} else {
attrFragment = attrName + attr.customAssign + emittedAttrValue;
}
return attr.customOpen + attrFragment + attr.customClose;
}
// Exports
export {
// Validators
isConditionalComment,
isIgnoredComment,
isEventAttribute,
canRemoveAttributeQuotes,
attributesInclude,
isAttributeRedundant,
isScriptTypeAttribute,
keepScriptTypeAttribute,
isExecutableScript,
isStyleLinkTypeAttribute,
isStyleElement,
isBooleanAttribute,
isUriTypeAttribute,
isNumberTypeAttribute,
isLinkType,
isMediaQuery,
isSrcset,
isMetaViewport,
isContentSecurityPolicy,
canDeleteEmptyAttribute,
hasAttrName,
// Cleaners
cleanAttributeValue,
normalizeAttr,
buildAttr,
deduplicateAttributes
};