htmlfy
Version:
html formatter yo! Prettify or minify html.
495 lines (416 loc) • 15.8 kB
JavaScript
import { CONFIG, VOID_ELEMENTS } from './constants.js'
import { getState, setState } from './state.js'
/**
 * Checks if content contains at least one HTML element or custom HTML element.
 *
 * Three patterns are tried in order and the first hit wins:
 * 1. Void and self-closing elements (an opening tag with an optional trailing `/`).
 * 2. Normal HTML elements with a matching closing tag; the name may carry a namespace prefix.
 * 3. Custom HTML elements with a matching closing tag; the name may carry a namespace prefix.
 *
 * HTML element names must begin with a letter, and may continue with letters or numbers.
 *
 * Custom element names must begin with a lowercase letter and contain at least one hyphen.
 * Besides lowercase letters they may contain numbers, hyphens, underscores, and periods.
 *
 * The patterns are based on
 * https://w3c.github.io/html-reference/syntax.html#tag-name
 * and
 * https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name
 * respectively.
 *
 * Side effect: sets `checked_html: true` through `setState()` before testing.
 *
 * @param {string} content Content to evaluate.
 * @returns {boolean} `true` when any of the three patterns matches.
 */
export const isHtml = (content) => {
  setState({ checked_html: true })

  const void_or_self_closing = /<(?:[A-Za-z]+[A-Za-z0-9]*)(?:\s+.*?)*?\/{0,1}>/
  const standard_element = /<(?<Element>(?:[A-Za-z]+[A-Za-z0-9]*:)?(?:[A-Za-z]+[A-Za-z0-9]*))(?:\s+.*?)*?>(?:.|\n)*?<\/{1}\k<Element>>/
  const custom_element = /<(?<Element>(?:[a-z][a-z0-9._]*:)?[a-z][a-z0-9._]*-[a-z0-9._-]+)(?:\s+.*?)*?>(?:.|\n)*?<\/{1}\k<Element>>/

  /* Later patterns only run when the earlier ones fail. */
  if (void_or_self_closing.test(content)) return true
  if (standard_element.test(content)) return true
  return custom_element.test(content)
}
/**
 * Generic utility which recursively merges `updates` into a copy of `current`.
 *
 * - When `current` is an array, the result is a structured clone of it with
 *   `updates` concatenated on the end.
 * - When `current` is an object, every own enumerable key of `updates` is
 *   applied to a shallow copy of `current`:
 *   - primitives and `null` overwrite the existing value,
 *   - arrays are appended to an existing array, or copied when the key does
 *     not already hold an array,
 *   - other objects are merged recursively.
 *
 * Neither argument is mutated.
 *
 * @param {any} current Original object.
 * @param {any} updates Object to merge with original.
 * @returns {any} The merged value, or `undefined` when `current` is neither an array nor an object.
 * @throws {Error} When either argument is falsy.
 */
const mergeObjects = (current, updates) => {
  if (!current || !updates)
    throw new Error("Both 'current' and 'updates' must be passed-in to mergeObjects()")

  if (Array.isArray(current))
    return structuredClone(current).concat(updates)

  if (typeof current !== 'object')
    return undefined

  /**
   * @type {any}
   */
  const merged = { ...current }

  for (const key of Object.keys(updates)) {
    const value = updates[key]
    const existing = merged[key]

    if (value === null || typeof value !== 'object') {
      /* `typeof null` is 'object'; treat it like any other plain value. */
      merged[key] = value
    } else if (Array.isArray(value) && !Array.isArray(existing)) {
      /* No array to append to, so keep the update as an array instead of spreading its indexes into an object. */
      merged[key] = [...value]
    } else {
      /* value is an object (or an array joining an array), run mergeObjects again. */
      const target = existing !== null && typeof existing === 'object' ? existing : {}
      merged[key] = mergeObjects(target, value)
    }
  }

  return merged
}
/**
 * Merge a user config with the default config, then publish the result.
 *
 * The merged config and the placeholder constants derived from its
 * `ignore_with` value are stored through `setState()`.
 *
 * @param {import('htmlfy').Config} default_config The default config.
 * @param {import('htmlfy').UserConfig} config The user config.
 * @returns {import('htmlfy').Config} The merged config.
 */
export const mergeConfig = (default_config, config) => {
  const merged_config = mergeObjects(default_config, config)
  const { ignore_with } = merged_config

  /* Below `constants` prefixes and suffixes must be in sync with those in state.js */
  const constants = {
    CONTENT_IGNORE_PLACEHOLDER: `${ignore_with}_`,
    SELF_CLOSING_PLACEHOLDER: `${ignore_with}/_>`,
    ATTRIBUTE_IGNORE_PLACEHOLDER: `${ignore_with}=_`
  }

  setState({ config: merged_config, constants })

  return merged_config
}
/**
 * Replace whitespace inside the attribute section of opening tags with
 * attribute placeholder tokens (`nl!` for `\n`, `cr!` for `\r`, `ws!` for any
 * other whitespace character).
 *
 * The placeholder prefix is read from state and always inserted verbatim.
 *
 * @param {string} html
 * @returns {string} The html with attribute whitespace replaced by placeholders.
 */
export const protectAttributes = (html) => {
  const { constants } = getState()
  const placeholder = constants.ATTRIBUTE_IGNORE_PLACEHOLDER

  return html.replace(/<[\w:\-]+([^>]*[^\/])>/g, (/** @type {string} */ tag, /** @type {string} */ attributes) => {
    return tag.replace(attributes, (text) => {
      /*
       * Replacer functions are used so that `$` sequences in the
       * placeholder (e.g. `$&`) are not interpreted as replacement patterns.
       */
      return text
        .replace(/\n/g, () => `${placeholder}nl!`)
        .replace(/\r/g, () => `${placeholder}cr!`)
        .replace(/\s/g, () => `${placeholder}ws!`)
    })
  })
}
/**
 * Replace every whitespace character in the given text with a content
 * placeholder token (`nl!` for `\n`, `cr!` for `\r`, `ws!` for any other
 * whitespace character).
 *
 * The placeholder prefix is read from state and always inserted verbatim.
 *
 * @param {string} html
 * @returns {string} The text with whitespace replaced by placeholders.
 */
export const protectContent = (html) => {
  const { constants } = getState()
  const placeholder = constants.CONTENT_IGNORE_PLACEHOLDER

  /*
   * Replacer functions are used so that `$` sequences in the
   * placeholder (e.g. `$&`) are not interpreted as replacement patterns.
   */
  return html
    .replace(/\n/g, () => `${placeholder}nl!`)
    .replace(/\r/g, () => `${placeholder}cr!`)
    .replace(/\s/g, () => `${placeholder}ws!`)
}
/**
 * Protect the whitespace of a single text line that sits next to an element
 * and contains the `__!i-£___£%__` marker.
 *
 * The pattern has two alternatives:
 * 1. An element whose opening and closing tags are separated only by a
 *    newline and whitespace, followed by a marked line. That following line is captured.
 * 2. An opening tag, one marked line, then the matching closing tag.
 *    The line, with its surrounding newlines, is captured.
 *
 * Whitespace in the captured text is replaced with content placeholder tokens
 * (`nl!`, `cr!`, `ws!`); everything else in the match is left as-is.
 *
 * NOTE(review): the marker inside the pattern is hard-coded rather than
 * derived from state; presumably it mirrors a default defined in state.js,
 * so confirm whether a custom `ignore_with` should be honoured here.
 *
 * @param {string} html
 * @returns {string} The html with the captured lines protected.
 */
export const finalProtectContent = (html) => {
  const regex = /\s*<([a-zA-Z0-9:-]+)[^>]*>\n\s*<\/\1>(?=\n[ ]*[^\n]*__!i-£___£%__[^\n]*\n)(\n[ ]*\S[^\n]*\n)|<([a-zA-Z0-9:-]+)[^>]*>(?=\n[ ]*[^\n]*__!i-£___£%__[^\n]*\n)(\n[ ]*\S[^\n]*\n\s*)<\/\3>/g
  const { constants } = getState()
  const placeholder = constants.CONTENT_IGNORE_PLACEHOLDER

  return html.replace(regex, (/** @type {string} */ match, _first_tag, p2, _second_tag, p4) => {
    /* Only one alternative participates in a match, so only one capture is set. */
    const text_to_protect = p2 || p4

    if (!text_to_protect)
      return match

    const protected_text = text_to_protect
      .replace(/\n/g, () => `${placeholder}nl!`)
      .replace(/\r/g, () => `${placeholder}cr!`)
      .replace(/\s/g, () => `${placeholder}ws!`)

    /*
     * A replacer function inserts the text verbatim. A plain replacement
     * string would rewrite `$$`, `$&`, etc. that appear in the content.
     */
    return match.replace(text_to_protect, () => protected_text)
  })
}
/**
 * Replace html brackets (`<` and `>`) found in the attribute section of an
 * opening tag with attribute placeholder tokens (`lt!` and `gt!`).
 *
 * The pattern accepts HTML and custom element names, followed by any mix of
 * double-quoted `name="value"` attributes and bare lowercase words.
 *
 * The placeholder prefix is read from state and always inserted verbatim,
 * which matches how `unsetIgnoreAttribute()` looks it up.
 *
 * @param {string} html
 * @returns {string}
 */
export const setIgnoreAttribute = (html) => {
  const regex = /<([A-Za-z][A-Za-z0-9]*|[a-z][a-z0-9._]*-[a-z0-9._-]+)((?:\s+[A-Za-z0-9_-]+="[^"]*"|\s*[a-z]*)*)>/g
  const { constants } = getState()
  const placeholder = constants.ATTRIBUTE_IGNORE_PLACEHOLDER

  return html.replace(regex, (/** @type {string} */ tag, _name, /** @type {string} */ attributes) => {
    return tag.replace(attributes, (text) => {
      /*
       * Replacer functions are used so that `$` sequences in the
       * placeholder (e.g. `$&`) are not interpreted as replacement patterns.
       */
      return text
        .replace(/</g, () => `${placeholder}lt!`)
        .replace(/>/g, () => `${placeholder}gt!`)
    })
  })
}
/**
 * Trim leading and trailing whitespace characters inside the listed elements.
 *
 * For every tag name in `trim`, whitespace directly after the opening tag and
 * directly before the closing tag is removed. Only elements whose name is
 * exactly the given one are affected (`p` does not touch `<pre>`), and the
 * name is escaped before being placed in the pattern.
 *
 * @param {string} html
 * @param {string[]} trim Tag names whose content should be trimmed.
 * @returns {string}
 */
export const trimify = (html, trim) => {
  let trimmed = html

  for (const tag of trim) {
    /* Escape regex special characters so the tag name is matched literally. */
    const safe_tag = tag.replace(/[-\/\\^$*+?.()|[\]{}]/g, "\\$&")

    /*
     * Whitespace character must be escaped with '\' or RegExp() won't include it.
     * The lookahead makes sure the tag name ends where the given name ends.
     */
    const leading_whitespace = new RegExp(`(<${safe_tag}(?=[\\s/>])[^>]*>)\\s+`, "g")
    const trailing_whitespace = new RegExp(`\\s+(</${safe_tag}>)`, "g")

    trimmed = trimmed
      .replace(leading_whitespace, '$1')
      .replace(trailing_whitespace, '$1')
  }

  return trimmed
}
/**
 * Restore the whitespace that `protectAttributes()` replaced inside the
 * attribute section of opening tags (`nl!` back to `\n`, `cr!` back to `\r`,
 * `ws!` back to a single space).
 *
 * The placeholder prefix is read from state and matched as literal text.
 *
 * @param {string} html
 * @returns {string} The html with attribute placeholders turned back into whitespace.
 */
export const unprotectAttributes = (html) => {
  const { constants } = getState()
  const placeholder = constants.ATTRIBUTE_IGNORE_PLACEHOLDER

  return html.replace(/<[\w:\-]+([^>]*[^\/])>/g, (/** @type {string} */ tag, /** @type {string} */ attributes) => {
    return tag.replace(attributes, (text) => {
      /*
       * String patterns are matched literally, so regex special
       * characters in the placeholder need no escaping.
       */
      return text
        .replaceAll(`${placeholder}nl!`, '\n')
        .replaceAll(`${placeholder}cr!`, '\r')
        .replaceAll(`${placeholder}ws!`, ' ')
    })
  })
}
/**
 * Restore the whitespace that was replaced with content placeholder tokens
 * (`nl!` back to `\n`, `cr!` back to `\r`, `ws!` back to a single space).
 *
 * Tokens carrying any other two-letter code are left untouched. The
 * placeholder prefix is read from state and matched as literal text.
 *
 * @param {string} html
 * @returns {string} The html with content placeholders turned back into whitespace.
 */
export const unprotectContent = (html) => {
  const { constants } = getState()

  /* Escape regex special characters so the placeholder is matched literally. */
  const escaped_placeholder = constants.CONTENT_IGNORE_PLACEHOLDER.replace(
    /[-\/\\^$*+?.()|[\]{}]/g,
    "\\$&"
  )
  const token_regex = new RegExp(`${escaped_placeholder}([a-z]{2})!`, "g")

  return html.replace(token_regex, (/** @type {string} */ token, /** @type {string} */ code) => {
    switch (code) {
      case 'nl':
        return '\n'
      case 'cr':
        return '\r'
      case 'ws':
        return ' '
      default:
        /* Not one of ours, keep it as found. */
        return token
    }
  })
}
/**
 * Replace ignore string with html brackets.
 *
 * Every tag is scanned, and attribute placeholder tokens found after the tag
 * name are turned back into brackets (`lt!` into `<`, `gt!` into `>`).
 *
 * @param {string} html
 * @returns {string}
 */
export const unsetIgnoreAttribute = (html) => {
  const { ATTRIBUTE_IGNORE_PLACEHOLDER: placeholder } = getState().constants

  /* Escape regex special characters so the placeholder is matched literally. */
  const escaped_placeholder = placeholder.replace(/[-\/\\^$*+?.()|[\]{}]/g, "\\$&")
  const lt_regex = new RegExp(escaped_placeholder + "lt!", "g")
  const gt_regex = new RegExp(escaped_placeholder + "gt!", "g")

  /**
   * Swap the bracket tokens in one attribute string.
   *
   * @param {string} attributes
   * @returns {string}
   */
  const restoreBrackets = (attributes) => {
    const with_lt = attributes.replace(lt_regex, "<")
    return with_lt.replace(gt_regex, ">")
  }

  /* Find tags, capture name and attributes, then rebuild each tag. */
  return html.replace(
    /<([\w:\-]+)([^>]*)>/g,
    (/** @type {string} */ _tag, /** @type {string} */ name, /** @type {string} */ attributes) =>
      `<${name}${restoreBrackets(attributes)}>`
  )
}
/**
 * Validate any passed-in config options and merge with CONFIG.
 *
 * When none of the known options is present as an own property, a copy of
 * CONFIG is stored in state and returned. Otherwise every option that is
 * present is type-checked, and the config is handed to `mergeConfig()`.
 * The passed-in config is never modified.
 *
 * @param {import('htmlfy').UserConfig} config A user config.
 * @returns {import('htmlfy').Config} A validated config.
 * @throws {Error} When `config` is not an object, or an option has an invalid type or value.
 */
export const validateConfig = (config) => {
  /* `typeof null` is 'object', so null has to be rejected explicitly. */
  if (config === null || typeof config !== 'object') throw new Error('Config must be an object.')

  const default_config = { ...CONFIG }

  const config_empty = !(
    Object.hasOwn(config, 'content_wrap') ||
    Object.hasOwn(config, 'ignore') ||
    Object.hasOwn(config, 'ignore_with') ||
    Object.hasOwn(config, 'strict') ||
    Object.hasOwn(config, 'tab_size') ||
    Object.hasOwn(config, 'tag_wrap') ||
    Object.hasOwn(config, 'trim')
  )

  if (config_empty) {
    setState({ config: default_config })
    return default_config
  }

  /*
   * Checked by presence rather than truthiness, so that values such as
   * 0, NaN, '' or null are validated instead of being merged unchecked.
   */
  if (Object.hasOwn(config, 'tab_size')) {
    const tab_size = config.tab_size

    if (typeof tab_size !== 'number') throw new Error(`tab_size must be a number, not ${typeof config.tab_size}.`)

    /* A safe integer is already whole (4.0 passes, 4.5 does not), so no rounding is needed. */
    const safe = Number.isSafeInteger(tab_size)
    if (!safe) throw new Error(`Tab size ${tab_size} is not safe. See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number/isSafeInteger for more info.`)

    if (tab_size < 1 || tab_size > 16) throw new Error('Tab size out of range. Expecting 1 to 16.')
  }

  if (Object.hasOwn(config, 'content_wrap') && typeof config.content_wrap !== 'number')
    throw new Error(`content_wrap config must be a number, not ${typeof config.content_wrap}.`)

  if (Object.hasOwn(config, 'ignore') && (!Array.isArray(config.ignore) || !config.ignore.every((e) => typeof e === 'string')))
    throw new Error('Ignore config must be an array of strings.')

  if (Object.hasOwn(config, 'ignore_with')) {
    if (typeof config.ignore_with !== 'string')
      throw new Error(`ignore_with must be a string, not ${typeof config.ignore_with}.`)

    /**
     * A leading underscore negatively affects processing of preserved
     * tag attributes, because tag names can end with an underscore,
     * so the regex does not capture them.
     */
    if (config.ignore_with.startsWith('_'))
      throw new Error(`ignore_with cannot start with an underscore.`)
  }

  if (Object.hasOwn(config, 'strict') && typeof config.strict !== 'boolean')
    throw new Error(`Strict config must be a boolean, not ${typeof config.strict}.`)

  if (Object.hasOwn(config, 'tag_wrap') && typeof config.tag_wrap !== 'number')
    throw new Error(`tag_wrap config must be a number, not ${typeof config.tag_wrap}.`)

  if (Object.hasOwn(config, 'trim') && (!Array.isArray(config.trim) || !config.trim.every((e) => typeof e === 'string')))
    throw new Error('Trim config must be an array of strings.')

  return mergeConfig(default_config, config)
}
/**
 * Wrap text into lines no longer than `width` characters where possible, prefix
 * every line with `indent`, and pass the joined result through `protectContent()`.
 *
 * The text is split on runs of whitespace. A word that is at least `width`
 * characters long is placed on a line of its own. The indent is not counted
 * towards the width. Text without any words yields an empty string.
 *
 * @param {string} text Text to wrap.
 * @param {number} width Maximum number of characters per line, excluding the indent.
 * @param {string} indent String placed in front of every line.
 * @returns {string} The wrapped text as returned by `protectContent()`, or `""`.
 */
export const wordWrap = (text, width, indent) => {
  const words = text.trim().split(/\s+/)

  if (words.length === 0 || (words.length === 1 && words[0] === ''))
    return ""

  /** @type {string[]} */
  const wrapped = []
  let pending = ""

  /* Move whatever has been gathered so far onto its own indented line. */
  const flush = () => {
    if (pending !== "") wrapped.push(indent + pending)
    pending = ""
  }

  for (const word of words) {
    if (word === "") continue

    if (word.length >= width) {
      /* A long word closes the line in progress and takes a line of its own. */
      flush()
      wrapped.push(indent + word)
      continue
    }

    /* See whether the word still fits on the line in progress. */
    const candidate = pending === "" ? word : pending + " " + word

    if (candidate.length <= width) {
      pending = candidate
    } else {
      /* It does not fit, so the word opens the next line. */
      flush()
      pending = word
    }
  }

  /* Whatever is left becomes the last line. */
  flush()

  return protectContent(wrapped.join("\n"))
}
/**
 * Extract the content of any HTML blocks to be ignored,
 * and replace it with a placeholder marker
 * for re-insertion later.
 *
 * The tag names come from `config.ignore` in state and are processed in
 * order. Only the content between the opening and closing tag is stored;
 * the tags themselves stay in the html. A tag name only matches elements
 * with exactly that name (`p` does not match `<pre>`).
 *
 * Side effect: sets `ignored: true` through `setState()`.
 *
 * @param {string} html
 * @returns {{ html_with_markers: string, extracted_map: Map<any,any> }}
 *   The html with markers in place of ignored content, and a map from
 *   each marker to the content it replaced.
 */
export function extractIgnoredBlocks(html) {
  setState({ ignored: true })

  const { config } = getState()
  const MARKER_PREFIX = "___HTMLFY_SPECIAL_IGNORE_MARKER_"
  const extracted_blocks = new Map()
  let current_html = html
  let marker_id = 0

  for (const tag of config.ignore) {
    /* Ensure tag is escaped if it can contain regex special chars. */
    const safe_tag_name = tag.replace(/[-\/\\^$*+?.()|[\]{}]/g, "\\$&")

    /* The lookahead makes sure the tag name ends where the given name ends. */
    const regex = new RegExp(
      `(<\\s*${safe_tag_name}(?=[\\s/>])[^>]*>)(.*?)(<\\s*/\\s*${safe_tag_name}\\s*>)`,
      "gs" // global and dotAll
    )

    current_html = current_html.replace(
      regex,
      (
        /** @type {string} */ _block,
        /** @type {string} */ opening_tag,
        /** @type {string} */ content,
        /** @type {string} */ closing_tag
      ) => {
        const marker = `${MARKER_PREFIX}${marker_id++}___`

        /* Only store content, and minify tags later. */
        extracted_blocks.set(marker, content)

        return opening_tag + marker + closing_tag
      }
    )
  }

  return { html_with_markers: current_html, extracted_map: extracted_blocks }
}
/**
 * Re-insert ignored HTML blocks.
 *
 * Markers are resolved from the most recently added map entry back to the
 * first one. Stored content can itself contain a marker that was added to
 * the map earlier (an ignored element nested in another ignored element),
 * and that marker only becomes visible once the outer content is back in place.
 *
 * Side effect: sets `ignored: false` through `setState()`.
 *
 * @param {string} html_with_markers
 * @param {Map<any,any>} extracted_map Map from marker to the content it replaced.
 * @returns {string} The html with every marker replaced by its content.
 */
export function reinsertIgnoredBlocks(html_with_markers, extracted_map) {
  setState({ ignored: false })

  let final_html = html_with_markers
  const newest_first = [...extracted_map].reverse()

  for (const [marker, original_block] of newest_first) {
    /* split/join inserts the content verbatim, whatever characters it holds. */
    final_html = final_html.split(marker).join(original_block)
  }

  return final_html
}
/*
 * Matches the tag of a void element that is not written in self-closing form:
 * one of the VOID_ELEMENTS names, followed by optional whitespace-led runs of
 * characters in which a `/` may not be directly followed by `>`.
 */
const void_element_regex = new RegExp(
  '<(' + VOID_ELEMENTS.join("|") + ')(?:\\s(?:[^/>]|/(?!>))*)*>',
  'g'
)
/**
 * Add a placeholder for void elements that are not self-closing.
 * This is for internal processing only.
 *
 * The closing `>` of every matching tag is swapped for the self-closing
 * placeholder read from state. The placeholder is inserted verbatim.
 *
 * @param {string} html
 * @returns {string} The html with void element tags ending in the placeholder.
 */
export function setSelfClosing(html) {
  const { constants } = getState()
  const placeholder = constants.SELF_CLOSING_PLACEHOLDER

  return html.replace(
    // match only void elements that are not self-closing
    void_element_regex,
    /*
     * A replacer function is used so that `$` sequences in the
     * placeholder (e.g. `$&`) are not interpreted as replacement patterns.
     */
    (match) => match.replace(/>$/, () => placeholder)
  )
}
/**
 * Remove internal placeholder for non-native self-closing void elements.
 *
 * Every occurrence of the self-closing placeholder read from state is
 * turned back into `>`.
 *
 * @param {string} html
 * @returns {string} The html without self-closing placeholders.
 */
export function unsetSelfClosing(html) {
  const { constants } = getState()

  /* A string pattern with replace() would stop after the first occurrence. */
  return html.replaceAll(constants.SELF_CLOSING_PLACEHOLDER, ">")
}