/*
 * htmlfy
 * Version:
 * HTML formatter yo! Prettify, minify, and more!
 * 412 lines (348 loc) • 13.7 kB
 * JavaScript
 */
import { IGNORE_STRING, CONFIG, CONTENT_IGNORE_STRING } from './constants.js'
/**
 * Checks if content contains at least one HTML element or custom HTML element.
 *
 * The first regex matches a single tag, which covers void and self-closing elements.
 * The second regex matches normal HTML elements, plus they can have a namespace.
 * The third regex matches custom HTML elements, plus they can have a namespace.
 *
 * HTML elements should begin with a letter, and can end with a letter or number.
 *
 * Custom elements must begin with a letter, and can end with a letter, number,
 * hyphen, underscore, or period. However, all letters must be lowercase.
 * They must have at least one hyphen, and can only have periods and underscores if there is a hyphen.
 *
 * These regexes are based on
 * https://w3c.github.io/html-reference/syntax.html#tag-name
 * and
 * https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name
 * respectively.
 *
 * The attribute section of each regex is `(?:\s[\s\S]*?)?`, i.e. "nothing, or
 * one whitespace character followed by anything". A nested-quantifier form
 * such as `(?:\s+.*?)*?` accepts the same strings, but can split a run of
 * whitespace in exponentially many ways, so an unterminated tag followed by
 * many spaces would stall the regex engine.
 *
 * @param {string} content Content to evaluate.
 * @returns {boolean} `true` when at least one of the three regexes matches.
 */
export const isHtml = (content) =>
  /<[A-Za-z][A-Za-z0-9]*(?:\s[\s\S]*?)?\/?>/.test(content) ||
  /<(?<Element>(?:[A-Za-z][A-Za-z0-9]*:)?[A-Za-z][A-Za-z0-9]*)(?:\s[\s\S]*?)?>(?:.|\n)*?<\/\k<Element>>/.test(content) ||
  /<(?<Element>(?:[a-z][a-z0-9._]*:)?[a-z][a-z0-9._]*-[a-z0-9._-]+)(?:\s[\s\S]*?)?>(?:.|\n)*?<\/\k<Element>>/.test(content)
/**
 * Generic utility which recursively merges `updates` into a copy of `current`.
 *
 * - When `current` is an array, the result is a structured clone of it with
 *   `updates` concatenated onto the end.
 * - When `current` is an object, the result is a shallow copy of it in which
 *   every key of `updates` is either assigned directly (non-object values) or
 *   merged recursively (object values, which includes arrays).
 * - For any other truthy `current`, nothing is merged and `undefined` is returned.
 *
 * @param {any} current Original object.
 * @param {any} updates Object to merge with original.
 * @returns {any}
 * @throws {Error} When either argument is falsy.
 */
const mergeObjects = (current, updates) => {
  if (!(current && updates)) {
    throw new Error("Both 'current' and 'updates' must be passed-in to mergeObjects()")
  }

  if (Array.isArray(current)) {
    return structuredClone(current).concat(updates)
  }

  if (typeof current !== 'object') {
    return undefined
  }

  return Object.entries(updates).reduce(
    (/** @type {any} */ merged, [key, value]) => {
      /* Object values (arrays included) are merged into whatever is already there. */
      merged[key] = typeof value === 'object'
        ? mergeObjects(merged[key] || {}, value)
        : value
      return merged
    },
    { ...current }
  )
}
/**
 * Merge a user config with the default config.
 *
 * @param {import('htmlfy').Config} dconfig The default config.
 * @param {import('htmlfy').UserConfig} config The user config.
 * @returns {import('htmlfy').Config}
 */
export const mergeConfig = (dconfig, config) => {
  /**
   * Work on a deep copy of the defaults: `dconfig` is a reference to the
   * shared `CONFIG`, and merging into it directly would alter the original.
   */
  const defaults_copy = structuredClone(dconfig)

  return mergeObjects(defaults_copy, config)
}
/**
 * Replace whitespace inside the attribute section of opening tags with
 * `IGNORE_STRING` markers (`nl!` for newlines, `cr!` for carriage returns,
 * `ws!` for any other whitespace character).
 *
 * Only tags whose last character before `>` is not `/` are matched.
 *
 * @param {string} html
 * @returns {string}
 */
export const protectAttributes = (html) => {
  const tag_with_attributes = /<[\w:\-]+([^>]*[^\/])>/g

  /* Newlines and carriage returns are marked first; `\s` then catches the rest. */
  const maskWhitespace = (/** @type {string} */ attributes) => attributes
    .replace(/\n/g, `${IGNORE_STRING}nl!`)
    .replace(/\r/g, `${IGNORE_STRING}cr!`)
    .replace(/\s/g, `${IGNORE_STRING}ws!`)

  return html.replace(
    tag_with_attributes,
    (/** @type {string} */ tag, /** @type {any} */ attributes) => tag.replace(attributes, maskWhitespace)
  )
}
/**
 * Replace every whitespace character in `html` with a `CONTENT_IGNORE_STRING`
 * marker: `nl!` for newlines, `cr!` for carriage returns and `ws!` for any
 * remaining whitespace character.
 *
 * @param {string} html
 * @returns {string}
 */
export const protectContent = (html) => {
  /* Order matters: `\s` also matches `\n` and `\r`, so it has to run last. */
  const substitutions = [
    { pattern: /\n/g, suffix: 'nl!' },
    { pattern: /\r/g, suffix: 'cr!' },
    { pattern: /\s/g, suffix: 'ws!' },
  ]

  let result = html
  for (const { pattern, suffix } of substitutions) {
    result = result.replace(pattern, CONTENT_IGNORE_STRING + suffix)
  }
  return result
}
/**
 * Replace the whitespace of specific text lines with `CONTENT_IGNORE_STRING`
 * markers. A text line is only handled when it already contains the literal
 * marker `__!i-£___£%__`, and it is found in one of two shapes:
 *
 * 1. an element whose opening and closing tags are separated only by a newline
 *    and whitespace, immediately followed by the text line; or
 * 2. an opening tag, the text line, then the matching closing tag.
 *
 * The protected text is spliced back in by position instead of via
 * `match.replace(text, protected_text)`. With a replacement *string*, sequences
 * such as `$$` or `$&` inside the content would be interpreted as replacement
 * patterns and the content would be altered.
 *
 * @param {string} html
 * @returns {string}
 */
export const finalProtectContent = (html) => {
  const regex = /\s*<([a-zA-Z0-9:-]+)[^>]*>\n\s*<\/\1>(?=\n[ ]*[^\n]*__!i-£___£%__[^\n]*\n)(\n[ ]*\S[^\n]*\n)|<([a-zA-Z0-9:-]+)[^>]*>(?=\n[ ]*[^\n]*__!i-£___£%__[^\n]*\n)(\n[ ]*\S[^\n]*\n\s*)<\/\3>/g

  const protect = (/** @type {string} */ text) => text
    .replace(/\n/g, CONTENT_IGNORE_STRING + 'nl!')
    .replace(/\r/g, CONTENT_IGNORE_STRING + 'cr!')
    .replace(/\s/g, CONTENT_IGNORE_STRING + 'ws!')

  return html.replace(regex, (/** @type {string} */ match, p1, p2, p3, p4) => {
    /* First shape: the text line (group 2) is the tail of the match. */
    if (p2)
      return match.slice(0, match.length - p2.length) + protect(p2)

    /* Second shape: the text (group 4) sits right before the closing `</tag>`. */
    if (p4) {
      const closing_length = p3.length + 3
      const text_start = match.length - closing_length - p4.length
      return match.slice(0, text_start) + protect(p4) + match.slice(match.length - closing_length)
    }

    return match
  })
}
/**
 * Replace html brackets inside attribute values with ignore string.
 *
 * A tag is only handled when everything between its name and the closing `>`
 * consists of whitespace, lowercase letters, and `name="value"` pairs using
 * double quotes. Within that attribute section `<` becomes
 * `IGNORE_STRING + 'lt!'` and `>` becomes `IGNORE_STRING + 'gt!'`.
 *
 * The attribute section is matched one unit at a time (either a whole
 * `name="value"` pair or a single whitespace/lowercase character). Nesting
 * `[a-z]*` inside a repeated group accepts the same strings, but lets a long
 * run of lowercase letters be split in exponentially many ways when no closing
 * `>` follows, which can stall the regex engine.
 *
 * @param {string} html
 * @returns {string}
 */
export const setIgnoreAttribute = (html) => {
  const regex = /<([A-Za-z][A-Za-z0-9]*|[a-z][a-z0-9._]*-[a-z0-9._-]+)((?:\s[A-Za-z0-9_-]+="[^"]*"|[\sa-z])*)>/g

  return html.replace(regex, (/** @type {string} */ match, p1, /** @type {string} */ p2) => {
    /* The attribute section is always the part right before the final `>`. */
    const tag_start = match.slice(0, match.length - p2.length - 1)
    const masked = p2
      .replace(/</g, IGNORE_STRING + 'lt!')
      .replace(/>/g, IGNORE_STRING + 'gt!')

    return tag_start + masked + '>'
  })
}
/**
 * Replace brackets and whitespace inside ignored elements with ignore string.
 *
 * For every tag name in `config.ignore`, the content between `<tag ...>` and
 * the next `</tag>` is rewritten so that `<`, `>`, newline, carriage return and
 * any other whitespace character become `-{ignore_with}lt-`, `-{ignore_with}gt-`,
 * `-{ignore_with}nl-`, `-{ignore_with}cr-` and `-{ignore_with}ws-` respectively.
 *
 * Details worth knowing:
 * - The opening tag must be exactly the given name (`p` does not match `<pre>`).
 * - Content is matched with `[\s\S]`, so carriage returns are allowed inside it.
 * - Only the content is rewritten; it is located by position, so identical
 *   text inside the opening tag's attributes is never touched.
 * - Markers are produced in a single pass through a replacer function, so
 *   `ignore_with` is inserted literally even if it contains `$`.
 * - Tag names are interpolated into the regex as-is (not escaped).
 *
 * @param {string} html
 * @param {import('htmlfy').Config} config
 * @returns {string}
 */
export const setIgnoreElement = (html, config) => {
  const ignore = config.ignore
  const ignore_string = config.ignore_with

  /** @type {Record<string, string>} */
  const codes = { '<': 'lt', '>': 'gt', '\n': 'nl', '\r': 'cr' }

  const protect = (/** @type {string} */ content) =>
    content.replace(/[<>\s]/g, (char) => '-' + ignore_string + (codes[char] ?? 'ws') + '-')

  let result = html
  for (const tag of ignore) {
    /* The lookahead stops a tag name from matching a longer name it is a prefix of. */
    const element = new RegExp(`(<${tag}(?![\\w:.-])[^>]*>)([\\s\\S]*?)(</${tag}>)`, 'g')
    result = result.replace(
      element,
      (/** @type {string} */ match, /** @type {string} */ open, /** @type {string} */ content, /** @type {string} */ close) =>
        open + protect(content) + close
    )
  }

  return result
}
/**
 * Trim leading and trailing whitespace characters.
 *
 * For every tag name in `trim`, whitespace directly after the opening tag and
 * directly before the closing tag is removed. The opening tag must be exactly
 * the given name: `p` trims `<p ...>` but leaves `<pre>` alone.
 * Tag names are interpolated into the regexes as-is (not escaped).
 *
 * @param {string} html
 * @param {string[]} trim
 * @returns {string}
 */
export const trimify = (html, trim) => {
  let result = html

  for (const tag of trim) {
    /*
     * Backslashes must be doubled or RegExp() won't see them.
     * The lookahead rejects longer tag names which merely start with `tag`.
     */
    const leading_whitespace = new RegExp(`(<${tag}(?![\\w:.-])[^>]*>)\\s+`, "g")
    const trailing_whitespace = new RegExp(`\\s+(</${tag}>)`, "g")

    result = result
      .replace(leading_whitespace, '$1')
      .replace(trailing_whitespace, '$1')
  }

  return result
}
/**
 * Turn the `IGNORE_STRING` whitespace markers found in the attribute section
 * of opening tags back into characters: `nl!` becomes a newline, `cr!` a
 * carriage return and `ws!` a single space.
 *
 * Only tags whose last character before `>` is not `/` are matched.
 *
 * @param {string} html
 * @returns {string}
 */
export const unprotectAttributes = (html) => {
  const restoreWhitespace = (/** @type {string} */ attributes) => attributes
    .replace(new RegExp(`${IGNORE_STRING}nl!`, "g"), '\n')
    .replace(new RegExp(`${IGNORE_STRING}cr!`, "g"), '\r')
    .replace(new RegExp(`${IGNORE_STRING}ws!`, "g"), ' ')

  return html.replace(
    /<[\w:\-]+([^>]*[^\/])>/g,
    (/** @type {string} */ tag, /** @type {any} */ attributes) => tag.replace(attributes, restoreWhitespace)
  )
}
/**
 * Turn content markers back into whitespace. Every line containing a
 * `__!i-£___£%__xx!` token (two lowercase letters) is scanned, and each token
 * is restored: `nl!` becomes a newline, `cr!` a carriage return and `ws!` a
 * single space. Tokens with any other two-letter code are left as they are.
 *
 * @param {string} html
 * @returns {string}
 */
export const unprotectContent = (html) => {
  const restoreToken = (/** @type {string} */ token) => token
    .replace(new RegExp(`${CONTENT_IGNORE_STRING}nl!`, "g"), '\n')
    .replace(new RegExp(`${CONTENT_IGNORE_STRING}cr!`, "g"), '\r')
    .replace(new RegExp(`${CONTENT_IGNORE_STRING}ws!`, "g"), ' ')

  const restoreLine = (/** @type {string} */ line) =>
    line.replace(/__!i-£___£%__[a-z]{2}!/g, restoreToken)

  return html.replace(/.*__!i-£___£%__[a-z]{2}!.*/g, restoreLine)
}
/**
 * Replace ignore string with html brackets.
 *
 * Within the part of a tag that follows its name, `IGNORE_STRING + 'lt!'`
 * becomes `<` and `IGNORE_STRING + 'gt!'` becomes `>`. Markers outside of
 * tags are not touched.
 *
 * @param {string} html
 * @returns {string}
 */
export const unsetIgnoreAttribute = (html) => {
  const restoreBrackets = (/** @type {string} */ attributes) => {
    const with_lt = attributes.replace(new RegExp(`${IGNORE_STRING}lt!`, "g"), '<')
    return with_lt.replace(new RegExp(`${IGNORE_STRING}gt!`, "g"), '>')
  }

  return html.replace(
    /<[\w:\-]+([^>]*)>/g,
    (/** @type {string} */ tag, /** @type {any} */ attributes) => tag.replace(attributes, restoreBrackets)
  )
}
/**
 * Replace ignore string with the original brackets and whitespace.
 *
 * For every tag name in `config.ignore`, the content between `<tag...>` and
 * the next `</tag>` has its markers restored: `-{ignore_with}lt-` becomes `<`,
 * `-{ignore_with}gt-` becomes `>`, `-{ignore_with}nl-` a newline,
 * `-{ignore_with}cr-` a carriage return and `-{ignore_with}ws-` a single space.
 *
 * Details worth knowing:
 * - Markers are matched as literal text, so `ignore_with` may contain
 *   characters that are special in regular expressions (`+`, `(`, `.`, ...).
 * - Only the content is rewritten; it is located by position rather than by
 *   searching for its text inside the whole match.
 * - Tag names are interpolated into the regex as-is (not escaped), and the
 *   opening tag is matched by prefix (`<tag` followed by anything up to `>`).
 *
 * @param {string} html
 * @param {import('htmlfy').Config} config
 * @returns {string}
 */
export const unsetIgnoreElement = (html, config) => {
  const ignore = config.ignore
  const ignore_string = config.ignore_with

  /* Restored in this order: lt, gt, nl, cr, ws. */
  const markers = [
    ['lt', '<'],
    ['gt', '>'],
    ['nl', '\n'],
    ['cr', '\r'],
    ['ws', ' '],
  ]

  const restore = (/** @type {string} */ content) => {
    let restored = content
    for (const [code, char] of markers) {
      restored = restored.replaceAll('-' + ignore_string + code + '-', char)
    }
    return restored
  }

  let result = html
  for (const tag of ignore) {
    const element = new RegExp(`(<${tag}[^>]*>)([\\s\\S]*?)(</${tag}>)`, 'g')
    result = result.replace(
      element,
      (/** @type {string} */ match, /** @type {string} */ open, /** @type {string} */ content, /** @type {string} */ close) =>
        open + restore(content) + close
    )
  }

  return result
}
/**
 * Validate any passed-in config options and merge with CONFIG.
 *
 * When none of the known options is present, `CONFIG` itself is returned.
 * Otherwise each provided option is type-checked and the result of
 * `mergeConfig(CONFIG, ...)` is returned. The passed-in object is never
 * modified; the deprecated boolean `tag_wrap` is normalized on a shallow copy.
 *
 * @param {import('htmlfy').UserConfig} config A user config.
 * @returns {import('htmlfy').Config} A validated config.
 * @throws {Error} When `config` is not an object, or an option has the wrong
 * type or is out of range.
 */
export const validateConfig = (config) => {
  /* `typeof null` is 'object', so null has to be rejected explicitly. */
  if (config === null || typeof config !== 'object') throw new Error('Config must be an object.')

  /* Shallow copy, so normalizing deprecated options does not alter the caller's object. */
  /** @type {any} */
  const user_config = { ...config }
  const has = (/** @type {string} */ option) => Object.hasOwn(user_config, option)

  const known_options = [
    'content_wrap',
    'ignore',
    'ignore_with',
    'strict',
    'tab_size',
    'tag_wrap',
    'tag_wrap_width',
    'trim',
  ]

  if (!known_options.some((option) => has(option))) return CONFIG

  /*
   * Validate whenever a value was given. A truthiness test would let
   * 0, NaN, null, '' and false slip through unchecked.
   */
  const tab_size = user_config.tab_size

  if (tab_size !== undefined) {
    if (typeof tab_size !== 'number') throw new Error(`tab_size must be a number, not ${typeof tab_size}.`)

    /* Only integers are safe integers, so no rounding is needed afterwards. */
    if (!Number.isSafeInteger(tab_size)) throw new Error(`Tab size ${tab_size} is not safe. See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number/isSafeInteger for more info.`)

    if (tab_size < 1 || tab_size > 16) throw new Error('Tab size out of range. Expecting 1 to 16.')
  }

  if (has('content_wrap') && typeof user_config.content_wrap !== 'number')
    throw new Error(`content_wrap config must be a number, not ${typeof user_config.content_wrap}.`)

  if (has('ignore') && !(Array.isArray(user_config.ignore) && user_config.ignore.every((/** @type {any} */ e) => typeof e === 'string')))
    throw new Error('Ignore config must be an array of strings.')

  if (has('ignore_with') && typeof user_config.ignore_with !== 'string')
    throw new Error(`Ignore_with config must be a string, not ${typeof user_config.ignore_with}.`)

  if (has('strict') && typeof user_config.strict !== 'boolean')
    throw new Error(`Strict config must be a boolean, not ${typeof user_config.strict}.`)

  /* TODO remove in v0.9.0 */
  if (has('tag_wrap') && typeof user_config.tag_wrap === 'boolean') {
    console.warn('tag_wrap as a boolean is deprecated, and will not be supported in v0.9.0+. Use `tag_wrap: <number>` instead; where <number> is the max character width acceptable before wrapping attributes.')

    /* A boolean is replaced by a width: the user's tag_wrap_width if truthy, else the default. */
    if (user_config.tag_wrap_width)
      user_config.tag_wrap = user_config.tag_wrap_width
    else
      user_config.tag_wrap = CONFIG.tag_wrap_width
  }

  if (has('tag_wrap') && typeof user_config.tag_wrap !== 'number')
    throw new Error(`tag_wrap config must be a number, not ${typeof user_config.tag_wrap}.`)

  /* TODO remove in v0.9.0 */
  if (has('tag_wrap_width'))
    console.warn('tag_wrap_width is deprecated, and will not be supported in v0.9.0+. Use `tag_wrap: <number>` instead; where <number> is the max character width acceptable before wrapping attributes.')

  /* TODO remove in v0.9.0 */
  if (has('tag_wrap_width') && typeof user_config.tag_wrap_width !== 'number')
    throw new Error(`tag_wrap_width config must be a number, not ${typeof user_config.tag_wrap_width}.`)

  if (has('trim') && !(Array.isArray(user_config.trim) && user_config.trim.every((/** @type {any} */ e) => typeof e === 'string')))
    throw new Error('Trim config must be an array of strings.')

  return mergeConfig(CONFIG, user_config)
}
/**
 * Re-flow `text` into lines of at most `width` characters (not counting
 * `indent`), prefix every line with `indent`, join the lines with newlines and
 * pass the result through `protectContent()`.
 *
 * A word whose length is `width` or more is always placed on a line of its
 * own. Text that is empty or only whitespace yields an empty string.
 *
 * @param {string} text
 * @param {number} width
 * @param {string} indent
 */
export const wordWrap = (text, width, indent) => {
  const tokens = text.trim().split(/\s+/)

  if (tokens.length === 0 || (tokens.length === 1 && tokens[0] === ''))
    return ""

  /** @type {string[]} */
  const wrapped = []
  let pending = ""

  /* Emit the line being built (if any), and start over with an empty one. */
  const flush = () => {
    if (pending !== "")
      wrapped.push(indent + pending)
    pending = ""
  }

  for (const token of tokens) {
    if (token === "") continue

    /* Over-long words end the current line and take a line for themselves. */
    if (token.length >= width) {
      flush()
      wrapped.push(indent + token)
      continue
    }

    const candidate = pending === "" ? token : pending + " " + token

    if (candidate.length <= width) {
      pending = candidate
      continue
    }

    /* The word does not fit: close the current line and start the next one with it. */
    flush()
    pending = token
  }

  /* Whatever is left forms the last line. */
  flush()

  return protectContent(wrapped.join("\n"))
}