htmlfy
Version:
html formatter yo! Prettify or minify html.
292 lines (241 loc) • 9.28 kB
JavaScript
import { minify } from './minify.js'
import {
extractIgnoredBlocks,
finalProtectContent,
isHtml,
protectAttributes,
reinsertIgnoredBlocks,
setIgnoreAttribute,
setSelfClosing,
unsetSelfClosing,
trimify,
unprotectAttributes,
unprotectContent,
unsetIgnoreAttribute,
validateConfig,
wordWrap
} from './utils.js'
import { VOID_ELEMENTS } from './constants.js'
import { getState } from './state.js'
/**
* @type {{ line: Record<string,string>[] }}
*/
const convert = {
line: []
}
/**
* @type {Map<any,any>}
*/
let ignore_map
/**
* Isolate tags, content, and comments.
*
* @param {string} html The HTML string to evaluate.
* @example <div>Hello World!</div> =>
* [#-# : 0 : <div> : #-#]
* Hello World!
* [#-# : 1 : </div> : #-#]
*/
const enqueue = (html) => {
convert.line = []
let i = -1
/* Regex to find tags OR text content between tags. */
const regex = /(<[^>]+>)|([^<]+)/g
html.replace(regex, (match, c1, c2) => {
if (c1) {
convert.line.push({ type: "tag", value: match })
} else if (c2 && c2.trim().length > 0) {
/* It's text content (and not just whitespace). */
convert.line.push({ type: "text", value: match })
}
i++
return `\n[#-# : ${i} : ${match} : #-#]\n`
})
}
/**
* Process enqueued content.
*
* @returns {string}
*/
const process = () => {
const { config, constants } = getState()
const step = " ".repeat(config.tab_size)
const tag_wrap = config.tag_wrap
const content_wrap = config.content_wrap
const strict = config.strict
/* Track current number of indentations needed. */
let indents = ''
/** @type string[] */
const output_lines = []
const tag_regex = /<[A-Za-z]+\b[^>]*(?:.|\n)*?\//g /* Is opening tag or void element. */
const attribute_regex = /\s{1}[A-Za-z-]+(?:=".*?")?/g /* Matches all tag/element attributes. */
/* Process lines and indent. */
convert.line.forEach((source, index) => {
let current_line_value = source.value
const is_ignored_content =
current_line_value.startsWith('___HTMLFY_SPECIAL_IGNORE_MARKER_')
let subtrahend = 0
const prev_line_data = convert.line[index - 1]
const prev_line_value = prev_line_data?.value ?? "" // Use empty string if no prev line
/**
* Arbitratry character, to keep track of the string's length.
*/
indents += '0'
if (index === 0) subtrahend++
/* We're processing a closing tag. */
if (current_line_value.trim().startsWith("</")) subtrahend++
/* prevLine is a doctype declaration. */
if (prev_line_value.trim().startsWith("<!doctype")) subtrahend++
/* prevLine is a comment. */
if (prev_line_value.trim().startsWith("<!--")) subtrahend++
/* prevLine is a void element. */
if (
prev_line_value.trim().endsWith("/>") // native self-closing
||
prev_line_value.trim().endsWith(constants.SELF_CLOSING_PLACEHOLDER) // synthetic self-closing
) subtrahend++
/* prevLine is a closing tag. */
if (prev_line_value.trim().startsWith("</")) subtrahend++
/* prevLine is text. */
if (prev_line_data?.type === "text") subtrahend++
/* Determine offset for line indentation. */
const offset = Math.max(0, indents.length - subtrahend)
/* Correct indent level for *this* line's content */
const current_indent_level = offset // Store the level for this line
indents = indents.substring(0, current_indent_level) // Adjust for *next* round
/**
* Starts with a single punctuation character.
* Add punctuation to end of previous line.
*/
if (source.type === 'text' && /^[!,;\.]/.test(current_line_value)) {
if (current_line_value.length === 1) {
output_lines[output_lines.length - 1] =
output_lines.at(-1) + current_line_value
return
} else {
output_lines[output_lines.length - 1] =
output_lines.at(-1) + current_line_value.charAt(0)
current_line_value = current_line_value.slice(1).trim()
}
}
const padding = step.repeat(current_indent_level)
if (is_ignored_content) {
/* Stop processing this line, as it's set to be ignored. */
output_lines.push(current_line_value)
} else {
/* Remove comment. */
if (strict && current_line_value.trim().startsWith("<!--"))
return
let result = current_line_value
/* Remove self-closing placeholder, if needed. */
result = unsetSelfClosing(result)
if (
source.type === 'text' &&
content_wrap > 0 &&
result.length >= content_wrap
) {
result = wordWrap(result, content_wrap, padding)
}
/* Wrap the attributes of open tags and void elements. */
else if (
tag_wrap > 0 &&
result.length > tag_wrap &&
tag_regex.test(result)
) {
tag_regex.lastIndex = 0 // Reset stateful regex
attribute_regex.lastIndex = 0 // Reset stateful regex
const tag_parts = result.split(attribute_regex).filter(Boolean)
if (tag_parts.length >= 2) {
const attributes = result.matchAll(attribute_regex)
const inner_padding = padding + step
let wrapped_tag = padding + tag_parts[0] + "\n"
for (const a of attributes) {
const attribute_string = a[0].trim()
wrapped_tag += inner_padding + attribute_string + "\n"
}
const tag_name_match = tag_parts[0].match(/<([A-Za-z_:-]+)/)
const tag_name = tag_name_match ? tag_name_match[1] : ""
const is_self_closing = tag_parts.at(-1)?.endsWith("/>") && VOID_ELEMENTS.includes(tag_name)
const closing_part = tag_parts[1].trim()
const closing_padding = padding + (strict && is_self_closing ? " " : "")
wrapped_tag += closing_padding + closing_part
result = wrapped_tag // Assign the fully wrapped string
} else {
result = padding + result
}
} else {
/* Apply simple indentation (if no wrapping occurred) */
result = padding + result
}
/* Add the processed line (or lines if wordWrap creates them) to the output */
output_lines.push(result)
}
})
/* Join all processed lines into the final HTML string */
let final_html = output_lines.join("\n")
/* Preserve wrapped attributes. */
if (tag_wrap > 0) final_html = protectAttributes(final_html)
/* Extra preserve wrapped content. */
if (content_wrap > 0 && new RegExp(`/\\n[ ]*[^\\n]*${constants.CONTENT_IGNORE_PLACEHOLDER}[^\\n]*\\n/`).test(final_html))
final_html = finalProtectContent(final_html)
/* Remove line returns, tabs, and consecutive spaces within html elements or their content. */
final_html = final_html.replace(
/<(?<Element>[^>\s]+)[^>]*>[^<]*?[^><\/\s][^<]*?<\/\k<Element>>|<script[^>]*>[\s]*<\/script>|<([\w:\._-]+)([^>]*)><\/\2>|<([\w:\._-]+)([^>]*)>[\s]+<\/\4>/g,
match => {
// Check if this contains placeholder
if (match.includes(constants.SELF_CLOSING_PLACEHOLDER) || match.includes(constants.CONTENT_IGNORE_PLACEHOLDER)) {
return match // Don't modify if it contains the placeholder
}
return match.replace(/\n|\t|\s{2,}/g, '')
}
)
/* Revert wrapped content. */
if (content_wrap > 0) final_html = unprotectContent(final_html)
/* Revert wrapped attributes. */
if (tag_wrap > 0) final_html = unprotectAttributes(final_html)
/* Remove self-closing nature of void elements. */
if (strict) final_html = final_html.replace(/\s\/>|\/>/g, '>')
/* Trim leading and/or trailing line returns. */
if (final_html.startsWith("\n")) final_html = final_html.substring(1)
if (final_html.endsWith("\n")) final_html = final_html.substring(0, final_html.length - 1)
return final_html
}
/**
* Format HTML with line returns and indentations.
*
* @param {string} html The HTML string to prettify.
* @param {import('htmlfy').UserConfig} [config] A user configuration object.
* @returns {string} A well-formed HTML string.
*/
export const prettify = (html, config) => {
let reinsert_ignored = false
const { checked_html, ignored } = getState()
/* Return content as-is if it does not contain any HTML elements. */
if (!checked_html && !isHtml(html)) return html
/* Runs setState for config. */
const validated_config = validateConfig(config || {})
const ignore = validated_config.ignore.length > 0
/* Allows you to trimify before ignoring. */
if (validated_config.trim.length > 0) html = trimify(html, validated_config.trim)
/* Extract ignored elements. */
if (!ignored && ignore) {
const { html_with_markers, extracted_map } = extractIgnoredBlocks(html)
html = html_with_markers
ignore_map = extracted_map
reinsert_ignored = true
}
/* Preserve html text within attribute values. */
html = setIgnoreAttribute(html)
/* Insert placeholder for void elements that aren't self-closing. */
html = setSelfClosing(html)
html = minify(html)
enqueue(html)
html = process()
/* Revert html text within attribute values. */
html = unsetIgnoreAttribute(html)
/* Re-insert ignored elements. */
if (reinsert_ignored) {
html = reinsertIgnoredBlocks(html, ignore_map)
}
return html
}