UNPKG

fortissimo-html

Version:

Fortissimo HTML - Flexible, Forgiving, Formatting HTML Parser

365 lines 17.5 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.formatHtml = exports.ValueQuoteStyle = exports.ValueQuoting = void 0; const dom_1 = require("./dom"); const characters_1 = require("./characters"); var ValueQuoting; (function (ValueQuoting) { ValueQuoting[ValueQuoting["LEAVE_AS_IS"] = 0] = "LEAVE_AS_IS"; ValueQuoting[ValueQuoting["ALWAYS_QUOTE"] = 1] = "ALWAYS_QUOTE"; ValueQuoting[ValueQuoting["UNQUOTE_INTEGERS"] = 2] = "UNQUOTE_INTEGERS"; ValueQuoting[ValueQuoting["UNQUOTE_SIMPLE_VALUES"] = 3] = "UNQUOTE_SIMPLE_VALUES"; })(ValueQuoting = exports.ValueQuoting || (exports.ValueQuoting = {})); const SIMPLE_VALUE = /^[-\da-z._]+$/i; var ValueQuoteStyle; (function (ValueQuoteStyle) { ValueQuoteStyle[ValueQuoteStyle["PREFER_DOUBLE"] = 0] = "PREFER_DOUBLE"; ValueQuoteStyle[ValueQuoteStyle["PREFER_SINGLE"] = 1] = "PREFER_SINGLE"; ValueQuoteStyle[ValueQuoteStyle["DOUBLE"] = 2] = "DOUBLE"; ValueQuoteStyle[ValueQuoteStyle["SINGLE"] = 3] = "SINGLE"; })(ValueQuoteStyle = exports.ValueQuoteStyle || (exports.ValueQuoteStyle = {})); const DEFAULT_OPTIONS = { alignAttributes: true, continuationIndent: 8, childrenNotIndented: new Set(['/', 'html', 'body', 'thead', 'tbody', 'tfoot']), dontBreakIfInline: new Set(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'title']), endDocumentWithNewline: true, eol: null, escapeOptions: { entityStyle: characters_1.EntityStyle.SHORTEST, reencode: characters_1.ReencodeOptions.DONT_CHANGE, target: characters_1.TargetEncoding.UNICODE, undoUnneededEntities: false }, indent: 4, inline: new Set(['a', 'abbr', 'acronym', 'b', 'basefont', 'bdo', 'big', 'br', 'cite', 'cite', 'code', 'dfn', 'em', 'font', 'i', 'img', 'input', 'kbd', 'label', 'q', 's', 'samp', 'select', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'text', 'tt', 'u', 'var']), instantiateSyntheticNodes: false, keepWhitespaceInside: new Set(['pre', 'textarea']), lastText: null, maxBlankLines: 1, newLineBefore: new Set(['body', 'div', 'form', 'h1', 'h2', 'h3', 'p']), normalizeAttributeSpacing: true, removeNewLineBefore: new Set(['br']), removeUnclosedTags: true, spaceAroundAttributeEquals: false, tabSize: 8, trimDocument: true, useTabCharacters: true, valueQuoting: ValueQuoting.ALWAYS_QUOTE, valueQuoteStyle: ValueQuoteStyle.PREFER_DOUBLE, }; function formatHtml(node, options) { const opts = processOptions(options || {}); if (!opts.eol) opts.eol = '\n'; if (opts.instantiateSyntheticNodes) instantiateSyntheticNodes(node); else removeSyntheticNodes(node); if (opts.indent > 0) { opts.lastText = null; preprocessWhitespace(node, opts); } if (opts.indent > 0 && (opts.indent === 1 || opts.trimDocument) && node.children && node.children.length > 0) { if (node.children[0] instanceof dom_1.TextElement) node.children[0].content = (0, characters_1.trimLeft)(node.children[0].content); const last = node.children.length - 1; if (node.children[last] instanceof dom_1.TextElement) node.children[last].content = (0, characters_1.trimRight)(node.children[last].content); } if (opts.indent > 1 && opts.endDocumentWithNewline) { if (!node.children) node.children = []; if (node.children.length === 0 || !(node.children[node.children.length - 1] instanceof dom_1.TextElement)) node.children.push(new dom_1.TextElement(opts.eol, 0, 0, false)); else { const text = node.children[node.children.length - 1]; text.content = text.content.replace(/\s*$/, opts.eol); } } formatNode(node, opts, 0); } exports.formatHtml = formatHtml; function formatNode(node, options, indent) { const children = node.children; if (!children) return; const delta = options.childrenNotIndented.has(node.tagLc) ? 0 : 1; const keepWhitespaceInside = options.keepWhitespaceInside.has(node.tagLc) || node.tagLc === '/'; const specialText = (node.tagLc === 'script' || node.tagLc === 'style'); let pre_indented = -2; for (let i = 0; i < children.length; ++i) { const elem = children[i]; if (elem instanceof dom_1.DomNode) { formatAttributes(elem, indent + delta, options); if (options.indent > 0) { if (options.indent === 1) elem.endTagText = (0, characters_1.compactWhitespace)(elem.endTagText || '').replace(/\s+>$/, '>'); if (/[\r\n][ \t\f]*>/.test(elem.endTagText || '')) { // Would prefer to simply use `.*` instead of `(?:.|\s)*`, but Firefox // doesn't support regex "dotall" `s` flag. const $ = /((?:.|\s)*)[\r\n][ \t\f]*>/.exec(elem.endTagText); elem.endTagText = $[1] + options.eol + tabify(' '.repeat((indent + delta) * options.indent), options) + '>'; pre_indented = i; } if (options.lastText && options.removeNewLineBefore.has(elem.tagLc)) options.lastText.content = options.lastText.content.replace(/\s+$/, ''); else if ((options.newLineBefore.has(elem.tagLc) || elem.blockContext) && pre_indented !== i - 1) { if (!options.lastText) { options.lastText = new dom_1.TextElement('', 0, 0, false); children.splice(i++, 0, options.lastText); } applyIndentation(options.lastText, indent + delta, true, options); } } if (options.indent === 1) elem.innerWhitespace = ''; const saveLastText = options.lastText; options.lastText = null; formatNode(elem, options, indent + delta); if (!elem.children) options.lastText = elem.closureState === dom_1.ClosureState.EXPLICITLY_CLOSED ? undefined : null; else if (options.lastText === null) options.lastText = saveLastText; } else if (elem instanceof dom_1.TextElement) { options.lastText = elem; if (options.escapeOptions.reencode !== characters_1.ReencodeOptions.DONT_CHANGE && !keepWhitespaceInside && node.tagLc !== 'script' && node.tagLc !== 'style') { if (elem.possibleEntities) elem.content = (0, characters_1.reencodeEntities)(elem.content, options.escapeOptions); else elem.content = (0, characters_1.escapeToEntities)(elem.content, options.escapeOptions); } } else { if (options.indent > 0 && options.lastText && (options.indent === 1 || /[\r\n]/.test(options.lastText.content))) applyIndentation(options.lastText, indent + delta, false, options); options.lastText = null; } } if (options.indent > 1 && (specialText || (!keepWhitespaceInside && !onlyContainsInline(node)))) { if (!options.lastText) { options.lastText = new dom_1.TextElement('', 0, 0, false); children.push(options.lastText); } if (node.closureState === dom_1.ClosureState.EXPLICITLY_CLOSED && !options.inline.has(node.tagLc)) { const indentation = tabify(' '.repeat(indent * options.indent), options); const $ = /^((?:.|\s)*(?:\r\n|\n|\r))[ \t\f]*$/.exec(options.lastText.content); options.lastText.content = ($ && $[1] || options.lastText.content + options.eol) + indentation; } else options.lastText.content = options.lastText.content.replace(/(?:\r\n|\n|\r)[ \t\f]*$/, ''); } if (node.closureState === dom_1.ClosureState.EXPLICITLY_CLOSED) options.lastText = undefined; // undefined signifies that any saved lastText should be cleared. else if (node.closureState !== dom_1.ClosureState.IMPLICITLY_CLOSED) options.lastText = null; // null signifies that any saved lastText should be restored. } function onlyContainsInline(node) { if (!node.children) return true; let onlyInline = true; for (let i = 0; i < node.children.length && onlyInline; ++i) onlyInline = !(node.children[i] instanceof dom_1.DomNode && node.children[i].blockContext); return onlyInline; } function applyIndentation(elem, indent, addNewLine, options) { if (options.indent > 1) { const indentation = tabify(' '.repeat(indent * options.indent), options); const $ = /((?:.|\s)*(?:\r\n|\n|\r))[ \t\f]*$/.exec(elem.content); elem.content = ($ && $[1] || elem.content + (addNewLine ? options.eol : '')) + indentation; } } function formatAttributes(node, indent, options) { for (let i = 0; i < node.attributes.length; ++i) { node.equals[i] = node.equals[i].trim(); const value = node.values[i]; if ((value || node.quotes[i]) && options.valueQuoting !== ValueQuoting.LEAVE_AS_IS) { if (options.valueQuoting === ValueQuoting.UNQUOTE_SIMPLE_VALUES && SIMPLE_VALUE.test(value) || options.valueQuoting === ValueQuoting.UNQUOTE_INTEGERS && /^\d+$/.test(value)) node.quotes[i] = ''; else if (options.valueQuoteStyle === ValueQuoteStyle.DOUBLE || (options.valueQuoteStyle === ValueQuoteStyle.PREFER_DOUBLE && (!/"/.test(value) || /'/.test(value)))) { node.quotes[i] = '"'; node.values[i] = value.replace(/"/g, '&quot;'); } else if (options.valueQuoteStyle === ValueQuoteStyle.SINGLE || (options.valueQuoteStyle === ValueQuoteStyle.PREFER_SINGLE && (!/'/.test(value) || /"/.test(value)))) { node.quotes[i] = "'"; node.values[i] = value.replace(/'/g, '&apos;'); } } if (options.escapeOptions.reencode !== characters_1.ReencodeOptions.DONT_CHANGE) node.values[i] = (0, characters_1.reencodeEntities)(node.values[i], options.escapeOptions, true); let spacing = node.spacing[i]; if (options.indent > 1 && options.normalizeAttributeSpacing) { if (/[\r\n]/.test(spacing)) { const extraIndent = options.alignAttributes ? (0, characters_1.columnWidth)(node.tag) + 2 : options.continuationIndent; spacing = spacing.replace(/[^\r\n]/g, '') + ' '.repeat(indent * options.indent + extraIndent); } else spacing = ' '; node.spacing[i] = tabify(spacing, options); if (node.equals[i]) node.equals[i] = options.spaceAroundAttributeEquals ? ' = ' : '='; } else if (options.indent === 1) node.spacing[i] = ' '; } } function instantiateSyntheticNodes(node) { if (!node.children) return; for (const elem of node.children) { if (elem instanceof dom_1.DomNode) { if (elem.synthetic) { elem.synthetic = false; elem.closureState = dom_1.ClosureState.EXPLICITLY_CLOSED; elem.endTagText = '</' + elem.tag + '>'; } instantiateSyntheticNodes(elem); } } } function removeSyntheticNodes(node) { const children = node.children; if (!children) return; for (let i = 0; i < children.length; ++i) { const elem = children[i]; if (elem instanceof dom_1.DomNode) { let childNode = elem; while (childNode.synthetic) childNode = childNode.children[0]; children[i] = childNode; removeSyntheticNodes(childNode); } } } function preprocessWhitespace(node, options, blockStart = false, blockEnd = false) { if (options.keepWhitespaceInside.has(node.tagLc) || node.tagLc === 'script' || node.tagLc === 'style') { node.blockContext = true; options.lastText = null; return; } const children = node.children || []; const isBlock = (node.blockContext = !options.inline.has(node.tagLc)); for (let i = 0; i < children.length; ++i) { if (isBlock) { if (i === 0) blockStart = true; if (i === children.length - 1) blockEnd = true; } const child = children[i]; if (child instanceof dom_1.DomNode) { preprocessWhitespace(child, options, blockStart, blockEnd); blockStart = child.blockContext; } else if (child instanceof dom_1.TextElement) { const afterComment = (0, dom_1.isCommentLike)(children[i - 1]); const beforeComment = (0, dom_1.isCommentLike)(children[i + 1]); if (afterComment || beforeComment) child.content = child.content.replace(/[ \f\t]+/g, ' ').replace(/[\n\r]+/g, options.eol) .replace(/^ (?=[\n\r])/, ''); else { const keepNewlines = options.maxBlankLines >= 0; child.content = (0, characters_1.compactWhitespace)(child.content, keepNewlines).replace(/(^|[\r\n])[ \f\t]+(?=[\r\n]|$)/g, '$1'); if (keepNewlines && options.maxBlankLines >= -1) child.content = (0, characters_1.compactNewlines)(child.content, options.maxBlankLines + 1); if (blockStart || child.content.startsWith(' ') && options.lastText && options.lastText.content.endsWith(' ')) { child.content = (0, characters_1.trimLeft)(child.content, keepNewlines); child.blockContext = true; blockStart = false; } if (blockEnd || followedByBlock(node, i, options)) child.content = (0, characters_1.trimRight)(child.content, keepNewlines); } if (child.content.startsWith(' ') && options.lastText) options.lastText.content = (0, characters_1.trimRight)(options.lastText.content); options.lastText = child; } else if (child instanceof dom_1.CData) { blockStart = false; options.lastText = null; } // else if (blockStart || isBlock || followedByBlock(node, i, options)) // blockStart = child.blockContext = true; } if (isBlock) options.lastText = null; } function followedByBlock(parent, childIndex, options) { while (++childIndex < parent.children.length) { const sibling = parent.children[childIndex]; if (sibling instanceof dom_1.DomNode) return !options.inline.has(sibling.tagLc); else if (sibling instanceof dom_1.TextElement || sibling instanceof dom_1.CData) return false; } return false; } function processOptions(options) { const opts = Object.assign({}, DEFAULT_OPTIONS); Object.keys(options).forEach(option => { if (option in DEFAULT_OPTIONS && typeof DEFAULT_OPTIONS[option] === typeof options[option] && Array.isArray(DEFAULT_OPTIONS[option]) === Array.isArray(options[option])) opts[option] = options[option]; }); opts.childrenNotIndented = applyTagList(opts.childrenNotIndented, options.childrenNotIndented); opts.dontBreakIfInline = applyTagList(opts.dontBreakIfInline, options.dontBreakIfInline); opts.inline = applyTagList(opts.inline, options.inline); opts.keepWhitespaceInside = applyTagList(opts.keepWhitespaceInside, options.keepWhitespaceInside); opts.keepWhitespaceInside = applyTagList(opts.keepWhitespaceInside, options.keepWhitespaceInside); opts.newLineBefore = applyTagList(opts.newLineBefore, options.newLineBefore); opts.removeNewLineBefore = applyTagList(opts.removeNewLineBefore, options.removeNewLineBefore); opts.escapeOptions = Object.assign({}, opts.escapeOptions); Object.keys(opts.escapeOptions).forEach(subOption => { if (subOption in options) opts.escapeOptions[subOption] = options[subOption]; }); return opts; } function applyTagList(originalSet, mods) { const updated = new Set(originalSet); if (mods) { mods.forEach((elem, index) => { elem = elem.toLowerCase(); if (index === 0 && elem === '-*') updated.clear(); else if (elem.startsWith('-')) updated.delete(elem.substr(1)); else updated.add(elem); }); } return updated; } function tabify(s, options) { if (options.useTabCharacters && s.length >= options.tabSize) { s = s.split(/([\r\n])/).map(ss => ss.replace(/^( +)/, (match, $1) => '\t'.repeat(Math.floor($1.length / options.tabSize)) + ' '.repeat($1.length % options.tabSize))).join(''); } return s; } // noinspection JSUnusedLocalSymbols function detabify(s, options) { if (options.useTabCharacters && s.includes('\t')) { const tabSize = options.tabSize; s = s.split(/([\r\n])/).map(ss => { let adj = 0; ss.replace(/\t/g, (match, offset) => { const len = (offset + adj) % tabSize | tabSize; adj += len - 1; return ' '.repeat(len); }); }).join(''); } return s; } //# sourceMappingURL=formatter.js.map