@ckeditor/ckeditor5-utils
Version:
Miscellaneous utilities used by CKEditor 5.
165 lines (164 loc) • 6.77 kB
JavaScript
/**
* @license Copyright (c) 2003-2025, CKSource Holding sp. z o.o. All rights reserved.
* For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-licensing-options
*/
/**
* @module utils/formathtml
*/
/**
 * A deliberately simple HTML pretty-printer. Every tag of a block-like element (the HTML block-level elements from
 * https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements plus table parts such as `tr` and `td`)
 * is placed on its own line, and the content nested inside such elements is indented one level deeper.
 *
 * WARNING: The input is expected to be free of indentation and new lines (multi-line `<pre>` blocks are the only
 * case handled explicitly). Running this function on markup that has already been formatted will damage the formatting.
 *
 * @param input An HTML string to format.
 * @returns The formatted markup, with trailing whitespace removed.
 */
export default function formatHtml(input) {
	// Elements that cannot have children - https://html.spec.whatwg.org/multipage/syntax.html#void-elements.
	// They get their own line but never change the indentation depth.
	const voidElementNames = new Set( [ 'hr' ] );

	// Block-like elements that are surrounded with new lines and whose children are indented.
	// Partially based on https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements.
	// `<pre>` is intentionally absent so its whitespace is not touched, and `<br>` is handled separately below
	// so that no extra white spaces are injected around it.
	const elementsToFormat = [
		'address', 'article', 'aside', 'blockquote', 'details', 'dialog', 'dd', 'div', 'dl', 'dt',
		'fieldset', 'figcaption', 'figure', 'footer', 'form',
		'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
		'header', 'hgroup', 'hr', 'li', 'main', 'nav', 'ol', 'p', 'section',
		'table', 'tbody', 'td', 'th', 'thead', 'tr', 'ul'
	].map( name => ( { name, isVoid: voidElementNames.has( name ) } ) );

	const elementNamesToFormat = elementsToFormat.map( ( { name } ) => name ).join( '|' );
	const blockTagPattern = new RegExp( `</?(${ elementNamesToFormat })( .*?)?>`, 'g' );

	// Not the fastest approach, but the performance should be good enough.
	// Step 1: wrap every `<tag>` and `</tag>` of a block-like element in new lines. Adjacent tags end up separated
	// by an empty line; those empty lines are dropped in the loop below.
	const withBlockTagsIsolated = input.replace( blockTagPattern, '\n$&\n' );

	// Step 2: break the line right after each `<br>` (and not before it), so no whitespace lands in front of `<br>`.
	const withLineBreaksAfterBr = withBlockTagsIsolated.replace( /<br[^>]*>/g, '$&\n' );

	// Step 3: each resulting line is an opening tag, a closing tag, or a piece of text.
	const lines = withLineBreaksAfterBr.split( '\n' );

	let depth = 0;
	let preState = false;
	let formatted = '';

	for ( const line of lines ) {
		preState = isPreformattedBlockLine( line, preState );

		// Empty lines are dropped unless they belong to a `<pre>` block.
		if ( line.length === 0 && !preState ) {
			continue;
		}

		if ( isNonVoidOpeningTag( line, elementsToFormat ) ) {
			// The opening tag stays at the current depth; whatever follows is nested one level deeper.
			formatted += indentLine( line, depth );
			depth += 1;
		} else if ( isClosingTag( line, elementsToFormat ) ) {
			// The closing tag goes back to the depth of its opening tag.
			depth -= 1;
			formatted += indentLine( line, depth );
		} else if ( preState === 'middle' || preState === 'last' ) {
			// Continuation lines of a multi-line `<pre>` block are emitted without any indentation.
			formatted += indentLine( line, 0 );
		} else {
			formatted += indentLine( line, depth );
		}
	}

	return formatted.trimEnd();
}
/**
 * Tells whether the given line contains an opening tag (`<name>` or `<name ...>`) of any
 * element from the list that is not marked as void.
 *
 * @param line String to check.
 * @param elementsToFormat Elements to be formatted (objects with `name` and `isVoid`).
 * @returns `true` when at least one non-void element's opening tag is found in the line.
 */
function isNonVoidOpeningTag(line, elementsToFormat) {
	// Void elements never open a nested scope, so they are discarded before matching.
	const containsOpeningTagOf = candidate => new RegExp(`<${candidate.name}( .*?)?>`).test(line);

	return elementsToFormat
		.filter(candidate => !candidate.isVoid)
		.some(containsOpeningTagOf);
}
/**
 * Tells whether the given line contains a closing tag (`</name>`) of any element from the list.
 * Void and non-void elements are treated alike.
 *
 * @param line String to check.
 * @param elementsToFormat Elements to be formatted (objects with a `name`).
 * @returns `true` when a closing tag of at least one listed element is found in the line.
 */
function isClosingTag(line, elementsToFormat) {
	for (const candidate of elementsToFormat) {
		const closingTagPattern = new RegExp(`</${candidate.name}>`);

		if (closingTagPattern.test(line)) {
			return true;
		}
	}

	return false;
}
/**
 * Prefixes a line with indentation and terminates it with a new line character.
 *
 * @param line Line to indent.
 * @param indentCount Indentation depth, i.e. how many times `indentChar` is repeated in front of the line.
 * Negative values are treated as `0`.
 * @param indentChar Indentation character(s) used for a single level. 4 spaces by default.
 * @returns The indented line followed by `\n`.
 */
function indentLine(line, indentCount, indentChar = '    ') {
	// The depth can drop below zero when the markup has more closing than opening tags, and
	// `String.prototype.repeat()` throws a RangeError for negative counts, hence the clamp.
	// More about Math.max() here in https://github.com/ckeditor/ckeditor5/issues/10698.
	const depth = Math.max(0, indentCount);

	return `${indentChar.repeat(depth)}${line}\n`;
}
/**
 * Classifies a line with respect to a multi-line preformatted (`<pre>`) block.
 *
 * @param line Line to check.
 * @param isPreviousLinePreFormatted Classification returned for the previous line.
 * @returns `'first'` for a line that opens a `<pre>` without closing it, `'last'` for a line that closes one
 * without opening it, `'middle'` for a line following a `'first'` or `'middle'` line, and `false` otherwise
 * (including a line that both opens and closes a `<pre>`).
 */
function isPreformattedBlockLine(line, isPreviousLinePreFormatted) {
	const opensPre = /<pre( .*?)?>/.test(line);
	const closesPre = /<\/pre>/.test(line);

	if (opensPre) {
		// A `<pre>` that starts and ends on the same line needs no special treatment.
		return closesPre ? false : 'first';
	}

	if (closesPre) {
		return 'last';
	}

	// No `<pre>` tag here: the line is inside a block only if the previous line started or continued one.
	const followsOpenBlock = isPreviousLinePreFormatted === 'first' || isPreviousLinePreFormatted === 'middle';

	return followsOpenBlock ? 'middle' : false;
}