UNPKG

html-bundler-webpack-plugin

Version:

Generates complete single-page or multi-page website from source assets. Built-in support for Markdown, Eta, EJS, Handlebars, Nunjucks, Pug. Alternative to html-webpack-plugin.

github.com/webdiscus/html-bundler-webpack-plugin

webdiscus/html-bundler-webpack-plugin

572 lines (480 loc) • 17.3 kB

JavaScript

// Performance notes: // - comparing numbers is faster than comparing strings // - transforming a string to a byte array with TextEncoder.encode() is 100x faster than charCodeAt() via `for` loop const textEncoder = new TextEncoder(); const spaceCodes = textEncoder.encode(' \n\r\t\f'); const valueSeparatorCodes = textEncoder.encode('= \n\r\t\f'); const slashCode = '/'.charCodeAt(0); const tagStartCode = '<'.charCodeAt(0); const tagEndCode = '>'.charCodeAt(0); const equalCode = '='.charCodeAt(0); const escapeCode = `\\`.charCodeAt(0); const quotCodes = textEncoder.encode(`"'`); const comparePos = (a, b) => a.startPos - b.startPos; /** * Returns the first index of a non-space char. * The space char can be one of: space, tab, new line, carrier return, page break. * * @param {string} content * @param {number} pos * @return {number} */ const indexOfNonSpace = (content, pos = 0) => { for (; spaceCodes.indexOf(content.charCodeAt(pos)) > -1; pos++) {} return pos; }; /** * Returns the first index of a space char or current position. * The space char can be one of: space, tab, new line, carrier return, page break. * * @param {string} content The tag raw content. * @param {string} tagName Used for exception only. * @param {number} startPos Used for exception only. * @param {number} pos The current position in the content. * @return {{pos: number, tagEnd: boolean}} The `pos` is the last position of non tag end char. * The `tagEnd` indicates whether the end of the tag has been reached. */ const indexOfSpaceOrTagEnd = (content, tagName, startPos, pos = 0) => { const len = content.length; let code, next; do { code = content.charCodeAt(pos); if (code === tagEndCode || (code === slashCode && content.charCodeAt(pos + 1) === tagEndCode)) { return { pos, tagEnd: true }; } next = valueSeparatorCodes.indexOf(code) < 0; if (next) { pos++; } if (pos > len) { throw new Error(`The '${tagName}' tag starting at ${startPos} position is missing the closing '>' char.`); } } while (next); return { pos, tagEnd: false }; }; /** * @param {string} content * @param {string} tagName Used for exception only. * @param {number} startPos Used for exception only. * @param {number} pos * @return {number} */ const indexOfNonSpaceOrTagEnd = (content, tagName, startPos, pos = 0) => { const len = content.length; let code, next; do { code = content.charCodeAt(pos); if (code === tagEndCode || (code === slashCode && content.charCodeAt(pos + 1) === tagEndCode)) { return -1; } if (pos > len || code === tagStartCode) { throw new Error(`The '${tagName}' tag starting at ${startPos} position is missing the closing '>' char.`); } next = spaceCodes.indexOf(code) > -1; if (next) { pos++; } } while (next); return pos; }; /** * Whether the link tag load a style or other assets. * * <link href="style.css" type="text/css" /> * <link href="style.css" rel="stylesheet" /> * <link href="style.css" rel="alternate stylesheet" /> * <link href="style.css" rel="preload" as="style" /> * <link href="style.css" rel="preload" as="stylesheet" /> * * @param {{}} attrs The tag attributes. * @return {boolean} */ const isLinkStyle = (attrs) => { if (attrs?.type === 'text/css') return true; if (attrs?.as?.indexOf('style') > -1) return true; if (attrs?.rel?.indexOf('style') > -1) return true; return false; }; /** * Whether the link tag load a script. * * <link href="script.js" rel="prefetch" as="script" /> * <link href="script.js" rel="preload" as="script" /> * <link href="script.js" rel="modulepreload" /> * <link href="script.js" rel="modulepreload" as="script" /> * <link href="script.js" rel="modulepreload" as="worker" /> * <link href="script.js" rel="modulepreload" as="serviceworker" /> * <link href="script.js" rel="modulepreload" as="sharedworker" /> * * @param {{}} attrs The tag attributes. * @return {boolean} */ const isLinkScript = (attrs) => { if (attrs?.as === 'script') return true; if (attrs?.rel === 'modulepreload') { return !attrs?.as || /(worker|serviceworker|sharedworker)/.test(attrs.as); } return false; }; /** * Parse tag attributes in a tag string. * * The string must contains only tag string with attributes, e.g.: * `<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16' width='16px' height='16px'>` * * @param {string} string * @returns {Object<key: string, value: string>} The parsed attributes as the object key:value. */ const parseTagAttributes = (string) => { const matches = string.matchAll(/(\S+)=["'](.+?)["']/gm); let attrs = {}; for (const [, key, val] of matches) { attrs[key] = val; } return attrs; }; class HtmlParser { /** * Check whether a given string is a plain HTML attribute value * and not a compiled JavaScript expression. * * @param {string} value The attribute value to test. * @returns {boolean} Returns true if the value appears to be a plain HTML attribute value. */ static isHtmlAttributeValue(value) { if (!value || typeof value !== 'string') return false; // allow if value is a simple static require(path) or url(path) if (value.includes('require(') || value.includes('url(')) return true; // heuristic check: disallow common JS syntax characters const forbidden = /[();|!+*<>]/; return !forbidden.test(value); } /** * Parse values of tag attributes. * * @param {string} content The HTML content. * @param {string} tag The tag to parsing. * @param {string | Array<string>} attributes List of attributes to parsing. * @param {function | null} filter The function to exclude parsed attributes. * @param {string} resourcePath The file of content. This is passed into the filter function. * @return {Array<{}> | boolean} */ static parseTag(content, { tag, attributes: attrList, filter, resourcePath = '' }) { const open = `<${tag}`; const offset = open.length; const result = []; let pos = 0; // detect all attributes in the tag // <tag{space|>}attr{space|=|>}{space}{quote}value{quote}{space|>}... while ((pos = content.indexOf(open, pos)) >= 0) { const { attrs, attrsData, startPos, endPos } = this.parseTagAttributes(content, tag, pos, offset); const raw = content.slice(startPos, endPos); const parsedAttrs = []; let type = 'asset'; if (tag === 'script') { type = 'script'; } else if (tag === 'link') { if (isLinkStyle(attrs)) type = 'style'; else if (isLinkScript(attrs)) type = 'script'; } for (let attrName of attrList) { if (!(attrName in attrs)) continue; const attrValue = attrs[attrName]; const attrData = attrsData[attrName]; const parsedAttr = this.parseAttributeValue(attrName, attrValue, attrData); let resolvedValue = true; if (filter) { const { parsedValue } = parsedAttr; resolvedValue = filter({ tag, attribute: attrName, value: attrValue, parsedValue, attributes: attrs, resourcePath, }); } if (resolvedValue !== false) { if (parsedAttr.attrs) { parsedAttrs.push(...parsedAttr.attrs); } else { parsedAttrs.push(parsedAttr); } if (typeof resolvedValue === 'string') { parsedAttr.resolvedValue = resolvedValue; } } } // sort attributes by pos if (parsedAttrs.length > 1) { parsedAttrs.sort(comparePos); } result.push({ type, tag, raw, parsedAttrs, attrs, startPos, endPos, }); pos = endPos; } return result; } /** * Parse all tag attributes in the content. * * Note: allow zero or more spaces around `=`. * https://www.w3.org/TR/2011/WD-html5-20110113/syntax.html#attributes-0 * * @param {string} content The string contains html. * @param {string} tagName The tag name. * @param {number} pos The starting tag position in the content. * @param {number} offset The offset of the first attribute in the tag. * @return {{attrs: {}, attrsData: {}, startPos: number, endPos: number}} */ static parseTagAttributes(content, tagName, pos, offset = 0) { let attrs = {}; let attrsData = {}; let startPos = pos; let endPos = pos; let isTagEnd = false; pos += offset; while (!isTagEnd && (pos = indexOfNonSpaceOrTagEnd(content, tagName, startPos, pos)) > -1) { let { pos: attrEndPos, tagEnd } = indexOfSpaceOrTagEnd(content, tagName, startPos, pos); let attrStartPos = pos; let attrName = content.substring(attrStartPos, attrEndPos); let value = undefined; if (tagEnd) { // `<img disabled>` attrs[attrName] = value; break; } // next position should be the separator `=` pos = indexOfNonSpace(content, attrEndPos); endPos = pos; let hasValue = content.charCodeAt(pos) === equalCode; if (hasValue) { // allow a space around equal, e.g., `attr = value` const initialValueStartPos = indexOfNonSpace(content, pos + 1); let valueStartPos = initialValueStartPos; let valueEndPos = -1; let nextCharCode = content.charCodeAt(valueStartPos); let isEscaped = false; if (nextCharCode === escapeCode) { isEscaped = true; nextCharCode = content.charCodeAt(++valueStartPos); } if (quotCodes.indexOf(nextCharCode) >= 0) { // value with quotes let quote = String.fromCharCode(nextCharCode); if (isEscaped) quote = `\\` + quote; valueEndPos = content.indexOf(quote, ++valueStartPos); if (valueEndPos > -1) { pos = valueEndPos + 1; isEscaped && pos++; } if (valueEndPos < 0) { throw new Error( `The '${tagName}' '${attrName}' attribute, starting at ${initialValueStartPos} position, is missing a closing quote:\n${content.slice(startPos)}` ); } } else { // value w/o quotes let { pos: valueEndPos2, tagEnd } = indexOfSpaceOrTagEnd(content, tagName, startPos, valueStartPos); isTagEnd = tagEnd; valueEndPos = valueEndPos2; pos = valueEndPos; } value = content.slice(valueStartPos, valueEndPos); endPos = valueEndPos; attrsData[attrName] = { valueStartPos, valueEndPos, isEscaped, }; } attrs[attrName] = value; } // find the tag end pos endPos = content.indexOf('>', endPos) + 1; return { attrs, attrsData, startPos, endPos, }; } /** * Parse the value of the attribute. * * @param {string} attr The attribute name. * @param {string} value The value to parse files. * @param {{}} attrData The parsed info of the attribute. * @return {{attr: string, attrs?: Array, value: string, parsedValue: Array<string>, startPos: number, endPos: number, offset: number, inEscapedDoubleQuotes: boolean} | {} | boolean} */ static parseAttributeValue(attr, value, attrData) { // Note: // 1. The HTML string contains the normal quote: // <img src="image.png"> // the require function should be injected as: // module.exports = '<img src="' + require('image.png') + '">' // 2. The template function can contain the escaped quote, e.g.: // fn("<img src=\"image.png\">") // the require function should be injected as: // module.exports = fn("<img src=\"" + require('image.png') + "\">") let inEscapedDoubleQuotes = attrData.isEscaped; let result = { attr, value, startPos: attrData.valueStartPos, endPos: attrData.valueEndPos, inEscapedDoubleQuotes, parsedValue: [''], }; // check whether the value is JS-template code generated by a template engine if (!this.isHtmlAttributeValue(value)) { return result; } if (attr.indexOf('srcset') > -1) { const { values, attrs } = this.parseSrcsetValue(value, attrData.valueStartPos, inEscapedDoubleQuotes); result.parsedValue = values; result.attrs = attrs; return result; } if (attr === 'style') { const { values, attrs } = this.parseStyleUrlValues(value, attrData.valueStartPos); if (!values) { return {}; } result.parsedValue = values; result.attrs = attrs; return result; } // parse for required values which are not yet resolved by a preprocessor like pug if (value.indexOf('\\u0027') < 0 && value.indexOf('require(') > 0) { const { values, attrs } = this.parseRequiredValues(value, attrData.valueStartPos); return { ...result, parsedValue: values, attrs }; } result.parsedValue = [value.split('?', 1)[0]]; return result; } /** * Parse url() in the style attribute. * * For example: * <div style="background-image: url(./image.png);"></div> * * @param {string} content The attribute value. * @param {Number} valueOffset The absolute offset of value in the raw content. * @return {{values: *[], attrs: *[]} | {}} */ static parseStyleUrlValues(content, valueOffset) { let pos = content.indexOf('url('); if (pos < 0) return {}; let valueStartPos = pos + 4; let valueEndPos = content.indexOf(')', valueStartPos); let quote = content.charAt(valueStartPos); let skipQuotes = 1; if (quote !== '"' && quote !== "'") { quote = ''; skipQuotes = 0; } let parsedValue = content.slice(valueStartPos + skipQuotes, valueEndPos - skipQuotes); // skip quotes around value let attrs = { value: parsedValue, quote, startPos: valueOffset + valueStartPos, endPos: valueOffset + valueEndPos, }; return { values: [parsedValue], attrs: [attrs] }; } /** * Parse require() in the attribute value. * * For example: * <a href="#" data-picture='{ "alt":"picture", "imgSrc": require("./picture.png") }'></a> * * @param {string} content The attribute value. * @param {Number} valueOffset The absolute offset of value in the raw content. * @return {{values: *[], attrs: *[]}} */ static parseRequiredValues(content, valueOffset) { let pos; let values = []; let attrs = []; while ((pos = content.indexOf('require(', pos)) > -1) { let valueStartPos = pos + 8; let valueEndPos = content.indexOf(')', valueStartPos); let quote = content.charAt(valueStartPos); let value = content.slice(valueStartPos + 1, valueEndPos - 1); // skip quotes around value values.push(value.split('?', 1)[0]); attrs.push({ value, quote, // quotes used in require() startPos: valueOffset + pos, // skip `require(` endPos: valueOffset + valueEndPos + 1, // skip `)` }); pos = valueEndPos + 1; } return { values, attrs }; } /** * Parse srcset attribute. * * Possible values: * "img1.png" * "img1.png, img2.png 100w, img3.png 1.5x" * * @param {string} content The srcset value. * @param {Number} valueOffset The absolute offset of value in the raw content. * @param {boolean} inEscapedDoubleQuotes Bypass the property to all `attrs` objects. * @return {{values: *[], attrs: *[]}} */ static parseSrcsetValue(content, valueOffset, inEscapedDoubleQuotes) { const lastPos = content.length; let startPos = 0; let endPos = 0; let currentPos; let values = []; let attrs = []; // supports the query for 'responsive-loader' in following notations: // - image.png?{sizes: [100,200,300], format: 'jpg'} // JSON5 // - require('image.png?sizes[]=100,sizes[]=200,sizes[]=300,format=jpg') // `,` as parameter separator in require(), used in pug if (content.indexOf('?{') > 0 || content.indexOf('require(') > -1) { return { values: [content], attrs: [{ attr: 'srcset', value: content, startPos: valueOffset, endPos: valueOffset + lastPos }], }; } do { currentPos = content.indexOf(',', startPos); if (currentPos < 0) { currentPos = lastPos; } startPos = indexOfNonSpace(content, startPos); // parse value like "img.png" let value = content.slice(startPos, currentPos); let pos = value.indexOf(' '); // parse value like "img.png 100w" if (pos > 0) { value = value.slice(0, pos); endPos = startPos + pos; } else { endPos = currentPos; } values.push(value.split('?', 1)[0]); attrs.push({ attr: 'srcset', value, startPos: valueOffset + startPos, endPos: valueOffset + endPos, inEscapedDoubleQuotes, }); startPos = currentPos + 1; } while (currentPos < lastPos); return { values, attrs }; } } module.exports = { HtmlParser, comparePos, parseTagAttributes };