@fwdslsh/unify
Version:
A lightweight, framework-free static site generator with Bun native APIs
1,291 lines (1,132 loc) • 73.1 kB
JavaScript
/**
* Unified HTML Processor for unify
* Handles both SSI-style includes (<!--#include -->) and DOM templating (<template>, <slot>)
* using HTMLRewriter for high-performance processing.
*/
import fs from "fs/promises";
import path from "path";
import { processIncludes } from "./include-processor.js";
import { logger } from "../utils/logger.js";
import {
BuildError,
FileSystemError,
CircularDependencyError,
PathTraversalError,
IncludeNotFoundError,
LayoutError
} from "../utils/errors.js";
import { isPathWithinDirectory, resolveIncludePath, resolveResourcePath } from "../utils/path-resolver.js";
/**
* Determine if processing should fail fast based on configuration
* @param {Object} config - Configuration object
* @param {string} errorType - Type of error ('warning', 'error', 'fatal')
* @returns {boolean} True if processing should fail fast
*/
function shouldFailFast(config, errorType = 'error') {
// New fail-on logic
if (!config.failOn) {
// Default: don't fail fast, let the build system handle errors
return false;
}
if (config.failOn === 'warning') {
// Fail on any warning or error
return true;
}
if (config.failOn === 'error') {
// Fail on errors (but not warnings)
return errorType === 'error' || errorType === 'fatal';
}
return false;
}
/**
* Process HTML content with unified support for both SSI includes and DOM templating
* Uses HTMLRewriter for high-performance processing
* @param {string} htmlContent - Raw HTML content to process
// Check if the layout itself has a data-layout attribute (nested layouts)
const nestedLayoutMatch = layoutContent.match(/data-layout=["']([^"']+)["']/i);
if (nestedLayoutMatch) {
const nestedLayoutPath = nestedLayoutMatch[1];
// Recursively process the nested layout, but pass the current slot data as page content
const slotResult = extractSlotDataFromHTML(pageContent);
const slotApplication = applySlots(layoutContent, slotResult.slots, config);
const layoutWithSlots = slotApplication.result;
// Log any slot warnings
if (slotApplication.warnings.length > 0) {
slotApplication.warnings.forEach(warning => {
logger.warn(`Slot validation: ${warning.message}`);
});
}
// Now process the nested layout with the slot-applied content as the page content
return await processLayoutAttribute(
layoutWithSlots,
nestedLayoutPath,
resolvedLayoutPath, // Use current layout as the source file for nested layout resolution
sourceRoot,
config
);
}filePath - Path to the HTML file being processed
* @param {string} sourceRoot - Source root directory
* @param {DependencyTracker} dependencyTracker - Dependency tracker instance
* @param {Object} config - Processing configuration
* @returns {Promise<string>} Processed HTML content
*/
export async function processHtmlUnified(
htmlContent,
filePath,
sourceRoot,
dependencyTracker,
config = {}
) {
const processingConfig = {
layoutsDir: ".layouts", // Deprecated but kept for compatibility
optimize: config.minify || config.optimize,
...config,
};
try {
logger.debug(
`Using HTMLRewriter for: ${path.relative(sourceRoot, filePath)}`
);
// Track dependencies before processing
if (dependencyTracker) {
dependencyTracker.analyzePage(filePath, htmlContent, sourceRoot);
}
// Always process includes (SSI and DOM) first
let includeResult = await processIncludesWithStringReplacement(
htmlContent,
filePath,
sourceRoot,
processingConfig,
new Set() // Initialize call stack for circular dependency detection
);
// Handle both old string format and new object format
let processedContent;
let extractedAssets = { styles: [], scripts: [] };
if (typeof includeResult === 'object' && includeResult.content !== undefined) {
processedContent = includeResult.content;
extractedAssets = includeResult;
} else {
processedContent = includeResult;
}
// Apply HTML optimization only after all includes are processed
if (processingConfig.optimize !== false) {
logger.debug(`Optimizing HTML content, optimize=${processingConfig.optimize}`);
processedContent = await optimizeHtmlContent(processedContent);
} else {
logger.debug(`Skipping HTML optimization, optimize=${processingConfig.optimize}`);
}
// Handle layouts and slots if needed (after includes and optimization)
if (shouldUseDOMMode(processedContent)) {
processedContent = await processDOMMode(
processedContent,
filePath,
sourceRoot,
processingConfig,
extractedAssets
);
} else if (
hasDOMTemplating(processedContent) ||
!processedContent.includes("<html")
) {
processedContent = await processDOMTemplating(
processedContent,
filePath,
sourceRoot,
processingConfig,
extractedAssets // Pass extracted assets to DOM templating
);
} else if (extractedAssets && (extractedAssets.styles?.length > 0 || extractedAssets.scripts?.length > 0)) {
// Apply extracted assets to complete HTML documents even without layouts/templating
processedContent = applyExtractedAssets(processedContent, extractedAssets);
}
// Slot/template injection for HTML files (if layout contains <slot> or <template slot="...">)
// This is now handled in file-processor.js after layout chain is discovered and applied
return {
content: processedContent,
extractedAssets: extractedAssets
};
} catch (error) {
logger.error(
`Unified HTML processing failed for ${path.relative(
sourceRoot,
filePath
)}: ${error.message}`
);
throw error; // Re-throw with original error details
}
/**
* Process includes using string replacement (more reliable for async operations)
* Returns an object with processed content and extracted assets
*/
async function processIncludesWithStringReplacement(htmlContent, filePath, sourceRoot, config = {}, callStack = new Set()) {
let processedContent = typeof htmlContent === 'string' ? htmlContent : '';
const extractedAssets = { styles: [], scripts: [] };
// Defensive: always return a string, even if an error occurs
try {
// Check for circular dependency
if (callStack.has(filePath)) {
const chain = Array.from(callStack);
throw new CircularDependencyError(filePath, chain);
}
// Add current file to call stack
const newCallStack = new Set(callStack);
newCallStack.add(filePath);
// Process SSI-style includes
const includeRegex = /<!--\s*#include\s+(virtual|file)="([^"]+)"\s*-->/g;
let match;
const processedIncludes = new Set();
while ((match = includeRegex.exec(htmlContent)) !== null) {
const [fullMatch, type, includePath] = match;
const includeKey = `${type}:${includePath}`;
if (processedIncludes.has(includeKey)) {
continue; // Avoid processing the same include multiple times
}
processedIncludes.add(includeKey);
try {
const resolvedPath = resolveIncludePathInternal(type, includePath, filePath, sourceRoot);
const includeContent = await fs.readFile(resolvedPath, 'utf-8');
// Recursively process nested includes
const nestedResult = await processIncludesWithStringReplacement(includeContent, resolvedPath, sourceRoot, config, newCallStack);
// SSI includes: Do NOT extract assets, keep Apache SSI behavior (inline everything)
const nestedProcessedContent = (typeof nestedResult === 'object' && nestedResult.content !== undefined)
? nestedResult.content
: nestedResult;
// Replace all occurrences of this include
processedContent = (processedContent || '')
.replace(new RegExp(escapeRegExp(fullMatch), 'g'), nestedProcessedContent || '');
logger.debug(`Processed include: ${includePath} -> ${resolvedPath}`);
} catch (error) {
// Convert file not found errors to IncludeNotFoundError with helpful suggestions
if (error.code === 'ENOENT' && !error.formatForCLI) {
const resolvedPath = resolveIncludePathInternal(type, includePath, filePath, sourceRoot);
error = new IncludeNotFoundError(includePath, filePath, [resolvedPath]);
}
// In fail-fast mode, fail fast on any include error
if (shouldFailFast(config, 'error')) {
// Always throw BuildError for any include error in fail-fast mode
let msg;
if (error instanceof CircularDependencyError) {
msg = `Include circular dependency: ${includePath} in ${filePath}`;
} else if (error instanceof PathTraversalError) {
msg = `Include path traversal: ${includePath} in ${filePath}`;
} else if (error instanceof IncludeNotFoundError) {
msg = `Include not found: ${includePath} in ${filePath}`;
} else {
msg = `Include error: ${includePath} in ${filePath}: ${error.message}`;
}
throw new BuildError(msg, [{ file: filePath, error: msg }]);
}
logger.warn(`Include not found: ${includePath} in ${filePath}`);
processedContent = (processedContent || '')
.replace(new RegExp(escapeRegExp(fullMatch), 'g'), `<!-- Include not found: ${includePath} -->`);
}
}
// Recursively process DOM includes until none remain (up to max depth)
const domIncludeRegex = /<include\s+src="([^"]+)"[^>]*>([\s\S]*?)<\/include>/g;
const selfClosingIncludeRegex = /<include\s+src="([^"]+)"[^>]*\/>/g;
let depth = 0;
const maxDepth = 10;
let hasDomIncludes = true;
while (hasDomIncludes && depth < maxDepth) {
domIncludeRegex.lastIndex = 0;
selfClosingIncludeRegex.lastIndex = 0;
// Process includes with children (slot injection)
let domMatches = [...processedContent.matchAll(domIncludeRegex)];
// Process self-closing includes (no slot injection)
let selfClosingMatches = [...processedContent.matchAll(selfClosingIncludeRegex)];
hasDomIncludes = domMatches.length > 0 || selfClosingMatches.length > 0;
depth++;
// Process includes with slot content
for (const domMatch of domMatches) {
const [fullMatch, src, slotContent] = domMatch;
try {
let resolvedPath;
if (src.startsWith('/')) {
// Absolute path from source root
resolvedPath = path.resolve(sourceRoot, src.replace(/^\/+/,''));
} else {
// Relative path from current file
resolvedPath = path.resolve(path.dirname(filePath), src);
}
// Security: ensure resolved path is within source root
if (!isPathWithinDirectory(resolvedPath, sourceRoot)) {
throw new PathTraversalError(src, sourceRoot);
}
// Read the include content
let includeContent = await fs.readFile(resolvedPath, 'utf-8');
// Extract assets from the component content for DOM includes
const componentAssets = extractComponentAssets(includeContent);
includeContent = componentAssets.content;
extractedAssets.styles.push(...componentAssets.assets.styles);
extractedAssets.scripts.push(...componentAssets.assets.scripts);
// Process slot injection if slot content is provided
if (slotContent && slotContent.trim()) {
includeContent = applySlotInjectionToInclude(includeContent, slotContent);
}
// Recursively process nested includes
const nestedResult = await processIncludesWithStringReplacement(includeContent, resolvedPath, sourceRoot, config, newCallStack);
// If the nested result is an object (with assets), extract them
if (typeof nestedResult === 'object' && nestedResult.content !== undefined) {
includeContent = nestedResult.content;
extractedAssets.styles.push(...nestedResult.styles);
extractedAssets.scripts.push(...nestedResult.scripts);
} else {
includeContent = nestedResult;
}
processedContent = (processedContent || '')
.replace(fullMatch, includeContent || '');
logger.debug(`Processed include element with slots: ${src} -> ${resolvedPath}`);
} catch (error) {
let resolvedPath;
if (src.startsWith('/')) {
resolvedPath = path.resolve(sourceRoot, src.replace(/^\/+/,''));
} else {
resolvedPath = path.resolve(path.dirname(filePath), src);
}
const errorFilePath = resolvedPath || filePath;
if (error.code === 'ENOENT' && !error.formatForCLI) {
error = new IncludeNotFoundError(src, errorFilePath, [resolvedPath]);
}
if (shouldFailFast(config, 'error')) {
let msg;
if (error instanceof CircularDependencyError) {
msg = `Include circular dependency: ${src} in ${errorFilePath}`;
} else if (error instanceof PathTraversalError) {
msg = `Include path traversal: ${src} in ${errorFilePath}`;
} else if (error instanceof IncludeNotFoundError) {
msg = `Include not found: ${src} in ${errorFilePath}`;
} else {
msg = `Include element error: ${src} in ${errorFilePath}: ${error.message}`;
}
throw new BuildError(msg, [{ file: errorFilePath, error: msg }]);
}
logger.warn(`Include element not found: ${src} in ${errorFilePath}`);
processedContent = (processedContent || '')
.replace(fullMatch, `<!-- Include not found: ${src} -->`);
}
}
// Process self-closing includes (no slot injection)
for (const selfClosingMatch of selfClosingMatches) {
const [fullMatch, src] = selfClosingMatch;
try {
let resolvedPath;
if (src.startsWith('/')) {
// Absolute path from source root
resolvedPath = path.resolve(sourceRoot, src.replace(/^\/+/,''));
} else {
// Relative path from current file
resolvedPath = path.resolve(path.dirname(filePath), src);
}
// Security: ensure resolved path is within source root
if (!isPathWithinDirectory(resolvedPath, sourceRoot)) {
throw new PathTraversalError(src, sourceRoot);
}
// Read the include content
let includeContent = await fs.readFile(resolvedPath, 'utf-8');
// Extract assets from the component content for DOM includes
const componentAssets = extractComponentAssets(includeContent);
includeContent = componentAssets.content;
extractedAssets.styles.push(...componentAssets.assets.styles);
extractedAssets.scripts.push(...componentAssets.assets.scripts);
// Recursively process nested includes
const nestedResult = await processIncludesWithStringReplacement(includeContent, resolvedPath, sourceRoot, config, newCallStack);
// If the nested result is an object (with assets), extract them
if (typeof nestedResult === 'object' && nestedResult.content !== undefined) {
includeContent = nestedResult.content;
extractedAssets.styles.push(...nestedResult.styles);
extractedAssets.scripts.push(...nestedResult.scripts);
} else {
includeContent = nestedResult;
}
processedContent = (processedContent || '')
.replace(fullMatch, includeContent || '');
logger.debug(`Processed self-closing include element: ${src} -> ${resolvedPath}`);
} catch (error) {
let resolvedPath;
if (src.startsWith('/')) {
resolvedPath = path.resolve(sourceRoot, src.replace(/^\/+/,''));
} else {
resolvedPath = path.resolve(path.dirname(filePath), src);
}
const errorFilePath = resolvedPath || filePath;
if (error.code === 'ENOENT' && !error.formatForCLI) {
error = new IncludeNotFoundError(src, errorFilePath, [resolvedPath]);
}
if (shouldFailFast(config, 'error')) {
let msg;
if (error instanceof CircularDependencyError) {
msg = `Include circular dependency: ${src} in ${errorFilePath}`;
} else if (error instanceof PathTraversalError) {
msg = `Include path traversal: ${src} in ${errorFilePath}`;
} else if (error instanceof IncludeNotFoundError) {
msg = `Include not found: ${src} in ${errorFilePath}`;
} else {
msg = `Include element error: ${src} in ${errorFilePath}: ${error.message}`;
}
throw new BuildError(msg, [{ file: errorFilePath, error: msg }]);
}
logger.warn(`Self-closing include element not found: ${src} in ${errorFilePath}`);
processedContent = (processedContent || '')
.replace(fullMatch, `<!-- Include not found: ${src} -->`);
}
}
}
return {
content: processedContent,
styles: extractedAssets.styles,
scripts: extractedAssets.scripts
};
} catch (err) {
logger.error('processIncludesWithStringReplacement failed:', err);
// In fail-fast mode, re-throw all build-stopping errors
if (shouldFailFast(config, 'error') && (
err instanceof BuildError ||
err instanceof CircularDependencyError ||
err instanceof PathTraversalError ||
err instanceof IncludeNotFoundError ||
err.name === 'BuildError' ||
err.name === 'CircularDependencyError' ||
err.name === 'PathTraversalError' ||
err.name === 'IncludeNotFoundError'
)) {
throw err;
}
return {
content: '',
styles: [],
scripts: []
};
}
}
/**
* Apply slot injection to included component content
* @param {string} componentContent - The component HTML content with data-slot targets
* @param {string} slotContent - The slot content provided within the include element
* @returns {string} Component content with slots injected
*/
function applySlotInjectionToInclude(componentContent, slotContent) {
// Extract slot providers from the include element's children
const slotProviders = {};
// Match elements with data-slot attributes
const slotElementRegex = /<(\w+)([^>]*\s+data-slot=["']([^"']+)["'][^>]*)>([\s\S]*?)<\/\1>/gi;
let match;
let hasExplicitSlots = false;
while ((match = slotElementRegex.exec(slotContent)) !== null) {
hasExplicitSlots = true;
const [fullElement, tagName, attributes, slotName, innerContent] = match;
// Store the full element content (with the wrapping element but without data-slot)
const cleanedElement = fullElement.replace(/\s+data-slot=["'][^"']+["']/, '');
slotProviders[slotName] = cleanedElement;
}
// If no explicit slot targeting was found, treat the entire content as default slot content
if (!hasExplicitSlots && slotContent.trim()) {
slotProviders['default'] = slotContent.trim();
}
// Apply slots to the component content
let result = componentContent;
// For each slot provider, find and replace the corresponding slot target in the component
for (const [slotName, slotHtml] of Object.entries(slotProviders)) {
// Find elements with data-slot="slotName" in the component and replace their entire element
const targetSlotRegex = new RegExp(
`<(\\w+)([^>]*\\s+data-slot=["']${escapeRegExp(slotName)}["'][^>]*)>[\\s\\S]*?<\\/\\1>`,
'gi'
);
result = result.replace(targetSlotRegex, slotHtml);
}
// Final cleanup: Remove any remaining data-slot attributes from unused slots in the component
result = result.replace(/\s+data-slot=["'][^"']+["']/g, '');
return result;
}
/**
* Escape special regex characters
*/
function escapeRegExp(string) {
return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
/**
* Apply extracted assets to HTML content
* Styles go to head, scripts go to end of body
*/
function applyExtractedAssets(htmlContent, extractedAssets) {
let processedContent = htmlContent;
// Add styles to head (before </head>)
if (extractedAssets.styles && extractedAssets.styles.length > 0) {
const headEndRegex = /<\/head>/i;
const dedupedStyles = [...new Set(extractedAssets.styles)]; // Remove duplicates
const stylesHTML = dedupedStyles.join('\n');
processedContent = processedContent.replace(headEndRegex, `${stylesHTML}\n</head>`);
}
// Add scripts to end of body (before </body>)
if (extractedAssets.scripts && extractedAssets.scripts.length > 0) {
const bodyEndRegex = /<\/body>/i;
const dedupedScripts = [...new Set(extractedAssets.scripts)]; // Remove duplicates
const scriptsHTML = dedupedScripts.join('\n');
processedContent = processedContent.replace(bodyEndRegex, `${scriptsHTML}\n</body>`);
}
return processedContent;
}
/**
* Extract styles and scripts from component HTML content
*/
function extractComponentAssets(htmlContent) {
const assets = {
styles: [],
scripts: []
};
// Extract style tags
const styleRegex = /<style(?:\s[^>]*)?>[\s\S]*?<\/style>/gi;
let styleMatch;
while ((styleMatch = styleRegex.exec(htmlContent)) !== null) {
assets.styles.push(styleMatch[0]);
}
// Extract script tags
const scriptRegex = /<script(?:\s[^>]*)?>[\s\S]*?<\/script>/gi;
let scriptMatch;
while ((scriptMatch = scriptRegex.exec(htmlContent)) !== null) {
assets.scripts.push(scriptMatch[0]);
}
// Remove extracted assets from content
let cleanContent = htmlContent;
cleanContent = cleanContent.replace(styleRegex, '');
cleanContent = cleanContent.replace(scriptRegex, '');
return {
content: cleanContent,
assets
};
}
/**
* Process SSI include directive
*/
async function processIncludeDirective(comment, type, includePath, filePath, sourceRoot, config, callStack = new Set()) {
try {
const resolvedPath = resolveIncludePathInternal(type, includePath, filePath, sourceRoot);
const includeContent = await fs.readFile(resolvedPath, 'utf-8');
// Recursively process nested includes in the included content
const includeResult = await processIncludesWithStringReplacement(includeContent, resolvedPath, sourceRoot, config, callStack);
// Handle both old string format and new object format
const processedContent = (typeof includeResult === 'object' && includeResult.content !== undefined)
? includeResult.content
: includeResult;
comment.replace(processedContent, { html: true });
logger.debug(`Processed include: ${includePath} -> ${resolvedPath}`);
} catch (error) {
logger.warn(`Include not found: ${includePath} in ${filePath}`);
comment.replace(`<!-- Include not found: ${includePath} -->`, { html: true });
}
}
/**
* Process modern include element
*/
async function processIncludeElement(element, src, filePath, sourceRoot, config, callStack = new Set()) {
try {
const resolvedPath = resolveIncludePathInternal('file', src, filePath, sourceRoot);
const includeContent = await fs.readFile(resolvedPath, 'utf-8');
// Recursively process nested includes in the included content
const includeResult = await processIncludesWithStringReplacement(includeContent, resolvedPath, sourceRoot, config, callStack);
// Handle both old string format and new object format
const processedContent = (typeof includeResult === 'object' && includeResult.content !== undefined)
? includeResult.content
: includeResult;
element.setInnerContent(processedContent, { html: true });
logger.debug(`Processed include element: ${src} -> ${resolvedPath}`);
} catch (error) {
// In fail-fast mode, fail fast on any include error
if (shouldFailFast(config, 'error')) {
throw new Error(`Include not found in fail-fast mode: ${src} in ${filePath}`);
}
logger.warn(`Include element not found: ${src} in ${filePath}`);
element.setInnerContent(`<!-- Include not found: ${src} -->`, { html: true });
}
}
/**
* Resolve include path based on type
*/
function resolveIncludePathInternal(type, includePath, currentFile, sourceRoot) {
return resolveIncludePath(type, includePath, currentFile, sourceRoot);
}
/**
* Optimize HTML content with HTMLRewriter
* @param {string} html - HTML content to optimize
* @returns {string} Optimized HTML
*/
async function optimizeHtmlContent(html) {
// HTMLRewriter is always available
// Proceed with optimization
const rewriter = new HTMLRewriter();
// Remove unnecessary whitespace (basic optimization)
rewriter.on('*', {
text(text) {
if (text.lastInTextNode) {
// Collapse multiple whitespace into single space
const optimized = text.text.replace(/\s+/g, ' ');
if (optimized !== text.text) {
text.replace(optimized);
}
}
}
});
// Optimize attributes (remove empty ones)
rewriter.on('*', {
element(element) {
// Remove empty class attributes
const classAttr = element.getAttribute('class');
if (classAttr === '') {
element.removeAttribute('class');
}
// Remove empty id attributes
const idAttr = element.getAttribute('id');
if (idAttr === '') {
element.removeAttribute('id');
}
}
});
const response = new Response(html, {
headers: { 'Content-Type': 'text/html' }
});
const transformedResponse = rewriter.transform(response);
return await transformedResponse.text();
}
/**
* Check if content should use DOM mode processing
* @param {string} content - HTML content to check
* @returns {boolean} True if content has DOM mode features
*/
function shouldUseDOMMode(content) {
return content.includes('<include ') ||
content.includes('data-slot=') ||
content.includes('data-layout=') ||
content.includes('rel="layout"');
}
/**
* Integrated DOM mode processing - handles <include> elements, layouts, and slots
* @param {string} pageContent - Raw HTML content of the page
* @param {string} pagePath - Path to the page file
* @param {string} sourceRoot - Source root directory
* @param {Object} config - DOM processor configuration
* @returns {Promise<string>} Processed HTML content
*/
async function processDOMMode(pageContent, pagePath, sourceRoot, config = {}, extractedAssets = null) {
const domConfig = {
layoutsDir: '.layouts', // Deprecated but kept for compatibility
componentsDir: '.components',
sourceRoot,
...config
};
// Analyze HTML structure
const htmlStructure = analyzeHtmlStructure(pageContent);
// Validate data-layout attributes for fragments early
validateDataLayoutAttributes(pageContent, htmlStructure.isFullDocument);
// Check for explicit layout indicators
const layoutMatch = pageContent.match(/data-layout=["']([^"']+)["']/i);
const linkLayoutHref = htmlStructure.isFullDocument ? extractLinkLayoutHref(pageContent) : null;
const hasExplicitLayout = !!layoutMatch || !!linkLayoutHref;
// For complete HTML documents without explicit layout, don't apply any layout
if (htmlStructure.isFullDocument && !hasExplicitLayout) {
logger.debug(`Skipping layout for complete HTML document: ${path.relative(sourceRoot, pagePath)}`);
return pageContent;
}
// Use the same layout processing logic as processDOMTemplating
const layoutSpec = linkLayoutHref || (layoutMatch ? layoutMatch[1] : null);
if (layoutSpec) {
try {
return await processLayoutAttribute(
pageContent,
layoutSpec,
pagePath,
sourceRoot,
domConfig,
extractedAssets
);
} catch (error) {
// In fail-fast mode, fail fast on layout detection errors
if (shouldFailFast(domConfig, 'error')) {
throw new Error(`Layout not found in fail-fast mode for ${path.relative(sourceRoot, pagePath)}: ${error.message}`);
}
// Graceful degradation: if specific layout is missing, log warning and continue with discovery
logger.warn(`Layout not found for ${path.relative(sourceRoot, pagePath)}: ${error.message}`);
}
}
// Fall back to layout discovery
let layoutPath;
try {
layoutPath = await detectLayoutFromHTML(pageContent, sourceRoot, domConfig, pagePath);
logger.debug(`Using discovered layout: ${layoutPath}`);
// Use processLayoutAttribute for consistent processing
const relativeLayoutPath = path.relative(path.dirname(pagePath), layoutPath);
return await processLayoutAttribute(
pageContent,
relativeLayoutPath,
pagePath,
sourceRoot,
domConfig,
extractedAssets
);
} catch (error) {
// Use shouldFailFast to determine whether to throw or warn
if (shouldFailFast(domConfig, 'error')) {
throw new Error(`Layout not found for ${path.relative(sourceRoot, pagePath)}: ${error.message}`);
}
logger.warn(`Layout not found for ${path.relative(sourceRoot, pagePath)}: ${error.message}`);
return `<!DOCTYPE html>
<html>
<head>
<title>Page</title>
</head>
<body>
${pageContent}
</body>
</html>`;
}
}
/**
* Detect if HTML content is a full document or page fragment
* @param {string} htmlContent - HTML content to analyze
* @returns {Object} Analysis result with document type and structure info
*/
function analyzeHtmlStructure(htmlContent) {
const hasDoctype = /<!DOCTYPE\s+html/i.test(htmlContent);
const hasHtmlTag = /<html[^>]*>/i.test(htmlContent);
const hasHeadTag = /<head[^>]*>/i.test(htmlContent);
const hasBodyTag = /<body[^>]*>/i.test(htmlContent);
const isFullDocument = hasDoctype && hasHtmlTag && hasHeadTag && hasBodyTag;
return {
isFullDocument,
hasDoctype,
hasHtmlTag,
hasHeadTag,
hasBodyTag
};
}
/**
* Extract link rel=layout from HTML head element
* @param {string} htmlContent - HTML content to search
* @returns {string|null} Layout href value or null if not found
*/
function extractLinkLayoutHref(htmlContent) {
// Only look in head section for link rel=layout
const headMatch = htmlContent.match(/<head[^>]*>([\s\S]*?)<\/head>/i);
if (!headMatch) {
return null;
}
const headContent = headMatch[1];
const linkMatch = headContent.match(/<link[^>]+rel=["']layout["'][^>]*>/i);
if (!linkMatch) {
return null;
}
const hrefMatch = linkMatch[0].match(/href=["']([^"']+)["']/i);
return hrefMatch ? hrefMatch[1] : null;
}
/**
* Validate data-layout attributes in fragments
* @param {string} htmlContent - HTML content to validate
* @param {boolean} isFullDocument - Whether this is a full HTML document
* @throws {Error} If multiple data-layout attributes found in fragment
*/
function validateDataLayoutAttributes(htmlContent, isFullDocument) {
if (isFullDocument) {
return; // Full documents can have data-layout in any element
}
// For fragments, count data-layout attributes
const dataLayoutMatches = htmlContent.match(/data-layout=["'][^"']*["']/gi);
if (dataLayoutMatches && dataLayoutMatches.length > 1) {
throw new BuildError(
'Fragment pages cannot have multiple data-layout attributes',
[{ error: `Found ${dataLayoutMatches.length} data-layout attributes in fragment` }]
);
}
}
/**
* Detect which layout to use for a page using regex-based HTML parsing
*/
async function detectLayoutFromHTML(htmlContent, sourceRoot, config, pagePath) {
const htmlStructure = analyzeHtmlStructure(htmlContent);
// For full HTML documents, check link rel=layout first (highest priority)
if (htmlStructure.isFullDocument) {
const linkLayoutHref = extractLinkLayoutHref(htmlContent);
if (linkLayoutHref) {
logger.debug(`Found link rel=layout: ${linkLayoutHref}`);
const { LayoutDiscovery } = await import('./layout-discovery.js');
const discovery = new LayoutDiscovery();
const resolvedLayoutPath = await discovery.resolveLayoutOverride(linkLayoutHref, sourceRoot, pagePath);
if (resolvedLayoutPath) {
return resolvedLayoutPath;
}
throw new LayoutError(
pagePath,
`Layout not found via link rel=layout: ${linkLayoutHref}`,
[sourceRoot]
);
}
}
// Look for data-layout attribute in HTML content (lower priority)
const layoutMatch = htmlContent.match(/data-layout=["']([^"']+)["']/i);
if (layoutMatch) {
const layoutAttr = layoutMatch[1];
// Use LayoutDiscovery system for both full paths and short names
const { LayoutDiscovery } = await import('./layout-discovery.js');
const discovery = new LayoutDiscovery();
const resolvedLayoutPath = await discovery.resolveLayoutOverride(layoutAttr, sourceRoot, pagePath);
if (resolvedLayoutPath) {
return resolvedLayoutPath;
}
// If layout override resolution failed, throw error
throw new LayoutError(
pagePath,
`Layout not found: ${layoutAttr}`,
[sourceRoot]
);
}
// Fall back to discovered layout using LayoutDiscovery
const { LayoutDiscovery } = await import('./layout-discovery.js');
const discovery = new LayoutDiscovery();
return await discovery.findLayoutForPage(pagePath, sourceRoot);
}
/**
* Process includes in HTML content (both SSI and <include> elements)
*/
async function processIncludesInHTML(htmlContent, layoutPath, sourceRoot, config) {
// Process SSI includes first (already done in main flow, but handle any in layout)
let result = await processIncludes(
htmlContent,
layoutPath, // Use layout path for proper include resolution
sourceRoot,
new Set(),
0,
null, // No dependency tracker needed
shouldFailFast(config)
);
// Then process <include> elements if any remain
const includeRegex = /<include\s+([^>]+)\/??\s*>/gi;
const allStyles = [];
const allScripts = [];
// Process includes recursively until no more are found
let hasIncludes = true;
while (hasIncludes) {
hasIncludes = false;
let match;
while ((match = includeRegex.exec(result)) !== null) {
hasIncludes = true;
const fullMatch = match[0];
const attrs = match[1];
const srcMatch = attrs.match(/src=["']([^"']+)["']/i);
if (!srcMatch) continue;
const src = srcMatch[1];
let resolvedPath;
try {
if (src.startsWith('/')) {
resolvedPath = path.resolve(sourceRoot, src.replace(/^\/+/,''));
} else {
resolvedPath = path.resolve(path.dirname(layoutPath), src);
}
logger.debug('[UNIFY] Attempting to resolve DOM include:', { src, fromFile: layoutPath, resolvedPath });
if (!isPathWithinDirectory(resolvedPath, sourceRoot)) {
throw new PathTraversalError(src, sourceRoot);
}
// Check if this is a component (in components directory)
const isComponent = resolvedPath.includes(config.componentsDir) || resolvedPath.includes('.components');
console.log('[DEBUG] Processing DOM include:', { src, resolvedPath, isComponent, componentsDir: config.componentsDir });
console.log('[DEBUG] Component check:', {
includesComponentsDir: resolvedPath.includes(config.componentsDir),
includesDotComponents: resolvedPath.includes('.components'),
resolvedPath,
componentsDir: config.componentsDir
});
if (isComponent) {
console.log('[DEBUG] Processing as component with asset extraction');
// Use component processing with asset extraction
const component = await loadAndProcessComponent(src, {}, sourceRoot, config);
console.log('[DEBUG] Component processed, styles:', component.styles.length, 'scripts:', component.scripts.length);
// Recursively process any nested includes in the component content
const componentResult = await processIncludesWithStringReplacement(component.content, resolvedPath, sourceRoot, config, new Set());
const componentContent = (typeof componentResult === 'object' && componentResult.content !== undefined)
? componentResult.content
: componentResult;
result = result.replace(fullMatch, componentContent);
// Collect extracted assets
allStyles.push(...component.styles);
allScripts.push(...component.scripts);
console.log('[DEBUG] Total styles collected:', allStyles.length, 'scripts:', allScripts.length);
} else {
// Regular include processing for non-components
let includeContent = await fs.readFile(resolvedPath, 'utf-8');
const includeResult = await processIncludesWithStringReplacement(includeContent, resolvedPath, sourceRoot, config, new Set());
const processedIncludeContent = (typeof includeResult === 'object' && includeResult.content !== undefined)
? includeResult.content
: includeResult;
result = result.replace(fullMatch, processedIncludeContent);
}
logger.debug('[UNIFY] Successfully processed DOM include:', src, '->', resolvedPath);
} catch (error) {
if (src.startsWith('/')) {
resolvedPath = path.resolve(sourceRoot, src.replace(/^\/+/,''));
} else {
resolvedPath = path.resolve(path.dirname(layoutPath), src);
}
logger.error('[UNIFY] Failed to resolve DOM include:', { src, fromFile: layoutPath, resolvedPath, error: error.message });
if (error.code === 'ENOENT' && !error.formatForCLI) {
error = new IncludeNotFoundError(src, layoutPath, [resolvedPath]);
}
if (shouldFailFast(config, 'error')) {
if (error instanceof CircularDependencyError || error instanceof PathTraversalError || error instanceof IncludeNotFoundError) {
throw error;
}
throw new Error('Include element not found in fail-fast mode: ' + src + ' in ' + layoutPath);
}
logger.warn('[UNIFY] Include element not found:', src, 'in', layoutPath);
result = result.replace(fullMatch, '<!-- Include not found: ' + src + ' -->');
}
}
// Reset regex to find new includes in the updated content
includeRegex.lastIndex = 0;
}
// Clean up any remaining artifacts
result = cleanupDOMOutput(result);
// Move styles to head and scripts to end of body
if (allStyles.length > 0) {
const headEndRegex = /<\/head>/i;
const dedupedStyles = [...new Set(allStyles)]; // Remove duplicates
const stylesHTML = dedupedStyles.join('\n');
result = result.replace(headEndRegex, `${stylesHTML}\n</head>`);
}
if (allScripts.length > 0) {
const bodyEndRegex = /<\/body>/i;
const dedupedScripts = [...new Set(allScripts)]; // Remove duplicates
const scriptsHTML = dedupedScripts.join('\n');
result = result.replace(bodyEndRegex, `${scriptsHTML}\n</body>`);
}
return result;
}
/**
* Load and process a component
*/
async function loadAndProcessComponent(src, unused, sourceRoot, config) {
// Resolve component path
const componentPath = resolveResourcePath(src, sourceRoot, config.componentsDir, 'component');
// Load component
const componentContent = await fs.readFile(componentPath, 'utf-8');
// Just return the component content as-is, without token replacement
let processedContent = componentContent;
// Extract and remove styles and scripts
const styleRegex = /<style[^>]*>[\s\S]*?<\/style>/gi;
const scriptRegex = /<script[^>]*>[\s\S]*?<\/script>/gi;
const styles = [...processedContent.matchAll(styleRegex)].map(match => match[0]);
const scripts = [...processedContent.matchAll(scriptRegex)].map(match => match[0]);
// Remove styles and scripts from component content
processedContent = processedContent.replace(styleRegex, '');
processedContent = processedContent.replace(scriptRegex, '');
return {
content: processedContent,
styles,
scripts
};
}
/**
* Clean up DOM output by removing stray tags and artifacts
*/
function cleanupDOMOutput(html) {
let result = html;
// Remove stray closing include tags
result = result.replace(/<\/include>/gi, '');
// Remove stray closing slot tags
result = result.replace(/<\/slot>/gi, '');
// Remove any remaining self-closing include tags that weren't processed
result = result.replace(/<include[^>]*\/>/gi, '');
// Remove any remaining opening include tags that weren't processed
result = result.replace(/<include[^>]*>/gi, '');
// Clean up multiple consecutive empty lines
result = result.replace(/\n\s*\n\s*\n/g, '\n\n');
return result;
}
/**
* Check if content contains DOM templating syntax
* @param {string} content - HTML content to check
* @returns {boolean} True if content has DOM templating
*/
function hasDOMTemplating(content) {
// Check for data-slot attributes, template elements, or layout attributes
return (
content.includes("data-slot=") ||
content.includes("<template") ||
content.includes("data-layout=") ||
content.includes('rel="layout"')
);
}
/**
* Extract slot content from HTML using regex-based parsing (v0.5.0 spec-compliant)
* Supports both <template data-slot="name"> and regular elements with data-slot="name" attribute
* @param {string} htmlContent - HTML content to extract slots from
* @returns {Object} Object with slots, styles, scripts, and slot metadata
*/
function extractSlotDataFromHTML(htmlContent) {
const slots = {};
const slotOrder = {}; // Track document order for multiple assignments
const extractedStyles = [];
const extractedScripts = [];
let match;
let orderIndex = 0;
// Extract named slots with data-slot attribute on template elements
const templateSlotRegex = /<template[^>]+data-slot=["']([^"']+)["'][^>]*>([\s\S]*?)<\/template>/gi;
while ((match = templateSlotRegex.exec(htmlContent)) !== null) {
const slotName = match[1];
const content = match[2];
if (!slots[slotName]) {
slots[slotName] = [];
slotOrder[slotName] = [];
}
slots[slotName].push(content);
slotOrder[slotName].push(orderIndex++);
}
// Extract named slots with data-slot attribute on regular elements
// Use a more robust regex that handles data-slot attribute anywhere in the tag
const elementSlotRegex = /<(\w+)([^>]*\s+data-slot=["']([^"']+)["'][^>]*)>([\s\S]*?)<\/\1>/gi;
while ((match = elementSlotRegex.exec(htmlContent)) !== null) {
const tagName = match[1];
const slotName = match[3]; // slot name is now in group 3
const fullElement = match[0];
// Skip template elements (already handled above)
if (tagName.toLowerCase() === 'template') continue;
if (!slots[slotName]) {
slots[slotName] = [];
slotOrder[slotName] = [];
}
// Remove the data-slot attribute from the element when adding to slot
const cleanedElement = fullElement.replace(/\s+data-slot=["'][^"']+["']/, '');
slots[slotName].push(cleanedElement);
slotOrder[slotName].push(orderIndex++);
}
// Extract default slot content from template without data-slot attributes
const defaultTemplateRegex = /<template(?!\s+data-slot=)[^>]*>([\s\S]*?)<\/template>/gi;
match = defaultTemplateRegex.exec(htmlContent);
if (match) {
if (!slots['default']) {
slots['default'] = [];
slotOrder['default'] = [];
}
slots['default'].push(match[1]);
slotOrder['default'].push(orderIndex++);
} else {
// Extract default slot content (everything not in a template or with data-slot attribute)
let defaultContent = htmlContent;
// Remove all template elements
defaultContent = defaultContent.replace(/<template[^>]*>[\s\S]*?<\/template>/gi, '');
// Remove all elements with data-slot attribute
defaultContent = defaultContent.replace(/<(\w+)([^>]*\s+data-slot=["'][^"']+["'][^>]*)>([\s\S]*?)<\/\1>/gi, '');
// Extract and preserve script and style elements instead of removing them
const styleRegex = /<style[^>]*>([\s\S]*?)<\/style>/gi;
while ((match = styleRegex.exec(defaultContent)) !== null) {
extractedStyles.push(match[0]); // Keep the full <style>...</style> tag
}
const scriptRegex = /<script[^>]*>([\s\S]*?)<\/script>/gi;
while ((match = scriptRegex.exec(defaultContent)) !== null) {
extractedScripts.push(match[0]); // Keep the full <script>...</script> tag
}
// Now remove them from the content (but they're preserved above)
defaultContent = defaultContent.replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '');
defaultContent = defaultContent.replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '');
// Remove data-layout attribute
defaultContent = defaultContent.replace(/\s*data-layout=["'][^"']*["']/gi, '');
// Remove html, head, body tags if present to extract just content
defaultContent = defaultContent.replace(/<\/?(?:html|head|body)[^>]*>/gi, '');
// Remove the outer wrapper div/element if it exists (e.g., <div data-layout="...">)
const wrapperMatch = defaultContent.match(/^<[^>]*>([\s\S]*)<\/[^>]*>$/);
if (wrapperMatch) {
defaultContent = wrapperMatch[1];
}
defaultContent = defaultContent.trim();
if (defaultContent) {
if (!slots['default']) {
slots['default'] = [];
slotOrder['default'] = [];
}
slots['default'].push(defaultContent);
slotOrder['default'].push(orderIndex++);
}
}
// Convert slot arrays to strings, preserving document order
const consolidatedSlots = {};
for (const [slotName, contents] of Object.entries(slots)) {
if (contents.length === 1) {
consolidatedSlots[slotName] = contents[0];
} else if (contents.length > 1) {
// Multiple assignments to same slot - preserve document order
const orderedContents = contents
.map((content, idx) => ({ content, order: slotOrder[slotName][idx] }))
.sort((a, b) => a.order - b.order)
.map(item => item.content);
consolidatedSlots[slotName] = orderedContents.join('\n');
}
}
return {
slots: consolidatedSlots,
styles: extractedStyles,
scripts: extractedScripts,
hasMultipleAssignments: Object.values(slots).some(arr => arr.length > 1)
};
}
/**
* Apply slot content to layout using string replacement (v0.5.0 spec-compliant)
* Properly handles fallback content and validation warnings
* @param {string} layoutContent - Layout HTML content
* @param {Object} slotData - Slot data to apply
* @param {Object} config - Configuration for validation
* @returns {Object} Object with result HTML and validation warnings
*/
function applySlots(layoutContent, slotData, config = {}) {
let result = layoutContent;
const warnings = [];
const usedSlots = new Set();
// Find all data-slot names in the layout for validation
const layoutSlotNames = new Set();
const slotNameRegex = /data-slot=["']([^"']+)["']/gi;
let match;
while ((match = slotNameRegex.exec(layoutContent)) !== null) {
layoutSlotNames.add(match[1]);
}
// Check for unmatched slot names in page content
for (const slotName of Object.keys(slotData)) {
if (slotName !== 'default' && !layoutSlotNames.has(slotName)) {
warnings.push({
type: 'unmatched-slot',
message: `Page defines slot "${slotName}" but layout has no matching data-slot="${slotName}"`
});
}
}
// Replace named slots with content or fallback
for (const [slotName, content] of Object.entries(slotData)) {
if (slotName === 'default') continue;
// Find elements with data-slot="slotName" and replace their content
// Use a more precise regex that matches the specific opening tag and its corresponding closing tag
const namedSlotRegex = new RegExp(`(<(\\w+)([^>]*\\s+data-slot=["']${escapeRegex(slotName)}["'][^>]*)>)([\\s\\S]*?)(<\\/\\2>)`, 'gi');
if (namedSlotRegex.test(result)) {
usedSlots.add(slotName);
// Reset regex lastIndex
namedSlotRegex.lastIndex = 0;
// Replace the content between opening and closing tags and remove data-slot attribute
result = result.replace(namedSlotRegex, (match, openingTag, tagName, attributes, oldContent, closingTag) => {
// Remove the data-slot attribute from the opening tag
const cleanedAttributes = attributes.replace(/\s+data-slot=["'][^"']+["']/, '');
const cleanedOpeningTag = `<${tagName}${cleanedAttributes}>`;
return `${cleanedOpeningTag}${content}${closingTag}`;
});
}
}
// Replace default slot with content or fallback
// Check if default content is meaningful (not just whitespace/comments)
const hasMeaningfulDefaultContent = slotData.default &&
slotData.default.replace(/<!--[\s\S]*?-->/g, '').trim().length > 0;
// Look for data-slot="default" first
const defaultSlotRegex = /(<(\w+)([^>]*\s+data-slot=["']default["'][^>]*)>)([\s\S]*?)(<\/\2>)/gi;
const hasDefaultSlot = defaultSlotRegex.test(result);
if (hasDefaultSlot && hasMeaningfulDefaultContent) {
// Page has meaningful default content, replace default slot content
defaultSlotRegex.lastIndex = 0;
result = result.replace(defaultSlotRegex, (match, openingTag, tagName, attributes, oldContent, closingTag) => {
// Remove the data-slot attribute from the opening tag
const cleanedAttributes = attributes.replace(/\s+data-slot=["'][^"']+["']/, '');
const cleanedOpeningTag = `<${tagName}${cleanedAttributes}>`;
return `${cleanedOpeningTag}${slotData.default}${closingTag}`;
});
} else if (!hasDefaultSlot && hasMeaningfulDefaultContent) {
// No data-slot="default" found, look for first <main> element as fallback
const mainElementRegex = /(<main[^>]*>)([\s\S]*?)(<\/main>)/i;
const hasMainElement = mainElementRegex.test(result);
if (hasMainElement)