@fwdslsh/unify
Version:
A lightweight, framework-free static site generator with Bun native APIs
387 lines (337 loc) • 13.1 kB
JavaScript
/**
* Asset Reference Tracker for unify
* Tracks which assets are actually referenced in HTML content
*/
import path from 'path';
import { logger } from '../utils/logger.js';
/**
 * Matches references that begin with a URL scheme (`http:`, `data:`, `mailto:`, ...).
 * At least two characters are required before the colon so that a Windows
 * drive letter such as `C:` is not mistaken for a scheme.
 */
const URL_SCHEME_PATTERN = /^[a-z][a-z0-9+.-]+:/i;

/**
 * Patterns used to find asset references in HTML. Each pattern captures the
 * referenced path in group 1 (the icon pattern uses group 1 or group 2,
 * depending on whether `rel` or `href` comes first).
 *
 * They are only ever consumed through `String.prototype.matchAll`, which works
 * on a clone of the regex, so sharing these global-flag instances is safe.
 */
const HTML_ASSET_PATTERNS = Object.freeze([
  // CSS files
  /<link[^>]+href=["']([^"']+\.css)["']/gi,
  // JavaScript files
  /<script[^>]+src=["']([^"']+\.js)["']/gi,
  // Images in img tags
  /<img[^>]+src=["']([^"']+\.(png|jpg|jpeg|gif|svg|webp|ico))["']/gi,
  // Link icons (favicon, apple-icon, etc.)
  /<link[^>]+(?:rel=["'](?:icon|apple-touch-icon|shortcut icon)["'][^>]*href=["']([^"']+\.[^"']+)["']|href=["']([^"']+\.[^"']+)["'][^>]*rel=["'](?:icon|apple-touch-icon|shortcut icon)["'])/gi,
  // Background images in style attributes
  /style=["'][^"']*background-image:\s*url\(["']?([^"')]+)["']?\)/gi,
  // Fonts in link tags
  /<link[^>]+href=["']([^"']+\.(woff2?|ttf|eot|otf))["']/gi,
  // Video/audio sources
  /<(?:video|audio)[^>]+src=["']([^"']+\.(mp4|webm|ogg|mp3|wav))["']/gi,
  // Source elements
  /<source[^>]+src=["']([^"']+)["']/gi,
  // Object data attributes
  /<object[^>]+data=["']([^"']+)["']/gi,
  // Generic href/src attributes for other files
  /(?:href|src)=["']([^"']+\.(pdf|zip|doc|docx|txt|json))["']/gi,
]);

/**
 * Decide whether a raw reference can point at a file inside the project.
 * External URLs (any scheme, or protocol-relative `//host/...`), data URLs,
 * pure fragments (`#id`) and empty values are rejected.
 * @param {*} reference - Raw reference text taken from HTML or CSS
 * @returns {boolean} True when the reference should be resolved against the source tree
 */
function isLocalAssetReference(reference) {
  if (typeof reference !== 'string') {
    return false;
  }
  const trimmed = reference.trim();
  if (trimmed === '' || trimmed.startsWith('#') || trimmed.startsWith('//')) {
    return false;
  }
  return !URL_SCHEME_PATTERN.test(trimmed);
}

/**
 * Asset tracker for managing asset references and dependencies.
 *
 * Keeps three views of the same data in sync:
 *  - `assetReferences`: asset path -> array of page paths referencing it
 *  - `referencedAssets`: set of every asset path referenced by at least one page
 *  - `htmlAssetCache`: page path -> array of asset paths recorded for that page
 */
export class AssetTracker {
  constructor() {
    // Maps asset file path to array of pages that reference it
    this.assetReferences = new Map();
    // Set of all referenced assets for quick lookup
    this.referencedAssets = new Set();
    // Cache of the asset references recorded per page (HTML and CSS derived)
    this.htmlAssetCache = new Map();
  }

  /**
   * Extract asset references from HTML content
   * @param {string} htmlContent - HTML content to analyze
   * @param {string} pagePath - Path to the page file
   * @param {string} sourceRoot - Source root directory
   * @returns {string[]} Unique resolved asset paths, in order of discovery
   */
  extractAssetReferences(htmlContent, pagePath, sourceRoot) {
    if (typeof htmlContent !== 'string' || htmlContent === '') {
      return [];
    }

    const references = new Set();
    for (const pattern of HTML_ASSET_PATTERNS) {
      for (const match of htmlContent.matchAll(pattern)) {
        // The icon pattern fills group 2 instead of group 1 when href precedes rel
        const rawReference = match[1] ?? match[2];
        if (!isLocalAssetReference(rawReference)) {
          continue;
        }
        const resolvedPath = this.resolveAssetPath(rawReference, pagePath, sourceRoot);
        if (resolvedPath) {
          references.add(resolvedPath);
        }
      }
    }
    return Array.from(references);
  }

  /**
   * Extract asset references from CSS content
   * @param {string} cssContent - CSS content to analyze
   * @param {string} cssPath - Path to the CSS file
   * @param {string} sourceRoot - Source root directory
   * @returns {string[]} Unique resolved asset paths, in order of discovery
   */
  extractCssAssetReferences(cssContent, cssPath, sourceRoot) {
    if (typeof cssContent !== 'string' || cssContent === '') {
      return [];
    }

    // The url() scan covers every url(...) in the stylesheet, including the
    // ones inside @font-face src lists, so no dedicated @font-face pass is
    // needed. The @import scan picks up the quoted form without url().
    const scans = [
      { label: 'URL', pattern: /url\(\s*["']?([^"')]+)["']?\s*\)/gi },
      { label: 'import', pattern: /@import\s+(?:url\()?\s*["']([^"']+)["']?\s*\)?/gi },
    ];

    const references = new Set();
    for (const { label, pattern } of scans) {
      for (const match of cssContent.matchAll(pattern)) {
        const rawReference = match[1];
        if (!isLocalAssetReference(rawReference)) {
          continue;
        }
        const resolvedPath = this.resolveAssetPath(rawReference, cssPath, sourceRoot);
        if (resolvedPath) {
          references.add(resolvedPath);
          logger.debug(`Extracted ${label} reference: ${resolvedPath}`);
        }
      }
    }
    return Array.from(references);
  }

  /**
   * Resolve asset path relative to page and source root.
   *
   * A leading `/` means "relative to the source root"; anything else is
   * resolved against the directory of the referencing file. Query strings and
   * fragments (`?v=2`, `?#iefix`, `#t=10`) are dropped because they are not
   * part of the file name on disk.
   * @param {string} assetPath - Asset path from HTML or CSS
   * @param {string} pagePath - Path to the referencing page or stylesheet
   * @param {string} sourceRoot - Source root directory
   * @returns {string|null} Resolved asset path, or null if it is empty,
   *   cannot be resolved, or lies outside the source root
   */
  resolveAssetPath(assetPath, pagePath, sourceRoot) {
    if (typeof assetPath !== 'string') {
      return null;
    }

    const filePart = assetPath.split(/[?#]/, 1)[0].trim();
    if (filePart === '') {
      return null;
    }

    try {
      const resolvedPath = filePart.startsWith('/')
        ? path.join(sourceRoot, filePart.slice(1))
        : path.resolve(path.dirname(pagePath), filePart);

      // Ensure the resolved path is within source root. path.relative yields
      // '..' or '..<sep>...' when the target is above the root, and an
      // absolute path when the two are on different Windows drives.
      const relativePath = path.relative(sourceRoot, resolvedPath);
      const isOutsideRoot =
        relativePath === '..' ||
        relativePath.startsWith(`..${path.sep}`) ||
        path.isAbsolute(relativePath);
      if (isOutsideRoot) {
        logger.debug(`Asset path outside source root: ${assetPath}`);
        return null;
      }
      return resolvedPath;
    } catch (error) {
      logger.debug(`Could not resolve asset path: ${assetPath} from ${pagePath}`);
      return null;
    }
  }

  /**
   * Record asset references for a page.
   *
   * Collects the assets referenced by the HTML, then follows every referenced
   * stylesheet (and its @import chain) to collect the assets those reference.
   * Previously recorded references for the page are replaced.
   * @param {string} pagePath - Path to the page file
   * @param {string} htmlContent - HTML content to analyze
   * @param {string} sourceRoot - Source root directory
   * @returns {Promise<void>}
   */
  async recordAssetReferences(pagePath, htmlContent, sourceRoot) {
    const htmlAssets = this.extractAssetReferences(htmlContent, pagePath, sourceRoot);

    // A Set keeps discovery order while ensuring an asset referenced by both
    // the HTML and a stylesheet is recorded only once for this page.
    const pageAssets = new Set(htmlAssets);
    // Prevent infinite loops in case of circular imports
    const visitedStylesheets = new Set();

    const { readFile } = await import('fs/promises');

    const collectFromStylesheet = async (cssPath) => {
      if (visitedStylesheets.has(cssPath)) {
        logger.debug(`Skipping already processed CSS file: ${cssPath}`);
        return;
      }
      logger.debug(`Processing CSS file: ${cssPath}`);
      visitedStylesheets.add(cssPath);

      let cssContent;
      try {
        cssContent = await readFile(cssPath, 'utf-8');
      } catch (error) {
        // CSS file might not exist or be readable, continue without error
        logger.debug(`Could not read CSS file for asset extraction: ${cssPath}`);
        return;
      }

      for (const cssRef of this.extractCssAssetReferences(cssContent, cssPath, sourceRoot)) {
        pageAssets.add(cssRef);
        // If this reference is another CSS file, process it recursively
        if (cssRef.endsWith('.css')) {
          logger.debug(`Found nested CSS import: ${cssRef}`);
          await collectFromStylesheet(cssRef);
        }
      }
    };

    // Process all CSS files found in HTML
    for (const assetPath of htmlAssets) {
      if (assetPath.endsWith('.css')) {
        await collectFromStylesheet(assetPath);
      }
    }

    const allAssets = Array.from(pageAssets);

    // Clear and re-record in one synchronous step, after all awaits, so that
    // overlapping calls for the same page cannot both append their results.
    this.clearPageAssetReferences(pagePath);
    for (const assetPath of allAssets) {
      let pages = this.assetReferences.get(assetPath);
      if (!pages) {
        pages = [];
        this.assetReferences.set(assetPath, pages);
      }
      pages.push(pagePath);
      this.referencedAssets.add(assetPath);
    }

    // Cache for this page (include both HTML and CSS references)
    this.htmlAssetCache.set(pagePath, allAssets);

    if (allAssets.length > 0) {
      logger.debug(`Found ${allAssets.length} asset references in ${pagePath}`);
    }
  }

  /**
   * Clear asset references for a specific page
   * @param {string} pagePath - Path to the page file
   */
  clearPageAssetReferences(pagePath) {
    const cachedAssets = this.htmlAssetCache.get(pagePath);
    if (!cachedAssets) {
      return;
    }

    for (const assetPath of cachedAssets) {
      const pages = this.assetReferences.get(assetPath);
      if (!pages) {
        continue;
      }
      const index = pages.indexOf(pagePath);
      if (index > -1) {
        pages.splice(index, 1);
      }
      // Drop assets that no page references any more
      if (pages.length === 0) {
        this.assetReferences.delete(assetPath);
        this.referencedAssets.delete(assetPath);
      }
    }
    this.htmlAssetCache.delete(pagePath);
  }

  /**
   * Check if an asset is referenced by any page
   * @param {string} assetPath - Path to the asset file
   * @returns {boolean} True if asset is referenced
   */
  isAssetReferenced(assetPath) {
    return this.referencedAssets.has(assetPath);
  }

  /**
   * Get all pages that reference a specific asset
   * @param {string} assetPath - Path to the asset file
   * @returns {string[]} Copy of the page paths that reference the asset
   */
  getPagesThatReference(assetPath) {
    return [...(this.assetReferences.get(assetPath) ?? [])];
  }

  /**
   * Get all referenced assets
   * @returns {string[]} Array of all referenced asset paths
   */
  getAllReferencedAssets() {
    return Array.from(this.referencedAssets);
  }

  /**
   * Get all assets referenced by a specific page
   * @param {string} pagePath - Path to the page file
   * @returns {string[]} Copy of the asset paths recorded for the page
   */
  getPageAssets(pagePath) {
    return [...(this.htmlAssetCache.get(pagePath) ?? [])];
  }

  /**
   * Remove all records of a page (when page is deleted)
   * @param {string} pagePath - Path to the deleted page
   */
  removePage(pagePath) {
    this.clearPageAssetReferences(pagePath);
    logger.debug(`Removed page from asset tracking: ${pagePath}`);
  }

  /**
   * Get asset reference statistics for debugging
   * @returns {Object} Statistics about tracked asset references
   */
  getStats() {
    let totalAssetReferences = 0;
    for (const pages of this.assetReferences.values()) {
      totalAssetReferences += pages.length;
    }
    return {
      totalReferencedAssets: this.referencedAssets.size,
      totalAssetReferences,
      pagesWithAssets: this.htmlAssetCache.size,
    };
  }

  /**
   * Clear all asset reference data
   */
  clear() {
    this.assetReferences.clear();
    this.referencedAssets.clear();
    this.htmlAssetCache.clear();
    logger.debug('Cleared all asset reference data');
  }

  /**
   * Export asset reference data for debugging or persistence.
   * The arrays in the result are copies, so mutating the snapshot does not
   * affect the tracker.
   * @returns {Object} Serializable asset reference data
   */
  export() {
    const copyEntries = (map) =>
      Object.fromEntries(Array.from(map, ([key, values]) => [key, [...values]]));

    return {
      assetReferences: copyEntries(this.assetReferences),
      referencedAssets: Array.from(this.referencedAssets),
      htmlAssetCache: copyEntries(this.htmlAssetCache),
    };
  }

  /**
   * Import asset reference data, replacing everything currently tracked.
   * Arrays are copied so later changes to `data` do not leak into the tracker.
   * A null or undefined `data` simply leaves the tracker empty.
   * @param {Object} data - Asset reference data to import
   */
  import(data) {
    this.clear();

    const { assetReferences, referencedAssets, htmlAssetCache } = data ?? {};
    const toMap = (record) =>
      new Map(Object.entries(record).map(([key, values]) => [key, Array.from(values ?? [])]));

    if (assetReferences) {
      this.assetReferences = toMap(assetReferences);
    }
    if (referencedAssets) {
      this.referencedAssets = new Set(referencedAssets);
    }
    if (htmlAssetCache) {
      this.htmlAssetCache = toMap(htmlAssetCache);
    }
  }
}