/*
 * mirror-web-cli
 * Version: (not specified)
 * Professional website mirroring tool with intelligent framework preservation,
 * AI-powered analysis, and comprehensive asset optimization.
 * Source listing metadata: 1,099 lines (1,006 loc), 37.3 kB, JavaScript.
 */
import chalk from 'chalk';
import fs from 'fs-extra';
import path from 'path';
/** Escapes a value for safe inclusion in HTML text or a quoted attribute value. */
function escapeHtml(value) {
  return String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Serializes a value as JSON that can be embedded inside an inline <script>.
 * Every `<` is emitted as `\u003c` so the payload can never contain
 * `</script>` or `<!--`, which would otherwise end or corrupt the script.
 */
function toInlineScriptJson(value) {
  return JSON.stringify(value).replace(/</g, '\\u003c');
}

/** True for in-document references such as `#clip`; there is nothing to download for them. */
function isFragmentRef(raw) {
  return typeof raw === 'string' && raw.trim().startsWith('#');
}

/** Undoes the `&` -> `&amp;` escaping found in attribute values of serialized HTML. */
function decodeAmpEntities(value) {
  return value.replace(/&amp;/g, '&');
}

/**
 * Writes an offline copy of the page held by a cloner: rewrites asset
 * references in the cloner's cheerio document to local `./assets/...` paths,
 * downloads the assets, and emits index.html, package.json and README.md.
 *
 * The cloner object is used as-is; this class reads `url`, `domain`,
 * `options`, `assets`, `analysis`, `$`, `logger` and `assetManager` from it and
 * calls its `resolveUrl` and `generateFilename` methods.
 */
export class FrameworkWriter {
  /**
   * @param {object} cloner - The cloner whose document and asset lists are written out.
   */
  constructor(cloner) {
    this.cloner = cloner;
    // absolute URL (and alternates) -> local ./assets/... path
    this.assetMappings = new Map();
  }

  /**
   * PUBLIC: write HTML only (used by auto-fallback), reusing existing mappings
   * and the current disableJs flag.
   * @returns {Promise<void>}
   */
  async writeIndexHtmlOnly() {
    const html = await this.generateExactHTMLAndReturn();
    await fs.writeFile(
      path.join(this.cloner.options.outputDir, 'index.html'),
      html,
      'utf8',
    );
  }

  /**
   * For pages detected as Next.js or React, appends a script to <head> that
   * hides the Next.js error overlay on window errors, fills an empty root
   * element with an "Offline Mirror" notice, and suppresses unhandled
   * promise rejections.
   * @param {Function} $ - cheerio instance for the document being written.
   */
  addOfflineErrorHandling($) {
    const frameworkKey = this.cloner.analysis?.primaryFramework?.key;
    const isNextJs =
      frameworkKey === 'nextjs' ||
      $('#__next').length > 0 ||
      $('script[src*="_next"]').length > 0;
    const isReact =
      frameworkKey === 'react' ||
      $('#root').length > 0 ||
      $('script[src*="react"]').length > 0;
    if (!isNextJs && !isReact) return;

    // The URL is HTML-escaped here and the whole markup is embedded below as a
    // JSON string literal, so quotes or markup in the URL cannot break the script.
    const fallbackHtml = [
      '<div style="min-height:100vh;display:flex;align-items:center;justify-content:center;color:#fff;background:linear-gradient(135deg,#667eea 0%,#764ba2 100%);font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Roboto,sans-serif;padding:2rem;text-align:center;">',
      '<div style="background:rgba(255,255,255,0.1);backdrop-filter:blur(10px);border-radius:20px;padding:3rem;max-width:600px;box-shadow:0 8px 32px rgba(0,0,0,0.1);">',
      '<h1 style="margin:0 0 1rem 0;font-weight:300;">🪞 Offline Mirror</h1>',
      `<p style="opacity:.9;">This is an offline mirror of <strong>${escapeHtml(this.cloner.url)}</strong></p>`,
      '<p style="opacity:.7;">Some interactive features may not work in offline mode.</p>',
      '<div style="margin-top:1rem;opacity:.6;">Generated by Mirror Web CLI v1.0</div>',
      '</div></div>',
    ].join('');

    const errorBoundaryScript = `
<script>
(function() {
window.addEventListener('error', function() {
try {
const overlay = document.querySelector('[data-nextjs-dialog-overlay]');
if (overlay) overlay.style.display = 'none';
const root = document.querySelector('#__next, #root, [data-reactroot]');
if (root && (!root.innerHTML || root.innerHTML.trim() === '')) {
root.innerHTML = ${toInlineScriptJson(fallbackHtml)};
}
} catch(e){}
return true;
}, true);
window.addEventListener('unhandledrejection', function(event) {
event.preventDefault();
});
})();
</script>`;
    $('head').append(errorBoundaryScript);
  }

  /**
   * Guard that preserves SSR DOM; also handles the file:// case by disabling
   * Next/React scripts immediately. Only injected for React/Next.js pages.
   *
   * NOTE(review): the script is prepended to <head>, so it appears to execute
   * before <body> has been parsed; `document.querySelector(ROOT_SEL)` would
   * then be null and the guard would exit at `if (!root) return;`. Confirm in
   * a browser and consider deferring the snapshot until the root exists.
   * @param {Function} $ - cheerio instance for the document being written.
   */
  injectHydrationGuard($) {
    const frameworkKey = this.cloner.analysis?.primaryFramework?.key;
    const isReactOrNext =
      frameworkKey === 'nextjs' ||
      frameworkKey === 'react' ||
      $('#__next, #root, [data-reactroot]').length > 0;
    if (!isReactOrNext) return;
    const guard = `
<script>
(function(){
try {
var ROOT_SEL = '#__next, #root, [data-reactroot]';
var root = document.querySelector(ROOT_SEL);
if (!root) return;
var ssrSnapshot = root.innerHTML;
var restored = false;
function stripNextScripts() {
try {
document.querySelectorAll('script[src*="/_next/"],link[rel="preload"][as="script"]').forEach(function(n){ n.parentNode && n.parentNode.removeChild(n); });
// Try to disable inline bootstrap by toggling a flag that guards common Next/React bootstraps
window.__MW_DISABLE_NEXT = true;
} catch(e){}
}
function restoreIfBlank(reason) {
if (restored) return;
var r = document.querySelector(ROOT_SEL);
if (!r) return;
var empty = !r.innerHTML || r.innerHTML.trim().length < 20;
if (!empty) return;
restored = true;
stripNextScripts();
try { r.innerHTML = ssrSnapshot; } catch(e){}
if (console && console.warn) console.warn('[MirrorWeb] Hydration guard restored SSR due to:', reason||'unknown');
}
// If opened directly from disk, disable framework hydration preemptively
if (location.protocol === 'file:') {
stripNextScripts();
// Ensure SSR snapshot remains visible
try { root.innerHTML = ssrSnapshot; } catch(e){}
}
// Timed checks post-DOMContentLoaded
document.addEventListener('DOMContentLoaded', function(){
setTimeout(function(){ restoreIfBlank('post-DCL 50ms'); }, 50);
setTimeout(function(){ restoreIfBlank('post-DCL 500ms'); }, 500);
setTimeout(function(){ restoreIfBlank('post-DCL 2000ms'); }, 2000);
});
// Global error hooks -> try restore
window.addEventListener('error', function(){ restoreIfBlank('window error'); }, true);
window.addEventListener('unhandledrejection', function(){ restoreIfBlank('unhandledrejection'); }, true);
// If mutations clear root, restore
var mo = new MutationObserver(function(){
var r = document.querySelector(ROOT_SEL);
if (!r) return;
if (!r.innerHTML || r.innerHTML.trim().length < 5) {
restoreIfBlank('mutation empty');
}
});
mo.observe(document.documentElement, { subtree: true, childList: true });
} catch(e){}
})();
</script>`;
    // Prepend so it runs before other scripts
    $('head').prepend(guard);
  }

  /**
   * Remove all scripts for a static snapshot (prevents Next/React from wiping
   * SSR HTML) and mark the document with a `js-disabled` meta tag.
   * @param {Function} $ - cheerio instance for the document being written.
   */
  stripAllScriptsForStaticSnapshot($) {
    $('script').remove();
    $('link[rel="preload"][as="script"]').remove();
    // The same document can be processed more than once (fallback path), so
    // only add the marker when it is not there yet.
    if ($('meta[name="js-disabled"]').length === 0) {
      $('head').append('<meta name="js-disabled" content="true">');
    }
  }

  /**
   * Produces the whole offline project in `options.outputDir`: directory
   * skeleton, index.html, downloaded assets, package.json and README.md.
   * @returns {Promise<void>}
   */
  async generateOfflineProject() {
    await fs.ensureDir(this.cloner.options.outputDir);
    const structure = {
      assets: {
        css: {},
        js: {},
        images: {},
        fonts: {},
        icons: {},
        media: {},
        data: {},
      },
    };
    await this.createDirectoryStructure(structure);
    this.buildAssetMappings();
    const htmlWithJs = await this.generateExactHTMLAndReturn();
    await fs.writeFile(
      path.join(this.cloner.options.outputDir, 'index.html'),
      htmlWithJs,
      'utf8',
    );
    // Downloads run after HTML generation because rewriting may register
    // additional images in cloner.assets.images.
    await this.downloadAssetsWithExactNames();
    await this.generateOfflinePackageJson();
    await this.generateOfflineReadme();
  }

  /**
   * Fills `assetMappings` from the cloner's asset lists. Images are also
   * registered under their Next.js optimizer URL and original path when
   * those are present.
   */
  buildAssetMappings() {
    const { assets } = this.cloner;
    const register = (items, subdir) => {
      for (const item of items) {
        if (item.url) {
          this.assetMappings.set(item.url, `./assets/${subdir}/${item.filename}`);
        }
      }
    };

    register(assets.styles, 'css');
    register(assets.scripts, 'js');

    for (const img of assets.images) {
      const local = `./assets/images/${img.filename}`;
      // Covers data:image/ URLs too: they are keyed by their own URL string.
      if (img.url) this.assetMappings.set(img.url, local);
      if (img.nextJsUrl) {
        this.assetMappings.set(this.cloner.resolveUrl(img.nextJsUrl), local);
      }
      if (img.originalPath) {
        this.assetMappings.set(this.cloner.resolveUrl(img.originalPath), local);
      }
    }

    register(assets.fonts, 'fonts');
    register(assets.icons, 'icons');
    register(assets.media, 'media');
  }

  /**
   * Returns the local path for an absolute image URL, registering the image
   * in `cloner.assets.images` (so it gets downloaded later) when it is new.
   * @param {string} absUrl - Absolute image URL.
   * @returns {string|null} Local `./assets/images/...` path, or null for an empty URL.
   */
  ensureMappedImage(absUrl) {
    if (!absUrl) return null;
    if (this.assetMappings.has(absUrl)) return this.assetMappings.get(absUrl);

    const known = this.cloner.assets.images.find((x) => x.url === absUrl);
    let filename;
    if (known) {
      filename = known.filename;
    } else {
      filename = this.cloner.generateFilename(absUrl, 'images');
      this.cloner.assets.images.push({
        url: absUrl,
        filename,
        element: 'img',
        attribute: 'src',
        local: false,
      });
    }
    const local = `./assets/images/${filename}`;
    this.assetMappings.set(absUrl, local);
    return local;
  }

  /**
   * Extracts the source image from a Next.js optimizer URL
   * (`/_next/image?url=...`).
   * @param {string} nextUrl - Optimizer URL, absolute or relative to the page.
   * @returns {string|null} The `url` query value, or null when absent/unparseable.
   */
  extractNextImageUrl(nextUrl) {
    try {
      // searchParams.get() already percent-decodes the value. Decoding again
      // would corrupt sources containing %25 / %2F and throw on a bare '%'.
      const target = new URL(nextUrl, this.cloner.url).searchParams.get('url');
      return target || null;
    } catch {
      return null;
    }
  }

  /**
   * Resolves a Next.js optimizer URL to a local image path, mapping the
   * underlying source image on demand.
   * @param {string} nextUrl - Optimizer URL as found in the document.
   * @returns {string|null} Local path, or null when no source image can be derived.
   */
  getLocalForNextImage(nextUrl) {
    const absNext = this.cloner.resolveUrl(nextUrl);
    if (this.assetMappings.has(absNext)) return this.assetMappings.get(absNext);
    const original = this.extractNextImageUrl(nextUrl);
    if (!original) return null;
    const local = this.ensureMappedImage(this.cloner.resolveUrl(original));
    // Remember the optimizer URL too so later lookups hit the map directly.
    if (local) this.assetMappings.set(absNext, local);
    return local;
  }

  /**
   * Appends a script to <head> that rewrites src/href/poster/srcset/style
   * URLs at runtime using a snapshot of `assetMappings`, and keeps doing so
   * for DOM mutations. Keep JS by default; only skip runtime injection if JS
   * is explicitly disabled.
   * @param {Function} $ - cheerio instance for the document being written.
   */
  injectRuntimeRewriter($) {
    if (this.cloner.options.disableJs) return;
    const map = Object.fromEntries(this.assetMappings.entries());
    const debug = !!this.cloner.options.debug;
    const runtimeScript = `
<script>
(function(){
try{
const MAP = ${toInlineScriptJson(map)};
const DEBUG = ${JSON.stringify(debug)};
function log(){ if (DEBUG) console.log.apply(console, ['[MW rewrite]'].concat([].slice.call(arguments))); }
function resolve(u){
if (!u) return null;
if (MAP[u]) return MAP[u];
if (u.includes('/_next/image') && u.includes('url=')) {
try{
const p = new URL(u, location.href);
const t = p.searchParams.get('url')||'';
if (t) {
const abs = new URL(t, location.href).href;
if (MAP[abs]) return MAP[abs];
}
}catch{}
}
return null;
}
function extractCssUrl(val){
const m = /url\\(\\s*['"]?([^'")]+)['"]?\\s*\\)/i.exec(val||'');
return m ? m[1] : null;
}
function rewriteSrcsetString(srcsetStr){
if (!srcsetStr) return srcsetStr;
const parts = srcsetStr.split(',').map(s => s.trim()).filter(Boolean);
let changed = false;
const rewritten = parts.map(entry => {
const segs = entry.split(/\\s+/);
const url = segs[0];
const desc = segs.slice(1).join(' ');
if (!url) return entry;
const rep = resolve(url);
if (rep && rep !== url) { changed = true; return [rep, desc].filter(Boolean).join(' '); }
return entry;
}).join(', ');
return changed ? rewritten : srcsetStr;
}
function rewriteNode(el){
if (!el || el.nodeType !== 1) return;
['src','href','poster'].forEach(attr=>{
const v = el.getAttribute && el.getAttribute(attr);
const r = resolve(v);
if (r && v !== r) { log(attr+':', v, '->', r); el.setAttribute(attr, r); }
});
['srcset','imagesrcset'].forEach(attr=>{
const v = el.getAttribute && el.getAttribute(attr);
if (!v) return;
const nv = rewriteSrcsetString(v);
if (nv && nv !== v) { log(attr+':', v, '->', nv); el.setAttribute(attr, nv); }
});
if (el.hasAttribute && el.hasAttribute('style')) {
const s = el.getAttribute('style') || '';
const url = extractCssUrl(s);
const r = resolve(url);
if (r && url && url !== r) { log('style background-image:', url, '->', r); el.style.backgroundImage = "url('"+r+"')"; }
}
}
document.querySelectorAll('[src],[href],[poster],[srcset],[imagesrcset],[style]').forEach(rewriteNode);
const obs = new MutationObserver(muts=>{
muts.forEach(m=>{
if (m.type === 'attributes' && (m.attributeName==='src'||m.attributeName==='href'||m.attributeName==='poster'||m.attributeName==='style'||m.attributeName==='srcset'||m.attributeName==='imagesrcset')) {
rewriteNode(m.target);
} else if (m.type === 'childList') {
m.addedNodes.forEach(n=>{
if (n.nodeType===1){
rewriteNode(n);
n.querySelectorAll && n.querySelectorAll('[src],[href],[poster],[srcset],[imagesrcset],[style]').forEach(rewriteNode);
}
});
}
});
});
obs.observe(document.documentElement, {subtree:true, childList:true, attributes:true, attributeFilter:['src','href','poster','style','srcset','imagesrcset']});
}catch(e){ if (${JSON.stringify(debug)}) console.warn('[MW runtime error]', e); }
})();
</script>`;
    $('head').append(runtimeScript);
  }

  /**
   * Patch inline Next.js asset URLs inside inline scripts (when JS is
   * enabled): quoted `/_next/static/...` and `static/chunks/...` strings are
   * replaced with the local path of the first mapped asset whose key
   * contains the referenced file name.
   * @param {Function} $ - cheerio instance for the document being written.
   */
  enhanceJavaScriptContentRewriting($) {
    if (this.cloner.options.disableJs) return;
    const isNextJs =
      this.cloner.analysis?.primaryFramework?.key === 'nextjs' ||
      $('#__next').length > 0 ||
      $('script[src*="_next"]').length > 0;
    if (!isNextJs) return;

    // Mapping keys plus the bare file name of every key that parses as a URL.
    const extendedMap = new Map(this.assetMappings);
    for (const [fullUrl, localPath] of this.assetMappings.entries()) {
      try {
        const filename = path.basename(new URL(fullUrl).pathname);
        if (filename) extendedMap.set(filename, localPath);
      } catch {
        // Keys that are not absolute URLs simply get no file-name alias.
      }
    }

    // Shared replacer: group 1 of every pattern is the referenced file name.
    const toLocalLiteral = (match, fname) => {
      const bare = fname.split('?')[0];
      for (const [known, local] of extendedMap.entries()) {
        if (known.includes(bare)) return `"${local}"`;
      }
      return match;
    };
    const patterns = [
      /"\/_next\/static\/css\/([^"]+\.css[^"]*?)"/g,
      /"\/_next\/static\/chunks\/([^"]+\.js[^"]*?)"/g,
      /"static\/chunks\/([^"]+\.js[^"]*?)"/g,
      /"\/_next\/static\/media\/([^"]+\.(woff2?|ttf|otf)[^"]*?)"/g,
    ];

    $('script:not([src])').each((_, el) => {
      const original = $(el).html();
      if (
        !original ||
        (!original.includes('/_next/') && !original.includes('static/'))
      ) {
        return;
      }
      let content = original;
      for (const pattern of patterns) {
        content = content.replace(pattern, (m, fname) => toLocalLiteral(m, fname));
      }
      $(el).html(content);
    });
  }

  /**
   * Rewrites every asset reference in the cloner's document to its local
   * path and returns the serialized HTML. The document (`cloner.$`) is
   * modified in place, and new images found along the way are registered
   * for download.
   *
   * The method may run more than once on the same document (see
   * writeIndexHtmlOnly), so marker metas are replaced rather than appended
   * and attribute values that already are local paths are left untouched.
   * @returns {Promise<string>} The final HTML.
   */
  async generateExactHTMLAndReturn() {
    const $ = this.cloner.$;
    // Local paths known at entry; a value equal to one of these has already
    // been localized and must not be resolved against the remote site again.
    const alreadyLocal = new Set(this.assetMappings.values());
    const localizeImage = (raw) =>
      this.ensureMappedImage(this.cloner.resolveUrl(raw));

    $(
      'meta[name="offline-ready"], meta[name="mirrored-from"], meta[name="mirrored-date"]',
    ).remove();
    $('head').append(`<meta name="offline-ready" content="true">`);
    $('head').append(
      `<meta name="mirrored-from" content="${escapeHtml(this.cloner.url)}">`,
    );
    $('head').append(
      `<meta name="mirrored-date" content="${new Date().toISOString()}">`,
    );

    // JS kept or removed based on auto decision
    const shouldStrip = this.cloner.options.disableJs;
    if (shouldStrip) {
      this.stripAllScriptsForStaticSnapshot($);
    } else {
      // Guard before other scripts to prevent blank pages on hydration errors
      this.injectHydrationGuard($);
      this.addOfflineErrorHandling($);
      this.enhanceJavaScriptContentRewriting($);
      // The runtime rewriter is injected at the end of this method, once the
      // mapping table contains everything discovered below.
    }

    $('img[data-mw-computed]').remove();

    $('link[rel="stylesheet"], link[rel="preload"][as="style"]').each(
      (_, el) => {
        const $el = $(el);
        const href = $el.attr('href');
        if (!href) return;
        const abs = this.cloner.resolveUrl(href);
        if (this.assetMappings.has(abs)) {
          $el.attr('href', this.assetMappings.get(abs));
        }
      },
    );

    if (!shouldStrip) {
      $('script[src]').each((_, el) => {
        const $el = $(el);
        const src = $el.attr('src');
        if (!src) return;
        if (
          this.cloner.options.clean &&
          this.cloner.assetManager.isTrackingScript(src)
        ) {
          $el.remove();
          return;
        }
        const abs = this.cloner.resolveUrl(src);
        if (this.assetMappings.has(abs)) {
          $el.attr('src', this.assetMappings.get(abs));
        }
      });
      $('link[rel="preload"][as="script"]').each((_, el) => {
        const $el = $(el);
        const href = $el.attr('href');
        if (!href) return;
        const abs = this.cloner.resolveUrl(href);
        if (this.assetMappings.has(abs)) {
          $el.attr('href', this.assetMappings.get(abs));
        } else {
          // A preload for a script that was not mirrored would only 404.
          $el.remove();
        }
      });
    }

    const firstTruthy = (...vals) =>
      vals.find((v) => v && String(v).trim().length > 0) || null;

    // Images
    $('img').each((_, el) => {
      const $el = $(el);
      const src = $el.attr('src') || '';
      const dataSrc = firstTruthy(
        $el.attr('data-src'),
        $el.attr('data-lazy-src'),
        $el.attr('data-original'),
      );
      if (src.includes('/_next/image')) {
        const local = this.getLocalForNextImage(src);
        if (local) $el.attr('src', local);
      } else if (src && !src.startsWith('data:') && !alreadyLocal.has(src)) {
        const local = localizeImage(src);
        if (local) $el.attr('src', local);
      }
      // A lazy-load source, when present, wins over the placeholder src.
      if (dataSrc && !dataSrc.startsWith('data:') && !alreadyLocal.has(dataSrc)) {
        const local = localizeImage(dataSrc);
        if (local) $el.attr('src', local);
      }
    });

    // srcset / imagesrcset share one rewriter
    const rewriteSrcset = (value) =>
      value
        .split(',')
        .map((s) => s.trim())
        .filter(Boolean)
        .map((entry) => {
          const [url, ...descriptors] = entry.split(/\s+/);
          if (!url) return entry;
          let local = null;
          if (url.includes('/_next/image')) {
            local = this.getLocalForNextImage(url);
          } else if (!url.startsWith('data:') && !alreadyLocal.has(url)) {
            local = localizeImage(url);
          }
          if (!local) return entry;
          return [local, descriptors.join(' ')].filter(Boolean).join(' ');
        })
        .join(', ');
    for (const attr of ['srcset', 'imagesrcset']) {
      $(`[${attr}]`).each((_, el) => {
        const $el = $(el);
        const current = $el.attr(attr);
        if (!current) return;
        $el.attr(attr, rewriteSrcset(current));
      });
    }

    // Posters
    $('video[poster]').each((_, el) => {
      const $el = $(el);
      const poster = $el.attr('poster');
      if (!poster || poster.startsWith('data:') || alreadyLocal.has(poster)) {
        return;
      }
      const local = localizeImage(poster);
      if (local) $el.attr('poster', local);
    });

    // Video/Audio
    $('video[src], audio[src], source[src]').each((_, el) => {
      const $el = $(el);
      const src = $el.attr('src');
      if (!src || src.startsWith('data:')) return;
      const abs = this.cloner.resolveUrl(src);
      if (this.assetMappings.has(abs)) {
        $el.attr('src', this.assetMappings.get(abs));
      }
    });

    // SVG refs
    $(
      'svg image[href], svg image[xlink\\:href], svg use[href], svg use[xlink\\:href]',
    ).each((_, el) => {
      const $el = $(el);
      const href = $el.attr('href') || $el.attr('xlink:href');
      if (!href || href.startsWith('data:')) return;
      // "#icon" points at a symbol inside this document; rewriting it to a
      // file path would break the reference.
      if (isFragmentRef(href) || alreadyLocal.has(href)) return;
      const local = localizeImage(href);
      if (local) {
        $el.attr('href', local);
        $el.attr('xlink:href', local);
      }
    });

    // Inline style backgrounds
    $('[style]').each((_, el) => {
      const $el = $(el);
      const style = $el.attr('style');
      if (!style) return;
      const updated = style.replace(
        /url\(\s*(['"]?)([^'")]+)\1\s*\)/gi,
        (m, _q, raw) => {
          if (!raw || raw.startsWith('data:')) return m;
          if (isFragmentRef(raw) || alreadyLocal.has(raw)) return m;
          const local = localizeImage(raw);
          return local ? `url('${local}')` : m;
        },
      );
      if (updated !== style) $el.attr('style', updated);
    });

    // Inline <style> blocks: rewrite CSS url(...) and download assets
    const axios = (await import('axios')).default;
    await Promise.all(
      $('style')
        .toArray()
        .map(async (el) => {
          const $el = $(el);
          const css = $el.html() || '';
          if (!css.trim()) return;
          const rewritten = await this.rewriteCssUrlsAndDownload(
            css,
            this.cloner.url,
            axios,
            { fromInline: true },
          );
          $el.html(rewritten);
        }),
    );

    // Icons
    $('link[rel*="icon"]').each((_, el) => {
      const $el = $(el);
      const href = $el.attr('href');
      if (!href || href.startsWith('data:')) return;
      const abs = this.cloner.resolveUrl(href);
      if (this.assetMappings.has(abs)) {
        $el.attr('href', this.assetMappings.get(abs));
      }
    });

    // Injected last so its embedded map includes every mapping added above
    // (the method itself is a no-op when JS is disabled).
    if (!shouldStrip) this.injectRuntimeRewriter($);

    // Safety net: finalize Next.js image replacements
    return this.finalizeNextImageReplacements($.html());
  }

  /**
   * String-level safety net that replaces any remaining Next.js optimizer
   * URLs in src/poster/href/srcset attributes of serialized HTML.
   * Matched attributes are re-emitted with double quotes.
   * @param {string} html - Serialized document.
   * @returns {string} HTML with optimizer URLs replaced where a local image is known.
   */
  finalizeNextImageReplacements(html) {
    // Attribute values in serialized HTML carry "&amp;" for "&"; undo that
    // before parsing the URL so query parameters are recognised.
    const lookup = (rawUrl) =>
      this.getLocalForNextImage(decodeAmpEntities(rawUrl));

    const withAttrs = html.replace(
      /\b(src|poster|href)=["']([^"']*_next\/image\?[^"']+)["']/gi,
      (_m, attr, urlVal) => {
        const local = lookup(urlVal);
        return local ? `${attr}="${local}"` : `${attr}="${urlVal}"`;
      },
    );

    return withAttrs.replace(
      /\bsrcset=["']([^"']+)["']/gi,
      (_m, srcsetVal) => {
        const updated = srcsetVal
          .split(',')
          .map((s) => s.trim())
          .filter(Boolean)
          .map((entry) => {
            const [url, ...descriptors] = entry.split(/\s+/);
            if (url && url.includes('/_next/image')) {
              const local = lookup(url);
              if (local) {
                return [local, descriptors.join(' ')].filter(Boolean).join(' ');
              }
            }
            return entry;
          })
          .join(', ');
        return `srcset="${updated}"`;
      },
    );
  }

  /**
   * Downloads every registered asset into `outputDir/assets/<kind>/` using
   * the file names recorded on the asset entries. Failures are reported via
   * `cloner.logger.warnNonCritical` and do not abort the run.
   * @returns {Promise<void>}
   */
  async downloadAssetsWithExactNames() {
    const axios = (await import('axios')).default;
    await this.downloadImages(axios);
    await this.downloadStylesheets(axios);
    await this.downloadScripts(axios);
    await this.downloadBinaryGroup(axios, {
      items: this.cloner.assets.fonts,
      subdir: 'fonts',
      label: 'fonts',
      category: () => 'fonts',
      request: { timeout: 30000, headers: { 'User-Agent': 'Mozilla/5.0' } },
    });
    await this.downloadBinaryGroup(axios, {
      items: this.cloner.assets.icons,
      subdir: 'icons',
      label: 'icons',
      category: () => 'icon',
      request: { timeout: 30000, headers: { 'User-Agent': 'Mozilla/5.0' } },
    });
    await this.downloadBinaryGroup(axios, {
      items: this.cloner.assets.media,
      subdir: 'media',
      label: 'media files',
      category: (item) => item.type || 'media',
      request: {
        timeout: 120000,
        headers: {
          'User-Agent': 'Mozilla/5.0',
          Accept: 'video/*;q=0.9,audio/*;q=0.9,*/*;q=0.5',
          Referer: this.cloner.url,
        },
      },
    });
  }

  /** Prints a progress line unless the `quiet` option is set. */
  logDownloadProgress(message) {
    if (!this.cloner.options.quiet) console.log(chalk.gray(message));
  }

  /**
   * Saves all images: in-memory buffers are written directly, everything
   * else is fetched from the image URL and then the Next.js URL as fallback.
   * @param {object} axios - axios instance used for requests.
   */
  async downloadImages(axios) {
    const images = this.cloner.assets.images;
    if (!images.length) return;
    this.logDownloadProgress(` Downloading ${images.length} images...`);

    for (const img of images) {
      if (img.url && img.url.includes('/_next/image')) continue; // skip optimizer endpoints
      const dest = path.join(
        this.cloner.options.outputDir,
        'assets',
        'images',
        img.filename,
      );
      await fs.ensureDir(path.dirname(dest));
      if (img.buffer) {
        await fs.writeFile(dest, img.buffer);
        continue;
      }

      const tryUrls = [];
      if (img.url) tryUrls.push(img.url);
      if (img.nextJsUrl) tryUrls.push(this.cloner.resolveUrl(img.nextJsUrl));

      let saved = false;
      for (const u of tryUrls) {
        try {
          const data = await this.fetchImageData(axios, u);
          if (data !== null) {
            await fs.writeFile(dest, data);
            saved = true;
            break;
          }
        } catch (e) {
          this.cloner.logger.warnNonCritical('image', u, e);
        }
      }
      if (!saved) {
        this.cloner.logger.warnNonCritical(
          'image',
          img.url || img.nextJsUrl,
          new Error('exhausted sources'),
        );
      }
    }
  }

  /**
   * Fetches image bytes from one source URL.
   * A JSON answer from a microlink.io URL is followed to the image it points
   * at; the JSON body itself is never returned as image data.
   * @param {object} axios - axios instance used for requests.
   * @param {string} sourceUrl - URL to fetch.
   * @returns {Promise<*>} Response body of a successful image request, or null.
   */
  async fetchImageData(axios, sourceUrl) {
    const imageHeaders = {
      'User-Agent': 'Mozilla/5.0',
      Accept: 'image/avif,image/webp,image/apng,image/*,*/*;q=0.8',
      Referer: this.cloner.url,
    };
    const isSuccess = (status) => (status || 0) >= 200 && (status || 0) < 300;

    const res = await axios.get(sourceUrl, {
      responseType: 'arraybuffer',
      timeout: 60000,
      headers: imageHeaders,
      validateStatus: () => true,
    });
    const ctype = String(res.headers?.['content-type'] || '').toLowerCase();

    // Follow Microlink JSON if necessary
    if (/microlink\.io/i.test(sourceUrl) && ctype.includes('application/json')) {
      try {
        const j = await axios.get(sourceUrl, {
          responseType: 'json',
          timeout: 60000,
          headers: { 'User-Agent': 'Mozilla/5.0' },
        });
        const target =
          j.data?.data?.image?.url ||
          j.data?.data?.screenshot?.url ||
          j.data?.data?.thumbnail?.url ||
          j.data?.image?.url ||
          j.data?.screenshot?.url;
        if (target) {
          const res2 = await axios.get(target, {
            responseType: 'arraybuffer',
            timeout: 60000,
            headers: imageHeaders,
            validateStatus: () => true,
          });
          if (isSuccess(res2.status)) return res2.data;
        }
      } catch (e) {
        this.cloner.logger.warnNonCritical('image', sourceUrl, e);
      }
      // The first response was JSON metadata, not an image.
      return null;
    }

    if (isSuccess(res.status) && res.data?.byteLength > 0) return res.data;
    return null;
  }

  /**
   * Downloads external stylesheets, rewriting (and downloading) the assets
   * referenced from their `url(...)` tokens.
   * @param {object} axios - axios instance used for requests.
   */
  async downloadStylesheets(axios) {
    const cssExternals = this.cloner.assets.styles.filter(
      (s) => s.url && s.type === 'external',
    );
    if (!cssExternals.length) return;
    this.logDownloadProgress(` Downloading ${cssExternals.length} CSS files...`);

    for (const css of cssExternals) {
      const dest = path.join(
        this.cloner.options.outputDir,
        'assets',
        'css',
        css.filename,
      );
      try {
        const res = await axios.get(css.url, {
          responseType: 'text',
          timeout: 30000,
          headers: { 'User-Agent': 'Mozilla/5.0' },
        });
        const text = await this.rewriteCssUrlsAndDownload(
          res.data || '',
          css.url,
          axios,
          { fromInline: false },
        );
        await fs.ensureDir(path.dirname(dest));
        await fs.writeFile(dest, text, 'utf8');
      } catch (e) {
        this.cloner.logger.warnNonCritical('styles', css.url, e);
      }
    }
  }

  /**
   * Downloads external scripts; skipped entirely when JS is disabled.
   * @param {object} axios - axios instance used for requests.
   */
  async downloadScripts(axios) {
    // JS externals: download when JS is enabled (based on decision)
    const jsExternals = this.cloner.options.disableJs
      ? []
      : this.cloner.assets.scripts.filter((s) => s.url);
    if (!jsExternals.length) return;
    this.logDownloadProgress(` Downloading ${jsExternals.length} JS files...`);

    for (const s of jsExternals) {
      const dest = path.join(
        this.cloner.options.outputDir,
        'assets',
        'js',
        s.filename,
      );
      try {
        const res = await axios.get(s.url, {
          responseType: 'text',
          timeout: 30000,
          headers: { 'User-Agent': 'Mozilla/5.0' },
        });
        await fs.ensureDir(path.dirname(dest));
        await fs.writeFile(dest, res.data || '', 'utf8');
      } catch (e) {
        this.cloner.logger.warnNonCritical('scripts', s.url, e);
      }
    }
  }

  /**
   * Downloads one list of binary assets (fonts, icons or media) one by one.
   * @param {object} axios - axios instance used for requests.
   * @param {object} group
   * @param {Array<object>} group.items - Asset entries with `url` and `filename`.
   * @param {string} group.subdir - Directory below `assets/`.
   * @param {string} group.label - Noun used in the progress line.
   * @param {Function} group.category - Maps an item to its log category.
   * @param {object} group.request - `timeout` and `headers` for the request.
   */
  async downloadBinaryGroup(axios, { items, subdir, label, category, request }) {
    if (!items.length) return;
    this.logDownloadProgress(` Downloading ${items.length} ${label}...`);

    for (const item of items) {
      if (!item.url) continue;
      const dest = path.join(
        this.cloner.options.outputDir,
        'assets',
        subdir,
        item.filename,
      );
      try {
        const res = await axios.get(item.url, {
          responseType: 'arraybuffer',
          timeout: request.timeout,
          headers: request.headers,
        });
        await fs.ensureDir(path.dirname(dest));
        await fs.writeFile(dest, res.data);
      } catch (e) {
        this.cloner.logger.warnNonCritical(category(item), item.url, e);
      }
    }
  }

  /**
   * Downloads every asset referenced through `url(...)` in a piece of CSS
   * and returns the CSS with those tokens pointing at the local copies.
   * `data:` URLs and fragment-only references (`url(#id)`) are left alone,
   * as are tokens whose download fails.
   * @param {string} cssText - CSS source.
   * @param {string} cssBaseUrl - URL that relative references resolve against.
   * @param {object} axios - axios instance used for requests.
   * @param {{fromInline: boolean}} [options] - `fromInline: true` emits paths
   *   relative to index.html (`./assets/...`); otherwise paths are relative to
   *   `assets/css/` (`../...`).
   * @returns {Promise<string>} Rewritten CSS.
   */
  async rewriteCssUrlsAndDownload(
    cssText,
    cssBaseUrl,
    axios,
    options = { fromInline: false },
  ) {
    const urlRegex = /url\(\s*(['"]?)([^'")]+)\1\s*\)/gi;
    const outputDir = this.cloner.options.outputDir;
    const fromInline = !!options.fromInline;
    // Destination path -> in-flight download, so a file referenced several
    // times is fetched once.
    const downloads = new Map();

    const fetchOnce = (abs, destAbs) => {
      if (!downloads.has(destAbs)) {
        downloads.set(
          destAbs,
          (async () => {
            await fs.ensureDir(path.dirname(destAbs));
            if (await fs.pathExists(destAbs)) return;
            const res = await axios.get(abs, {
              responseType: 'arraybuffer',
              timeout: 45000,
              headers: {
                'User-Agent': 'Mozilla/5.0',
                Accept: 'image/avif,image/webp,image/apng,image/*,*/*;q=0.8',
                Referer: this.cloner.url,
              },
            });
            await fs.writeFile(destAbs, res.data);
          })(),
        );
      }
      return downloads.get(destAbs);
    };

    // Returns the replacement for one url(...) token (the token itself when
    // nothing should change).
    const localizeToken = async (token, raw) => {
      if (!raw || raw.startsWith('data:') || isFragmentRef(raw)) return token;
      let abs;
      try {
        abs = new URL(raw, cssBaseUrl).href;
      } catch {
        return token;
      }
      const lower = abs.split('?')[0].toLowerCase();
      let subdir = 'images'; // default for image extensions and anything unknown
      if (/\.(woff2?|ttf|otf|eot)$/.test(lower)) subdir = 'fonts';
      else if (/\.(mp4|webm|ogg|mp3|wav|m4a)$/.test(lower)) subdir = 'media';

      const filename = this.cloner.generateFilename(abs, subdir);
      const destRel = fromInline
        ? `./assets/${subdir}/${filename}`
        : `../${subdir}/${filename}`;
      const destAbs = path.join(outputDir, 'assets', subdir, filename);
      try {
        await fetchOnce(abs, destAbs);
        this.assetMappings.set(abs, `./assets/${subdir}/${filename}`);
        return `url('${destRel}')`;
      } catch (e) {
        this.cloner.logger.warnNonCritical('css-asset', abs, e);
        return token;
      }
    };

    const pending = new Map();
    for (const m of cssText.matchAll(urlRegex)) {
      if (!pending.has(m[0])) pending.set(m[0], localizeToken(m[0], m[2]));
    }
    const replacements = new Map();
    await Promise.all(
      [...pending].map(async ([token, job]) => {
        replacements.set(token, await job);
      }),
    );

    // One pass with a function replacer: every occurrence is handled and '$'
    // in a file name is never interpreted as a replacement pattern.
    return cssText.replace(urlRegex, (token) => replacements.get(token) ?? token);
  }

  /**
   * Recursively creates the directory tree described by `structure` below
   * `outputDir`. Object values become directories; string values become
   * files with that content.
   * @param {object} structure - Nested name -> content description.
   * @param {string} [basePath] - Path of `structure` relative to `outputDir`.
   * @returns {Promise<void>}
   */
  async createDirectoryStructure(structure, basePath = '') {
    for (const [name, content] of Object.entries(structure)) {
      const fullPath = path.join(this.cloner.options.outputDir, basePath, name);
      if (typeof content === 'object' && content !== null) {
        await fs.ensureDir(fullPath);
        await this.createDirectoryStructure(content, path.join(basePath, name));
        continue;
      }
      await fs.ensureDir(path.dirname(fullPath));
      if (typeof content === 'string') await fs.writeFile(fullPath, content);
    }
  }

  /**
   * Writes a package.json describing the mirror into `outputDir`.
   * @returns {Promise<void>}
   */
  async generateOfflinePackageJson() {
    const packageJson = {
      name: `mirror-${this.cloner.domain}`,
      version: '1.1.0',
      description: `Offline mirror of ${this.cloner.url}`,
      main: 'index.html',
      type: 'module',
      scripts: {
        start: 'node server.js',
        serve: 'python -m http.server 8000',
        'serve-node': 'npx http-server -p 8000 -o',
      },
      keywords: ['mirror', 'offline', 'website'],
      author: 'Mirror Web CLI',
      license: 'MIT',
    };
    await fs.writeFile(
      path.join(this.cloner.options.outputDir, 'package.json'),
      JSON.stringify(packageJson, null, 2),
    );
  }

  /**
   * Writes a README.md into `outputDir` explaining the snapshot and how to
   * serve it locally.
   * @returns {Promise<void>}
   */
  async generateOfflineReadme() {
    const indexDescription = this.cloner.options.disableJs
      ? 'Static (JS removed) with localized assets'
      : 'JS-enabled page with all assets rewritten locally (hydration guard + validation)';
    const readme = `# ${this.cloner.domain} - Offline Mirror
> Offline snapshot of ${this.cloner.url}
Files:
- index.html -> ${indexDescription}
JS mode:
- Automatically chosen by preflight (no flags needed). Safety net validation will fallback to static if needed.
Serve locally:
- python -m http.server 8000
- or: npm start (uses provided server.js)
- then open http://localhost:8000
Note:
- Opening index.html directly from disk (file://) may disable Next/React hydration to preserve SSR content.
`;
    await fs.writeFile(
      path.join(this.cloner.options.outputDir, 'README.md'),
      readme,
    );
  }
}