UNPKG

mirror-web-cli

Version:

Professional website mirroring tool with intelligent framework preservation, AI-powered analysis, and comprehensive asset optimization

1,099 lines (1,006 loc) 37.3 kB
import chalk from 'chalk';
import fs from 'fs-extra';
import path from 'path';

const USER_AGENT = 'Mozilla/5.0';
const IMAGE_ACCEPT = 'image/avif,image/webp,image/apng,image/*,*/*;q=0.8';

// References that can never be downloaded: in-document fragments ("#id") and
// schemes that do not name a remote resource.
const NON_FETCHABLE_REF = /^(?:#|data:|blob:|about:|javascript:|mailto:|tel:)/i;

const HTML_ESCAPES = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&#39;',
};

/** Fresh matcher for CSS `url(...)` tokens (group 1 = quote, group 2 = raw URL). */
function cssUrlPattern() {
  return /url\(\s*(['"]?)([^'")]+)\1\s*\)/gi;
}

/** Escape a value for safe use inside HTML text or a quoted attribute. */
function escapeHtml(value) {
  return String(value ?? '').replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);
}

/**
 * Serialize a value as a JS literal that is safe inside an inline <script>:
 * "<" is escaped so the data can never close the script element, and the
 * U+2028/U+2029 line separators are escaped for older JS parsers.
 */
function toInlineScriptLiteral(value) {
  return JSON.stringify(value ?? null)
    .replace(/</g, '\\u003c')
    .replace(/\u2028/g, '\\u2028')
    .replace(/\u2029/g, '\\u2029');
}

/** True when `raw` is a non-empty reference that points at a fetchable resource. */
function isFetchableRef(raw) {
  if (typeof raw !== 'string') return false;
  const trimmed = raw.trim();
  return trimmed.length > 0 && !NON_FETCHABLE_REF.test(trimmed);
}

/** Replace (rather than duplicate) a <meta name=...> tag in <head>. */
function setHeadMeta($, name, content) {
  $(`head meta[name="${name}"]`).remove();
  $('head').append(`<meta name="${name}" content="${escapeHtml(content)}">`);
}

/**
 * Rewrite each candidate of a srcset-style value.
 * `mapUrl(url)` returns the replacement URL, or a falsy value to keep the entry.
 * NOTE: candidates are split on "," so URLs that themselves contain commas are
 * not supported (same limitation as before).
 */
function mapSrcset(value, mapUrl) {
  return value
    .split(',')
    .map((s) => s.trim())
    .filter(Boolean)
    .map((entry) => {
      const [url, ...descriptors] = entry.split(/\s+/);
      const replacement = url ? mapUrl(url) : null;
      if (!replacement) return entry;
      return [replacement, descriptors.join(' ')].filter(Boolean).join(' ');
    })
    .join(', ');
}

/** Pick the image URL out of a Microlink JSON envelope, if there is one. */
function pickMicrolinkImageUrl(payload) {
  return (
    payload?.data?.image?.url ||
    payload?.data?.screenshot?.url ||
    payload?.data?.thumbnail?.url ||
    payload?.image?.url ||
    payload?.screenshot?.url ||
    null
  );
}

/**
 * Writes an offline copy of a cloned page: rewrites asset references in the
 * cloner's DOM to local ./assets/... paths, downloads the assets and emits
 * index.html, package.json and README.md into `cloner.options.outputDir`.
 */
export class FrameworkWriter {
  /**
   * @param {object} cloner - Owning cloner; this class reads its `$`, `url`,
   *   `domain`, `options`, `assets`, `analysis`, `logger`, `assetManager`,
   *   `resolveUrl()` and `generateFilename()`.
   */
  constructor(cloner) {
    this.cloner = cloner;
    // absolute URL (and alternates) -> local ./assets/... path
    this.assetMappings = new Map();
  }

  /**
   * PUBLIC: write HTML only (used by auto-fallback), reusing existing mappings
   * and the current disableJs flag. No assets are downloaded.
   */
  async writeIndexHtmlOnly() {
    const html = await this.generateExactHTMLAndReturn();
    await fs.writeFile(
      path.join(this.cloner.options.outputDir, 'index.html'),
      html,
      'utf8',
    );
  }

  /**
   * For Next.js/React pages, inject a script that hides the Next error overlay
   * and shows a fallback card when the app root ends up empty after an error.
   * @param {Function} $ - The cloner's DOM handle.
   */
  addOfflineErrorHandling($) {
    const frameworkKey = this.cloner.analysis?.primaryFramework?.key;
    const isNextJs =
      frameworkKey === 'nextjs' ||
      $('#__next').length > 0 ||
      $('script[src*="_next"]').length > 0;
    const isReact =
      frameworkKey === 'react' ||
      $('#root').length > 0 ||
      $('script[src*="react"]').length > 0;
    if (!isNextJs && !isReact) return;

    // Built here (not inside the script text) so the mirrored URL is HTML-escaped
    // and the whole card is emitted as one safely-escaped JS string literal.
    const fallbackHtml = [
      '<div style="min-height:100vh;display:flex;align-items:center;justify-content:center;color:#fff;background:linear-gradient(135deg,#667eea 0%,#764ba2 100%);font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Roboto,sans-serif;padding:2rem;text-align:center;">',
      '<div style="background:rgba(255,255,255,0.1);backdrop-filter:blur(10px);border-radius:20px;padding:3rem;max-width:600px;box-shadow:0 8px 32px rgba(0,0,0,0.1);">',
      '<h1 style="margin:0 0 1rem 0;font-weight:300;">🪞 Offline Mirror</h1>',
      `<p style="opacity:.9;">This is an offline mirror of <strong>${escapeHtml(this.cloner.url)}</strong></p>`,
      '<p style="opacity:.7;">Some interactive features may not work in offline mode.</p>',
      '<div style="margin-top:1rem;opacity:.6;">Generated by Mirror Web CLI v1.0</div>',
      '</div></div>',
    ].join('');

    const errorBoundaryScript = `
<script data-mw-injected="offline-error-handler">
(function() {
  window.addEventListener('error', function() {
    try {
      const overlay = document.querySelector('[data-nextjs-dialog-overlay]');
      if (overlay) overlay.style.display = 'none';
      const root = document.querySelector('#__next, #root, [data-reactroot]');
      if (root && (!root.innerHTML || root.innerHTML.trim() === '')) {
        root.innerHTML = ${toInlineScriptLiteral(fallbackHtml)};
      }
    } catch(e){}
    return true;
  }, true);
  window.addEventListener('unhandledrejection', function(event) {
    event.preventDefault();
  });
})();
</script>`;

    // The DOM is shared between passes; never stack a second copy.
    $('script[data-mw-injected="offline-error-handler"]').remove();
    $('head').append(errorBoundaryScript);
  }

  /**
   * Guard that preserves SSR DOM; also handles the file:// case by disabling
   * Next/React scripts immediately. Injected first in <head>.
   * @param {Function} $ - The cloner's DOM handle.
   */
  injectHydrationGuard($) {
    const frameworkKey = this.cloner.analysis?.primaryFramework?.key;
    const isReactOrNext =
      frameworkKey === 'nextjs' ||
      frameworkKey === 'react' ||
      $('#__next, #root, [data-reactroot]').length > 0;
    if (!isReactOrNext) return;

    const guard = `
<script data-mw-injected="hydration-guard">
(function(){
  try {
    var ROOT_SEL = '#__next, #root, [data-reactroot]';
    var root = document.querySelector(ROOT_SEL);
    if (!root) return;

    var ssrSnapshot = root.innerHTML;
    var restored = false;

    function stripNextScripts() {
      try {
        document.querySelectorAll('script[src*="/_next/"],link[rel="preload"][as="script"]').forEach(function(n){
          n.parentNode && n.parentNode.removeChild(n);
        });
        // Try to disable inline bootstrap by toggling a flag that guards common Next/React bootstraps
        window.__MW_DISABLE_NEXT = true;
      } catch(e){}
    }

    function restoreIfBlank(reason) {
      if (restored) return;
      var r = document.querySelector(ROOT_SEL);
      if (!r) return;
      var empty = !r.innerHTML || r.innerHTML.trim().length < 20;
      if (!empty) return;
      restored = true;
      stripNextScripts();
      try { r.innerHTML = ssrSnapshot; } catch(e){}
      if (console && console.warn) console.warn('[MirrorWeb] Hydration guard restored SSR due to:', reason||'unknown');
    }

    // If opened directly from disk, disable framework hydration preemptively
    if (location.protocol === 'file:') {
      stripNextScripts();
      // Ensure SSR snapshot remains visible
      try { root.innerHTML = ssrSnapshot; } catch(e){}
    }

    // Timed checks post-DOMContentLoaded
    document.addEventListener('DOMContentLoaded', function(){
      setTimeout(function(){ restoreIfBlank('post-DCL 50ms'); }, 50);
      setTimeout(function(){ restoreIfBlank('post-DCL 500ms'); }, 500);
      setTimeout(function(){ restoreIfBlank('post-DCL 2000ms'); }, 2000);
    });

    // Global error hooks -> try restore
    window.addEventListener('error', function(){ restoreIfBlank('window error'); }, true);
    window.addEventListener('unhandledrejection', function(){ restoreIfBlank('unhandledrejection'); }, true);

    // If mutations clear root, restore
    var mo = new MutationObserver(function(){
      var r = document.querySelector(ROOT_SEL);
      if (!r) return;
      if (!r.innerHTML || r.innerHTML.trim().length < 5) {
        restoreIfBlank('mutation empty');
      }
    });
    mo.observe(document.documentElement, { subtree: true, childList: true });
  } catch(e){}
})();
</script>`;

    $('script[data-mw-injected="hydration-guard"]').remove();
    // Prepend so it runs before other scripts
    $('head').prepend(guard);
  }

  /**
   * Remove all scripts for a static snapshot (prevents Next/React from wiping
   * SSR HTML) and mark the document with <meta name="js-disabled">.
   * @param {Function} $ - The cloner's DOM handle.
   */
  stripAllScriptsForStaticSnapshot($) {
    $('script').remove();
    $('link[rel="preload"][as="script"]').remove();
    setHeadMeta($, 'js-disabled', 'true');
  }

  /**
   * Produce the complete offline project: directory skeleton, index.html,
   * downloaded assets, package.json and README.md.
   */
  async generateOfflineProject() {
    await fs.ensureDir(this.cloner.options.outputDir);

    const structure = {
      assets: {
        css: {},
        js: {},
        images: {},
        fonts: {},
        icons: {},
        media: {},
        data: {},
      },
    };
    await this.createDirectoryStructure(structure);

    this.buildAssetMappings();

    // HTML goes first: rewriting it registers extra images that the download
    // step below then picks up.
    const html = await this.generateExactHTMLAndReturn();
    await fs.writeFile(
      path.join(this.cloner.options.outputDir, 'index.html'),
      html,
      'utf8',
    );

    await this.downloadAssetsWithExactNames();
    await this.generateOfflinePackageJson();
    await this.generateOfflineReadme();
  }

  /**
   * Seed `assetMappings` from the assets the cloner already collected.
   * Images are additionally registered under their Next.js optimizer URL and
   * their original path so either spelling resolves to the same local file.
   */
  buildAssetMappings() {
    const assets = this.cloner.assets ?? {};
    const register = (url, subdir, filename) => {
      if (url) this.assetMappings.set(url, `./assets/${subdir}/${filename}`);
    };

    for (const s of assets.styles ?? []) register(s.url, 'css', s.filename);
    for (const s of assets.scripts ?? []) register(s.url, 'js', s.filename);

    for (const img of assets.images ?? []) {
      register(img.url, 'images', img.filename);
      if (img.nextJsUrl) {
        register(this.cloner.resolveUrl(img.nextJsUrl), 'images', img.filename);
      }
      if (img.originalPath) {
        register(
          this.cloner.resolveUrl(img.originalPath),
          'images',
          img.filename,
        );
      }
    }

    for (const f of assets.fonts ?? []) register(f.url, 'fonts', f.filename);
    for (const i of assets.icons ?? []) register(i.url, 'icons', i.filename);
    for (const m of assets.media ?? []) register(m.url, 'media', m.filename);
  }

  /**
   * Return the local path for an absolute image URL, registering a new image
   * asset (to be downloaded later) when the URL has not been seen before.
   * @param {string} absUrl - Absolute image URL.
   * @returns {string|null} Local ./assets/images/... path, or null for empty input.
   */
  ensureMappedImage(absUrl) {
    if (!absUrl) return null;

    const known = this.assetMappings.get(absUrl);
    if (known !== undefined) return known;

    const existing = this.cloner.assets.images.find((x) => x.url === absUrl);
    let filename = existing?.filename;
    if (!existing) {
      filename = this.cloner.generateFilename(absUrl, 'images');
      this.cloner.assets.images.push({
        url: absUrl,
        filename,
        element: 'img',
        attribute: 'src',
        local: false,
      });
    }

    const local = `./assets/images/${filename}`;
    this.assetMappings.set(absUrl, local);
    return local;
  }

  /**
   * Extract the source image from a Next.js optimizer URL (`/_next/image?url=...`).
   * @param {string} nextUrl - Optimizer URL, absolute or relative to the page.
   * @returns {string|null} The `url` parameter, or null when absent/unparseable.
   */
  extractNextImageUrl(nextUrl) {
    try {
      const u = new URL(nextUrl, this.cloner.url);
      // searchParams.get() already percent-decodes; decoding a second time
      // corrupts names containing "%25" and throws on a bare "%".
      return u.searchParams.get('url') || null;
    } catch {
      return null;
    }
  }

  /**
   * Resolve a Next.js optimizer URL to a local image path, mapping the
   * underlying source image when needed.
   * @param {string} nextUrl - Optimizer URL.
   * @returns {string|null} Local path, or null when no source image is found.
   */
  getLocalForNextImage(nextUrl) {
    const absNext = this.cloner.resolveUrl(nextUrl);
    if (this.assetMappings.has(absNext)) return this.assetMappings.get(absNext);

    const original = this.extractNextImageUrl(nextUrl);
    if (!original) return null;

    const local = this.ensureMappedImage(this.cloner.resolveUrl(original));
    if (local) this.assetMappings.set(absNext, local);
    return local;
  }

  /**
   * Inject a browser-side rewriter that maps remote asset URLs (set by page
   * scripts after load) to their local copies. Keep JS by default; only skip
   * runtime injection if JS is explicitly disabled.
   * @param {Function} $ - The cloner's DOM handle.
   */
  injectRuntimeRewriter($) {
    if (this.cloner.options.disableJs) return;

    // data: URLs need no rewriting at runtime and would bloat the page.
    const map = Object.fromEntries(
      [...this.assetMappings].filter(
        ([url]) => !String(url).startsWith('data:'),
      ),
    );
    const debug = !!this.cloner.options.debug;

    const runtimeScript = `
<script data-mw-injected="runtime-rewriter">
(function(){
  try{
    const MAP = ${toInlineScriptLiteral(map)};
    const DEBUG = ${JSON.stringify(debug)};
    function log(){ if (DEBUG) console.log.apply(console, ['[MW rewrite]'].concat([].slice.call(arguments))); }
    function lookup(u){ return Object.prototype.hasOwnProperty.call(MAP, u) ? MAP[u] : null; }
    function resolve(u){
      if (!u) return null;
      const direct = lookup(u);
      if (direct) return direct;
      if (u.includes('/_next/image') && u.includes('url=')) {
        try{
          const p = new URL(u, location.href);
          const t = p.searchParams.get('url') || '';
          if (t) {
            const viaSource = lookup(new URL(t, location.href).href);
            if (viaSource) return viaSource;
          }
        }catch(e){}
      }
      return null;
    }
    function extractCssUrl(val){
      const m = /url\\(\\s*['"]?([^'")]+)['"]?\\s*\\)/i.exec(val||'');
      return m ? m[1] : null;
    }
    function rewriteSrcsetString(srcsetStr){
      if (!srcsetStr) return srcsetStr;
      const parts = srcsetStr.split(',').map(s => s.trim()).filter(Boolean);
      let changed = false;
      const rewritten = parts.map(entry => {
        const segs = entry.split(/\\s+/);
        const url = segs[0];
        const desc = segs.slice(1).join(' ');
        if (!url) return entry;
        const rep = resolve(url);
        if (rep && rep !== url) { changed = true; return [rep, desc].filter(Boolean).join(' '); }
        return entry;
      }).join(', ');
      return changed ? rewritten : srcsetStr;
    }
    function rewriteNode(el){
      if (!el || el.nodeType !== 1) return;
      ['src','href','poster'].forEach(attr=>{
        const v = el.getAttribute && el.getAttribute(attr);
        const r = resolve(v);
        if (r && v !== r) { log(attr+':', v, '->', r); el.setAttribute(attr, r); }
      });
      ['srcset','imagesrcset'].forEach(attr=>{
        const v = el.getAttribute && el.getAttribute(attr);
        if (!v) return;
        const nv = rewriteSrcsetString(v);
        if (nv && nv !== v) { log(attr+':', v, '->', nv); el.setAttribute(attr, nv); }
      });
      if (el.hasAttribute && el.hasAttribute('style')) {
        const s = el.getAttribute('style') || '';
        const url = extractCssUrl(s);
        const r = resolve(url);
        if (r && url && url !== r) {
          log('style background-image:', url, '->', r);
          el.style.backgroundImage = "url('"+r+"')";
        }
      }
    }
    document.querySelectorAll('[src],[href],[poster],[srcset],[imagesrcset],[style]').forEach(rewriteNode);
    const obs = new MutationObserver(muts=>{
      muts.forEach(m=>{
        if (m.type === 'attributes' && (m.attributeName==='src'||m.attributeName==='href'||m.attributeName==='poster'||m.attributeName==='style'||m.attributeName==='srcset'||m.attributeName==='imagesrcset')) {
          rewriteNode(m.target);
        } else if (m.type === 'childList') {
          m.addedNodes.forEach(n=>{
            if (n.nodeType===1){
              rewriteNode(n);
              n.querySelectorAll && n.querySelectorAll('[src],[href],[poster],[srcset],[imagesrcset],[style]').forEach(rewriteNode);
            }
          });
        }
      });
    });
    obs.observe(document.documentElement, {subtree:true, childList:true, attributes:true, attributeFilter:['src','href','poster','style','srcset','imagesrcset']});
  }catch(e){
    if (${JSON.stringify(debug)}) console.warn('[MW runtime error]', e);
  }
})();
</script>`;

    $('script[data-mw-injected="runtime-rewriter"]').remove();
    $('head').append(runtimeScript);
  }

  /**
   * Patch inline Next.js asset URLs inside inline scripts (when JS is enabled)
   * so quoted /_next/static/... references point at local copies.
   * @param {Function} $ - The cloner's DOM handle.
   */
  enhanceJavaScriptContentRewriting($) {
    if (this.cloner.options.disableJs) return;

    const isNextJs =
      this.cloner.analysis?.primaryFramework?.key === 'nextjs' ||
      $('#__next').length > 0 ||
      $('script[src*="_next"]').length > 0;
    if (!isNextJs) return;

    // Also index every mapping by bare filename.
    const extendedMap = new Map(this.assetMappings);
    for (const [fullUrl, localPath] of this.assetMappings) {
      try {
        const filename = path.basename(new URL(fullUrl).pathname);
        if (filename) extendedMap.set(filename, localPath);
      } catch {
        // Key is not an absolute URL; it stays reachable via its full key.
      }
    }

    const findLocal = (fname) => {
      const needle = fname.split('?')[0];
      for (const [full, local] of extendedMap) {
        if (full.includes(needle)) return local;
      }
      return null;
    };

    // Group 1 of every pattern is the file name to look up.
    const patterns = [
      /"\/_next\/static\/css\/([^"]+\.css[^"]*?)"/g,
      /"\/_next\/static\/chunks\/([^"]+\.js[^"]*?)"/g,
      /"static\/chunks\/([^"]+\.js[^"]*?)"/g,
      /"\/_next\/static\/media\/([^"]+\.(woff2?|ttf|otf)[^"]*?)"/g,
    ];

    // Scripts injected by this writer are skipped: they carry no page assets.
    $('script:not([src]):not([data-mw-injected])').each((_, el) => {
      const content = $(el).html();
      if (
        !content ||
        (!content.includes('/_next/') && !content.includes('static/'))
      ) {
        return;
      }

      const updated = patterns.reduce(
        (text, pattern) =>
          text.replace(pattern, (match, fname) => {
            const local = findLocal(fname);
            return local ? `"${local}"` : match;
          }),
        content,
      );
      if (updated !== content) $(el).html(updated);
    });
  }

  /**
   * Rewrite every asset reference in the cloner's DOM to its local path and
   * return the serialized HTML.
   *
   * The DOM is shared and mutated in place, and this method can run more than
   * once (see writeIndexHtmlOnly), so every step is written to be repeatable:
   * meta tags and injected scripts are replaced rather than stacked, and
   * references that were already localized are left untouched.
   *
   * @returns {Promise<string>} The final HTML document.
   */
  async generateExactHTMLAndReturn() {
    const $ = this.cloner.$;
    const resolve = (raw) => this.cloner.resolveUrl(raw);

    // Local paths produced by an earlier pass. Re-resolving one of these
    // against the site URL would register a bogus remote image.
    const alreadyLocal = new Set(this.assetMappings.values());

    // Map an image-like reference to a local path; null means "leave as is".
    const localizeImage = (raw) => {
      if (!isFetchableRef(raw) || alreadyLocal.has(raw)) return null;
      return this.ensureMappedImage(resolve(raw));
    };
    const localizeSrcsetUrl = (url) =>
      url.includes('/_next/image')
        ? this.getLocalForNextImage(url)
        : localizeImage(url);
    // Rewrite `attr` only when its target is an already-known asset.
    const remapKnown = ($el, attr) => {
      const raw = $el.attr(attr);
      if (!raw || raw.startsWith('data:')) return;
      const local = this.assetMappings.get(resolve(raw));
      if (local) $el.attr(attr, local);
    };

    setHeadMeta($, 'offline-ready', 'true');
    setHeadMeta($, 'mirrored-from', this.cloner.url);
    setHeadMeta($, 'mirrored-date', new Date().toISOString());

    // JS kept or removed based on auto decision
    const shouldStrip = this.cloner.options.disableJs;
    if (shouldStrip) {
      this.stripAllScriptsForStaticSnapshot($);
    } else {
      // Guard before other scripts to prevent blank pages on hydration errors
      this.injectHydrationGuard($);
      this.addOfflineErrorHandling($);
      this.enhanceJavaScriptContentRewriting($);
    }

    $('img[data-mw-computed]').remove();

    // Stylesheets
    $('link[rel="stylesheet"], link[rel="preload"][as="style"]').each(
      (_, el) => remapKnown($(el), 'href'),
    );

    if (!shouldStrip) {
      $('script[src]').each((_, el) => {
        const $el = $(el);
        const src = $el.attr('src');
        if (!src) return;
        if (
          this.cloner.options.clean &&
          this.cloner.assetManager.isTrackingScript(src)
        ) {
          $el.remove();
          return;
        }
        const local = this.assetMappings.get(resolve(src));
        if (local) $el.attr('src', local);
      });

      // Script preloads we cannot serve locally are dropped.
      $('link[rel="preload"][as="script"]').each((_, el) => {
        const $el = $(el);
        const href = $el.attr('href');
        if (!href) return;
        const local = this.assetMappings.get(resolve(href));
        if (local) $el.attr('href', local);
        else $el.remove();
      });
    }

    const firstTruthy = (...vals) =>
      vals.find((v) => v && String(v).trim().length > 0) || null;

    // Images
    $('img').each((_, el) => {
      const $el = $(el);
      const src = $el.attr('src') || '';
      const lazySrc = firstTruthy(
        $el.attr('data-src'),
        $el.attr('data-lazy-src'),
        $el.attr('data-original'),
      );

      const srcLocal = src.includes('/_next/image')
        ? this.getLocalForNextImage(src)
        : localizeImage(src);
      if (srcLocal) $el.attr('src', srcLocal);

      // A lazy-load source wins over src: without the page's loader script
      // it would otherwise never be shown.
      const lazyLocal = lazySrc ? localizeImage(lazySrc) : null;
      if (lazyLocal) $el.attr('src', lazyLocal);
    });

    // srcset / imagesrcset
    for (const attr of ['srcset', 'imagesrcset']) {
      $(`[${attr}]`).each((_, el) => {
        const $el = $(el);
        const value = $el.attr(attr);
        if (!value) return;
        const updated = mapSrcset(value, localizeSrcsetUrl);
        if (updated !== value) $el.attr(attr, updated);
      });
    }

    // Posters
    $('video[poster]').each((_, el) => {
      const $el = $(el);
      const local = localizeImage($el.attr('poster'));
      if (local) $el.attr('poster', local);
    });

    // Video/Audio
    $('video[src], audio[src], source[src]').each((_, el) =>
      remapKnown($(el), 'src'),
    );

    // SVG refs
    $(
      'svg image[href], svg image[xlink\\:href], svg use[href], svg use[xlink\\:href]',
    ).each((_, el) => {
      const $el = $(el);
      const href = $el.attr('href') || $el.attr('xlink:href');
      // "#id" points into this document (inline sprite) and must stay as is.
      if (!isFetchableRef(href)) return;

      const hashAt = href.indexOf('#');
      const resource = hashAt === -1 ? href : href.slice(0, hashAt);
      const fragment = hashAt === -1 ? '' : href.slice(hashAt);
      if (alreadyLocal.has(resource)) return;

      // Prefer a mapping registered for the exact reference; otherwise map the
      // file itself. Either way the fragment is kept so "sprite.svg#icon"
      // still selects the icon.
      const local =
        this.assetMappings.get(resolve(href)) ?? localizeImage(resource);
      if (!local) return;
      $el.attr('href', `${local}${fragment}`);
      $el.attr('xlink:href', `${local}${fragment}`);
    });

    // Inline style backgrounds
    $('[style]').each((_, el) => {
      const $el = $(el);
      const style = $el.attr('style');
      if (!style) return;
      const updated = style.replace(cssUrlPattern(), (match, _q, raw) => {
        const local = localizeImage(raw);
        return local ? `url('${local}')` : match;
      });
      if (updated !== style) $el.attr('style', updated);
    });

    // Inline <style> blocks: rewrite CSS url(...) and download assets
    const axios = (await import('axios')).default;
    await Promise.all(
      $('style')
        .toArray()
        .map(async (el) => {
          const $el = $(el);
          const css = $el.html() || '';
          if (!css.trim()) return;
          const updated = await this.rewriteCssUrlsAndDownload(
            css,
            this.cloner.url,
            axios,
            { fromInline: true },
          );
          if (updated !== css) $el.html(updated);
        }),
    );

    // Icons
    $('link[rel*="icon"]').each((_, el) => remapKnown($(el), 'href'));

    // Injected last so its URL map also covers the images and CSS assets
    // registered above. It is appended to <head>, so its position is the same.
    if (!shouldStrip) this.injectRuntimeRewriter($);

    // Safety net: finalize Next.js image replacements
    return this.finalizeNextImageReplacements($.html());
  }

  /**
   * Last-resort textual pass that swaps any remaining `/_next/image?...`
   * references in serialized HTML for local paths. Attributes that cannot be
   * resolved are returned exactly as they were.
   * @param {string} html - Serialized document.
   * @returns {string} HTML with resolvable optimizer URLs replaced.
   */
  finalizeNextImageReplacements(html) {
    const withAttrs = html.replace(
      /\b(src|poster|href)=["']([^"']*_next\/image\?[^"']+)["']/gi,
      (match, attr, urlVal) => {
        const local = this.getLocalForNextImage(urlVal);
        return local ? `${attr}="${local}"` : match;
      },
    );

    return withAttrs.replace(
      /\bsrcset=["']([^"']+)["']/gi,
      (match, srcsetVal) => {
        if (!srcsetVal.includes('/_next/image')) return match;
        const updated = mapSrcset(srcsetVal, (url) =>
          url.includes('/_next/image') ? this.getLocalForNextImage(url) : null,
        );
        return `srcset="${updated}"`;
      },
    );
  }

  /**
   * Download every collected asset into outputDir/assets/<kind>/<filename>.
   * Failures are reported through `logger.warnNonCritical` and never abort the
   * run. Scripts are skipped when JS is disabled.
   */
  async downloadAssetsWithExactNames() {
    const axios = (await import('axios')).default;
    const { assets, options } = this.cloner;
    const plainHeaders = { 'User-Agent': USER_AGENT };

    // Images
    if (assets.images.length) {
      this.announceDownload(`${assets.images.length} images`);
      for (const img of assets.images) {
        await this.downloadImageAsset(axios, img);
      }
    }

    // CSS externals
    const cssExternals = assets.styles.filter(
      (s) => s.url && s.type === 'external',
    );
    if (cssExternals.length) {
      this.announceDownload(`${cssExternals.length} CSS files`);
      await this.downloadPlainAssets(axios, cssExternals, {
        subdir: 'css',
        logKind: 'styles',
        responseType: 'text',
        timeout: 30000,
        headers: plainHeaders,
        transform: (text, css) =>
          this.rewriteCssUrlsAndDownload(text, css.url, axios, {
            fromInline: false,
          }),
      });
    }

    // JS externals: download when JS is enabled (based on decision)
    const jsExternals = options.disableJs
      ? []
      : assets.scripts.filter((s) => s.url);
    if (jsExternals.length) {
      this.announceDownload(`${jsExternals.length} JS files`);
      await this.downloadPlainAssets(axios, jsExternals, {
        subdir: 'js',
        logKind: 'scripts',
        responseType: 'text',
        timeout: 30000,
        headers: plainHeaders,
      });
    }

    // Fonts
    if (assets.fonts.length) {
      this.announceDownload(`${assets.fonts.length} fonts`);
      await this.downloadPlainAssets(axios, assets.fonts, {
        subdir: 'fonts',
        logKind: 'fonts',
        responseType: 'arraybuffer',
        timeout: 30000,
        headers: plainHeaders,
      });
    }

    // Icons
    if (assets.icons.length) {
      this.announceDownload(`${assets.icons.length} icons`);
      await this.downloadPlainAssets(axios, assets.icons, {
        subdir: 'icons',
        logKind: 'icon',
        responseType: 'arraybuffer',
        timeout: 30000,
        headers: plainHeaders,
      });
    }

    // Media
    if (assets.media.length) {
      this.announceDownload(`${assets.media.length} media files`);
      await this.downloadPlainAssets(axios, assets.media, {
        subdir: 'media',
        logKind: (media) => media.type || 'media',
        responseType: 'arraybuffer',
        timeout: 120000,
        headers: {
          'User-Agent': USER_AGENT,
          Accept: 'video/*;q=0.9,audio/*;q=0.9,*/*;q=0.5',
          Referer: this.cloner.url,
        },
      });
    }
  }

  /** Print a "Downloading ..." progress line unless the quiet option is set. */
  announceDownload(what) {
    if (this.cloner.options.quiet) return;
    console.log(chalk.gray(` Downloading ${what}...`));
  }

  /**
   * Save one image asset, trying its URL and then its Next.js URL.
   * Optimizer endpoints are skipped; in-memory buffers are written directly.
   */
  async downloadImageAsset(axios, img) {
    if (img.url && img.url.includes('/_next/image')) return; // skip optimizer endpoints

    const dest = path.join(
      this.cloner.options.outputDir,
      'assets',
      'images',
      img.filename,
    );
    await fs.ensureDir(path.dirname(dest));

    if (img.buffer) {
      await fs.writeFile(dest, img.buffer);
      return;
    }

    const candidates = [];
    if (img.url) candidates.push(img.url);
    if (img.nextJsUrl) candidates.push(this.cloner.resolveUrl(img.nextJsUrl));

    for (const candidate of candidates) {
      try {
        if (await this.fetchImageInto(axios, candidate, dest)) return;
      } catch (e) {
        this.cloner.logger.warnNonCritical('image', candidate, e);
      }
    }

    this.cloner.logger.warnNonCritical(
      'image',
      img.url || img.nextJsUrl,
      new Error('exhausted sources'),
    );
  }

  /**
   * Fetch `url` and write it to `dest` when the response is a non-empty 2xx.
   * A Microlink JSON envelope is followed to the image it points at; the
   * envelope itself is never written to disk as if it were the image.
   * @returns {Promise<boolean>} True when a file was written.
   */
  async fetchImageInto(axios, url, dest) {
    const request = {
      responseType: 'arraybuffer',
      timeout: 60000,
      headers: {
        'User-Agent': USER_AGENT,
        Accept: IMAGE_ACCEPT,
        Referer: this.cloner.url,
      },
      // Non-2xx is handled below instead of throwing.
      validateStatus: () => true,
    };
    const isUsable = (res) => {
      const status = res.status || 0;
      return status >= 200 && status < 300 && res.data?.byteLength > 0;
    };

    let res = await axios.get(url, request);
    const contentType = String(
      res.headers?.['content-type'] || '',
    ).toLowerCase();

    // Follow Microlink JSON if necessary
    if (/microlink\.io/i.test(url) && contentType.includes('application/json')) {
      if (!isUsable(res)) return false;
      const payload = JSON.parse(Buffer.from(res.data).toString('utf8'));
      const target = pickMicrolinkImageUrl(payload);
      if (!target) return false;
      res = await axios.get(target, request);
    }

    if (!isUsable(res)) return false;
    await fs.writeFile(dest, res.data);
    return true;
  }

  /**
   * Download a list of assets sequentially into assets/<subdir>/.
   * @param {object} axios - HTTP client.
   * @param {Array<{url?: string, filename: string}>} items - Assets to fetch.
   * @param {object} spec
   * @param {string} spec.subdir - Target directory below assets/.
   * @param {string|Function} spec.logKind - Warning category, or (item) => category.
   * @param {'text'|'arraybuffer'} spec.responseType - How the body is read and written.
   * @param {number} spec.timeout - Request timeout in milliseconds.
   * @param {object} spec.headers - Request headers.
   * @param {Function} [spec.transform] - Optional async (body, item) => body.
   */
  async downloadPlainAssets(axios, items, spec) {
    const { subdir, logKind, responseType, timeout, headers, transform } = spec;
    const isText = responseType === 'text';

    for (const item of items) {
      if (!item.url) continue;
      const dest = path.join(
        this.cloner.options.outputDir,
        'assets',
        subdir,
        item.filename,
      );
      try {
        const res = await axios.get(item.url, { responseType, timeout, headers });
        let body = isText ? res.data || '' : res.data;
        if (transform) body = await transform(body, item);
        await fs.ensureDir(path.dirname(dest));
        if (isText) await fs.writeFile(dest, body, 'utf8');
        else await fs.writeFile(dest, body);
      } catch (e) {
        const kind = typeof logKind === 'function' ? logKind(item) : logKind;
        this.cloner.logger.warnNonCritical(kind, item.url, e);
      }
    }
  }

  /**
   * Download every asset referenced through `url(...)` in a stylesheet and
   * return the CSS with those references pointing at the local copies.
   * References that cannot be resolved or downloaded are left unchanged.
   *
   * @param {string} cssText - Stylesheet source.
   * @param {string} cssBaseUrl - URL that relative references resolve against.
   * @param {object} axios - HTTP client.
   * @param {{fromInline?: boolean}} [options] - `fromInline: true` emits paths
   *   relative to index.html (./assets/...); otherwise paths are relative to
   *   assets/css/ (../...).
   * @returns {Promise<string>} Rewritten CSS.
   */
  async rewriteCssUrlsAndDownload(
    cssText,
    cssBaseUrl,
    axios,
    options = { fromInline: false },
  ) {
    const source = typeof cssText === 'string' ? cssText : String(cssText ?? '');
    if (!source) return source;

    const prefix = options?.fromInline ? './assets' : '..';
    const knownLocal = new Set(this.assetMappings.values());

    // One download per absolute URL, however many times (or spellings) it appears.
    const downloads = new Map();
    const download = (abs) => {
      if (!downloads.has(abs)) downloads.set(abs, this.fetchCssAsset(axios, abs));
      return downloads.get(abs);
    };

    const rawRefs = new Set(
      Array.from(source.matchAll(cssUrlPattern()), (m) => m[2]),
    );
    const replacements = new Map(); // raw reference -> local relative path

    await Promise.all(
      [...rawRefs].map(async (raw) => {
        // Skips data:, url(#fragment) and paths localized by an earlier pass.
        if (!isFetchableRef(raw) || knownLocal.has(raw.trim())) return;

        let abs;
        try {
          abs = new URL(raw.trim(), cssBaseUrl).href;
        } catch {
          return; // unparseable reference: keep it verbatim
        }

        const saved = await download(abs);
        if (saved) {
          replacements.set(raw, `${prefix}/${saved.subdir}/${saved.filename}`);
        }
      }),
    );

    // Single regex pass: replaces every occurrence and never interprets "$"
    // sequences in file names as replacement patterns.
    return source.replace(cssUrlPattern(), (match, _quote, raw) =>
      replacements.has(raw) ? `url('${replacements.get(raw)}')` : match,
    );
  }

  /**
   * Download one CSS-referenced asset (unless the file already exists) and
   * register it in `assetMappings`.
   * @returns {Promise<{subdir: string, filename: string}|null>} Null on failure.
   */
  async fetchCssAsset(axios, abs) {
    // Classify by extension, ignoring any query string and fragment.
    const pathname = abs.split(/[?#]/)[0].toLowerCase();
    let subdir = 'images';
    if (/\.(woff2?|ttf|otf|eot)$/.test(pathname)) subdir = 'fonts';
    else if (/\.(mp4|webm|ogg|mp3|wav|m4a)$/.test(pathname)) subdir = 'media';

    const filename = this.cloner.generateFilename(abs, subdir);
    const destAbs = path.join(
      this.cloner.options.outputDir,
      'assets',
      subdir,
      filename,
    );

    try {
      await fs.ensureDir(path.dirname(destAbs));
      if (!(await fs.pathExists(destAbs))) {
        const res = await axios.get(abs, {
          responseType: 'arraybuffer',
          timeout: 45000,
          headers: {
            'User-Agent': USER_AGENT,
            Accept: IMAGE_ACCEPT,
            Referer: this.cloner.url,
          },
        });
        await fs.writeFile(destAbs, res.data);
      }
      this.assetMappings.set(abs, `./assets/${subdir}/${filename}`);
      return { subdir, filename };
    } catch (e) {
      this.cloner.logger.warnNonCritical('css-asset', abs, e);
      return null;
    }
  }

  /**
   * Recursively materialize a nested description of directories and files
   * under outputDir: object values become directories, string values become
   * file contents.
   * @param {object} structure - Tree to create.
   * @param {string} [basePath] - Path below outputDir to create it in.
   */
  async createDirectoryStructure(structure, basePath = '') {
    for (const [name, content] of Object.entries(structure)) {
      const relPath = path.join(basePath, name);
      const fullPath = path.join(this.cloner.options.outputDir, relPath);

      if (typeof content === 'object' && content !== null) {
        await fs.ensureDir(fullPath);
        await this.createDirectoryStructure(content, relPath);
        continue;
      }

      await fs.ensureDir(path.dirname(fullPath));
      if (typeof content === 'string') await fs.writeFile(fullPath, content);
    }
  }

  /** Write a package.json with convenience scripts for serving the mirror. */
  async generateOfflinePackageJson() {
    const packageJson = {
      name: `mirror-${this.cloner.domain}`,
      version: '1.1.0',
      description: `Offline mirror of ${this.cloner.url}`,
      main: 'index.html',
      type: 'module',
      scripts: {
        start: 'node server.js',
        serve: 'python -m http.server 8000',
        'serve-node': 'npx http-server -p 8000 -o',
      },
      keywords: ['mirror', 'offline', 'website'],
      author: 'Mirror Web CLI',
      license: 'MIT',
    };

    await fs.writeFile(
      path.join(this.cloner.options.outputDir, 'package.json'),
      JSON.stringify(packageJson, null, 2),
    );
  }

  /** Write a README.md describing the snapshot and how to serve it locally. */
  async generateOfflineReadme() {
    const indexDescription = this.cloner.options.disableJs
      ? 'Static (JS removed) with localized assets'
      : 'JS-enabled page with all assets rewritten locally (hydration guard + validation)';

    const readme = `# ${this.cloner.domain} - Offline Mirror

> Offline snapshot of ${this.cloner.url}

Files:
- index.html -> ${indexDescription}

JS mode:
- Automatically chosen by preflight (no flags needed). Safety net validation will fallback to static if needed.

Serve locally:
- python -m http.server 8000
- or: npm start (uses provided server.js)
- then open http://localhost:8000

Note:
- Opening index.html directly from disk (file://) may disable Next/React hydration to preserve SSR content.
`;

    await fs.writeFile(
      path.join(this.cloner.options.outputDir, 'README.md'),
      readme,
    );
  }
}