UNPKG

mirror-web-cli

Version:

Professional website mirroring tool with intelligent framework preservation, AI-powered analysis, and comprehensive asset optimization

1,153 lines (1,054 loc) 37.4 kB
/** * @fileoverview MirrorCloner - Core Website Mirroring Engine * @version 1.1.2 */ import { BrowserEngine } from './browser-engine.js'; import { AssetManager } from './asset-manager.js'; import { FrameworkAnalyzer } from './framework-analyzer.js'; import { FrameworkWriter } from './framework-writer.js'; import { Display } from './display.js'; import { Logger } from './logger.js'; import { AIAnalyzer } from '../ai/ai-analyzer.js'; import chalk from 'chalk'; import { load } from 'cheerio'; import { makeAssetFilename } from './filename-utils.js'; import http from 'http'; import fs from 'fs'; import path from 'path'; export class MirrorCloner { constructor(urlStr, options = {}) { this.url = urlStr; this.baseUrl = new URL(urlStr); this.domain = this.baseUrl.hostname.replace(/^www\./, ''); // Always auto by default; engine will decide JS ON/OFF const jsMode = 'auto'; this.options = { outputDir: options.outputDir || options.output || this.generateOutputDir(options.ai, this.domain), debug: !!options.debug, clean: !!options.clean, timeout: options.timeout || 120000, headless: options.headless !== false, quiet: !!options.quiet, suppressWarnings: options.suppressWarnings !== false, ai: !!options.ai, jsMode, // Working flag decided by preflight disableJs: false, ...options, }; if (!this.options.outputDir) { this.options.outputDir = this.generateOutputDir( this.options.ai, this.domain, ); } // Initialize core components this.browserEngine = new BrowserEngine(this.options); this.assetManager = new AssetManager(this); this.frameworkAnalyzer = new FrameworkAnalyzer(); this.frameworkWriter = new FrameworkWriter(this); this.display = new Display(); this.logger = new Logger(this.options); // Optional AI this.aiAnalyzer = this.options.ai ? new AIAnalyzer() : null; // Data this.$ = null; // Cheerio DOM instance this.analysis = null; // Framework analysis results this.assets = { images: [], styles: [], scripts: [], fonts: [], icons: [], media: [], }; this.frameworkData = {}; // Framework-specific data storage // Microlink capture helpers this._microlinkHandlers = { response: null }; this._microlinkCaptured = new Set(); // Keep track of active request interception handlers per page this._reqInterceptionHandlers = new WeakMap(); } // Safe pause that works with Puppeteer, Playwright, or plain timers async pause(pageOrMs, maybeMs) { const hasPage = typeof pageOrMs === 'object' && pageOrMs !== null; const page = hasPage ? pageOrMs : null; const ms = hasPage ? maybeMs : pageOrMs; if (page && typeof page.waitForTimeout === 'function') { try { await page.waitForTimeout(ms); return; } catch { // fallthrough to timer } } await new Promise((res) => setTimeout(res, ms)); } async clone() { const startTime = Date.now(); try { // Display header and configuration info this.display.header( 'Mirror Web CLI v1.1', 'Advanced Website Mirroring Tool', ); this.display.info(`🌐 Source: ${this.url}`); this.display.info(`📁 Output: ${this.options.outputDir}`); // Step 1: Preflight dual-render comparator to choose JS mode automatically this.display.step( 1, 9, 'Preflight', 'Evaluating site with JS ON vs OFF to decide optimal mode...', ); const pre = await this.preflightDualRender(); this.options.disableJs = pre.decision === 'off'; const chosenHtml = this.options.disableJs ? pre.htmlOff : pre.htmlOn; if (this.options.debug) { console.log( chalk.gray( ` Preflight decision: ${pre.decision.toUpperCase()} (framework=${ pre.frameworkHint }, shadowHosts=${pre.metricsOff.shadowHosts}, customEls=${ pre.metricsOff.customEls })`, ), ); } // Step 2: Analyze website framework and technology stack (on chosen HTML) this.display.step( 2, 9, 'Framework Analysis', 'Detecting technology stack and framework patterns...', ); this.analysis = await this.frameworkAnalyzer.analyze( chosenHtml, this.url, ); this.displayFrameworkResults(); // Step 3: Initialize browser for asset collection (single pass for assets) this.display.step(3, 9, 'Browser Setup', 'Launching headless browser...'); const page = await this.browserEngine.createPage(); // Attach Microlink sniffer BEFORE navigation to capture preview assets as they load this.attachMicrolinkSniffer(page); // Step 4: Load and process target website for asset harvesting this.display.step( 4, 9, 'Page Loading', 'Loading website content and harvesting assets...', ); await this.loadPage(page); // Simulate link preview hovers so previews render in DOM and network calls are made await this.simulateLinkPreviews(page).catch(() => {}); await this.waitForNetworkIdle(page, 1200).catch(() => {}); await this.waitForImagesSettled(page, 4000).catch(() => {}); // Harvest computed-style assets (background images, pseudo-elements) into the DOM await this.collectComputedAssets(page).catch(() => {}); await this.waitForNetworkIdle(page, 1000).catch(() => {}); // Detach sniffer now that previews have had a chance to load this.detachMicrolinkSniffer(page); // Handle URL redirects and update internal references const finalUrl = page.url(); if (finalUrl && finalUrl !== this.url) { this.url = finalUrl; this.baseUrl = new URL(finalUrl); this.domain = this.baseUrl.hostname.replace(/^www\./, ''); if (this.options.debug) { this.display.info(`🔁 Final URL detected: ${finalUrl}`); } } // Step 5: Shadow DOM serialization (improves static snapshots) await this.serializeShadowDOM(page).catch(() => {}); // Step 6: Extract all website assets using chosen HTML as baseline this.display.step( 5, 9, 'Asset Extraction', 'Downloading and processing assets...', ); const htmlForExtraction = await page.content().catch(() => chosenHtml); // Prefer current page DOM (richer with computed asset injections); fallback to chosen preflight HTML this.$ = load(htmlForExtraction || chosenHtml); await this.assetManager.extractAllAssets(); // Step 7: AI-powered analysis (optional) this.display.step( 6, 9, 'AI Analysis', this.options.ai ? 'Using OpenAI GPT-4o for insights...' : 'Skipping AI analysis...', ); if (this.options.ai && this.aiAnalyzer?.isEnabled) { try { const ai = await this.aiAnalyzer.analyzeWebsite( this.url, htmlForExtraction || chosenHtml, this.analysis, this.assets, ); this.analysis.aiInsights = ai.aiInsights; this.analysis.enhanced = ai.enhanced; this.aiAnalyzer.displayAnalysis({ ...this.analysis, aiInsights: ai.aiInsights, enhanced: ai.enhanced, }); } catch (e) { if (this.options.debug) console.log(chalk.yellow('⚠️ AI analysis error:'), e?.message || e); } } // Step 8: Generate output based on decided JS mode this.display.step( 7, 9, 'Output Generation', this.options.disableJs ? 'Creating static snapshot (JS disabled)...' : 'Creating JS-enabled offline output...', ); await this.frameworkWriter.generateOfflineProject(); // Step 9: Offline validation and auto-fallback (safety net) let autoFallenBack = false; if (!this.options.disableJs) { this.display.step( 8, 9, 'Offline Validation', 'Verifying offline rendering (http and file protocols)...', ); const validHttp = await this.validateOfflineOutputHttp(); const validFile = await this.validateOfflineOutputFile(); const valid = validHttp && validFile; if (!valid) { this.display.warning( `Offline validation detected a blank/empty root (${ validHttp ? 'http OK' : 'http blank' }, ${ validFile ? 'file OK' : 'file blank' }). Applying auto static snapshot fallback...`, ); this.options.disableJs = true; await this.frameworkWriter.writeIndexHtmlOnly(); autoFallenBack = true; } } // Finalize this.display.step( 9, 9, 'Finalize', autoFallenBack ? 'Auto-fallback applied (static snapshot).' : 'Output ready.', ); await this.browserEngine.close(); // Display success summary this.display.success('Website mirroring completed successfully!'); this.display.summary({ outputDir: this.options.outputDir, framework: this.analysis?.primaryFramework?.name || 'Vanilla HTML', outputType: this.getOutputType(), assets: this.getAssetStats(), duration: Date.now() - startTime, }); // Show any suppressed warning summary this.logger.printSuppressedSummary(); return true; } catch (error) { await this.browserEngine.close(); this.display.error('Mirroring failed', error.message); if (this.options.debug) { console.log(chalk.red('Debug details:'), error.stack); } return false; } } // --- Preflight: decide JS ON vs OFF automatically --- async preflightDualRender() { // Pass A: JS ON (normal) const pageOn = await this.browserEngine.createPage(); const consoleOn = []; const collectConsole = (msg) => { try { const t = msg.type ? msg.type() : 'log'; consoleOn.push(`[${t}] ${msg.text?.() || msg.text()}`); } catch { // ignore } }; pageOn.on('console', collectConsole); await pageOn.goto(this.url, { waitUntil: 'domcontentloaded', timeout: this.options.timeout, }); await this.waitForRootReady(pageOn); // Lightweight settle await this.pause(pageOn, 500); const metricsOn = await this._collectPreflightMetrics(pageOn); const htmlOn = await pageOn.content(); await pageOn.close(); // Pass B: JS "OFF" by blocking script requests const pageOff = await this.browserEngine.createPage(); await this._blockScripts(pageOff, true); const consoleOff = []; const collectConsoleOff = (msg) => { try { const t = msg.type ? msg.type() : 'log'; consoleOff.push(`[${t}] ${msg.text?.() || msg.text()}`); } catch {} }; pageOff.on('console', collectConsoleOff); await pageOff.goto(this.url, { waitUntil: 'domcontentloaded', timeout: this.options.timeout, }); await this.waitForRootReady(pageOff).catch(() => {}); await this.pause(pageOff, 300); const metricsOff = await this._collectPreflightMetrics(pageOff); const htmlOff = await pageOff.content(); await pageOff.close(); // Framework hint: detect Next.js in htmlOn or htmlOff const frameworkHint = this._frameworkHint(htmlOn || '') || this._frameworkHint(htmlOff || ''); // Decision rules const hydrationError = consoleOn .join('\n') .match(/Hydration failed|did not match|Minified React error/i) != null || metricsOn.overlayPresent || (metricsOn.blank && frameworkHint === 'nextjs'); let decision = 'on'; // default prefer JS ON for richer sites if (frameworkHint === 'nextjs' && hydrationError) { decision = 'off'; } else if (!metricsOn.blank && metricsOff.blank) { // JS ON looks healthy, JS OFF is blank -> ON decision = 'on'; } else if (metricsOn.blank && !metricsOff.blank) { // JS ON blanks (likely hydration wiping) -> OFF decision = 'off'; } else if (!metricsOn.blank && !metricsOff.blank) { // Both render; bias using shadow/custom elements density (prefer ON if complex) if ( metricsOff.shadowHosts > 25 || metricsOff.customEls > 40 || metricsOff.shadowHosts + metricsOff.customEls > Math.max(60, (metricsOn.nodeCount || 0) * 0.02) ) { decision = 'on'; } else { decision = 'on'; // still prefer ON when both are fine } } else { // Both look blank; for safety choose OFF to preserve SSR if present decision = 'off'; } if (this.options.debug) { console.log( chalk.gray( ` Preflight metrics: ON(blank=${metricsOn.blank}, text=${metricsOn.textLen}, overlay=${metricsOn.overlayPresent}) | OFF(blank=${metricsOff.blank}, text=${metricsOff.textLen}, shadow=${metricsOff.shadowHosts}, custom=${metricsOff.customEls})`, ), ); } return { decision, htmlOn, htmlOff, metricsOn, metricsOff, frameworkHint }; } _frameworkHint(html) { try { if (!html) return null; if (html.includes('id="__NEXT_DATA__"') || /\/_next\//i.test(html)) return 'nextjs'; if (/<div id="root">/.test(html)) return 'react'; return null; } catch { return null; } } async _collectPreflightMetrics(page) { try { return await page.evaluate(() => { function getRoot() { return ( document.querySelector('#__next, #root, [data-reactroot]') || document.body ); } function rectArea(el) { try { const r = el.getBoundingClientRect(); return Math.max(0, r.width) * Math.max(0, r.height); } catch { return 0; } } function countCustomElements() { try { const all = Array.from(document.querySelectorAll('*')); return all.filter((el) => el.tagName.includes('-')).length; } catch { return 0; } } function countShadowHosts() { let n = 0; try { document.querySelectorAll('*').forEach((el) => { if (el.shadowRoot) n++; }); } catch {} return n; } const root = getRoot(); const htmlLen = (root?.innerHTML || '').trim().length; const textLen = (root?.innerText || '').replace(/\s+/g, '').length; const area = root ? rectArea(root) : 0; const style = root ? getComputedStyle(root) : null; const hidden = !root || (style && (style.display === 'none' || style.visibility === 'hidden')); const overlayPresent = !!document.querySelector( '[data-nextjs-dialog-overlay]', ); const nodeCount = document.querySelectorAll('*').length; const blank = hidden || htmlLen < 100 || (textLen < 10 && area < 1500); // small/empty return { blank, htmlLen, textLen, area, hidden, overlayPresent, nodeCount, shadowHosts: countShadowHosts(), customEls: countCustomElements(), }; }); } catch { return { blank: false, htmlLen: 0, textLen: 0, area: 0, hidden: false, overlayPresent: false, nodeCount: 0, shadowHosts: 0, customEls: 0, }; } } // Robust script blocking for Puppeteer (avoids "Request is already handled!" by ensuring one handler and single resolution) async _blockScripts(page, enabled) { try { // Puppeteer-style interception if (page.setRequestInterception) { // Remove any existing handler we attached earlier const existing = this._reqInterceptionHandlers.get(page); if (existing) { try { page.off('request', existing); } catch {} this._reqInterceptionHandlers.delete(page); } await page.setRequestInterception(enabled); if (enabled) { const handler = (req) => { try { const type = typeof req.resourceType === 'function' ? req.resourceType() : ''; if (type === 'script') { // Abort script requests to simulate JS OFF // Never call continue() afterwards for this request return req.abort('blockedbyclient').catch(() => {}); } // Allow other resource types return req.continue().catch(() => {}); } catch { // Swallow any errors to avoid double-handling } }; page.on('request', handler); this._reqInterceptionHandlers.set(page, handler); } return; } // Playwright-style routing (best-effort; noop if not available) if (page.route) { // Note: To avoid stacking routes we'd need unroute. Since our main env is Puppeteer, // we keep this light and avoid adding multiple routes. if (enabled) { await page.route('**/*', (route) => { try { const req = route.request(); const type = req && typeof req.resourceType === 'function' ? req.resourceType() : ''; if (type === 'script') return route.abort(); return route.continue(); } catch { try { return route.continue(); } catch {} } }); } else if (page.unroute) { try { await page.unroute('**/*'); } catch {} } } } catch { // ignore } } async loadPage(page) { await page.goto(this.url, { waitUntil: 'domcontentloaded', timeout: this.options.timeout, }); await this.waitForRootReady(page); await this.scrollToBottomAndLoad(page); await this.waitForImagesSettled(page, 8000); await this.waitForNetworkIdle(page, 1500).catch(() => {}); } // Serialize shadow DOM into Declarative Shadow DOM (<template shadowroot="open">) // so static snapshots include content of web components. async serializeShadowDOM(page) { await page.evaluate(() => { try { const hosts = []; document.querySelectorAll('*').forEach((el) => { if (el.shadowRoot) hosts.push(el); }); hosts.forEach((host) => { if (host.querySelector(':scope > template[shadowroot]')) return; const tmpl = document.createElement('template'); const mode = host.shadowRoot && host.shadowRoot.mode ? host.shadowRoot.mode : 'open'; tmpl.setAttribute( 'shadowroot', mode === 'closed' ? 'closed' : 'open', ); let html = ''; try { html = host.shadowRoot ? host.shadowRoot.innerHTML : ''; } catch (e) { html = ''; } tmpl.innerHTML = html; host.appendChild(tmpl); }); } catch (e) { // ignore } }); } // Network sniffer for Microlink assets (images and JSON to discover screenshot URLs) attachMicrolinkSniffer(page) { const isMicrolink = (u) => { try { const host = new URL(u).hostname.toLowerCase(); return host.endsWith('microlink.io'); } catch { return /microlink\.io/i.test(String(u)); } }; const handler = async (response) => { try { const u = response.url(); if (!isMicrolink(u)) return; const status = response.status(); const headers = response.headers() || {}; const ctype = String(headers['content-type'] || '').toLowerCase(); if (status < 200 || status >= 300) return; if (this._microlinkCaptured.has(u)) return; this._microlinkCaptured.add(u); if (ctype.startsWith('image/')) { const buf = await response.buffer().catch(() => null); if (!buf || !buf.length) return; const filename = this.generateFilename(u, 'images'); this.assets.images.push({ url: u, filename, buffer: buf, local: true, }); if (this.options.debug) { console.log(chalk.gray(` ⤵︎ Captured Microlink image: ${u}`)); } return; } if (ctype.includes('application/json')) { const text = await response.text().catch(() => ''); if (!text) return; let data; try { data = JSON.parse(text); } catch { return; } const target = data?.data?.image?.url || data?.data?.screenshot?.url || data?.data?.thumbnail?.url || data?.image?.url || data?.screenshot?.url || null; if (target) { const abs = this.resolveUrl(target); const filename = this.generateFilename(abs, 'images'); if (!this.assets.images.find((x) => x.url === abs)) { this.assets.images.push({ url: abs, filename, local: false, }); if (this.options.debug) { console.log( chalk.gray( ` ⤵︎ Discovered Microlink target image: ${abs}`, ), ); } } } } } catch { // ignore errors } }; page.on('response', handler); this._microlinkHandlers.response = handler; } detachMicrolinkSniffer(page) { if (this._microlinkHandlers.response) { page.off('response', this._microlinkHandlers.response); this._microlinkHandlers.response = null; } } // Hover/link-preview simulator to surface dynamic hovercards/popovers into the DOM async simulateLinkPreviews(page) { try { await page.evaluate(() => window.scrollTo(0, 0)); const selectors = [ '[data-hovercard]', '[data-popover]', '[data-tooltip]', '[data-trigger="hover"]', '[role="tooltip"]', 'a', ]; const handles = await page.$$(selectors.join(',')); let count = 0; for (const h of handles) { if (count++ >= 30) break; try { await h.evaluate((el) => { const r = el.getBoundingClientRect(); if (r.width > 0 && r.height > 0) el.scrollIntoView({ block: 'center', inline: 'center' }); }); await h.hover(); await Promise.race([ page .waitForResponse((r) => /microlink\.io/.test(r.url()), { timeout: 800, }) .catch(() => {}), this.pause(page, 150), ]); } catch {} } } catch { // ignore failures } } // Collect computed-style url(...) assets and inject hidden <img> so the snapshot includes them async collectComputedAssets(page) { await page.evaluate(() => { function extractUrls(val) { if (!val || typeof val !== 'string') return []; const urls = []; const re = /url\(\s*['"]?([^'")]+)['"]?\s*\)/gi; let m; while ((m = re.exec(val)) !== null) urls.push(m[1]); return urls; } function collect(el, out) { const cs = window.getComputedStyle(el); [ 'backgroundImage', 'background', 'maskImage', 'WebkitMaskImage', 'borderImageSource', 'listStyleImage', ].forEach((p) => extractUrls(cs[p]).forEach((u) => out.add(u))); const before = window.getComputedStyle(el, '::before'); const after = window.getComputedStyle(el, '::after'); [before, after].forEach((ps) => { extractUrls(ps.content).forEach((u) => out.add(u)); extractUrls(ps.backgroundImage).forEach((u) => out.add(u)); }); if (el.shadowRoot) { el.shadowRoot.querySelectorAll('img[src]').forEach((img) => { const s = img.getAttribute('src'); if (s) out.add(s); }); } } const urls = new Set(); document.querySelectorAll('*').forEach((el) => collect(el, urls)); urls.forEach((u) => { if (!u || String(u).startsWith('data:')) return; const img = document.createElement('img'); img.setAttribute('data-mw-computed', '1'); img.decoding = 'async'; img.loading = 'eager'; img.alt = ''; img.src = u; img.style.cssText = 'display:none !important;width:0;height:0;opacity:0;'; document.body.appendChild(img); }); }); } async waitForRootReady(page) { try { await page.waitForFunction( () => { const roots = ['#__next', '#root', '#app']; for (const sel of roots) { const el = document.querySelector(sel); if (el && el.children && el.children.length > 0) return true; } return document.readyState === 'complete'; }, { timeout: 20000 }, ); } catch { this.display.warning('Framework/app root readiness timeout'); } } async scrollToBottomAndLoad(page) { await page.evaluate(() => { return new Promise((resolve) => { let lastHeight = 0; let sameCount = 0; const tick = () => { window.scrollTo(0, document.body.scrollHeight); const h = document.body.scrollHeight; if (h > lastHeight) { lastHeight = h; sameCount = 0; setTimeout(tick, 300); } else { sameCount++; if (sameCount < 5) { setTimeout(tick, 300); } else { window.scrollTo(0, 0); resolve(); } } }; tick(); }); }); } async waitForImagesSettled(page, maxWait = 8000) { try { await page.waitForFunction( () => { const imgs = Array.from(document.images || []); if (imgs.length === 0) return true; return imgs.every((img) => { const ok = img.complete && img.naturalWidth > 0; const chosen = !!img.currentSrc || !!img.getAttribute('src'); return ok || chosen; }); }, { timeout: maxWait }, ); } catch { // proceed anyway } } async waitForNetworkIdle(page, idleTime = 2000, maxInflight = 2) { let inflight = 0; let fulfill; let timeoutId; const cleanup = () => { page.off('request', onRequest); page.off('requestfinished', onDone); page.off('requestfailed', onDone); if (timeoutId) clearTimeout(timeoutId); }; const onRequest = (req) => { const reqUrl = req.url(); if (reqUrl.startsWith('ws:') || reqUrl.startsWith('wss:')) return; inflight++; }; const onDone = (req) => { const reqUrl = req.url(); if (reqUrl.startsWith('ws:') || reqUrl.startsWith('wss:')) return; inflight = Math.max(0, inflight - 1); tick(); }; const tick = () => { if (inflight <= maxInflight) { if (timeoutId) clearTimeout(timeoutId); timeoutId = setTimeout(() => { cleanup(); fulfill(); }, idleTime); } else if (timeoutId) { clearTimeout(timeoutId); } }; return new Promise((x) => { fulfill = x; page.on('request', onRequest); page.on('requestfinished', onDone); page.on('requestfailed', onDone); tick(); }); } displayFrameworkResults() { if (this.analysis?.detected?.length > 0) { const primary = this.analysis.detected[0]; this.display.frameworkCard({ name: primary.name, confidence: Math.round(primary.confidence * 100), complexity: this.analysis.complexity, strategy: this.getMirroringStrategy(primary.key), }); } } async processFrameworkSpecific(_html) { // no-op (handled by generic pipeline) } getMirroringStrategy(framework) { const strategies = { nextjs: this.options.disableJs ? 'Static snapshot (JS removed) to avoid hydration/blank page' : 'Preserve DOM; localize assets for exact Next.js look', react: this.options.disableJs ? 'Static snapshot (JS removed) to preserve SSR DOM' : 'Preserve DOM; localize assets for exact UI', gatsby: 'Gatsby static DOM with localized assets', vue: 'Preserve DOM; localize assets for exact UI', angular: 'Preserve DOM; localize assets for exact UI', }; return strategies[framework] || 'HTML/CSS/JS static files'; } getOutputType() { const willStrip = this.options.disableJs; return willStrip ? 'Static HTML (JS removed) + CSS/Images' : 'HTML/CSS/JS (Offline-Ready)'; } getAssetStats() { return { images: this.assets.images.length, styles: this.assets.styles.length, scripts: this.assets.scripts.length, fonts: this.assets.fonts.length, icons: this.assets.icons.length, media: this.assets.media.length, total: Object.values(this.assets).reduce( (sum, arr) => sum + arr.length, 0, ), }; } resolveUrl(u) { if (!u) return ''; try { if (u.startsWith('http') || u.startsWith('data:')) return u; if (u.startsWith('//')) return this.baseUrl.protocol + u; if (u.startsWith('/')) return `${this.baseUrl.protocol}//${this.baseUrl.host}${u}`; return new URL(u, this.url).href; } catch { return u; } } generateFilename(u, category = 'bin') { return makeAssetFilename(this.resolveUrl(u), category); } generateOutputDir(aiEnabled, domain) { const suffix = aiEnabled ? '-ai-enhanced' : '-standard'; return `./${domain}${suffix}`; } // --- Offline validation helpers (safety net) --- async validateOfflineOutputHttp() { const { server, baseUrl } = await this._startStaticServer( this.options.outputDir, ); try { const page = await this.browserEngine.createPage(); await page.goto(baseUrl + '/index.html', { waitUntil: 'domcontentloaded', timeout: 30000, }); const ok = await this._evaluateOfflineHealth(page); await page.close(); return ok; } catch (e) { if (this.options.debug) { console.log(chalk.yellow('⚠️ HTTP validation error:'), e.message); } // On validation errors, prefer to accept output (don’t fallback aggressively) return true; } finally { await new Promise((res) => server.close(() => res())); } } async validateOfflineOutputFile() { try { const page = await this.browserEngine.createPage(); const fileUrl = 'file://' + path.resolve(this.options.outputDir, 'index.html'); await page.goto(fileUrl, { waitUntil: 'domcontentloaded', timeout: 30000, }); const ok = await this._evaluateOfflineHealth(page); await page.close(); return ok; } catch (e) { if (this.options.debug) { console.log(chalk.yellow('⚠️ file:// validation error:'), e.message); } // Treat file validation failures as a signal to fallback return false; } } async _evaluateOfflineHealth(page) { const delays = [120, 700, 2200, 3500]; for (const d of delays) { await this.pause(page, d); /* eslint-disable no-await-in-loop */ const status = await page.evaluate(() => { function getRoot() { return ( document.querySelector('#__next, #root, [data-reactroot]') || document.body ); } function rectArea(el) { try { const r = el.getBoundingClientRect(); return Math.max(0, r.width) * Math.max(0, r.height); } catch { return 0; } } const root = getRoot(); const htmlLen = (root?.innerHTML || '').trim().length; const textLen = (root?.innerText || '').replace(/\s+/g, '').length; const area = root ? rectArea(root) : 0; const style = root ? getComputedStyle(root) : null; const hidden = !root || (style && (style.display === 'none' || style.visibility === 'hidden')); const blank = hidden || htmlLen < 100 || (textLen < 10 && area < 1500); // small/empty return { blank, htmlLen, textLen, area, hidden }; }); if (!status.blank) return true; } return false; } async _startStaticServer(rootDir) { const root = path.resolve(rootDir); const server = http.createServer((req, res) => { try { const reqUrl = new URL(req.url, 'http://localhost'); let pathname = decodeURIComponent(reqUrl.pathname); if (pathname === '/') pathname = '/index.html'; const filePath = this._safeJoin(root, '.' + pathname); if (fs.existsSync(filePath) && fs.statSync(filePath).isFile()) { this._serveFile(res, filePath); return; } if (fs.existsSync(filePath) && fs.statSync(filePath).isDirectory()) { const indexInDir = path.join(filePath, 'index.html'); if (fs.existsSync(indexInDir)) { this._serveFile(res, indexInDir); return; } } const fallback = path.join(root, 'index.html'); if (fs.existsSync(fallback)) { this._serveFile(res, fallback); return; } res.writeHead(404, { 'Content-Type': 'text/plain; charset=utf-8' }); res.end('404 Not Found'); } catch (e) { res.writeHead(500, { 'Content-Type': 'text/plain; charset=utf-8' }); res.end('500 Internal Server Error\n' + (e?.message || '')); } }); await new Promise((res) => server.listen(0, () => res())); const port = server.address().port; const baseUrl = `http://127.0.0.1:${port}`; return { server, baseUrl }; } _safeJoin(root, p) { const resolved = path.resolve(root, p); if (!resolved.startsWith(root)) return root; return resolved; } _serveFile(res, filePath) { const ext = path.extname(filePath).toLowerCase(); const mime = { '.html': 'text/html; charset=utf-8', '.css': 'text/css; charset=utf-8', '.js': 'application/javascript; charset=utf-8', '.mjs': 'application/javascript; charset=utf-8', '.json': 'application/json; charset=utf-8', '.svg': 'image/svg+xml', '.png': 'image/png', '.jpg': 'image/jpeg', '.jpeg': 'image/jpeg', '.webp': 'image/webp', '.avif': 'image/avif', '.gif': 'image/gif', '.ico': 'image/x-icon', '.mp4': 'video/mp4', '.webm': 'video/webm', '.ogg': 'video/ogg', '.mp3': 'audio/mpeg', '.wav': 'audio/wav', '.m4a': 'audio/mp4', '.woff2': 'font/woff2', '.woff': 'font/woff', '.ttf': 'font/ttf', '.otf': 'font/otf', '.eot': 'application/vnd.ms-fontobject', '.txt': 'text/plain; charset=utf-8', '.map': 'application/json; charset=utf-8', }[ext] || 'application/octet-stream'; res.writeHead(200, { 'Content-Type': mime, 'Cache-Control': ext === '.html' ? 'no-cache' : 'public, max-age=60', }); fs.createReadStream(filePath).pipe(res); } }