prerendererest
Version:
A CLI-based prerenderer for static site generation
579 lines (522 loc) • 19.2 kB
JavaScript
const puppeteer = require("puppeteer");
const express = require("express");
const serveStatic = require("serve-static");
const fallback = require("express-history-api-fallback");
const path = require("path");
const nativeFs = require("fs");
const mkdirp = require("mkdirp");
const minify = require("html-minifier").minify;
// Main flow:
// run() → startServer() → crawl() → fetchPage() → save HTML files
// Usage:
// node react_snap_replacement.js --include "/index.html,/about.html" --source "./build" --headless
const defaultOptions = {
port: 45678,
crawl: true,
source: "./docs",
destination: "./docs",
include: ["/index.html"],
userAgent: "Prerendererest",
headless: false,
puppeteerArgs: ["--no-sandbox", "--disable-setuid-sandbox"],
puppeteer: { cache: false },
minifyHtml: {
collapseWhitespace: true,
removeComments: true
},
viewport: { width: 480, height: 850 },
skipThirdPartyRequests: false,
concurrency: 1,
inlineCss: false,
removeStyleTags: false,
preloadImages: false,
asyncScriptTags: false,
removeScriptTags: false,
skipExistingCheck: false,
};
const defaults = userOptions => {
const options = { ...defaultOptions, ...userOptions };
options.destination = options.destination || options.source;
if (!options.include.length) throw new Error("Include option should be non-empty");
return options;
};
const crawl = async (opt) => {
const {
options,
basePath,
beforeFetch,
afterFetch,
onEnd,
publicPath,
sourceDir
} = opt;
let streamClosed = false;
const errorReport = {
pageErrors: [],
consoleErrors: [],
httpErrors: [],
fetchErrors: []
};
// exit process
const onSigint = () => {
console.log("\nGracefully shutting down...");
process.exit(1);
};
process.on("SIGINT", onSigint);
// Exit on unhandled promise rejections
process.on("unhandledRejection", error => {
console.log("🔥 UnhandledPromiseRejectionWarning", error);
errorReport.pageErrors.push({ route: 'global', error: error.message });
});
const queue = [];
let enqued = 0;
let processed = 0;
const uniqueUrls = new Set();
const sourcemapStore = {};
// Add a URL to the queue if it's not already there and it's not a third-party URL
const addToQueue = newUrl => {
if(!newUrl) return;
if(newUrl.includes('mailto:')) return;
if(newUrl.includes('javascript:')) return;
const { hostname, search, hash } = new URL(newUrl);
newUrl = newUrl.replace(`${search || ""}${hash || ""}`, "");
if (hostname === "localhost" && !uniqueUrls.has(newUrl) && !streamClosed) {
uniqueUrls.add(newUrl);
enqued++;
queue.push(newUrl);
if (enqued == 2 && options.crawl) {
addToQueue(`${basePath}${publicPath}404.html`);
}
}
};
const browser = await puppeteer.launch({
headless: options.headless,
args: options.puppeteerArgs,
executablePath: options.puppeteerExecutablePath,
ignoreHTTPSErrors: options.puppeteerIgnoreHTTPSErrors,
handleSIGINT: false
});
//
const fetchPage = async pageUrl => {
const route = pageUrl.replace(basePath, "");
let skipExistingFile = false;
const routePath = route.replace(/\//g, path.sep);
const { ext } = path.parse(routePath);
if (ext !== ".html" && ext !== "") {
const filePath = path.join(sourceDir, routePath);
console.log(`🕸 Inspecting File ${filePath}`);
skipExistingFile = nativeFs.existsSync(filePath);
}
else{
}
// Crawl the page if it's not already crawled and it's not a third-party URL
if (!skipExistingFile) {
console.log(`🕸 Pulling file ${route}`);
try {
const page = await browser.newPage();
const client = await page.target().createCDPSession();
await client.send('ServiceWorker.disable');
await page.setCacheEnabled(options.puppeteer.cache);
if (options.viewport) await page.setViewport(options.viewport);
if (options.skipThirdPartyRequests) await skipThirdPartyRequests({ page, options, basePath });
enableLogging({ page, options, route, onError: (error) => {
errorReport.pageErrors.push({ route, error });
}, sourcemapStore, errorReport });
if (beforeFetch) await beforeFetch({ page, route });
await page.setUserAgent(options.userAgent);
const tracker = createTracker(page);
try {
await page.goto(pageUrl, { waitUntil: "networkidle2" });
} catch (e) {
e.message = tracker.augmentTimeoutError(e.message);
throw e;
} finally {
tracker.dispose();
}
if (options.waitFor) await page.waitFor(options.waitFor);
if (options.crawl) {
console.log(`🕸 Crawling Rendered Page: ${route}`);
const links = await getLinks({ page });
links.forEach(addToQueue);
}
if (afterFetch) await afterFetch({ page, route, browser, addToQueue });
await page.close();
console.log(`✅ crawled ${processed + 1} out of ${enqued} (${route})`);
} catch (e) {
console.log(`🔥 error at ${route}`, e);
errorReport.fetchErrors.push({ route, error: e.message });
}
}
else{
console.log(`DID NOT CRAWL ${route}`);
}
processed++;
if (enqued === processed) {
streamClosed = true;
}
return pageUrl;
};
if (options.include) {
options.include.map(x => addToQueue(`${basePath}${x}`));
}
while (queue.length > 0) {
await Promise.all(
queue.splice(0, options.concurrency).map(fetchPage)
);
}
await browser.close();
// Print error report
console.log('\n📊 CRAWLING COMPLETE - ERROR REPORT:');
console.log('=====================================');
const totalErrors = errorReport.pageErrors.length + errorReport.consoleErrors.length +
errorReport.httpErrors.length + errorReport.fetchErrors.length;
if (totalErrors === 0) {
console.log('✅ No errors detected during crawling!');
} else {
console.log(`❌ Total errors found: ${totalErrors}\n`);
if (errorReport.fetchErrors.length > 0) {
console.log(`🔥 Fetch Errors (${errorReport.fetchErrors.length}):`);
errorReport.fetchErrors.forEach(({ route, error }) => {
console.log(` - ${route}: ${error}`);
});
console.log('');
}
if (errorReport.pageErrors.length > 0) {
console.log(`🔥 Page Errors (${errorReport.pageErrors.length}):`);
errorReport.pageErrors.forEach(({ route, error }) => {
console.log(` - ${route}: ${error}`);
});
console.log('');
}
if (errorReport.consoleErrors.length > 0) {
console.log(`🔥 Console Errors (${errorReport.consoleErrors.length}):`);
errorReport.consoleErrors.forEach(({ route, error }) => {
console.log(` - ${route}: ${error}`);
});
console.log('');
}
if (errorReport.httpErrors.length > 0) {
console.log(`⚠️ HTTP Errors (${errorReport.httpErrors.length}):`);
errorReport.httpErrors.forEach(({ route, error }) => {
console.log(` - ${route}: ${error}`);
});
console.log('');
}
}
onEnd && onEnd();
};
const run = async (userOptions, { fs } = { fs: nativeFs }) => {
const options = defaults(userOptions);
const sourceDir = path.normalize(`${process.cwd()}/${options.source}`);
const destinationDir = path.normalize(`${process.cwd()}/${options.destination}`);
const startServer = options => {
const app = express()
.use(options.publicPath || '/', serveStatic(sourceDir))
.use(fallback("200.html", { root: sourceDir }));
const server = require("http").createServer(app);
server.listen(options.port);
return server;
};
if (!options.skipExistingCheck && fs.existsSync(path.join(sourceDir, "200.html"))) {
throw new Error("Cannot run prerendererest - this will break the build");
}
fs.createReadStream(path.join(sourceDir, "index.html")).pipe(fs.createWriteStream(path.join(sourceDir, "200.html")));
if (destinationDir !== sourceDir) {
mkdirp.sync(destinationDir);
fs.createReadStream(path.join(sourceDir, "index.html")).pipe(fs.createWriteStream(path.join(destinationDir, "200.html")));
}
const server = startServer(options);
const basePath = `http://localhost:${options.port}`;
await crawl({
options,
basePath,
publicPath: options.publicPath || '/',
sourceDir,
beforeFetch: async ({ page }) => {
if (options.skipThirdPartyRequests) {
await page.setRequestInterception(true);
page.on('request', request => {
if (request.url().startsWith(basePath)) {
request.continue();
} else {
request.abort();
}
});
}
},
afterFetch: async ({ page, route }) => {
const content = await page.content();
const minifiedContent = minify(content, options.minifyHtml);
const routePath = route.replace(options.publicPath || '/', "");
const filePath = path.join(destinationDir, routePath);
// Create directories if they do not exist
if (!fs.existsSync(path.dirname(filePath))) {
fs.mkdirSync(path.dirname(filePath), { recursive: true });
}
fs.writeFileSync(filePath, minifiedContent);
},
onEnd: () => {
server.close();
},
});
};
if (require.main === module) {
const args = process.argv.slice(2);
const userOptions = {};
// Show help if requested
if (args.includes('--help') || args.includes('-h')) {
console.log(`
Usage: prerendererest [options]
Options:
--source <path> Source directory (default: ./docs)
--destination <path> Destination directory (default: same as source)
--include <pages> Comma-separated list of pages to include (default: /index.html)
--headless Run browser in headless mode
--crawl Enable automatic crawling (default: true)
--no-crawl Disable automatic crawling
--port <number> Port for local server (default: 45678)
--concurrency <number> Number of concurrent processes (default: 1)
--userAgent <string> Custom user agent (default: Prerendererest)
--viewport <json> Viewport size as JSON object (default: {"width":480,"height":850})
--skipThirdPartyRequests Block external requests during rendering
--skipExistingCheck Skip the 200.html existence check
--minifyHtml <json> HTML minification options as JSON
--removeScriptTags Remove script tags from HTML
--removeStyleTags Remove style tags from HTML
--asyncScriptTags Add async attribute to script tags
--inlineCss Inline CSS styles
--preloadImages Add preload hints for images
--puppeteerArgs <args> Comma-separated Puppeteer arguments
-h, --help Show this help message
Examples:
prerendererest --source ./build --headless
prerendererest --include "/index.html,/about.html" --source ./build --headless
prerendererest --source ./build --destination ./dist --crawl --concurrency 4
`);
process.exit(0);
}
for (let i = 0; i < args.length; i++) {
if (args[i] === '--include' && args[i + 1]) {
userOptions.include = args[i + 1].split(',');
i++;
} else if (args[i] === '--source' && args[i + 1]) {
userOptions.source = args[i + 1];
i++;
} else if (args[i] === '--destination' && args[i + 1]) {
userOptions.destination = args[i + 1];
i++;
} else if (args[i] === '--headless') {
userOptions.headless = true;
} else if (args[i] === '--port' && args[i + 1]) {
userOptions.port = parseInt(args[i + 1], 10);
i++;
} else if (args[i] === '--crawl') {
userOptions.crawl = true;
} else if (args[i] === '--no-crawl') {
userOptions.crawl = false;
} else if (args[i] === '--userAgent' && args[i + 1]) {
userOptions.userAgent = args[i + 1];
i++;
} else if (args[i] === '--puppeteerArgs' && args[i + 1]) {
userOptions.puppeteerArgs = args[i + 1].split(',');
i++;
} else if (args[i] === '--puppeteer.cache' && args[i + 1]) {
userOptions.puppeteer = userOptions.puppeteer || {};
userOptions.puppeteer.cache = args[i + 1] === 'true';
i++;
} else if (args[i] === '--minifyHtml' && args[i + 1]) {
try {
userOptions.minifyHtml = JSON.parse(args[i + 1]);
} catch {
// ignore parse error
}
i++;
} else if (args[i] === '--viewport' && args[i + 1]) {
try {
userOptions.viewport = JSON.parse(args[i + 1]);
} catch {
// ignore parse error
}
i++;
} else if (args[i] === '--skipThirdPartyRequests') {
userOptions.skipThirdPartyRequests = true;
} else if (args[i] === '--concurrency' && args[i + 1]) {
userOptions.concurrency = parseInt(args[i + 1], 10);
i++;
} else if (args[i] === '--inlineCss') {
userOptions.inlineCss = true;
} else if (args[i] === '--removeStyleTags') {
userOptions.removeStyleTags = true;
} else if (args[i] === '--preloadImages') {
userOptions.preloadImages = true;
} else if (args[i] === '--asyncScriptTags') {
userOptions.asyncScriptTags = true;
} else if (args[i] === '--removeScriptTags') {
userOptions.removeScriptTags = true;
} else if (args[i] === '--skipExistingCheck') {
userOptions.skipExistingCheck = true;
}
}
run(userOptions).catch(e => {
console.error(e);
process.exit(1);
});
}
exports.run = run;
exports.defaultOptions = defaultOptions;
// Helper functions for crawling
const skipThirdPartyRequests = async opt => {
const { page, options, basePath } = opt;
if (!options.skipThirdPartyRequests) return;
await page.setRequestInterception(true);
page.on("request", request => {
if (request.url().startsWith(basePath)) {
request.continue();
} else {
request.abort();
}
});
};
const enableLogging = opt => {
const { page, options, route, onError, sourcemapStore, errorReport } = opt;
page.on("console", msg => {
const text = msg.text();
if (text === "JSHandle@object") {
Promise.all(msg.args().map(objectToJson)).then(args =>
console.log(`💬 console.log at ${route}:`, ...args)
);
} else if (text === "JSHandle@error") {
Promise.all(msg.args().map(errorToString)).then(args => {
console.log(`💬 console.log at ${route}:`, ...args);
errorReport.consoleErrors.push({ route, error: args.join(' ') });
});
} else {
console.log(`️️️💬 console.log at ${route}:`, text);
}
});
page.on("error", msg => {
console.log(`🔥 error at ${route}:`, msg);
errorReport.pageErrors.push({ route, error: msg.message });
onError && onError(msg.message);
});
page.on("pageerror", e => {
if (options.sourceMaps) {
mapStackTrace(e.stack || e.message, {
isChromeOrEdge: true,
store: sourcemapStore || {}
})
.then(result => {
const stackRows = result.split("\n");
const puppeteerLine =
stackRows.findIndex(x => x.includes("puppeteer")) ||
stackRows.length - 1;
const errorMsg = `${(e.stack || e.message).split("\n")[0] + "\n"}${stackRows.slice(0, puppeteerLine).join("\n")}`;
console.log(`🔥 pageerror at ${route}: ${errorMsg}`);
errorReport.pageErrors.push({ route, error: errorMsg });
})
.catch(e2 => {
console.log(`🔥 pageerror at ${route}:`, e);
console.log(`️️️⚠️ warning at ${route} (error in source maps):`, e2.message);
errorReport.pageErrors.push({ route, error: e.message });
});
} else {
console.log(`🔥 pageerror at ${route}:`, e);
errorReport.pageErrors.push({ route, error: e.message });
}
onError && onError(e.message);
});
page.on("response", response => {
if (response.status() >= 400) {
let responseRoute = "";
try {
responseRoute = response._request
.headers()
.referer.replace(`http://localhost:${options.port}`, "");
} catch (e) {}
const errorMsg = `got ${response.status()} HTTP code for ${response.url()}`;
console.log(`️️️⚠️ warning at ${responseRoute}: ${errorMsg}`);
errorReport.httpErrors.push({ route: responseRoute, error: errorMsg });
}
});
};
const getLinks = async opt => {
const { page } = opt;
const anchors = await page.evaluate(() =>
Array.from(document.querySelectorAll("a")).map(anchor => {
if (anchor.href.baseVal) {
const a = document.createElement("a");
a.href = anchor.href.baseVal;
return a.href;
}
return anchor.href;
})
);
const iframes = await page.evaluate(() =>
Array.from(document.querySelectorAll("iframe")).map(iframe => iframe.src)
);
return anchors.concat(iframes);
};
const createTracker = (page) => {
let requestCount = 0;
let successCount = 0;
let failureCount = 0;
const pendingRequests = new Set();
const updateStatus = (request) => {
if (request._failureText) { failureCount += 1; }
else { successCount += 1; }
pendingRequests.delete(request);
};
const onRequest = (request) => {
requestCount += 1;
pendingRequests.add(request);
};
const onRequestFinished = (request) => updateStatus(request);
const onRequestFailed = (request) => updateStatus(request);
page.on('request', onRequest);
page.on('requestfinished', onRequestFinished);
page.on('requestfailed', onRequestFailed);
const dispose = () => {
page.off('request', onRequest);
page.off('requestfinished', onRequestFinished);
page.off('requestfailed', onRequestFailed);
};
const augmentTimeoutError = (error) => {
return `${error.message}\nPending requests: ${pendingRequests.size}\nSuccess requests: ${successCount}\nFailure requests: ${failureCount}\nRequest count: ${requestCount}`;
};
return {
dispose,
augmentTimeoutError,
};
};
const objectToJson = async jsHandle => {
try {
return await jsHandle.jsonValue();
} catch (e) {
// If jsonValue fails, try to get a string representation
try {
return await jsHandle.evaluate(obj => {
if (obj === null) return 'null';
if (obj === undefined) return 'undefined';
if (typeof obj === 'function') return obj.toString();
if (typeof obj === 'object') {
try {
return JSON.stringify(obj, null, 2);
} catch {
return Object.prototype.toString.call(obj);
}
}
return String(obj);
});
} catch {
return jsHandle.toString();
}
}
};
const errorToString = async jsHandle => {
try {
return await jsHandle.evaluate(error => error.toString());
} catch (e) {
return jsHandle.toString();
}
};