UNPKG

squint-cli

Version:

Squint makes visual reviews of web app releases easy

76 lines (75 loc) 3.2 kB
"use strict";
// Interop helper emitted by the TypeScript compiler: wraps a CommonJS export
// in `{ default: ... }` unless the module is already flagged as an ES module.
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.crawlPaths = void 0;
const url_1 = require("url");
const lodash_1 = __importDefault(require("lodash"));
const p_queue_1 = __importDefault(require("p-queue"));
const chalk_1 = __importDefault(require("chalk"));

/**
 * Canonicalises an absolute URL so that equivalent links compare equal.
 *
 * @param {string} url - Absolute URL to normalise.
 * @param {object} opts
 * @param {boolean} [opts.includeHash] - When falsy, the fragment is dropped.
 * @param {boolean} [opts.includeSearchQuery] - When falsy, the query string
 *   is dropped.
 * @param {'add'|'remove'} [opts.trailingSlashMode] - 'add' forces exactly one
 *   trailing slash on the pathname, 'remove' strips all trailing slashes; any
 *   other value leaves the pathname untouched.
 * @returns {string} The serialised, normalised URL.
 * @throws {TypeError} If `url` cannot be parsed by the WHATWG `URL` class.
 */
function normalizeUrl(url, opts) {
    const urlParts = new url_1.URL(url);
    if (!opts.includeHash) {
        urlParts.hash = '';
    }
    if (!opts.includeSearchQuery) {
        urlParts.search = '';
    }
    if (opts.trailingSlashMode === 'add') {
        // Trim first so a path that already ends in "/" does not end up with "//".
        urlParts.pathname = `${lodash_1.default.trimEnd(urlParts.pathname, '/')}/`;
    }
    else if (opts.trailingSlashMode === 'remove') {
        urlParts.pathname = lodash_1.default.trimEnd(urlParts.pathname, '/');
    }
    return urlParts.toString();
}

/**
 * Breadth-first traversal of urls: opens every url of the current level,
 * collects the links found on those pages, then recurses into the links that
 * have not been seen yet.
 *
 * @param {object} inputs - Crawl options. The same object is also handed to
 *   `normalizeUrl`, so `includeHash`, `includeSearchQuery` and
 *   `trailingSlashMode` are read from it as well.
 * @param {object} inputs.pagePool - Must expose `use(callback)`, which runs
 *   the callback with a page offering `goto` and `$$eval` (presumably a pool
 *   of Puppeteer pages; confirm against the caller).
 * @param {Iterable<string>} inputs.urlsToVisit - Urls to open at this level;
 *   must also provide `forEach` (an Array or a Set does).
 * @param {Function} [inputs.shouldVisit] - Called as
 *   `shouldVisit(resolvedHref, { currentUrl, href }, visited)`; a falsy result
 *   excludes the link. Defaults to accepting every link.
 * @param {number} [inputs.maxDepth] - Once `memory.depth` reaches this value
 *   no further pages are opened. When undefined the depth is not limited.
 * @param {object} [memory] - State carried across recursion levels.
 * @param {Set<string>} memory.visited - Every url seen so far. Seeded with
 *   `inputs.urlsToVisit` exactly as given (the seeds are not normalised). The
 *   same Set instance is shared by all levels.
 * @param {number} memory.depth - Current depth, starting at 0.
 * @returns {Promise<string[]>} Unique `pathname + search + hash` strings of
 *   every url in `memory.visited`.
 * @throws Rejects with the first error raised while visiting a page (failed
 *   navigation, an href that `URL` cannot parse, an exception thrown by
 *   `shouldVisit`, ...).
 */
async function crawlPaths(inputs, memory = {
    visited: new Set(inputs.urlsToVisit),
    depth: 0,
}) {
    const { pagePool, urlsToVisit, shouldVisit = () => true, maxDepth } = inputs;
    // Remember that memory is shared between the tasks that run concurrently
    // with PQueue
    const queue = new p_queue_1.default();
    const newUrlsToVisit = new Set();
    // Promises returned by queue.add(). They must be awaited: queue.onIdle()
    // resolves even when a task rejected, which would leave the rejection
    // unhandled and let the crawl appear to succeed.
    const pendingVisits = [];
    // Stop following links if max depth has been reached already
    const maxLinkDepthReached = memory.depth >= maxDepth;
    if (!maxLinkDepthReached) {
        urlsToVisit.forEach((url) => {
            const visitTask = async () => {
                const hrefs = await pagePool.use(async (page) => {
                    await page.goto(url, { waitUntil: 'networkidle2' });
                    return page.$$eval('a', (as) => as.map((a) => a.href));
                });
                console.error(chalk_1.default.dim `Visited ${url}`);
                hrefs.forEach((href) => {
                    // Resolve against the current page so relative links work.
                    const resolvedHrefParts = new url_1.URL(href, url);
                    const resolvedHref = normalizeUrl(resolvedHrefParts.toString(), inputs);
                    const isVisitedAlready = memory.visited.has(resolvedHref);
                    const isCorrectProtocol = ['http:', 'https:'].includes(resolvedHrefParts.protocol);
                    const shouldVisitResult = isCorrectProtocol &&
                        !isVisitedAlready &&
                        shouldVisit(resolvedHref, { currentUrl: url, href }, memory.visited);
                    if (shouldVisitResult) {
                        newUrlsToVisit.add(resolvedHref);
                        // Mark immediately so concurrent tasks do not queue it twice.
                        memory.visited.add(resolvedHref);
                    }
                });
            };
            pendingVisits.push(queue.add(visitTask));
        });
    }
    // Settles once every visit of this level has finished, and rejects with
    // the first failure instead of dropping it.
    await Promise.all(pendingVisits);
    if (newUrlsToVisit.size > 0) {
        // The spread copies only the reference to `visited`, so deeper levels
        // keep filling the very Set that is read below; the recursive return
        // value is therefore not needed.
        await crawlPaths({ ...inputs, urlsToVisit: newUrlsToVisit }, { ...memory, depth: memory.depth + 1 });
    }
    const urls = [...memory.visited];
    // Reduce to origin-less paths; the Set removes duplicates that differ
    // only by origin.
    const urlPaths = new Set(urls.map((url) => {
        const parts = new url_1.URL(url);
        return `${parts.pathname}${parts.search}${parts.hash}`;
    }));
    return [...urlPaths];
}
exports.crawlPaths = crawlPaths;