/**
 * @percy/agent — asset discovery service.
 * An agent process for integrating with Percy.
 * (Compiled JavaScript output.)
 */
// Compiled CommonJS prelude: mark this module as transpiled-from-ESM for
// interop, and pre-declare the exported bindings (assigned further below).
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.AssetDiscoveryService = exports.MAX_SNAPSHOT_WIDTHS = void 0;
const merge = require("deepmerge");
const pool = require("generic-pool");
const puppeteer = require("puppeteer");
const url_1 = require("url");
const configuration_1 = require("../configuration/configuration");
const domain_match_1 = require("../utils/domain-match");
const logger_1 = require("../utils/logger");
const response_cache_1 = require("../utils/response-cache");
const wait_for_network_idle_1 = require("../utils/wait-for-network-idle");
const percy_client_service_1 = require("./percy-client-service");
const response_service_1 = require("./response-service");
// Upper bound on the number of render widths a single snapshot may request;
// enforced in AssetDiscoveryService.discoverResources before any work starts.
exports.MAX_SNAPSHOT_WIDTHS = 10;
/**
 * Discovers the sub-resources (CSS, JS, images, fonts, ...) needed to render a
 * DOM snapshot by serving the snapshot to a pool of Puppeteer pages and
 * intercepting every network request those pages make while rendering.
 */
class AssetDiscoveryService extends percy_client_service_1.default {
    /**
     * @param buildId - Percy build id the discovered resources belong to.
     * @param configuration - optional 'asset-discovery' configuration section;
     *   falls back to the packaged DEFAULT_CONFIGURATION when omitted.
     */
    constructor(buildId, configuration) {
        super();
        this.browser = null;
        this.pagePool = null;
        this.configuration = configuration ||
            configuration_1.DEFAULT_CONFIGURATION.agent['asset-discovery'];
        this.responseService = new response_service_1.default(buildId, this.configuration['allowed-hostnames'], this.configuration['cache-responses']);
    }
    /**
     * Launches the browser and fills the page pool.
     * Must complete before discoverResources is called.
     */
    async setup() {
        logger_1.profile('-> assetDiscoveryService.setup');
        const browser = this.browser = await this.createBrowser();
        this.pagePool = await this.createPagePool(() => {
            return this.createPage(browser);
        }, this.configuration['page-pool-size-min'], this.configuration['page-pool-size-max']);
        logger_1.profile('-> assetDiscoveryService.setup');
    }
    /** Launches a Chromium instance configured for cross-origin asset capture. */
    async createBrowser() {
        logger_1.profile('-> assetDiscoveryService.puppeteer.launch');
        const browser = await puppeteer.launch({
            args: [
                '--no-sandbox',
                // Lets the snapshot page fetch cross-origin assets for capture.
                '--disable-web-security',
            ],
            ignoreHTTPSErrors: true,
            // The agent handles SIGINT itself so teardown() can run cleanly.
            handleSIGINT: false,
        });
        logger_1.profile('-> assetDiscoveryService.puppeteer.launch');
        return browser;
    }
    /**
     * Creates a generic-pool of pages bounded by [min, max].
     * @param exec - factory producing a new ready-to-use page.
     */
    async createPagePool(exec, min, max) {
        logger_1.profile('-> assetDiscoveryService.createPagePool');
        const result = pool.createPool({
            create() {
                return exec();
            },
            destroy(page) {
                return page.close();
            },
        }, { min, max });
        logger_1.profile('-> assetDiscoveryService.createPagePool');
        return result;
    }
    /** Opens a new page with request interception enabled. */
    async createPage(browser) {
        logger_1.profile('-> assetDiscoveryService.browser.newPage');
        const page = await browser.newPage();
        // Interception lets us serve the DOM snapshot and filter asset requests.
        await page.setRequestInterception(true);
        logger_1.profile('-> assetDiscoveryService.browser.newPage');
        return page;
    }
    /**
     * Renders domSnapshot at each requested width (in parallel) and returns the
     * deduplicated list of resources the pages requested while rendering.
     * Returns [] after logging an error when the service isn't set up or too
     * many widths were requested.
     */
    async discoverResources(rootResourceUrl, domSnapshot, options, logger) {
        logger_1.profile('-> assetDiscoveryService.discoverResources');
        if (this.browser === null) {
            logger.error('Puppeteer failed to open browser.');
            return [];
        }
        if (!this.pagePool) {
            logger.error('Failed to create pool of pages.');
            return [];
        }
        if (options.widths && options.widths.length > exports.MAX_SNAPSHOT_WIDTHS) {
            logger.error(`Too many widths requested. Max is ${exports.MAX_SNAPSHOT_WIDTHS}. Requested: ${options.widths}`);
            return [];
        }
        rootResourceUrl = this.parseRequestPath(rootResourceUrl);
        logger.debug(logger_1.addLogDate(`discovering assets for URL: ${rootResourceUrl}`));
        const { enableJavaScript = false, widths = configuration_1.DEFAULT_CONFIGURATION.snapshot.widths, requestHeaders, } = options;
        // Do asset discovery for each requested width in parallel. We don't keep track of which page
        // is doing work, and instead rely on the fact that we always have fewer widths to work on than
        // the number of pages in our pool. If we wanted to do something smarter here, we should consider
        // switching to use puppeteer-cluster instead.
        logger_1.profile('--> assetDiscoveryService.discoverForWidths', { url: rootResourceUrl });
        let resources = [].concat(...(await Promise.all(widths.map((width) => this.resourcesForWidth(this.pagePool, width, domSnapshot, rootResourceUrl, enableJavaScript, requestHeaders, logger)))));
        logger_1.profile('--> assetDiscoveryService.discoverForWidths');
        // Dedupe by resourceUrl, as they must be unique when sent to the Percy API.
        // A Set keeps this O(n) instead of Array.includes' O(n^2).
        const seenResourceUrls = new Set();
        resources = resources.filter((resource) => {
            if (seenResourceUrls.has(resource.resourceUrl)) {
                return false;
            }
            seenResourceUrls.add(resource.resourceUrl);
            return true;
        });
        logger_1.profile('-> assetDiscoveryService.discoverResources', { resourcesDiscovered: resources.length });
        return resources;
    }
    /**
     * Speculative browser requests (prefetch, preload, ...) are never part of
     * the rendered page, so we abort them rather than let them resolve.
     */
    shouldRequestResolve(request) {
        const requestPurpose = request.headers().purpose;
        switch (requestPurpose) {
            case 'prefetch':
            case 'preload':
            case 'dns-prefetch':
            case 'prerender':
            case 'preconnect':
            case 'subresource':
                return false;
            default:
                return true;
        }
    }
    /** Drains the page pool and closes the browser. Safe to call repeatedly. */
    async teardown() {
        await this.cleanPagePool();
        await this.closeBrowser();
    }
    // We shouldn't bother passing on requests that will never be saved
    shouldProcessRequest(resourceUrl, rootResourceUrl) {
        const parsedRootResourceUrl = new url_1.URL(rootResourceUrl);
        const rootUrl = `${parsedRootResourceUrl.protocol}//${parsedRootResourceUrl.host}`;
        // Only capture resources with a proper protocol we support capturing.
        // BUGFIX: this guard used to `return true` for every http(s) URL, which
        // made the allowed-hostnames and root-URL checks below dead code and
        // let every third-party request through. Non-http(s) schemes (data:,
        // blob:, ws:, ...) can never be saved, so reject them here and fall
        // through to the allow-list checks for everything else.
        if (!(/^https?:/).test(resourceUrl)) {
            return false;
        }
        // Process if the resourceUrl has a hostname in the allowedHostnames
        if (this.configuration['allowed-hostnames'].some((hostname) => domain_match_1.default(hostname, resourceUrl))) {
            return true;
        }
        // Capture if the resourceUrl is the same as the rootUrL
        if (resourceUrl.startsWith(rootUrl)) {
            return true;
        }
        // We won't be capturing this asset, no need to wait for it to respond
        return false;
    }
    /**
     * Renders the snapshot in a pooled page at a single viewport width and
     * collects every successfully loaded response as a candidate resource.
     * The page is always released back to the pool, even when navigation or
     * resource processing fails.
     */
    async resourcesForWidth(pool, width, domSnapshot, rootResourceUrl, enableJavaScript, requestHeaders = {}, logger) {
        logger.debug(logger_1.addLogDate(`discovering assets for width: ${width}`));
        logger_1.profile('--> assetDiscoveryService.pool.acquire', { url: rootResourceUrl });
        const page = await pool.acquire();
        logger_1.profile('--> assetDiscoveryService.pool.acquire');
        page.on('request', async (request) => {
            const requestUrl = request.url();
            try {
                if (!this.shouldRequestResolve(request)) {
                    await request.abort();
                    return;
                }
                // Serve the DOM snapshot itself in place of the real root document.
                if (requestUrl === rootResourceUrl) {
                    await request.respond({
                        body: domSnapshot,
                        contentType: 'text/html',
                        status: 200,
                    });
                    return;
                }
                if (!this.shouldProcessRequest(requestUrl, rootResourceUrl)) {
                    logger.debug(logger_1.addLogDate(`Aborting ${requestUrl} -- will never be saved`));
                    await request.abort();
                    return;
                }
                // Serve from the in-memory response cache when enabled. Look the
                // entry up once instead of twice.
                if (this.configuration['cache-responses'] === true) {
                    const cachedResponse = response_cache_1.getResponseCache(requestUrl);
                    if (cachedResponse) {
                        logger.debug(logger_1.addLogDate(`Asset cache hit for ${requestUrl}`));
                        await request.respond(cachedResponse);
                        return;
                    }
                }
                logger.debug(logger_1.addLogDate(`Starting processing for: ${requestUrl}`));
                await request.continue();
            }
            catch (error) {
                logger_1.logError(error);
                await request.abort();
            }
        });
        const maybeResourcePromises = [];
        // Listen on 'requestfinished', which tells us a request completed successfully.
        // We could also listen on 'response', but then we'd have to check if it was successful.
        page.on('requestfinished', async (request) => {
            const response = request.response();
            if (response) {
                if (this.configuration['cache-responses'] === true) {
                    await response_cache_1.cacheResponse(response, logger);
                }
                // Parallelize the work in processResponse as much as possible, but make sure to
                // wait for it to complete before returning from the asset discovery phase.
                const promise = this.responseService.processResponse(rootResourceUrl, response, width, logger);
                promise.catch(logger_1.logError);
                maybeResourcePromises.push(promise);
            }
            else {
                logger.debug(logger_1.addLogDate(`No response for ${request.url()}. Skipping.`));
            }
        });
        // Debug log failed requests. (Fixed a stray '}' previously appended to
        // this message.)
        page.on('requestfailed', async (request) => {
            logger.debug(logger_1.addLogDate(`Failed to load ${request.url()} : ${request.failure().errorText}`));
        });
        let maybeResources = [];
        try {
            await page.setJavaScriptEnabled(enableJavaScript);
            await page.setViewport(Object.assign(page.viewport(), { width }));
            await page.setExtraHTTPHeaders(merge.all([
                this.configuration['request-headers'],
                requestHeaders,
            ]));
            logger_1.profile('--> assetDiscoveryService.page.goto', { url: rootResourceUrl });
            await page.goto(rootResourceUrl);
            logger_1.profile('--> assetDiscoveryService.page.goto');
            logger_1.profile('--> assetDiscoveryService.waitForNetworkIdle');
            await wait_for_network_idle_1.default(page, this.configuration['network-idle-timeout']);
            logger_1.profile('--> assetDiscoveryService.waitForNetworkIdle');
        }
        catch (error) {
            logger.error(logger_1.addLogDate(`${error.name} ${error.message}`));
            logger.debug(logger_1.addLogDate(error));
        }
        try {
            logger_1.profile('--> assetDiscoveryServer.waitForResourceProcessing');
            maybeResources = await Promise.all(maybeResourcePromises);
            logger_1.profile('--> assetDiscoveryServer.waitForResourceProcessing');
        }
        catch (error) {
            logger.error(logger_1.addLogDate(`${error.name} ${error.message}`));
            logger.debug(logger_1.addLogDate(error));
        }
        // Always release the page from the pool, stripping our listeners first so
        // the page's next user doesn't inherit them.
        logger_1.profile('--> assetDiscoveryService.pool.release', { url: rootResourceUrl });
        page.removeAllListeners('request');
        page.removeAllListeners('requestfinished');
        page.removeAllListeners('requestfailed');
        await pool.release(page);
        logger_1.profile('--> assetDiscoveryService.pool.release');
        // processResponse may yield null/undefined for non-capturable responses.
        return maybeResources.filter(Boolean);
    }
    /** Drains and clears the page pool (no-op if setup() never ran). */
    async cleanPagePool() {
        if (this.pagePool === null) {
            return;
        }
        await this.pagePool.drain();
        await this.pagePool.clear();
        this.pagePool = null;
    }
    /** Closes the Puppeteer browser (no-op if setup() never ran). */
    async closeBrowser() {
        if (this.browser === null) {
            return;
        }
        await this.browser.close();
        this.browser = null;
    }
}
// Public export consumed by the agent's snapshot pipeline.
exports.AssetDiscoveryService = AssetDiscoveryService;