UNPKG

chrome-har-capturer

Version:

Capture HAR files from a headless Chrome instance

372 lines (355 loc) 13 kB
'use strict'; const url = require('url'); const querystring = require('querystring'); function create(pages) { // HAR template const packageInfo = require('../package'); const har = { log: { version: '1.2', creator: { name: 'Chrome HAR Capturer', version: packageInfo.version, comment: packageInfo.homepage }, pages: [], entries: [] } }; // fill the HAR template each page info for (const [pageIndex, stats] of pages.entries()) { const pageId = `page_${pageIndex + 1}_${String(Math.random()).slice(2)}`; const log = parsePage(String(pageId), stats); har.log.pages.push(log.page); har.log.entries.push(...log.entries); } return har; } function parsePage(pageId, stats) { // page load started at const firstRequest = stats.entries.get(stats.firstRequestId).requestParams; const wallTimeMs = firstRequest.wallTime * 1000; const startedDateTime = new Date(wallTimeMs).toISOString(); // page timings const onContentLoad = stats.domContentEventFiredMs - stats.firstRequestMs; const onLoad = stats.loadEventFiredMs - stats.firstRequestMs; // process this page load entries const entries = [...stats.entries.values()] .map((entry) => parseEntry(pageId, entry)) .filter((entry) => entry); // outcome return { page: { id: pageId, title: stats.url, startedDateTime, pageTimings: { onContentLoad, onLoad }, _user: stats.user }, entries }; } function parseEntry(pageref, entry) { // skip requests without response (requestParams is always present; except // for WebSockets see Stats._Network_webSocketClosed) if (!entry.responseParams || !entry.isWebSocket && !entry.responseFinishedS && !entry.responseFailedS) { return null; } // skip entries without timing information (doc says optional) if (!entry.isWebSocket && !entry.responseParams.response.timing) { return null; } // extract common fields const {request} = entry.requestParams; const {response} = entry.responseParams; // fix WebSocket values since the protocol provides incomplete information if (entry.isWebSocket) { const requestStatus = entry.responseParams.response.requestHeadersText.split(' '); request.method = requestStatus[0]; request.url = requestStatus[1]; response.protocol = entry.responseParams.response.headersText.split(' ')[0]; } // entry started const wallTimeMs = entry.requestParams.wallTime * 1000; const startedDateTime = new Date(wallTimeMs).toISOString(); // HTTP version or protocol name (e.g., quic) const httpVersion = response.protocol || 'unknown'; // request/response status const {method, url} = request; const {status, statusText} = response; // parse and measure headers const headers = parseHeaders(httpVersion, request, response); // check for redirections const redirectURL = getHeaderValue(response.headers, 'location', ''); // parse query string const queryString = parseQueryString(request.url); // parse post data const postData = parsePostData(request, headers); // compute entry timings const {time, timings} = computeTimings(entry); // fetch connection information (strip IPv6 [...]) let serverIPAddress = response.remoteIPAddress; if (serverIPAddress) { serverIPAddress = serverIPAddress.replace(/^\[(.*)\]$/, '$1'); } const connection = String(response.connectionId); // fetch entry initiator const _initiator = entry.requestParams.initiator; // fetch resource priority const {changedPriority} = entry; const newPriority = changedPriority && changedPriority.newPriority; const _priority = newPriority || request.initialPriority; let _resourceType = entry.requestParams.type ? entry.requestParams.type.toLowerCase() : undefined; // parse and measure payloads const payload = computePayload(entry, headers); const {mimeType} = response; const encoding = entry.responseBodyIsBase64 ? 'base64' : undefined; // add WebSocket frames let _webSocketMessages; if (entry.isWebSocket) { _webSocketMessages = entry.frames; _resourceType = 'websocket'; } // fill entry return { pageref, startedDateTime, time, request: { method, url, httpVersion, cookies: [], // TODO headers: headers.request.pairs, queryString, headersSize: headers.request.size, bodySize: payload.request.bodySize, postData }, response: { status, statusText, httpVersion, cookies: [], // TODO headers: headers.response.pairs, redirectURL, headersSize: headers.response.size, bodySize: payload.response.bodySize, _transferSize: payload.response.transferSize, content: { size: entry.responseLength, mimeType: entry.isWebSocket ? 'x-unknown' : mimeType, compression: payload.response.compression, text: entry.responseBody, encoding } }, cache: {}, _fromDiskCache: response.fromDiskCache, timings, serverIPAddress, connection, _initiator, _priority, _webSocketMessages, _resourceType }; } function parseHeaders(httpVersion, request, response) { // convert headers from map to pairs const requestHeaders = response.requestHeaders || request.headers; const responseHeaders = response.headers; const headers = { request: { map: requestHeaders, pairs: zipNameValue(requestHeaders), size: -1 }, response: { map: responseHeaders, pairs: zipNameValue(responseHeaders), size: -1 } }; // estimate the header size (including HTTP status line) according to the // protocol (this information not available due to possible compression in // newer versions of HTTP) if (httpVersion.match(/^http\/[01].[01]$/)) { const requestText = getRawRequest(request, headers.request.pairs); const responseText = getRawResponse(response, headers.response.pairs); headers.request.size = requestText.length; headers.response.size = responseText.length; } return headers; } function computeTimings(entry) { // handle the websocket case specially if (entry.isWebSocket) { // from initial request to the last frame, this is obviously an // approximation, but HAR does not directly support WebSockets const sessionTime = ( entry.frames.length === 0 ? -1 : toMilliseconds(entry.frames[entry.frames.length - 1].time - entry.requestParams.timestamp) ); return { time: sessionTime, timings: { blocked: -1, dns: -1, connect: -1, send: 0, wait: sessionTime, receive: -1, // XXX does not really make sense for WebSockets... ssl: -1 } }; } // see https://github.com/ChromeDevTools/devtools-frontend/blob/29fab47578afb1ead4eb63414ec30cada4814b62/front_end/sdk/HARLog.js#L255-L329 const timing = entry.responseParams.response.timing; // compute the total duration (including blocking time) const finishedTimestamp = entry.responseFinishedS || entry.responseFailedS; const time = toMilliseconds(finishedTimestamp - entry.requestParams.timestamp); // compute individual components const blockedBase = toMilliseconds(timing.requestTime - entry.requestParams.timestamp); const blockedStart = firstNonNegative([ timing.dnsStart, timing.connectStart, timing.sendStart ]); const blocked = blockedBase + (blockedStart === -1 ? 0 : blockedStart); let dns = -1; if (timing.dnsStart >= 0) { const start = firstNonNegative([timing.connectStart, timing.sendStart]); dns = start - timing.dnsStart; } let connect = -1; if (timing.connectStart >= 0) { connect = timing.sendStart - timing.connectStart; } const send = timing.sendEnd - timing.sendStart; const wait = timing.receiveHeadersEnd - timing.sendEnd; const receive = toMilliseconds(finishedTimestamp - (timing.requestTime + timing.receiveHeadersEnd / 1000)); let ssl = -1; if (timing.sslStart >= 0 && timing.sslEnd >= 0) { ssl = timing.sslEnd - timing.sslStart; } return { time, timings: {blocked, dns, connect, send, wait, receive, ssl} }; } function computePayload(entry, headers) { // From Chrome: // - responseHeaders.size: size of the headers if available (otherwise // -1, e.g., HTTP/2) // - entry.responseLength: actual *decoded* body size // - entry.encodedResponseLength: total on-the-wire data // // To HAR: // - headersSize: size of the headers if available (otherwise -1, e.g., // HTTP/2) // - bodySize: *encoded* body size // - _transferSize: total on-the-wire data // - content.size: *decoded* body size // - content.compression: *decoded* body size - *encoded* body size let bodySize; let compression; let transferSize = entry.encodedResponseLength; if (headers.response.size === -1) { // if the headers size is not available (e.g., newer versions of // HTTP) then there is no way (?) to figure out the encoded body // size (see #27) bodySize = -1; compression = undefined; } else if (entry.responseFailedS) { // for failed requests (`Network.loadingFailed`) the transferSize is // just the header size, since that evend does not hold the // `encodedDataLength` field, this is performed manually (however this // cannot be done for HTTP/2 which is handled by the above if) bodySize = 0; compression = 0; transferSize = headers.response.size; } else { // otherwise the encoded body size can be obtained as follows bodySize = entry.encodedResponseLength - headers.response.size; compression = entry.responseLength - bodySize; } return { request: { // trivial case for request bodySize: parseInt(getHeaderValue(headers.request.map, 'content-length', -1), 10) }, response: { bodySize, transferSize, compression } }; } function zipNameValue(map) { const pairs = []; for (const [name, value] of Object.entries(map)) { // insert multiple pairs if the key is repeated const values = Array.isArray(value) ? value : [value]; for (const value of values) { pairs.push({name, value}); } } return pairs; } function getRawRequest(request, headerPairs) { const {method, url, protocol} = request; const lines = [`${method} ${url} ${protocol}`]; for (const {name, value} of headerPairs) { lines.push(`${name}: ${value}`); } lines.push('', ''); return lines.join('\r\n'); } function getRawResponse(response, headerPairs) { const {status, statusText, protocol} = response; const lines = [`${protocol} ${status} ${statusText}`]; for (const {name, value} of headerPairs) { lines.push(`${name}: ${value}`); } lines.push('', ''); return lines.join('\r\n'); } function getHeaderValue(headers, name, fallback) { const pattern = new RegExp(`^${name}$`, 'i'); const key = Object.keys(headers).find((name) => { return name.match(pattern); }); return key === undefined ? fallback : headers[key]; } function parseQueryString(requestUrl) { const {query} = url.parse(requestUrl, true); const pairs = zipNameValue(query); return pairs; } function parsePostData(request, headers) { const {postData} = request; if (!postData) { return undefined; } const mimeType = getHeaderValue(headers.request.map, 'content-type'); const params = ( mimeType === 'application/x-www-form-urlencoded' ? zipNameValue(querystring.parse(postData)) : [] ); return { mimeType, params, text: postData }; } function firstNonNegative(values) { const value = values.find((value) => value >= 0); return value === undefined ? -1 : value; } function toMilliseconds(time) { return time < 0 ? -1 : time * 1000; } module.exports = {create};