phantomhar
Version:
a PhantomJS script to generate HTTP Archive (HAR) data from captured network traffic
409 lines (337 loc) • 11.5 kB
JavaScript
var http = require('http');
var zlib = require('zlib');
var phantom = require('node-phantom-simple');
var Prom = require('es6-promise').Promise;
var request = require('request');
var errors = require('./errors');
var utils = require('./utils');
var pkg = require('../package');
// The contents of these types of files should be included in the HAR.
// `processResponses` stores a response body's text only when the `bodies`
// option is set and the entry's `_type` is found in this list via `indexOf`.
var ALLOWED_CONTENT_TYPES = ['css', 'js', 'json', 'doc'];
// Replace `phantom.create` with the wrapper returned by `utils.promisify`;
// `openPage` chains `.then()` on the value it returns.
phantom.create = utils.promisify(phantom.create);
/**
 * Launches PhantomJS, creates a page, and hands both to `createPage`.
 *
 * @param {Object} opts - User options; forwarded to `createPage` as `options`.
 * @returns {Promise} Fulfils with whatever `createPage` fulfils with. Rejects
 *   with the original error when launching PhantomJS, creating the page, or
 *   `createPage` itself fails.
 */
function openPage(opts) {
  var phantomInstance;
  return phantom.create().then(function (ph) {
    phantomInstance = ph;
    return utils.promisify(ph.createPage)().then(function (page) {
      return createPage({
        options: opts,
        page: page,
        ph: ph
      });
    });
  }).catch(function (err) {
    // `phantomInstance` is still undefined when `phantom.create()` itself
    // failed; there is nothing to shut down in that case.
    if (phantomInstance) {
      phantomInstance.exit(); // Abort PhantomJS when an error occurs.
    }
    // Re-throw so the caller sees the real failure instead of a promise that
    // silently fulfils with `undefined`.
    throw err;
  });
}
/**
 * Installs network-capture hooks on a PhantomJS page, opens `options.url`,
 * and resolves with the HAR built by `createHAR`.
 *
 * @param {Object} opts
 * @param {Object} [opts.options] - User options. `url`, `delay` (seconds to
 *   wait after a successful load; defaults to 0) and `bodies` are read here.
 * @param {Object} opts.page - Page object; its `open` and `get` methods are
 *   called and capture state is stored on it as properties.
 * @param {Object} opts.ph - PhantomJS instance; `exit()` is called once the
 *   HAR has been built successfully.
 * @returns {Promise} Fulfils with the result of `createHAR(page)`. Rejects
 *   with `errors.ConnectionError` when the page does not load, or with the
 *   underlying error when reading cookies/content or building the HAR fails.
 *   On rejection `ph.exit()` is NOT called here; `openPage`'s `catch` does it.
 */
function createPage(opts) {
  opts = opts || {};
  var options = opts.options || {};
  options.delay = options.delay || 0;
  var page = opts.page;
  var ph = opts.ph;

  return new Prom(function (resolve, reject) {
    page.address = options.url;
    page.customHeaders = {
      'Cache-Control': 'no-cache',
      'Pragma': 'no-cache'
    };
    page.resources = {};
    page.types = {};
    page.options = options;

    page.onLoadStarted = function () {
      page.startTime = new Date();
    };

    page.onResourceRequested = function (req) {
      // TODO: File issue against `node-phantom-simple` for this.
      req = req[0];
      page.resources[req.id] = {
        request: req,
        startReply: null,
        endReply: null
      };
    };

    page.onResourceReceived = function (res) {
      var resource = page.resources[res.id];
      // TODO: File issue against `node-phantom-simple` for data URIs not
      // having requests. Responses without a recorded request are not
      // stored; they only advance `page.endTime`.
      if (typeof resource !== 'undefined') {
        switch (res.stage) {
          case 'start':
            resource.startReply = res;
            break;
          case 'end':
            resource.endReply = res;
            break;
        }
      }
      page.endTime = new Date();
    };

    // Clear browser cache/cookies/localStorage.
    // TODO: Figure out a way to do this with SlimerJS. (This works fine in PhantomJS.)
    //fs.removeTree(page.offlineStoragePath);

    page.open(page.address, function (err, status) {
      if (status !== 'success') {
        return reject(
          new errors.ConnectionError('Failed to load the URL (status: ' + status + ')'));
      }

      setTimeout(function () {
        page.get('cookies', function (err, cookies) {
          if (err) {
            console.error(
              "Error occurred running `page.get('cookies')`:\n" + err);
            // Settle the promise; merely logging would leave callers hanging.
            return reject(err);
          }

          page.get('content', function (err, content) {
            if (err) {
              console.error(
                "Error occurred running `page.get('content')`:\n" + err);
              return reject(err);
            }

            var harData;
            try {
              page.cookies = (cookies || []).map(function (cookie) {
                if ('expiry' in cookie) {
                  // Convert Unix timestamp to ISO 8601 timestamp.
                  cookie.expires = new Date(cookie.expiry * 1000).toISOString();
                  // Remove `expiry` since it was renamed to `expires`
                  // (per the HAR spec).
                  delete cookie.expiry;
                }
                return cookie;
              });

              // Extract `Content-Type` from root (i.e., first) resource. Its
              // `endReply` stays null if no 'end' stage was ever received.
              var root = page.resources['1'];
              page.content = {
                mimeType: root && root.endReply ? root.endReply.contentType : null,
                size: content.length,
                text: options.bodies ? content : null
              };

              harData = createHAR(page);
            } catch (buildError) {
              // An exception thrown inside this callback would otherwise
              // escape the promise entirely.
              return reject(buildError);
            }

            ph.exit();
            resolve(harData);
          });
        });
      }, options.delay * 1000);
    });
  });
}
/**
 * Builds a HAR-shaped object from the capture state stored on `page`.
 *
 * Reads `page.address`, `page.title`, `page.startTime`, `page.endTime`,
 * `page.types`, `page.resources`, `page.options`, `page.cookies` and
 * `page.content`. Resources missing a request, a 'start' reply or an 'end'
 * reply are skipped, as are `data:` URIs.
 *
 * @param {Object} page - Page object populated by `createPage`.
 * @returns {Object} `{log: {creator, entries, cookies, content, pages, version}}`.
 */
function createHAR(page) {
  // Timestamps can be strings rather than Date objects (the code has always
  // converted string `request.time`). The reply timestamps need the same
  // conversion, otherwise the subtractions below evaluate to NaN.
  function toDate(value) {
    return typeof value === 'string' ? new Date(value) : value;
  }

  var address = page.address;
  var title = page.title;
  var startTime = page.startTime;
  var types = page.types;
  var entries = [];

  Object.keys(page.resources).forEach(function (key) {
    var resource = page.resources[key];
    var request = resource.request;
    var startReply = resource.startReply;
    var endReply = resource.endReply;
    var error = resource.error;

    if (!request || !request.url || !startReply || !endReply) {
      return;
    }

    // Exclude data URIs from the HAR because they aren't
    // included in the spec.
    if (request.url.substring(0, 5).toLowerCase() === 'data:') {
      return;
    }

    var resType = types[request.url];
    if (!resType && endReply.contentType &&
        typeof endReply.contentType === 'string') {
      resType = utils.getType(endReply.contentType, request.url);
    }

    request.time = toDate(request.time);
    startReply.time = toDate(startReply.time);
    endReply.time = toDate(endReply.time);

    if (error) {
      startReply.bodySize = 0;
      startReply.time = 0;
      endReply.time = 0;
      endReply.content = {};
      endReply.contentType = null;
      endReply.headers = [];
      endReply.statusText = utils.getErrorString(error);
      endReply.status = null;
      resType = null;
    }

    // The whitelist holds short type labels ('css', 'js', ...), so it must be
    // checked against `resType` (as `processResponses` does with `_type`),
    // not against the MIME type in `endReply.contentType`.
    var wantsBody = page.options.bodies &&
      ALLOWED_CONTENT_TYPES.indexOf(resType) !== -1;

    entries.push({
      cache: {},
      pageref: address,
      request: {
        // Accurate `bodySize` blocked on https://github.com/ariya/phantomjs/pull/11484
        // bodySize: -1,
        bodySize: startReply.bodySize,
        cookies: [],
        headers: request.headers,
        // Accurate `headersSize` blocked on https://github.com/ariya/phantomjs/pull/11484
        // headersSize: -1,
        headersSize: 0,
        httpVersion: 'HTTP/1.1',
        method: request.method,
        queryString: [],
        url: request.url
      },
      response: {
        // Accurate `bodySize` (after gzip/deflate) blocked on https://github.com/ariya/phantomjs/issues/10156
        // bodySize: -1,
        bodySize: endReply.bodySize,
        cookies: [],
        headers: endReply.headers,
        headersSize: -1,
        httpVersion: 'HTTP/1.1',
        redirectURL: '',
        status: endReply.status,
        statusText: endReply.statusText,
        content: {
          _type: resType,
          mimeType: endReply.contentType,
          size: endReply.bodySize,
          // This will be empty because of this PhantomJS bug: https://github.com/ariya/phantomjs/pull/11484
          // Fortunately, in `processResponses` we have a workaround :)
          text: wantsBody && endReply.body != null ? endReply.body : null
        }
      },
      startedDateTime: request.time.toISOString(),
      time: endReply.time - request.time,
      timings: {
        blocked: 0,
        dns: -1,
        connect: -1,
        send: 0,
        wait: startReply.time - request.time,
        receive: endReply.time - startReply.time,
        ssl: -1
      }
    });
  });

  return {
    log: {
      creator: {
        name: pkg.name,
        version: pkg.version
      },
      entries: entries,
      cookies: page.cookies,
      content: page.content,
      pages: [
        {
          startedDateTime: startTime.toISOString(),
          id: address,
          title: title,
          pageTimings: {
            onLoad: page.endTime.getTime() - page.startTime.getTime()
          }
        }
      ],
      // NOTE(review): in the HAR format `log.version` is normally the format
      // version (e.g. '1.2'); this reports the package version -- confirm.
      version: pkg.version
    }
  };
}
/**
 * Re-fetches every entry's URL with `request` to fill in sizes (and,
 * optionally, bodies) that the PhantomJS capture could not provide.
 *
 * For each entry whose re-fetch completes, this sets `request.headersSize`,
 * `response.headersSize`, `response.bodySize` (bytes received on the wire),
 * `response.content.size` (bytes after gzip/deflate decoding),
 * `response.content.compression`, and -- when `options.bodies` is set and the
 * entry's `_type` is whitelisted -- `response.content.text`. Entries whose
 * re-fetch or decoding fails are left exactly as captured.
 *
 * @param {Object} opts
 * @param {Object} opts.data - HAR object (`data.log.entries` is iterated).
 * @param {Object} [opts.options] - User options; only `bodies` is read.
 * @returns {Promise} Fulfils with `data` once every re-fetch has settled.
 * @throws {string} 'PhantomJS could not process the page' when `opts.data` is
 *   missing (a string is thrown, kept for backward compatibility).
 */
function processResponses(opts) {
  opts = opts || {};
  var data = opts.data;
  var options = opts.options || {};
  var reqPromises = [];

  if (!data) {
    throw 'PhantomJS could not process the page';
  }

  // Rebuilds a raw header block: start line, one `Name: value` line per
  // header, then the terminating blank line.
  function serializeHeaders(startLine, headers) {
    var raw = startLine + '\r\n';
    Object.keys(headers).forEach(function (name) {
      raw += name + ': ' + headers[name] + '\r\n';
    });
    return raw + '\r\n';
  }

  // Fetch each request separately.
  Object.keys(data.log.entries).forEach(function (key, idx) {
    var entry = data.log.entries[key];

    reqPromises.push(new Prom(function (resolve) {
      var settled = false;

      // Resolves at most once; also used as the error handler so that a
      // failed re-fetch keeps the captured entry instead of crashing/hanging.
      function finish() {
        if (settled) {
          return;
        }
        settled = true;
        resolve({idx: idx, data: entry});
      }

      var reqOpts = {
        method: entry.request.method,
        url: entry.request.url,
        headers: {}
      };
      (entry.request.headers || []).forEach(function (header) {
        reqOpts.headers[header.name] = header.value;
      });

      // Only the byte length of this string is used; build a well-formed
      // request line with the entry's real method.
      var rawReqHeaders = serializeHeaders(
        entry.request.method + ' ' + entry.request.url + ' HTTP/1.1',
        reqOpts.headers);

      request(reqOpts).on('error', finish).on('response', function (res) {
        // Raw headers were added in v0.12
        // (https://github.com/joyent/node/issues/4844), but let's
        // reconstruct them for backwards compatibility.
        var rawResHeaders = serializeHeaders(
          'HTTP/' + res.httpVersion + ' ' + res.statusCode + ' ' +
            http.STATUS_CODES[res.statusCode],
          res.headers);

        var bodySize = 0; // bytes size over the wire
        var chunks = []; // decoded body chunks (after gzip/deflate)

        function collect(chunk) {
          chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk)));
        }

        function tally() {
          if (settled) {
            return;
          }
          // Decode once over the whole body so multi-byte characters that
          // straddle a chunk boundary are not corrupted.
          var decoded = Buffer.concat(chunks);
          var uncompressedSize = decoded.length; // size after uncompression

          entry.request.headersSize = Buffer.byteLength(rawReqHeaders, 'utf8');

          if (options.bodies && ALLOWED_CONTENT_TYPES.indexOf(entry.response.content._type) !== -1) {
            // Store only human-readable content (i.e., not binary)
            // (and if the user actually wants the response bodies in the HAR).
            entry.response.content.text = decoded.toString('utf8');
          }

          entry.response.bodySize = bodySize;
          entry.response.headersSize = Buffer.byteLength(rawResHeaders, 'utf8');
          entry.response.content.size = uncompressedSize;
          entry.response.content.compression = uncompressedSize - bodySize;
          // Kept from the original output: wire size plus compression savings,
          // i.e. the same value as `content.size`.
          entry.response.content.bodySize = bodySize + entry.response.content.compression;

          finish();
        }

        var decoder = null;
        switch (res.headers['content-encoding']) {
          case 'gzip':
            decoder = zlib.createGunzip();
            break;
          case 'deflate':
            decoder = zlib.createInflate();
            break;
        }

        res.on('error', finish);
        res.on('data', function (chunk) {
          bodySize += chunk.length;
          if (!decoder) {
            collect(chunk);
          }
        });

        if (decoder) {
          decoder.on('data', collect).on('end', tally).on('error', finish);
          res.pipe(decoder);
        } else {
          res.on('end', tally);
        }
      });
    }));
  });

  return Prom.all(reqPromises).then(function (responses) {
    responses.forEach(function (res) {
      data.log.entries[res.idx] = res.data;
    });
    return data;
  });
}
/**
 * Public entry point: captures a page via `openPage`, then passes the result
 * through `processResponses` together with the same user options.
 *
 * @param {Object} opts - User options, given to `openPage` and forwarded to
 *   `processResponses` as `options`.
 * @returns {Promise} Settles with whatever `processResponses` yields.
 */
function har(opts) {
  var capture = openPage(opts);
  return capture.then(function (captured) {
    var payload = {
      data: captured,
      options: opts
    };
    return processResponses(payload);
  });
}
module.exports.har = har;