node-phantom-simple
Version:
Simple and reliable bridge between Node.js and PhantomJS / SlimerJS
654 lines (511 loc) • 19 kB
JavaScript
/*global document*/
/*eslint-disable no-console*/
;
var HeadlessError = require('./headless_error');
var http = require('http');
var spawn = require('child_process').spawn;
var exec = require('child_process').exec;
var util = require('util');
var path = require('path');
var Emitter = require('events').EventEmitter;
var POLL_INTERVAL = process.env.POLL_INTERVAL || 500;
// Setup events proxy to avoid warnings "possible memory leak"
//
var processProxy = new Emitter();
processProxy.setMaxListeners(0);
[ 'SIGINT', 'SIGTERM' ].forEach(function(sig) {
process.on(sig, function () {
processProxy.emit(sig);
});
});
var queue = function (worker) {
var _q = [];
var running = false;
var q = {
push: function (obj) {
_q.push(obj);
q.process();
},
process: function () {
if (running || _q.length === 0) { return; }
running = true;
var cb = function () {
running = false;
q.process();
};
var task = _q.shift();
worker(task, cb);
}
};
return q;
};
function callbackOrDummy (callback, poll_func) {
if (!callback) { return function () {}; }
if (poll_func) {
return function () {
var args = Array.prototype.slice.call(arguments);
poll_func(function (err) {
if (err) {
// We could send back the original arguments,
// but I'm assuming that this error is better.
callback(err);
return;
}
callback.apply(null, args);
});
};
}
return callback;
}
function unwrapArray (arr) {
return arr && arr.length === 1 ? arr[0] : arr;
}
function wrapArray(arr) {
// Ensure that arr is an Array
return (arr instanceof Array) ? arr : [ arr ];
}
var pageEvaluateDeprecatedFn = util.deprecate(function () {}, "Deprecated 'page.evaluate(fn, callback, args...)' syntax - use 'page.evaluate(fn, args..., callback)' instead");
var createDeprecatedFn = util.deprecate(function () {}, "Deprecated '.create(callback, options)' syntax - use '.create(options, callback)' instead");
var pageWaitForSelectorDeprecatedFn = util.deprecate(function () {}, "Deprecated 'page.waitForSelector(selector, callback, timeout)' syntax - use 'page.waitForSelector(selector, timeout, callback)' instead");
var phantomPathDeprecatedFn = util.deprecate(function () {}, "Deprecated 'phantomPath' option - use 'path' instead");
exports.create = function (options, callback) {
if (callback && Object.prototype.toString.call(options) === '[object Function]') {
createDeprecatedFn();
var tmp = options;
options = callback;
callback = tmp;
}
if (!callback) {
callback = options;
options = {};
}
if (options.phantomPath) {
phantomPathDeprecatedFn();
options.path = options.phantomPath;
}
if (!options.path) {
options.path = 'phantomjs';
}
if (typeof options.parameters === 'undefined') { options.parameters = {}; }
function spawnPhantom (callback) {
var args = [];
Object.keys(options.parameters).forEach(function (parm) {
args.push('--' + parm + '=' + options.parameters[parm]);
});
args = args.concat([ path.join(__dirname, 'bridge.js') ]);
var phantom = spawn(options.path, args);
// Ensure that the child process is closed when this process dies
var closeChild = function () {
try {
phantom.kill();
} catch (__) {
//
}
};
// Note it's possible to blow up maxEventListeners doing this - consider moving to a single handler.
[ 'SIGINT', 'SIGTERM' ].forEach(function(sig) {
processProxy.on(sig, closeChild);
});
phantom.once('error', function (err) {
callback(err);
});
phantom.stderr.on('data', function (data) {
if (options.ignoreErrorPattern && options.ignoreErrorPattern.exec(data)) {
return;
}
console.warn('phantom stderr: ' + data);
});
var exitCode = 0;
phantom.once('exit', function (code) {
[ 'SIGINT', 'SIGTERM' ].forEach(function(sig) {
processProxy.removeListener(sig, closeChild);
});
exitCode = code;
});
// Wait for 'Ready' line
phantom.stdout.once('data', function (data) {
// setup normal listener now
phantom.stdout.on('data', function (data) {
console.log('phantom stdout: ' + data);
});
var matches = data.toString().match(/Ready \[(\d+)\] \[(.+?)\]/);
if (!matches) {
phantom.kill();
callback(new HeadlessError('Unexpected output from PhantomJS: ' + data));
return;
}
var phantom_port = matches[2].indexOf(':') === -1 ? matches[2] : matches[2].split(':')[1];
phantom_port = parseInt(phantom_port, 0);
if (phantom_port !== 0) {
callback(null, phantom, phantom_port);
return;
}
var phantom_pid = parseInt(matches[1], 0);
// Now need to figure out what port it's listening on - since
// Phantom is busted and can't tell us this we need to use lsof on mac, and netstat on Linux
// Note that if phantom could tell you the port it ends up listening
// on we wouldn't need to do this - server.port returns 0 when you ask
// for port 0 (i.e. random free port). If they ever fix that this will
// become much simpler
var platform = require('os').platform();
var cmd = null;
switch (platform) {
case 'linux':
// Modern distros usually have `iproute2` instead of `net-tools`.
// Try `ss` first, then fallback to `netstat`.
//
// Note, `grep "[,=]%d,"` contains variation,
// because `ss` output differs between versions.
cmd = 'if which ss > /dev/null; then ss -nlp | grep "[,=]%d,"; else netstat -nlp | grep "[[:space:]]%d/"; fi';
break;
case 'darwin':
cmd = 'lsof -np %d | grep LISTEN';
break;
case 'win32':
cmd = 'netstat -ano | findstr /R "\\<%d\\>"';
break;
case 'cygwin':
cmd = 'netstat -ano | grep %d';
break;
case 'freebsd':
cmd = 'sockstat | grep %d';
break;
default:
phantom.kill();
callback(new HeadlessError('Your OS is not supported yet. Tell us how to get the listening port based on PID'));
return;
}
// We do this twice - first to get ports this process is listening on
// and again to get ports phantom is listening on. This is to work
// around this bug in libuv: https://github.com/joyent/libuv/issues/962
// - this is only necessary when using cluster, but it's here regardless
var my_pid_command = cmd.replace(/%d/g, process.pid);
exec(my_pid_command, function (err, stdout /*, stderr*/) {
if (err !== null) {
// This can happen if grep finds no matching lines, so ignore it.
stdout = '';
}
var re = /(?:127\.0\.0\.1|localhost):(\d+)/ig, match;
var ports = [];
while ((match = re.exec(stdout)) !== null) {
ports.push(match[1]);
}
var phantom_pid_command = cmd.replace(/%d/g, phantom_pid);
exec(phantom_pid_command, function (err, stdout /*, stderr*/) {
if (err !== null) {
phantom.kill();
callback(new HeadlessError('Error executing command to extract phantom ports: ' + err));
return;
}
var port;
while ((match = re.exec(stdout)) !== null) {
if (ports.indexOf(match[1]) === -1) {
port = match[1];
}
}
if (!port) {
phantom.kill();
callback(new HeadlessError('Error extracting port from: ' + stdout));
return;
}
callback(null, phantom, port);
});
});
});
setTimeout(function () { // wait a bit to see if the spawning of phantomjs immediately fails due to bad path or similar
if (exitCode !== 0) {
return callback(new HeadlessError('Phantom immediately exited with: ' + exitCode));
}
}, 100);
}
spawnPhantom(function (err, phantom, port) {
if (err) {
callback(err);
return;
}
var pages = {};
var setup_new_page = function (id) {
var methods = [
'addCookie', 'childFramesCount', 'childFramesName', 'clearCookies', 'close',
'currentFrameName', 'deleteCookie', 'evaluateJavaScript',
'evaluateAsync', 'getPage', 'go', 'goBack', 'goForward', 'includeJs',
'injectJs', 'open', 'openUrl', 'release', 'reload', 'render', 'renderBase64',
'sendEvent', 'setContent', 'stop', 'switchToFocusedFrame', 'switchToFrame',
'switchToFrame', 'switchToChildFrame', 'switchToChildFrame', 'switchToMainFrame',
'switchToParentFrame', 'uploadFile', 'clearMemoryCache'
];
var page = {
setFn: function (name, fn, cb) {
request_queue.push([ [ id, 'setFunction', name, fn.toString() ], callbackOrDummy(cb, poll_func) ]);
},
get: function (name, cb) {
request_queue.push([ [ id, 'getProperty', name ], callbackOrDummy(cb, poll_func) ]);
},
set: function (name, val, cb) {
request_queue.push([ [ id, 'setProperty', name, val ], callbackOrDummy(cb, poll_func) ]);
},
evaluate: function (fn, cb) {
var extra_args = [];
if (arguments.length > 2) {
if (Object.prototype.toString.call(arguments[arguments.length - 1]) === '[object Function]') {
extra_args = Array.prototype.slice.call(arguments, 1, -1);
cb = arguments[arguments.length - 1];
} else {
pageEvaluateDeprecatedFn();
extra_args = Array.prototype.slice.call(arguments, 2);
}
}
request_queue.push([ [ id, 'evaluate', fn.toString() ].concat(extra_args), callbackOrDummy(cb, poll_func) ]);
},
waitForSelector: function (selector, timeout, cb) {
if (cb && Object.prototype.toString.call(timeout) === '[object Function]') {
pageWaitForSelectorDeprecatedFn();
var tmp = cb;
cb = timeout;
timeout = tmp;
}
if (!cb) {
cb = timeout;
// Default timeout is 10 sec
timeout = 10000;
}
var startTime = Date.now();
var timeoutInterval = 150;
// if evaluate succeeds, invokes callback w/ true, if timeout,
// invokes w/ false, otherwise just exits
var testForSelector = function () {
var elapsedTime = Date.now() - startTime;
if (elapsedTime > timeout) {
cb(new HeadlessError('Timeout waiting for selector: ' + selector));
return;
}
/*eslint-disable handle-callback-err*/
page.evaluate(function (selector) {
return document.querySelectorAll(selector).length;
}, selector, function (err, result) {
if (result > 0) { // selector found
cb();
} else {
setTimeout(testForSelector, timeoutInterval);
}
});
};
setTimeout(testForSelector, timeoutInterval);
}
};
methods.forEach(function (method) {
page[method] = function () {
var all_args = Array.prototype.slice.call(arguments);
var callback = null;
if (all_args.length > 0 && typeof all_args[all_args.length - 1] === 'function') {
callback = all_args.pop();
}
var req_params = [ id, method ];
request_queue.push([ req_params.concat(all_args), callbackOrDummy(callback, poll_func) ]);
};
});
pages[id] = page;
return page;
};
var poll_func = setup_long_poll(phantom, port, pages, setup_new_page);
var request_queue = queue(function (paramarr, next) {
var params = paramarr[0];
var callback = paramarr[1];
var page = params[0];
var method = params[1];
var args = params.slice(2);
var http_opts = {
hostname: 'localhost',
port: port,
path: '/',
method: 'POST'
};
phantom.POSTING = true;
var req = http.request(http_opts, function (res) {
var err = res.statusCode === 500 ? true : false;
var data = '';
res.setEncoding('utf8');
res.on('data', function (chunk) {
data += chunk;
});
res.on('end', function () {
phantom.POSTING = false;
if (!data) {
// If method is exit - response may be empty, because server could be stopped while sending
if (method === 'exit') {
next();
callback();
return;
}
next();
callback(new HeadlessError('No response body for page.' + method + '()'));
return;
}
var results;
try {
results = JSON.parse(data).data;
} catch (error) {
// If method is exit - response may be broken, because server could be stopped while sending
if (method === 'exit') {
next();
callback();
return;
}
next();
callback(error);
return;
}
if (err) {
next();
callback(results);
return;
}
if (method === 'createPage') {
var id = results.page_id;
var page = setup_new_page(id);
next();
callback(null, page);
return;
}
// Not createPage - just run the callback
next();
callback(null, results);
});
});
req.on('error', function (err) {
// If phantom already killed by `exit` command - callback without error
if (phantom.killed) {
next();
callback();
return;
}
console.warn('Request() error evaluating ' + method + '() call: ' + err);
callback(new HeadlessError('Request() error evaluating ' + method + '() call: ' + err));
});
req.setHeader('Content-Type', 'application/json');
var json = JSON.stringify({ page: page, method: method, args: args });
req.setHeader('Content-Length', Buffer.byteLength(json));
req.write(json);
req.end();
});
var proxy = {
process: phantom,
setProxy: function (ip, port, proxyType, user, password, callback) {
request_queue.push([ [ 0, 'setProxy', ip, port, proxyType, user, password ], callbackOrDummy(callback, poll_func) ]);
},
createPage: function (callback) {
request_queue.push([ [ 0, 'createPage' ], callbackOrDummy(callback, poll_func) ]);
},
injectJs: function (filename, callback) {
request_queue.push([ [ 0, 'injectJs', filename ], callbackOrDummy(callback, poll_func) ]);
},
addCookie: function (cookie, callback) {
request_queue.push([ [ 0, 'addCookie', cookie ], callbackOrDummy(callback, poll_func) ]);
},
clearCookies: function (callback) {
request_queue.push([ [ 0, 'clearCookies' ], callbackOrDummy(callback, poll_func) ]);
},
deleteCookie: function (cookie, callback) {
request_queue.push([ [ 0, 'deleteCookie', cookie ], callbackOrDummy(callback, poll_func) ]);
},
set : function (property, value, callback) {
request_queue.push([ [ 0, 'setProperty', property, value ], callbackOrDummy(callback, poll_func) ]);
},
get : function (property, callback) {
request_queue.push([ [ 0, 'getProperty', property ], callbackOrDummy(callback, poll_func) ]);
},
exit: function (callback) {
phantom.kill('SIGTERM');
// In case of SlimerJS `kill` will close only wrapper of xulrunner.
// We should send `exit` command to process.
request_queue.push([ [ 0, 'exit', 0 ], callbackOrDummy(callback) ]);
},
on: function () {
phantom.on.apply(phantom, arguments);
}
};
callback(null, proxy);
});
};
function setup_long_poll (phantom, port, pages, setup_new_page) {
var http_opts = {
hostname: 'localhost',
port: port,
path: '/',
method: 'GET'
};
var dead = false;
phantom.once('exit', function () { dead = true; });
var poll_func = function (cb) {
if (dead) {
cb(new HeadlessError('Phantom Process died'));
return;
}
if (phantom.POSTING) {
cb();
return;
}
var req = http.get(http_opts, function(res) {
res.setEncoding('utf8');
var data = '';
res.on('data', function (chunk) {
data += chunk;
});
res.on('end', function () {
var results;
if (dead) {
cb(new HeadlessError('Phantom Process died'));
return;
}
try {
results = JSON.parse(data).data;
} catch (err) {
console.warn('Error parsing JSON from phantom: ' + err);
console.warn('Data from phantom was: ' + data);
cb(new HeadlessError('Error parsing JSON from phantom: ' + err
+ '\nData from phantom was: ' + data));
return;
}
results.forEach(function (r) {
var new_page, callbackFunc, cb;
if (r.page_id) {
if (pages[r.page_id] && r.callback === 'onPageCreated') {
new_page = setup_new_page(r.args[0]);
if (pages[r.page_id].onPageCreated) {
pages[r.page_id].onPageCreated(new_page);
}
} else if (pages[r.page_id] && pages[r.page_id][r.callback]) {
callbackFunc = pages[r.page_id][r.callback];
if (callbackFunc.length > 1) {
// We use `apply` if the function is expecting multiple args
callbackFunc.apply(pages[r.page_id], wrapArray(r.args));
} else {
// Old `call` behaviour is deprecated
callbackFunc.call(pages[r.page_id], unwrapArray(r.args));
}
}
} else {
cb = callbackOrDummy(phantom[r.callback]);
cb.apply(phantom, r.args);
}
});
cb();
});
});
req.on('error', function (err) {
if (dead || phantom.killed) { return; }
console.warn('Poll Request error: ' + err);
});
};
var repeater = function () {
// If phantom already killed - stop repeat timer
if (dead || phantom.killed) {
return;
}
setTimeout(function () {
poll_func(repeater);
}, POLL_INTERVAL);
};
repeater();
return poll_func;
}