machinepack-phantomjscloud
Version:
Work with the PhantomJSCloud.com API to render HTML web pages, etc.
92 lines (70 loc) • 2.24 kB
JavaScript
/**
* Module dependencies
*/
var util = require('util');
var _ = require('lodash');
var doJSONRequest = require('./do-request');
/**
* [getPageContent description]
* @param {[type]} options [description]
* @param {Function} cb [description]
* @return {[type]} [description]
*/
module.exports = function getPageContent(options, cb) {
var results = [];
getPageContentRecursive({
url: options.url
}, function (err){
if (err) {
return cb(err);
}
return cb(null, results);
});
// Keep checking the `url` until there are no more `stillProcessing` results.
// Goal is to fetch the `pageContent` from each one.
function getPageContentRecursive(options, cb) {
doJSONRequest({
method: 'get',
url: options.url
}, function (err, responseBody) {
if (err) {
return cb(err);
}
// Example response from calling the `url`:
// {
// "stillProcessing": 1,
// "justCompleted": []
// }
try {
// Handle any new `justCompleted` results by pushing them on `results`
_.each(responseBody.justCompleted, function (newlyCompletedResult) {
// `justCompleted` seems to be double-wrapped in JSON.
// we need to parse it again.
newlyCompletedResult = JSON.parse(newlyCompletedResult);
results.push({
url: (function getUrl(x) {
return x.request.targetUrl;
})(newlyCompletedResult),
html: (function getPageContent(x){
return x.pageContent;
})(newlyCompletedResult)
});
});
}
catch(e) {
return cb(e);
}
// All done, get out.
if (responseBody.stillProcessing === 0) {
return cb();
}
// Otherwise there are more pages `stillProcessing`, so handle them
// and try running `getPageContentRecursive()` again in a while.
setTimeout(function (){
getPageContentRecursive({url: options.url}, cb);
}, 5000);
// (TODO: Use exponential backoff.)
// console.log(util.format('%d pages still processing. Trying again in 5 secs...', responseBody.stillProcessing));
});
}
}