UNPKG

machinepack-phantomjscloud

Version:

Work with the PhantomJSCloud.com API to render HTML web pages, etc.

92 lines (70 loc) 2.24 kB
/** * Module dependencies */ var util = require('util'); var _ = require('lodash'); var doJSONRequest = require('./do-request'); /** * [getPageContent description] * @param {[type]} options [description] * @param {Function} cb [description] * @return {[type]} [description] */ module.exports = function getPageContent(options, cb) { var results = []; getPageContentRecursive({ url: options.url }, function (err){ if (err) { return cb(err); } return cb(null, results); }); // Keep checking the `url` until there are no more `stillProcessing` results. // Goal is to fetch the `pageContent` from each one. function getPageContentRecursive(options, cb) { doJSONRequest({ method: 'get', url: options.url }, function (err, responseBody) { if (err) { return cb(err); } // Example response from calling the `url`: // { // "stillProcessing": 1, // "justCompleted": [] // } try { // Handle any new `justCompleted` results by pushing them on `results` _.each(responseBody.justCompleted, function (newlyCompletedResult) { // `justCompleted` seems to be double-wrapped in JSON. // we need to parse it again. newlyCompletedResult = JSON.parse(newlyCompletedResult); results.push({ url: (function getUrl(x) { return x.request.targetUrl; })(newlyCompletedResult), html: (function getPageContent(x){ return x.pageContent; })(newlyCompletedResult) }); }); } catch(e) { return cb(e); } // All done, get out. if (responseBody.stillProcessing === 0) { return cb(); } // Otherwise there are more pages `stillProcessing`, so handle them // and try running `getPageContentRecursive()` again in a while. setTimeout(function (){ getPageContentRecursive({url: options.url}, cb); }, 5000); // (TODO: Use exponential backoff.) // console.log(util.format('%d pages still processing. Trying again in 5 secs...', responseBody.stillProcessing)); }); } }