UNPKG

escaped-server

Version:
303 lines (226 loc) 7.91 kB
var urlu = require('url'); var async = require('async'); var _ = require('underscore'); var cache = require('memory-cache'); var defaults = { log: 'info', port: 80, capacity: 3, userAgent: "Googlebot/2.1", renderTimeout: 30 * 1000, pageTimeout: 60 * 1000, cacheTime: 3600000 }; module.exports = function(options) { options = _.defaults(options || {}, defaults); var log = new (require('log'))(options.log); var es = { server: null, options: options, phantom: null, sem: require('semaphore')(1), listen: function(port, callback) { port = port || es.options.port; if (es.server) { throw new Error('Already listening.'); } es.server = require('http').createServer(es.request); log.info('Listening on ' + port + '...'); es.server.listen(port, callback); }, close: function(callback) { log.info('Closing...') if (es.phantom) { log.debug('Exiting phantom.'); es.phantom.exit(); } if (es.server) { log.debug("Closing listener..."); es.server.on('close', function() { log.debug("Listener closed."); callback(); }); return es.server.close(); } callback(); }, errorResponse: function(res, msg, code) { msg = msg || 'Unknown error'; code = code || 500; res.statusCode = code; res.end(msg + '\n'); log.info('--> ERROR: ' + msg); }, request: function(req, res) { var url, state = {}; try { url = es.translateRequest(req); } catch(e) { log.debug("Received a request that could not be parsed:\n%j", req); return es.errorResponse(res, e.message); } log.info('<-- ' + req.url + ' for ' + req.headers['x-escaped-site']); var allow = true; if (es.options.allow) { if (typeof es.options.allow == 'string') { // Split the rule into a regex. "/test/g" becomes /test/g. var split = /^\/(.+)\/([a-z]*)$/.exec(es.options.allow); var re = new RegExp(split[1], split[2]); log.debug("Parsed allow rule '%s' into regex '%s'", es.options.allow, re); es.options.allow = re; } if (typeof es.options.allow == 'object') { // Regex allow = es.options.allow.test(req.headers['x-escaped-site']); if (!allow) { log.debug("Denying request for '%s' which failed to comply with allow rule '%s'", req.headers["x-escaped-site"], es.options.allow); } } else { throw new Error(util.format('Unable to understand allow option of type %s.', es.options.allow)); } } if (!allow) { return es.errorResponse(res, 'Host not allowed.', 403); } log.info('--> ' + url); var cached = cache.get('pages.' + url); if (cached) { log.info('--> ' + cached.length + ' bytes (cache hit)'); return res.end(cached); } async.series({ semaphore: function(callback) { log.debug("Waiting for semaphore. Current use is %d of %d.", es.sem.current, es.sem.capacity); es.sem.take(callback); }, phantom: function(callback) { log.debug("Took semaphore. Current use is %d of %d.", es.sem.current, es.sem.capacity); if (es.phantom) return callback(); log.debug("Creating phantom..."); require('phantom').create(function(phantom) { log.debug("Phantom created."); es.phantom = phantom; // Does not apply before this one is done unless // a setter is implemented om semaphore. es.sem.capacity = es.options.capacity callback(); }); }, createPage: function(callback) { log.debug("Creating page..."); es.phantom.createPage(function(page) { log.debug("Page created."); page.onError = function(msg, trace) { log.debug("Page error: %s\n%s", msg, trace); }; state.page = page; log.debug("Page events assigned."); // Time-out in case phantomjs fails. setTimeout(function() { // Completed the normal way. if (!state.page) return; state.requestTimedOut = true; es.errorResponse(res, 'Internal time-out.'); }, es.options.pageTimeout); page.open(url); callback(); }); }, wait: function(callback) { log.debug("Waiting for the page to render..."); var timeOutAt = +new Date + es.options.renderTimeout, startedAt = +new Date; async.whilst( function() { return true; }, function(step) { state.page.evaluate( function() { try { return { rendered: window && window.rendered, content: window && window.rendered && window.static ? window.static() : (document && document.documentElement) ? document.documentElement.outerHTML : null }; } catch(e) { if (console && console.log) { try { console.log('Failed to evaluate for renderer', e); } catch(e) { ; } return null; } } }, function(res) { if (state.requestTimedOut) { return callback(new Error("The request has timed out.")); } if ((!res || !res.rendered) && +new Date < timeOutAt) { return setTimeout(step, 100); } if (res.rendered) { log.debug("The page indicated that it was rendered after %ds.", (+new Date - startedAt) / 1000.0); } else { log.debug("The page did not indicate that it had completed rendering."); } if (!res || !res.content) { log.debug("The content is empty."); return callback(null, null); } else if (res.length == 39) { log.debug("The content is 39 bytes (empty page)"); } else if (res.length < 39) { log.debug("The content is unknown, %d bytes.", res.content.length); } else { log.debug("The content is %s bytes.", res.content.length); } state.content = res && res.content && res.content.length > 39 ? res.content : null return callback(null); } ); }, function(err) { if (state.requestTimedOut) { log.warn("Rendering result was obtained after the request had timed out."); return; } if (err) return callback(err); callback(); }); } }, function(err) { if (state.page) { log.debug("Releasing page."); state.page.release(); state.page = null; } es.sem.leave(); log.debug("Released semaphore. Current use is %d of %d.", es.sem.current, es.sem.capacity); if (err) return es.errorResponse(res, 'Rendering failed: ' + err.message); if (!state.content) return es.errorResponse(res, 'Failed to retrieve page.'); log.info('--> ' + state.content.length + ' bytes'); if (options.cacheTime) { cache.put('pages.' + url, state.content, options.cacheTime); } return res.end(state.content); }); }, translateRequest: function(req) { var site = req.headers['x-escaped-site']; if (!site) throw new Error('X-Escaped-Site header is missing.'); var purl = urlu.parse(req.url); var psite = urlu.parse(site); var query = purl.query; if (!query) { throw new Error('Unable to find query in the url.'); } // No other query parameters. query = query.replace(/^_escaped_fragment_=/, '#!'); // With other query parameters. query = query.replace(/^(.+)&_escaped_fragment_=/, '?$1#!'); // Unescape var pieces = /^(.*)\#\!(.*)$/.exec(query); query = pieces[1] + '#!' + unescape(pieces[2]); var result = psite.protocol + '//' + psite.host + purl.pathname + query; return result; } }; return es; };