// escaped-server
// Version:
// 303 lines (226 loc) • 7.91 kB
// JavaScript
var urlu = require('url');
var async = require('async');
var _ = require('underscore');
var cache = require('memory-cache');
// Fallback settings; the exported factory fills in any option the caller
// did not supply from this object.
var defaults = {
  // Level handed to the constructor of the `log` module.
  log: 'info',
  // Port used by listen() when none is passed.
  port: 80,
  // Semaphore capacity applied once the phantom instance has been created.
  capacity: 3,
  // NOTE(review): not referenced anywhere in the visible code.
  userAgent: 'Googlebot/2.1',
  // Milliseconds to keep polling the page for its "rendered" signal.
  renderTimeout: 30000,
  // Milliseconds after which a request is answered with an internal time-out.
  pageTimeout: 60000,
  // Milliseconds a rendered page stays in the cache (one hour); a falsy
  // value skips caching.
  cacheTime: 60 * 60 * 1000
};
/**
 * Factory for an escaped-server instance.
 *
 * @param {Object} [options] Settings; any key that is missing is filled in
 *   from `defaults` via `_.defaults`.
 * @returns {Object} The `es` object holding the state and methods below.
 */
module.exports = function(options) {
options = _.defaults(options || {}, defaults);
// Logger created at the configured level; used by all methods of `es`.
var log = new (require('log'))(options.log);
var es = {
// HTTP server; stays null until listen() creates it.
server: null,
// Effective options (caller options merged with the defaults).
options: options,
// Phantom instance; stays null until the first request creates it.
phantom: null,
// Semaphore guarding rendering. It starts with capacity 1; request() raises
// the capacity to options.capacity after the phantom instance exists.
sem: require('semaphore')(1),
listen: function(port, callback) {
port = port || es.options.port;
if (es.server) {
throw new Error('Already listening.');
}
es.server = require('http').createServer(es.request);
log.info('Listening on ' + port + '...');
es.server.listen(port, callback);
},
close: function(callback) {
log.info('Closing...')
if (es.phantom) {
log.debug('Exiting phantom.');
es.phantom.exit();
}
if (es.server) {
log.debug("Closing listener...");
es.server.on('close', function() {
log.debug("Listener closed.");
callback();
});
return es.server.close();
}
callback();
},
errorResponse: function(res, msg, code) {
msg = msg || 'Unknown error';
code = code || 500;
res.statusCode = code;
res.end(msg + '\n');
log.info('--> ERROR: ' + msg);
},
request: function(req, res) {
var url, state = {};
try {
url = es.translateRequest(req);
}
catch(e) {
log.debug("Received a request that could not be parsed:\n%j", req);
return es.errorResponse(res, e.message);
}
log.info('<-- ' + req.url + ' for ' + req.headers['x-escaped-site']);
var allow = true;
if (es.options.allow) {
if (typeof es.options.allow == 'string') {
// Split the rule into a regex. "/test/g" becomes /test/g.
var split = /^\/(.+)\/([a-z]*)$/.exec(es.options.allow);
var re = new RegExp(split[1], split[2]);
log.debug("Parsed allow rule '%s' into regex '%s'", es.options.allow, re);
es.options.allow = re;
}
if (typeof es.options.allow == 'object') {
// Regex
allow = es.options.allow.test(req.headers['x-escaped-site']);
if (!allow) {
log.debug("Denying request for '%s' which failed to comply with allow rule '%s'", req.headers["x-escaped-site"], es.options.allow);
}
} else {
throw new Error(util.format('Unable to understand allow option of type %s.', es.options.allow));
}
}
if (!allow) {
return es.errorResponse(res, 'Host not allowed.', 403);
}
log.info('--> ' + url);
var cached = cache.get('pages.' + url);
if (cached) {
log.info('--> ' + cached.length + ' bytes (cache hit)');
return res.end(cached);
}
async.series({
semaphore: function(callback) {
log.debug("Waiting for semaphore. Current use is %d of %d.", es.sem.current, es.sem.capacity);
es.sem.take(callback);
},
phantom: function(callback) {
log.debug("Took semaphore. Current use is %d of %d.", es.sem.current, es.sem.capacity);
if (es.phantom) return callback();
log.debug("Creating phantom...");
require('phantom').create(function(phantom) {
log.debug("Phantom created.");
es.phantom = phantom;
// Does not apply before this one is done unless
// a setter is implemented om semaphore.
es.sem.capacity = es.options.capacity
callback();
});
},
createPage: function(callback) {
log.debug("Creating page...");
es.phantom.createPage(function(page) {
log.debug("Page created.");
page.onError = function(msg, trace) {
log.debug("Page error: %s\n%s", msg, trace);
};
state.page = page;
log.debug("Page events assigned.");
// Time-out in case phantomjs fails.
setTimeout(function() {
// Completed the normal way.
if (!state.page) return;
state.requestTimedOut = true;
es.errorResponse(res, 'Internal time-out.');
}, es.options.pageTimeout);
page.open(url);
callback();
});
},
wait: function(callback) {
log.debug("Waiting for the page to render...");
var timeOutAt = +new Date + es.options.renderTimeout, startedAt = +new Date;
async.whilst(
function() { return true; },
function(step) {
state.page.evaluate(
function() {
try {
return {
rendered: window && window.rendered,
content: window && window.rendered && window.static ? window.static() : (document && document.documentElement) ? document.documentElement.outerHTML : null
};
} catch(e) {
if (console && console.log) {
try {
console.log('Failed to evaluate for renderer', e);
} catch(e) { ; }
return null;
}
}
},
function(res) {
if (state.requestTimedOut) {
return callback(new Error("The request has timed out."));
}
if ((!res || !res.rendered) && +new Date < timeOutAt) {
return setTimeout(step, 100);
}
if (res.rendered) {
log.debug("The page indicated that it was rendered after %ds.", (+new Date - startedAt) / 1000.0);
} else {
log.debug("The page did not indicate that it had completed rendering.");
}
if (!res || !res.content) {
log.debug("The content is empty.");
return callback(null, null);
} else if (res.length == 39) {
log.debug("The content is 39 bytes (empty page)");
} else if (res.length < 39) {
log.debug("The content is unknown, %d bytes.", res.content.length);
} else {
log.debug("The content is %s bytes.", res.content.length);
}
state.content = res && res.content && res.content.length > 39 ? res.content : null
return callback(null);
}
);
},
function(err) {
if (state.requestTimedOut) {
log.warn("Rendering result was obtained after the request had timed out.");
return;
}
if (err) return callback(err);
callback();
});
}
}, function(err) {
if (state.page) {
log.debug("Releasing page.");
state.page.release();
state.page = null;
}
es.sem.leave();
log.debug("Released semaphore. Current use is %d of %d.", es.sem.current, es.sem.capacity);
if (err) return es.errorResponse(res, 'Rendering failed: ' + err.message);
if (!state.content) return es.errorResponse(res, 'Failed to retrieve page.');
log.info('--> ' + state.content.length + ' bytes');
if (options.cacheTime) {
cache.put('pages.' + url, state.content, options.cacheTime);
}
return res.end(state.content);
});
},
translateRequest: function(req) {
var site = req.headers['x-escaped-site'];
if (!site) throw new Error('X-Escaped-Site header is missing.');
var purl = urlu.parse(req.url);
var psite = urlu.parse(site);
var query = purl.query;
if (!query) {
throw new Error('Unable to find query in the url.');
}
// No other query parameters.
query = query.replace(/^_escaped_fragment_=/, '#!');
// With other query parameters.
query = query.replace(/^(.+)&_escaped_fragment_=/, '?$1#!');
// Unescape
var pieces = /^(.*)\#\!(.*)$/.exec(query);
query = pieces[1] + '#!' + unescape(pieces[2]);
var result = psite.protocol + '//' + psite.host + purl.pathname + query;
return result;
}
};
return es;
};