UNPKG

bigseo

Version:

BigSEO is an ExpressJS module built for apps that need an SEO engine exclusively for web crawlers such as Google, Bing, Facebook, etc.

233 lines (200 loc) 7.37 kB
'use_strict'; var _ = require('underscore'), fs = require('fs'), _this; function BigSEO(opts) { _this = this; this.TAG = "BigSEO"; this.opts = { log: process.env.NODE_ENV != 'production', cache_path: 'caches', cache_url: '/save/cache', valid_url: '/valid/cache', valid_for: 24 // hours }; this.ua = { "Ruby": true, "bigseo/test": true, 'undefined': true, "facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)": true, "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)": true, "Mozilla/5.0 (compatible; Genieo/1.0 http://www.genieo.com/webfilter.html)": true, "Mozilla/5.0 (compatible; DotBot/1.1; http://www.opensiteexplorer.org/dotbot, help@moz.com)": true, "Mozilla/5.0 (compatible; proximic; +http://www.proximic.com/info/spider.php)": true, "msnbot-media/1.1 (+http://search.msn.com/msnbot.htm)": true, "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)": true, "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)": true, "Mozilla/5.0 (compatible; EasouSpider; +http://www.easou.com/search/spider.html)": true, "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)": true, "Mozilla/5.0 (compatible; Linux x86_64; Mail.RU_Bot/Robots/2.0; +http://go.mail.ru/help/robots)": true, "Mozilla/5.0 (compatible; URLAppendBot/1.0; +http://www.profound.net/urlappendbot.html)": true, "www.integromedb.org/Crawler": true, "Mozilla/5.0 (compatible; BLEXBot/1.0; +http://webmeup-crawler.com/)": true, "Mozilla/5.0 (compatible; Exabot/3.0; +http://www.exabot.com/go/robot)": true, "Mozilla/5.0 (compatible; archive.org_bot +http://www.archive.org/details/archive.org_bot)": true, "Mozilla/5.0 (compatible; AhrefsBot/5.0; +http://ahrefs.com/robot/)": true, "rogerbot/1.0 (http://moz.com/help/pro/what-is-rogerbot-, rogerbot-wherecat@moz.com)": true, "voltron": true, "Mozilla/5.0 (compatible; Yahoo! 
Slurp; http://help.yahoo.com/help/us/ysearch/slurp)": true, "Twitterbot/1.0": true }; _.extend(this.opts, opts); fs.exists(this.opts.cache_path, function(exists) { if(!exists) { fs.mkdir(_this.opts.cache_path, function(err) { if(err) { _this.log(err); } }); } }); } BigSEO.prototype.cache = function(req, res) { var body = req.body.dom; var rawUrl = req.body.url; rawUrl = _this.processUrl(rawUrl); var url = _this.encodeURL(rawUrl); _this.validCacheFor(rawUrl, function(valid) { if(!valid) { _this.log("Saving cache for: " + rawUrl); _this.log("Saving at: " + _this.cachePathFor(url)); fs.writeFile(_this.cachePathFor(url), body, function(err) { if(err) { console.log(err); _this.log("Error saving cache for: " + rawUrl); res.send(500); } else { _this.log("New cache for url: " + rawUrl); res.send(200); } }); } else { _this.log("Cache still valid for: " + rawUrl); res.send(500); } }); }; BigSEO.prototype.middleware = function(req, res, next) { var ua = req.headers['user-agent']; _this.log("UA: " + ua); var url = req.protocol + "://" + req.headers.host + req.originalUrl; url = _this.processUrl(url); if (req.method == "GET" && _this.matchUA(ua)) { _this.log("Verifying if has cache for: " + url); _this.hasCacheFor(url, function(hasCache) { if(hasCache) { _this.log('Cache Hit for ' + url); _this.getCacheContentFor(url, function(data) { res.send(data); }); } else { _this.log('Cache Miss for ' + url); next(); } }); } else { _this.log('Cache Miss for ' + url); next(); } }; BigSEO.prototype.processUrl = function(url) { var endsWith = function(u, suffix) { return u.indexOf(suffix, u.length - suffix.length) !== -1; }; url = unescape(url.replace('?_escaped_fragment_=0', '#!')); url = unescape(url.replace('?_escaped_fragment_=', '#!')); if(endsWith(url, '#!') || endsWith(url, '#!/')) { url = url.replace('#!/', ''); url = url.replace('#!', ''); } return url; }; BigSEO.prototype.staticJS = function(req, res) { fs.readFile(__dirname + '/static/bigseo.js', function(err, data) { if(!err) { 
res.status(200).send(data); } else { res.send(404); } }); }; BigSEO.prototype.angularJS = function(req, res) { fs.readFile(__dirname + '/static/angular-bigseo.js', function(err, data) { if(!err) { res.status(200).send(data); } else { res.send(404); } }); }; BigSEO.prototype.valid = function(req, res) { var url = req.body.url; url = _this.processUrl(url); _this.validCacheFor(url, function(valid) { res.json({ valid: valid }); }); }; BigSEO.prototype.run = function() { var express = require('express'); var router = express.Router(); router.use(this.middleware); router.post(this.opts.cache_url, this.cache); router.post(this.opts.valid_url, this.valid); router.get('/bigseo/bigseo.js', this.staticJS); router.get('/bigseo/angular-bigseo.js', this.angularJS); return router; }; BigSEO.prototype.validCacheFor = function(url, cb) { this.log('Verifying cache validation for: ' + url); this.hasCacheFor(url, function(has) { if(has) { fs.stat(_this.cachePathFor(_this.encodeURL(url)), function(err, stat) { if(!err) { var lastModified = stat.mtime; var now = new Date(); var diff = (now.getTime() - lastModified.getTime()) / 1000; // seconds diff = diff / (60 * 60) ; // hours cb(diff < _this.opts.valid_for); } else { cb(false); } }); } else { cb(false); } }); }; BigSEO.prototype.getCacheContentFor = function(url, cb) { fs.readFile(this.cachePathFor(this.encodeURL(url)), function(err, data) { cb(data.toString('utf8')); }); }; BigSEO.prototype.log = function(mixed) { if (this.opts.log) { console.log(this.TAG, mixed); } }; BigSEO.prototype.hasCacheFor = function(url, cb) { return fs.exists(this.cachePathFor(this.encodeURL(url)), cb); }; BigSEO.prototype.cachePathFor = function(encodedUrl) { return this.opts.cache_path + '/' + encodedUrl + ".html"; }; BigSEO.prototype.matchUA = function(uaStr) { return this.ua[uaStr] === true; }; BigSEO.prototype.encodeURL = function(url) { return new Buffer(url).toString('base64'); }; module.exports = function(opts) { return new BigSEO(opts); };