UNPKG

is-satire

Version:

Strategically determine if a website is satire or not

555 lines (440 loc) 18 kB
'use strict';

/*
 * is-satire command-line entry point (Babel / regenerator ES5 build output).
 *
 * Loading this module immediately constructs an IsSatire with the process's
 * command-line arguments (see the last statement of the file). Nothing is
 * exported.
 *
 * Reading the compiled async functions: every `_contextN.next = K; return X;`
 * pair inside a `switch` is the compiled form of `await X`; execution resumes
 * at `case K` with the awaited value available as `_contextN.sent`.
 * `_contextN.abrupt('return', v)` is the compiled form of `return v`.
 */

var _construct = require('babel-runtime/core-js/reflect/construct');
var _construct2 = _interopRequireDefault(_construct);
var _toConsumableArray2 = require('babel-runtime/helpers/toConsumableArray');
var _toConsumableArray3 = _interopRequireDefault(_toConsumableArray2);
var _create = require('babel-runtime/core-js/object/create');
var _create2 = _interopRequireDefault(_create);
var _assign = require('babel-runtime/core-js/object/assign');
var _assign2 = _interopRequireDefault(_assign);
var _set = require('babel-runtime/core-js/set');
var _set2 = _interopRequireDefault(_set);
var _map = require('babel-runtime/core-js/map');
var _map2 = _interopRequireDefault(_map);
var _regenerator = require('babel-runtime/regenerator');
var _regenerator2 = _interopRequireDefault(_regenerator);
var _asyncToGenerator2 = require('babel-runtime/helpers/asyncToGenerator');
var _asyncToGenerator3 = _interopRequireDefault(_asyncToGenerator2);
var _classCallCheck2 = require('babel-runtime/helpers/classCallCheck');
var _classCallCheck3 = _interopRequireDefault(_classCallCheck2);
var _createClass2 = require('babel-runtime/helpers/createClass');
var _createClass3 = _interopRequireDefault(_createClass2);
var _url = require('url');
var Url = _interopRequireWildcard(_url);
var _cheerio = require('cheerio');
var Cheerio = _interopRequireWildcard(_cheerio);
var _nodeFetch = require('node-fetch');
var _nodeFetch2 = _interopRequireDefault(_nodeFetch);
var _bluebird = require('bluebird');
var _fs = require('fs');
var _path = require('path');

/**
 * Namespace-import interop helper.
 *
 * @param {*} obj a required module
 * @return {Object} `obj` itself when it is flagged `__esModule`; otherwise a
 *   new object holding a copy of obj's own enumerable properties plus a
 *   `default` property that points at `obj`
 */
function _interopRequireWildcard(obj) {
  if (obj && obj.__esModule) {
    return obj;
  } else {
    var newObj = {};
    if (obj != null) {
      for (var key in obj) {
        if (Object.prototype.hasOwnProperty.call(obj, key)) newObj[key] = obj[key];
      }
    }
    newObj.default = obj;
    return newObj;
  }
}

/**
 * Default-import interop helper.
 *
 * @param {*} obj a required module
 * @return {Object} `obj` when it is flagged `__esModule`, else `{ default: obj }`
 */
function _interopRequireDefault(obj) {
  return obj && obj.__esModule ? obj : { default: obj };
}

/* eslint no-console:0 */

/* Promise-returning wrapper around fs.readFile. */
var readFileAsync = _bluebird.Promise.promisify(_fs.readFile);

var IsSatire = function () {
  /**
   * Starts a check as soon as it is constructed.
   *
   * NOTE: the constructor returns a Promise. Because that is an object, a
   * `new IsSatire(...)` expression evaluates to the Promise, not to the
   * instance. Rejections from `init` are logged with `console.log` and not
   * re-thrown, so the returned Promise then fulfils with `undefined`.
   *
   * @param {Array|String} uri [ 'http://...', 'extraneous argument', ... ] or
   *   'http://...'; defaults to `process.argv.slice(2)`. For a non-string
   *   value only the first element is used (an empty array becomes '').
   * @return {Promise} settles once `init` has finished
   */
  function IsSatire() {
    var uri = arguments.length <= 0 || arguments[0] === undefined ? process.argv.slice(2) : arguments[0];
    (0, _classCallCheck3.default)(this, IsSatire);

    var target = typeof uri !== 'string' ? uri.slice(0, 1).toString() : uri;
    return this.init(target).catch(function (err) {
      return console.log(err);
    });
  }

  (0, _createClass3.default)(IsSatire, [{
    /**
     * Loads the configuration into `this.config`, then scans `uri` when
     * `Url.parse(uri).protocol` is not null, or prints the usage text when
     * it is. The configuration is loaded in both cases.
     *
     * @param {String} uri 'http://...'
     * @return {Promise} resolves with the result of `scanUrl` / `displayHelp`
     */
    key: 'init',
    value: function () {
      var ref = (0, _asyncToGenerator3.default)(_regenerator2.default.mark(function _callee(uri) {
        var _Url$parse, protocol;

        return _regenerator2.default.wrap(function _callee$(_context) {
          while (1) {
            switch (_context.prev = _context.next) {
              case 0:
                _Url$parse = Url.parse(uri);
                protocol = _Url$parse.protocol;
                _context.next = 4;
                return this.configurationFor(uri);

              case 4:
                this.config = _context.sent;
                return _context.abrupt('return', protocol !== null ? this.scanUrl(uri) : this.displayHelp());

              case 6:
              case 'end':
                return _context.stop();
            }
          }
        }, _callee, this);
      }));

      function init(_x2) {
        return ref.apply(this, arguments);
      }

      return init;
    }()
  }, {
    /**
     * Builds the configuration Map from the two JSON files that live in the
     * `data` directory next to this file's parent directory:
     *   'blacklist'   -> Set built from `sites` in known.json
     *   'fingerprint' -> prototype-less object copied from `checks` in
     *                    fingerprint.json
     *   'uri'         -> the `uri` argument
     * The files are read one after the other.
     *
     * @param {String} uri 'http://...'
     * @return {Promise} resolves with the Map
     */
    key: 'configurationFor',
    value: function () {
      var ref = (0, _asyncToGenerator3.default)(_regenerator2.default.mark(function _callee2(uri) {
        var config;
        return _regenerator2.default.wrap(function _callee2$(_context2) {
          while (1) {
            switch (_context2.prev = _context2.next) {
              case 0:
                config = new _map2.default();
                _context2.t0 = config;
                _context2.t1 = _set2.default;
                _context2.t2 = JSON;
                _context2.next = 6;
                return readFileAsync((0, _path.join)(__dirname, '..', 'data', 'known.json'), 'utf-8');

              case 6:
                _context2.t3 = _context2.sent;
                _context2.t4 = _context2.t2.parse.call(_context2.t2, _context2.t3).sites;
                _context2.t5 = new _context2.t1(_context2.t4);

                _context2.t0.set.call(_context2.t0, 'blacklist', _context2.t5);

                _context2.t6 = config;
                _context2.t7 = (0, _create2.default)(null);
                _context2.t8 = JSON;
                _context2.next = 15;
                return readFileAsync((0, _path.join)(__dirname, '..', 'data', 'fingerprint.json'), 'utf-8');

              case 15:
                _context2.t9 = _context2.sent;
                _context2.t10 = _context2.t8.parse.call(_context2.t8, _context2.t9).checks;
                _context2.t11 = (0, _assign2.default)(_context2.t7, _context2.t10);

                _context2.t6.set.call(_context2.t6, 'fingerprint', _context2.t11);

                config.set('uri', uri);

                return _context2.abrupt('return', config);

              case 21:
              case 'end':
                return _context2.stop();
            }
          }
        }, _callee2, this);
      }));

      function configurationFor(_x3) {
        return ref.apply(this, arguments);
      }

      return configurationFor;
    }()
  }, {
    /**
     * Prints the usage line and terminates the process with exit code 0.
     */
    key: 'displayHelp',
    value: function displayHelp() {
      console.log('usage: is-satire http(s)://...\n');
      return process.exit(0);
    }
  }, {
    /**
     * @param {String} hostname host to look up (exact match)
     * @return {Boolean} whether the blacklist Set contains `hostname`
     */
    key: 'isBlacklisted',
    value: function isBlacklisted(hostname) {
      return this.blacklist.has(hostname);
    }
  }, {
    /**
     * Fetches every path in `arr` relative to `url`, one request at a time
     * (each reducer step first awaits the accumulator produced by the
     * previous step), and collects the keywords found on each usable page.
     * A response is skipped unless its status is 200 and it carries no
     * `link` header.
     *
     * @param {String} url "http://..."
     * @param {Array} arr [ '/about', '/about-us', ... ]
     * @return {Promise} resolves with
     *   [ { keywordsFound: [ 'satire', ... ], path: '/about' }, ... ]
     */
    key: 'analyzePaths',
    value: function () {
      var ref = (0, _asyncToGenerator3.default)(_regenerator2.default.mark(function _callee4(url, arr) {
        return _regenerator2.default.wrap(function _callee4$(_context4) {
          while (1) {
            switch (_context4.prev = _context4.next) {
              case 0:
                _context4.next = 2;
                return arr.reduce(function () {
                  /*
                   * `data`, `httpStatus` and `isValidPath` are declared as
                   * parameters, so `reduce` initially fills them with the
                   * index, the array and `undefined`. `data` and
                   * `isValidPath` are overwritten before being read;
                   * `httpStatus` is never used.
                   */
                  var ref = (0, _asyncToGenerator3.default)(_regenerator2.default.mark(function _callee3(paths, path, data, httpStatus, isValidPath) {
                    return _regenerator2.default.wrap(function _callee3$(_context3) {
                      while (1) {
                        switch (_context3.prev = _context3.next) {
                          case 0:
                            _context3.next = 2;
                            return paths;

                          case 2:
                            paths = _context3.sent;
                            _context3.next = 5;
                            return (0, _nodeFetch2.default)(Url.resolve(url, path));

                          case 5:
                            data = _context3.sent;

                            /* avoid non200's & shortlinks */
                            isValidPath = 200 === data.status && !data.headers.get('link');

                            if (isValidPath) {
                              _context3.next = 9;
                              break;
                            }

                            return _context3.abrupt('return', paths);

                          case 9:
                            _context3.t0 = paths;
                            _context3.t1 = this;
                            _context3.next = 13;
                            return data.text();

                          case 13:
                            _context3.t2 = _context3.sent;
                            _context3.t3 = this.fingerprint.keywords;
                            _context3.next = 17;
                            return _context3.t1.findKeywords.call(_context3.t1, _context3.t2, _context3.t3);

                          case 17:
                            _context3.t4 = _context3.sent;
                            _context3.t5 = path;
                            _context3.t6 = {
                              keywordsFound: _context3.t4,
                              path: _context3.t5
                            };

                            _context3.t0.push.call(_context3.t0, _context3.t6);

                            return _context3.abrupt('return', paths);

                          case 22:
                          case 'end':
                            return _context3.stop();
                        }
                      }
                    }, _callee3, this);
                  }));
                  return function (_x6, _x7, _x8, _x9, _x10) {
                    return ref.apply(this, arguments);
                  };
                }().bind(this), []);

              case 2:
                return _context4.abrupt('return', _context4.sent);

              case 3:
              case 'end':
                return _context4.stop();
            }
          }
        }, _callee4, this);
      }));

      function analyzePaths(_x4, _x5) {
        return ref.apply(this, arguments);
      }

      return analyzePaths;
    }()
  }, {
    /**
     * Extracts the text of every selector listed in `fingerprint.elements`
     * from the markup and reports which of the given keywords occur in it.
     * Each keyword is used as a regular-expression source, unescaped and
     * case-sensitive.
     *
     * @param {String} data '<!html><head...' (markup handed to Cheerio.load)
     * @param {Array} arr [ 'parody', 'satire', ... ]
     * @return {Promise} resolves with the matching keywords, e.g. [ 'satire' ]
     */
    key: 'findKeywords',
    value: function () {
      var ref = (0, _asyncToGenerator3.default)(_regenerator2.default.mark(function _callee6(data, arr) {
        var $, pageContent, keywordsRegex, maybeHasKeyword;
        return _regenerator2.default.wrap(function _callee6$(_context6) {
          while (1) {
            switch (_context6.prev = _context6.next) {
              case 0:
                _context6.next = 2;
                return Cheerio.load(data);

              case 2:
                $ = _context6.sent;
                _context6.next = 5;
                return this.fingerprint.elements.map(function (el) {
                  return $(el).text();
                });

              case 5:
                pageContent = _context6.sent;
                /*
                 * Cheap pre-check. It is built from the same list that is
                 * matched keyword-by-keyword below, so the two steps cannot
                 * disagree when a caller passes its own keyword list.
                 */
                keywordsRegex = new RegExp(arr.join('|'));
                _context6.next = 9;
                return pageContent.some(function (text) {
                  return keywordsRegex.test(text);
                });

              case 9:
                maybeHasKeyword = _context6.sent;

                if (maybeHasKeyword) {
                  _context6.next = 14;
                  break;
                }

                _context6.next = 13;
                return [];

              case 13:
                return _context6.abrupt('return', _context6.sent);

              case 14:
                _context6.next = 16;
                return arr.reduce(function () {
                  var ref = (0, _asyncToGenerator3.default)(_regenerator2.default.mark(function _callee5(keywords, keyword) {
                    return _regenerator2.default.wrap(function _callee5$(_context5) {
                      while (1) {
                        switch (_context5.prev = _context5.next) {
                          case 0:
                            _context5.next = 2;
                            return keywords;

                          case 2:
                            keywords = _context5.sent;

                            /*
                             * `pageContent` is an array; `test` coerces it to
                             * a string, i.e. the element texts joined by ','.
                             */
                            if (new RegExp(keyword).test(pageContent)) keywords.push(keyword);
                            return _context5.abrupt('return', keywords);

                          case 5:
                          case 'end':
                            return _context5.stop();
                        }
                      }
                    }, _callee5, this);
                  }));
                  return function (_x13, _x14) {
                    return ref.apply(this, arguments);
                  };
                }(), []);

              case 16:
                return _context6.abrupt('return', _context6.sent);

              case 17:
              case 'end':
                return _context6.stop();
            }
          }
        }, _callee6, this);
      }));

      function findKeywords(_x11, _x12) {
        return ref.apply(this, arguments);
      }

      return findKeywords;
    }()
  }, {
    /**
     * Concatenates the `keywordsFound` lists of all analysed pages and
     * removes duplicates. Despite the name, no score is computed here; the
     * caller judges likelihood from the length of the returned list.
     *
     * @param {Array} data [ { keywordsFound: [ 'satire', ... ] } ]
     * @return {Promise} resolves with [ 'satire', 'fictional', ... ]
     */
    key: 'calculateLikelihood',
    value: function () {
      var ref = (0, _asyncToGenerator3.default)(_regenerator2.default.mark(function _callee8(data) {
        var keywordsFound;
        return _regenerator2.default.wrap(function _callee8$(_context8) {
          while (1) {
            switch (_context8.prev = _context8.next) {
              case 0:
                _context8.next = 2;
                return data.reduce(function () {
                  var ref = (0, _asyncToGenerator3.default)(_regenerator2.default.mark(function _callee7(keywords, key) {
                    return _regenerator2.default.wrap(function _callee7$(_context7) {
                      while (1) {
                        switch (_context7.prev = _context7.next) {
                          case 0:
                            _context7.next = 2;
                            return keywords;

                          case 2:
                            keywords = _context7.sent;
                            return _context7.abrupt('return', keywords.concat(key.keywordsFound));

                          case 4:
                          case 'end':
                            return _context7.stop();
                        }
                      }
                    }, _callee7, this);
                  }));
                  return function (_x16, _x17) {
                    return ref.apply(this, arguments);
                  };
                }(), []);

              case 2:
                keywordsFound = _context8.sent;
                _context8.next = 5;
                return [].concat((0, _toConsumableArray3.default)(new _set2.default(keywordsFound)));

              case 5:
                return _context8.abrupt('return', _context8.sent);

              case 6:
              case 'end':
                return _context8.stop();
            }
          }
        }, _callee8, this);
      }));

      function calculateLikelihood(_x15) {
        return ref.apply(this, arguments);
      }

      return calculateLikelihood;
    }()
  }, {
    /**
     * Analyses the configured paths of the target site and reduces the
     * per-page results to the list of distinct keywords found.
     *
     * @return {Promise} resolves with [ 'satire', 'fictional', ... ]
     */
    key: 'beginScan',
    value: function () {
      var ref = (0, _asyncToGenerator3.default)(_regenerator2.default.mark(function _callee9() {
        var analysis;
        return _regenerator2.default.wrap(function _callee9$(_context9) {
          while (1) {
            switch (_context9.prev = _context9.next) {
              case 0:
                _context9.next = 2;
                return this.analyzePaths(this.target, this.fingerprint.paths);

              case 2:
                analysis = _context9.sent;
                _context9.next = 5;
                return this.calculateLikelihood(analysis);

              case 5:
                return _context9.abrupt('return', _context9.sent);

              case 6:
              case 'end':
                return _context9.stop();
            }
          }
        }, _callee9, this);
      }));

      function beginScan() {
        return ref.apply(this, arguments);
      }

      return beginScan;
    }()
  }, {
    /**
     * Reports on `uri` via `console.log`. A blacklisted hostname is reported
     * immediately; otherwise the site is scanned and the verdict depends on
     * how many distinct keywords were found (0, exactly 1, more than 1).
     *
     * @param {String} uri "http://..."
     * @return {Promise} resolves with `undefined` for a blacklisted host,
     *   otherwise with `false`
     */
    key: 'scanUrl',
    value: function () {
      var ref = (0, _asyncToGenerator3.default)(_regenerator2.default.mark(function _callee10(uri) {
        var result;
        return _regenerator2.default.wrap(function _callee10$(_context10) {
          while (1) {
            switch (_context10.prev = _context10.next) {
              case 0:
                console.log('checking ' + Url.parse(uri).hostname + '...');

                if (!this.isBlacklisted(Url.parse(uri).hostname)) {
                  _context10.next = 3;
                  break;
                }

                return _context10.abrupt('return', console.log(uri + ' is a known satire site!'));

              case 3:
                _context10.next = 5;
                return this.beginScan();

              case 5:
                result = _context10.sent;

                console.log('Found Keywords: ' + result.join(', '));

                if (result.length === 0) console.log('this does not seem to be a satire site. but i could be wrong.');
                if (result.length === 1) console.log('there\'s a small likelihood that this is a satire site.');
                if (result.length > 1) console.log('there\'s a strong likelihood that this is a satire site.');

                _context10.next = 12;
                return false;

              case 12:
                return _context10.abrupt('return', _context10.sent);

              case 13:
              case 'end':
                return _context10.stop();
            }
          }
        }, _callee10, this);
      }));

      function scanUrl(_x18) {
        return ref.apply(this, arguments);
      }

      return scanUrl;
    }()
  }, {
    /** The URI under test, as stored in the configuration Map. */
    key: 'target',
    get: function get() {
      return this.config.get('uri');
    }
  }, {
    /** The Set stored under 'blacklist' in the configuration Map. */
    key: 'blacklist',
    get: function get() {
      return this.config.get('blacklist');
    }
  }, {
    /**
     * The object stored under 'fingerprint' in the configuration Map; this
     * file reads its `keywords`, `elements` and `paths` properties.
     */
    key: 'fingerprint',
    get: function get() {
      return this.config.get('fingerprint');
    }
  }]);
  return IsSatire;
}();

/* Run immediately on load: construct IsSatire with the CLI arguments. */
(0, _construct2.default)(IsSatire, [process.argv.slice(2)]);