UNPKG

drudgereport

Version:

Polls the DrudgeReport.com website and tracks the links

214 lines 7.37 kB
"use strict"; var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { return new (P || (P = Promise))(function (resolve, reject) { function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } function step(result) { result.done ? resolve(result.value) : new P(function (resolve) { resolve(result.value); }).then(fulfilled, rejected); } step((generator = generator.apply(thisArg, _arguments || [])).next()); }); }; Object.defineProperty(exports, "__esModule", { value: true }); const fme_logger_1 = require("fme-logger"); var L = new fme_logger_1.Log("models/drudge"); L.setLevel("info"); const request = require("request-promise-native"); const url = require("url"); const cheerio = require("cheerio"); class DrudgeArticle { } exports.DrudgeArticle = DrudgeArticle; class DrudgeFetch { constructor() { L.debug("DrudgeFetch.constructor Starting; ===>"); this.ready = false; this.history = []; this.latest = []; this.ready = true; } findNew() { var newArticles = []; return new Promise((resolve, reject) => __awaiter(this, void 0, void 0, function* () { var articles = yield this.getLatest(); for (var i in articles) { var article = articles[i]; if (this.isDuplicate(article)) { // L.info("DrudgeFetch: Duplicate found",article.href) continue; } else { newArticles.push(article); this.history.push(article); } } resolve(newArticles); })); } isDuplicate(article) { for (var i = 0; i < this.history.length; i++) { var h = this.history[i]; if (h.href == article.href) { return true; } } return false; } getLatest() { return new Promise((resolve, reject) => { var d = []; try { this.get(). then(data => { d.push(...this.parseHeadlines(data)); d.push(...this.parseMainHeadline(data)); d.push(...this.parseFirstColumn(data)); d.push(...this.parseSecondColumn(data)); d.push(...this.parseThirdColumn(data)); resolve(d); }); } catch (err) { L.error("drudgeFetch.getLatest Error", err); reject(err); } }); } get() { return new Promise((resolve, reject) => { request("http://drudgereport.com", (err, res, body) => { if (err) { reject(err); } else { resolve(body); } }); }); } parseHeadlines(body) { var links = []; var $ = cheerio.load(body); $("#app_topstories a").each(function (index) { var source = url.parse($(this).attr("href")).hostname; links.push({ href: $(this).attr("href"), full_link: $(this).attr("href"), html: $(this).html(), time: new (Date), location: "leftHeadlines", index: index, source: source }); }); return links; } parseMainHeadline(body) { var links = []; var $ = cheerio.load(body); $("#app_mainheadline a").each(function (index) { var source = url.parse($(this).attr("href")).hostname; links.push({ href: $(this).attr("href"), full_link: $(this).attr("href"), html: $(this).html(), time: new (Date), location: "mainHeadlines", index: index, source: source }); }); return links; } parseFirstColumn(body) { var links = []; var $ = cheerio.load(body); var col1 = $("#app_col1 table td")[0]; $(col1).find("a").each(function (idx) { var source = url.parse($(this).attr("href")).hostname; links.push({ href: $(this).attr("href"), full_link: $(this).attr("href"), html: $(this).html(), time: new (Date), location: "firstColumn", index: idx, source: source }); }); // // => filter to remove links list // for (var i = 0; i < links.length; i++) { var link = links[i]; if (link.html == "FRONT PAGES UK") { i = i - 1; var length = links.length; links.splice(i, length - i); break; } } return links; } parseSecondColumn(body) { var links = []; var $ = cheerio.load(body); var col2 = $("#app_col2"); $(col2).find("a").each(function (idx) { var source = url.parse($(this).attr("href")).hostname; // L.info($(this).html()) links.push({ href: $(this).attr("href"), full_link: $(this).attr("href"), html: $(this).html(), time: new (Date), location: "secondColumn", index: idx, source: source }); }); // // => filter to remove links liet // for (var i = 0; i < links.length; i++) { var link = links[i]; if (link.html == "3 AM GIRLS") { //i = i-1; var length = links.length; links.splice(i, length - i); break; } } return links; } parseThirdColumn(body) { var links = []; var $ = cheerio.load(body); var col = $("#app_col3"); $(col).find("a").each(function (idx) { var source = url.parse($(this).attr("href")).hostname; links.push({ href: $(this).attr("href"), full_link: $(this).attr("href"), html: $(this).html(), time: new (Date), location: "thirdColumn", index: idx, source: source }); }); // // => filter to remove links list // for (var i = 0; i < links.length; i++) { var link = links[i]; if (link.html == "AGENCE FRANCE-PRESSE") { i = i - 1; var length = links.length; links.splice(i, length - i); break; } } return links; } } exports.DrudgeFetch = DrudgeFetch; //# sourceMappingURL=drudge.js.map