UNPKG

drudgereport

Version:

Polls the DrudgeReport.com website and tracks the links

238 lines (218 loc) 7.01 kB
import { Log } from "fme-logger";
import * as request from "request-promise-native";
import * as url from "url";
import * as cheerio from "cheerio";

const L = new Log("models/drudge");
L.setLevel("info");

/**
 * Shape of one link scraped from the page.
 *
 * NOTE(review): the parsers in this file also attach a `full_link` property
 * and never assign `latest`; confirm with consumers before tightening this.
 */
export class DrudgeArticle {
  html: string;
  time: Date;
  location: string;
  index: number;
  source: string;
  href: string;
  latest: Date;
}

/**
 * Builds one link record per anchor in `anchors`.
 *
 * Anchors without an `href` attribute are skipped: `url.parse` throws on a
 * non-string argument, which would otherwise abort the whole scrape.
 *
 * @param $ cheerio instance the anchors were selected from
 * @param anchors cheerio selection of `<a>` elements
 * @param location label stored on every record (which page region it came from)
 * @returns link records in document order; `index` is the anchor's position
 *          within `anchors`, so it may have gaps where anchors were skipped
 */
function collectLinks($: any, anchors: any, location: string): any[] {
  const links: any[] = [];
  anchors.each((index: number, element: any) => {
    const anchor = $(element);
    const href = anchor.attr("href");
    if (typeof href !== "string") {
      return; // returning undefined continues the cheerio iteration
    }
    links.push({
      href: href,
      full_link: href,
      html: anchor.html(),
      time: new Date(),
      location: location,
      index: index,
      source: url.parse(href).hostname
    });
  });
  return links;
}

/**
 * Removes the first link whose inner HTML equals `markerHtml` and everything
 * after it. Used to cut off the static link lists that follow the articles.
 *
 * @param links link records; mutated in place
 * @param markerHtml exact inner HTML of the first link to discard
 * @param alsoDropPrevious when true, the link immediately before the marker
 *        is discarded as well
 * @returns the same `links` array, truncated if the marker was found
 */
function truncateAtMarker(links: any[], markerHtml: string, alsoDropPrevious: boolean): any[] {
  const markerIndex = links.findIndex(link => link.html === markerHtml);
  if (markerIndex === -1) {
    return links;
  }
  // Clamp to 0: a negative start would make splice() count from the end of
  // the array and leave the marker itself in place.
  const start = alsoDropPrevious ? Math.max(markerIndex - 1, 0) : markerIndex;
  links.splice(start);
  return links;
}

/**
 * Fetches the DrudgeReport front page, extracts its links by page region, and
 * remembers every href it has returned so later polls report only new links.
 */
export class DrudgeFetch {
  ready: boolean;
  /** Every article handed out by `findNew()` so far. */
  history: DrudgeArticle[];
  /** Initialised to an empty array; not written anywhere else in this file. */
  latest: DrudgeArticle[];

  constructor() {
    L.debug("DrudgeFetch.constructor Starting; ===>")
    this.ready = false;
    this.history = [];
    this.latest = [];
    this.ready = true;
  }

  /**
   * Scrapes the page and returns the articles whose href has not been seen
   * before, appending them to `history`.
   *
   * Each accepted article is recorded immediately, so a href that appears
   * more than once in a single scrape is returned only once.
   *
   * @returns the newly seen articles, in scrape order
   * @throws whatever `getLatest()` rejects with
   */
  async findNew(): Promise<DrudgeArticle[]> {
    const articles = await this.getLatest();
    const newArticles: DrudgeArticle[] = [];
    for (const article of articles) {
      if (this.isDuplicate(article)) {
        continue;
      }
      newArticles.push(article);
      this.history.push(article);
    }
    return newArticles;
  }

  /**
   * @param article any object with an `href` property
   * @returns true when an article with the same `href` is already in `history`
   */
  isDuplicate(article: any): boolean {
    return this.history.some(seen => seen.href === article.href);
  }

  /**
   * Downloads the page and runs every region parser over it.
   *
   * @returns all scraped links: headlines, main headline, then columns 1-3
   * @throws the download or parse error, after logging it
   */
  async getLatest(): Promise<DrudgeArticle[]> {
    try {
      const body = await this.get();
      return [
        ...this.parseHeadlines(body),
        ...this.parseMainHeadline(body),
        ...this.parseFirstColumn(body),
        ...this.parseSecondColumn(body),
        ...this.parseThirdColumn(body)
      ];
    } catch (err) {
      L.error("drudgeFetch.getLatest Error", err);
      throw err;
    }
  }

  /**
   * Requests the front page.
   *
   * @returns the body delivered to the request callback; rejects only when
   *          the callback reports an error
   */
  get(): Promise<string> {
    return new Promise<string>((resolve, reject) => {
      request("http://drudgereport.com", (err, res, body) => {
        if (err) {
          reject(err);
        } else {
          resolve(body);
        }
      });
    });
  }

  /** Links under `#app_topstories`, labelled "leftHeadlines". */
  parseHeadlines(body: string): any[] {
    const $ = cheerio.load(body);
    return collectLinks($, $("#app_topstories a"), "leftHeadlines");
  }

  /** Links under `#app_mainheadline`, labelled "mainHeadlines". */
  parseMainHeadline(body: string): any[] {
    const $ = cheerio.load(body);
    return collectLinks($, $("#app_mainheadline a"), "mainHeadlines");
  }

  /**
   * Links in the first `td` matched by `#app_col1 table td`, labelled
   * "firstColumn". Everything from the link before "FRONT PAGES UK" onward
   * is dropped.
   */
  parseFirstColumn(body: string): any[] {
    const $ = cheerio.load(body);
    const firstCell = $("#app_col1 table td")[0];
    const links = collectLinks($, $(firstCell).find("a"), "firstColumn");
    return truncateAtMarker(links, "FRONT PAGES UK", true);
  }

  /**
   * Links under `#app_col2`, labelled "secondColumn". Everything from
   * "3 AM GIRLS" onward is dropped (the link before it is kept).
   */
  parseSecondColumn(body: string): any[] {
    const $ = cheerio.load(body);
    const links = collectLinks($, $("#app_col2").find("a"), "secondColumn");
    return truncateAtMarker(links, "3 AM GIRLS", false);
  }

  /**
   * Links under `#app_col3`, labelled "thirdColumn". Everything from the
   * link before "AGENCE FRANCE-PRESSE" onward is dropped.
   */
  parseThirdColumn(body: string): any[] {
    const $ = cheerio.load(body);
    const links = collectLinks($, $("#app_col3").find("a"), "thirdColumn");
    return truncateAtMarker(links, "AGENCE FRANCE-PRESSE", true);
  }
}