drudgereport
Version:
Polls the DrudgeReport.com website and tracks the links
214 lines • 7.37 kB
JavaScript
;
// Compiler-emitted helper that drives a generator function as if it were an
// async function. An already-defined `this.__awaiter` is reused when present.
//   thisArg    - `this` value the generator function is applied with
//   _arguments - arguments passed to the generator function (defaults to [])
//   P          - promise constructor to use; falls back to the global Promise
//   generator  - generator function whose yielded values are awaited
// Returns a P instance that resolves with the generator's final value and
// rejects with anything the generator throws.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
return new (P || (P = Promise))(function (resolve, reject) {
// Resume the generator with a settled value; a throw from the generator rejects the outer promise.
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
// Re-raise a rejection inside the generator so its own try/catch can observe it.
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
// Finish when the generator is done; otherwise wrap the yielded value in a P and wait for it.
function step(result) { result.done ? resolve(result.value) : new P(function (resolve) { resolve(result.value); }).then(fulfilled, rejected); }
// Instantiate the generator and start it running.
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
// Flag the exports object with `__esModule` (NOTE(review): presumably for
// ES-module interop by the TypeScript compiler — confirm).
Object.defineProperty(exports, "__esModule", { value: true });
const fme_logger_1 = require("fme-logger");
// Module-wide logger named "models/drudge", with its level set to "info".
var L = new fme_logger_1.Log("models/drudge");
L.setLevel("info");
// HTTP client used by DrudgeFetch.get().
const request = require("request-promise-native");
// Used by the parse* methods to extract each link's hostname.
const url = require("url");
// HTML parser used by the parse* methods to select anchors.
const cheerio = require("cheerio");
// Article record class. It declares no members at runtime; the parse* methods
// of DrudgeFetch in this file emit plain object literals with the keys
// href, full_link, html, time, location, index and source rather than
// instances of this class.
// NOTE(review): presumably the fields were declared as types only in the
// TypeScript source — confirm against the .ts file.
class DrudgeArticle {
}
exports.DrudgeArticle = DrudgeArticle;
/**
 * Builds one article record per anchor in a cheerio selection.
 *
 * Anchors without an `href` attribute are skipped: `url.parse` throws on a
 * non-string argument, which would otherwise abort the whole scrape.
 *
 * @param {Function} $ - cheerio instance the anchors belong to
 * @param {Object} anchors - cheerio selection of `<a>` elements
 * @param {string} location - label stored in each record's `location` field
 * @returns {Array<Object>} records with href, full_link, html, time,
 *     location, index (position of the anchor within the selection) and
 *     source (hostname of href, or null when href has none)
 */
function collectLinks($, anchors, location) {
    const links = [];
    anchors.each((index, element) => {
        const anchor = $(element);
        const href = anchor.attr("href");
        if (typeof href !== "string") {
            return; // returning undefined continues the cheerio iteration
        }
        links.push({
            href: href,
            full_link: href,
            html: anchor.html(),
            time: new Date(),
            location: location,
            index: index,
            source: url.parse(href).hostname
        });
    });
    return links;
}
/**
 * Cuts a link list at the first link whose `html` equals `markerHtml`.
 *
 * @param {Array<Object>} links - records carrying an `html` field
 * @param {string} markerHtml - inner HTML of the link that starts the unwanted tail
 * @param {boolean} dropPrevious - when true, the link directly before the
 *     marker is removed as well
 * @returns {Array<Object>} the links kept; the input itself when no marker is found
 */
function truncateAtMarker(links, markerHtml, dropPrevious) {
    const markerIndex = links.findIndex((link) => link.html === markerHtml);
    if (markerIndex === -1) {
        return links;
    }
    // Clamp at 0: a negative start would make the cut count from the END of
    // the list and keep the marker instead of removing it.
    const cut = dropPrevious ? Math.max(markerIndex - 1, 0) : markerIndex;
    return links.slice(0, cut);
}
/**
 * Scrapes the links on the Drudge Report front page and remembers which
 * ones have already been reported.
 *
 * Properties:
 *   ready   - set to true once the constructor has run
 *   history - every article returned by findNew() so far (grows without bound)
 *   latest  - initialised to an empty array; not written elsewhere in this class
 */
class DrudgeFetch {
    constructor() {
        L.debug("DrudgeFetch.constructor Starting; ===>");
        this.history = [];
        this.latest = [];
        this.ready = true;
    }
    /**
     * Fetches the current links and returns those not seen before.
     * Each returned article is appended to `history`, so a link repeated
     * within a single fetch is reported only once.
     *
     * @returns {Promise<Array<Object>>} the new articles; rejects when
     *     getLatest() rejects
     */
    findNew() {
        return this.getLatest().then((articles) => {
            const newArticles = [];
            for (const article of articles) {
                if (this.isDuplicate(article)) {
                    continue;
                }
                newArticles.push(article);
                this.history.push(article);
            }
            return newArticles;
        });
    }
    /**
     * Tells whether an article with the same `href` is already in `history`.
     *
     * @param {Object} article - record with an `href` field
     * @returns {boolean} true when a history entry has an identical href
     */
    isDuplicate(article) {
        return this.history.some((seen) => seen.href === article.href);
    }
    /**
     * Downloads the page and runs every section parser over it.
     *
     * @returns {Promise<Array<Object>>} records in the order: left headlines,
     *     main headline, first, second and third column. Rejects (after
     *     logging) when the download fails or a parser throws.
     */
    getLatest() {
        return this.get()
            .then((data) => [
                ...this.parseHeadlines(data),
                ...this.parseMainHeadline(data),
                ...this.parseFirstColumn(data),
                ...this.parseSecondColumn(data),
                ...this.parseThirdColumn(data)
            ])
            .catch((err) => {
                L.error("drudgeFetch.getLatest Error", err);
                throw err;
            });
    }
    /**
     * Requests http://drudgereport.com.
     *
     * @returns {Promise<string>} resolves with the body handed to the request
     *     callback, rejects with the callback's error
     */
    get() {
        return new Promise((resolve, reject) => {
            request("http://drudgereport.com", (err, res, body) => {
                if (err) {
                    reject(err);
                }
                else {
                    resolve(body);
                }
            });
        });
    }
    /**
     * Extracts the links under `#app_topstories`.
     *
     * @param {string} body - page HTML
     * @returns {Array<Object>} records labelled "leftHeadlines"
     */
    parseHeadlines(body) {
        const $ = cheerio.load(body);
        return collectLinks($, $("#app_topstories a"), "leftHeadlines");
    }
    /**
     * Extracts the links under `#app_mainheadline`.
     *
     * @param {string} body - page HTML
     * @returns {Array<Object>} records labelled "mainHeadlines"
     */
    parseMainHeadline(body) {
        const $ = cheerio.load(body);
        return collectLinks($, $("#app_mainheadline a"), "mainHeadlines");
    }
    /**
     * Extracts the links in the first `#app_col1 table td` cell, dropping
     * the "FRONT PAGES UK" link, the link before it and everything after.
     *
     * @param {string} body - page HTML
     * @returns {Array<Object>} records labelled "firstColumn"
     */
    parseFirstColumn(body) {
        const $ = cheerio.load(body);
        const anchors = $("#app_col1 table td").first().find("a");
        const links = collectLinks($, anchors, "firstColumn");
        return truncateAtMarker(links, "FRONT PAGES UK", true);
    }
    /**
     * Extracts the links under `#app_col2`, dropping the "3 AM GIRLS" link
     * and everything after it.
     *
     * @param {string} body - page HTML
     * @returns {Array<Object>} records labelled "secondColumn"
     */
    parseSecondColumn(body) {
        const $ = cheerio.load(body);
        const links = collectLinks($, $("#app_col2").find("a"), "secondColumn");
        return truncateAtMarker(links, "3 AM GIRLS", false);
    }
    /**
     * Extracts the links under `#app_col3`, dropping the
     * "AGENCE FRANCE-PRESSE" link, the link before it and everything after.
     *
     * @param {string} body - page HTML
     * @returns {Array<Object>} records labelled "thirdColumn"
     */
    parseThirdColumn(body) {
        const $ = cheerio.load(body);
        const links = collectLinks($, $("#app_col3").find("a"), "thirdColumn");
        return truncateAtMarker(links, "AGENCE FRANCE-PRESSE", true);
    }
}
// Publish the fetcher class on this module's exports object.
exports.DrudgeFetch = DrudgeFetch;
//# sourceMappingURL=drudge.js.map