UNPKG

psuk-parliament

Version:

A library by PublicScrutiny.UK to make it easier to get information from the UK Parliament

84 lines (67 loc) 2.28 kB
var request = require('request');
var BBCThings = require('bbc-things');
var gramophone = require('gramophone');
var xml2js = require('xml2js');
var Q = require('q');
var cheerio = require('cheerio');

// Fetch the BBC News "world" RSS feed and, for every item in it, download the
// linked article and tag it via extractEntities(). Tagged articles are printed
// to the console by extractEntities() itself.
request({
  url: 'http://feeds.bbci.co.uk/news/world/rss.xml',
  timeout: 2000
}, function (error, response, body) {
  if (error) {
    // Without this guard `body` is undefined and the XML parser is handed garbage.
    console.error('Failed to fetch RSS feed:', error);
    return;
  }

  var parser = new xml2js.Parser();
  parser.parseString(body, function (err, result) {
    if (err) {
      console.error('Failed to parse RSS feed:', err);
      return;
    }

    // The parser output wraps every child element in an array (hence the [0]
    // lookups); tolerate a feed with no channel or no items.
    var channel = result && result.rss && result.rss.channel && result.rss.channel[0];
    var items = (channel && channel.item) || [];

    items.forEach(function (item) {
      var article = {
        title: item.title[0],
        description: item.description[0],
        url: item.link[0],
        date: item.pubDate[0]
      };

      extractEntities(article).then(function (article) {
        // article.tags and article.rawTags are populated at this point;
        // nothing further is done with them here.
      }, function (failure) {
        console.error('Failed to extract entities for ' + article.url + ':', failure);
      });
    });
  });
});

/**
 * Download an article's page, extract candidate terms from its text and look
 * them up with BBCThings.
 *
 * On success the article object is mutated in place:
 *   - article.rawTags: every key of the BBCThings search result
 *   - article.tags:    key -> first match, for keys with at least one match
 * If at least one tag matched, a summary of the article is logged.
 *
 * @param {{title: string, description: string, url: string}} article
 *        Article to tag; `url` is fetched over HTTP.
 * @returns {Promise} Q promise fulfilled with the same `article` object once
 *          tagging has finished, or rejected if the download, the text
 *          extraction or the BBCThings search fails.
 */
function extractEntities(article) {
  var deferred = Q.defer();

  request(article.url, function (error, response, body) {
    if (error) {
      deferred.reject(error);
      return;
    }

    try {
      // Resolving with a promise makes the deferred follow it, so the caller
      // only sees the article after the search has completed.
      deferred.resolve(tagArticle(article, body));
    } catch (failure) {
      // cheerio / gramophone / BBCThings threw synchronously.
      deferred.reject(failure);
    }
  });

  return deferred.promise;
}

/**
 * Extract candidate terms from an article's HTML and attach the BBCThings
 * matches to the article (private helper for extractEntities).
 *
 * @param {Object} article Article being tagged (mutated).
 * @param {string} body    HTML of the article page.
 * @returns {Promise} Promise for the tagged article.
 */
function tagArticle(article, body) {
  var $ = cheerio.load(body);
  var text = article.title + " " + article.description + " " + $('div[class=story-body]').text();

  // Get tags from text
  var gramophoneOptions = {
    score: false,
    stopWords: [],
    limit: 50,
    ngrams: [1, 2, 3, 4],
    stem: true
  };
  var candidates = gramophone.extract(text, gramophoneOptions);

  // NOTE: overrides for common tag mis-matches are not implemented; the
  // candidates are passed to the search unmodified.

  // NOTE(review): the meaning of the second argument (`true`) is defined by
  // bbc-things and is not visible from this file - confirm before changing.
  return BBCThings.search(candidates, true).then(function (things) {
    var rawTags = [];
    var validThings = {};

    if (things) {
      Object.keys(things).forEach(function (name) {
        var matches = things[name];
        rawTags.push(name);
        if (matches && matches.length > 0) {
          // Keep only the first match for each term.
          validThings[name] = matches[0];
        }
      });
    }

    article.tags = validThings;
    article.rawTags = rawTags;

    if (Object.keys(article.tags).length > 0) {
      console.log("Title: " + article.title);
      console.log("Description: " + article.description);
      console.log("Tags: " + article.rawTags.join(','));
      console.log(article.tags);
      console.log('---------');
    }

    return article;
  });
}