UNPKG

psuk-parliament

Version:

A library by PublicScrutiny.UK to make it easier to get information from the UK Parliament

82 lines (64 loc) 2.33 kB
var request = require('request'),
    BBCThings = require('bbc-things'),
    gramophone = require('gramophone'),
    Q = require('q');

// Entry point: download the list of bills and look up tags for each one.
request({
  url: 'http://public-scrutiny-office.org/bills.json',
  timeout: 2000
}, function(error, response, body) {
  // A failed or timed-out request has no usable body to parse.
  if (error) {
    console.error("Unable to fetch the list of bills:", error);
    return;
  }

  var bills;
  try {
    bills = JSON.parse(body);
  } catch (parseError) {
    console.error("Unable to parse the list of bills:", parseError);
    return;
  }

  // JSON.parse can legitimately yield null or a primitive; nothing to iterate.
  if (!bills || typeof bills !== 'object') {
    return;
  }

  // Object.keys works whether the payload is an array or a keyed object.
  Object.keys(bills).forEach(function(key) {
    getTagsForBill(bills[key])
    .then(function(bill) {
      // Bills without any tags are skipped. Nothing further is done with
      // tagged bills here either; getTagsForBill() logs them itself.
      if (Object.keys(bill.tags).length === 0) {
        return;
      }
    }, function(tagError) {
      // Report the failure instead of leaving a silently rejected promise.
      console.error("Unable to get tags for a bill:", tagError);
    });
  });
});

/**
 * Fetch the text of a bill and attach tags to it.
 *
 * Downloads `bill.textUrl`, extracts keywords from the bill's name,
 * description and text, and looks them up with BBCThings. On success the
 * bill object is mutated: `bill.tags` holds the first match for every keyword
 * that had at least one match and `bill.rawTags` lists every keyword returned
 * by the lookup.
 *
 * @param {Object} bill - Bill with `name`, `description` and `textUrl` properties.
 * @returns {Promise} Q promise resolved with the same `bill` object, or
 *   rejected if the download, keyword extraction or lookup fails.
 */
function getTagsForBill(bill) {
  var deferred = Q.defer();

  // The value request() returns is not a promise for the tagged bill, so the
  // deferred has to be settled from inside the callback.
  request(bill.textUrl, function(error, response, body) {
    if (error) {
      deferred.reject(error);
      return;
    }
    // Q.fcall turns a synchronous throw during extraction into a rejection;
    // resolving with its promise makes `deferred` follow the lookup result.
    deferred.resolve(Q.fcall(tagBillFromText, bill, body));
  });

  return deferred.promise;
}

/**
 * Extract keywords for a bill and resolve them to tags via BBCThings.
 *
 * @param {Object} bill - Bill to tag; receives `tags` and `rawTags`.
 * @param {string} body - Downloaded text of the bill.
 * @returns {Promise} Promise for the tagged `bill`.
 */
function tagBillFromText(bill, body) {
  var text = bill.name+" "+bill.description+" "+body;

  // Get tags from text
  var gramophoneOptions = {
    score: false,
    stopWords: ['bill', 'parliament', 'paragraph', 'a', 'b', 'c', 'act',
                'sheet', 'found', 'laid', 'disclosure', 'daughter'],
    limit: 50,
    ngrams: [1,2,3],
    stem: true
  };
  var keywords = gramophone.extract(text, gramophoneOptions).map(normaliseEntity);

  return BBCThings.search(keywords, true)
  .then(function(things) {
    var rawTags = [];
    var validThings = {};

    Object.keys(things || {}).forEach(function(thing) {
      rawTags.push(thing);
      // Only keywords with at least one match become tags; keep the first match.
      if (things[thing].length > 0) {
        validThings[thing] = things[thing][0];
      }
    });

    bill.tags = validThings;
    bill.rawTags = rawTags;

    if (Object.keys(bill.tags).length > 0) {
      console.log("Tags for "+bill.name+" Bill:");
      console.log(bill.tags);
      console.log('--');
    }

    return bill;
  });
}

/**
 * Replace keywords that are commonly mis-matched by BBC Things.
 *
 * Example overrides to avoid common tag mis-matches with BBC Things;
 * otherwise it tends to match against bands like "Queen" and "Pavement".
 *
 * @param {string} entity - Keyword extracted from the bill text.
 * @returns {string} The override for the keyword, or the keyword unchanged.
 */
function normaliseEntity(entity) {
  if (/^health$/i.test(entity)) {
    return "Healthcare";
  }
  if (/^(pavement|street|highway|bridge|traffic)$/i.test(entity)) {
    return "infrastructure";
  }
  if (/^queen$/i.test(entity)) {
    return "Queen Elizabeth II";
  }
  return entity;
}