psuk-parliament
Version:
A library by PublicScrutiny.UK to make it easier to get information from the UK Parliament
82 lines (64 loc) • 2.33 kB
JavaScript
var request = require('request'),
BBCThings = require('bbc-things'),
gramophone = require('gramophone'),
request = require('request'),
Q = require('q');
// Entry point: download the list of bills and start tag extraction for each.
// Failures are logged instead of being thrown from inside the callback, where
// nothing could catch them.
request({ url: 'http://public-scrutiny-office.org/bills.json', timeout: 2000 }, function (error, response, body) {
  if (error) {
    console.error('Unable to download the list of bills: ' + error.message);
    return;
  }

  var bills;
  try {
    bills = JSON.parse(body);
  } catch (parseError) {
    console.error('Unable to parse the list of bills: ' + parseError.message);
    return;
  }

  // JSON.parse can legitimately yield null or a primitive; there is nothing
  // to iterate over in that case.
  if (!bills || typeof bills !== 'object') {
    return;
  }

  // Object.keys covers both an array of bills and an object keyed by bill id.
  Object.keys(bills).forEach(function (key) {
    getTagsForBill(bills[key])
      .then(function (bill) {
        // Guard against a resolved value without tags before inspecting it.
        if (!bill || !bill.tags || Object.keys(bill.tags).length === 0) {
          return;
        }
        // Nothing further is done with a tagged bill here.
      })
      .then(null, function (tagError) {
        // One failing bill must not go unnoticed or affect the others.
        console.error('Unable to get tags for a bill: ' + (tagError && tagError.message ? tagError.message : tagError));
      });
  });
});
/**
 * Downloads the text of a bill, extracts candidate keywords from its name,
 * description and text with gramophone, and looks those keywords up with
 * BBCThings to produce tags.
 *
 * The bill object passed in is mutated: `bill.tags` becomes an object mapping
 * each keyword that had at least one match to its first match, and
 * `bill.rawTags` becomes the list of every keyword in the search result.
 *
 * @param {Object} bill - Bill with `name`, `description` and `textUrl` properties.
 * @returns {Promise} Promise fulfilled with the same bill object once its tags
 *   are attached, or rejected if the download, extraction or lookup fails.
 */
function getTagsForBill(bill) {
  var deferred = Q.defer();

  // request() reports its result through the callback (its return value is
  // not a promise), so the deferred has to be settled from inside it.
  request(bill.textUrl, function (error, response, body) {
    if (error) {
      deferred.reject(error);
      return;
    }

    try {
      var text = bill.name + " " + bill.description + " " + body;

      // Get tags from text
      var gramophoneOptions = {
        score: false,
        stopWords: ['bill', 'parliament', 'paragraph', 'a', 'b', 'c', 'act', 'sheet', 'found', 'laid', 'disclosure', 'daughter'],
        limit: 50,
        ngrams: [1, 2, 3],
        stem: true
      };

      var keywords = gramophone.extract(text, gramophoneOptions).map(function (keyword) {
        // Example overrides to avoid common tag mis-matches with BBC Things
        // Otherwise it tends to match against bands like "Queen" and "Pavement"
        if (/^health$/i.test(keyword)) {
          return "Healthcare";
        }
        if (/^(pavement|street|highway|bridge|traffic)$/i.test(keyword)) {
          return "infrastructure";
        }
        if (/^queen$/i.test(keyword)) {
          return "Queen Elizabeth II";
        }
        return keyword;
      });

      var tagged = BBCThings.search(keywords, true)
        .then(function (things) {
          var rawTags = [];
          var validThings = {};

          Object.keys(things || {}).forEach(function (thing) {
            rawTags.push(thing);
            // Keep only the first match for each keyword that matched anything.
            if (things[thing] && things[thing].length > 0) {
              validThings[thing] = things[thing][0];
            }
          });

          bill.tags = validThings;
          bill.rawTags = rawTags;

          if (Object.keys(bill.tags).length > 0) {
            console.log("Tags for " + bill.name + " Bill:");
            console.log(bill.tags);
            console.log('--');
          }

          return bill;
        });

      // Resolving with a promise makes the deferred follow it, so a failed
      // search rejects the returned promise as well.
      deferred.resolve(tagged);
    } catch (extractionError) {
      deferred.reject(extractionError);
    }
  });

  return deferred.promise;
}