UNPKG

calfire

Version:

Unofficial CalFire data scraper.

115 lines (107 loc) 3.66 kB
'use strict'; const http = require('http'); const cheerio = require('cheerio'); const parseIncident = require('./lib/parse-incident'); module.exports = { currentIncidents(params) { params = params || {}; params.page = params.page || 1; return new Promise((resolve, reject) => { let req = http.request({ hostname: 'www.fire.ca.gov', protocol: 'http:', path: `/current_incidents/?page=${params.page}`, headers: { 'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.53 Safari/525.19' }, method: 'GET' }, (res) => { let data = ''; res.on('data', (chunk) => { data += chunk; }); res.on('end', () => { let $ = cheerio.load(data); let promiseChain = Promise.resolve([]); $('table.incident_table[id]').each((i, table) => { let incidentId = table.attribs.id; promiseChain = promiseChain.then((results) => { return this.get(incidentId).then((incident) => { results.push(incident); return Promise.resolve(results); }) }); }); promiseChain.then(resolve); }); res.on('error', reject); }); req.end(); req.on('error', reject); }); }, rss() { return new Promise((resolve, reject) => { let req = http.request({ hostname: 'cdfdata.fire.ca.gov', protocol: 'http:', path: `/incidents/rss.xml`, headers: { 'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.53 Safari/525.19' }, method: 'GET' }, (res) => { let data = ''; res.on('data', (chunk) => { data += chunk; }); res.on('end', () => { let $ = cheerio.load(data, {xmlMode: true}); let promiseChain = Promise.resolve([]); $('item link').each((i, link) => { let href = link.children[0].data; promiseChain = promiseChain.then((results) => { let incidentId = (href.match(/\/current_incidents\/incidentdetails\/Index\/(\d+)/) || [])[1]; return this.get(incidentId).then((incident) => { results.push(incident); return Promise.resolve(results); }) }); }); promiseChain.then(resolve); }); res.on('error', reject); }); req.end(); req.on('error', reject); }); }, get(incidentId){ return new Promise((resolve, reject) => { let req = http.request({ hostname: 'www.fire.ca.gov', protocol: 'http:', path: `/current_incidents/incidentdetails/Index/${incidentId}`, headers: { 'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.53 Safari/525.19' }, method: 'GET' }, (res) => { let data = ''; res.on('data', (chunk) => { data += chunk; }); res.on('end', () => { let incident = parseIncident(data); incident._id = `${incidentId}`.trim(); incident.href = `http://www.fire.ca.gov/current_incidents/incidentdetails/Index/${incidentId}`; resolve(incident); }); res.on('error', reject); }); req.end(); req.on('error', reject); }); } } module.exports.get(process.argv[2]).then(console.log)