calfire
Version: 
Unofficial CalFire data scraper.
115 lines (107 loc) • 3.66 kB
JavaScript
;
const http    = require('http');
const cheerio = require('cheerio');
const parseIncident = require('./lib/parse-incident');
module.exports = {
  currentIncidents(params) {
    params      = params || {};
    params.page = params.page || 1;
    return new Promise((resolve, reject) => {
      let req = http.request({
        hostname: 'www.fire.ca.gov',
        protocol: 'http:',
        path: `/current_incidents/?page=${params.page}`,
        headers: {
          'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.53 Safari/525.19'
        },
        method: 'GET'
      }, (res) => {
        let data = '';
        res.on('data', (chunk) => {
          data += chunk;
        });
        res.on('end', () => {
          let $            = cheerio.load(data);
          let promiseChain = Promise.resolve([]);
          $('table.incident_table[id]').each((i, table) => {
            let incidentId = table.attribs.id;
            promiseChain   = promiseChain.then((results) => {
              return this.get(incidentId).then((incident) => {
                results.push(incident);
                return Promise.resolve(results);
              })
            });
          });
          promiseChain.then(resolve);
        });
        res.on('error', reject);
      });
      req.end();
      req.on('error', reject);
    });
  },
  rss() {
    return new Promise((resolve, reject) => {
      let req = http.request({
        hostname: 'cdfdata.fire.ca.gov',
        protocol: 'http:',
        path: `/incidents/rss.xml`,
        headers: {
          'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.53 Safari/525.19'
        },
        method: 'GET'
      }, (res) => {
        let data = '';
        res.on('data', (chunk) => {
          data += chunk;
        });
        res.on('end', () => {
          let $            = cheerio.load(data, {xmlMode: true});
          let promiseChain = Promise.resolve([]);
          $('item link').each((i, link) => {
            let href = link.children[0].data;
            promiseChain = promiseChain.then((results) => {
              let incidentId = (href.match(/\/current_incidents\/incidentdetails\/Index\/(\d+)/) || [])[1];
              return this.get(incidentId).then((incident) => {
                results.push(incident);
                return Promise.resolve(results);
              })
            });
          });
          promiseChain.then(resolve);
        });
        res.on('error', reject);
      });
      req.end();
      req.on('error', reject);
    });
  },
  get(incidentId){
    return new Promise((resolve, reject) => {
      let req = http.request({
        hostname: 'www.fire.ca.gov',
        protocol: 'http:',
        path: `/current_incidents/incidentdetails/Index/${incidentId}`,
        headers: {
          'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.53 Safari/525.19'
        },
        method: 'GET'
      }, (res) => {
        let data = '';
        res.on('data', (chunk) => {
          data += chunk;
        });
        res.on('end', () => {
          let incident = parseIncident(data);
          incident._id  = `${incidentId}`.trim();
          incident.href = `http://www.fire.ca.gov/current_incidents/incidentdetails/Index/${incidentId}`;
          resolve(incident);
        });
        res.on('error', reject);
      });
      req.end();
      req.on('error', reject);
    });
  }
}
// Command-line entry point: `node <this file> <incidentId>` prints the parsed
// incident. Guarded so that require()-ing this module from other code does not
// trigger a network request as a side effect.
if (require.main === module) {
  module.exports.get(process.argv[2])
    .then(console.log)
    .catch((err) => {
      // Report the failure and signal it through the exit code instead of
      // leaving an unhandled promise rejection.
      console.error(err);
      process.exitCode = 1;
    });
}