UNPKG

calfire

Version:

Unofficial CalFire data scraper.

166 lines (145 loc) 5.92 kB
'use strict';

const cheerio = require('cheerio');
const fs = require('fs');
const defaults = require('json-schema-defaults');
const slugify = require('slug');
const path = require('path');
const incidentSchema = require('incident-schema');

// Matches a phone number followed by a parenthesised label, e.g.
// "(555) 555-1234 (Fire Information)". Group 1 is the number, group 2 the label.
// NOTE(review): the character classes contain literal `|` characters and an
// `A-z` range (which also covers a few punctuation characters). They are kept
// exactly as originally written so that matching behaviour does not change.
const PHONE_NUMBER_PATTERN = /(\(*[\d|A-Z]{3}\)*[\s|\-][\d|A-Z]{3}[\s|\-][\d|A-Z]{4})\s*\(([A-z|\s|&|\w|\-|\_]*)\s*\)*/;

/**
 * Small coercion helpers used while mapping scraped text onto the incident.
 */
const TYPE = {
  /**
   * Undo YAML-style damage on multi-line strings.
   *
   * Sometimes, when using the YAML parser, a valid string will seem like a
   * K/V pair to the parser. For strings that are likely to contain colons
   * with multiple lines, this re-joins every key and value as `key:\nvalue`.
   *
   * @param {*} obj - A string, or an object produced by a mis-parse.
   * @returns {*} The re-assembled string for objects; any other value
   *   (including `null` and `undefined`) is returned untouched.
   */
  string(obj) {
    // `typeof null` is also 'object'; Object.keys(null) would throw.
    if (obj !== null && typeof obj === 'object') {
      return Object.keys(obj)
        .map((key) => `${key}:\n${obj[key]}`)
        .join('');
    }
    return obj;
  },

  /**
   * Try to convert a value to a number, falling back on the original value
   * when the conversion yields NaN or 0.
   *
   * @param {*} obj - Value to convert.
   * @returns {*} The numeric value, or `obj` itself.
   */
  number(obj) {
    return (obj * 1) || obj;
  },

  /**
   * Normalise a value through `number()` and render it as a string.
   *
   * @param {*} obj - Value to normalise.
   * @returns {string|undefined} The stringified value, or `undefined` for
   *   falsy input.
   */
  numberString(obj) {
    if (!obj) {
      return undefined;
    }
    const number = this.number(obj);
    if (number || number === 0) {
      return `${number}`;
    }
    return undefined;
  },

  /**
   * Parse a value into a Date.
   *
   * @param {*} obj - Anything accepted by the `Date` constructor.
   * @returns {Date|undefined} The parsed date, or `undefined` when the value
   *   does not describe a valid point in time.
   */
  date(obj) {
    const parsed = new Date(obj);
    // `new Date()` always yields a Date object; an unparsable input shows up
    // as a NaN timestamp rather than as a different type.
    return Number.isNaN(parsed.getTime()) ? undefined : parsed;
  },
};

/**
 * Return the first capture group of `pattern` in `text`.
 *
 * @param {string|undefined} text - Text to search; missing text counts as ''.
 * @param {RegExp} pattern - Pattern with at least one capture group.
 * @returns {string|undefined} The captured text, or `undefined` on no match.
 */
function firstCapture(text, pattern) {
  const match = (text || '').match(pattern);
  return match ? match[1] : undefined;
}

/**
 * Build an incident object from the HTML of an incident page.
 *
 * The incident starts from the defaults of `incident-schema` and is filled
 * from the `h3.incident_h3` heading and the `table#incident_information`
 * table. Only the nested containers this function writes to (`links`,
 * `stats.damage`, `stats.resources`, `location`, `contact.phoneNumbers`)
 * are assumed to be provided by the schema defaults.
 *
 * @param {string} html - Markup of the incident page.
 * @returns {Object} The populated incident.
 */
function parseIncident(html) {
  const $ = cheerio.load(html);

  // The incident id is currently not extracted. Earlier, disabled code read
  // it from the `?incident_id=<digits>` query of the first
  // `.list_decorative_blue a, .list_decorative_dark a` link.
  const incident = defaults(incidentSchema);
  incident.name = $('h3.incident_h3').text();

  // Normalise every label cell so that it ends in exactly one trailing colon.
  // Cells that are empty or that start with an element instead of a text node
  // have no leading text to normalise and are skipped.
  $('td.emphasized').each((i, el) => {
    const firstChild = el.children[0];
    if (firstChild && typeof firstChild.data === 'string') {
      firstChild.data = `${firstChild.data.trim().replace(/:/, '')}:`;
    }
  });

  // Translate the incident information table to an object: label -> value.
  const json = {};
  $('table#incident_information tr:not(.header_tr)').each((i, el) => {
    const $row = $(el);
    const key = $row.find('td.emphasized').text().trim().replace(/:/, '');
    const value = $row.find('td:not(.emphasized)').first().text().trim();
    json[key] = value;
  });

  // Gather relevant links from the table.
  $('table#incident_information a').each((i, el) => {
    const $anchor = $(el);
    const href = $anchor.attr('href');
    if (typeof href !== 'string') {
      // An anchor without an href has nothing to link to.
      return;
    }

    const link = {
      href,
      description: $anchor.text(),
      tags: [],
    };

    if (/inciweb\.nwcg\.gov\/incident/.test(href)) {
      link.title = 'Inciweb Incident Page';
      link.tags.push('inciweb');
    } else if (/rvcfire\.org/.test(href)) {
      link.title = 'Riverside County FD Incident Page';
      link.tags.push('riverside');
    } else if (/twitter/.test(href)) {
      link.title = `${$anchor.text()} Twitter`;
      link.tags.push('twitter');
    }

    incident.links.push(link);
  });

  // Basics
  // Unparsable timestamps become `undefined` instead of an Invalid Date.
  incident.updatedAt = json['Last Updated'] ? TYPE.date(json['Last Updated']) : undefined;
  incident.startedAt = json['Date/Time Started'] ? TYPE.date(json['Date/Time Started']) : undefined;
  incident.administrativeUnit = json['Administrative Unit'];
  incident.cause = json['Cause'];
  incident.cooperatingAgencies = json['Cooperating Agencies'];
  incident.roadClosures = TYPE.string(json['Road Closures']);
  incident.schoolClosures = TYPE.string(json['School Closures']);
  incident.evacuations = TYPE.string(json['Evacuation Info'] || json['Evacuations']);

  // The slug year prefers the start date, then the last update, then today.
  const year = (incident.startedAt || incident.updatedAt || new Date()).getFullYear();
  const county = json['County'];
  incident.slug = slugify(`${incident.name} ${county} ${year}`, {lower: true});

  // Stats - Damage
  const burned = json['Acres Burned - Containment'];
  // Accept thousands separators ("1,234 acres") and strip them before the
  // numeric conversion.
  const acres = firstCapture(burned, /(\d[\d,]*(?:\.\d+)?) acres/i);
  incident.stats.damage.acres = TYPE.numberString(acres ? acres.replace(/,/g, '') : acres);
  // A dedicated containment row wins over the figure embedded in the
  // acreage row.
  incident.stats.damage.contained = json['Estimated - Containment']
    ? json['Estimated - Containment']
    : firstCapture(burned, /(\d{1,3}%) contained/i);
  incident.stats.damage.injuries = json['Injuries'];
  incident.stats.damage.structuresDestroyed = json['Structures Destroyed'];
  incident.stats.damage.structuresThreatened = json['Structures Threatened'];

  // Stats - Resources
  incident.stats.resources.airtankers = json['Total Airtankers'];
  incident.stats.resources.dozers = json['Total Dozers'];
  incident.stats.resources.fireCrews = json['Total Fire Crews'];
  incident.stats.resources.fireEngines = json['Total Fire Engines'];
  incident.stats.resources.firefighters = json['Total Fire Personnel'];
  incident.stats.resources.helicopters = json['Total Helicopters'];
  incident.stats.resources.waterTenders = json['Total Water Tenders'];

  // Location
  // Letters and whitespace only in front of the word "County".
  incident.location.county = firstCapture(json['County'], /([a-z\s]+) County/i);
  incident.location.description = TYPE.string(json['Location']);
  // The "Long/Lat" row lists longitude first, then latitude.
  const coordinates = (json['Long/Lat'] || '').match(/(-*\d+\.\d+)\/(-*\d+\.\d+)/) || [];
  incident.location.coordinates = {
    lat: coordinates[2],
    long: coordinates[1],
  };

  // Parse phone numbers: one number/label pair per matching table row.
  $("table#incident_information tr:contains('Phone Numbers')").each((i, el) => {
    const value = $(el).find('td:not(.emphasized)').first().text().trim();
    const phoneNumber = value.match(PHONE_NUMBER_PATTERN);
    if (phoneNumber) {
      incident.contact.phoneNumbers.push({
        title: phoneNumber[2],
        number: phoneNumber[1],
      });
    }
  });

  return incident;
}

module.exports = parseIncident;