UNPKG

nyc-bytes

Version:

A node.js module for working with NYC's BYTES of the BIG APPLE datasets.

github.com/sedenardi/node-nyc-bytes

sedenardi/node-nyc-bytes

76 lines (66 loc) • 2.2 kB

JavaScript

const dataset = require('../datasets.json').PAD; const fs = require('fs'); const path = require('path'); const Dataset = require('./Dataset.js'); const parse = require('csv-parse'); const StreamConcat = require('stream-concat'); const Transform = require('stream').Transform; const trimAll = function(obj) { for (let f in obj) { obj[f] = obj[f].trim(); } }; const replaceWhitespace = function(obj) { for (let f in obj) { obj[f] = obj[f].replace(/\s{2,}/g, ' '); } }; class PAD extends Dataset { constructor() { super(dataset); } stream(options) { super.stream(); let table = 'BOTH'; let sanitize = true; if (options) { if (typeof options.table !== 'undefined' && (options.table === 'BBL' || options.table === 'ADR')) table = options.table; if (typeof options.sanitize !== 'undefined') sanitize = options.sanitize; } const fileArray = []; if (table === 'BOTH' || table === 'BBL') fileArray.push('bbl'); if (table === 'BOTH' || table === 'ADR') fileArray.push('adr'); let fileIndex = 0; const combined = new StreamConcat(() => { if (fileIndex === fileArray.length) return null; const f = fileArray[fileIndex++]; const fileName = this.dataset.files.filter((file) => { return file.indexOf(f) !== -1; })[0]; const fileStream = fs.createReadStream(path.resolve(this.path, fileName)); const csv = parse({ columns: (r) => { return r; }, cast: false }); return fileStream.pipe(csv); }, { objectMode: true }); const self = this; const bblTransform = new Transform({ objectMode: true, transform(chunk, encoding, done) { const borough = chunk.boro + ''; const block = self.padWithZeros(chunk.block, 5); const lot = self.padWithZeros(chunk.lot, 4); chunk.BBL = `${borough}${block}${lot}`; if (sanitize) { trimAll(chunk); replaceWhitespace(chunk); } this.push(chunk); done(); } }); return combined.pipe(bblTransform); } } module.exports = new PAD();