UNPKG

ya-csv

Version:

CSV parser and generator for Node.js

379 lines (344 loc) 12.1 kB
var sys;
try {
    sys = require('util');
} catch (e) {
    // fall back to the legacy module name when `util` cannot be loaded
    sys = require('sys');
}
var events = require('events'),
    fs = require('fs');

var csv = exports;

/**
 * Provides Base CSV Reading capabilities
 * @class CsvReader
 * @extends EventEmitter
 */

/**
 * The constructor.
 *
 * When a read stream is given, the reader subscribes to its 'data', 'error'
 * and 'end' events: 'data' chunks are fed to `parse`, 'error' is re-emitted on
 * the reader and 'end' triggers `end`. Stream control helpers (`pause`,
 * `resume`, `destroy`, `destroySoon`) are only installed in that case.
 * Without a stream the caller drives the reader through `parse` and `end`.
 *
 * @constructor
 * @param readStream {ReadStream} An instance of the ReadStream class (optional)
 * @param options {Object} optional parameters for the reader <br/>
 *  - separator {String}
 *  - quote {String}
 *  - escape {String}
 *  - comment {String}
 *  - columnNames {Boolean}
 *  - columnsFromHeader {Boolean}
 *  - nestedQuotes {Boolean}
 */
var CsvReader = csv.CsvReader = function(readStream, options) {
    var self = this;
    // set once parsing has thrown; later 'data' chunks are then ignored
    var hasEnded = false;

    events.EventEmitter.call(self);
    _setOptions(self, options);

    // parser state carried over between successive `parse` calls
    self.parsingStatus = {
        rows: 0,
        openRecord: [],
        openField: '',
        lastChar: '',
        quotedField: false,
        commentedLine: false
    };

    if (readStream) {
        readStream.addListener('data', function(data) {
            if (hasEnded) {
                return;
            }
            try {
                self.parse(data);
            } catch (e) {
                // report the parse failure once and stop consuming input
                hasEnded = true;
                self.emit('error', e);
            }
        });
        readStream.addListener('error', self.emit.bind(self, 'error'));
        readStream.addListener('end', function() {
            self.end();
        });

        /**
         * Pauses the readStream
         * @method pause
         * @return {CsvReader} this reader, to allow chaining
         */
        self.pause = function() {
            readStream.pause();
            return self;
        };

        /**
         * Resumes the readStream
         * @method resume
         * @return {CsvReader} this reader, to allow chaining
         */
        self.resume = function() {
            readStream.resume();
            return self;
        };

        /**
         * Closes the readStream
         * @method destroy
         * @return {CsvReader} this reader, to allow chaining
         */
        self.destroy = function() {
            readStream.destroy();
            return self;
        };

        /**
         * Closes the readStream when its file stream has been drained.
         * Falls back to `destroy` for streams that do not implement
         * `destroySoon`.
         * @method destroySoon
         * @return {CsvReader} this reader, to allow chaining
         */
        self.destroySoon = function() {
            if (typeof readStream.destroySoon === 'function') {
                readStream.destroySoon();
            } else {
                readStream.destroy();
            }
            return self;
        };
    }
};
sys.inherits(CsvReader, events.EventEmitter);

/**
 * Parses incoming data as a readable CSV file
 * @method parse
 * @param data {String} chunk of CSV text to parse from the incoming file
 */
/**
 * Feeds one chunk of CSV text through the parser. Parser state is kept in
 * `this.parsingStatus`, so a record or field may continue in the next chunk.
 * Completed records are emitted as 'data' events via `_addRecord`.
 *
 * NOTE: the look-ahead used for escapes and closing quotes only inspects the
 * current chunk (`data.charAt(i + 1)`).
 *
 * @method parse
 * @param data {String} chunk of CSV text
 * @throws {Error} when a closing quote is followed by something other than a
 *   separator or line break and the `nestedQuotes` option is not set
 */
CsvReader.prototype.parse = function(data) {
    var ps = this.parsingStatus;
    // drop a leading byte order mark while no field of the record is stored yet
    if (ps.openRecord.length == 0) {
        if (data.charCodeAt(0) === 0xFEFF) {
            data = data.slice(1);
        }
    }
    for (var i = 0; i < data.length; i++) {
        var c = data.charAt(i);
        switch (c) {
            // escape and separator may be the same char, typically '"'
            case this.escapechar:
            case this.quotechar:
                if (ps.commentedLine) break;
                var isEscape = false;
                if (c === this.escapechar) {
                    // double-quote at the field beginning does not count as an escape string
                    if (c !== this.quotechar || ps.openField || ps.quotedField) {
                        var nextChar = data.charAt(i + 1);
                        if (this._isEscapable(nextChar)) {
                            // keep the escaped character and skip over it
                            this._addCharacter(nextChar);
                            i++;
                            isEscape = true;
                        }
                    }
                }
                if (!isEscape && (c === this.quotechar)) {
                    if (ps.openField && !ps.quotedField) {
                        // quote inside an already started unquoted field: switch to quoted mode
                        ps.quotedField = true;
                        break;
                    }
                    if (ps.quotedField) {
                        // closing quote should be followed by separator unless the nested quotes option is set
                        var following = data.charAt(i + 1);
                        if (following && following != '\r' && following != '\n' && following !== this.separator && this.nestedQuotes != true) {
                            throw new Error("separator expected after a closing quote; found " + following);
                        } else {
                            ps.quotedField = false;
                        }
                    } else if (ps.openField === '') {
                        // opening quote at the start of a field
                        ps.quotedField = true;
                    }
                }
                break;
            case this.separator:
                if (ps.commentedLine) break;
                if (ps.quotedField) {
                    this._addCharacter(c);
                } else {
                    this._addField();
                }
                break;
            case '\n':
                // handle CRLF sequence
                if (!ps.quotedField && (ps.lastChar === '\r')) {
                    break;
                }
                // intentional fall-through: a lone '\n' ends the line like '\r'
            case '\r':
                if (ps.commentedLine) {
                    ps.commentedLine = false;
                } else if (ps.quotedField) {
                    this._addCharacter(c);
                } else {
                    this._addField();
                    this._addRecord();
                }
                break;
            case this.commentchar:
                if (ps.commentedLine) break;
                // a comment only starts at the very beginning of a record
                if (ps.openRecord.length === 0 && ps.openField === '' && !ps.quotedField) {
                    ps.commentedLine = true;
                } else {
                    this._addCharacter(c);
                }
                break;
            default:
                if (ps.commentedLine) break;
                this._addCharacter(c);
        }
        ps.lastChar = c;
    }
};

/**
 * Signals the end of the input. Emits 'error' when the input stops inside a
 * quoted field; otherwise flushes the record still being built (if any) and
 * emits 'end'.
 * @method end
 */
CsvReader.prototype.end = function() {
    var ps = this.parsingStatus;
    if (ps.quotedField) {
        this.emit('error', new Error('Input stream ended but closing quotes expected'));
    } else {
        // dump open record; a record that already holds fields always has one
        // more pending field, even if that field is empty (e.g. "a,b,")
        if (ps.openField || ps.openRecord.length > 0) {
            this._addField();
        }
        if (ps.openRecord.length > 0) {
            this._addRecord();
        }
        this.emit('end');
    }
};

/**
 * Tells whether a character following the escape character is to be taken
 * literally.
 * @param c {String} the character after the escape character
 * @return {Boolean} true when `c` is the escape or the quote character
 */
CsvReader.prototype._isEscapable = function(c) {
    if ((c === this.escapechar) || (c === this.quotechar)) {
        return true;
    }
    return false;
};

/**
 * Appends a character to the field currently being read.
 * @param c {String} character to append
 */
CsvReader.prototype._addCharacter = function(c) {
    this.parsingStatus.openField += c;
};

/**
 * Closes the current field: stores it in the open record and resets the
 * field state.
 */
CsvReader.prototype._addField = function() {
    var ps = this.parsingStatus;
    ps.openRecord.push(ps.openField);
    ps.openField = '';
    ps.quotedField = false;
};

/**
 * Sets the column names used to turn records into objects.
 * @method setColumnNames
 * @param names {Array} column names, in column order
 */
CsvReader.prototype.setColumnNames = function(names) {
    this.columnNames = names;
};

/**
 * Closes the current record. With `columnsFromHeader` the first record is
 * consumed as header and not emitted. Otherwise the record is emitted as a
 * 'data' event: as an object keyed by column name when column names are
 * known, as an array of field values when they are not.
 */
CsvReader.prototype._addRecord = function() {
    var ps = this.parsingStatus;
    if (this.columnsFromHeader && ps.rows === 0) {
        // header row: take the names from it unless names were already supplied
        if (!this.columnNames || this.columnNames.length === 0) {
            this.setColumnNames(ps.openRecord);
        }
    } else if (this.columnNames != null && this.columnNames.length > 0) {
        var objResult = {};
        for (var i = 0; i < this.columnNames.length; i++) {
            objResult[this.columnNames[i]] = ps.openRecord[i];
        }
        this.emit('data', objResult);
    } else {
        this.emit('data', ps.openRecord);
    }
    ps.rows++;
    ps.openRecord = [];
    ps.openField = '';
    ps.quotedField = false;
};

/**
 * Creates a reader for a CSV file.
 * @param path {String} path of the file to read
 * @param options {Object} reader options; additionally `flags` (default 'r')
 *   and `encoding` (default 'utf8') are applied to the file stream
 * @return {CsvReader}
 */
csv.createCsvFileReader = function(path, options) {
    options = options || {};
    var readStream = fs.createReadStream(path, {
        'flags': options.flags || 'r'
    });
    readStream.setEncoding(options.encoding || 'utf8');
    return new CsvReader(readStream, options);
};

/**
 * Creates a reader on top of an existing stream, or a stream-less reader that
 * is driven through `parse` and `end`.
 *
 * Accepted call forms: `(readStream, options)`, `(readStream)`, `(options)`
 * and `()`.
 *
 * @param readStream {ReadStream} stream to read from (optional)
 * @param options {Object} reader options; `encoding` defaults to 'utf8'
 * @return {CsvReader}
 */
csv.createCsvStreamReader = function(readStream, options) {
    // Single-argument form: a plain object is the options, but an object that
    // can register listeners is the stream itself and must not be discarded.
    if (options === undefined && readStream && typeof readStream === 'object' &&
            typeof readStream.addListener !== 'function') {
        options = readStream;
        readStream = undefined;
    }
    options = options || {};
    if (readStream && typeof readStream.setEncoding === 'function') {
        readStream.setEncoding(options.encoding || 'utf8');
    }
    return new CsvReader(readStream, options);
};

/**
 * Writes records as CSV text to a writable stream. The stream's 'drain',
 * 'error' and 'close' events are re-emitted on the writer.
 *
 * @class CsvWriter
 * @extends EventEmitter
 * @constructor
 * @param writeStream {WriteStream} stream the CSV text is written to
 * @param options {Object} optional parameters for the writer <br/>
 *  - separator {String}
 *  - quote {String}
 *  - escape {String}
 *  - encoding {String} encoding passed to every write, 'utf8' by default
 *  - escapeFormulas {Boolean} - prepend an apostrophe in front of
 *    fields starting with '=', '+', or '-'
 */
var CsvWriter = csv.CsvWriter = function(writeStream, options) {
    var self = this;
    events.EventEmitter.call(self);
    self.writeStream = writeStream;
    options = options || {};
    _setOptions(self, options);
    self.encoding = options.encoding || 'utf8';

    if (typeof writeStream.setEncoding === 'function') {
        writeStream.setEncoding(self.encoding);
    }

    writeStream.addListener('drain', self.emit.bind(self, 'drain'));
    writeStream.addListener('error', self.emit.bind(self, 'error'));
    writeStream.addListener('close', self.emit.bind(self, 'close'));
};
sys.inherits(CsvWriter, events.EventEmitter);

/**
 * Writes one record as a CSV line.
 * @method writeRecord
 * @param rec {Array} field values; a falsy `rec` is ignored
 * @return whatever the underlying stream's `write` returned (undefined when
 *   `rec` was ignored), so callers can wait for 'drain' if they want to
 * @throws {Error} when `rec` is not an array
 */
CsvWriter.prototype.writeRecord = function(rec) {
    if (!rec) return; // ignore empty records
    if (!Array.isArray(rec)) {
        throw new Error("CsvWriter.writeRecord only takes an array as an argument");
    }
    return _writeArray(this, rec);
};

/**
 * Closes the underlying stream. `end` is preferred so that records which are
 * still buffered get flushed; `destroy` and `close` are fallbacks for streams
 * without `end`.
 * @method close
 */
CsvWriter.prototype.close = function() {
    var stream = this.writeStream;
    if (typeof stream.end === 'function') {
        stream.end();
    } else if (stream.destroy) {
        stream.destroy();
    } else if (stream.close) {
        stream.close();
    }
};

/**
 * Serialises an array of values as one CSV line (every field quoted, line
 * terminated by CRLF) and writes it to the writer's stream.
 * @param writer {CsvWriter}
 * @param arr {Array} field values
 * @return the result of the stream's `write` call
 */
function _writeArray(writer, arr) {
    var out = [];
    for (var i = 0; i < arr.length; i++) {
        if (i != 0) out.push(writer.separator);
        out.push(writer.quotechar);
        _appendField(out, writer, arr[i]);
        out.push(writer.quotechar);
    }
    out.push("\r\n");
    return writer.writeStream.write(out.join(''), writer.encoding);
}

/**
 * Appends the escaped text of one field to `outArr`. `null` and `undefined`
 * become an empty field; other non-string values are converted with `String`.
 * @param outArr {Array} output fragments, joined by the caller
 * @param writer {CsvWriter} supplies quote/escape characters and options
 * @param field value to append
 */
function _appendField(outArr, writer, field) {
    // Make sure field is a string
    if (typeof(field) !== 'string') {
        // We are not interested in outputting "null" or "undefined"
        if (typeof(field) !== 'undefined' && field !== null) {
            field = String(field);
        } else {
            outArr.push('');
            return;
        }
    }

    for (var i = 0; i < field.length; i++) {
        var nextChar = field.charAt(i);
        if (nextChar === writer.quotechar || nextChar === writer.escapechar) {
            outArr.push(writer.escapechar);
        } else if (writer.escapeFormulas && i === 0 &&
                (nextChar === '=' || nextChar === '+' || nextChar === '-')) {
            // If a field starts with =, +, or -, Excel etc will interpret it as a formula.
            // Adding an apostrophe fixes this.
            outArr.push('\'');
        }
        outArr.push(nextChar);
    }
}

/**
 * Creates a writer for a CSV file.
 * @param path {String} path of the file to write
 * @param options {Object} writer options; `flags` (default 'w') is applied
 *   to the file stream
 * @return {CsvWriter}
 */
csv.createCsvFileWriter = function(path, options) {
    options = options || {'flags': 'w'};
    var writeStream = fs.createWriteStream(path, {
        'flags': options.flags || 'w'
    });
    return new CsvWriter(writeStream, options);
};

/**
 * Creates a writer on top of an existing writable stream.
 * @param writeStream {WriteStream}
 * @param options {Object} writer options
 * @return {CsvWriter}
 */
csv.createCsvStreamWriter = function(writeStream, options) {
    return new CsvWriter(writeStream, options);
};

// ===============
// =   utils     =
// ===============

/**
 * Copies the CSV options onto a reader or writer, applying defaults for
 * everything that is undefined: separator ',', quote '"', escape '"',
 * comment '' (none), columnNames [], columnsFromHeader false,
 * nestedQuotes false, escapeFormulas false.
 * @param obj {Object} reader or writer to configure
 * @param options {Object} user supplied options (optional)
 */
function _setOptions(obj, options) {
    options = options || {};
    obj.separator   = (typeof options.separator !== 'undefined') ? options.separator : ',';
    obj.quotechar   = (typeof options.quote !== 'undefined')     ? options.quote     : '"';
    obj.escapechar  = (typeof options.escape !== 'undefined')    ? options.escape    : '"';
    obj.commentchar = (typeof options.comment !== 'undefined')   ? options.comment   : '';
    obj.columnNames = (typeof options.columnNames !== 'undefined') ? options.columnNames : [];
    obj.columnsFromHeader = (typeof options.columnsFromHeader !== 'undefined') ? options.columnsFromHeader : false;
    obj.nestedQuotes = (typeof options.nestedQuotes !== 'undefined') ? options.nestedQuotes : false;
    obj.escapeFormulas = (typeof options.escapeFormulas !== 'undefined') ? options.escapeFormulas : false;
}