csv
Version:
CSV parser with simple api, full of options and tested against large datasets.
180 lines (159 loc) • 5.35 kB
JavaScript
// Generated by CoffeeScript 1.6.2
var EventEmitter, Parser;
EventEmitter = require('events').EventEmitter;
/*
Parsing
=======
The parser extends the [EventEmitter][event] and emits the following events:
* *row*
Emitted by the parser on each line with the line content as an array of fields.
* *end*
Emitted when no more data will be parsed.
* *error*
Emitted when an error occurred.
*/
/*
`Parser(csv)`
-------------
Build a parser bound to a `csv` instance. The parser keeps a reference to
the instance, to its `options.from` object and to its `state` object, and
initialises the bookkeeping used while scanning characters.
*/
Parser = function(csv) {
  var self = this;
  // References borrowed from the owning csv instance.
  self.csv = csv;
  self.options = csv.options.from;
  self.state = csv.state;
  // Line counter, used in error messages and by the byte order mark check.
  self.lines = 0;
  // Characters received by `write` but not parsed yet.
  self.buf = '';
  // True while scanning inside a quoted field.
  self.quoting = false;
  // Field being accumulated.
  self.field = '';
  // Last character read and look-ahead character.
  self.lastC = '';
  self.nextChar = null;
  // Marker set when a closing quote is met; 0 means "none".
  self.closingQuote = 0;
  // Fields collected for the row being built.
  self.line = [];
  return self;
};

// Parser instances inherit `on`, `emit`, ... from EventEmitter.
Parser.prototype.__proto__ = EventEmitter.prototype;
/*
`write(chars, end)`
-------------------
Parse a string which may hold multiple lines.

The parser state (`field`, `line`, `quoting`, ...) is enriched on each
character and a `row` event is emitted every time a row delimiter is met
outside of a quoted field.

* `chars` String appended to whatever is still buffered from the previous call.
* `end`   When truthy, no character is held back at the end of `chars`.

Unless `end` is set, a trailing character which is the escape character, or
which is not a row delimiter, stays in `this.buf` until the next call, since
its meaning may depend on the character that follows it.

When no `rowDelimiter` option is set, it is detected from the first "\n",
"\r" or "\r\n" found and stored in the options.

Returns the list of `chars` indexes left in the buffer (a by-product of the
CoffeeScript compilation, kept for compatibility), or the result of `emit`
when an invalid closing quote is reported through the `error` event.
*/
Parser.prototype.write = function(chars, end) {
  var areNextCharsRowDelimiters, char, delimLength, escapeIsQuote, i, isDelimiter, isEscape, isQuote, isRowDelimiter, l, ltrim, options, rtrim, _results;
  options = this.options;
  ltrim = options.trim || options.ltrim;
  rtrim = options.trim || options.rtrim;
  chars = this.buf + chars;
  l = chars.length;
  delimLength = options.rowDelimiter ? options.rowDelimiter.length : 0;
  i = 0;
  // Skip a leading byte order mark, only while still on the first line.
  if (this.lines === 0 && this.csv.options.from.encoding === 'utf8' && 0xFEFF === chars.charCodeAt(0)) {
    i++;
  }
  while (i < l) {
    // Hold back the tail of the chunk: it could be the beginning of a row
    // delimiter or an escape sequence completed by the next chunk.
    if ((i + delimLength >= l && chars.substr(i, delimLength) !== options.rowDelimiter) && !end) {
      break;
    }
    if ((i + options.escape.length >= l && chars.substr(i, options.escape.length) === options.escape) && !end) {
      break;
    }
    char = this.nextChar ? this.nextChar : chars.charAt(i);
    this.lastC = char;
    this.nextChar = chars.charAt(i + 1);
    // Auto-detect the row delimiter from the first line break.
    if ((options.rowDelimiter == null) && (this.nextChar === '\n' || this.nextChar === '\r')) {
      options.rowDelimiter = this.nextChar;
      if (this.nextChar === '\r' && chars.charAt(i + 2) === '\n') {
        options.rowDelimiter += '\n';
      }
      delimLength = options.rowDelimiter.length;
    }
    if (char === options.escape) {
      escapeIsQuote = options.escape === options.quote;
      isEscape = this.nextChar === options.escape;
      isQuote = this.nextChar === options.quote;
      // An escape followed by an escape or a quote yields that character,
      // unless it is a quote opening a new field.
      if (!(escapeIsQuote && !this.field && !this.quoting) && (isEscape || isQuote)) {
        i++;
        char = this.nextChar;
        this.nextChar = chars.charAt(i + 1);
        this.field += char;
        i++;
        continue;
      }
    }
    if (char === options.quote) {
      if (this.quoting) {
        // A closing quote must be followed by a delimiter, a row delimiter
        // or the end of the input.
        areNextCharsRowDelimiters = options.rowDelimiter && chars.substr(i + 1, options.rowDelimiter.length) === options.rowDelimiter;
        if (this.nextChar && !areNextCharsRowDelimiters && this.nextChar !== options.delimiter) {
          return this.error(new Error("Invalid closing quote at line " + (this.lines + 1) + "; found " + (JSON.stringify(this.nextChar)) + " instead of delimiter " + (JSON.stringify(options.delimiter))));
        }
        this.quoting = false;
        // Remember how much of the field was quoted. This is a length of
        // `field`, not a position in `chars`: positions are relative to the
        // current chunk while the field may span several chunks.
        this.closingQuote = this.field.length;
        i++;
        continue;
      } else if (!this.field) {
        this.quoting = true;
        i++;
        continue;
      }
    }
    isDelimiter = char === options.delimiter;
    isRowDelimiter = options.rowDelimiter && chars.substr(i, options.rowDelimiter.length) === options.rowDelimiter;
    if (isRowDelimiter) {
      // Count every line break, quoted or not, so that the line numbers
      // in error messages follow the input text.
      this.lines++;
    }
    if (!this.quoting && (isDelimiter || isRowDelimiter)) {
      if (rtrim) {
        if (this.closingQuote) {
          // Quoted content is kept as is, trailing whitespace included.
          this.field = this.field.substr(0, this.closingQuote);
        } else {
          this.field = this.field.trimRight();
        }
      }
      this.line.push(this.field);
      this.closingQuote = 0;
      this.field = '';
      if (isRowDelimiter) {
        this.emit('row', this.line);
        this.line = [];
        i += options.rowDelimiter.length;
        this.nextChar = chars.charAt(i);
        continue;
      }
    } else if (!this.quoting && (char === ' ' || char === '\t')) {
      // With ltrim, whitespace before the first character of a field is dropped.
      if (!(ltrim && !this.field)) {
        this.field += char;
      }
    } else {
      this.field += char;
    }
    i++;
  }
  // Keep what was not parsed for the next call.
  this.buf = chars.substr(i);
  _results = [];
  while (i < l) {
    _results.push(i++);
  }
  return _results;
};
/*
`end()`
-------
Flush the parser. What is left in the buffer is parsed, the field being
built is pushed to the current row, the row is emitted if it holds at least
one field, and the `end` event is emitted.

If a quoted field is still open, an `error` event is emitted instead and
neither the pending row nor `end` are emitted.

Returns the result of the last `emit` call.
*/
Parser.prototype.end = function() {
  var endsWithQuote, rtrim;
  this.write('', true);
  if (this.quoting) {
    return this.error(new Error("Quoted field not terminated at line " + (this.lines + 1)));
  }
  endsWithQuote = this.lastC === this.options.quote;
  // A field is pending when it has content, or when the input stopped right
  // after a delimiter or a quote (empty last field).
  if (this.field || this.lastC === this.options.delimiter || endsWithQuote) {
    rtrim = this.options.trim || this.options.rtrim;
    // Same rule as in `write`: the content of a quoted field is kept as is.
    // When the last character read is a quote, either it closed the field or
    // the field ends with a quote character, so there is nothing to trim.
    if (rtrim && !endsWithQuote) {
      this.field = this.field.trimRight();
    }
    this.line.push(this.field);
    this.field = '';
    this.closingQuote = 0;
  }
  if (this.line.length > 0) {
    this.emit('row', this.line);
  }
  return this.emit('end', null);
};
/*
`error(e)`
----------
Report `e` to the listeners of the `error` event and return what `emit`
returned.
*/
Parser.prototype.error = function(e) {
  var delivered;
  delivered = this.emit('error', e);
  return delivered;
};
module.exports = function(csv) {
return new Parser(csv);
};
module.exports.Parser = Parser;
/*
[event]: http://nodejs.org/api/events.html
*/