csvtojson
Version:
A tool for converting CSV data to JSON, with support for customised parsers
1,721 lines (1,634 loc) • 302 kB
JavaScript
// Generated browserify module loader. t: map of module id -> [factory, {requested name -> module id}], n: cache of loaded modules, r: list of entry module ids that are loaded immediately.
(function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(require,module,exports){
// Browser entry point: publish the converter factory (and its version) as
// window.csvtojson. `typeof` is used so that evaluating this module where no
// `window` binding exists is a no-op instead of a ReferenceError.
if (typeof window !== "undefined") {
  window.csvtojson = require("./index.js");
  window.csvtojson.version = require("./package.json").version;
}
},{"./index.js":2,"./package.json":130}],2:[function(require,module,exports){
// Package entry point: re-export whatever ./libs/csv2json.js exports.
module.exports = require("./libs/csv2json.js");
},{"./libs/csv2json.js":24}],3:[function(require,module,exports){
var util=require("util");
module.exports=CSVError;
/**
 * Error describing a problem found while converting CSV input.
 *
 * @param {string} err Short error code, e.g. "column_mismatched" or "unclosed_quote".
 * @param {number} index Index reported in the message as "JSON Line number".
 * @param {string} [extra] Optional text shown after "near:" in the message.
 */
function CSVError(err, index, extra) {
  // Error.call() only returns a fresh Error; it does not initialise `this`,
  // so every field (including the stack below) is populated by hand.
  Error.call(this, "");
  this.err = err;
  this.line = index;
  this.extra = extra;
  this.message = "Error: " + err + ". JSON Line number: " + index + (extra ? " near: " + extra : "");
  this.name = "CSV Error";
  // Record where the error was created when the engine offers the hook
  // (V8 / Node.js); without this the instance has no stack trace at all.
  if (typeof Error.captureStackTrace === "function") {
    Error.captureStackTrace(this, CSVError);
  }
}
// Put Error.prototype on CSVError's prototype chain.
util.inherits(CSVError, Error);

/**
 * Serialise the error as a JSON array of [err, line, extra]; the same layout
 * is accepted by CSVError.fromArray.
 * @returns {string}
 */
CSVError.prototype.toString = function () {
  var fields = [this.err, this.line, this.extra];
  return JSON.stringify(fields);
};
/**
 * Build a "column_mismatched" error.
 * @param {number} index Row index to report.
 * @param {string} [extra] Optional nearby text.
 * @returns {CSVError}
 */
CSVError.column_mismatched = function (index, extra) {
  var code = "column_mismatched";
  return new CSVError(code, index, extra);
};

/**
 * Build an "unclosed_quote" error.
 * @param {number} index Row index to report.
 * @param {string} [extra] Optional nearby text.
 * @returns {CSVError}
 */
CSVError.unclosed_quote = function (index, extra) {
  var code = "unclosed_quote";
  return new CSVError(code, index, extra);
};

/**
 * Rebuild an error from an [err, line, extra] array.
 * @param {Array} arr Array holding err, line and extra at positions 0, 1, 2.
 * @returns {CSVError}
 */
CSVError.fromArray = function (arr) {
  var err = arr[0];
  var line = arr[1];
  var extra = arr[2];
  return new CSVError(err, line, extra);
};
},{"util":72}],4:[function(require,module,exports){
(function (process){
var util = require("util");
var Transform = require("stream").Transform;
var os = require("os");
var eol = os.EOL;
// var Processor = require("./Processor.js");
var defParam = require("./defParam");
var csvline = require("./csvline");
var fileline = require("./fileline");
var dataToCSVLine = require("./dataToCSVLine");
var fileLineToCSVLine = require("./fileLineToCSVLine");
var linesToJson = require("./linesToJson");
var CSVError = require("./CSVError");
var workerMgr = require("./workerMgr");
/**
 * Transform stream that turns CSV text into JSON records.
 *
 * @param {Object} [params] Conversion options; merged over the defaults by defParam().
 * @param {Object} [options] Stream options, forwarded to stream.Transform.
 */
function Converter(params, options) {
  Transform.call(this, options);
  // Declared locally: assigning an undeclared `_param` created a global
  // shared by every Converter (and is a ReferenceError in strict mode).
  var _param = defParam(params);
  this._options = options || {};
  this.param = _param;
  this.param._options = this._options;
  this.started = false; // indicate if parsing has started.
  this.recordNum = 0;
  this.lineNumber = 0; // file line number
  this._csvLineBuffer = "";
  this.lastIndex = 0; // index in result json array
  // NOTE(review): defParam declares `fork`, not `forked`, and this sets
  // this.workerNum rather than this.param.workerNum — confirm the intent.
  if (this.param.forked) {
    this.param.forked = false;
    this.workerNum = 2;
  }
  this.flushCb = null;
  this.processEnd = false;
  this.sequenceBuffer = [];
  // The _need* flags stay null until the nextTick callback below resolves
  // them from the listeners attached by then.
  this._needJson = null;
  this._needEmitResult = null;
  this._needEmitFinalResult = null;
  this._needEmitJson = null;
  this._needPush = null;
  this._needEmitCsv = null;
  this._csvTransf = null;
  this.finalResult = [];
  // Both "error" and "end" are followed by a "done" event on the next tick.
  this.on("error", emitDone(this));
  this.on("end", emitDone(this));
  this.initWorker();
  // Deferred so that listeners registered synchronously after construction
  // are taken into account when deciding what output has to be produced.
  process.nextTick(function () {
    if (this._needEmitFinalResult === null) {
      this._needEmitFinalResult = this.listeners("end_parsed").length > 0
    }
    if (this._needEmitResult === null) {
      this._needEmitResult = this.listeners("record_parsed").length > 0
    }
    if (this._needEmitJson === null) {
      this._needEmitJson = this.listeners("json").length > 0
    }
    if (this._needEmitCsv === null) {
      this._needEmitCsv = this.listeners("csv").length > 0
    }
    if (this._needJson === null) {
      this._needJson = this._needEmitJson || this._needEmitFinalResult || this._needEmitResult || this.transform || this._options.objectMode;
    }
    if (this._needPush === null) {
      this._needPush = this.listeners("data").length > 0 || this.listeners("readable").length > 0
    }
    this.param._needParseJson = this._needJson || this._needPush;
  }.bind(this))
  return this;
}
util.inherits(Converter, Transform);
/**
 * Create a listener that emits "done" on `conv` on the next tick, forwarding
 * the argument the listener itself received.
 * @param {Object} conv Emitter that will receive the "done" event.
 * @returns {Function} Listener taking an optional error.
 */
function emitDone(conv) {
  function announce(err) {
    conv.emit('done', err);
  }
  return function (err) {
    process.nextTick(announce.bind(null, err));
  };
}
/**
 * stream.Transform hook: decode a chunk, run it through preProcessRaw and
 * hand the result (prefixed with any buffered partial line) to processData.
 * @param {Buffer|string} data Incoming chunk.
 * @param {string} encoding Unused; the chunk is always decoded as utf8.
 * @param {Function} cb Called once the chunk has been handled.
 */
Converter.prototype._transform = function (data, encoding, cb) {
  var converter = this;
  var isFirstChunk = converter.started === false;
  if (converter.param.toArrayString && isFirstChunk) {
    converter.started = true;
    if (converter._needPush) {
      // Open the JSON array that checkAndFlush closes.
      converter.push("[" + eol, "utf8");
    }
  }
  var text = data.toString("utf8");
  converter.preProcessRaw(text, function (processed) {
    var hasContent = processed && processed.length > 0;
    if (!hasContent) {
      cb();
      return;
    }
    converter.processData(converter.prepareData(processed), cb);
  });
};
/**
 * Prefix a chunk with the partial line left over from the previous chunk.
 * @param {string} data Newly received text.
 * @returns {string} Buffered remainder followed by `data`.
 */
Converter.prototype.prepareData = function (data) {
  var carriedOver = this._csvLineBuffer;
  return carriedOver + data;
};
/**
 * Remember the unterminated tail of the current chunk for the next one.
 * @param {string} d Text that did not yet form a complete line/row.
 */
Converter.prototype.setPartialData = function (d) {
  var converter = this;
  converter._csvLineBuffer = d;
};
/**
 * Split a chunk into file lines and route them: to processHead while no
 * header is known, to the in-process converter when workerNum <= 1, or to
 * the worker pool otherwise.
 * @param {string} data Chunk text, already prefixed with the buffered partial.
 * @param {Function} cb Called when the chunk has been handled.
 */
Converter.prototype.processData = function (data, cb) {
  var params = this.param;
  var chunk = data;
  if (params.ignoreEmpty && !params._headers) {
    chunk = chunk.trimLeft();
  }
  var split = fileline(chunk, this.param);

  // No complete line yet: keep everything buffered.
  if (!(split.lines.length > 0)) {
    this.setPartialData(split.partial);
    cb();
    return;
  }

  if (this.preProcessLine && typeof this.preProcessLine === "function") {
    split.lines = this._preProcessLines(split.lines, this.lastIndex);
  }

  // Header is not initialised yet.
  if (!params._headers) {
    this.processHead(split, cb);
    return;
  }

  if (!(params.workerNum <= 1)) {
    this.workerProcess(split, cb);
    return;
  }

  var csv = fileLineToCSVLine(split, params);
  this.setPartialData(csv.partial);
  var records = linesToJson(csv.lines, params, this.recordNum);
  this.processResult(records);
  this.lastIndex += records.length;
  this.recordNum += records.length;
  cb();
};
/**
 * Run every line through the preProcessLine hook. A non-string result keeps
 * the original line and emits an "error" event.
 * @param {string[]} lines File lines.
 * @param {number} startIdx Offset added to the 1-based line number passed to the hook.
 * @returns {string[]} Lines after pre-processing.
 */
Converter.prototype._preProcessLines = function (lines, startIdx) {
  var processed = [];
  for (var pos = 0; pos < lines.length; pos += 1) {
    var replacement = this.preProcessLine(lines[pos], startIdx + pos + 1);
    if (typeof replacement !== "string") {
      processed.push(lines[pos]);
      this.emit("error", new Error("preProcessLine should return a string but got: " + JSON.stringify(replacement)));
      continue;
    }
    processed.push(replacement);
  }
  return processed;
};
/**
 * Create the worker manager when more than one worker is configured;
 * workerNum - 1 workers are requested from it.
 */
Converter.prototype.initWorker = function () {
  var extraWorkers = this.param.workerNum - 1;
  if (!(extraWorkers > 0)) {
    return;
  }
  this.workerMgr = workerMgr();
  this.workerMgr.initWorker(extraWorkers, this.param);
};
/**
 * Install the hook applied to every raw chunk (replaces preProcessRaw).
 * @param {Function} func Called as func(data, cb).
 * @returns {Converter} This converter, for chaining.
 */
Converter.prototype.preRawData = function (func) {
  var converter = this;
  converter.preProcessRaw = func;
  return converter;
};
/**
 * Install the hook applied to every file line (replaces preProcessLine).
 * @param {Function} func Called as func(line, lineNumber).
 * @returns {Converter} This converter, for chaining.
 */
Converter.prototype.preFileLine = function (func) {
  var converter = this;
  converter.preProcessLine = func;
  return converter;
};
/**
* Send a batch of file lines to the worker manager and hand results to
* processResult in the order the batches were submitted.
* workerProcess does not support embedded multi-line fields.
*
* @param {Object} fileLine {lines, partial} as returned by fileline().
* @param {Function} cb Passed through to workerMgr.sendWorker as its third argument.
*/
Converter.prototype.workerProcess = function (fileLine, cb) {
var self = this;
var line = fileLine
var eol = this.getEol()
// Keep the unterminated tail for the next chunk.
this.setPartialData(line.partial)
// NOTE(review): the last argument looks like the per-batch result callback,
// invoked with (results, lastIndex) — confirm against workerMgr.sendWorker.
this.workerMgr.sendWorker(line.lines.join(eol) + eol, this.lastIndex, cb, function (results, lastIndex) {
var cur = self.sequenceBuffer[0];
if (cur.idx === lastIndex) {
// The oldest pending batch is done: emit it together with every directly
// following batch whose result has already arrived.
cur.result = results;
var records = [];
while (self.sequenceBuffer[0] && self.sequenceBuffer[0].result) {
var buf = self.sequenceBuffer.shift();
records = records.concat(buf.result)
}
self.processResult(records)
self.recordNum += records.length;
} else {
// A later batch finished first: park its result on the matching entry.
for (var i = 0; i < self.sequenceBuffer.length; i++) {
var buf = self.sequenceBuffer[i];
if (buf.idx === lastIndex) {
buf.result = results;
break;
}
}
}
// self.processResult(JSON.parse(results),function(){},true);
})
// Register the batch as pending, keyed by the index it was submitted with.
this.sequenceBuffer.push({
idx: this.lastIndex,
result: null
});
this.lastIndex += line.lines.length;
}
/**
 * Initialise params._headers from the first batch of lines (unless already
 * set) and convert whatever rows remain in that batch.
 * @param {Object} fileLine {lines, partial} as returned by fileline().
 * @param {Function} cb Called when the batch has been handled.
 */
Converter.prototype.processHead = function (fileLine, cb) {
  var params = this.param;
  if (params._headers) {
    cb();
    return;
  }

  var csv = fileLineToCSVLine(fileLine, params);
  this.setPartialData(csv.partial);

  var firstRow;
  if (!params.noheader) {
    // The first row is always consumed, even when explicit headers win.
    firstRow = csv.lines.shift();
  }
  if (params.headers) {
    params._headers = params.headers;
  } else {
    params._headers = params.noheader ? [] : firstRow;
  }

  if (this.param.workerNum > 1) {
    this.workerMgr.setParams(params);
  }

  var records = linesToJson(csv.lines, params, 0);
  this.processResult(records);
  this.lastIndex += records.length;
  this.recordNum += records.length;
  cb();
};
/**
 * Dispatch converted records: entries carrying `err` become "error" events,
 * all others go through emitResult.
 * @param {Array<Object>} result Records as returned by linesToJson.
 */
Converter.prototype.processResult = function (result) {
  for (var pos = 0; pos < result.length; pos += 1) {
    var record = result[pos];
    if (record.err) {
      this.emit("error", record.err);
      continue;
    }
    this.emitResult(record);
  }
};
/**
* Publish one converted record through every enabled channel: the user
* transform, the "json" / "csv" / "record_parsed" events, the accumulated
* final result and the readable side of the stream.
*
* @param {Object} r Record with `index`, `row` and `json` fields. `json` may be
*   an object or a JSON string; `row` may be an array or a JSON string.
*   (presumably the string forms come from worker processes — confirm)
*/
Converter.prototype.emitResult = function (r) {
var index = r.index;
var row = r.row;
var result = r.json;
var resultJson = null;
var resultStr = null;
if (typeof result === "string") {
resultStr = result;
} else {
resultJson = result;
}
// Only parse a stringified record when some consumer needs the object form.
if (resultJson === null && this._needJson) {
resultJson = JSON.parse(resultStr)
if (typeof row === "string") {
row = JSON.parse(row)
}
}
if (this.transform && typeof this.transform === "function") {
this.transform(resultJson, row, index);
// Drop the cached string so it is re-serialised below if it gets pushed.
resultStr = null;
}
if (this._needEmitJson) {
this.emit("json", resultJson, index)
}
if (this._needEmitCsv) {
if (typeof row === "string") {
row = JSON.parse(row)
}
this.emit("csv", row, index)
}
if (this.param.constructResult && this._needEmitFinalResult) {
this.finalResult.push(resultJson)
}
if (this._needEmitResult) {
this.emit("record_parsed", resultJson, row, index);
}
// In array-string mode every record except the one with index 0 is preceded by a separator.
if (this.param.toArrayString && index > 0 && this._needPush) {
this.push("," + eol);
}
if (this._options && this._options.objectMode) {
this.push(resultJson);
} else {
if (this._needPush) {
if (resultStr === null) {
resultStr = JSON.stringify(resultJson)
}
// Outside array-string mode each record is written on its own line.
this.push(!this.param.toArrayString ? resultStr + eol : resultStr, "utf8");
}
}
}
/**
 * Default raw-chunk hook: passes the chunk through unchanged.
 * @param {string} data Decoded chunk.
 * @param {Function} cb Receives the (possibly rewritten) chunk.
 */
Converter.prototype.preProcessRaw = function (data, cb) {
  var unchanged = data;
  cb(unchanged);
};
/**
 * Default per-line hook: returns the line unchanged.
 * @param {string} line File line.
 * @param {number} lineNumber 1-based line number supplied by _preProcessLines.
 * @returns {string} The line to convert.
 */
Converter.prototype.preProcessLine = function (line, lineNumber) {
  var unchanged = line;
  return unchanged;
};
/**
 * stream.Transform hook called when the input ends: converts whatever is
 * still buffered, then finishes through checkAndFlush / flushCb.
 * @param {Function} cb Stream callback signalling that flushing is complete.
 */
Converter.prototype._flush = function (cb) {
  var self = this;
  // Invoked by checkAndFlush, possibly only after the workers have drained.
  this.flushCb = function () {
    self.emit("end_parsed", self.finalResult);
    if (self.workerMgr) {
      self.workerMgr.destroyWorker();
    }
    cb()
    if (!self._needPush) {
      self.emit("end")
    }
  };
  if (this._csvLineBuffer.length > 0) {
    var lineEnd = this.getEol();
    // Compare the whole suffix: checking only the last character never
    // matches a two-character "\r\n" and appended a duplicate terminator.
    if (this._csvLineBuffer.slice(-lineEnd.length) !== lineEnd) {
      this._csvLineBuffer += lineEnd;
    }
    this.processData(this._csvLineBuffer, function () {
      this.checkAndFlush();
    }.bind(this));
  } else {
    this.checkAndFlush();
  }
  return;
};
// Converter.prototype._transformFork = function(data, encoding, cb) {
// this.child.stdin.write(data, encoding, cb);
// }
// Converter.prototype._flushFork = function(cb) {
// this.child.stdin.end();
// this.child.on("exit", cb);
// }
/**
 * Final step of flushing: report leftover buffered text as an unclosed
 * quote, close the JSON array in array-string mode, then run flushCb —
 * immediately, or from the worker manager's drain hook while it is running.
 */
Converter.prototype.checkAndFlush = function () {
  var self = this;
  if (self._csvLineBuffer.length !== 0) {
    self.emit("error", CSVError.unclosed_quote(self.recordNum, self._csvLineBuffer), self._csvLineBuffer);
  }
  if (self.param.toArrayString && self._needPush) {
    self.push(eol + "]", "utf8");
  }
  var workersBusy = self.workerMgr && self.workerMgr.isRunning();
  if (!workersBusy) {
    self.flushCb();
    return;
  }
  self.workerMgr.drain = function () {
    self.flushCb();
  };
};
/**
 * Return the line terminator in use. When none is configured and `data` is
 * given, the first "\r\n", "\r" or "\n" found in it is stored in param.eol;
 * if none is found the OS default is stored instead.
 * @param {string} [data] Text to sniff the terminator from.
 * @returns {string} The terminator (OS default when nothing is known).
 */
Converter.prototype.getEol = function (data) {
  var param = this.param;
  if (param.eol || !data) {
    return param.eol || eol;
  }
  var detected = eol;
  for (var pos = 0; pos < data.length; pos++) {
    var ch = data[pos];
    if (ch === "\r") {
      detected = data[pos + 1] === "\n" ? "\r\n" : "\r";
      break;
    }
    if (ch === "\n") {
      detected = "\n";
      break;
    }
  }
  param.eol = detected;
  return detected;
};
/**
 * Convert the CSV file at `filePath` by piping a read stream into this
 * converter. Emits an "error" event when the file does not exist.
 * @param {string} filePath Path of the CSV file.
 * @param {Function} [cb] Node-style callback wired up through wrapCallback.
 * @returns {Converter} This converter, for chaining.
 */
Converter.prototype.fromFile = function (filePath, cb) {
  var fs = require('fs');
  var self = this;
  var rs = null;

  // Cleanup run by wrapCallback on error: close the read stream if one was opened.
  function destroyStream() {
    if (rs && rs.destroy) {
      rs.destroy();
    }
  }

  this.wrapCallback(cb, destroyStream);
  fs.exists(filePath, function (exist) {
    if (!exist) {
      self.emit('error', new Error("File not exist"));
      return;
    }
    rs = fs.createReadStream(filePath);
    rs.pipe(self);
  });
  return this;
};
/**
 * Convert CSV data read from `readStream`.
 * @param {Object} readStream Readable stream; piped into this converter.
 * @param {Function} [cb] Node-style callback wired up through wrapCallback.
 * @returns {Converter} This converter, for chaining.
 */
Converter.prototype.fromStream = function (readStream, cb) {
  var hasCallback = Boolean(cb) && typeof cb === "function";
  if (hasCallback) {
    this.wrapCallback(cb);
  }
  readStream.pipe(this);
  return this;
};
/**
 * Install the per-record transform; emitResult calls it as
 * func(json, row, index) before the record is emitted.
 * @param {Function} func Transform function.
 * @returns {Converter} This converter, for chaining.
 */
Converter.prototype.transf = function (func) {
  var converter = this;
  converter.transform = func;
  return converter;
};
/**
 * Convert CSV text held in a string.
 *
 * @param {string} csvString CSV text.
 * @param {Function} [cb] Node-style callback wired up through wrapCallback.
 * @returns {Converter|*} This converter; for non-string input with a
 *   callback, whatever the callback returns (unchanged behaviour).
 */
Converter.prototype.fromString = function (csvString, cb) {
  var hasCallback = typeof cb === "function";
  if (typeof csvString != "string") {
    var notString = new Error("Passed CSV Data is not a string.");
    if (hasCallback) {
      return cb(notString);
    }
    // Without a callback, calling cb() threw a TypeError. Report the problem
    // as an "error" event instead, the same way fromFile reports a missing file.
    process.nextTick(function () {
      this.emit("error", notString);
    }.bind(this));
    return this;
  }
  if (hasCallback) {
    this.wrapCallback(cb, function () {
    });
  }
  // Deferred so callers can attach listeners before any data flows.
  process.nextTick(function () {
    this.end(csvString)
  }.bind(this))
  return this;
};
/**
 * Bridge the event API to a Node-style callback: the first "end_parsed"
 * yields cb(null, result) unless an error was seen, the first "error" yields
 * cb(err) followed by `clean()`.
 * @param {Function} [cb] Node-style callback.
 * @param {Function} [clean] Cleanup run after the first error; defaults to a no-op.
 */
Converter.prototype.wrapCallback = function (cb, clean) {
  var self = this;
  var hasCallback = Boolean(cb) && typeof cb === "function";
  var cleanup = clean === undefined ? function () { } : clean;

  if (hasCallback) {
    self.once("end_parsed", function (res) {
      if (self.hasError) {
        return;
      }
      cb(null, res);
    });
  }
  self.once("error", function (err) {
    self.hasError = true;
    if (hasCallback) {
      cb(err);
    }
    cleanup();
  });
};
module.exports = Converter;
}).call(this,require('_process'))
},{"./CSVError":3,"./csvline":5,"./dataToCSVLine":6,"./defParam":7,"./fileLineToCSVLine":14,"./fileline":15,"./linesToJson":19,"./workerMgr":23,"_process":41,"fs":30,"os":40,"stream":59,"util":72}],5:[function(require,module,exports){
var getEol=require("./getEol");
var getDelimiter=require("./getDelimiter");
var toLines=require("./fileline");
var rowSplit=require("./rowSplit");
/**
* Convert lines to csv columns
* @param {[type]} lines [file lines]
* @param {[type]} param [Converter param]
* @return {[type]} {lines:[[col1,col2,col3...]],partial:String}
*/
module.exports=function(lines,param){
var csvLines=[];
var left="";
while (lines.length){
var line=left+lines.shift();
var row=rowSplit(line,param);
if (row.closed){
csvLines.push(row.cols);
left="";
}else{
left=line+getEol(line,param);
}
}
return {lines:csvLines,partial:left};
}
},{"./fileline":15,"./getDelimiter":16,"./getEol":17,"./rowSplit":22}],6:[function(require,module,exports){
var fileline=require("./fileline");
var csvline=require("./csvline");
/**
* Convert data chunk to csv lines with cols
* @param {[type]} data [description]
* @param {[type]} params [description]
* @return {[type]} {lines:[[col1,col2,col3]],partial:String}
*/
module.exports=function(data,params){
var line=fileline(data,params);
var lines=line.lines;
var csvLines=csvline(lines,params);
return {
lines:csvLines.lines,
partial:csvLines.partial+line.partial
}
}
},{"./csvline":5,"./fileline":15}],7:[function(require,module,exports){
(function (process){
module.exports = function (params) {
var _param = {
constructResult: true, //set to false to not construct result in memory. suitable for big csv data
delimiter: ',', // change the delimiter of csv columns. It is able to use an array to specify potencial delimiters. e.g. [",","|",";"]
ignoreColumns: [], // columns to ignore upon input.
includeColumns: [], // columns to include upon input.
quote: '"', //quote for a column containing delimiter.
trim: true, //trim column's space charcters
checkType: false, //whether check column type
toArrayString: false, //stream down stringified json array instead of string of json. (useful if downstream is file writer etc)
ignoreEmpty: false, //Ignore empty value while parsing. if a value of the column is empty, it will be skipped parsing.
workerNum: getEnv("CSV_WORKER", 1), //number of parallel workers. If multi-core CPU available, increase the number will get better performance for large csv data.
fork: false, //use another CPU core to convert the csv stream
noheader: false, //indicate if first line of CSV file is header or not.
headers: null, //an array of header strings. If noheader is false and headers is array, csv header will be ignored.
flatKeys: false, // Don't interpret dots and square brackets in header fields as nested object or array identifiers at all.
maxRowLength: 0, //the max character a csv row could have. 0 means infinite. If max number exceeded, parser will emit "error" of "row_exceed". if a possibly corrupted csv data provided, give it a number like 65535 so the parser wont consume memory. default: 0
checkColumn: false, //whether check column number of a row is the same as headers. If column number mismatched headers number, an error of "mismatched_column" will be emitted.. default: false
escape: '"', //escape char for quoted column
/**below are internal params */
_headerType: [],
_headerTitle: [],
_headerFlag: [],
_headers: null,
_needFilterRow:false
};
if (!params) {
params = {};
}
for (var key in params) {
if (params.hasOwnProperty(key)) {
_param[key] = params[key];
}
};
if (_param.ignoreColumns.length || _param.includeColumns.length){
_param._needFilterRow=true;
_param.ignoreColumns.sort(function (a, b) { return b - a; });
}
return _param;
}
/**
 * Read an environment variable, falling back to `def` when it is unset or empty.
 * @param {string} key Variable name.
 * @param {*} def Fallback value.
 * @returns {*} The variable's string value, or `def`.
 */
function getEnv(key, def) {
  var fromEnv = process.env[key];
  return fromEnv ? fromEnv : def;
}
}).call(this,require('_process'))
},{"_process":41}],8:[function(require,module,exports){
// Built-in parser configurations, in the order parserMgr registers them.
module.exports = [
require('./parser_array.js'),
require('./parser_json.js'),
require('./parser_omit.js'),
require('./parser_jsonarray.js'),
require("./parser_flat.js")
];
},{"./parser_array.js":9,"./parser_flat.js":10,"./parser_json.js":11,"./parser_jsonarray.js":12,"./parser_omit.js":13}],9:[function(require,module,exports){
module.exports = {
"name": "array",
"processSafe":true,
"regExp": /^\*array\*/,
"parserFunc": function parser_array(params) {
var fieldName = params.head.replace(this.regExp, '');
if (params.resultRow[fieldName] === undefined) {
params.resultRow[fieldName] = [];
}
params.resultRow[fieldName].push(params.item);
}
};
},{}],10:[function(require,module,exports){
module.exports = {
"name": "flat",
"processSafe":true,
"regExp": /^\*flat\*/,
"parserFunc": function parser_flat (params) {
var key=this.getHeadStr();
var val=params.item;
params.resultRow[key]=val;
}
};
},{}],11:[function(require,module,exports){
var arrReg = /\[([0-9]*)\]/;
/**
 * Walk (and create where missing) the containers named by every path segment
 * except the last one, consuming those segments from `headArr`.
 *
 * @param {Object} pointer Object to start from.
 * @param {string[]} headArr Path segments; all but the last are shifted off.
 * @param {RegExp} arrReg Matches an "[index]" suffix, capturing the index.
 * @param {boolean} flatKeys When truthy, "[...]" suffixes are not interpreted.
 * @returns {Object} The container the last segment should be written to.
 */
function processHead(pointer, headArr, arrReg, flatKeys) {
  var cursor = pointer;
  while (headArr.length > 1) {
    var segment = headArr.shift();
    var found = flatKeys ? false : segment.match(arrReg);
    if (!found) {
      // Plain object key: descend, creating the object on first use.
      if (cursor[segment] === undefined) {
        cursor[segment] = {};
      }
      cursor = cursor[segment];
      continue;
    }
    // Array segment: make sure the list and the addressed slot exist.
    var listKey = segment.replace(found[0], '');
    if (cursor[listKey] === undefined) {
      cursor[listKey] = [];
    }
    var list = cursor[listKey];
    // An empty index ("[]") means "append at the end".
    var slot = found[1] === '' ? list.length : found[1];
    if (!list[slot]) {
      list[slot] = {};
    }
    cursor = list[slot];
  }
  return cursor;
}
// Parser for "*json*" columns: the header is a dotted path (optionally with
// "[index]" suffixes) describing where the value is stored in resultRow.
module.exports = {
"name": "json",
"processSafe": true,
"regExp": /^\*json\*/,
"parserFunc": function parser_json(params) {
var fieldStr = this.getHeadStr();
// With flatKeys the whole header is a single key; otherwise split on dots.
var headArr = (params.config && params.config.flatKeys) ? [fieldStr] : fieldStr.split('.');
var match, index, key, pointer;
//now the pointer is pointing the position to add a key/value pair.
var pointer = processHead(params.resultRow, headArr, arrReg, params.config && params.config.flatKeys);
// processHead leaves exactly the last path segment in headArr.
key = headArr.shift();
match = (params.config && params.config.flatKeys) ? false : key.match(arrReg);
if (match) { // the last element is an array, we need check and treat it as an array.
try {
key = key.replace(match[0], '');
if (!pointer[key] || !(pointer[key] instanceof Array)) {
pointer[key] = [];
}
if (pointer[key]) {
index = match[1];
// An empty index ("[]") appends to the end of the array.
if (index === '') {
index = pointer[key].length;
}
pointer[key][index] = params.item;
} else {
params.resultRow[fieldStr] = params.item;
}
} catch (e) {
// Fall back to storing the value under the full header string.
params.resultRow[fieldStr] = params.item;
}
} else {
// A string at the target position cannot take a property; store the value
// under the full header string instead.
if (typeof pointer=== "string"){
params.resultRow[fieldStr] = params.item;
}else{
pointer[key] = params.item;
}
}
}
};
},{}],12:[function(require,module,exports){
module.exports = {
"name": "jsonarray",
"processSafe":true,
"regExp": /^\*jsonarray\*/,
"parserFunc": function parser_jsonarray (params) {
var fieldStr = params.head.replace(this.regExp, "");
var headArr = fieldStr.split('.');
var pointer = params.resultRow;
while (headArr.length > 1) {
var headStr = headArr.shift();
if (pointer[headStr] === undefined) {
pointer[headStr] = {};
}
pointer = pointer[headStr];
}
var arrFieldName = headArr.shift();
if (pointer[arrFieldName] === undefined) {
pointer[arrFieldName] = [];
}
pointer[arrFieldName].push(params.item);
}
};
},{}],13:[function(require,module,exports){
// Parser for "*omit*" columns: the parser function does nothing, so the
// value is never written to the result row.
module.exports = {
"name": "omit",
"regExp": /^\*omit\*/,
"processSafe":true,
"parserFunc": function parser_omit() {}
};
},{}],14:[function(require,module,exports){
var csvline=require("./csvline");
/**
* Convert data chunk to csv lines with cols
* @param {[type]} data [description]
* @param {[type]} params [description]
* @return {[type]} {lines:[[col1,col2,col3]],partial:String}
*/
module.exports=function(fileLine,params){
var lines=fileLine.lines;
var csvLines=csvline(lines,params);
return {
lines:csvLines.lines,
partial:csvLines.partial+fileLine.partial
}
}
},{"./csvline":5}],15:[function(require,module,exports){
var getEol=require("./getEol");
/**
* convert data chunk to file lines array
* @param {string} data data chunk as utf8 string
* @param {object} param Converter param object
* @return {Object} {lines:[line1,line2...],partial:String}
*/
module.exports=function(data,param){
var eol=getEol(data,param);
var lines= data.split(eol);
var partial=lines.pop();
// if (param.ignoreEmpty){
// var trimmedLines=[];
// for (var i=0;i<lines.length;i++){
// trimmedLines.push(lines[i].trim())
// }
// return {lines:trimmedLines,partial:partial};
// }else{
return {lines:lines,partial:partial};
// }
}
},{"./getEol":17}],16:[function(require,module,exports){
module.exports=getDelimiter;
// Candidates tried when param.delimiter is "auto".
var defaulDelimiters = [",", "|", "\t", ";", ":"];

/**
 * Resolve the column delimiter for a row. A plain delimiter is returned
 * unchanged; for "auto" or an array of candidates the one that splits the
 * row into the most pieces wins (the earliest candidate wins ties).
 * @param {string} rowStr Row text used for detection.
 * @param {Object} param Converter param; only `delimiter` is read.
 * @returns {string} The delimiter to use ("," when no candidate is given).
 */
function getDelimiter(rowStr, param) {
  var configured = param.delimiter;
  var candidates;
  if (configured === "auto") {
    candidates = defaulDelimiters;
  } else if (configured instanceof Array) {
    candidates = configured;
  } else {
    return configured;
  }
  var winner = candidates.reduce(function (best, delim) {
    var pieces = rowStr.split(delim).length;
    return pieces > best.count ? { delim: delim, count: pieces } : best;
  }, { delim: ",", count: 0 });
  return winner.delim;
}
},{}],17:[function(require,module,exports){
//return eol from a data chunk.
var eol=require("os").EOL;
module.exports=function(data,param){
if (!param.eol && data) {
for (var i=0;i<data.length;i++){
if (data[i]==="\r"){
if (data[i+1] === "\n"){
param.eol="\r\n";
}else{
param.eol="\r";
}
return param.eol;
}else if (data[i]==="\n"){
param.eol="\n";
return param.eol;
}
}
param.eol=eol;
}
return param.eol;
}
},{"os":40}],18:[function(require,module,exports){
module.exports=constructor;
module.exports.Converter = require("./Converter.js");
// module.exports.Parser = require("./parser.js");
// module.exports.parserMgr = require("./parserMgr.js");
/**
 * Factory exported by the package: builds a Converter without `new`.
 * @param {Object} [param] Conversion options.
 * @param {Object} [options] Stream options.
 * @returns {Converter}
 */
function constructor(param, options) {
  var ConverterClass = module.exports.Converter;
  return new ConverterClass(param, options);
}
},{"./Converter.js":4}],19:[function(require,module,exports){
var parserMgr = require("./parserMgr.js");
var Parser = require("./parser");
var CSVError = require("./CSVError");
var numReg = /^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$/;
/**
* Convert lines of csv array into json
* @param {[type]} lines [[col1,col2,col3]]
* @param {[type]} params Converter params with _headers field populated
* @param {[type]} idx start pos of the lines
* @return {[type]} [{err:null,json:obj,index:line,row:[csv row]}]
*/
module.exports = function (lines, params, idx) {
if (params._needParseJson) {
if (!params._headers) {
params._headers = [];
}
if (!params.parseRules) {
var row = params._headers;
params.parseRules = parserMgr.initParsers(row, params);
}
return processRows(lines, params, idx);
} else {
return justReturnRows(lines, params, idx);
}
}
/**
 * Wrap raw rows in result records without converting them: `json` is an
 * empty object and `index` counts up from `idx`.
 * @param {Array<Array>} lines CSV rows.
 * @param {Object} params Unused.
 * @param {number} idx Index assigned to the first row.
 * @returns {Array<Object>} One {err, json, index, row} record per row.
 */
function justReturnRows(lines, params, idx) {
  var records = [];
  var position = idx;
  for (var pos = 0; pos < lines.length; pos += 1) {
    var record = {
      err: null,
      json: {},
      index: position++,
      row: lines[pos]
    };
    records.push(record);
  }
  return records;
}
/**
 * Convert every row with processRow, dropping rows for which it returns a
 * falsy value.
 * @param {Array<Array>} csvRows CSV rows.
 * @param {Object} params Converter params.
 * @param {number} startIndex Index assigned to the first row.
 * @returns {Array<Object>} Converted records.
 */
function processRows(csvRows, params, startIndex) {
  var converted = [];
  var rowIndex = startIndex;
  for (var pos = 0; pos < csvRows.length; pos += 1) {
    var outcome = processRow(csvRows[pos], params, rowIndex++);
    if (outcome) {
      converted.push(outcome);
    }
  }
  return converted;
}
/**
 * Build a catch-all parser named "field<number>" that stores the value under
 * the parser's own name.
 * @param {number} number Column number used in the parser name.
 * @param {Object} param Converter params handed to the parser.
 * @returns {Parser}
 */
function getConstParser(number, param) {
  function assignByName(params) {
    var name = this.getName();
    params.resultRow[name] = params.item;
  }
  var parser = new Parser("field" + number, /.*/, assignByName, true);
  parser.setParam(param);
  return parser;
}
/**
 * Convert one CSV row into a result record.
 * @param {Array} row CSV columns.
 * @param {Object} param Converter params (parseRules, _headers, checkColumn).
 * @param {number} index Row index.
 * @returns {Object|null} {json, index, row} on success, {err} on a column
 *   count mismatch (only with checkColumn), or null when the row produced
 *   no value.
 */
function processRow(row, param, index) {
  var rules = param.parseRules;
  if (param.checkColumn && row.length != rules.length) {
    return {
      err: CSVError.column_mismatched(index)
    };
  }
  var json = convertRowToJson(row, param._headers, param);
  if (!json) {
    return null;
  }
  return {
    json: json,
    index: index,
    row: row
  };
}
/**
 * Build the JSON object for one CSV row.
 *
 * @param {Array} row CSV columns of the row.
 * @param {Array} headRow Header titles; a missing or empty title is replaced
 *   in place by "field<N>" (N is the 1-based column number).
 * @param {Object} param Converter params (ignoreEmpty, checkType, flatKeys
 *   and the per-column caches).
 * @returns {Object|boolean} The result object, or false when every column
 *   was skipped as empty.
 */
function convertRowToJson(row, headRow, param) {
  var hasValue = false;
  var resultRow = {};
  // i, item, head and convertFunc used to be undeclared and therefore leaked
  // as globals (a ReferenceError in strict mode); they are local now.
  for (var i = 0; i < row.length; i++) {
    var item = row[i];
    if (param.ignoreEmpty && item === '') {
      continue;
    }
    hasValue = true;
    var head = headRow[i];
    if (!head || head === "") {
      head = headRow[i] = "field" + (i + 1);
    }
    var flag = getFlag(head, i, param);
    if (flag === 'omit') {
      continue;
    }
    if (param.checkType) {
      var convertFunc = checkType(item, head, i, param);
      item = convertFunc(item);
    }
    var title = getTitle(head, i, param);
    if (flag === 'flat' || param.flatKeys) {
      // Store under the title verbatim, without path interpretation.
      resultRow[title] = item;
    } else {
      setPath(resultRow, title, item);
    }
  }
  if (hasValue) {
    return resultRow;
  } else {
    return false;
  }
}
/**
 * Assign `value` at `path` inside `json`. A path without dots is assigned
 * directly; anything else is delegated to lodash's set.
 * @param {Object} json Target object.
 * @param {string} path Key or dotted path.
 * @param {*} value Value to store.
 */
function setPath(json, path, value) {
  var _set = require('lodash/set');
  var isSingleKey = path.split('.').length === 1;
  if (isSingleKey) {
    json[path] = value;
    return;
  }
  _set(json, path, value);
}
/**
 * Return the flag of a column: 'omit' when the header contains "*omit*",
 * 'flat' when it contains "*flat*", '' otherwise. The answer is cached in
 * param._headerFlag.
 *
 * @param {string} head Header title of the column.
 * @param {number} i Column index.
 * @param {Object} param Converter params holding the `_headerFlag` cache.
 * @returns {string} 'omit', 'flat' or ''.
 */
function getFlag(head, i, param) {
  if (typeof param._headerFlag[i] === "string") {
    return param._headerFlag[i]
  } else {
    if (head.indexOf('*omit*') > -1) {
      return param._headerFlag[i] = 'omit'
    } else if (head.indexOf('*flat*') > -1) {
      return param._headerFlag[i] = 'flat'
    } else {
      return param._headerFlag[i] = ''
    }
  }
}

/**
 * Return the key a column is stored under: the header with its flag marker
 * ("*omit*" / "*flat*") and any "string#!" / "number#!" type prefix removed.
 * The answer is cached in param._headerTitle.
 *
 * @param {string} head Header title of the column.
 * @param {number} i Column index.
 * @param {Object} param Converter params holding the `_headerTitle` cache.
 * @returns {string} The cleaned title.
 */
function getTitle(head, i, param) {
  if (param._headerTitle[i]) {
    return param._headerTitle[i]
  } else {
    var flag = getFlag(head, i, param)
    // Strip the whole marker including its asterisks; removing only the bare
    // flag word turned "*flat*name" into "**name".
    var str = flag ? head.replace('*' + flag + '*', '') : head
    str = str.replace('string#!', '').replace('number#!', '')
    return param._headerTitle[i] = str
  }
}
/**
 * Pick (and cache in param._headerType) the converter function for a column:
 * numberType for "number#!" headers, stringType for "string#!" headers,
 * otherwise dynamicType when param.checkType is set, else stringType.
 * @param {*} item Unused.
 * @param {string} head Header title of the column.
 * @param {number} headIdx Column index.
 * @param {Object} param Converter params.
 * @returns {Function} Converter taking the raw item.
 */
function checkType(item, head, headIdx, param) {
  var cached = param._headerType[headIdx];
  if (cached) {
    return cached;
  }
  var chosen;
  if (head.indexOf('number#!') > -1) {
    chosen = numberType;
  } else if (head.indexOf('string#!') > -1) {
    chosen = stringType;
  } else if (param.checkType) {
    chosen = dynamicType;
  } else {
    chosen = stringType;
  }
  param._headerType[headIdx] = chosen;
  return chosen;
}
/**
 * Convert an item to a number with parseFloat; items that do not parse are
 * returned unchanged.
 * @param {string} item Raw column value.
 * @returns {number|string}
 */
function numberType(item) {
  var parsed = parseFloat(item);
  return isNaN(parsed) ? item : parsed;
}
/**
 * Convert an item to a string via its own toString().
 * @param {*} item Raw column value.
 * @returns {string}
 */
function stringType(item) {
  var text = item.toString();
  return text;
}
/**
 * Guess the type of an item from its trimmed text: numbers, "true"/"false"
 * (any case) and text wrapped in {} or [] are converted; everything else,
 * including blank text, stays a string.
 * @param {string} item Raw column value.
 * @returns {*} Converted value.
 */
function dynamicType(item) {
  var text = item.trim();
  if (text === "") {
    return stringType(item);
  }
  if (numReg.test(text)) {
    return numberType(item);
  }
  var looksBoolean = (text.length === 5 && text.toLowerCase() === "false") ||
    (text.length === 4 && text.toLowerCase() === "true");
  if (looksBoolean) {
    return booleanType(item);
  }
  var first = text[0];
  var last = text[text.length - 1];
  var looksJson = (first === "{" && last === "}") || (first === "[" && last === "]");
  if (looksJson) {
    return jsonType(item);
  }
  return stringType(item);
}
/**
 * Convert an item to a boolean: false only when its trimmed text is "false"
 * (any case), true for everything else.
 * @param {string} item Raw column value.
 * @returns {boolean}
 */
function booleanType(item) {
  var text = item.trim();
  var isFalse = text.length === 5 && text.toLowerCase() === "false";
  return !isFalse;
}
/**
 * Parse an item as JSON; text that is not valid JSON is returned unchanged.
 * @param {string} item Raw column value.
 * @returns {*} Parsed value, or the original item.
 */
function jsonType(item) {
  var parsed;
  try {
    parsed = JSON.parse(item);
  } catch (parseError) {
    return item;
  }
  return parsed;
}
// function dynamicType(item) {
// var trimed = item.trim();
// if (trimed === "") {
// return trimed;
// }
// if (!isNaN(trimed)) {
// return parseFloat(trimed);
// } else if (trimed.length === 5 && trimed.toLowerCase() === "false") {
// return false;
// } else if (trimed.length === 4 && trimed.toLowerCase() === "true") {
// return true;
// } else if (trimed[0] === "{" && trimed[trimed.length - 1] === "}" || trimed[0] === "[" && trimed[trimed.length - 1] === "]") {
// try {
// return JSON.parse(trimed);
// } catch (e) {
// return item;
// }
// } else {
// return item;
// }
// }
},{"./CSVError":3,"./parser":20,"./parserMgr.js":21,"lodash/set":128}],20:[function(require,module,exports){
// Type prefixes ("<type>#!<title>") recognised by Parser.prototype.initHead.
var explicitTypes = ["number", "string"];

/**
 * A column parser: a name, a pattern matched against column titles and a
 * function that writes a column value into the result row.
 *
 * @param {string} [name] Parser name; defaults to "Default".
 * @param {RegExp|string} [regExp] Pattern for matching titles; strings are compiled.
 * @param {Function} [parser] Replacement for the default `parse` method.
 * @param {boolean} [processSafe] Stored on the instance as `processSafe`.
 */
function Parser(name, regExp, parser, processSafe) {
  this.name = typeof name === "undefined" ? "Default" : name;
  this.regExp = null;
  this.type = "";
  this.processSafe = processSafe;
  if (typeof regExp !== "undefined") {
    this.regExp = typeof regExp === "string" ? new RegExp(regExp) : regExp;
  }
  if (typeof parser !== "undefined") {
    this.parse = parser;
  }
}

/**
 * Convert a raw item according to the column's explicit type or, when none
 * is set and param.checkType is on, by guessing from its text.
 * @param {string} item Raw column value.
 * @returns {*} Converted value.
 */
Parser.prototype.convertType = function (item) {
  var type = this.type;
  if (type === 'number') {
    // Explicit numbers fall back to 0 when the text does not parse.
    var rtn = parseFloat(item);
    if (isNaN(rtn)) {
      return 0;
    } else {
      return rtn;
    }
  } else if (this.param && this.param.checkType && type === '') {
    var trimed = item.trim();
    if (trimed === "") {
      return trimed;
    }
    if (!isNaN(trimed)) {
      return parseFloat(trimed);
    } else if (trimed.length === 5 && trimed.toLowerCase() === "false") {
      return false;
    } else if (trimed.length === 4 && trimed.toLowerCase() === "true") {
      return true;
    } else if (trimed[0] === "{" && trimed[trimed.length - 1] === "}" || trimed[0] === "[" && trimed[trimed.length - 1] === "]") {
      try {
        return JSON.parse(trimed);
      } catch (e) {
        return item;
      }
    } else {
      return item;
    }
  }
  return item;
};

/**
 * Attach the converter params consulted by convertType.
 * @param {Object} param Converter params.
 */
Parser.prototype.setParam = function (param) {
  this.param = param;
};

/**
 * Check whether this parser's pattern matches a column title.
 * @param {string} str Column title.
 * @returns {boolean|null} Result of the match; null when no pattern is set.
 */
Parser.prototype.test = function (str) {
  return this.regExp && this.regExp.test(str);
};

/**
 * Default parse: store the item under the raw header.
 * @param {Object} params Parse context with resultRow, head and item.
 */
Parser.prototype.parse = function (params) {
  params.resultRow[params.head] = params.item;
};

/**
 * Return the header with the parser marker removed, computing and caching it
 * on first use ("Unknown Header" when nothing remains).
 * @returns {string}
 */
Parser.prototype.getHeadStr = function () {
  if (!this.headStr) {
    this.headStr = this.head.replace(this.regExp, '') || "Unknown Header";
  }
  return this.headStr;
};

/**
 * @returns {string} The raw column title this parser was initialised with.
 */
Parser.prototype.getHead = function () {
  return this.head;
};

/**
 * Initialise the parser for a column title: remember the raw title, strip
 * the parser marker and split off an optional "<type>#!" prefix.
 * @param {string} columnTitle Raw column title.
 */
Parser.prototype.initHead = function (columnTitle) {
  this.head = columnTitle;
  var wholeHead = columnTitle.replace(this.regExp, '');
  var splitArr = wholeHead.split("#!");
  if (splitArr.length === 1) { // no explicit type
    this.headStr = splitArr[0];
  } else {
    var type = splitArr.shift();
    var normalisedType = type.toLowerCase();
    if (explicitTypes.indexOf(normalisedType) > -1) {
      // Store the lower-cased name: the prefix is matched case-insensitively,
      // but convertType compares against 'number' exactly, so "Number#!x"
      // was accepted here and then never converted.
      this.type = normalisedType;
      this.headStr = splitArr.join("#!");
    } else { // unknown prefix: treat the whole header as the title
      this.headStr = wholeHead;
    }
  }
  if (!this.headStr) {
    this.headStr = wholeHead ? wholeHead : "Unknown Head";
  }
};

/**
 * Create an independent copy carrying every enumerable property of this
 * parser (own and inherited).
 * @returns {Parser}
 */
Parser.prototype.clone = function () {
  var copy = new Parser();
  for (var key in this) {
    copy[key] = this[key];
  }
  return copy;
};

/**
 * @returns {string} The parser name.
 */
Parser.prototype.getName = function () {
  return this.name;
};
module.exports = Parser;
},{}],21:[function(require,module,exports){
//implementation
var registeredParsers = [];
var Parser = require("./parser.js");
var defaultParser = require("./defaultParsers");
/**
 * Add a parser to the registry unless it is not a Parser instance or the
 * same instance is already registered.
 * @param {Parser} parser Parser to register.
 */
function registerParser(parser) {
  if (!(parser instanceof Parser)) {
    return;
  }
  if (registeredParsers.indexOf(parser) !== -1) {
    return;
  }
  registeredParsers.push(parser);
}
/**
 * Create the parser instance for a column: a clone of the last registered
 * parser whose pattern matches the title, otherwise a clone of the parser
 * named "json" (or a bare Parser when none is registered under that name).
 * @param {string} columnTitle Column title; falsy values are treated as ''.
 * @param {Object} param Converter params handed to the parser.
 * @returns {Parser} Initialised parser instance.
 */
function getParser(columnTitle, param) {
  var title = columnTitle ? columnTitle : '';

  // Last registered parser satisfying the predicate, or undefined.
  function lastRegistered(predicate) {
    var found;
    registeredParsers.forEach(function (candidate) {
      if (predicate(candidate)) {
        found = candidate;
      }
    });
    return found;
  }

  var inst;
  var matched = lastRegistered(function (candidate) {
    return candidate.test(title);
  });
  if (matched) {
    inst = matched.clone();
    inst.head = title;
  } else {
    var fallback = lastRegistered(function (candidate) {
      return candidate.getName() === "json";
    });
    inst = fallback ? fallback.clone() : new Parser();
  }
  inst.setParam(param);
  inst.initHead(title);
  return inst;
}
/**
 * Create a parser from its parts and register it.
 *
 * The fourth constructor argument is always passed as `false`; any extra
 * arguments given to addParser are ignored.
 *
 * @param {string} name parser name
 * @param {RegExp} regExp expression handed to the Parser constructor
 * @param {Function} parseFunc parse function handed to the Parser constructor
 */
function addParser (name, regExp, parseFunc) {
  registerParser(new Parser(name, regExp, parseFunc, false)); //TODO remove new
}
/**
 * Placeholder that is not implemented yet: the body is empty, so the
 * argument is ignored and the function returns undefined.
 * @param {*} parserPath currently unused
 */
function addSafeParser(parserPath){
//TODO impl
}
/**
 * Build one parser per column title of a header row.
 * @param {Array} row column titles
 * @param {*} param value forwarded to getParser() for every column
 * @return {Array} parsers in the same order as the titles
 */
function initParsers (row, param) {
  return row.reduce(function (collected, columnTitle) {
    collected.push(getParser(columnTitle, param));
    return collected;
  }, []);
}
// Register every built-in parser definition when this module is loaded.
// NOTE: addParser() declares only three parameters, so the fourth argument
// (the definition's processSafe value) is currently ignored.
defaultParser.forEach(function registerDefault (definition) {
  //TODO refactor this
  addParser(definition.name, definition.regExp, definition.parserFunc, definition.processSafe);
});
//module interfaces
module.exports.addParser = addParser;
module.exports.initParsers = initParsers;
module.exports.getParser = getParser;
},{"./defaultParsers":8,"./parser.js":20}],22:[function(require,module,exports){
var getDelimiter = require("./getDelimiter");
/**
* Convert a line of string to csv columns according to its delimiter
* @param {[type]} rowStr [description]
* @param {[type]} param [Converter param]
* @return {[type]} {cols:["a","b","c"],closed:boolean} the closed field indicate if the row is a complete row
*/
module.exports = function rowSplit(rowStr, param) {
if (rowStr === "") {
return { cols: [], closed: true };
}
var quote = param.quote;
var trim = param.trim;
var escape = param.escape;
if (param.delimiter instanceof Array || param.delimiter.toLowerCase() === "auto") {
param.delimiter = getDelimiter(rowStr, param);
}
var delimiter = param.delimiter;
var rowArr = rowStr.split(delimiter);
if (quote === "off") {
return { cols: rowArr, closed: true };
}
var row = [];
var inquote = false;
var quoteBuff = '';
for (var i = 0; i < rowArr.length; i++) {
var e = rowArr[i];
if (!inquote && trim) {
e = e.trim();
}
var len = e.length;
if (!inquote) {
if (isQuoteOpen(e, param)) { //quote open
e = e.substr(1);
if (isQuoteClose(e, param)) { //quote close
e = e.substring(0, e.length - 1);
e = _escapeQuote(e, quote, escape);;
row.push(e);
continue;
} else {
inquote = true;
quoteBuff += e;
continue;
}
} else {
row.push(e);
continue;
}
} else { //previous quote not closed
if (isQuoteClose(e, param)) { //close double quote
inquote = false;
e = e.substr(0, len - 1);
quoteBuff += delimiter + e;
quoteBuff = _escapeQuote(quoteBuff, quote, escape);
if (trim) {
quoteBuff = quoteBuff.trimRight();
}
row.push(quoteBuff);
quoteBuff = "";
} else {
quoteBuff += delimiter + e;
}
}
}
if (!inquote && param._needFilterRow) {
row = filterRow(row, param);
}
return { cols: row, closed: !inquote };
// if (param.workerNum<=1){
// }else{
// if (inquote && quoteBuff.length>0){//for multi core, quote will be closed at the end of line
// quoteBuff=_escapeQuote(quoteBuff,quote,escape);;
// if (trim){
// quoteBuff=quoteBuff.trimRight();
// }
// row.push(quoteBuff);
// }
// return {cols:row,closed:true};
// }
}
/**
 * Apply the ignoreColumns / includeColumns settings to one row.
 *
 * ignoreColumns is applied first: every listed non-negative index is removed.
 * Removal is done in a single pass against the ORIGINAL positions, so the
 * result does not depend on the order of the index list. (Splicing one index
 * at a time shifts the remaining cells and removes the wrong columns unless
 * the list happens to be sorted in descending order.)
 *
 * includeColumns is applied afterwards, to the already reduced row: the
 * result contains row[index] for every listed non-negative index, in list
 * order (an out-of-range index yields undefined).
 *
 * The input array is never modified; a new array is returned whenever a
 * filter applies.
 *
 * @param {Array} row cells of one row
 * @param {Object} param converter params; reads ignoreColumns and includeColumns
 * @return {Array} the filtered row
 */
function filterRow(row, param) {
  var ignored = param.ignoreColumns;
  if (ignored instanceof Array && ignored.length > 0) {
    var dropped = {};
    for (var i = 0; i < ignored.length; i++) {
      if (ignored[i] >= 0) {
        // Math.floor mirrors the integer coercion splice() applied to the index.
        dropped[Math.floor(ignored[i])] = true;
      }
    }
    row = row.filter(function (cell, position) {
      return dropped[position] !== true;
    });
  }
  var included = param.includeColumns;
  if (included instanceof Array && included.length > 0) {
    var kept = [];
    for (var j = 0; j < included.length; j++) {
      if (included[j] >= 0) {
        kept.push(row[included[j]]);
      }
    }
    row = kept;
  }
  return row;
}
/**
 * Decide whether a piece of a row starts a quoted cell.
 *
 * The piece must begin with the quote character. It opens a quote when the
 * second character is not a quote. If the second character is a quote, it
 * still opens one only when the quote character is also the escape character
 * and either a third quote follows or the piece is exactly two characters.
 *
 * @param {string} str piece to inspect
 * @param {Object} param reads `quote` and `escape`
 * @return {boolean}
 */
function isQuoteOpen(str, param) {
  var quoteChar = param.quote;
  var escapeChar = param.escape;
  if (str[0] !== quoteChar) {
    return false;
  }
  var second = str[1];
  if (second !== quoteChar) {
    return true;
  }
  if (second !== escapeChar) {
    return false;
  }
  return str[2] === quoteChar || str.length === 2;
}
/**
 * Decide whether a piece of a row ends a quoted cell.
 *
 * Counts the unbroken run of quote/escape characters at the end of the piece;
 * an odd run length means the last quote is a real closing quote rather than
 * part of an escaped pair.
 *
 * @param {string} str piece to inspect
 * @param {Object} param reads `quote` and `escape`
 * @return {boolean}
 */
function isQuoteClose(str, param) {
  var quoteChar = param.quote;
  var escapeChar = param.escape;
  var trailing = 0;
  for (var pos = str.length - 1; str[pos] === quoteChar || str[pos] === escapeChar; pos--) {
    trailing++;
  }
  return trailing % 2 === 1;
}
/**
 * Collapse every doubled quote into a single quote, scanning left to right
 * without re-examining characters that were already emitted.
 *
 * @param {string} str text to process
 * @param {string} quote the quote character (expected to be non-empty)
 * @return {string} text with each `quote+quote` pair replaced by `quote`
 */
function twoDoubleQuote(str, quote) {
  var pair = quote + quote;
  var collapsed = '';
  var from = 0;
  var hit;
  while ((hit = str.indexOf(pair, from)) > -1) {
    collapsed += str.substring(from, hit) + quote;
    from = hit + pair.length;
  }
  return collapsed + str.substring(from);
}
// Compiled "escape followed by quote" expressions, keyed by the quote/escape pair.
var cachedRegExp = {};
/**
 * Backslash-escape regular-expression metacharacters so the text can be
 * embedded in a RegExp source and matched literally.
 * @param {string} text literal text
 * @return {string} regex-safe source
 */
function _escapeRegExpLiteral(text) {
  return String(text).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
/**
 * Replace every escaped quote (`escape` immediately followed by `quote`)
 * in a cell with a plain quote.
 *
 * Both characters are matched literally. Building the expression from the raw
 * characters only worked for a backslash escape; any other regex
 * metacharacter used as quote or escape ("$", "|", ".", "*", ...) produced a
 * wrong or invalid pattern.
 *
 * @param {string} segment cell text
 * @param {string} quote quote character
 * @param {string} escape escape character
 * @return {string} unescaped text
 */
function _escapeQuote(segment, quote, escape) {
  var key = "es|" + quote + "|" + escape;
  var regExp = cachedRegExp[key];
  if (regExp === undefined) {
    regExp = new RegExp(_escapeRegExpLiteral(escape) + _escapeRegExpLiteral(quote), 'g');
    cachedRegExp[key] = regExp;
  }
  // A replacer function keeps "$" sequences in `quote` from being read as
  // replacement patterns.
  return segment.replace(regExp, function () {
    return quote;
  });
}
},{"./getDelimiter":16}],23:[function(require,module,exports){
(function (process,__dirname){
module.exports=workerMgr;
var spawn=require("child_process").spawn;
var eom="\x03"
var eom1="\x0e"
var eom2="\x0f"
var CSVError=require('./CSVError')
/**
 * Create a manager for a pool of Worker objects.
 *
 * Idle workers sit in a queue. sendWorker() takes one out, lets it parse a
 * chunk, and puts it back when the result arrives. If no worker is idle, the
 * call's arguments are parked in a single slot (a later parked call
 * overwrites an earlier one) and replayed when a worker finishes. When a
 * result arrives, nothing is parked and nothing is running, the `drain`
 * property of the returned object is invoked; callers may replace it.
 *
 * Note that setParams() and destroyWorker() only reach workers that are in
 * the idle queue at the time of the call.
 *
 * @return {Object} { initWorker, sendWorker, setParams, drain, isRunning, destroyWorker }
 */
function workerMgr(){
  var idleWorkers = [];
  var runningCount = 0;
  var parkedCall = null;
  var api = {
    initWorker: initWorker,
    sendWorker: sendWorker,
    setParams: setParams,
    drain: function(){},
    isRunning: isRunning,
    destroyWorker: destroyWorker
  };

  // Reset all state and create `num` workers configured with `params`.
  function initWorker(num, params){
    idleWorkers = [];
    runningCount = 0;
    parkedCall = null;
    var remaining = num;
    while (remaining > 0){
      idleWorkers.push(new Worker(params));
      remaining--;
    }
  }

  // True while at least one chunk is being parsed.
  function isRunning(){
    return runningCount > 0;
  }

  // Destroy every worker currently in the idle queue.
  function destroyWorker(){
    for (var i = 0; i < idleWorkers.length; i++){
      idleWorkers[i].destroy();
    }
  }

  // Hand a chunk to an idle worker, or park the call when none is idle.
  // transformCb is scheduled with process.nextTick once the chunk has been
  // handed over; cbResult receives (result, startIdx) when parsing is done.
  function sendWorker(data, startIdx, transformCb, cbResult){
    if (idleWorkers.length === 0){
      parkedCall = [data, startIdx, transformCb, cbResult];
      return;
    }
    var worker = idleWorkers.shift();
    runningCount += 1;
    worker.parse(data, startIdx, function(result){
      idleWorkers.push(worker);
      cbResult(result, startIdx);
      runningCount -= 1;
      if (parkedCall === null && runningCount === 0){
        api.drain();
      } else if (parkedCall){
        sendWorker.apply(this, parkedCall);
        parkedCall = null;
      }
    });
    process.nextTick(transformCb);
  }

  // Push new params to every worker currently in the idle queue.
  function setParams(params){
    for (var i = 0; i < idleWorkers.length; i++){
      idleWorkers[i].setParams(params);
    }
  }

  return api;
}
/**
 * Wrapper around one child process running worker.js.
 *
 * The child is started with the current Node executable, an environment that
 * contains only `child: true`, piped stdin/stdout, the parent's fd 2 for
 * stderr and an IPC channel. Initial params are sent immediately. Messages
 * arrive either over IPC ("message" event) or over stdout, where they are
 * separated by the `eom` character. A trailing incomplete message is kept in
 * `this.buffer` until the next chunk completes it.
 *
 * stdout is decoded by the stream itself (`setEncoding("utf8")`), so a
 * multi-byte character that is split across two chunks is reassembled
 * correctly. Decoding each chunk separately with `toString` would turn both
 * halves into replacement characters.
 *
 * @param {Object} params converter params forwarded to the child
 */
function Worker(params){
  this.cp = spawn(process.execPath, [__dirname + "/worker.js"], {
    env: {
      child: true
    },
    stdio: ['pipe', 'pipe', 2, 'ipc']
  });
  this.setParams(params);
  this.cp.on("message", this.onChildMsg.bind(this));
  this.buffer = "";
  var self = this;
  this.cp.stdout.setEncoding("utf8");
  this.cp.stdout.on("data", function(chunk){
    var pending = self.buffer + String(chunk);
    var messages = pending.split(eom);
    // Everything before the last separator is a complete message.
    while (messages.length > 1){
      self.onChildMsg(messages.shift());
    }
    self.buffer = messages[0];
  });
}
/**
 * Send converter params to the child as a command "0" message
 * ("0" followed by the JSON-serialised params).
 * @param {*} params value to serialise
 */
Worker.prototype.setParams = function(params){
  this.sendMsg("0" + JSON.stringify(params));
};
/**
 * Handle one message coming back from the child.
 *
 * A message is `<cmd><data>`, where cmd is the first character:
 *  - "0": data is `<lineCount>|<partial>`; forwarded to `this.cbLine`
 *         (if set) as (parsed integer, partial).
 *  - "1": data is a list of rows, each terminated by `eom1`, whose fields
 *         (index, row, error JSON, json) are separated by `eom2`; forwarded
 *         to `this.cbResult` (if set) as an array of
 *         {index, row, err, json}. A non-empty error field is revived
 *         through CSVError.fromArray, otherwise `err` is null.
 * Empty messages and any other command are ignored.
 *
 * @param {string} msg raw message
 */
Worker.prototype.onChildMsg = function(msg){
  if (!msg){
    return;
  }
  var cmd = msg[0];
  var data = msg.slice(1);
  if (cmd === "0"){
    // total line number of current chunk
    if (this.cbLine){
      var parts = data.split("|");
      this.cbLine(parseInt(parts[0]), parts[1]);
    }
  } else if (cmd === "1"){
    // json array of current chunk
    if (this.cbResult){
      var rows = data.split(eom1);
      // Each row ends with eom1, so the last split element is a leftover.
      rows.pop();
      var parsedRows = rows.map(function(row){
        var fields = row.split(eom2);
        return {
          index: fields[0],
          row: fields[1],
          err: fields[2] ? CSVError.fromArray(JSON.parse(fields[2])) : null,
          json: fields[3]
        };
      });
      this.cbResult(parsedRows);
    }
  }
};
/**
 * Ask the child to parse a chunk. The callback is remembered as
 * `this.cbResult` and the chunk is sent as a command "1" message:
 * "1" + startIdx + "|" + data.
 * @param {string} data chunk of CSV text
 * @param {number} startIdx index of the first line of the chunk
 * @param {Function} cbResult invoked by onChildMsg with the parsed rows
 */
Worker.prototype.parse = function(data, startIdx, cbResult){
  this.cbResult = cbResult;
  this.sendMsg(["1", startIdx, "|", data].join(""));
};
/**
 * Terminate the child process by calling kill() on it.
 */
Worker.prototype.destroy = function(){
  var child = this.cp;
  child.kill();
};
/**
 * Write one message to the child's stdin as UTF-8, terminated by the `eom`
 * separator so the receiver can split consecutive messages.
 * @param {string} msg message text (`<cmd><data>`)
 */
Worker.prototype.sendMsg = function(msg){
  var framed = msg + eom;
  this.cp.stdin.write(framed, "utf8");
};
}).call(this,require('_process'),"/libs/core")
},{"./CSVError":3,"_process":41,"child_process":30}],24:[function(require,module,exports){
//deprecated but leave it for backword compatibility
module.exports.core=require("./core");
//live apis
module.exports=require("./core");
module.exports.interfaces = require("./interfaces");
},{"./core":18,"./interfaces":27}],25:[function(require,module,exports){
module.exports = require("./main.js");
},{"./main.js":26}],26:[function(require,module,exports){
(function (process){
/**
* Convert input to process stdout
*/
//implementation
var Converter = require("../../core/Converter.js");
/**
 * Create a Converter whose parsed records are streamed to process.stdout as
 * a JSON array: "[\n" is written right away, records are written one by one
 * separated by ",\n", and "\n]" is written on "end_parsed". On "error" the
 * error is printed with console.error and the process exits with code -1.
 * @return {Converter} the wired-up converter
 */
function _initConverter(){
  var converter = new Converter();
  var hasWrittenRecord = false;
  var out = process.stdout;

  converter.on("record_parsed", function(rowJSON){
    if (hasWrittenRecord){
      out.write(",\n");
    }
    out.write(JSON.stringify(rowJSON)); //write parsed JSON object one by one.
    hasWrittenRecord = true;
  });

  out.write("[\n"); //write array symbol

  converter.on("end_parsed", function(){
    out.write("\n]"); //end array symbol
  });

  converter.on("error", function(err){
    console.error(err);
    process.exit(-1);
  });

  return converter;
}
/**
 * Convert a CSV source to JSON on stdout: a stdout-bound converter is created
 * and the argument is handed to its from() method.
 * @param {string} fileName value passed to Converter#from
 */
function convertFile(fileName){
  _initConverter().from(fileName);
}
/**
 * Convert CSV text to JSON on stdout: a stdout-bound converter is created and
 * the argument is handed to its from() method.
 * @param {string} csvString value passed to Converter#from
 */
function convertString(csvString){
  _initConverter().from(csvString);
}
//module interfaces
module.exports.convertFile = convertFile;
module.exports.convertString = convertString;
}).call(this,require('_process'))
},{"../../core/Converter.js":4,"_process":41}],27:[function(require,module,exports){
module.exports.web=require("./web");
module.exports.cli=require("./cli");
},{"./cli":25,"./web":28}],28:[function(require,module,exports){
module.exports = require("./webServer.js");
},{"./webServer.js":29}],29:[function(require,module,exports){
var http = require("http");
var Converter = require("../../core/Converter.js");
/**
 * Start an HTTP server that converts posted CSV.
 *
 * A POST to the configured path is piped through a new
 * Converter({constructResult:false}) and on into the response; every other
 * request is answered with a short usage hint. The listening port and the
 * path are logged to the console.
 *
 * @param {Object} [args] options
 * @param {number|string} [args.port='8801'] port to listen on
 * @param {string} [args.urlpath='/parseCSV'] path that accepts CSV posts
 * @return {Object} the server returned by http.createServer()
 */
function startWebServer (args) {
  var options = args || {};
  var port = options.port || '8801';
  var urlpath = options.urlpath || '/parseCSV';
  var server = http.createServer();

  server.on("request", function(req, res){
    var isConvertRequest = req.url === urlpath && req.method === "POST";
    if (!isConvertRequest){
      res.end("Please post data to: " + urlpath);
      return;
    }
    var converter = new Converter({constructResult:false});
    req.pipe(converter).pipe(res);
  });

  server.listen(port);
  console.log("CSV Web Server Listen On:" + port);
  console.log("POST to " + urlpath + " with CSV data to get parsed.");
  return server;
}
module.exports.startWebServer = startWebServer;
},{"../../core/Converter.js":4,"http":60}],30:[function(require,module,exports){
},{}],31:[function(require,module,exports){
arguments[4][30][0].apply(exports,arguments)
},{"dup":30}],32:[function(require,module,exports){
(function (global){
/*!
* The buffer module from node.js, for the browser.
*
* @author Feross Aboukhadijeh <feross@feross.org> <http://feross.org>
* @license MIT
*/
/* eslint-disable no-proto */
'use strict'
var base64 = require('base64-js')
var ieee754 = require('ieee754')
var isArray = require('isarray')
exports.Buffer = Buffer
exports.SlowBuffer = SlowBuffer
exports.INSPECT_MAX_BYTES = 50
Buffer.poolSize = 8192 // not used by this implementation
var rootParent = {}
/**
* If `Buffer.TYPED_ARRAY_SUPPORT`:
* === true Use Uint8Array implementation (fastest)
* === false Use Object implementation (most compatible, even IE6)
*
* Browsers that support typed arrays are IE 10+, Firefox 4+, Chrome 7+, Safari 5.1+,
* Opera 11.6+, iOS 4.2+.
*
* Due to various browser bugs, sometimes the Object implementation will be used even
* when the browser supports typed arrays.
*
* Note:
*
* - Firefox 4-29 lacks support for adding new properties to `Uint8Array` instances,
* See: https://bugzilla.mozilla.org/show_bug.cgi?id=695438.
*
* - Safari 5-7 lacks support for changing the `Object.prototype.constructor` property
* on objects.
*
* - Chrome 9-10 is missing the `TypedArray.prototype.subarray` function.
*
* - IE10 has a broken `TypedArray.prototype.subarray` function which returns arrays of
* incorrect length in some situations.
* We detect these buggy browsers and set `Buffer.TYPED_ARRAY_SUPPORT` to `false` so they
* get the Object implementation, which is slower but behaves correctly.
*/
Buffer.TYPED_ARRAY_SUPPORT = global.TYPED_ARRAY_SUPPORT !== undefined
? global.TYPED_ARRAY_SUPPORT
: typedArraySupport()
/**
 * Feature-detect whether Uint8Array instances are usable as the backing
 * store: they must accept new properties, allow `constructor` to be
 * reassigned, expose `subarray`, and return an empty view for
 * `subarray(1, 1)`. Any exception during the probe counts as "unsupported".
 * @return {boolean}
 */
function typedArraySupport () {
  function Probe () {}
  try {
    var sample = new Uint8Array(1)
    sample.foo = function () { return 42 }
    sample.constructor = Probe
    // instances can be augmented
    if (sample.foo() !== 42) return false
    // constructor can be set
    if (sample.constructor !== Probe) return false
    // chrome 9-10 lack `subarray`
    if (typeof sample.subarray !== 'function') return false
    // ie10 has broken `subarray`
    return sample.subarray(1, 1).byteLength === 0
  } catch (err) {
    return false
  }
}
/**
 * Upper bound used for buffer lengths: 0x7fffffff when
 * Buffer.TYPED_ARRAY_SUPPORT is truthy, 0x3fffffff otherwise.
 * @return {number}
 */
function kMaxLength () {
  if (Buffer.TYPED_ARRAY_SUPPORT) {
    return 0x7fffffff
  }
  return 0x3fffffff
}
/**
* Class: Buffer
* =============
*
* The Buffer constructor returns instances of `Uint8Array` that are augmented
* with function properties for all the node `Buffer` API functions. We use
* `Uint8Array` so that square bracket notation works as expected -- it returns
* a single octet.
*
* By augmenting the instances, we can avoid modifying the `Uint8Array`
* prototype.
*/
function Buffer (arg) {
if (!(this instanceof Buffer)) {
// Avoid going through an ArgumentsAdaptorTrampoline in the common case.
if (arguments.length > 1) return new Buffer(arg, arguments[1])
return new Buffer(arg)
}
if (!Buffer.TYPED_ARRAY_SUPPORT) {
this.length = 0
this.parent = undefined
}
// Common case.
if (typeof arg === 'number') {
return fromNumber(this, arg)
}
// Slightly less common case.
if (typeof