UNPKG

csvtojson

Version:

A tool concentrating on converting csv data to JSON with customised parser supporting

1,721 lines (1,634 loc) 302 kB
(function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(require,module,exports){ if (window){ window.csvtojson=require("./index.js"); window.csvtojson.version=require("./package.json").version; } },{"./index.js":2,"./package.json":130}],2:[function(require,module,exports){ module.exports = require("./libs/csv2json.js"); },{"./libs/csv2json.js":24}],3:[function(require,module,exports){ var util=require("util"); module.exports=CSVError; function CSVError(err,index,extra){ Error.call(this,""); this.err=err; this.line=index; this.extra=extra; this.message="Error: "+err+". JSON Line number: "+index+ (extra?" near: "+extra:""); this.name="CSV Error"; } util.inherits(CSVError,Error); CSVError.prototype.toString=function(){ return JSON.stringify([this.err,this.line,this.extra]); } CSVError.column_mismatched=function(index,extra){ return new CSVError("column_mismatched",index,extra); } CSVError.unclosed_quote=function(index,extra){ return new CSVError("unclosed_quote",index,extra); } CSVError.fromArray=function(arr){ return new CSVError(arr[0],arr[1],arr[2]); } },{"util":72}],4:[function(require,module,exports){ (function (process){ var util = require("util"); var Transform = require("stream").Transform; var os = require("os"); var eol = os.EOL; // var Processor = require("./Processor.js"); var defParam = require("./defParam"); var csvline = require("./csvline"); var fileline = require("./fileline"); var dataToCSVLine = require("./dataToCSVLine"); var fileLineToCSVLine = require("./fileLineToCSVLine"); var linesToJson = require("./linesToJson"); var CSVError = require("./CSVError"); var workerMgr = require("./workerMgr"); function Converter(params, options) { Transform.call(this, options); _param = defParam(params); this._options = options || {}; this.param = _param; this.param._options = this._options; // this.resultObject = new Result(this); // this.pipe(this.resultObject); // it is important to have downstream for a transform otherwise it will stuck this.started = false;//indicate if parsing has started. this.recordNum = 0; this.lineNumber = 0; //file line number this._csvLineBuffer = ""; this.lastIndex = 0; // index in result json array //this._pipe(this.lineParser).pipe(this.processor); // this.initNoFork(); if (this.param.forked) { this.param.forked = false; this.workerNum = 2; } this.flushCb = null; this.processEnd = false; this.sequenceBuffer = []; this._needJson = null; this._needEmitResult = null; this._needEmitFinalResult = null; this._needEmitJson = null; this._needPush = null; this._needEmitCsv = null; this._csvTransf = null; this.finalResult = []; // this.on("data", function() {}); this.on("error", emitDone(this)); this.on("end", emitDone(this)); this.initWorker(); process.nextTick(function () { if (this._needEmitFinalResult === null) { this._needEmitFinalResult = this.listeners("end_parsed").length > 0 } if (this._needEmitResult === null) { this._needEmitResult = this.listeners("record_parsed").length > 0 } if (this._needEmitJson === null) { this._needEmitJson = this.listeners("json").length > 0 } if (this._needEmitCsv === null) { this._needEmitCsv = this.listeners("csv").length > 0 } if (this._needJson === null) { this._needJson = this._needEmitJson || this._needEmitFinalResult || this._needEmitResult || this.transform || this._options.objectMode; } if (this._needPush === null) { this._needPush = this.listeners("data").length > 0 || this.listeners("readable").length > 0 // this._needPush=false; } this.param._needParseJson = this._needJson || this._needPush; }.bind(this)) return this; } util.inherits(Converter, Transform); function emitDone(conv) { return function (err) { process.nextTick(function () { conv.emit('done', err) }) } } Converter.prototype._transform = function (data, encoding, cb) { if (this.param.toArrayString && this.started === false) { this.started = true; if (this._needPush) { this.push("[" + eol, "utf8"); } } data = data.toString("utf8"); var self = this; this.preProcessRaw(data, function (d) { if (d && d.length > 0) { self.processData(self.prepareData(d), cb); } else { cb(); } }) }; Converter.prototype.prepareData = function (data) { return this._csvLineBuffer + data; } Converter.prototype.setPartialData = function (d) { this._csvLineBuffer = d; } Converter.prototype.processData = function (data, cb) { var params = this.param; if (params.ignoreEmpty && !params._headers) { data = data.trimLeft(); } var fileLines = fileline(data, this.param) if (fileLines.lines.length > 0) { if (this.preProcessLine && typeof this.preProcessLine === "function") { fileLines.lines = this._preProcessLines(fileLines.lines, this.lastIndex) } if (!params._headers) { //header is not inited. init header this.processHead(fileLines, cb); } else { if (params.workerNum <= 1) { var lines = fileLineToCSVLine(fileLines, params); this.setPartialData(lines.partial); var jsonArr = linesToJson(lines.lines, params, this.recordNum); this.processResult(jsonArr) this.lastIndex += jsonArr.length; this.recordNum += jsonArr.length; cb(); } else { this.workerProcess(fileLines, cb); } } } else { this.setPartialData(fileLines.partial) cb(); } } Converter.prototype._preProcessLines = function (lines, startIdx) { var rtn = [] for (var i = 0; i < lines.length; i++) { var result = this.preProcessLine(lines[i], startIdx + i + 1) if (typeof result === "string") { rtn.push(result) } else { rtn.push(lines[i]) this.emit("error", new Error("preProcessLine should return a string but got: " + JSON.stringify(result))) } } return rtn } Converter.prototype.initWorker = function () { var workerNum = this.param.workerNum - 1; if (workerNum > 0) { this.workerMgr = workerMgr(); this.workerMgr.initWorker(workerNum, this.param); } } Converter.prototype.preRawData = function (func) { this.preProcessRaw = func; return this; } Converter.prototype.preFileLine = function (func) { this.preProcessLine = func; return this; } /** * workerpRocess does not support embeded multiple lines. */ Converter.prototype.workerProcess = function (fileLine, cb) { var self = this; var line = fileLine var eol = this.getEol() this.setPartialData(line.partial) this.workerMgr.sendWorker(line.lines.join(eol) + eol, this.lastIndex, cb, function (results, lastIndex) { var cur = self.sequenceBuffer[0]; if (cur.idx === lastIndex) { cur.result = results; var records = []; while (self.sequenceBuffer[0] && self.sequenceBuffer[0].result) { var buf = self.sequenceBuffer.shift(); records = records.concat(buf.result) } self.processResult(records) self.recordNum += records.length; } else { for (var i = 0; i < self.sequenceBuffer.length; i++) { var buf = self.sequenceBuffer[i]; if (buf.idx === lastIndex) { buf.result = results; break; } } } // self.processResult(JSON.parse(results),function(){},true); }) this.sequenceBuffer.push({ idx: this.lastIndex, result: null }); this.lastIndex += line.lines.length; } Converter.prototype.processHead = function (fileLine, cb) { var params = this.param; if (!params._headers) { //header is not inited. init header var lines = fileLineToCSVLine(fileLine, params); this.setPartialData(lines.partial); if (params.noheader) { if (params.headers) { params._headers = params.headers; } else { params._headers = []; } } else { var headerRow = lines.lines.shift(); if (params.headers) { params._headers = params.headers; } else { params._headers = headerRow; } } if (this.param.workerNum > 1) { this.workerMgr.setParams(params); } var res = linesToJson(lines.lines, params, 0); this.processResult(res); this.lastIndex += res.length; this.recordNum += res.length; cb(); } else { cb(); } } Converter.prototype.processResult = function (result) { for (var i = 0; i < result.length; i++) { var r = result[i]; if (r.err) { this.emit("error", r.err); } else { this.emitResult(r); } } // this.lastIndex+=result.length; // cb(); } Converter.prototype.emitResult = function (r) { var index = r.index; var row = r.row; var result = r.json; var resultJson = null; var resultStr = null; if (typeof result === "string") { resultStr = result; } else { resultJson = result; } if (resultJson === null && this._needJson) { resultJson = JSON.parse(resultStr) if (typeof row === "string") { row = JSON.parse(row) } } if (this.transform && typeof this.transform === "function") { this.transform(resultJson, row, index); resultStr = null; } if (this._needEmitJson) { this.emit("json", resultJson, index) } if (this._needEmitCsv) { if (typeof row === "string") { row = JSON.parse(row) } this.emit("csv", row, index) } if (this.param.constructResult && this._needEmitFinalResult) { this.finalResult.push(resultJson) } if (this._needEmitResult) { this.emit("record_parsed", resultJson, row, index); } if (this.param.toArrayString && index > 0 && this._needPush) { this.push("," + eol); } if (this._options && this._options.objectMode) { this.push(resultJson); } else { if (this._needPush) { if (resultStr === null) { resultStr = JSON.stringify(resultJson) } this.push(!this.param.toArrayString ? resultStr + eol : resultStr, "utf8"); } } } Converter.prototype.preProcessRaw = function (data, cb) { cb(data); } Converter.prototype.preProcessLine = function (line, lineNumber) { return line; } Converter.prototype._flush = function (cb) { var self = this; this.flushCb = function () { self.emit("end_parsed", self.finalResult); if (self.workerMgr) { self.workerMgr.destroyWorker(); } cb() if (!self._needPush) { self.emit("end") } }; if (this._csvLineBuffer.length > 0) { if (this._csvLineBuffer[this._csvLineBuffer.length - 1] != this.getEol()) { this._csvLineBuffer += this.getEol(); } this.processData(this._csvLineBuffer, function () { this.checkAndFlush(); }.bind(this)); } else { this.checkAndFlush(); } return; }; // Converter.prototype._transformFork = function(data, encoding, cb) { // this.child.stdin.write(data, encoding, cb); // } // Converter.prototype._flushFork = function(cb) { // this.child.stdin.end(); // this.child.on("exit", cb); // } Converter.prototype.checkAndFlush = function () { if (this._csvLineBuffer.length !== 0) { this.emit("error", CSVError.unclosed_quote(this.recordNum, this._csvLineBuffer), this._csvLineBuffer); } if (this.param.toArrayString && this._needPush) { this.push(eol + "]", "utf8"); } if (this.workerMgr && this.workerMgr.isRunning()) { this.workerMgr.drain = function () { this.flushCb(); }.bind(this); } else { this.flushCb(); } } Converter.prototype.getEol = function (data) { if (!this.param.eol && data) { for (var i = 0; i < data.length; i++) { if (data[i] === "\r") { if (data[i + 1] === "\n") { this.param.eol = "\r\n"; } else { this.param.eol = "\r"; } return this.param.eol; } else if (data[i] === "\n") { this.param.eol = "\n"; return this.param.eol; } } this.param.eol = eol; } return this.param.eol || eol; }; Converter.prototype.fromFile = function (filePath, cb) { var fs = require('fs'); var rs = null; this.wrapCallback(cb, function () { if (rs && rs.destroy) { rs.destroy(); } }); fs.exists(filePath, function (exist) { if (exist) { rs = fs.createReadStream(filePath); rs.pipe(this); } else { this.emit('error', new Error("File not exist")) } }.bind(this)); return this; } Converter.prototype.fromStream = function (readStream, cb) { if (cb && typeof cb === "function") { this.wrapCallback(cb); } readStream.pipe(this); return this; } Converter.prototype.transf = function (func) { this.transform = func; return this; } Converter.prototype.fromString = function (csvString, cb) { if (typeof csvString != "string") { return cb(new Error("Passed CSV Data is not a string.")); } if (cb && typeof cb === "function") { this.wrapCallback(cb, function () { }); } process.nextTick(function () { this.end(csvString) }.bind(this)) return this; }; Converter.prototype.wrapCallback = function (cb, clean) { if (clean === undefined) { clean = function () { } } if (cb && typeof cb === "function") { this.once("end_parsed", function (res) { if (!this.hasError) { cb(null, res); } }.bind(this)); } this.once("error", function (err) { this.hasError = true; if (cb && typeof cb === "function") { cb(err); } clean(); }.bind(this)); } module.exports = Converter; }).call(this,require('_process')) },{"./CSVError":3,"./csvline":5,"./dataToCSVLine":6,"./defParam":7,"./fileLineToCSVLine":14,"./fileline":15,"./linesToJson":19,"./workerMgr":23,"_process":41,"fs":30,"os":40,"stream":59,"util":72}],5:[function(require,module,exports){ var getEol=require("./getEol"); var getDelimiter=require("./getDelimiter"); var toLines=require("./fileline"); var rowSplit=require("./rowSplit"); /** * Convert lines to csv columns * @param {[type]} lines [file lines] * @param {[type]} param [Converter param] * @return {[type]} {lines:[[col1,col2,col3...]],partial:String} */ module.exports=function(lines,param){ var csvLines=[]; var left=""; while (lines.length){ var line=left+lines.shift(); var row=rowSplit(line,param); if (row.closed){ csvLines.push(row.cols); left=""; }else{ left=line+getEol(line,param); } } return {lines:csvLines,partial:left}; } },{"./fileline":15,"./getDelimiter":16,"./getEol":17,"./rowSplit":22}],6:[function(require,module,exports){ var fileline=require("./fileline"); var csvline=require("./csvline"); /** * Convert data chunk to csv lines with cols * @param {[type]} data [description] * @param {[type]} params [description] * @return {[type]} {lines:[[col1,col2,col3]],partial:String} */ module.exports=function(data,params){ var line=fileline(data,params); var lines=line.lines; var csvLines=csvline(lines,params); return { lines:csvLines.lines, partial:csvLines.partial+line.partial } } },{"./csvline":5,"./fileline":15}],7:[function(require,module,exports){ (function (process){ module.exports = function (params) { var _param = { constructResult: true, //set to false to not construct result in memory. suitable for big csv data delimiter: ',', // change the delimiter of csv columns. It is able to use an array to specify potencial delimiters. e.g. [",","|",";"] ignoreColumns: [], // columns to ignore upon input. includeColumns: [], // columns to include upon input. quote: '"', //quote for a column containing delimiter. trim: true, //trim column's space charcters checkType: false, //whether check column type toArrayString: false, //stream down stringified json array instead of string of json. (useful if downstream is file writer etc) ignoreEmpty: false, //Ignore empty value while parsing. if a value of the column is empty, it will be skipped parsing. workerNum: getEnv("CSV_WORKER", 1), //number of parallel workers. If multi-core CPU available, increase the number will get better performance for large csv data. fork: false, //use another CPU core to convert the csv stream noheader: false, //indicate if first line of CSV file is header or not. headers: null, //an array of header strings. If noheader is false and headers is array, csv header will be ignored. flatKeys: false, // Don't interpret dots and square brackets in header fields as nested object or array identifiers at all. maxRowLength: 0, //the max character a csv row could have. 0 means infinite. If max number exceeded, parser will emit "error" of "row_exceed". if a possibly corrupted csv data provided, give it a number like 65535 so the parser wont consume memory. default: 0 checkColumn: false, //whether check column number of a row is the same as headers. If column number mismatched headers number, an error of "mismatched_column" will be emitted.. default: false escape: '"', //escape char for quoted column /**below are internal params */ _headerType: [], _headerTitle: [], _headerFlag: [], _headers: null, _needFilterRow:false }; if (!params) { params = {}; } for (var key in params) { if (params.hasOwnProperty(key)) { _param[key] = params[key]; } }; if (_param.ignoreColumns.length || _param.includeColumns.length){ _param._needFilterRow=true; _param.ignoreColumns.sort(function (a, b) { return b - a; }); } return _param; } function getEnv(key, def) { if (process.env[key]) { return process.env[key]; } else { return def; } } }).call(this,require('_process')) },{"_process":41}],8:[function(require,module,exports){ module.exports = [ require('./parser_array.js'), require('./parser_json.js'), require('./parser_omit.js'), require('./parser_jsonarray.js'), require("./parser_flat.js") ]; },{"./parser_array.js":9,"./parser_flat.js":10,"./parser_json.js":11,"./parser_jsonarray.js":12,"./parser_omit.js":13}],9:[function(require,module,exports){ module.exports = { "name": "array", "processSafe":true, "regExp": /^\*array\*/, "parserFunc": function parser_array(params) { var fieldName = params.head.replace(this.regExp, ''); if (params.resultRow[fieldName] === undefined) { params.resultRow[fieldName] = []; } params.resultRow[fieldName].push(params.item); } }; },{}],10:[function(require,module,exports){ module.exports = { "name": "flat", "processSafe":true, "regExp": /^\*flat\*/, "parserFunc": function parser_flat (params) { var key=this.getHeadStr(); var val=params.item; params.resultRow[key]=val; } }; },{}],11:[function(require,module,exports){ var arrReg = /\[([0-9]*)\]/; function processHead(pointer, headArr, arrReg, flatKeys) { var headStr, match, index; while (headArr.length > 1) { headStr = headArr.shift(); // match = headStr.match(arrReg); match = flatKeys ? false : headStr.match(arrReg); if (match) { //if its array, we need add an empty json object into specified index. if (pointer[headStr.replace(match[0], '')] === undefined) { pointer[headStr.replace(match[0], '')] = []; } index = match[1]; //get index where json object should stay pointer = pointer[headStr.replace(match[0], '')]; if (index === '') { //if its dynamic array index, push to the end index = pointer.length; } if (!pointer[index]) { //current index in the array is empty. we need create a new json object. pointer[index] = {}; } pointer = pointer[index]; } else { //not array, just normal JSON object. we get the reference of it if (pointer[headStr] === undefined) { pointer[headStr] = {}; } pointer = pointer[headStr]; } } return pointer; } module.exports = { "name": "json", "processSafe": true, "regExp": /^\*json\*/, "parserFunc": function parser_json(params) { var fieldStr = this.getHeadStr(); var headArr = (params.config && params.config.flatKeys) ? [fieldStr] : fieldStr.split('.'); var match, index, key, pointer; //now the pointer is pointing the position to add a key/value pair. var pointer = processHead(params.resultRow, headArr, arrReg, params.config && params.config.flatKeys); key = headArr.shift(); match = (params.config && params.config.flatKeys) ? false : key.match(arrReg); if (match) { // the last element is an array, we need check and treat it as an array. try { key = key.replace(match[0], ''); if (!pointer[key] || !(pointer[key] instanceof Array)) { pointer[key] = []; } if (pointer[key]) { index = match[1]; if (index === '') { index = pointer[key].length; } pointer[key][index] = params.item; } else { params.resultRow[fieldStr] = params.item; } } catch (e) { params.resultRow[fieldStr] = params.item; } } else { if (typeof pointer=== "string"){ params.resultRow[fieldStr] = params.item; }else{ pointer[key] = params.item; } } } }; },{}],12:[function(require,module,exports){ module.exports = { "name": "jsonarray", "processSafe":true, "regExp": /^\*jsonarray\*/, "parserFunc": function parser_jsonarray (params) { var fieldStr = params.head.replace(this.regExp, ""); var headArr = fieldStr.split('.'); var pointer = params.resultRow; while (headArr.length > 1) { var headStr = headArr.shift(); if (pointer[headStr] === undefined) { pointer[headStr] = {}; } pointer = pointer[headStr]; } var arrFieldName = headArr.shift(); if (pointer[arrFieldName] === undefined) { pointer[arrFieldName] = []; } pointer[arrFieldName].push(params.item); } }; },{}],13:[function(require,module,exports){ module.exports = { "name": "omit", "regExp": /^\*omit\*/, "processSafe":true, "parserFunc": function parser_omit() {} }; },{}],14:[function(require,module,exports){ var csvline=require("./csvline"); /** * Convert data chunk to csv lines with cols * @param {[type]} data [description] * @param {[type]} params [description] * @return {[type]} {lines:[[col1,col2,col3]],partial:String} */ module.exports=function(fileLine,params){ var lines=fileLine.lines; var csvLines=csvline(lines,params); return { lines:csvLines.lines, partial:csvLines.partial+fileLine.partial } } },{"./csvline":5}],15:[function(require,module,exports){ var getEol=require("./getEol"); /** * convert data chunk to file lines array * @param {string} data data chunk as utf8 string * @param {object} param Converter param object * @return {Object} {lines:[line1,line2...],partial:String} */ module.exports=function(data,param){ var eol=getEol(data,param); var lines= data.split(eol); var partial=lines.pop(); // if (param.ignoreEmpty){ // var trimmedLines=[]; // for (var i=0;i<lines.length;i++){ // trimmedLines.push(lines[i].trim()) // } // return {lines:trimmedLines,partial:partial}; // }else{ return {lines:lines,partial:partial}; // } } },{"./getEol":17}],16:[function(require,module,exports){ module.exports=getDelimiter; var defaulDelimiters=[",","|","\t",";",":"]; function getDelimiter(rowStr,param) { var checker; if (param.delimiter==="auto"){ checker=defaulDelimiters; }else if (param.delimiter instanceof Array){ checker=param.delimiter; }else{ return param.delimiter; } var count=0; var rtn=","; checker.forEach(function(delim){ var delimCount=rowStr.split(delim).length; if (delimCount>count){ rtn=delim; count=delimCount; } }); return rtn; } },{}],17:[function(require,module,exports){ //return eol from a data chunk. var eol=require("os").EOL; module.exports=function(data,param){ if (!param.eol && data) { for (var i=0;i<data.length;i++){ if (data[i]==="\r"){ if (data[i+1] === "\n"){ param.eol="\r\n"; }else{ param.eol="\r"; } return param.eol; }else if (data[i]==="\n"){ param.eol="\n"; return param.eol; } } param.eol=eol; } return param.eol; } },{"os":40}],18:[function(require,module,exports){ module.exports=constructor; module.exports.Converter = require("./Converter.js"); // module.exports.Parser = require("./parser.js"); // module.exports.parserMgr = require("./parserMgr.js"); function constructor(param,options){ return new module.exports.Converter(param,options) } },{"./Converter.js":4}],19:[function(require,module,exports){ var parserMgr = require("./parserMgr.js"); var Parser = require("./parser"); var CSVError = require("./CSVError"); var numReg = /^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$/; /** * Convert lines of csv array into json * @param {[type]} lines [[col1,col2,col3]] * @param {[type]} params Converter params with _headers field populated * @param {[type]} idx start pos of the lines * @return {[type]} [{err:null,json:obj,index:line,row:[csv row]}] */ module.exports = function (lines, params, idx) { if (params._needParseJson) { if (!params._headers) { params._headers = []; } if (!params.parseRules) { var row = params._headers; params.parseRules = parserMgr.initParsers(row, params); } return processRows(lines, params, idx); } else { return justReturnRows(lines, params, idx); } } function justReturnRows(lines, params, idx) { var rtn = []; for (var i = 0; i < lines.length; i++) { rtn.push({ err: null, json: {}, index: idx++, row: lines[i] }) } return rtn; } function processRows(csvRows, params, startIndex) { var count = csvRows.length; var res = []; for (var i = 0; i < csvRows.length; i++) { var r = processRow(csvRows[i], params, startIndex++); if (r) { res.push(r); } } return res; } function getConstParser(number, param) { var inst = new Parser("field" + number, /.*/, function (params) { var name = this.getName(); params.resultRow[name] = params.item; }, true); inst.setParam(param); return inst; } function processRow(row, param, index) { var i, item, parser, head; var parseRules = param.parseRules; if (param.checkColumn && row.length != parseRules.length) { return { err: CSVError.column_mismatched(index) } } var headRow = param._headers; var resultRow = convertRowToJson(row, headRow, param); if (resultRow) { return { json: resultRow, index: index, row: row }; } else { return null; } } function convertRowToJson(row, headRow, param) { var hasValue = false; var resultRow = {}; for (i = 0; i < row.length; i++) { item = row[i]; if (param.ignoreEmpty && item === '') { continue; } hasValue = true; // parser = parseRules[i]; // if (!parser) { // parser = parseRules[i] = getConstParser(i + 1, param); // } head = headRow[i]; if (!head || head === "") { head = headRow[i] = "field" + (i + 1); // parser.initHead(head); } var flag = getFlag(head, i, param) if (flag === 'omit') { continue } if (param.checkType) { convertFunc = checkType(item, head, i, param) item = convertFunc(item) } var title = getTitle(head, i, param) if (flag === 'flat' || param.flatKeys) { resultRow[title] = item } else { setPath(resultRow, title, item) } // _.set(resultRow,head,item) // parser.parse({ // head: head, // item: item, // itemIndex: i, // rawRow: row, // resultRow: resultRow, // rowIndex: index, // config: param || {} // }); } if (hasValue) { return resultRow } else { return false } } function setPath(json, path, value) { var _set = require('lodash/set') var pathArr = path.split('.') if (pathArr.length === 1) { json[path] = value; } else { _set(json, path, value) } } function getFlag(head, i, param) { if (typeof param._headerFlag[i] === "string") { return param._headerFlag[i] } else { if (head.indexOf('*omit*') > -1) { return param._headerFlag[i] = 'omit' } else if (head.indexOf('*flat*') > -1) { return param._headerFlag[i] = 'flat' } else { return param._headerFlag[i] = '' } } } function getTitle(head, i, param) { if (param._headerTitle[i]) { return param._headerTitle[i] } else { var flag = getFlag(head, i, param) var str = head.replace(flag, '') str = str.replace('string#!', '').replace('number#!', '') return param._headerTitle[i] = str } } function checkType(item, head, headIdx, param) { if (param._headerType[headIdx]) { return param._headerType[headIdx] } else { if (head.indexOf('number#!') > -1) { return param._headerType[headIdx] = numberType } else if (head.indexOf('string#!') > -1) { return param._headerType[headIdx] = stringType } else if (param.checkType) { return param._headerType[headIdx] = dynamicType } else { return param._headerType[headIdx] = stringType } } } function numberType(item) { var rtn = parseFloat(item) if (isNaN(rtn)) { return item; } return rtn; } function stringType(item) { return item.toString(); } function dynamicType(item) { var trimed = item.trim(); if (trimed === "") { return stringType(item); } if (numReg.test(trimed)) { return numberType(item) } else if (trimed.length === 5 && trimed.toLowerCase() === "false" || trimed.length === 4 && trimed.toLowerCase() === "true") { return booleanType(item); } else if (trimed[0] === "{" && trimed[trimed.length - 1] === "}" || trimed[0] === "[" && trimed[trimed.length - 1] === "]") { return jsonType(item); } else { return stringType(item); } } function booleanType(item) { var trimed = item.trim(); if (trimed.length === 5 && trimed.toLowerCase() === "false") { return false; } else { return true; } } function jsonType(item) { try { return JSON.parse(item); } catch (e) { return item; } } // function dynamicType(item) { // var trimed = item.trim(); // if (trimed === "") { // return trimed; // } // if (!isNaN(trimed)) { // return parseFloat(trimed); // } else if (trimed.length === 5 && trimed.toLowerCase() === "false") { // return false; // } else if (trimed.length === 4 && trimed.toLowerCase() === "true") { // return true; // } else if (trimed[0] === "{" && trimed[trimed.length - 1] === "}" || trimed[0] === "[" && trimed[trimed.length - 1] === "]") { // try { // return JSON.parse(trimed); // } catch (e) { // return item; // } // } else { // return item; // } // } },{"./CSVError":3,"./parser":20,"./parserMgr.js":21,"lodash/set":128}],20:[function(require,module,exports){ var explicitTypes = ["number", "string"]; function Parser(name, regExp, parser, processSafe) { this.name = typeof name === "undefined" ? "Default" : name; this.regExp = null; this.type = ""; this.processSafe = processSafe; if (typeof regExp !== "undefined") { if (typeof regExp === "string") { this.regExp = new RegExp(regExp); } else { this.regExp = regExp; } } if (typeof parser !== "undefined") { this.parse = parser; } } // var numReg = /^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$/; Parser.prototype.convertType = function(item) { var type=this.type; if (type === 'number') { var rtn = parseFloat(item); if (isNaN(rtn)) { return 0; } else { return rtn; } } else if (this.param && this.param.checkType && type === '') { var trimed = item.trim(); if (trimed === ""){ return trimed; } if (!isNaN(trimed)) { return parseFloat(trimed); } else if (trimed.length === 5 && trimed.toLowerCase() === "false") { return false; } else if (trimed.length === 4 && trimed.toLowerCase() === "true") { return true; } else if (trimed[0] === "{" && trimed[trimed.length - 1] === "}" || trimed[0] === "[" && trimed[trimed.length - 1]==="]") { try { return JSON.parse(trimed); } catch (e) { return item; } } else { return item; } } return item; } Parser.prototype.setParam = function(param) { this.param = param; } Parser.prototype.test = function(str) { return this.regExp && this.regExp.test(str); }; Parser.prototype.parse = function(params) { params.resultRow[params.head] = params.item; }; Parser.prototype.getHeadStr = function() { if (this.headStr) { return this.headStr; } else { var head = this.head; this.headStr = head.replace(this.regExp, ''); if (!this.headStr) { this.headStr = "Unknown Header"; } return this.getHeadStr(); } }; Parser.prototype.getHead = function() { return this.head; }; Parser.prototype.initHead = function(columnTitle) { this.head = columnTitle; var wholeHead = columnTitle.replace(this.regExp, ''); //init type && headStr var splitArr = wholeHead.split("#!"); if (splitArr.length === 1) { //no explicit type this.headStr = splitArr[0]; } else { var type = splitArr.shift(); if (explicitTypes.indexOf(type.toLowerCase()) > -1) { this.type = type; this.headStr = splitArr.join("#!"); } else { //no explicit type this.headStr = wholeHead; } } if (!this.headStr) { this.headStr = wholeHead ? wholeHead : "Unknown Head"; } } Parser.prototype.clone = function() { var obj = Object.create(this); var newParser = new Parser(); for (var key in obj) { newParser[key] = obj[key]; } return newParser; //return new Parser(this.name, this.regExp, this.parse, this.processSafe); }; Parser.prototype.getName = function() { return this.name; }; module.exports = Parser; },{}],21:[function(require,module,exports){ //implementation var registeredParsers = []; var Parser = require("./parser.js"); var defaultParser = require("./defaultParsers"); function registerParser (parser) { if (parser instanceof Parser && registeredParsers.indexOf(parser) === -1) { registeredParsers.push(parser); // TODO indexOf doesn't work with object references } } function getParser (columnTitle, param) { var inst, parser; function getParserByName (parserName) { var parser; registeredParsers.forEach(function(p){ if (p.getName() === parserName){ parser=p; } }); if (parser) { var inst = parser.clone(); return inst; } return new Parser(); //TODO remove new } columnTitle = columnTitle ? columnTitle : ''; registeredParsers.forEach(function(p){ if (p.test(columnTitle)){ parser=p; } }); if (parser) { inst = parser.clone(); inst.head = columnTitle; } else { inst = getParserByName("json", columnTitle); } inst.setParam(param); inst.initHead(columnTitle); return inst; } function addParser (name, regExp, parseFunc) { var parser = new Parser(name, regExp, parseFunc,false); //TODO remove new registerParser(parser); } function addSafeParser(parserPath){ //TODO impl } function initParsers (row, param) { var parsers = []; row.forEach(function (columnTitle) { parsers.push(getParser(columnTitle, param)); }); return parsers; } defaultParser.forEach(function (parserCfg){ //TODO refactor this addParser(parserCfg.name, parserCfg.regExp, parserCfg.parserFunc,parserCfg.processSafe); }); //module interfaces module.exports.addParser = addParser; module.exports.initParsers = initParsers; module.exports.getParser = getParser; },{"./defaultParsers":8,"./parser.js":20}],22:[function(require,module,exports){ var getDelimiter = require("./getDelimiter"); /** * Convert a line of string to csv columns according to its delimiter * @param {[type]} rowStr [description] * @param {[type]} param [Converter param] * @return {[type]} {cols:["a","b","c"],closed:boolean} the closed field indicate if the row is a complete row */ module.exports = function rowSplit(rowStr, param) { if (rowStr === "") { return { cols: [], closed: true }; } var quote = param.quote; var trim = param.trim; var escape = param.escape; if (param.delimiter instanceof Array || param.delimiter.toLowerCase() === "auto") { param.delimiter = getDelimiter(rowStr, param); } var delimiter = param.delimiter; var rowArr = rowStr.split(delimiter); if (quote === "off") { return { cols: rowArr, closed: true }; } var row = []; var inquote = false; var quoteBuff = ''; for (var i = 0; i < rowArr.length; i++) { var e = rowArr[i]; if (!inquote && trim) { e = e.trim(); } var len = e.length; if (!inquote) { if (isQuoteOpen(e, param)) { //quote open e = e.substr(1); if (isQuoteClose(e, param)) { //quote close e = e.substring(0, e.length - 1); e = _escapeQuote(e, quote, escape);; row.push(e); continue; } else { inquote = true; quoteBuff += e; continue; } } else { row.push(e); continue; } } else { //previous quote not closed if (isQuoteClose(e, param)) { //close double quote inquote = false; e = e.substr(0, len - 1); quoteBuff += delimiter + e; quoteBuff = _escapeQuote(quoteBuff, quote, escape); if (trim) { quoteBuff = quoteBuff.trimRight(); } row.push(quoteBuff); quoteBuff = ""; } else { quoteBuff += delimiter + e; } } } if (!inquote && param._needFilterRow) { row = filterRow(row, param); } return { cols: row, closed: !inquote }; // if (param.workerNum<=1){ // }else{ // if (inquote && quoteBuff.length>0){//for multi core, quote will be closed at the end of line // quoteBuff=_escapeQuote(quoteBuff,quote,escape);; // if (trim){ // quoteBuff=quoteBuff.trimRight(); // } // row.push(quoteBuff); // } // return {cols:row,closed:true}; // } } function filterRow(row, param) { if (param.ignoreColumns instanceof Array && param.ignoreColumns.length > 0) { for (var irow = 0; irow < param.ignoreColumns.length; irow++) { if (param.ignoreColumns[irow] >= 0) { row.splice(param.ignoreColumns[irow], 1); } } } if (param.includeColumns instanceof Array && param.includeColumns.length > 0) { var cleanRowArr = []; for (var irow = 0; irow < param.includeColumns.length; irow++) { if (param.includeColumns[irow] >= 0) { cleanRowArr.push(row[param.includeColumns[irow]]); } } row = cleanRowArr; } return row; } function isQuoteOpen(str, param) { var quote = param.quote; var escape = param.escape; return str[0] === quote && ( str[1] !== quote || str[1] === escape && (str[2] === quote || str.length === 2)); } function isQuoteClose(str, param) { var quote = param.quote; var count = 0; var idx = str.length - 1; var escape = param.escape; while (str[idx] === quote || str[idx] === escape) { idx--; count++; } return count % 2 !== 0; } function twoDoubleQuote(str, quote) { var twoQuote = quote + quote; var curIndex = -1; while ((curIndex = str.indexOf(twoQuote, curIndex)) > -1) { str = str.substring(0, curIndex) + str.substring(++curIndex); } return str; } var cachedRegExp = {} function _escapeQuote(segment, quote, escape) { var key = "es|" + quote + "|" + escape; if (cachedRegExp[key] === undefined) { if (escape === "\\") { escape = "\\\\"; } cachedRegExp[key] = new RegExp(escape + quote, 'g'); } var regExp = cachedRegExp[key]; return segment.replace(regExp, quote); } },{"./getDelimiter":16}],23:[function(require,module,exports){ (function (process,__dirname){ module.exports=workerMgr; var spawn=require("child_process").spawn; var eom="\x03" var eom1="\x0e" var eom2="\x0f" var CSVError=require('./CSVError') function workerMgr(){ var exports={ initWorker:initWorker, sendWorker:sendWorker, setParams:setParams, drain:function(){}, isRunning:isRunning, destroyWorker:destroyWorker } var workers=[]; var running=0; var waiting=null; function initWorker(num,params){ workers=[]; running=0; waiting=null; for (var i=0;i<num;i++){ workers.push(new Worker(params)); } } function isRunning(){ return running>0; } function destroyWorker(){ workers.forEach(function(w){ w.destroy(); }); } function sendWorker(data,startIdx,transformCb,cbResult){ if (workers.length>0){ var worker=workers.shift(); running++; worker.parse(data,startIdx,function(result){ // var arr=JSON.parse(result); // arr.forEach(function(item){ // console.log('idx',item.index) // }) workers.push(worker) cbResult(result,startIdx); running--; if (waiting === null && running===0){ exports.drain(); }else if (waiting){ sendWorker.apply(this,waiting) waiting=null; } }); process.nextTick(transformCb) }else{ waiting=[data,startIdx,transformCb,cbResult]; } } function setParams(params){ workers.forEach(function(w){ w.setParams(params); }); } return exports; } function Worker(params){ this.cp=spawn(process.execPath,[__dirname+"/worker.js"],{ env:{ child:true }, stdio:['pipe','pipe',2,'ipc'] // stdio:[0,1,2,'ipc'] }); this.setParams(params); this.cp.on("message",this.onChildMsg.bind(this)); this.buffer=""; var self=this; this.cp.stdout.on("data",function(d){ var str=d.toString("utf8"); var all=self.buffer+str; var cmdArr=all.split(eom) while (cmdArr.length >1){ self.onChildMsg(cmdArr.shift()); } self.buffer=cmdArr[0]; }) } Worker.prototype.setParams=function(params){ var msg="0"+JSON.stringify(params); this.sendMsg(msg); } /** * msg is like: * <cmd><data> * cmd is from 0-9 */ Worker.prototype.onChildMsg=function(msg){ if (msg){ var cmd=msg[0]; var data=msg.substr(1); switch (cmd){ case "0": //total line number of current chunk if (this.cbLine){ var sp=data.split("|"); var len=parseInt(sp[0]); var partial=sp[1]; this.cbLine(len,partial); } break; case "1": // json array of current chunk if (this.cbResult){ var rows=data.split(eom1); rows.pop(); var res=[]; rows.forEach(function(row){ var sp=row.split(eom2); res.push({ index:sp[0], row:sp[1], err:sp[2]?CSVError.fromArray(JSON.parse(sp[2])):null, json:sp[3] }) }) this.cbResult(res); } break; } } } Worker.prototype.parse=function(data,startIdx,cbResult){ this.cbResult=cbResult; var msg="1"+startIdx+"|"+data; this.sendMsg(msg); } Worker.prototype.destroy=function(){ this.cp.kill(); } Worker.prototype.sendMsg=function(msg){ this.cp.stdin.write(msg+eom,"utf8") // this.cp.send(msg) } }).call(this,require('_process'),"/libs/core") },{"./CSVError":3,"_process":41,"child_process":30}],24:[function(require,module,exports){ //deprecated but leave it for backword compatibility module.exports.core=require("./core"); //live apis module.exports=require("./core"); module.exports.interfaces = require("./interfaces"); },{"./core":18,"./interfaces":27}],25:[function(require,module,exports){ module.exports = require("./main.js"); },{"./main.js":26}],26:[function(require,module,exports){ (function (process){ /** * Convert input to process stdout */ //implementation var Converter = require("../../core/Converter.js"); function _initConverter(){ var csvConverter = new Converter(); var started = false; var writeStream = process.stdout; csvConverter.on("record_parsed",function(rowJSON){ if (started){ writeStream.write(",\n"); } writeStream.write(JSON.stringify(rowJSON)); //write parsed JSON object one by one. if (started === false){ started = true; } }); writeStream.write("[\n"); //write array symbol csvConverter.on("end_parsed",function(){ writeStream.write("\n]"); //end array symbol }); csvConverter.on("error",function(err){ console.error(err); process.exit(-1); }); return csvConverter; } function convertFile(fileName){ var csvConverter=_initConverter(); csvConverter.from(fileName); } function convertString(csvString){ var csvConverter=_initConverter(); csvConverter.from(csvString); } //module interfaces module.exports.convertFile = convertFile; module.exports.convertString = convertString; }).call(this,require('_process')) },{"../../core/Converter.js":4,"_process":41}],27:[function(require,module,exports){ module.exports.web=require("./web"); module.exports.cli=require("./cli"); },{"./cli":25,"./web":28}],28:[function(require,module,exports){ module.exports = require("./webServer.js"); },{"./webServer.js":29}],29:[function(require,module,exports){ var http = require("http"); var Converter = require("../../core/Converter.js"); function startWebServer (args) { args = args || {}; var serverArgs = { port: args.port || '8801', urlpath: args.urlpath || '/parseCSV' }; var server = http.createServer(); server.on("request", function(req, res){ if (req.url === serverArgs.urlpath && req.method === "POST"){ req.pipe(new Converter({constructResult:false})).pipe(res); } else { res.end("Please post data to: " + serverArgs.urlpath); } }); server.listen(serverArgs.port); console.log("CSV Web Server Listen On:" + serverArgs.port); console.log("POST to " + serverArgs.urlpath + " with CSV data to get parsed."); return server; } module.exports.startWebServer = startWebServer; },{"../../core/Converter.js":4,"http":60}],30:[function(require,module,exports){ },{}],31:[function(require,module,exports){ arguments[4][30][0].apply(exports,arguments) },{"dup":30}],32:[function(require,module,exports){ (function (global){ /*! * The buffer module from node.js, for the browser. * * @author Feross Aboukhadijeh <feross@feross.org> <http://feross.org> * @license MIT */ /* eslint-disable no-proto */ 'use strict' var base64 = require('base64-js') var ieee754 = require('ieee754') var isArray = require('isarray') exports.Buffer = Buffer exports.SlowBuffer = SlowBuffer exports.INSPECT_MAX_BYTES = 50 Buffer.poolSize = 8192 // not used by this implementation var rootParent = {} /** * If `Buffer.TYPED_ARRAY_SUPPORT`: * === true Use Uint8Array implementation (fastest) * === false Use Object implementation (most compatible, even IE6) * * Browsers that support typed arrays are IE 10+, Firefox 4+, Chrome 7+, Safari 5.1+, * Opera 11.6+, iOS 4.2+. * * Due to various browser bugs, sometimes the Object implementation will be used even * when the browser supports typed arrays. * * Note: * * - Firefox 4-29 lacks support for adding new properties to `Uint8Array` instances, * See: https://bugzilla.mozilla.org/show_bug.cgi?id=695438. * * - Safari 5-7 lacks support for changing the `Object.prototype.constructor` property * on objects. * * - Chrome 9-10 is missing the `TypedArray.prototype.subarray` function. * * - IE10 has a broken `TypedArray.prototype.subarray` function which returns arrays of * incorrect length in some situations. * We detect these buggy browsers and set `Buffer.TYPED_ARRAY_SUPPORT` to `false` so they * get the Object implementation, which is slower but behaves correctly. */ Buffer.TYPED_ARRAY_SUPPORT = global.TYPED_ARRAY_SUPPORT !== undefined ? global.TYPED_ARRAY_SUPPORT : typedArraySupport() function typedArraySupport () { function Bar () {} try { var arr = new Uint8Array(1) arr.foo = function () { return 42 } arr.constructor = Bar return arr.foo() === 42 && // typed array instances can be augmented arr.constructor === Bar && // constructor can be set typeof arr.subarray === 'function' && // chrome 9-10 lack `subarray` arr.subarray(1, 1).byteLength === 0 // ie10 has broken `subarray` } catch (e) { return false } } function kMaxLength () { return Buffer.TYPED_ARRAY_SUPPORT ? 0x7fffffff : 0x3fffffff } /** * Class: Buffer * ============= * * The Buffer constructor returns instances of `Uint8Array` that are augmented * with function properties for all the node `Buffer` API functions. We use * `Uint8Array` so that square bracket notation works as expected -- it returns * a single octet. * * By augmenting the instances, we can avoid modifying the `Uint8Array` * prototype. */ function Buffer (arg) { if (!(this instanceof Buffer)) { // Avoid going through an ArgumentsAdaptorTrampoline in the common case. if (arguments.length > 1) return new Buffer(arg, arguments[1]) return new Buffer(arg) } if (!Buffer.TYPED_ARRAY_SUPPORT) { this.length = 0 this.parent = undefined } // Common case. if (typeof arg === 'number') { return fromNumber(this, arg) } // Slightly less common case. if (typeof