wsv
Version:
Stream-based stringifier for data with character-separated values
257 lines (214 loc) • 6.57 kB
JavaScript
var stream = require("stream");
// Named dialects that can be selected by name or via the `preset` option.
// Each dialect is the shared baseline plus the handful of settings that differ.
// Delimiters may be given as strings or as byte values; they are converted to
// Buffers when the options are parsed.
var presets = (function(){

	// settings every dialect starts from (these are exactly the csv settings)
	var baseline = {
		sep: ",",
		quote: "\"",
		escape: "\\",
		break: "\n",
		empty: "",
		buffers: true,
		objects: true,
		unbreak: true,
		untab: false,
		sanitize: false,
		formulaescape: false,
	};

	// copy the baseline and lay the dialect's own settings over it
	function dialect(overrides) {
		var settings = {};
		Object.keys(baseline).forEach(function(key){
			settings[key] = baseline[key];
		});
		Object.keys(overrides).forEach(function(key){
			settings[key] = overrides[key];
		});
		return settings;
	}

	return {
		// comma separated values
		csv: dialect({}),
		// tab separated values; tabs inside fields are rewritten
		tsv: dialect({ sep: "\t", untab: true }),
		// semicolon separated values (what excel does in some localizations)
		ssv: dialect({ sep: 0x3b }),
		// ascii separated values: unit separator between fields, record separator between records
		// NOTE(review): in ASCII 0x02 is STX and 0x03 is ETX, yet the opening quote
		// (startquote) is 0x03 and the closing quote is 0x02 - confirm this order is intended
		asv: dialect({ sep: 0x1f, quote: 0x2, startquote: 0x3, escape: 0x1b, break: 0x1e, unbreak: false }),
	};

})();
// Create a stringifier. Works with or without `new`.
//
// opts: preset name, separator, or options object; passed to parseOpts unchanged
// returns: the object-mode Transform stream (not the wsv instance), whose
//          chunks are forwarded to the instance's handle method
var wsv = module.exports = function(opts){

	// called as a plain function: construct an instance and hand back its result
	if (!(this instanceof wsv)) return new wsv(opts);

	var instance = this;

	// forward every chunk together with the stream's completion callback
	function transform(chunk, encoding, fn) {
		instance.handle(chunk, fn);
	}

	instance.header = null;
	instance.opts = instance.parseOpts(opts);
	instance.stream = new stream.Transform({ objectMode: true, transform: transform });

	return instance.stream;
};
// Encode one field as bytes, adding quotes and escapes where the configured dialect needs them.
//
// field: string (or anything Buffer.from accepts); strings are processed as their UTF-8 bytes
// returns: Buffer holding the encoded field, without separator or record break
//
// Reads this.opts: sep and break (Buffers), quote, startquote and escape (Buffers, only
// their first byte is used; startquote may be null) and the flags unbreak, untab,
// sanitize and formulaescape.
wsv.prototype.stringify = function(field) {
	var self = this;
	var opts = self.opts;
	var bytes = Buffer.from(field);
	var collect = [];

	var closequote = opts.quote[0];
	var openquote = (opts.startquote !== null) ? opts.startquote[0] : closequote;
	var escape = opts.escape[0];

	// quotes are necessary when the raw field contains something a reader would take
	// for structure: the separator, a quote character, or a record break that unbreak
	// is not going to rewrite
	var needquotes = bytes.includes(opts.sep)
		|| bytes.includes(openquote)
		|| bytes.includes(closequote)
		|| (!opts.unbreak && bytes.includes(opts.break));

	if (needquotes) collect.push(openquote);

	for (var i = 0; i < bytes.length; i++) {
		var ch = bytes[i];

		// formula escape: prefix characters that might lead excel to interpret fields as formulae with 0x27 to prevent this
		if (i === 0 && opts.formulaescape && (ch === 0x22 || ch === 0x3d || ch === 0x2b || ch === 0x2d || ch === 0x40 || ch === 0x9 || ch === 0xd)) collect.push(0x27);

		// sanitize: remove non printable characters
		if (opts.sanitize) {
			// C0 controls and DEL are single bytes
			if (ch <= 0x1f || ch === 0x7f) continue;
			// C1 controls (U+0080..U+009F) are the two byte sequences 0xc2 0x80..0x9f in UTF-8;
			// a lone 0x80..0x9f is a continuation byte of some other character and must stay
			if (ch === 0xc2 && bytes[i+1] >= 0x80 && bytes[i+1] <= 0x9f) {
				i++;
				continue;
			}
		}

		// untab: tab becomes backslash-t
		if (opts.untab && ch === 0x9) {
			collect.push(0x5c, 0x74);
			continue;
		}

		// unbreak: LF, VT, FF and CR become backslash-n, -v, -f and -r
		if (opts.unbreak && ch >= 0xa && ch <= 0xd) {
			collect.push(0x5c, [0x6e, 0x76, 0x66, 0x72][ch - 0xa]);
			continue;
		}

		// with backslash sequences in use a literal backslash has to be doubled; inside
		// quotes with a backslash escape character the escaping below already does that
		if (ch === 0x5c && (opts.untab || opts.unbreak) && (!needquotes || escape !== 0x5c)) {
			collect.push(0x5c, 0x5c);
			continue;
		}

		// inside quotes the closing quote and the escape character get one escape each,
		// also when both are the same byte (quote doubling)
		if (needquotes && (ch === closequote || ch === escape)) collect.push(escape);

		collect.push(ch);
	}

	if (needquotes) collect.push(closequote);

	return Buffer.from(collect);
};
// Encode one record as a line and push it to the output stream.
//
// record: array of field values
//   number, boolean   written via toString()
//   string            encoded with stringify
//   Buffer            written as "0x" followed by the hex digits
//   other objects     JSON text if opts.objects is set, toString() otherwise; both encoded with stringify
//   everything else   (null, undefined, functions, ...) written as opts.empty
// returns: this
//
// NOTE(review): opts.buffers is not consulted here; Buffers are always hex encoded.
wsv.prototype.assemble = function(record) {
	var self = this;
	var line = [];

	record.forEach(function(field, idx){
		if (idx > 0) line.push(self.opts.sep);

		switch (typeof field) {
			case "number":
			case "boolean":
				line.push(Buffer.from(field.toString()));
				return;
			case "string":
				line.push(self.stringify(field));
				return;
			case "object":
				if (field instanceof Buffer) {
					line.push(Buffer.from("0x"+field.toString("hex")));
					return;
				}
				// null is not an instance of Object and ends up as an empty entry
				if (field instanceof Object) {
					// the text form can contain separators or quotes (think [1,2].toString()),
					// so it has to pass through stringify in both modes
					var text = (self.opts.objects) ? JSON.stringify(field) : field.toString();
					if (typeof text === "string") {
						line.push(self.stringify(text));
						return;
					}
				}
				break;
		}

		// everything we can't handle gets an empty entry
		line.push(self.opts.empty);
	});

	line.push(self.opts.break);
	self.stream.push(Buffer.concat(line));

	return this;
};
// Process one chunk written to the stream and emit a line per record.
//
// data: a plain object (one record) or an array of records, which may nest;
//       anything else is ignored
// done: callback, invoked exactly once after the chunk has been processed
// returns: whatever done returns
//
// The first record fixes the columns: opts.header if set, the record's own keys
// otherwise. The header line is emitted right before that first record.
wsv.prototype.handle = function(data, done){
	var self = this;

	if (data instanceof Array) {
		// records are processed synchronously, so the children get a no-op callback
		// and completion is signalled once for the whole chunk below
		data.forEach(function(record){
			self.handle(record, function(){});
		});
	} else if (typeof data === "object" && data instanceof Object && data.constructor === Object) {
		// obtain header from object keys
		if (!self.header) {
			self.header = (!!self.opts.header) ? self.opts.header : Object.keys(data);
			self.assemble(self.header);
		}
		self.assemble(self.header.map(function(k){ return data[k]; }));
	}

	return done();
};
// Normalize user supplied options into the complete option set used by the stringifier.
//
// opts: one of
//   - the name of a preset
//   - a plain options object, optionally naming a preset in `preset`; options given
//     explicitly win over the preset's
//   - anything else, which is taken to be the separator
// returns: fresh object with sep, quote, startquote, escape, break, empty (converted by
//          parseOpt), header (array or null) and the boolean flags buffers, objects,
//          unbreak, untab, sanitize, formulaescape
//
// The object passed in is not modified.
wsv.prototype.parseOpts = function(opts){
	var self = this;
	var has = function(obj, key){ return Object.prototype.hasOwnProperty.call(obj, key); };

	// opts is a preset, use preset
	if (typeof opts === "string" && has(presets, opts)) opts = presets[opts];

	// if opts is not an object, assume opts is the separator
	if (typeof opts !== "object" || !(opts instanceof Object) || opts.constructor !== Object) opts = { sep: opts };

	// lay the explicit options over the named preset, in a copy
	var merged = {};
	if (has(opts, "preset") && has(presets, opts.preset)) Object.keys(presets[opts.preset]).forEach(function(k){
		merged[k] = presets[opts.preset][k];
	});
	Object.keys(opts).forEach(function(k){
		merged[k] = opts[k];
	});

	// boolean option with a fallback for when it is not given at all
	var flag = function(name, fallback){
		return has(merged, name) ? !!merged[name] : fallback;
	};

	return {
		sep: self.parseOpt(merged.sep, Buffer.from(",")),
		quote: self.parseOpt(merged.quote, Buffer.from("\"")),
		startquote: self.parseOpt(merged.startquote, null),
		escape: self.parseOpt(merged.escape, Buffer.from("\\")),
		break: self.parseOpt(merged.break, Buffer.from("\n")),
		empty: self.parseOpt(merged.empty, Buffer.from("")),
		header: (has(merged, "header") && Array.isArray(merged.header)) ? merged.header : null,
		buffers: flag("buffers", true),
		objects: flag("objects", true),
		unbreak: flag("unbreak", true),
		untab: flag("untab", true),
		sanitize: flag("sanitize", false),
		formulaescape: flag("formulaescape", false),
	};
};
// Convert a single delimiter-like option to a Buffer.
//
// v: the option value
//   number               one byte with that value
//   string               its UTF-8 bytes
//   array of byte values those bytes
//   Buffer / Uint8Array  a copy of its bytes
// d: default, returned as is when v is falsy (including 0 and ""), of an
//    unsupported type, or has no bytes
// returns: Buffer, or d
wsv.prototype.parseOpt = function(v,d){
	if (!v) return d;

	switch (typeof v) {
		case "number": return Buffer.from([ v ]);
		case "string": return Buffer.from( v );
		case "object":
			if (v instanceof Array || v instanceof Uint8Array) {
				// a delimiter without bytes is as good as none
				return (v.length > 0) ? Buffer.from( v ) : d;
			}
			break;
	}

	return d;
};