/*
 * csv-rxjs-kit
 *
 * A kit of RxJS operators to handle CSV formatted (RFC 4180, MIME Type 'text/csv') data.
 */
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.csvPropValues = exports.csvPropNames = exports.csvAssembler = exports.csvJustifier = exports.csvConvert = exports.csvFromArray = exports.csvValidateRecord = exports.csvInjectHeader = exports.csvDropHeader = exports.csvDropEmpty = exports.csvParse = exports.csvStringify = void 0;
const rxjs_1 = require("rxjs");
const operators_1 = require("rxjs/operators");
/**
 * CSV formatter RxJS operator.
 *
 * Builds an operator that turns every `csvRecord` emitted by the source Observable into one
 * line of text formatted as MIME Type 'text/csv' [RFC 4180](https://www.ietf.org/rfc/rfc4180.txt).
 * Fields are joined with `,`. A field that contains a comma, a double quote, CR or LF is wrapped
 * in double quotes and its inner double quotes are doubled.
 * Every line except the first one is prefixed with the line break, so no line break follows
 * the last record unless `opt.last_break` is set.
 *
 * Parameter `opt.delimiter` sets the line break sequence, default is `CRLF`.
 *
 * Parameter `opt.last_break` adds one more line break (via `endWith`) after the last record.
 *
 * Parameter `opt.force_quote` wraps every field in double quotes.
 *
 * @param opt - options
 */
function csvStringify(opt) {
    const options = opt == null ? {} : opt;
    const lineBreak = options.delimiter || '\r\n';
    const quoteAll = options.force_quote;
    // Characters that make quoting mandatory.
    const special = /[,"\r\n]/;
    const encodeField = (field) => {
        if (quoteAll || special.test(field)) {
            return `"${field.replace(/"/g, '""')}"`;
        }
        return field;
    };
    const toLine = (0, operators_1.map)((record, index) => {
        // The separator goes in front of every record but the first one.
        const prefix = index ? lineBreak : '';
        return prefix + record.map((field) => encodeField(field)).join(',');
    });
    if (options.last_break) {
        return (data$) => data$.pipe(toLine, (0, operators_1.endWith)(lineBreak));
    }
    return toLine;
}
exports.csvStringify = csvStringify;
/**
 * CSV parser RxJS operator.
 *
 * Returns an Observable that parses text emitted by source Observable and converts it to `csvRecord`s.
 * Input text should be MIME Type 'text/csv' as described in [RFC 4180](https://www.ietf.org/rfc/rfc4180.txt).
 *
 * The text may arrive in arbitrary chunks: a field, an escaped quote or a `CRLF` pair may be
 * split between two chunks. `CRLF`, a lone `LF` and a lone `CR` all end a record.
 * An empty line produces an empty record (`[]`).
 *
 * All parse errors are reported as `SyntaxError` using an Observer's `error()` method.
 * The message starts with `CSV4180 [<line>, <column>]: ` (both counted from 1, the column in
 * UTF-16 code units). Once a parse error has been reported the rest of the input is ignored.
 */
function csvParse() {
    return function (csv$) {
        return new rxjs_1.Observable((observer) => {
            // Sticky tokenizer: a separator, a quote, one line break, or a run of plain text.
            // `\r\n` is listed before `\r` so that an unsplit CRLF is a single token.
            const R = /,|"|\n|\r\n|\r|[^,"\r\n]+/y;
            // Parser states:
            //   IDLE    - at the start of a record
            //   CR      - a lone CR has just ended a record; a directly following LF belongs to it
            //   COMMA   - right after a field separator
            //   TEXT    - inside a non-quoted field
            //   ESCAPED - inside a quoted field
            //   CLOSED  - a quote was seen inside a quoted field (closing quote or first half of `""`)
            let state = 'IDLE';
            let r = []; // record being collected
            let v = ''; // field being collected
            let rIndex = 1; // current line, for error messages
            let cIndex = 1; // current column, for error messages
            let lastToken = '';
            let failed = false; // set once a syntax error has been reported
            function err(msg) {
                failed = true;
                observer.error(new SyntaxError(`CSV4180 [${rIndex}, ${cIndex}]: ` + msg));
            }
            function next() {
                observer.next(r);
                r = [];
            }
            // Feeds one token to the state machine; returns false when parsing must stop.
            function txt(s) {
                switch (state) {
                    case 'IDLE':
                    case 'CR':
                        switch (s) {
                            case ',':
                                r.push('');
                                state = 'COMMA';
                                break;
                            case '"':
                                state = 'ESCAPED';
                                break;
                            case '\r':
                            case '\r\n':
                                next();
                                state = s === '\r' ? 'CR' : 'IDLE';
                                break;
                            case '\n':
                                // After a lone CR this LF completes a CRLF split between chunks.
                                if (state === 'CR')
                                    state = 'IDLE';
                                else
                                    next();
                                break;
                            default:
                                v += s;
                                state = 'TEXT';
                                break;
                        }
                        break;
                    case 'COMMA':
                        switch (s) {
                            case ',':
                                r.push('');
                                break;
                            case '"':
                                state = 'ESCAPED';
                                break;
                            case '\r':
                            case '\r\n':
                            case '\n':
                                r.push('');
                                next();
                                state = s === '\r' ? 'CR' : 'IDLE';
                                break;
                            default:
                                v += s;
                                state = 'TEXT';
                                break;
                        }
                        break;
                    case 'TEXT':
                        switch (s) {
                            case ',':
                                r.push(v);
                                v = '';
                                state = 'COMMA';
                                break;
                            case '"':
                                err('Quote in non-escaped data.');
                                return false;
                            case '\r':
                            case '\r\n':
                            case '\n':
                                r.push(v);
                                v = '';
                                next();
                                state = s === '\r' ? 'CR' : 'IDLE';
                                break;
                            default:
                                v += s;
                                break;
                        }
                        break;
                    case 'ESCAPED':
                        // Everything but a quote is literal data, including commas and line breaks.
                        if (s === '"')
                            state = 'CLOSED';
                        else
                            v += s;
                        break;
                    case 'CLOSED':
                        switch (s) {
                            case ',':
                                r.push(v);
                                v = '';
                                state = 'COMMA';
                                break;
                            case '"':
                                // `""` inside a quoted field is one literal quote.
                                v += s;
                                state = 'ESCAPED';
                                break;
                            case '\r':
                            case '\r\n':
                            case '\n':
                                r.push(v);
                                v = '';
                                next();
                                state = s === '\r' ? 'CR' : 'IDLE';
                                break;
                            default:
                                err('Invalid escape sequence.');
                                return false;
                        }
                        break;
                    default:
                        // No other state is ever assigned.
                        return false;
                }
                if (s === '\n' && lastToken === '\r') {
                    // Second half of a CRLF split between chunks: the position has already
                    // been moved to the next line by the CR, so it must not move again.
                }
                else if (s[0] === '\r' || s === '\n') {
                    rIndex++;
                    cIndex = 1;
                }
                else
                    cIndex += s.length;
                lastToken = s;
                return true;
            }
            // Flushes the pending record at end of input; returns false on a syntax error.
            function end() {
                if (state === 'ESCAPED') {
                    err('Closing quote is missing.');
                    return false;
                }
                if (state === 'COMMA' || state === 'TEXT' || state === 'CLOSED') {
                    r.push(v);
                    next();
                }
                return true;
            }
            return csv$.subscribe({
                next: (chunk) => {
                    // A synchronous source may keep pushing chunks after the error was reported.
                    if (failed)
                        return;
                    let m;
                    // A failed sticky match resets `lastIndex`, so the next chunk starts at 0.
                    while ((m = R.exec(chunk)) !== null && txt(m[0]))
                        ;
                },
                complete: () => {
                    if (!failed && end())
                        observer.complete();
                },
                error: (e) => {
                    if (!failed)
                        observer.error(e);
                },
            });
        });
    };
}
exports.csvParse = csvParse;
/**
 * Removes empty records.
 *
 * Returns an Observable that mirrors the source Observable but leaves out every record
 * whose `length` is zero.
 */
function csvDropEmpty() {
    const hasFields = (record) => Boolean(record.length);
    return (0, operators_1.filter)(hasFields);
}
exports.csvDropEmpty = csvDropEmpty;
/**
 * Removes header record.
 *
 * Returns an Observable that skips the first item emitted by source Observable and mirrors the rest.
 * Header records carry no dedicated tag, so the caller must be sure that the source Observable
 * really emits a header as its first item.
 *
 * @typeParam T - the main data type, might be csvRecord or any arbitrary data type
 */
function csvDropHeader() {
    // The header is exactly one item: the first one.
    const headerItems = 1;
    return (0, operators_1.skip)(headerItems);
}
exports.csvDropHeader = csvDropHeader;
/**
 * Injects a header record.
 *
 * Returns an Observable that first emits `header` and then mirrors source Observable.
 * Operators further down the pipe treat the injected item as a header record when they are
 * told that the stream starts with a header.
 *
 * @typeParam T - the main stream type
 * @param header - header record value
 */
function csvInjectHeader(header) {
    const emitHeaderFirst = (0, operators_1.startWith)(header);
    return emitHeaderFirst;
}
exports.csvInjectHeader = csvInjectHeader;
/**
 * Validates a record.
 *
 * Returns an Observable that passes every `csvRecord` emitted by source Observable through
 * `validator` and emits whatever `validator` returns.
 * `validator` is called as `validator(record, isHeader)`, where `isHeader` is truthy only for
 * the first record and only when `hdr` is truthy.
 *
 * To remove invalid records one can use `validator` to convert them to empty records and then use `csvDropEmpty` operator.
 *
 * @param hdr - does source Observable emit header 'csvRecord'?
 * @param validator - function to validate a 'csvRecord'
 * @throws everything that `validator` throws
 */
function csvValidateRecord(hdr, validator) {
    const check = (record, index) => {
        // Only the very first record can be the header.
        const isHeader = hdr && !index;
        return validator(record, isHeader);
    };
    return (0, operators_1.map)(check);
}
exports.csvValidateRecord = csvValidateRecord;
/**
 * Creates `csvRecord` for data array.
 *
 * Returns an Observable that converts every array emitted by source Observable to a `csvRecord`.
 * Every item of the input array is converted with `String(...)`; `null` and `undefined` items,
 * as well as holes of a sparse array, become `''`. The result is always a new dense array of
 * the same length, the input array is not modified.
 *
 * The same conversion is applied to every emitted array, so a header record that consists of
 * strings keeps its content.
 *
 * If you need different behavior and/or have different input data type use `csvConvert()` and custom `csvExtractor<T>`.
 */
function csvFromArray() {
    // `Array.from` visits holes as `undefined`, whereas `Array.prototype.map` would skip them
    // and leave holes in the resulting record.
    const toRecord = (obj) => Array.from(obj, (v) => String(v == null ? '' : v));
    return (0, operators_1.map)(toRecord);
}
exports.csvFromArray = csvFromArray;
/**
 * Converts stream items with a custom projection.
 *
 * Returns an Observable that applies `prj` to the items emitted by source Observable.
 *
 * If `hdr` is falsy every item is replaced by `prj(item)`.
 *
 * If `hdr` is truthy the first item is treated as a header: it is remembered and emitted
 * unchanged, and every following item is replaced by `prj(item, header)`.
 * The remembered header is kept per subscription, so one operator instance can be applied to
 * several streams, or subscribed several times, without the headers getting mixed up.
 *
 * @param hdr - does source Observable emit a header item first?
 * @param prj - projection function, called as `prj(item)` or `prj(item, header)`
 * @throws RangeError if `prj` is not provided
 */
function csvConvert(hdr, prj) {
    if (!prj)
        throw new RangeError('Data converter is not provided.');
    if (!hdr)
        return (0, operators_1.map)((rec) => prj(rec));
    // `defer` runs the factory on every subscription, giving each one its own `names`.
    return (source$) => (0, rxjs_1.defer)(() => {
        let names = undefined;
        return source$.pipe((0, operators_1.map)((obj, i) => {
            if (i !== 0)
                return prj(obj, names);
            names = obj;
            return obj;
        }));
    });
}
exports.csvConvert = csvConvert;
/**
 * Justifies record length, can be used with `csvValidateRecord()`.
 *
 * Returns a `csvRecordValidator` that compares the length of every record with the expected
 * length. According to RFC 4180 'each line should contain the same number of fields throughout
 * the file', hence any change made to a bad sized record violates the RFC.
 *
 * Parameter `opt.length` sets the initial expected length. The length of a header record
 * (a record passed with a truthy `isHeader`) replaces it. While no expected length is known,
 * records are returned untouched.
 *
 * Parameter `opt.skip_empty` lets empty records pass untouched, **violates RFC 4180**
 *
 * Parameter `opt.repair` instructs to repair bad sized records instead of throwing, **violates RFC 4180**
 *
 * Parameter `opt.filler` sets repair mode. If `opt.filler` is defined a bad sized record is
 * padded with `opt.filler` or cut to the expected length; this is done in place, on the very
 * array that was passed in. If `opt.filler` is not defined a bad sized record is replaced with
 * a new empty record.
 *
 * @param opt - validation options
 * @throws RangeError ('Invalid record.') from the returned validator when a record is bad
 * sized and `opt.repair` is not set
 */
function csvJustifier(opt) {
    const options = opt == null ? {} : opt;
    let expected = options.length;
    const skipEmpty = options.skip_empty;
    const repair = options.repair;
    const filler = options.filler;
    return (rec, isHeader) => {
        const actual = rec.length;
        if (isHeader) {
            expected = actual;
        }
        // Nothing to do: no reference length yet, the size is right, or empties are tolerated.
        if (expected === undefined || expected === actual || (actual == 0 && skipEmpty)) {
            return rec;
        }
        if (!repair) {
            throw RangeError('Invalid record.');
        }
        if (filler == undefined) {
            return [];
        }
        // Setting `length` both truncates a long record and extends a short one.
        rec.length = expected;
        if (actual < expected) {
            rec.fill(filler, actual);
        }
        return rec;
    };
}
exports.csvJustifier = csvJustifier;
/**
 * Creates an object upon `csvRecord`.
 *
 * Returns `csvBuilder<Record<string, string>>` that creates an object using a property names array and `rec` data as values.
 * Every data item of the record is added to the created object as an own enumerable property.
 * The item index selects the property name in the `names` array. If the name is missing or
 * empty, argument `extra` is used to generate the property name for index `i`.
 *
 * If `extra` is function, then property name is `<extra>(<i>)`.
 * If `extra` is string, then property name is `<extra><i>`.
 * If `extra` is undefined, then property name is `_<i>`.
 *
 * If the same name occurs more than once the last value wins.
 * A column named `__proto__` is stored as an ordinary property and never touches the
 * prototype of the created object.
 *
 * Can be used in `csvConvert()` to create simple objects.
 *
 * @see csvConvert()
 *
 * @param extra - property name generator for unlisted properties
 */
function csvAssembler(extra) {
    let nameFor;
    if (typeof extra === 'function')
        nameFor = extra;
    else if (typeof extra === 'string')
        nameFor = (i) => `${extra}${i}`;
    else
        nameFor = (i) => `_${i}`;
    return (rec, names) => {
        const fields = names || [];
        const result = {};
        rec.forEach((v, i) => {
            // A plain assignment (or `Object.assign`) would route the key `__proto__` through
            // the prototype setter and lose the value; defining the property avoids that.
            Object.defineProperty(result, fields[i] || nameFor(i), {
                value: v,
                writable: true,
                enumerable: true,
                configurable: true,
            });
        });
        return result;
    };
}
exports.csvAssembler = csvAssembler;
/**
 * Returns the names of the own enumerable properties of an object (`Object.keys`),
 * sorted alphabetically with `String.prototype.localeCompare`.
 */
function csvPropNames(obj) {
    const names = Object.keys(obj);
    names.sort((left, right) => left.localeCompare(right));
    return names;
}
exports.csvPropNames = csvPropNames;
/**
 * Creates a CSV record upon object.
 *
 * Returns `csvExtractor<Record<string, unknown>>` that builds a CSV record from the values of `obj`.
 * For every name listed in `names` the value `obj[name]` is converted with `String(...)` and
 * stored in the record, in the order of `names`; `null` and `undefined` values are replaced by `''`.
 * If `extra` is truthy the values of all properties reported by `csvPropNames(obj)` that are
 * not listed in `names` are appended after the listed ones, in alphabetical order.
 *
 * @param extra - add non listed properties values
 */
function csvPropValues(extra) {
    const toText = (value) => String(value == null ? '' : value);
    return (obj, names) => {
        const listed = names || [];
        let keys = listed;
        if (extra) {
            const unlisted = csvPropNames(obj).filter((key) => listed.indexOf(key) < 0);
            keys = listed.concat(unlisted);
        }
        return keys.map((key) => toText(obj[key]));
    };
}
exports.csvPropValues = csvPropValues;