UNPKG

csv-rxjs-kit

Version:

A kit of RxJS operators to handle CSV formatted (RFC 4180, MIME Type 'text/csv') data.

354 lines 14.3 kB
import { Observable } from 'rxjs';
import { map, skip, filter, startWith, endWith } from 'rxjs/operators';

/**
 * CSV formatter RxJS operator.
 *
 * Returns an Observable that converts to text every `csvRecord` emitted by source Observable.
 * Output text data is formatted as MIME Type 'text/csv' [RFC 4180](https://www.ietf.org/rfc/rfc4180.txt).
 *
 * Every record except the first one is prefixed with the line break, so the output
 * never ends with a line break unless `opt.last_break` is set.
 *
 * Parameter `opt.delimiter` sets line breaks type, default is `CRLF`.
 *
 * Parameter `opt.last_break` adds optional line delimiter after the last record.
 *
 * Parameter `opt.force_quote` adds quotation to every item.
 *
 * @param opt - options
 * @returns an operator mapping every record (array of strings) to one line of CSV text
 */
export function csvStringify(opt) {
    const { delimiter, force_quote: forceQuote, last_break: lastBreak } = opt || {};
    const eol = delimiter || '\r\n';
    // A field is quoted when forced or when it contains a separator, a quote or a line break;
    // embedded quotes are doubled.
    const encode = (field) => (forceQuote || /[,"\r\n]/.test(field) ? `"${field.replace(/"/g, '""')}"` : field);
    const line$ = map((record, index) => (index ? eol : '') + record.map(encode).join(','));
    return lastBreak ? (data$) => data$.pipe(line$, endWith(eol)) : line$;
}

/**
 * CSV parser RxJS operator.
 *
 * Returns an Observable that parses text emitted by source Observable and converts it to `csvRecord`s.
 * Input text should be MIME Type 'text/csv' as described in [RFC 4180](https://www.ietf.org/rfc/rfc4180.txt).
 * The text may arrive split into chunks at arbitrary positions; parser state is kept between chunks.
 *
 * All parse errors are reported as `SyntaxError` using an Observer's `error()` method.
 * The error message starts with `CSV4180 [<row>, <column>]: ` (both counters start at 1).
 * After the first parse error the rest of the input is ignored.
 */
export function csvParse() {
    return function (csv$) {
        return new Observable((observer) => {
            // Sticky tokenizer: separator, quote, line break, or a run of ordinary characters.
            // Its alternatives cover every character, so `exec` fails only at the end of a chunk,
            // which resets `lastIndex` to 0 for the next chunk.
            const tokenizer = /,|"|\n|\r\n|\r|[^,"\r\n]+/y;
            // Parser states:
            //   IDLE    - at the start of a record
            //   CR      - a record has just been closed by a bare '\r'; a following '\n' is part of the same break
            //   COMMA   - right after a field separator
            //   TEXT    - inside a non-quoted field
            //   ESCAPED - inside a quoted field
            //   CLOSED  - right after a quote seen inside a quoted field (closing quote or first half of `""`)
            let state = 'IDLE';
            let record = [];
            let value = '';
            let rowIndex = 1;
            let colIndex = 1;
            let lastToken = '';
            // Set on the first syntax error so no further input is processed.
            let failed = false;

            function err(msg) {
                failed = true;
                observer.error(new SyntaxError(`CSV4180 [${rowIndex}, ${colIndex}]: ${msg}`));
            }

            // Emits the collected record and starts a new one.
            function flushRecord() {
                observer.next(record);
                record = [];
            }

            // Stores the collected field value and starts a new one.
            function flushField() {
                record.push(value);
                value = '';
            }

            // State after a line break token: a bare '\r' may still be followed by '\n' in the next chunk.
            function afterBreak(token) {
                return token === '\r' ? 'CR' : 'IDLE';
            }

            // Feeds one token to the state machine; returns false when parsing must stop.
            function consume(token) {
                switch (state) {
                    case 'IDLE':
                    case 'CR':
                        switch (token) {
                            case ',':
                                record.push('');
                                state = 'COMMA';
                                break;
                            case '"':
                                state = 'ESCAPED';
                                break;
                            case '\r':
                            case '\r\n':
                                flushRecord();
                                state = afterBreak(token);
                                break;
                            case '\n':
                                // '\n' right after a bare '\r' completes a CRLF split between chunks.
                                if (state === 'CR')
                                    state = 'IDLE';
                                else
                                    flushRecord();
                                break;
                            default:
                                value += token;
                                state = 'TEXT';
                                break;
                        }
                        break;
                    case 'COMMA':
                        switch (token) {
                            case ',':
                                record.push('');
                                break;
                            case '"':
                                state = 'ESCAPED';
                                break;
                            case '\r':
                            case '\r\n':
                            case '\n':
                                // A separator directly before a line break means an empty last field.
                                record.push('');
                                flushRecord();
                                state = afterBreak(token);
                                break;
                            default:
                                value += token;
                                state = 'TEXT';
                                break;
                        }
                        break;
                    case 'TEXT':
                        switch (token) {
                            case ',':
                                flushField();
                                state = 'COMMA';
                                break;
                            case '"':
                                err('Quote in non-escaped data.');
                                return false;
                            case '\r':
                            case '\r\n':
                            case '\n':
                                flushField();
                                flushRecord();
                                state = afterBreak(token);
                                break;
                            default:
                                value += token;
                                break;
                        }
                        break;
                    case 'ESCAPED':
                        // Everything but a quote (separators and line breaks included) is field data.
                        if (token === '"')
                            state = 'CLOSED';
                        else
                            value += token;
                        break;
                    case 'CLOSED':
                        switch (token) {
                            case ',':
                                flushField();
                                state = 'COMMA';
                                break;
                            case '"':
                                // Doubled quote: a literal quote inside the quoted field.
                                value += token;
                                state = 'ESCAPED';
                                break;
                            case '\r':
                            case '\r\n':
                            case '\n':
                                flushField();
                                flushRecord();
                                state = afterBreak(token);
                                break;
                            default:
                                err('Invalid escape sequence.');
                                return false;
                        }
                        break;
                    default:
                        return false;
                }
                // Position tracking for error messages; '\n' following a bare '\r' is not counted twice.
                if (token[0] === '\r' || (token === '\n' && lastToken !== '\r')) {
                    rowIndex++;
                    colIndex = 1;
                }
                else
                    colIndex += token.length;
                lastToken = token;
                return true;
            }

            // Handles end of input; returns false when the input ends inside a quoted field.
            function end() {
                if (state === 'ESCAPED') {
                    err('Closing quote is missing.');
                    return false;
                }
                // The last record is not required to end with a line break.
                if (state === 'COMMA' || state === 'TEXT' || state === 'CLOSED') {
                    record.push(value);
                    flushRecord();
                }
                return true;
            }

            return csv$.subscribe({
                next: (chunk) => {
                    // A synchronous source may keep pushing chunks after an error was reported.
                    if (failed)
                        return;
                    let match;
                    while ((match = tokenizer.exec(chunk)) !== null && consume(match[0]))
                        ;
                },
                complete: () => {
                    if (!failed && end())
                        observer.complete();
                },
                error: (e) => observer.error(e),
            });
        });
    };
}

/**
 * Removes empty records.
 *
 * Returns an Observable that removes all empty records (records of zero length)
 * emitted by source Observable.
 */
export function csvDropEmpty() {
    return filter((r) => !!r.length);
}

/**
 * Removes header record.
 *
 * Returns an Observable that removes header `csvRecord` (the first data item) emitted by source Observable.
 * There is no dedicated tagging for header `csvRecord` so one must be sure the source Observable emits header.
 *
 * @typeParam T - the main data type, might be csvRecord or any arbitrary data type
 */
export function csvDropHeader() {
    return skip(1);
}

/**
 * Injects a header record.
 *
 * Returns an Observable that emits header record and then mirrors source Observable.
 * Inserted `csvRecord` will be interpreted as a header `csvRecord` by other operators if they are instructed to.
 *
 * @typeParam T - the main stream type
 * @param header - header record value
 */
export function csvInjectHeader(header) {
    return startWith(header);
}

/**
 * Validates a record.
 *
 * Returns an Observable that uses `validator` to check/modify every `csvRecord` emitted by source Observable.
 * The value returned by `validator` is emitted instead of the original record.
 *
 * To remove invalid records one can use `validator` to convert them to empty records and then use `csvDropEmpty` operator.
 *
 * @param hdr - does source Observable emit header 'csvRecord'?
 * @param validator - function to validate a 'csvRecord'; its second argument is truthy only for
 *   the first emitted record and only when `hdr` is truthy
 * @throws everything that `validator` throws
 */
export function csvValidateRecord(hdr, validator) {
    return map((r, i) => validator(r, hdr && !i));
}

/**
 * Creates `csvRecord` for data array.
 *
 * Returns an Observable that converts every array item emitted by source Observable to `csvRecord`.
 * The operator calls `String(...)` for every item in the input data array, `undefined` and `null`
 * items are replaced by `''`.
 *
 * It doesn't matter if source Observable emits header record. The operator's conversion procedure doesn't affect a header record.
 *
 * If you need different behavior and/or have different input data type use `csvConvert()` and custom `csvExtractor<T>`.
 */
export function csvFromArray() {
    return map((obj) => obj.map((v) => String(v !== null && v !== undefined ? v : '')));
}

/**
 * Converts stream items with a custom projection function.
 *
 * Returns an Observable that replaces every item emitted by source Observable with `prj(item, names)`.
 *
 * If `hdr` is falsy, every item is converted and `prj` is called with the item only.
 * If `hdr` is truthy, the first emitted item is taken as the header: it is passed through unchanged
 * and then supplied as the second argument (`names`) to `prj` for every following item.
 *
 * @param hdr - does source Observable emit a header as the first item?
 * @param prj - conversion function
 * @throws RangeError if `prj` is not provided
 */
export function csvConvert(hdr, prj) {
    if (!prj)
        throw new RangeError('Data converter is not provided.');
    if (!hdr)
        return map((rec) => prj(rec));
    // The header captured from the first item; shared by all items that follow it.
    let names = undefined;
    return map((obj, i) => {
        if (i !== 0)
            return prj(obj, names);
        names = obj;
        return obj;
    });
}

/**
 * Justifies record length, can be used with `csvValidateRecord()`.
 *
 * Returns `csvRecordValidator` that justifies record length according to header length.
 * According to RFC 4180 'each line should contain the same number of fields throughout the file'
 * hence any changes to bad sized records violate the RFC.
 *
 * Parameter `opt.length` sets default length of a record. A header record length (if available)
 * overrides `opt.length` value. If `opt.length` is not set and header record hasn't been available,
 * no records are altered.
 *
 * Parameter `opt.skip_empty` instructs to skip empty records, **violates RFC 4180**
 *
 * Parameter `opt.repair` instructs to repair bad sized records, **violates RFC 4180**
 *
 * Parameter `opt.filler` sets repair mode. If `opt.filler` is defined bad sized record is filled up
 * or cut to the header length. If `opt.filler` is undefined invalid record is replaced with an empty record.
 *
 * A repaired record is returned as a new array, the input record is never modified.
 *
 * @param opt - validation options
 * @throws RangeError (from the returned validator) on a bad sized record when `opt.repair` is not set
 */
export function csvJustifier(opt) {
    const { length, skip_empty: skipEmpty, repair, filler } = opt || {};
    // Expected record length; replaced by the header length once a header is seen.
    let expected = length;
    return (rec, isHeader) => {
        const actual = rec.length;
        if (isHeader)
            expected = actual;
        const wellSized = expected === undefined || expected === actual;
        if (wellSized || (actual === 0 && skipEmpty))
            return rec;
        if (!repair)
            throw new RangeError('Invalid record.');
        if (filler == null)
            return [];
        // Repair a copy so the array owned by the caller stays intact.
        const fixed = rec.slice();
        fixed.length = expected;
        if (actual < expected)
            fixed.fill(filler, actual);
        return fixed;
    };
}

/**
 * Creates an object upon `csvRecord`.
 *
 * Returns `csvBuilder<Record<string, string>>` that creates an object using properties names array and `rec` data as values.
 * Every data item in `csvRecords` is added to created object. Data item index is used to select property name in `names` array.
 * If no such name exists (or it is empty) then argument `extra` is used to generate property name for index `i`.
 *
 * If `extra` is function, then property name is `<extra>(<i>)`.
 * If `extra` is string, then property name is `<extra><i>`.
 * If `extra` is undefined, then property name is `_<i>`.
 *
 * Can be used in `csvConvert()` to create simple objects.
 *
 * @see csvConvert()
 *
 * @param extra - property name generator for unlisted properties
 */
export function csvAssembler(extra) {
    const n = typeof extra === 'function'
        ? extra
        : typeof extra === 'string'
            ? (i) => `${extra}${i}`
            : (i) => `_${i}`;
    return (rec, names) => {
        const fields = names || [];
        return Object.assign({}, ...rec.map((v, i) => ({ [fields[i] || n(i)]: v })));
    };
}

/** Returns an array of alphabetically sorted enumerable properties of an object. */
export function csvPropNames(obj) {
    return Object.keys(obj).sort((a, b) => a.localeCompare(b));
}

/**
 * Creates a CSV record upon object.
 *
 * Returns `csvExtractor<Record<string, unknown>>` that creates a CSV record using properties names array and `obj` as values.
 * Every property listed in `names` has its value received from `obj` and stored in the record as `String(value)`.
 * `undefined` and `null` values are replaced by `''`.
 * If `extra` is truthy all non listed object properties values are added to the record in alphabetical order after the listed ones.
 *
 * @param extra - add non listed properties values
 */
export function csvPropValues(extra) {
    return (obj, names) => {
        let listed = names || [];
        if (extra) {
            // Set lookup keeps the "not listed yet" check linear in the number of properties.
            const known = new Set(listed);
            listed = listed.concat(csvPropNames(obj).filter((k) => !known.has(k)));
        }
        return listed.map((k) => {
            const value = obj[k];
            return String(value !== null && value !== undefined ? value : '');
        });
    };
}
//# sourceMappingURL=csv-rxjs-kit.js.map