@thi.ng/csv
Version:
Customizable, transducer-based CSV parser/object mapper and transformer
194 lines (193 loc) • 5.06 kB
JavaScript
import { isArray } from "@thi.ng/checks/is-array";
import { isFunction } from "@thi.ng/checks/is-function";
import { isIterable } from "@thi.ng/checks/is-iterable";
import { ESCAPES } from "@thi.ng/strings/escape";
import { split } from "@thi.ng/strings/split";
import { compR } from "@thi.ng/transducers/compr";
import { iterator1 } from "@thi.ng/transducers/iterator";
/**
 * Option defaults shared by `parseCSV()` and `parseCSVSimple()`. User
 * supplied options are spread on top of these values.
 */
const DEFAULT_OPTS = {
  // character separating the cells of a line
  delim: ",",
  // character used to open/close quoted cells
  quote: "\"",
  // lines starting with this prefix are skipped (unless inside a quoted cell)
  comment: "#",
  // if true, cell values are passed through `String.prototype.trim()`
  trim: false,
};
/**
 * CSV parser producing one plain object per record.
 *
 * If `src` is iterable, the lines are consumed via `iterator1()`. Otherwise
 * a transducer function (accepting a reducer triple) is returned.
 *
 * Options (merged over `DEFAULT_OPTS`):
 * - `all` (default: true): copy every cell into the row, keyed by the
 *   column name found in the header
 * - `cols`: column specs handed to `__initIndex()`; a spec's `alias`
 *   renames the key, its `tx` function transforms the value and receives
 *   `(value, row)`
 * - `header`: predefined column names; if omitted, the first record of the
 *   input is used as header
 * - `comment`, `delim`, `quote`, `trim`: see `DEFAULT_OPTS`
 *
 * @param opts - parser options (optional)
 * @param src - iterable of lines (optional)
 */
function parseCSV(opts, src) {
  if (isIterable(src)) {
    return iterator1(parseCSV(opts), src);
  }
  return (rfn) => {
    const settings = { ...DEFAULT_OPTS, ...opts };
    const { all = true, cols, comment, delim, header, quote, trim } = settings;
    const reduce = rfn[2];
    // column lookup tables, built once the header is known
    let index;
    let revIndex;
    // true until the header has been processed
    let first = true;
    // true while a quoted cell spans multiple lines
    let isQuoted = false;
    // cells of the record currently being assembled
    let record = [];

    const init = (names) => {
      if (cols) index = __initIndex(names, cols);
      if (all) revIndex = __initRevIndex(names);
      first = false;
    };

    // copies all cells of the current record, keyed by header name
    const collectAll = (row) => {
      record.forEach((cell, i) => {
        row[revIndex[i]] = trim ? cell.trim() : cell;
      });
      return row;
    };

    // copies only the cells selected via `cols`, applying alias & transform
    const collectIndexed = (row) => {
      for (const [id, { i, spec }] of Object.entries(index)) {
        let val = record[i];
        if (val === undefined) continue;
        if (trim) val = val.trim();
        // drop the un-aliased key which `collectAll()` might have written
        if (all && spec.alias) delete row[id];
        row[spec.alias || id] = spec.tx ? spec.tx(val, row) : val;
      }
      return row;
    };

    if (header) init(header);

    return compR(rfn, (acc, line) => {
      const skippable = !line.length || line.startsWith(comment);
      // empty & comment lines are only significant inside a quoted cell
      if (skippable && !isQuoted) return acc;
      isQuoted = __parseLine(line, record, isQuoted, delim, quote);
      // record continues on the next line
      if (isQuoted) return acc;
      if (first) {
        // the first complete record defines the column names
        init(record);
        record = [];
        return acc;
      }
      const row = {};
      if (all) collectAll(row);
      if (index) collectIndexed(row);
      record = [];
      return reduce(acc, row);
    });
  };
}
/**
 * Simplified CSV parser producing one array of cell values per record
 * (instead of the objects produced by `parseCSV()`).
 *
 * If `src` is iterable, the lines are consumed via `iterator1()`. Otherwise
 * a transducer function (accepting a reducer triple) is returned.
 *
 * Options (merged over `DEFAULT_OPTS`):
 * - `header` (default: true): if truthy, the first record of the input is
 *   treated as header and skipped
 * - `cols`: array of per-column specs, addressed by column position. Falsy
 *   entries drop the column, functions are called with `(value, row)` and
 *   their result is stored, any other truthy entry keeps the value as is
 * - `comment`, `delim`, `quote`, `trim`: see `DEFAULT_OPTS`
 *
 * @param opts - parser options (optional)
 * @param src - iterable of lines (optional)
 */
function parseCSVSimple(opts, src) {
  if (isIterable(src)) {
    return iterator1(parseCSVSimple(opts), src);
  }
  return (rfn) => {
    const {
      header = true,
      cols,
      comment,
      delim,
      quote,
      trim,
    } = {
      ...DEFAULT_OPTS,
      ...opts,
    };
    const reduce = rfn[2];
    // true until the header record has been skipped
    let first = header;
    // true while a quoted cell spans multiple lines
    let isQuoted = false;
    // cells of the record currently being assembled
    let record = [];

    // builds the result row from the columns selected via `cols`
    const collect = () =>
      cols.reduce((acc, col, i) => {
        if (col) {
          let val = record[i];
          if (val !== undefined) {
            if (trim) val = val.trim();
            acc.push(isFunction(col) ? col(val, acc) : val);
          }
        }
        return acc;
      }, []);

    return compR(rfn, (acc, line) => {
      // empty & comment lines are only significant inside a quoted cell
      if ((!line.length || line.startsWith(comment)) && !isQuoted) {
        return acc;
      }
      isQuoted = __parseLine(line, record, isQuoted, delim, quote);
      // record (header or data) continues on the next line
      if (isQuoted) return acc;
      if (first) {
        // Only discard the header once it is complete. Previously the
        // header state was dropped after its first line, so the remainder
        // of a multi-line (quoted) header was emitted as a data row.
        first = false;
        record = [];
        return acc;
      }
      const row = cols ? collect() : record;
      record = [];
      return reduce(acc, row);
    });
  };
}
/**
 * Convenience wrapper for `parseCSV()` taking a single string instead of
 * an iterable of lines. The string is first passed to `split()`
 * (presumably splitting at line breaks - see @thi.ng/strings to confirm).
 *
 * @param opts - options passed on to `parseCSV()`
 * @param src - CSV source string
 */
const parseCSVFromString = (opts, src) => {
  const lines = split(src);
  return parseCSV(opts, lines);
};
/**
 * Convenience wrapper for `parseCSVSimple()` taking a single string instead
 * of an iterable of lines. The string is first passed to `split()`
 * (presumably splitting at line breaks - see @thi.ng/strings to confirm).
 *
 * @param opts - options passed on to `parseCSVSimple()`
 * @param src - CSV source string
 */
const parseCSVSimpleFromString = (opts, src) => {
  const lines = split(src);
  return parseCSVSimple(opts, lines);
};
/**
 * Parses a single line of text and appends the cells found to `acc`.
 *
 * Supports quoted cells (which may contain delimiters and may span several
 * lines), doubled quote chars inside quoted cells (`""` => `"`) and
 * backslash escape sequences (resolved via `ESCAPES`, unknown sequences
 * yield the escaped char itself).
 *
 * NOTE: An empty, unquoted cell following the last delimiter of a line is
 * not emitted.
 *
 * @param line - line to parse
 * @param acc - array receiving the cells (mutated)
 * @param isQuoted - true, if a quoted cell of a previous line is still open
 * @param delim - cell delimiter char
 * @param quote - quote char
 * @returns true, if the line ends inside a quoted cell, i.e. the record
 * continues on the next line
 */
const __parseLine = (line, acc, isQuoted, delim, quote) => {
  // text of the cell currently being read
  let curr = "";
  // previous char (or "" if it must not influence the current char)
  let p = "";
  // true while reading the continuation of a cell opened on an earlier line
  let openQuote = isQuoted;
  for (let i = 0, n = line.length; i < n; i++) {
    const c = line[i];
    if (p === "\\") {
      // escaped char
      curr += ESCAPES[c] || c;
      // The escaped char is fully consumed and must not be interpreted
      // again as "previous char". Otherwise an escaped backslash would
      // escape the following char too and an escaped quote would be
      // mistaken for the first half of a `""` pair.
      p = "";
      continue;
    } else if (c === quote) {
      if (!isQuoted) {
        // opening quote
        p = "";
        isQuoted = true;
        continue;
      } else if (p === quote) {
        // second half of a `""` pair
        curr += quote;
        p = "";
        continue;
      } else if (line[i + 1] !== quote) {
        // closing quote (i.e. not the first half of a `""` pair)
        isQuoted = false;
      }
    } else if (!isQuoted && c === delim) {
      // cell delimiter
      __collectCell(acc, curr, openQuote);
      openQuote = false;
      curr = "";
    } else if (c !== "\\") {
      // regular char (a backslash only starts an escape sequence)
      curr += c;
    }
    p = c;
  }
  // The remaining text is also collected if it is empty, but:
  // - a quoted cell is still open: the next line needs an existing cell to
  //   append to (else it would be merged into the previous cell)
  // - the entire line belongs to a cell opened earlier: its line break must
  //   be retained (e.g. blank lines inside a quoted cell)
  if (curr !== "" || isQuoted || openQuote) {
    __collectCell(acc, curr, openQuote);
  }
  return isQuoted;
};
/**
 * Adds the cell text `curr` to the record `acc`.
 *
 * If `openQuote` is true, the text is the continuation of a quoted cell
 * started on an earlier line and is appended to the last cell, separated
 * by a newline. Otherwise a new cell is pushed.
 *
 * @param acc - cells of the current record (mutated)
 * @param curr - cell text
 * @param openQuote - true, if `curr` continues the last cell
 * @returns result of the append or push (not used by the callers in this file)
 */
const __collectCell = (acc, curr, openQuote) => {
  if (!openQuote) return acc.push(curr);
  const last = acc.length - 1;
  // No cell to continue: start one, holding only the line break so far.
  // Previously this wrote `"undefined\n..."` to `acc[-1]`, losing the cell.
  if (last < 0) return acc.push(`\n${curr}`);
  return (acc[last] += `\n${curr}`);
};
/**
 * Builds the column lookup table used to extract selected columns.
 *
 * - If `cols` is an array, its specs are addressed by column position.
 *   Falsy entries are skipped. Entries are keyed by the spec's `alias`,
 *   falling back to the header name at that position, then to the
 *   stringified position.
 * - Otherwise `cols` is treated as object of specs keyed by column name and
 *   an entry is created for each header name with a truthy spec.
 *
 * @param line - header cells (column names)
 * @param cols - column specs (array or object)
 * @returns object of `{ i, spec }` entries (`i` = column position)
 */
const __initIndex = (line, cols) => {
  const index = {};
  if (isArray(cols)) {
    cols.forEach((spec, i) => {
      if (!spec) return;
      const alias = spec.alias || line[i] || String(i);
      index[alias] = { i, spec: { alias, ...spec } };
    });
    return index;
  }
  line.forEach((id, i) => {
    const spec = cols[id];
    if (spec) index[id] = { i, spec };
  });
  return index;
};
/**
 * Builds the reverse lookup table mapping column positions to the names
 * given in the header.
 *
 * @param line - header cells (column names)
 * @returns object with column positions as keys and column names as values
 */
const __initRevIndex = (line) => {
  const names = {};
  line.forEach((name, i) => {
    names[i] = name;
  });
  return names;
};
export {
parseCSV,
parseCSVFromString,
parseCSVSimple,
parseCSVSimpleFromString
};