@thi.ng/csv
Version:
Customizable, transducer-based CSV parser/object mapper and transformer
225 lines (224 loc) • 5.99 kB
JavaScript
import { isArray } from "@thi.ng/checks/is-array";
import { isFunction } from "@thi.ng/checks/is-function";
import { isIterable } from "@thi.ng/checks/is-iterable";
import { illegalArgs } from "@thi.ng/errors/illegal-arguments";
import { ESCAPES } from "@thi.ng/strings/escape";
import { split } from "@thi.ng/strings/split";
import { compR } from "@thi.ng/transducers/compr";
import { iterator1 } from "@thi.ng/transducers/iterator";
/**
 * Baseline parser options. Both parsers spread caller-supplied options over
 * this object, so any key given by the caller replaces the value here.
 */
const DEFAULT_OPTS = {
  // character separating cells within a line
  delim: ",",
  // character opening/closing a quoted cell
  quote: "\"",
  // lines starting with this prefix are skipped (unless inside a quoted cell)
  comment: "#",
  // when true, cell values are whitespace-trimmed before being collected
  trim: false,
};
/**
 * CSV line parser producing one plain object per record.
 *
 * When `src` is iterable, the parser is applied to it via `iterator1` and that
 * result is returned. Otherwise a function is returned which takes a reducer
 * triple `rfn` and wraps it (via `compR`) with a step function consuming one
 * line of text per call.
 *
 * Options (merged over `DEFAULT_OPTS`):
 * - `all` (default `true`): copy every cell into the row, keyed by header name
 * - `cols`: column specs (array or object form) supporting `alias`, `tx` and
 *   `default`
 * - `header`: pre-defined column names; if absent, the first parsed record is
 *   used as header
 * - `comment`, `delim`, `quote`, `trim`: see `DEFAULT_OPTS`
 *
 * @param opts - parser options
 * @param src - optional iterable of lines
 */
function parseCSV(opts, src) {
  if (isIterable(src)) {
    return iterator1(parseCSV(opts), src);
  }
  return (rfn) => {
    const settings = { ...DEFAULT_OPTS, ...opts };
    const { all = true, cols, comment, delim, header, quote, trim } = settings;
    const reduce = rfn[2];
    let index;
    let revIndex;
    let defaults;
    let awaitingHeader = true;
    let isQuoted = false;
    let record = [];

    // builds the lookup tables from the given column names
    const init = (names) => {
      if (cols) {
        index = __initIndex(names, cols);
        defaults = __initDefaults(cols);
      }
      if (all) {
        revIndex = __initRevIndex(names);
      }
      awaitingHeader = false;
    };

    // copies every cell of the current record into `row`, keyed by column name
    const collectAll = (row) => {
      record.forEach((cell, i) => {
        row[revIndex[i]] = trim ? cell.trim() : cell;
      });
    };

    // applies alias/transform handling for the explicitly configured columns
    const collectIndexed = (row) => {
      for (const [id, { i, spec }] of Object.entries(index)) {
        let val = record[i];
        if (val === undefined) continue;
        if (trim) val = val.trim();
        // drop the un-aliased copy written by collectAll()
        if (all && spec.alias) delete row[id];
        row[spec.alias || id] = spec.tx ? spec.tx(val, row) : val;
      }
    };

    // fills in configured defaults for columns which are missing or empty
    const collectDefaults = (row) => {
      for (const { alias, default: fallback } of defaults) {
        const current = row[alias];
        if (current === undefined || current === "") {
          row[alias] = isFunction(fallback) ? fallback(row) : fallback;
        }
      }
    };

    if (header) init(header);

    return compR(rfn, (acc, line) => {
      // empty & comment lines are only significant inside a quoted cell
      const skippable = !line.length || line.startsWith(comment);
      if (skippable && !isQuoted) return acc;

      isQuoted = __parseLine(line, record, isQuoted, delim, quote);
      // record continues on the next line
      if (isQuoted) return acc;

      if (awaitingHeader) {
        init(record);
        record = [];
        return acc;
      }

      const row = {};
      if (all) collectAll(row);
      if (index) collectIndexed(row);
      if (defaults) collectDefaults(row);
      record = [];
      return reduce(acc, row);
    });
  };
}
/**
 * Simplified CSV line parser producing one array of cell values per record
 * (no object mapping).
 *
 * When `src` is iterable, the parser is applied to it via `iterator1` and that
 * result is returned. Otherwise a function is returned which takes a reducer
 * triple `rfn` and wraps it (via `compR`) with a step function consuming one
 * line of text per call.
 *
 * Options (merged over `DEFAULT_OPTS`):
 * - `header` (default `true`): if truthy, the first record is treated as a
 *   header and discarded
 * - `cols`: array aligned with the cell positions. Falsy entries drop the
 *   cell, functions are called as `fn(value, rowSoFar)` and their result is
 *   stored, any other truthy entry keeps the cell as is
 * - `comment`, `delim`, `quote`, `trim`: see `DEFAULT_OPTS`
 *
 * @param opts - parser options
 * @param src - optional iterable of lines
 */
function parseCSVSimple(opts, src) {
  if (isIterable(src)) {
    return iterator1(parseCSVSimple(opts), src);
  }
  return (rfn) => {
    const {
      header = true,
      cols,
      comment,
      delim,
      quote,
      trim,
    } = {
      ...DEFAULT_OPTS,
      ...opts,
    };
    const reduce = rfn[2];
    let first = header;
    let isQuoted = false;
    let record = [];

    // builds the output row from the cells selected/transformed via `cols`
    const collect = () =>
      cols.reduce((acc, col, i) => {
        if (col) {
          let val = record[i];
          if (val !== void 0) {
            trim && (val = val.trim());
            acc.push(isFunction(col) ? col(val, acc) : val);
          }
        }
        return acc;
      }, []);

    return compR(rfn, (acc, line) => {
      // empty & comment lines are only significant inside a quoted cell
      if ((!line.length || line.startsWith(comment)) && !isQuoted) {
        return acc;
      }
      isQuoted = __parseLine(line, record, isQuoted, delim, quote);
      // record (header or data) continues on the next line
      if (isQuoted) return acc;
      if (first) {
        // Only discard the header once it is complete. Dropping it while a
        // quoted header cell is still open would make the continuation line
        // leak into the output as a bogus data row.
        first = false;
        record = [];
        return acc;
      }
      const row = cols ? collect() : record;
      record = [];
      return reduce(acc, row);
    });
  };
}
/**
 * Convenience wrapper around `parseCSV()` for a single string: the string is
 * first passed through `split()` and the result is used as the line source.
 *
 * @param opts - options forwarded unchanged to `parseCSV()`
 * @param src - CSV text
 */
const parseCSVFromString = (opts, src) => {
  const lines = split(src);
  return parseCSV(opts, lines);
};
/**
 * Convenience wrapper around `parseCSVSimple()` for a single string: the
 * string is first passed through `split()` and the result is used as the line
 * source.
 *
 * @param opts - options forwarded unchanged to `parseCSVSimple()`
 * @param src - CSV text
 */
const parseCSVSimpleFromString = (opts, src) => {
  const lines = split(src);
  return parseCSVSimple(opts, lines);
};
/**
 * Parses a single line of text into cells and appends them to `acc`.
 *
 * Handles backslash escape sequences (resolved via `ESCAPES`, falling back to
 * the escaped character itself), quoted cells and doubled quote characters
 * inside quoted cells (emitted as a single quote character).
 *
 * If `isQuoted` is true on entry, the line is a continuation of a quoted cell
 * started on a previous line; the first cell collected from this line is then
 * appended to the last item of `acc` instead of being added as new cell.
 *
 * Note: a trailing empty cell (e.g. after a final delimiter) is not collected.
 *
 * @param line - line to parse
 * @param acc - array receiving the parsed cells (mutated)
 * @param isQuoted - true if a quoted cell is still open from a previous line
 * @param delim - cell delimiter character
 * @param quote - quote character
 * @returns true if the line ended inside a still open quoted cell
 */
const __parseLine = (line, acc, isQuoted, delim, quote) => {
  let curr = "";
  // previous character (or "" when it must not influence the current one)
  let p = "";
  let openQuote = isQuoted;
  for (let i = 0, n = line.length; i < n; i++) {
    const c = line[i];
    if (p === "\\") {
      // escaped char
      curr += ESCAPES[c] || c;
      // The escaped char is fully consumed: forget it, so an escaped
      // backslash doesn't start another escape sequence and an escaped
      // quote isn't mistaken for the first half of a `""` pair.
      p = "";
      continue;
    }
    if (c === quote) {
      if (!isQuoted) {
        // opening quote
        p = "";
        isQuoted = true;
        continue;
      }
      if (p === quote) {
        // second half of a doubled quote => literal quote char
        curr += quote;
        p = "";
        continue;
      }
      // closing quote, unless it is the first half of a doubled quote
      if (line[i + 1] !== quote) isQuoted = false;
    } else if (!isQuoted && c === delim) {
      // cell delimiter
      __collectCell(acc, curr, openQuote);
      openQuote = false;
      curr = "";
    } else if (c !== "\\") {
      // regular char (a backslash only marks the start of an escape)
      curr += c;
    }
    p = c;
  }
  curr !== "" && __collectCell(acc, curr, openQuote);
  return isQuoted;
};
/**
 * Stores a parsed cell value in `acc`.
 *
 * If `openQuote` is truthy, the value continues a cell from a previous line
 * and is appended (after a newline) to the last item of `acc`. Otherwise it is
 * pushed as a new cell.
 *
 * @param acc - array of cells collected so far (mutated)
 * @param curr - cell text
 * @param openQuote - true if `curr` continues the last cell
 * @returns the updated last cell (continuation) or the new length of `acc`
 */
const __collectCell = (acc, curr, openQuote) => {
  if (!openQuote) {
    return acc.push(curr);
  }
  const last = acc.length - 1;
  acc[last] = acc[last] + ("\n" + curr);
  return acc[last];
};
/**
 * Builds the column index used to extract the configured columns of a record.
 *
 * - Array form of `cols`: each truthy spec is associated with the cell at the
 *   same position. The entry is keyed by the spec's `alias`, else by the
 *   header name at that position, else by the stringified position. The stored
 *   spec is a copy which always has an `alias` key.
 * - Object form of `cols`: each header name which is an own key of `cols`
 *   (with a truthy spec) is mapped to its position and spec.
 *
 * @param line - header column names
 * @param cols - column specs (array or object)
 * @returns object of `{ i, spec }` entries (`i` = cell position)
 */
const __initIndex = (line, cols) => {
  const index = {};
  if (isArray(cols)) {
    cols.forEach((spec, i) => {
      if (!spec) return;
      const alias = spec.alias || line[i] || String(i);
      index[alias] = { i, spec: { alias, ...spec } };
    });
    return index;
  }
  line.forEach((id, i) => {
    // Header names come from the parsed file. Only accept own keys of `cols`,
    // otherwise names like `toString` or `constructor` would resolve to
    // inherited Object.prototype members and be treated as column specs.
    if (!Object.prototype.hasOwnProperty.call(cols, id)) return;
    const spec = cols[id];
    if (spec) index[id] = { i, spec };
  });
  return index;
};
/**
 * Builds a lookup object mapping each cell position to the column name found
 * at that position in the given header.
 *
 * @param line - header column names
 * @returns object with positions as keys and column names as values
 */
const __initRevIndex = (line) => {
  const lookup = {};
  line.forEach((name, pos) => {
    lookup[pos] = name;
  });
  return lookup;
};
/**
 * Collects the column specs which define a default value.
 *
 * - Array form of `cols`: keeps the original spec objects whose `default` is
 *   neither `null` nor `undefined`. Such a spec must have an `alias`,
 *   otherwise `illegalArgs()` is called.
 * - Object form of `cols`: produces `{ alias, default }` items for all specs
 *   whose `default` is not `undefined`, using the object key as alias if the
 *   spec has none.
 *
 * @param cols - column specs (array or object)
 * @returns array of items with `alias` & `default`, or `undefined` if no
 * column defines a default
 */
const __initDefaults = (cols) => {
  const defaults = [];
  if (isArray(cols)) {
    for (const c of cols) {
      if (!c || c.default == undefined) continue;
      if (!c.alias) {
        illegalArgs(
          `missing column alias for default: ${c.default}`
        );
      }
      defaults.push(c);
    }
  } else {
    for (const [k, v] of Object.entries(cols)) {
      if (v.default === undefined) continue;
      defaults.push({
        alias: v.alias ?? k,
        default: v.default,
      });
    }
  }
  return defaults.length > 0 ? defaults : undefined;
};
// Public API: the two transducer-style parsers plus their string-input
// convenience wrappers. The `__`-prefixed helpers are not exported.
export {
parseCSV,
parseCSVFromString,
parseCSVSimple,
parseCSVSimpleFromString
};