ts-csv-parser
Version:
Utility package for parsing CSV files.
388 lines (384 loc) • 12 kB
JavaScript
// CommonJS interop helpers (the naming suggests they were emitted by a bundler).
// Short aliases for the reflection primitives used below.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;

// Installs every entry of `all` on `target` as an enumerable getter, so the
// exported value is looked up each time the property is read.
var __export = (target, all) => {
  for (var name in all) {
    const getter = all[name];
    __defProp(target, name, { get: getter, enumerable: true });
  }
};

// Mirrors the own properties of `from` onto `to` as getters. Keys that `to`
// already owns, and the single key named by `except`, are left alone.
// `desc` is only a scratch slot for the source property descriptor.
var __copyProps = (to, from, except, desc) => {
  const isObjectLike = (from && typeof from === "object") || typeof from === "function";
  if (!isObjectLike) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      // Keep the source's enumerability; treat a missing descriptor as enumerable.
      enumerable: !desc || desc.enumerable
    });
  }
  return to;
};

// Builds a fresh object flagged with a non-enumerable `__esModule: true` and
// copies the members of `mod` onto it.
var __toCommonJS = (mod) => {
  const flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
};
// src/index.ts
// Export table for the package entry point. `Parser` is registered as a
// getter (see `__export`), so the binding is read when the property is
// accessed rather than at this point in the file.
var index_exports = {};
__export(index_exports, {
Parser: () => Parser
});
// Publish the table as the CommonJS exports object; `__toCommonJS` copies the
// members onto a new object that carries `__esModule: true`.
module.exports = __toCommonJS(index_exports);
// src/csv-tokenizer.ts
/**
 * Tokenizes CSV text into a header row and data rows.
 *
 * The input is split on `\n` / `\r\n` and each physical line is folded through
 * `processLine`, which carries quoted-field state from one line to the next.
 * The first completed record becomes `headers`; the rest become `rows`.
 *
 * @param {string} input - Raw CSV text.
 * @param {object} [options] - Tokenizer options; may be omitted.
 * @param {string} [options.delimiter=","] - Field separator.
 * @param {string} [options.quote='"'] - Quote character.
 * @param {string} [options.escape='"'] - Escape character used inside quotes.
 * @param {number} [options.skipRows=0] - Number of leading physical lines to ignore.
 * @param {boolean} [options.skipEmptyLines=true] - Only an explicit `false` disables skipping.
 * @returns {{headers: string[], rows: string[][]}} Parsed header and data records.
 */
function parseCSV(input, options = {}) {
  const { delimiter, quote, escape, skipRows, skipEmptyLines } = options ?? {};
  const config = {
    delimiter: delimiter || ",",
    quote: quote || '"',
    escape: escape || '"',
    skipRows: skipRows || 0,
    skipEmptyLines: skipEmptyLines !== false
  };
  const lines = input.split(/\r?\n/);
  // A line break after the last record terminates that record; it does not
  // start another one. Without this, "a\n1\n" produced a phantom [""] record
  // whenever skipEmptyLines was disabled.
  if (lines.length > 1 && lines[lines.length - 1] === "") {
    lines.pop();
  }
  const lastIndex = lines.length - 1;
  const initialState = {
    currentField: "",
    currentRow: [],
    inQuotes: false,
    rows: [],
    headers: [],
    lineIndex: 0
  };
  const finalState = lines.reduce(
    (state, line, index) =>
      // Lines before `skipRows` are ignored entirely (they never reach the tokenizer).
      index >= config.skipRows ? processLine(line, state, config, index === lastIndex) : state,
    initialState
  );
  const result = finalizeParsing(finalState, config);
  return {
    headers: result.headers,
    rows: result.rows
  };
}
/**
 * Consumes one physical line of CSV text and returns the next tokenizer state.
 *
 * - A blank line is skipped (the same `state` object is returned) when
 *   `config.skipEmptyLines` is set and no quoted field is open.
 * - If the line ends inside a quoted field and is not the last line, the
 *   partial field (plus a "\n") and partial row are carried forward.
 * - Otherwise the record is complete: the first completed record
 *   (`state.lineIndex === 0`) becomes `headers`, later ones are appended to
 *   `rows` unless they are empty and `skipEmptyLines` is set.
 *
 * @param {string} line - Line content without its terminator.
 * @param {object} state - Tokenizer state (`currentField`, `currentRow`,
 *   `inQuotes`, `rows`, `headers`, `lineIndex`). Not mutated.
 * @param {object} config - `delimiter`, `quote`, `escape`, `skipEmptyLines`.
 * @param {boolean} isLastLine - True when no further lines follow.
 * @returns {object} The next state.
 */
function processLine(line, state, config, isLastLine) {
  if (config.skipEmptyLines && !state.inQuotes && line.trim() === "") {
    return state;
  }
  const { delimiter, quote, escape } = config;
  const row = [...state.currentRow];
  let field = state.currentField;
  let inQuotes = state.inQuotes;
  let position = 0;
  while (position < line.length) {
    const char = line[position];
    if (inQuotes) {
      const nextIsQuote = position + 1 < line.length && line[position + 1] === quote;
      if (char === quote) {
        if (nextIsQuote && escape === quote) {
          // Doubled quote inside a quoted field is a literal quote.
          field += quote;
          position += 2;
        } else {
          inQuotes = false;
          position++;
        }
      } else if (char === escape && escape !== quote && nextIsQuote) {
        // Dedicated escape character followed by a quote: literal quote.
        field += quote;
        position += 2;
      } else {
        field += char;
        position++;
      }
    } else if (line.startsWith(delimiter, position)) {
      row.push(field);
      field = "";
      position += delimiter.length;
    } else if (char === quote && (position === 0 || field === "")) {
      // A quote only opens a quoted section at the very start of a field.
      // The previous check also accepted "preceded by the delimiter's last
      // character", which is already covered by `field === ""` after a real
      // delimiter and otherwise misfired for multi-character delimiters
      // (e.g. `a|"b"` with delimiter `||`).
      inQuotes = true;
      position++;
    } else {
      field += char;
      position++;
    }
  }
  if (inQuotes && !isLastLine) {
    // Quoted field continues on the next physical line.
    return {
      ...state,
      currentField: field + "\n",
      currentRow: row,
      inQuotes: true
    };
  }
  const completedRow = [...row, field];
  const nextLineIndex = state.lineIndex + 1;
  if (state.lineIndex === 0) {
    return {
      currentField: "",
      currentRow: [],
      inQuotes: false,
      rows: state.rows,
      headers: completedRow,
      lineIndex: nextLineIndex
    };
  }
  const shouldAddRow = !config.skipEmptyLines || completedRow.some((f) => f !== "") || completedRow.length > 1;
  return {
    currentField: "",
    currentRow: [],
    inQuotes: false,
    rows: shouldAddRow ? [...state.rows, completedRow] : state.rows,
    headers: state.headers,
    lineIndex: nextLineIndex
  };
}
/**
 * Flushes whatever record is still buffered in the tokenizer state once all
 * lines have been consumed.
 *
 * When nothing is pending the incoming state is returned as-is. A pending
 * record becomes the header row if no record has been completed yet
 * (`lineIndex === 0`); otherwise it is appended to `rows`, subject to the
 * same empty-row rule used while tokenizing.
 *
 * @param {object} state - Tokenizer state after the last line.
 * @param {object} config - Only `skipEmptyLines` is read.
 * @returns {object} State whose `headers` / `rows` include the pending record.
 */
function finalizeParsing(state, config) {
  const hasPending = Boolean(state.currentField) || state.currentRow.length > 0;
  if (!hasPending) {
    return state;
  }

  const pendingRow = [...state.currentRow, state.currentField];
  if (state.lineIndex === 0) {
    return { ...state, headers: pendingRow };
  }

  // A row counts as blank only when it is a single empty cell.
  const isBlank = pendingRow.every((cell) => cell === "") && pendingRow.length <= 1;
  const keepRow = !config.skipEmptyLines || !isBlank;
  const rows = keepRow ? [...state.rows, pendingRow] : state.rows;
  return { ...state, rows };
}
// src/parser.ts
/**
 * Chainable mapper from CSV text to plain records.
 *
 * `col()` and `val()` never modify the parser they are called on; each
 * returns a new parser that shares the options and carries the extended
 * column / validator list. `parse()` tokenizes the text, maps every data row
 * through the declared columns and returns the valid records together with a
 * flat list of error descriptors.
 */
var Parser = class {
  /** Column mappings added through `col()`, in declaration order. */
  columns = [];
  /** Effective options: defaults overlaid with the constructor argument. */
  options;
  /** Row-level validators added through `val()`. */
  rowValidators = [];
  /**
   * Added to the zero-based data-row index to produce the `row` reported in
   * errors (presumably 1-based numbering with the header on row 1).
   */
  rowIndexOffset = 2;

  /**
   * @param {object} [options] - Overrides for the defaults listed below.
   */
  constructor(options = {}) {
    this.options = {
      delimiter: ",",
      quote: '"',
      escape: '"',
      skipEmptyLines: true,
      skipRows: 0,
      trim: true,
      caseInsensitiveColumnNames: false,
      ...options
    };
  }

  /**
   * Returns a new parser with one more column mapping.
   *
   * @param {string|string[]} csvColumnName - Header name, or alternative
   *   header names tried in order.
   * @param {string} propertyName - Key written on the output record.
   * @param {object} [options] - Per-column options (`trim`,
   *   `caseInsensitiveColumnNames`, `nullable`, `defaultValue`, `transform`,
   *   `validate`). `trim` and `caseInsensitiveColumnNames` default to the
   *   parser-level values.
   * @returns {Parser} A new parser; `this` is left unchanged.
   */
  col(csvColumnName, propertyName, options) {
    const csvNames = Array.isArray(csvColumnName) ? csvColumnName : [csvColumnName];
    const column = {
      csvNames,
      propertyName,
      options: {
        trim: this.options.trim,
        caseInsensitiveColumnNames: this.options.caseInsensitiveColumnNames,
        ...options
      }
    };
    // Object.create (rather than `new`) keeps the prototype of subclasses and
    // skips the constructor; every field is therefore assigned explicitly.
    const newParser = Object.create(Object.getPrototypeOf(this));
    newParser.columns = [...this.columns, column];
    newParser.options = this.options;
    newParser.rowValidators = this.rowValidators;
    newParser.rowIndexOffset = this.rowIndexOffset;
    return newParser;
  }

  /**
   * Returns a new parser with one more row-level validator.
   *
   * @param {(record: object) => (string|null|undefined)} validator - Returns
   *   an error message, or a falsy value when the record is acceptable.
   * @returns {Parser} A new parser; `this` is left unchanged.
   */
  val(validator) {
    const newParser = Object.create(Object.getPrototypeOf(this));
    newParser.columns = this.columns;
    newParser.options = this.options;
    newParser.rowValidators = [...this.rowValidators, validator];
    newParser.rowIndexOffset = this.rowIndexOffset;
    return newParser;
  }

  /**
   * Parses CSV text synchronously.
   *
   * @param {string} input - CSV text. A leading byte-order mark is ignored.
   * @returns {{success: object[], errors: object[], hasErrors: boolean}}
   *   `success` holds the records of rows without errors.
   * @throws {Error} "Failed to parse CSV: …" when anything throws while
   *   parsing; the original error is available as `cause`.
   */
  parse(input) {
    try {
      // Strings read from disk often start with U+FEFF; left in place it
      // becomes part of the first header name and that column never matches.
      const text = input.startsWith("\uFEFF") ? input.slice(1) : input;
      const { headers, rows } = parseCSV(text, this.options);
      // For duplicate header names the last occurrence wins.
      const headerIndex = new Map(headers.map((header, index) => [header, index]));
      const results = rows.map(
        (row, rowIndex) => this.processRow(row, rowIndex, headerIndex)
      );
      const success = results.filter((result) => result.errors.length === 0).map((result) => result.record);
      const errors = results.flatMap((result) => result.errors);
      return {
        success,
        errors,
        hasErrors: errors.length > 0
      };
    } catch (e) {
      throw new Error(
        `Failed to parse CSV: ${e instanceof Error ? e.message : "Unknown error"}`,
        { cause: e }
      );
    }
  }

  /**
   * Maps one tokenized row to a record and collects its errors.
   *
   * Row validators run only when every column succeeded.
   *
   * @param {string[]} row - Cell values of the row.
   * @param {number} rowIndex - Zero-based index among the data rows.
   * @param {Map<string, number>} headerIndex - Header name to cell index.
   * @returns {{record: object, errors: object[]}}
   */
  processRow(row, rowIndex, headerIndex) {
    const columnResults = this.columns.map((column) => ({
      column,
      result: this.processColumn(column, row, rowIndex, headerIndex)
    }));
    const errors = columnResults.map(({ result }) => result.error).filter((error) => error !== void 0);
    // Built in one pass; fromEntries defines own data properties, so a later
    // column with the same property name overrides an earlier one.
    const record = Object.fromEntries(
      columnResults
        .filter(({ result }) => "value" in result)
        .map(({ column, result }) => [column.propertyName, result.value])
    );
    if (errors.length === 0 && this.rowValidators.length > 0) {
      const rowValidationErrors = this.rowValidators.map((validator) => {
        const error = validator(record);
        if (error) {
          return {
            row: rowIndex + this.rowIndexOffset,
            value: JSON.stringify(record),
            message: error,
            type: "row-validation"
          };
        }
        return null;
      }).filter((error) => error !== null);
      errors.push(...rowValidationErrors);
    }
    return { record, errors };
  }

  /**
   * Resolves a single column for a row.
   *
   * @returns {{value: *}|{error: object}} The value to store, or an error descriptor.
   */
  processColumn(column, row, rowIndex, headerIndex) {
    const match = this.findColumnMatchForColumn(column, headerIndex);
    if (!match) {
      return this.handleMissingColumn(column, rowIndex);
    }
    // Rows shorter than the header yield an empty cell.
    const rawValue = row[match.columnIndex] || "";
    const trimmedValue = column.options.trim !== false ? rawValue.trim() : rawValue;
    if (!trimmedValue) {
      return this.handleEmptyValue(column, match.columnName, rowIndex);
    }
    return this.transformAndValidate(
      trimmedValue,
      column,
      match.columnName,
      rowIndex
    );
  }

  /**
   * Finds the header a column maps to: exact name match first, then (when
   * enabled for the column or the parser) a case-insensitive match.
   *
   * @returns {{columnIndex: number, columnName: string}|null}
   */
  findColumnMatchForColumn(column, headerIndex) {
    const exactMatch = column.csvNames.map((name) => ({ name, index: headerIndex.get(name) })).find(({ index }) => index !== void 0);
    if (exactMatch && exactMatch.index !== void 0) {
      return { columnIndex: exactMatch.index, columnName: exactMatch.name };
    }
    const shouldUseCaseInsensitive = column.options.caseInsensitiveColumnNames ?? this.options.caseInsensitiveColumnNames;
    if (shouldUseCaseInsensitive) {
      const caseInsensitiveMatch = this.findCaseInsensitiveMatch(
        column.csvNames,
        headerIndex
      );
      if (caseInsensitiveMatch) {
        return caseInsensitiveMatch;
      }
    }
    return null;
  }

  /**
   * Case-insensitive header lookup; candidate names are tried in order and
   * the header's original spelling is reported as `columnName`.
   *
   * @returns {{columnIndex: number, columnName: string}|null}
   */
  findCaseInsensitiveMatch(csvNames, headerIndex) {
    const headerEntries = Array.from(headerIndex.entries());
    for (const csvName of csvNames) {
      const lowerCsvName = csvName.toLowerCase();
      const match = headerEntries.find(
        ([header]) => header.toLowerCase() === lowerCsvName
      );
      if (match) {
        const [originalHeaderName, index] = match;
        return { columnIndex: index, columnName: originalHeaderName };
      }
    }
    return null;
  }

  /**
   * Result for a column whose header is absent: an error unless the column
   * is nullable or has a default; otherwise the default, or `null`.
   */
  handleMissingColumn(column, rowIndex) {
    if (!column.options.nullable && column.options.defaultValue === void 0) {
      return {
        error: {
          row: rowIndex + this.rowIndexOffset,
          column: column.csvNames[0],
          property: column.propertyName,
          value: "",
          message: `Column "${column.csvNames.join('" or "')}" not found`,
          type: "missing"
        }
      };
    }
    return { value: column.options.defaultValue ?? null };
  }

  /**
   * Result for an empty cell: `null` for nullable columns (checked before
   * the default), else the default value, else a validation error.
   */
  handleEmptyValue(column, columnName, rowIndex) {
    if (column.options.nullable) {
      return { value: null };
    }
    if (column.options.defaultValue !== void 0) {
      return { value: column.options.defaultValue };
    }
    return {
      error: {
        row: rowIndex + this.rowIndexOffset,
        column: columnName,
        property: column.propertyName,
        value: "",
        message: "Required field is empty",
        type: "validation"
      }
    };
  }

  /**
   * Applies the column's `transform`, then its `validate`, to a non-empty
   * cell. Errors report the untransformed cell text as `value`.
   */
  transformAndValidate(value, column, columnName, rowIndex) {
    const transformResult = this.applyTransform(
      value,
      column.options.transform
    );
    if ("error" in transformResult) {
      return {
        error: {
          row: rowIndex + this.rowIndexOffset,
          column: columnName,
          property: column.propertyName,
          value,
          message: transformResult.error,
          type: "transform"
        }
      };
    }
    const validationError = column.options.validate?.(transformResult.value);
    if (validationError) {
      return {
        error: {
          row: rowIndex + this.rowIndexOffset,
          column: columnName,
          property: column.propertyName,
          value,
          message: validationError,
          type: "validation"
        }
      };
    }
    return { value: transformResult.value };
  }

  /**
   * Runs `transform` on `value`, converting a thrown exception into an
   * `{ error: message }` result. Without a transform the value passes through.
   */
  applyTransform(value, transform) {
    if (!transform) {
      return { value };
    }
    try {
      return { value: transform(value) };
    } catch (e) {
      return {
        error: e instanceof Error ? e.message : "Transform function failed"
      };
    }
  }

  /**
   * Parses a string, or reads a file-like object as text and parses it.
   *
   * @param {string|Blob} input - CSV text, or an object readable as text
   *   (assumed to be a Blob/File).
   * @returns {Promise<object>} Same shape as `parse()`.
   */
  async parseAsync(input) {
    if (typeof input === "string") {
      return this.parse(input);
    }
    if (typeof FileReader === "undefined") {
      // Runtimes without FileReader (e.g. Node.js) can still read a Blob
      // through its own text() method.
      let text;
      try {
        text = await input.text();
      } catch (error) {
        throw new Error("Failed to read file", { cause: error });
      }
      return this.parse(text);
    }
    return new Promise((resolve, reject) => {
      const reader = new FileReader();
      reader.onload = () => {
        try {
          resolve(this.parse(reader.result));
        } catch (error) {
          reject(error);
        }
      };
      reader.onerror = () => reject(new Error("Failed to read file"));
      reader.readAsText(input);
    });
  }
};
// Annotate the CommonJS export names for ESM import in node:
// The `0 &&` guard short-circuits, so this assignment never runs; the
// statement exists only so the export name `Parser` appears literally in the
// source text.
0 && (module.exports = {
Parser
});