UNPKG

ts-csv-parser

Version:

Utility package for parsing CSV files.

388 lines (384 loc) 12 kB
// ---------------------------------------------------------------------------
// Bundler-generated CommonJS interop helpers.
// They build the `module.exports` object with lazy getters so that exported
// bindings can be declared further down in the file.
// ---------------------------------------------------------------------------
const __defProp = Object.defineProperty;
const __getOwnPropDesc = Object.getOwnPropertyDescriptor;
const __getOwnPropNames = Object.getOwnPropertyNames;
const __hasOwnProp = Object.prototype.hasOwnProperty;
const __export = (target, all) => {
  for (const name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};
const __copyProps = (to, from, except, desc) => {
  if ((from && typeof from === "object") || typeof from === "function") {
    for (const key of __getOwnPropNames(from)) {
      if (!__hasOwnProp.call(to, key) && key !== except) {
        __defProp(to, key, {
          get: () => from[key],
          enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable,
        });
      }
    }
  }
  return to;
};
const __toCommonJS = (mod) =>
  __copyProps(__defProp({}, "__esModule", { value: true }), mod);

// src/index.ts
const index_exports = {};
// The getter is lazy, so referencing `Parser` before its declaration is safe.
__export(index_exports, {
  Parser: () => Parser,
});
module.exports = __toCommonJS(index_exports);

// src/csv-tokenizer.ts

/**
 * Splits CSV text into a header record and data records.
 *
 * The first completed record after `skipRows` physical lines becomes the
 * header; every later record is a data row. Quoted fields may span several
 * physical lines.
 *
 * @param {string} input - Raw CSV text.
 * @param {object} options - Tokenizer options (`delimiter`, `quote`,
 *   `escape`, `skipRows`, `skipEmptyLines`).
 * @returns {{ headers: string[], rows: string[][] }} Parsed header and rows.
 */
function parseCSV(input, options) {
  const config = {
    delimiter: options.delimiter || ",",
    quote: options.quote || '"',
    escape: options.escape || '"',
    skipRows: options.skipRows || 0,
    skipEmptyLines: options.skipEmptyLines !== false,
  };
  const lines = input.split(/\r?\n/);
  // A UTF-8 byte-order mark would otherwise be glued onto the first header
  // name and make that column impossible to match.
  if (lines[0].charCodeAt(0) === 0xfeff) {
    lines[0] = lines[0].slice(1);
  }
  const lastLineIndex = lines.length - 1;
  // Single accumulator mutated in place by processLine: appending a record is
  // O(1) instead of copying the whole `rows` array for every line.
  const state = {
    currentField: "",
    currentRow: [],
    inQuotes: false,
    rows: [],
    headers: [],
    lineIndex: 0,
  };
  for (let index = 0; index < lines.length; index++) {
    if (index < config.skipRows) {
      continue;
    }
    processLine(lines[index], state, config, index === lastLineIndex);
  }
  const result = finalizeParsing(state, config);
  return { headers: result.headers, rows: result.rows };
}

/**
 * Decides whether a completed record is kept as a data row.
 * With `skipEmptyLines` enabled, a record consisting of one empty field is
 * dropped; anything else is kept.
 *
 * @param {string[]} record - Completed record.
 * @param {{ skipEmptyLines: boolean }} config - Normalized tokenizer config.
 * @returns {boolean} True when the record should be appended to the rows.
 */
function shouldKeepRow(record, config) {
  return (
    !config.skipEmptyLines ||
    record.length > 1 ||
    record.some((field) => field !== "")
  );
}

/**
 * Tokenizes one physical line and folds it into the parser state.
 *
 * `state` is MUTATED in place and also returned. When the line ends inside an
 * open quoted field (and more lines follow) the partial field and row are
 * carried over; otherwise the record is completed and stored either as the
 * header (first record) or as a data row.
 *
 * @param {string} line - One physical line without its line terminator.
 * @param {object} state - Accumulator (`currentField`, `currentRow`,
 *   `inQuotes`, `rows`, `headers`, `lineIndex`).
 * @param {object} config - Normalized tokenizer config from parseCSV.
 * @param {boolean} isLastLine - True for the final physical line of the input.
 * @returns {object} The same `state` object.
 */
function processLine(line, state, config, isLastLine) {
  // Blank lines are only skipped between records, never inside a quoted field.
  if (config.skipEmptyLines && !state.inQuotes && line.trim() === "") {
    return state;
  }
  const { delimiter, quote, escape } = config;
  const lastDelimiterChar = delimiter[delimiter.length - 1];
  const row = state.currentRow;
  let field = state.currentField;
  let inQuotes = state.inQuotes;
  let position = 0;

  while (position < line.length) {
    const char = line[position];
    const nextIsQuote =
      position + 1 < line.length && line[position + 1] === quote;
    if (inQuotes) {
      if (char === quote) {
        if (nextIsQuote && escape === quote) {
          // Doubled quote inside a quoted field -> one literal quote.
          field += quote;
          position += 2;
        } else {
          inQuotes = false;
          position++;
        }
      } else if (char === escape && escape !== quote && nextIsQuote) {
        // Dedicated escape character followed by a quote -> literal quote.
        field += quote;
        position += 2;
      } else {
        field += char;
        position++;
      }
    } else if (line.startsWith(delimiter, position)) {
      row.push(field);
      field = "";
      position += delimiter.length;
    } else if (
      char === quote &&
      (position === 0 ||
        line[position - 1] === lastDelimiterChar ||
        field === "")
    ) {
      // A quote only opens a quoted section at the start of a field.
      inQuotes = true;
      position++;
    } else {
      field += char;
      position++;
    }
  }

  if (inQuotes && !isLastLine) {
    // The quoted field continues on the next physical line; the line break
    // that split() consumed is restored as "\n".
    state.currentField = field + "\n";
    state.inQuotes = true;
    return state;
  }

  row.push(field);
  state.currentField = "";
  state.currentRow = [];
  state.inQuotes = false;
  if (state.lineIndex === 0) {
    state.headers = row;
  } else if (shouldKeepRow(row, config)) {
    state.rows.push(row);
  }
  state.lineIndex += 1;
  return state;
}

/**
 * Flushes a partially accumulated record, if one is still pending.
 * processLine already completes the record on the last line, so this is kept
 * as a safety net for a state that still carries a field or row.
 *
 * @param {object} state - Accumulator produced by processLine.
 * @param {object} config - Normalized tokenizer config from parseCSV.
 * @returns {object} State whose `headers` / `rows` include the pending record.
 */
function finalizeParsing(state, config) {
  if (!state.currentField && state.currentRow.length === 0) {
    return state;
  }
  const finalRow = [...state.currentRow, state.currentField];
  if (state.lineIndex === 0) {
    return { ...state, headers: finalRow };
  }
  return {
    ...state,
    rows: shouldKeepRow(finalRow, config)
      ? [...state.rows, finalRow]
      : state.rows,
  };
}

// src/parser.ts

/**
 * Creates a new parser that shares the prototype of `source` and copies its
 * configuration, with the given fields replaced. The source is not modified.
 *
 * @param {Parser} source - Parser to derive from.
 * @param {object} overrides - Fields to replace (`columns` or `rowValidators`).
 * @returns {Parser} The derived parser.
 */
function deriveParser(source, overrides) {
  return Object.assign(
    Object.create(Object.getPrototypeOf(source)),
    {
      columns: source.columns,
      options: source.options,
      rowValidators: source.rowValidators,
      rowIndexOffset: source.rowIndexOffset,
    },
    overrides
  );
}

/**
 * Declarative CSV-to-record parser.
 *
 * `col()` and `val()` return NEW parser instances, so a configured parser can
 * be shared and extended without affecting the original.
 */
class Parser {
  columns = [];
  options;
  rowValidators = [];
  // Added to the zero-based data-row index when reporting errors, so the first
  // data row is reported as row 2.
  // NOTE(review): presumably header = line 1 with 1-based numbering; skipRows
  // and multi-line records are not reflected in the number - confirm intent.
  rowIndexOffset = 2;

  /**
   * @param {object} [options] - Overrides for the defaults below.
   */
  constructor(options = {}) {
    this.options = {
      delimiter: ",",
      quote: '"',
      escape: '"',
      skipEmptyLines: true,
      skipRows: 0,
      trim: true,
      caseInsensitiveColumnNames: false,
      ...options,
    };
  }

  /**
   * Registers a column mapping.
   *
   * @param {string|string[]} csvColumnName - Header name, or alternatives
   *   tried in order.
   * @param {string} propertyName - Property to set on the output record.
   * @param {object} [options] - Per-column options (`trim`,
   *   `caseInsensitiveColumnNames`, `nullable`, `defaultValue`, `transform`,
   *   `validate`); parser-level `trim` / `caseInsensitiveColumnNames` are the
   *   defaults.
   * @returns {Parser} A new parser including this column.
   */
  col(csvColumnName, propertyName, options) {
    const csvNames = Array.isArray(csvColumnName)
      ? csvColumnName
      : [csvColumnName];
    const column = {
      csvNames,
      propertyName,
      options: {
        trim: this.options.trim,
        caseInsensitiveColumnNames: this.options.caseInsensitiveColumnNames,
        ...options,
      },
    };
    return deriveParser(this, { columns: [...this.columns, column] });
  }

  /**
   * Registers a row-level validator. It receives the built record and returns
   * an error message, or a falsy value when the row is valid.
   *
   * @param {(record: object) => (string|null|undefined)} validator
   * @returns {Parser} A new parser including this validator.
   */
  val(validator) {
    return deriveParser(this, {
      rowValidators: [...this.rowValidators, validator],
    });
  }

  /**
   * Parses CSV text into records and collected errors.
   *
   * @param {string} input - Raw CSV text.
   * @returns {{ success: object[], errors: object[], hasErrors: boolean }}
   *   `success` holds the records of rows without errors.
   * @throws {Error} "Failed to parse CSV: ..." when parsing throws; the
   *   original error is available as `cause`.
   */
  parse(input) {
    try {
      const { headers, rows } = parseCSV(input, this.options);
      // For duplicate header names the last occurrence wins.
      const headerIndex = new Map(
        headers.map((header, index) => [header, index])
      );
      const results = rows.map((row, rowIndex) =>
        this.processRow(row, rowIndex, headerIndex)
      );
      const success = results
        .filter((result) => result.errors.length === 0)
        .map((result) => result.record);
      const errors = results.flatMap((result) => result.errors);
      return { success, errors, hasErrors: errors.length > 0 };
    } catch (e) {
      throw new Error(
        `Failed to parse CSV: ${e instanceof Error ? e.message : "Unknown error"}`,
        { cause: e }
      );
    }
  }

  /**
   * Builds one record from a data row and collects its errors.
   * Row validators only run when no column produced an error.
   *
   * @param {string[]} row - Field values of the row.
   * @param {number} rowIndex - Zero-based index among the data rows.
   * @param {Map<string, number>} headerIndex - Header name -> column index.
   * @returns {{ record: object, errors: object[] }}
   */
  processRow(row, rowIndex, headerIndex) {
    const columnResults = this.columns.map((column) => ({
      column,
      result: this.processColumn(column, row, rowIndex, headerIndex),
    }));
    const errors = columnResults
      .map(({ result }) => result.error)
      .filter((error) => error !== undefined);
    // fromEntries defines plain own properties, so any property name is safe.
    const record = Object.fromEntries(
      columnResults
        .filter(({ result }) => "value" in result)
        .map(({ column, result }) => [column.propertyName, result.value])
    );

    if (errors.length === 0) {
      for (const validator of this.rowValidators) {
        const message = validator(record);
        if (message) {
          errors.push({
            row: rowIndex + this.rowIndexOffset,
            value: JSON.stringify(record),
            message,
            type: "row-validation",
          });
        }
      }
    }
    return { record, errors };
  }

  /**
   * Resolves, trims, transforms and validates the value of one column.
   *
   * @returns {{ value: * } | { error: object }}
   */
  processColumn(column, row, rowIndex, headerIndex) {
    const match = this.findColumnMatchForColumn(column, headerIndex);
    if (!match) {
      return this.handleMissingColumn(column, rowIndex);
    }
    // Rows shorter than the header yield undefined; treat that as empty.
    const rawValue = row[match.columnIndex] || "";
    const cellValue = column.options.trim !== false ? rawValue.trim() : rawValue;
    if (!cellValue) {
      return this.handleEmptyValue(column, match.columnName, rowIndex);
    }
    return this.transformAndValidate(
      cellValue,
      column,
      match.columnName,
      rowIndex
    );
  }

  /**
   * Finds the header matching one of the column's CSV names. Exact matches
   * are preferred; a case-insensitive search is the fallback when enabled.
   *
   * @returns {{ columnIndex: number, columnName: string } | null}
   */
  findColumnMatchForColumn(column, headerIndex) {
    for (const name of column.csvNames) {
      const index = headerIndex.get(name);
      if (index !== undefined) {
        return { columnIndex: index, columnName: name };
      }
    }
    const useCaseInsensitive =
      column.options.caseInsensitiveColumnNames ??
      this.options.caseInsensitiveColumnNames;
    return useCaseInsensitive
      ? this.findCaseInsensitiveMatch(column.csvNames, headerIndex)
      : null;
  }

  /**
   * Case-insensitive header lookup; the reported name is the header as
   * written in the CSV.
   *
   * @returns {{ columnIndex: number, columnName: string } | null}
   */
  findCaseInsensitiveMatch(csvNames, headerIndex) {
    const headerEntries = Array.from(headerIndex.entries());
    for (const csvName of csvNames) {
      const wanted = csvName.toLowerCase();
      const entry = headerEntries.find(
        ([header]) => header.toLowerCase() === wanted
      );
      if (entry) {
        const [originalHeaderName, index] = entry;
        return { columnIndex: index, columnName: originalHeaderName };
      }
    }
    return null;
  }

  /**
   * Result for a column whose header is absent: an error unless the column
   * is nullable or has a default value.
   */
  handleMissingColumn(column, rowIndex) {
    const { nullable, defaultValue } = column.options;
    if (nullable || defaultValue !== undefined) {
      return { value: defaultValue ?? null };
    }
    return {
      error: {
        row: rowIndex + this.rowIndexOffset,
        column: column.csvNames[0],
        property: column.propertyName,
        value: "",
        message: `Column "${column.csvNames.join('" or "')}" not found`,
        type: "missing",
      },
    };
  }

  /**
   * Result for an empty cell: `null` when nullable (checked first), else the
   * default value when defined, else a validation error.
   */
  handleEmptyValue(column, columnName, rowIndex) {
    if (column.options.nullable) {
      return { value: null };
    }
    if (column.options.defaultValue !== undefined) {
      return { value: column.options.defaultValue };
    }
    return {
      error: {
        row: rowIndex + this.rowIndexOffset,
        column: columnName,
        property: column.propertyName,
        value: "",
        message: "Required field is empty",
        type: "validation",
      },
    };
  }

  /**
   * Applies the column's `transform`, then its `validate` callback.
   * `validate` returns an error message, or a falsy value when valid.
   */
  transformAndValidate(value, column, columnName, rowIndex) {
    const describeError = (message, type) => ({
      error: {
        row: rowIndex + this.rowIndexOffset,
        column: columnName,
        property: column.propertyName,
        value,
        message,
        type,
      },
    });

    const transformResult = this.applyTransform(value, column.options.transform);
    if ("error" in transformResult) {
      return describeError(transformResult.error, "transform");
    }
    const validationError = column.options.validate?.(transformResult.value);
    if (validationError) {
      return describeError(validationError, "validation");
    }
    return { value: transformResult.value };
  }

  /**
   * Runs `transform` and converts a thrown exception into an error message.
   *
   * @returns {{ value: * } | { error: string }}
   */
  applyTransform(value, transform) {
    if (!transform) {
      return { value };
    }
    try {
      return { value: transform(value) };
    } catch (e) {
      return {
        error: e instanceof Error ? e.message : "Transform function failed",
      };
    }
  }

  /**
   * Parses a string, or reads a File/Blob as text and parses it.
   *
   * @param {string|Blob} input - CSV text or a file-like object.
   * @returns {Promise<{ success: object[], errors: object[], hasErrors: boolean }>}
   */
  async parseAsync(input) {
    if (typeof input === "string") {
      return this.parse(input);
    }
    // Environments without FileReader can still read a Blob via its own
    // text() method.
    if (typeof FileReader === "undefined" && typeof input?.text === "function") {
      return this.parse(await input.text());
    }
    return new Promise((resolve, reject) => {
      const reader = new FileReader();
      reader.onload = (e) => {
        const text = e.target?.result;
        try {
          resolve(this.parse(text));
        } catch (error) {
          reject(error);
        }
      };
      reader.onerror = () => reject(new Error("Failed to read file"));
      reader.readAsText(input);
    });
  }
}

// Annotate the CommonJS export names for ESM import in node:
0 && (module.exports = { Parser });