UNPKG

@maximai/maxim-js

Version:

Maxim AI JS SDK. Visit https://getmaxim.ai for more info.

314 lines 12.4 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.CSVFile = void 0; const fs_1 = require("fs"); const os_1 = require("os"); const stream_1 = require("stream"); class CSVFile { constructor(filePath, columnStructure, options = {}) { var _a, _b, _c, _d; this.headerRow = null; this.rowCount = null; this.columnCount = null; this.fileStats = null; this.filePath = filePath; this.columnStructure = columnStructure; this.options = { delimiter: (_a = options.delimiter) !== null && _a !== void 0 ? _a : ",", hasHeader: columnStructure ? true : ((_b = options.hasHeader) !== null && _b !== void 0 ? _b : true), quoteChar: (_c = options.quoteChar) !== null && _c !== void 0 ? _c : '"', escapeChar: (_d = options.escapeChar) !== null && _d !== void 0 ? _d : '"', }; if (columnStructure) { this.validateHeaders(); } } async validateHeaders() { const headerRow = await this.getHeader(); if (!headerRow) { throw new Error("Failed to read header row from CSV file."); } if (this.columnStructure) { for (const [columnName, index] of Object.entries(this.columnStructure)) { if (headerRow[index] !== columnName) { throw new Error(`Column structure does not match CSV "${this.filePath}" headers. \nExpected column "${columnName}" at index [${index}], but found "${headerRow[index]}".`, { cause: JSON.stringify({ expectedColumn: columnName, expectedIndex: index, actualColumn: headerRow[index], headerRow: headerRow, }, null, 2), }); } } } } parseRow(row) { const { delimiter, quoteChar, escapeChar } = this.options; const result = []; let field = ""; let inQuotes = false; for (let i = 0; i < row.length; i++) { const char = row[i]; const nextChar = row[i + 1]; if (inQuotes) { if (char === quoteChar && nextChar === quoteChar) { field += char; i++; } else if (char === quoteChar) { inQuotes = false; } else { field += char; } } else { if (char === quoteChar) { inQuotes = true; } else if (char === delimiter) { result.push(field); field = ""; } else if (char === escapeChar && nextChar === delimiter) { field += delimiter; i++; } else { field += char; } } } result.push(field); return result; } async getRowCount() { if (this.rowCount === null) { let count = 0; await this.processFile((row) => { count++; }); this.rowCount = count; } return this.rowCount; } async getColumnCount() { if (this.columnCount === null) { const headerRow = await this.getHeader(); if (!headerRow) { throw new Error("Failed to read header row from CSV file."); } this.columnCount = headerRow.length; } return this.columnCount; } async getHeader() { if (this.headerRow === null) { return new Promise((resolve, reject) => { let headerProcessed = false; const parser = new stream_1.Transform({ transform: (chunk, encoding, callback) => { if (!headerProcessed) { const line = chunk.toString().split(os_1.EOL)[0]; this.headerRow = this.parseRow(line); headerProcessed = true; parser.destroy(); resolve(this.headerRow); } callback(); }, flush: (callback) => { if (!headerProcessed) { reject(new Error("Failed to read header row from CSV file.")); } callback(); }, }); (0, fs_1.createReadStream)(this.filePath) .pipe(parser) .on("error", (err) => { reject(err); }); }); } return this.headerRow; } async getRow(index) { if (index < 0) { throw new Error("Row index must be non-negative."); } return new Promise((resolve, reject) => { let currentRow = -1; let bytesRead = 0; let lineBuffer = ""; const parser = new stream_1.Transform({ transform: (chunk, encoding, callback) => { bytesRead += chunk.length; const data = lineBuffer + chunk.toString(); const lines = data.split(os_1.EOL); lineBuffer = lines.pop() || ""; for (const line of lines) { if (this.options.hasHeader && currentRow === -1) { currentRow++; continue; } if (currentRow === index) { const parsedRow = this.parseRow(line); resolve(this.createTypedRow(parsedRow)); parser.destroy(); return; } currentRow++; } callback(); }, flush: (callback) => { if (lineBuffer) { currentRow++; if (currentRow === index) { const parsedRow = this.parseRow(lineBuffer); resolve(this.createTypedRow(parsedRow)); } else { reject(new Error(`Row index ${index} is out of bounds.`)); } } else { reject(new Error(`Row index ${index} is out of bounds.`)); } callback(); }, }); (0, fs_1.createReadStream)(this.filePath) .pipe(parser) .on("error", (err) => { reject(err); }); }); } createTypedRow(row) { if (this.columnStructure) { const typedRow = {}; for (const [key, index] of Object.entries(this.columnStructure)) { typedRow[key] = row[index]; } return typedRow; } return row; } async filter(predicate) { const result = []; await this.processFile((row) => { const typedRow = this.createTypedRow(row); if (predicate(typedRow)) { result.push(typedRow); } }); return result; } async map(mapper) { const result = []; await this.processFile((row) => { const typedRow = this.createTypedRow(row); result.push(mapper(typedRow)); }); return result; } async processFile(rowProcessor) { return new Promise((resolve, reject) => { let isFirstRow = true; let lineBuffer = ""; const parser = new stream_1.Transform({ transform: (chunk, encoding, callback) => { const data = lineBuffer + chunk.toString(); const lines = data.split(os_1.EOL); lineBuffer = lines.pop() || ""; for (const line of lines) { if (isFirstRow && this.options.hasHeader) { isFirstRow = false; continue; } const row = this.parseRow(line); rowProcessor(row); } callback(); }, flush: (callback) => { if (lineBuffer) { const row = this.parseRow(lineBuffer); rowProcessor(row); } resolve(); callback(); }, }); (0, fs_1.createReadStream)(this.filePath) .pipe(parser) .on("error", (err) => { reject(err); }); }); } static async restructure(csvFile, newColumnStructure) { const newCsvFile = new CSVFile(csvFile.filePath); Object.assign(newCsvFile, csvFile); newCsvFile.columnStructure = newColumnStructure; await newCsvFile.validateHeaders(); newCsvFile.columnCount = null; return newCsvFile; } static async writeToFile(data, outputPath, columnStructure, options = {}) { var _a, _b, _c, _d; const writeOptions = { delimiter: (_a = options.delimiter) !== null && _a !== void 0 ? _a : ",", includeHeader: (_b = options.includeHeader) !== null && _b !== void 0 ? _b : true, quoteChar: (_c = options.quoteChar) !== null && _c !== void 0 ? _c : '"', escapeChar: (_d = options.escapeChar) !== null && _d !== void 0 ? _d : '"', }; return new Promise((resolve, reject) => { const writeStream = (0, fs_1.createWriteStream)(outputPath); const processRow = (row) => { const rowArray = columnStructure ? Object.keys(columnStructure).map((key) => row[key]) : row; return CSVFile.formatRow(rowArray, writeOptions) + os_1.EOL; }; writeStream.on("error", (err) => { reject(err); }); writeStream.on("finish", () => { resolve(); }); const writeData = async () => { if (writeOptions.includeHeader && columnStructure) { const headerRow = Object.keys(columnStructure); if (!writeStream.write(CSVFile.formatRow(headerRow, writeOptions) + os_1.EOL)) { await new Promise((resolve) => writeStream.once("drain", () => resolve("drained"))); } } for (const row of data) { if (!writeStream.write(processRow(row))) { await new Promise((resolve) => writeStream.once("drain", () => resolve("drained"))); } } writeStream.end(); }; writeData().catch(reject); }); } static formatRow(row, options) { var _a; return row .map((field) => { var _a, _b, _c, _d, _e, _f; if (field.includes((_a = options.delimiter) !== null && _a !== void 0 ? _a : ",") || field.includes((_b = options.quoteChar) !== null && _b !== void 0 ? _b : '"') || field.includes("\n")) { return `${options.quoteChar}${field.replace(new RegExp((_c = options.quoteChar) !== null && _c !== void 0 ? _c : '"', "g"), (_e = (_d = options.escapeChar) !== null && _d !== void 0 ? _d : '"' + options.quoteChar) !== null && _e !== void 0 ? _e : '"')}${(_f = options.quoteChar) !== null && _f !== void 0 ? _f : '"'}`; } return field; }) .join((_a = options.delimiter) !== null && _a !== void 0 ? _a : ","); } } exports.CSVFile = CSVFile; //# sourceMappingURL=csvParser.js.map