UNPKG

@maximai/maxim-js

Version:

Maxim AI JS SDK. Visit https://getmaxim.ai for more info.

521 lines 20.3 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.CSVFile = void 0; const platform_1 = require("../platform"); /** * Represents a CSV file with optional type information for its columns. * * Provides methods for reading, parsing, and manipulating CSV data with * optional type safety through column structure definitions. Supports * both streaming and batch operations for efficient processing of large files. * * @template T - Optional column structure type for typed access * @class CSVFile * @example * // Basic usage without column structure * const csvFile = new CSVFile("data.csv"); * const rowCount = await csvFile.getRowCount(); * const firstRow = await csvFile.getRow(0); * * @example * // With typed column structure * const typedCSV = new CSVFile("users.csv", { * name: 0, * email: 1, * age: 2 * }); * * const user = await typedCSV.getRow(0); * console.log(user.name, user.email, user.age); // Type-safe access * * @example * // Filtering and mapping * const adults = await csvFile.filter(row => parseInt(row.age) >= 18); * const names = await csvFile.map(row => row.name.toUpperCase()); * * @example * // Writing data to CSV * await CSVFile.writeToFile( * [{ name: "John", email: "john@example.com" }], * "output.csv", * { name: 0, email: 1 } * ); */ class CSVFile { /** * Creates a new CSVFile instance. * * @param filePath - The path to the CSV file * @param columnStructure - Optional column structure mapping column names to indices * @param options - Optional parsing configuration * @throws {Error} When column structure is provided but doesn't match the file headers * @example * // Simple CSV file * const csv = new CSVFile("data.csv"); * * @example * // With column structure for type safety * const csv = new CSVFile("users.csv", { * id: 0, * name: 1, * email: 2 * }); * * @example * // With custom parsing options * const csv = new CSVFile("data.tsv", undefined, { * delimiter: "\t", * hasHeader: false, * quoteChar: "'" * }); */ constructor(filePath, columnStructure, options = {}) { var _a, _b, _c, _d; this.headerRow = null; this.rowCount = null; this.columnCount = null; this.fileStats = null; this.filePath = filePath; this.columnStructure = columnStructure; this.options = { delimiter: (_a = options.delimiter) !== null && _a !== void 0 ? _a : ",", hasHeader: columnStructure ? true : (_b = options.hasHeader) !== null && _b !== void 0 ? _b : true, quoteChar: (_c = options.quoteChar) !== null && _c !== void 0 ? _c : '"', escapeChar: (_d = options.escapeChar) !== null && _d !== void 0 ? _d : '"', }; // If column structure is provided, validate headers immediately if (columnStructure) { this.validateHeaders(); } } /** * Validates the headers of the CSV file against the provided column structure. * * @private * @async * @returns * @throws {Error} When headers don't match the column structure */ async validateHeaders() { const headerRow = await this.getHeader(); if (!headerRow) { throw new Error("Failed to read header row from CSV file."); } if (this.columnStructure) { for (const [columnName, index] of Object.entries(this.columnStructure)) { if (headerRow[index] !== columnName) { throw new Error(`Column structure does not match CSV "${this.filePath}" headers. \nExpected column "${columnName}" at index [${index}], but found "${headerRow[index]}".`, { cause: JSON.stringify({ expectedColumn: columnName, expectedIndex: index, actualColumn: headerRow[index], headerRow: headerRow, }, null, 2), }); } } } } /** * Parses a single row of the CSV file, handling quotes and escaping. * * @private * @param row - The raw row string to parse * @returns Array of parsed field values */ parseRow(row) { const { delimiter, quoteChar, escapeChar } = this.options; const result = []; let field = ""; let inQuotes = false; for (let i = 0; i < row.length; i++) { const char = row[i]; const nextChar = row[i + 1]; if (inQuotes) { if (char === quoteChar && nextChar === quoteChar) { field += char; i++; } else if (char === quoteChar) { inQuotes = false; } else { field += char; } } else { if (char === quoteChar) { inQuotes = true; } else if (char === delimiter) { result.push(field); field = ""; } else if (char === escapeChar && nextChar === delimiter) { field += delimiter; i++; } else { field += char; } } } result.push(field); return result; } /** * Gets the total number of rows in the CSV file. * * @async * @returns The total number of rows (excluding header if present) * @example * const csv = new CSVFile("large-dataset.csv"); * const totalRows = await csv.getRowCount(); * console.log(`Dataset contains ${totalRows} records`); */ async getRowCount() { if (this.rowCount === null) { let count = 0; await this.processFile((row) => { count++; }); this.rowCount = count; } return this.rowCount; } /** * Gets the number of columns in the CSV file. * * @async * @returns The number of columns * @throws {Error} When unable to read the header row * @example * const columnCount = await csv.getColumnCount(); * console.log(`CSV has ${columnCount} columns`); */ async getColumnCount() { if (this.columnCount === null) { const headerRow = await this.getHeader(); if (!headerRow) { throw new Error("Failed to read header row from CSV file."); } this.columnCount = headerRow.length; } return this.columnCount; } /** * Gets the header row of the CSV file. * * @async * @returns Array of header field names, or null if no header * @throws {Error} When unable to read the header row * @example * const headers = await csv.getHeader(); * console.log("Columns:", headers); // ["name", "email", "age"] */ async getHeader() { if (!platform_1.platform.features.csvSupported) { throw new Error("CSV file operations are not supported in this environment"); } if (this.headerRow === null) { return new Promise((resolve, reject) => { let headerProcessed = false; let buffer = ""; const parser = new platform_1.platform.stream.Transform({ transform: (chunk, _enc, cb) => { if (headerProcessed) return cb(); buffer += chunk.toString(); const nl = buffer.indexOf("\n"); if (nl !== -1) { const line = buffer.slice(0, nl).replace(/\r$/, ""); this.headerRow = this.parseRow(line); headerProcessed = true; parser.destroy(); resolve(this.headerRow); } cb(); }, flush: (cb) => { if (!headerProcessed && buffer.length > 0) { const line = buffer.replace(/\r$/, ""); this.headerRow = this.parseRow(line); headerProcessed = true; resolve(this.headerRow); } else if (!headerProcessed) { reject(new Error("Failed to read header row from CSV file.")); } cb(); }, }); platform_1.platform.fs .createReadStream(this.filePath) .pipe(parser) .on("error", (err) => { reject(err); }); }); } return this.headerRow; } /** * Gets a specific row from the CSV file. * @param index The zero-based index of the row to retrieve. * @returns A promise that resolves to the row data, either as an object (if column structure is provided) or as an array of strings. * @throws {Error} if the row index is out of bounds. * @example * const row = await csvFile.getRow(0); * console.log(row); * // { column1: "value1", column2: "value2" } (if column structure is provided) * // OR * // ["value1", "value2"] (if column structure is not provided) */ async getRow(index) { if (!platform_1.platform.features.csvSupported) { throw new Error("CSV file operations are not supported in this environment"); } if (index < 0) { throw new Error("Row index must be non-negative."); } return new Promise((resolve, reject) => { let currentRow = -1; let bytesRead = 0; let lineBuffer = ""; const parser = new platform_1.platform.stream.Transform({ transform: (chunk, encoding, callback) => { bytesRead += chunk.length; const data = lineBuffer + chunk.toString(); const lines = data.split("\n"); lineBuffer = lines.pop() || ""; for (const line of lines) { if (this.options.hasHeader && currentRow === -1) { currentRow++; continue; } if (currentRow === index) { const parsedRow = this.parseRow(line); resolve(this.createTypedRow(parsedRow)); parser.destroy(); return; } currentRow++; } callback(); }, flush: (callback) => { if (lineBuffer) { currentRow++; if (currentRow === index) { const parsedRow = this.parseRow(lineBuffer); resolve(this.createTypedRow(parsedRow)); } else { reject(new Error(`Row index ${index} is out of bounds.`)); } } else { reject(new Error(`Row index ${index} is out of bounds.`)); } callback(); }, }); platform_1.platform.fs .createReadStream(this.filePath) .pipe(parser) .on("error", (err) => { reject(err); }); }); } /** * Creates a typed row object based on the column structure. * @private * @param row The raw row data as an array of strings. * @returns A typed row object if column structure is provided, otherwise the original array. */ createTypedRow(row) { if (this.columnStructure) { const typedRow = {}; for (const [key, index] of Object.entries(this.columnStructure)) { typedRow[key] = row[index]; } return typedRow; } return row; } /** * Filters rows of the CSV file based on a predicate function. * @param predicate A function that takes a row and returns true if the row should be included in the result. * @returns A promise that resolves to an array of filtered rows. */ async filter(predicate) { const result = []; await this.processFile((row) => { const typedRow = this.createTypedRow(row); if (predicate(typedRow)) { result.push(typedRow); } }); return result; } /** * Maps each row of the CSV file using a mapper function. * @param mapper A function that takes a row and returns a transformed value. * @returns A promise that resolves to an array of mapped values. */ async map(mapper) { const result = []; await this.processFile((row) => { const typedRow = this.createTypedRow(row); result.push(mapper(typedRow)); }); return result; } /** * Processes the CSV file row by row. * @private * @param rowProcessor A function to process each row. * @returns A promise that resolves when all rows have been processed. */ async processFile(rowProcessor) { if (!platform_1.platform.features.csvSupported) { throw new Error("CSV file operations are not supported in this environment"); } return new Promise((resolve, reject) => { let isFirstRow = true; let lineBuffer = ""; const parser = new platform_1.platform.stream.Transform({ transform: (chunk, encoding, callback) => { const data = lineBuffer + chunk.toString(); const lines = data.split("\n"); lineBuffer = lines.pop() || ""; for (const line of lines) { if (isFirstRow && this.options.hasHeader) { isFirstRow = false; continue; } const row = this.parseRow(line); rowProcessor(row); } callback(); }, flush: (callback) => { if (lineBuffer) { const row = this.parseRow(lineBuffer); rowProcessor(row); } resolve(); callback(); }, }); platform_1.platform.fs .createReadStream(this.filePath) .pipe(parser) .on("error", (err) => { reject(err); }); }); } /** * Restructures a CSVFile object with a new column structure. * @param csvFile The original CSVFile object. * @param newColumnStructure The new column structure to apply. * @returns A new CSVFile object with the updated column structure. * @throws {Error} if the new column structure doesn't match the CSV file headers. */ static async restructure(csvFile, newColumnStructure) { // Create a new CSVFile object with the same file path const newCsvFile = new CSVFile(csvFile.filePath); // Copy all enumerable properties from the original object to the new one Object.assign(newCsvFile, csvFile); // Update the column structure newCsvFile.columnStructure = newColumnStructure; // Validate headers with the new column structure await newCsvFile.validateHeaders(); // Reset cached values that might be affected by the new structure newCsvFile.columnCount = null; return newCsvFile; } /** * Writes data to a CSV file. * @param data The data to write, either as an array of objects or an array of arrays. * @param outputPath The path where the CSV file should be written. * @param columnStructure Optional column structure for typed data. * @param options Optional writing options. * @returns A promise that resolves when the file has been written. * @example * const data = [ * { column1: "value1", column2: "value2" }, * { column1: "value3", column2: "value4" }, * ]; * await CSVFile.writeToFile( * data, * "path/to/output.csv", * { column1: 0, column2: 1 } * ); */ static async writeToFile(data, outputPath, columnStructure, options = {}) { var _a, _b, _c, _d; if (!platform_1.platform.features.csvSupported) { throw new Error("CSV file operations are not supported in this environment"); } const writeOptions = { delimiter: (_a = options.delimiter) !== null && _a !== void 0 ? _a : ",", includeHeader: (_b = options.includeHeader) !== null && _b !== void 0 ? _b : true, quoteChar: (_c = options.quoteChar) !== null && _c !== void 0 ? _c : '"', escapeChar: (_d = options.escapeChar) !== null && _d !== void 0 ? _d : '"', }; return new Promise((resolve, reject) => { const writeStream = platform_1.platform.fs.createWriteStream(outputPath); const processRow = (row) => { const rowArray = columnStructure ? Object.keys(columnStructure).map((key) => row[key]) : row; return CSVFile.formatRow(rowArray, writeOptions) + "\n"; }; writeStream.on("error", (err) => { reject(err); }); writeStream.on("finish", () => { resolve(); }); const writeData = async () => { if (writeOptions.includeHeader && columnStructure) { const headerRow = Object.keys(columnStructure); if (!writeStream.write(CSVFile.formatRow(headerRow, writeOptions) + "\n")) { await new Promise((resolve) => writeStream.once("drain", () => resolve("drained"))); } } for (const row of data) { if (!writeStream.write(processRow(row))) { await new Promise((resolve) => writeStream.once("drain", () => resolve("drained"))); } } writeStream.end(); }; writeData().catch(reject); }); } /** * Formats a row of data for CSV output. * @private * @param row The row data as an array of strings. * @param options The CSV writing options. * @returns A formatted string representing the CSV row. */ static formatRow(row, options) { var _a; return row .map((field) => { var _a, _b, _c, _d, _e, _f; if (field.includes((_a = options.delimiter) !== null && _a !== void 0 ? _a : ",") || field.includes((_b = options.quoteChar) !== null && _b !== void 0 ? _b : '"') || field.includes("\n")) { return `${options.quoteChar}${field.replace(new RegExp((_c = options.quoteChar) !== null && _c !== void 0 ? _c : '"', "g"), ((_d = options.escapeChar) !== null && _d !== void 0 ? _d : '"') + ((_e = options.quoteChar) !== null && _e !== void 0 ? _e : '"'))}${(_f = options.quoteChar) !== null && _f !== void 0 ? _f : '"'}`; } return field; }) .join((_a = options.delimiter) !== null && _a !== void 0 ? _a : ","); } } exports.CSVFile = CSVFile; //# sourceMappingURL=csvParser.js.map