// @maximai/maxim-js
// Version:
// Maxim AI JS SDK. Visit https://getmaxim.ai for more info.
// 314 lines • 12.4 kB
// JavaScript
"use strict";
// CommonJS prologue: set the `__esModule` flag on the exports object
// (presumably for transpiler import interop -- confirm against the build setup).
Object.defineProperty(exports, "__esModule", { value: true });
// Placeholder for the CSVFile export; the real value is assigned after the
// class definition further down in this file.
exports.CSVFile = void 0;
// Node.js built-in modules referenced by the code below.
const fs_1 = require("fs");
const os_1 = require("os");
const stream_1 = require("stream");
/**
 * Streaming reader/writer for delimited text (CSV) files.
 *
 * A file is read line by line without loading it fully into memory. When a
 * `columnStructure` (a map of column name -> zero-based column index) is
 * supplied, rows are returned as objects keyed by column name; otherwise rows
 * are returned as arrays of strings.
 *
 * Limitations of the line-based reader: a quoted field may not contain a line
 * break, and `escapeChar` is only honoured outside quoted fields (inside
 * quotes a literal quote is written as two consecutive quote characters).
 */
class CSVFile {
    /**
     * @param {string} filePath - Path of the CSV file to read.
     * @param {Object<string, number>} [columnStructure] - Column name -> column
     *   index. When given, the file is treated as having a header row and the
     *   header is validated against this structure.
     * @param {{delimiter?: string, hasHeader?: boolean, quoteChar?: string, escapeChar?: string}} [options]
     *   Parsing options. Defaults: delimiter `,`, hasHeader `true`,
     *   quoteChar `"`, escapeChar `"`.
     */
    constructor(filePath, columnStructure, options = {}) {
        this.headerRow = null;
        this.rowCount = null;
        this.columnCount = null;
        this.fileStats = null;
        this.headerValidation = null;
        this.filePath = filePath;
        this.columnStructure = columnStructure;
        this.options = {
            delimiter: options.delimiter != null ? options.delimiter : ",",
            hasHeader: columnStructure ? true : (options.hasHeader != null ? options.hasHeader : true),
            quoteChar: options.quoteChar != null ? options.quoteChar : '"',
            escapeChar: options.escapeChar != null ? options.escapeChar : '"',
        };
        if (columnStructure) {
            // A constructor cannot await, so the validation promise is kept and
            // re-awaited by the row-reading methods. The no-op handler only
            // prevents an "unhandled rejection" crash; the failure itself is
            // still delivered to whoever reads rows from this instance.
            this.headerValidation = this.validateHeaders();
            this.headerValidation.catch(() => undefined);
        }
    }
    /**
     * Checks that every column named in `columnStructure` sits at its declared
     * index in the file's first row.
     *
     * @returns {Promise<void>}
     * @throws {Error} If the header cannot be read or a column does not match.
     */
    async validateHeaders() {
        const headerRow = await this.getHeader();
        if (!headerRow) {
            throw new Error("Failed to read header row from CSV file.");
        }
        if (!this.columnStructure) {
            return;
        }
        for (const [columnName, index] of Object.entries(this.columnStructure)) {
            if (headerRow[index] !== columnName) {
                throw new Error(`Column structure does not match CSV "${this.filePath}" headers. \nExpected column "${columnName}" at index [${index}], but found "${headerRow[index]}".`, {
                    cause: JSON.stringify({
                        expectedColumn: columnName,
                        expectedIndex: index,
                        actualColumn: headerRow[index],
                        headerRow: headerRow,
                    }, null, 2),
                });
            }
        }
    }
    /**
     * Streams the file and calls `onLine` once per line, in order.
     *
     * Lines are separated by `\n` or `\r\n` regardless of the host platform. A
     * final line without a terminator is delivered too; an empty remainder
     * after the last terminator is not. The stream is decoded as UTF-8 by the
     * read stream itself so multi-byte characters split across chunks stay
     * intact, and it is always destroyed when this method settles.
     *
     * @param {(line: string) => (boolean|void)} onLine - Return exactly `false`
     *   to stop reading early.
     * @returns {Promise<void>} Rejects on a read error or if `onLine` throws.
     */
    forEachLine(onLine) {
        return new Promise((resolve, reject) => {
            const source = (0, fs_1.createReadStream)(this.filePath, { encoding: "utf8" });
            let pending = "";
            let done = false;
            const succeed = () => {
                if (done) {
                    return;
                }
                done = true;
                source.destroy();
                resolve();
            };
            const fail = (error) => {
                if (done) {
                    return;
                }
                done = true;
                source.destroy();
                reject(error);
            };
            const deliver = (line) => {
                try {
                    if (onLine(line) === false) {
                        succeed();
                    }
                }
                catch (error) {
                    fail(error);
                }
            };
            source.on("error", fail);
            source.on("data", (chunk) => {
                const pieces = (pending + chunk).split(/\r?\n/);
                // The last piece is an unterminated fragment; keep it for the next chunk.
                pending = pieces.pop();
                for (const piece of pieces) {
                    if (done) {
                        break;
                    }
                    deliver(piece);
                }
            });
            source.on("end", () => {
                if (!done && pending !== "") {
                    deliver(pending);
                }
                succeed();
            });
        });
    }
    /**
     * Splits one line of text into its fields.
     *
     * Inside a quoted field two consecutive quote characters produce one
     * literal quote. Outside quotes, `escapeChar` directly followed by the
     * delimiter produces a literal delimiter.
     *
     * @param {string} row - A single line, without its line terminator.
     * @returns {string[]} The field values.
     */
    parseRow(row) {
        const { delimiter, quoteChar, escapeChar } = this.options;
        const fields = [];
        let current = "";
        let quoted = false;
        for (let i = 0; i < row.length; i++) {
            const char = row[i];
            const next = row[i + 1];
            if (quoted) {
                if (char !== quoteChar) {
                    current += char;
                }
                else if (next === quoteChar) {
                    // Doubled quote -> one literal quote; skip the second one.
                    current += char;
                    i++;
                }
                else {
                    quoted = false;
                }
                continue;
            }
            if (char === quoteChar) {
                quoted = true;
            }
            else if (char === delimiter) {
                fields.push(current);
                current = "";
            }
            else if (char === escapeChar && next === delimiter) {
                current += delimiter;
                i++;
            }
            else {
                current += char;
            }
        }
        fields.push(current);
        return fields;
    }
    /**
     * Counts the data rows (the header row is excluded when `hasHeader` is set).
     * The result is cached on the instance.
     *
     * @returns {Promise<number>}
     */
    async getRowCount() {
        if (this.rowCount === null) {
            let count = 0;
            await this.processFile(() => {
                count++;
            });
            this.rowCount = count;
        }
        return this.rowCount;
    }
    /**
     * Returns the number of fields in the first row. The result is cached.
     *
     * @returns {Promise<number>}
     * @throws {Error} If the first row cannot be read.
     */
    async getColumnCount() {
        if (this.columnCount === null) {
            const headerRow = await this.getHeader();
            if (!headerRow) {
                throw new Error("Failed to read header row from CSV file.");
            }
            this.columnCount = headerRow.length;
        }
        return this.columnCount;
    }
    /**
     * Reads and parses the first line of the file (whether or not `hasHeader`
     * is set). The parsed row is cached on the instance.
     *
     * @returns {Promise<string[]>}
     * @throws {Error} If the file is empty or cannot be read.
     */
    async getHeader() {
        if (this.headerRow === null) {
            let firstLine = null;
            await this.forEachLine((line) => {
                firstLine = line;
                return false; // only the first line is needed
            });
            if (firstLine === null) {
                throw new Error("Failed to read header row from CSV file.");
            }
            this.headerRow = this.parseRow(firstLine);
        }
        return this.headerRow;
    }
    /**
     * Returns the data row at a zero-based index. When `hasHeader` is set the
     * header line is not counted, so index 0 is the first line after it.
     *
     * @param {number} index - Zero-based data-row index.
     * @returns {Promise<string[]|Object<string, string>>} The row, shaped by
     *   `createTypedRow`.
     * @throws {Error} If `index` is negative or past the last row, if header
     *   validation started by the constructor failed, or on a read error.
     */
    async getRow(index) {
        if (index < 0) {
            throw new Error("Row index must be non-negative.");
        }
        await this.headerValidation;
        let skipHeader = this.options.hasHeader;
        let position = 0;
        let match = null;
        await this.forEachLine((line) => {
            if (skipHeader) {
                skipHeader = false;
                return true;
            }
            if (position === index) {
                match = this.parseRow(line);
                return false;
            }
            position++;
            return true;
        });
        if (match === null) {
            throw new Error(`Row index ${index} is out of bounds.`);
        }
        return this.createTypedRow(match);
    }
    /**
     * Converts a parsed row into an object keyed by column name when a
     * `columnStructure` is set; otherwise returns the array unchanged.
     *
     * @param {string[]} row
     * @returns {string[]|Object<string, string>}
     */
    createTypedRow(row) {
        if (!this.columnStructure) {
            return row;
        }
        const typedRow = {};
        for (const [key, index] of Object.entries(this.columnStructure)) {
            typedRow[key] = row[index];
        }
        return typedRow;
    }
    /**
     * Collects every data row for which `predicate` returns a truthy value.
     *
     * @param {(row: string[]|Object<string, string>) => boolean} predicate
     * @returns {Promise<Array>}
     */
    async filter(predicate) {
        const result = [];
        await this.processFile((row) => {
            const typedRow = this.createTypedRow(row);
            if (predicate(typedRow)) {
                result.push(typedRow);
            }
        });
        return result;
    }
    /**
     * Applies `mapper` to every data row and collects the results.
     *
     * @param {(row: string[]|Object<string, string>) => *} mapper
     * @returns {Promise<Array>}
     */
    async map(mapper) {
        const result = [];
        await this.processFile((row) => {
            result.push(mapper(this.createTypedRow(row)));
        });
        return result;
    }
    /**
     * Parses every data row and hands it to `rowProcessor`, skipping the first
     * line when `hasHeader` is set. The callback's return value is ignored.
     *
     * @param {(row: string[]) => void} rowProcessor
     * @returns {Promise<void>} Rejects if header validation started by the
     *   constructor failed, on a read error, or if `rowProcessor` throws.
     */
    async processFile(rowProcessor) {
        await this.headerValidation;
        let skipHeader = this.options.hasHeader;
        await this.forEachLine((line) => {
            if (skipHeader) {
                skipHeader = false;
                return;
            }
            rowProcessor(this.parseRow(line));
        });
    }
    /**
     * Creates a new CSVFile over the same path that uses a different column
     * structure, validating the header against it.
     *
     * @param {CSVFile} csvFile - Instance to copy state from.
     * @param {Object<string, number>} [newColumnStructure]
     * @returns {Promise<CSVFile>}
     * @throws {Error} If the header does not match `newColumnStructure`.
     */
    static async restructure(csvFile, newColumnStructure) {
        const restructured = new CSVFile(csvFile.filePath);
        Object.assign(restructured, csvFile);
        restructured.columnStructure = newColumnStructure;
        // Mirror the constructor: a column structure implies a header row. The
        // options are copied so the source instance is never mutated.
        const hasHeader = newColumnStructure ? true : csvFile.options.hasHeader;
        restructured.options = Object.assign({}, csvFile.options, { hasHeader });
        if (hasHeader !== csvFile.options.hasHeader) {
            restructured.rowCount = null; // cached count used the old header setting
        }
        // Validation happens explicitly below; drop the promise copied from the source.
        restructured.headerValidation = null;
        await restructured.validateHeaders();
        restructured.columnCount = null;
        return restructured;
    }
    /**
     * Writes rows to `outputPath`, one per line, terminated by the platform
     * line ending (`os.EOL`).
     *
     * NOTE: with a `columnStructure`, columns are emitted in
     * `Object.keys(columnStructure)` order; the numeric indices in the
     * structure are not consulted here.
     *
     * @param {Iterable} data - Row objects keyed by column name when
     *   `columnStructure` is given, otherwise arrays of field values.
     * @param {string} outputPath
     * @param {Object<string, number>} [columnStructure]
     * @param {{delimiter?: string, includeHeader?: boolean, quoteChar?: string, escapeChar?: string}} [options]
     *   Defaults: delimiter `,`, includeHeader `true`, quoteChar `"`,
     *   escapeChar `"`. A header is only written when `columnStructure` is given.
     * @returns {Promise<void>} Resolves once the stream has finished.
     */
    static async writeToFile(data, outputPath, columnStructure, options = {}) {
        const writeOptions = {
            delimiter: options.delimiter != null ? options.delimiter : ",",
            includeHeader: options.includeHeader != null ? options.includeHeader : true,
            quoteChar: options.quoteChar != null ? options.quoteChar : '"',
            escapeChar: options.escapeChar != null ? options.escapeChar : '"',
        };
        const columnNames = columnStructure ? Object.keys(columnStructure) : null;
        const toLine = (cells) => CSVFile.formatRow(cells, writeOptions) + os_1.EOL;
        return new Promise((resolve, reject) => {
            const writeStream = (0, fs_1.createWriteStream)(outputPath);
            writeStream.on("error", (err) => {
                reject(err);
            });
            writeStream.on("finish", () => {
                resolve();
            });
            const writeLine = async (line) => {
                // Respect backpressure: wait for "drain" when the buffer is full.
                if (!writeStream.write(line)) {
                    await new Promise((drained) => writeStream.once("drain", () => drained("drained")));
                }
            };
            const writeAll = async () => {
                if (writeOptions.includeHeader && columnNames) {
                    await writeLine(toLine(columnNames));
                }
                for (const row of data) {
                    const cells = columnNames ? columnNames.map((name) => row[name]) : row;
                    await writeLine(toLine(cells));
                }
                writeStream.end();
            };
            writeAll().catch((err) => {
                // Release the file handle if building or writing a row failed.
                writeStream.destroy();
                reject(err);
            });
        });
    }
    /**
     * Serialises one row. A field is wrapped in quotes when it contains the
     * delimiter, the quote character, or a line break; quote characters inside
     * it are written as `escapeChar + quoteChar`. `null`/`undefined` fields
     * become empty strings and other non-strings are converted with `String`.
     *
     * @param {Array} row - Field values.
     * @param {{delimiter?: string, quoteChar?: string, escapeChar?: string}} [options]
     * @returns {string} The formatted line, without a line terminator.
     */
    static formatRow(row, options = {}) {
        const delimiter = options.delimiter != null ? options.delimiter : ",";
        const quoteChar = options.quoteChar != null ? options.quoteChar : '"';
        const escapeChar = options.escapeChar != null ? options.escapeChar : '"';
        return row
            .map((field) => {
                const text = field == null ? "" : String(field);
                const needsQuoting = text.includes(delimiter) ||
                    text.includes(quoteChar) ||
                    text.includes("\n") ||
                    text.includes("\r");
                if (!needsQuoting) {
                    return text;
                }
                // split/join keeps the quote character literal (no regex or "$" patterns).
                const escaped = text.split(quoteChar).join(escapeChar + quoteChar);
                return `${quoteChar}${escaped}${quoteChar}`;
            })
            .join(delimiter);
    }
}
// Publish the class as the module's `CSVFile` export.
exports.CSVFile = CSVFile;
//# sourceMappingURL=csvParser.js.map