UNPKG

@pujansrt/data-genie

Version:

High-performance ETL engine written in TypeScript

128 lines (127 loc) 5.53 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.TSVWriter = void 0;
const fs_1 = require("fs");
const csv_stringify_1 = require("csv-stringify");
/**
 * TSVWriter class for writing data records to a Tab-Separated Values (TSV) file.
 * It uses the 'csv-stringify' library, configured specifically for tab delimiters.
 * Records are written incrementally, and a header row can be included.
 *
 * Data flows `write()` -> stringifier -> (pipe) -> file write stream. The first
 * error raised by either stream is remembered so that later `write()` and
 * `close()` calls fail fast instead of reporting success on a broken file.
 */
class TSVWriter {
    /**
     * Constructs a new TSVWriter and opens the output file for writing.
     * @param filePath The path to the output TSV file.
     */
    constructor(filePath) {
        this.headerWritten = false;
        this.fieldNames = []; // To store header names if explicitly set or inferred
        this.hasFieldNamesInFirstRow = false; // Controls if a header row is written
        this.streamError = null; // First error seen on either stream, if any
        this.closePromise = null; // Memoized result of close() so it is idempotent
        this.filePath = filePath;
        this.outputStream = (0, fs_1.createWriteStream)(this.filePath, { encoding: 'utf-8' });
        // Configure stringify for TSV: use tab as delimiter
        this.stringifier = (0, csv_stringify_1.stringify)({
            delimiter: '\t'
            // If you need to handle fields containing tabs or newlines,
            // the 'csv-stringify' library will automatically quote them.
            // For strict TSV (where tabs/newlines are disallowed in fields),
            // you might need pre-processing or validation before passing to the writer.
        });
        // pipe() ends the output stream automatically once the stringifier has
        // been ended and fully drained, so close() only has to end the stringifier.
        this.stringifier.pipe(this.outputStream);
        // Handle potential errors on the stringifier or output stream
        this.stringifier.on('error', (err) => {
            console.error(`TSVWriter: Error in stringifier for ${this.filePath}:`, err);
            if (!this.streamError) {
                this.streamError = err;
            }
            this.outputStream.destroy(err); // Propagate error to the stream
        });
        this.outputStream.on('error', (err) => {
            console.error(`TSVWriter: Error writing to file ${this.filePath}:`, err);
            if (!this.streamError) {
                this.streamError = err;
            }
            // The pipe is detached once the destination fails; tear the
            // stringifier down as well so it stops accepting rows that could
            // never reach the file.
            this.stringifier.destroy();
        });
    }
    /**
     * Specifies whether the first row written should be a header row containing the field names.
     * If true, `setFieldNames` can be used to explicitly define names, or they will be inferred
     * from the first record.
     * @param value True to write field names in the first row, false otherwise.
     * @returns The current TSVWriter instance for chaining.
     */
    setFieldNamesInFirstRow(value) {
        this.hasFieldNamesInFirstRow = value;
        return this;
    }
    /**
     * Explicitly sets the names of the fields. These names will be used for the header row
     * (if `hasFieldNamesInFirstRow` is true) and to determine the order of values from `DataRecord`s.
     * If not called, field names will be inferred from the first record's keys.
     * @param names A list of string names for the fields (column headers).
     * @returns The current TSVWriter instance for chaining.
     */
    setFieldNames(...names) {
        this.fieldNames = names;
        return this;
    }
    /**
     * Queues one row (an array of cell values) on the stringifier.
     * @param row The cell values, in column order.
     * @returns A Promise that settles when the stringifier invokes the write callback.
     */
    writeRow(row) {
        return new Promise((resolve, reject) => {
            this.stringifier.write(row, (error) => {
                if (error) {
                    reject(error);
                }
                else {
                    resolve();
                }
            });
        });
    }
    /**
     * Writes a single data record to the TSV file.
     * Handles writing the header row if configured and not yet written.
     * @param record The DataRecord object to write.
     * @returns A Promise that resolves when the record has been written to the stream.
     * @throws If `close()` was already called, if a stream has failed, or if a header
     *         row is requested but no field names are set or inferrable.
     */
    async write(record) {
        if (this.closePromise) {
            throw new Error('TSVWriter: cannot write after close() has been called.');
        }
        if (this.streamError) {
            throw this.streamError;
        }
        // Determine field names if not explicitly set and not yet inferred
        if (!this.headerWritten && this.fieldNames.length === 0) {
            this.fieldNames = Object.keys(record);
        }
        const pendingWrites = [];
        // Write header if enabled and not already written
        if (this.hasFieldNamesInFirstRow && !this.headerWritten) {
            // Ensure field names are available before writing header
            if (this.fieldNames.length === 0) {
                throw new Error('Field names must be set or inferrable from the first record when hasFieldNamesInFirstRow is true.');
            }
            // Flip the flag BEFORE yielding to the event loop: overlapping
            // write() calls must not each emit their own header row.
            this.headerWritten = true;
            pendingWrites.push(this.writeRow(this.fieldNames));
        }
        // Map the record object to an array of values based on the determined field names order
        const recordArray = this.fieldNames.map((fieldName) => record[fieldName]);
        // Header and record are queued back-to-back in the same tick, so the
        // header always precedes this record even when calls overlap.
        pendingWrites.push(this.writeRow(recordArray));
        await Promise.all(pendingWrites);
    }
    /**
     * Writes all data records from an asynchronous iterable to the TSV file.
     * Records are written one at a time, in iteration order.
     * @param records An AsyncIterableIterator of DataRecord objects.
     * @returns A Promise that resolves when all records have been written.
     */
    async writeAll(records) {
        for await (const record of records) {
            await this.write(record);
        }
    }
    /**
     * Closes the writer. This should be called when all data has been written
     * to ensure all buffered data is flushed to the file and resources are released.
     * Calling it more than once returns the same Promise.
     * @returns A Promise that resolves when the output stream has closed, or rejects
     *          with the first stream error if the file could not be written completely.
     */
    close() {
        if (!this.closePromise) {
            this.closePromise = new Promise((resolve, reject) => {
                if (this.streamError) {
                    // A stream already failed and was torn down by the
                    // constructor's error handlers; nothing is left to flush.
                    reject(this.streamError);
                    return;
                }
                this.outputStream.once('error', reject);
                this.outputStream.once('close', () => {
                    if (this.streamError) {
                        reject(this.streamError);
                    }
                    else {
                        resolve();
                    }
                });
                // Ending the stringifier is sufficient: once it has drained,
                // the pipe ends the output stream. Ending the output stream
                // directly could race with rows still buffered in the pipe.
                this.stringifier.end();
            });
        }
        return this.closePromise;
    }
}
exports.TSVWriter = TSVWriter;