UNPKG

@dobesv/parquets

Version:

TypeScript implementation of the Parquet file format, based on parquet.js

108 lines 3.9 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.generateParquetBuffer = exports.ParquetBufferWriter = void 0;
const shred_1 = require("./shred");
const encoding_1 = require("./encoding");

/**
 * Synchronous Parquet writer that works entirely in memory: rows are appended
 * one by one, encoded row groups are collected as Buffer chunks, and the
 * finished file is returned as one concatenated Buffer.
 */
class ParquetBufferWriter {
    /**
     * @param schema schema passed through to the shredder and the encoders
     * @param [opts] optional settings; this class reads `rowGroupSize`,
     *   `pageSize` and `useDataPageV2` from it
     */
    constructor(schema, opts) {
        const settings = opts || {};
        // Bookkeeping for everything emitted so far.
        this.rowCount = 0;
        this.rowGroups = [];
        this.chunks = [];
        this.offset = 0;
        // Lifecycle flags.
        this.headerWritten = false;
        this.closed = false;
        this.schema = schema;
        this.opts = settings;
        // Falsy or missing sizes fall back to the library defaults.
        this.rowGroupSize = settings.rowGroupSize || encoding_1.PARQUET_DEFAULT_ROW_GROUP_SIZE;
        this.pageSize = settings.pageSize || encoding_1.PARQUET_DEFAULT_PAGE_SIZE;
        this.rowBuffer = new shred_1.ParquetWriteBuffer(this.schema);
    }

    /**
     * Factory equivalent of calling the constructor directly.
     *
     * @returns a fresh ParquetBufferWriter for `schema` and `opts`
     */
    static openBuffer(schema, opts) {
        return new ParquetBufferWriter(schema, opts);
    }

    /**
     * Add one row to the pending row group.
     *
     * The magic header is emitted lazily, right before the first row is
     * shredded. Once the pending buffer holds at least `rowGroupSize` rows it
     * is encoded and flushed.
     *
     * @param row record to shred into the current row buffer
     * @throws {Error} if the writer has already been finalized
     */
    appendRow(row) {
        if (this.closed) {
            throw new Error('writer was closed');
        }
        if (!this.headerWritten) {
            this.writeSection(Buffer.from(encoding_1.PARQUET_MAGIC));
            this.headerWritten = true;
        }
        // Look the helper up at call time and invoke it unbound.
        const shredRecord = shred_1.shredRecord;
        shredRecord(this.schema, row, this.rowBuffer);
        this.rowCount++;
        if (this.rowBuffer.rowCount >= this.rowGroupSize) {
            this.flushRowGroup();
        }
    }

    /**
     * Finish the file and return all collected chunks as one Buffer.
     *
     * This is a one-shot call: the writer is marked closed up front, so a
     * second call (or any later appendRow) throws.
     *
     * @returns {Buffer} concatenation of every section written so far
     * @throws {Error} if the writer has already been finalized
     */
    toBuffer() {
        if (this.closed) {
            throw new Error('writer was closed');
        }
        this.closed = true;
        // No row was ever appended, so the magic header is still missing.
        if (!this.headerWritten) {
            this.writeSection(Buffer.from(encoding_1.PARQUET_MAGIC));
        }
        // Rows left over from a partially filled row group.
        if (this.rowBuffer.rowCount > 0) {
            this.flushRowGroup();
        }
        const encodeFooter = encoding_1.encodeFooter;
        const footerBytes = encodeFooter(this.schema, this.rowCount, this.rowGroups, {});
        this.writeSection(footerBytes);
        return Buffer.concat(this.chunks);
    }

    /**
     * Record `buf` as the next chunk and advance the running byte offset by
     * its length.
     */
    writeSection(buf) {
        this.chunks.push(buf);
        this.offset += buf.length;
    }

    /**
     * Encode the pending rows as a row group, store its bytes and metadata,
     * and start over with an empty row buffer.
     */
    flushRowGroup() {
        const encodeRowGroup = encoding_1.encodeRowGroup;
        const { body: groupBytes, metadata: groupMeta } = encodeRowGroup(this.schema, this.rowBuffer, {
            // Read before the group's own bytes are appended.
            baseOffset: this.offset,
            pageSize: this.pageSize,
            useDataPageV2: this.opts.useDataPageV2,
        });
        this.writeSection(groupBytes);
        this.rowGroups.push(groupMeta);
        this.rowBuffer = new shred_1.ParquetWriteBuffer(this.schema);
    }
}
exports.ParquetBufferWriter = ParquetBufferWriter;

/**
 * One-call helper: builds a ParquetBufferWriter, appends every entry of
 * `rows` in iteration order, and returns the result of toBuffer().
 *
 * @param schema schema forwarded to the writer
 * @param rows iterable of records to append
 * @param [opts] writer options forwarded unchanged
 * @returns {Buffer} the finished file contents
 */
function generateParquetBuffer(schema, rows, opts) {
    const bufferWriter = new ParquetBufferWriter(schema, opts);
    for (const record of rows) {
        bufferWriter.appendRow(record);
    }
    return bufferWriter.toBuffer();
}
exports.generateParquetBuffer = generateParquetBuffer;
//# sourceMappingURL=bufferWriter.js.map