@dobesv/parquets
Version:
TypeScript implementation of the Parquet file format, based on parquet.js
108 lines • 3.9 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.generateParquetBuffer = exports.ParquetBufferWriter = void 0;
const shred_1 = require("./shred");
const encoding_1 = require("./encoding");
/**
 * In-memory, synchronous Parquet writer.
 *
 * Appended rows are shredded into a pending row buffer. Whenever that buffer
 * holds at least `rowGroupSize` rows it is encoded as a row group and pushed
 * onto an internal list of byte chunks. `toBuffer()` emits whatever is still
 * pending, appends the footer and returns all chunks joined into one Buffer.
 */
class ParquetBufferWriter {
    /**
     * @param schema Schema passed to the shredder and to the encoders.
     * @param opts Optional settings. `rowGroupSize` and `pageSize` are read
     *   here; the object itself is kept as `this.opts` for later lookups.
     */
    constructor(schema, opts) {
        const settings = opts || {};
        this.rowCount = 0;
        this.rowGroups = [];
        this.chunks = [];
        this.offset = 0;
        this.headerWritten = false;
        this.closed = false;
        this.schema = schema;
        this.opts = settings;
        // A missing or falsy size (0 included) falls back to the library default.
        this.rowGroupSize = settings.rowGroupSize || encoding_1.PARQUET_DEFAULT_ROW_GROUP_SIZE;
        this.pageSize = settings.pageSize || encoding_1.PARQUET_DEFAULT_PAGE_SIZE;
        this.rowBuffer = new shred_1.ParquetWriteBuffer(schema);
    }

    /**
     * Factory equivalent to `new ParquetBufferWriter(schema, opts)`.
     *
     * @returns {ParquetBufferWriter} A fresh writer.
     */
    static openBuffer(schema, opts) {
        const writer = new ParquetBufferWriter(schema, opts);
        return writer;
    }

    /**
     * Shred one row into the pending row buffer.
     *
     * The magic header is emitted before the first row, and the pending buffer
     * is flushed as a row group once it reaches `rowGroupSize` rows.
     *
     * @param row Record to append.
     * @throws {Error} If `toBuffer()` has already been called.
     */
    appendRow(row) {
        if (this.closed) {
            throw new Error('writer was closed');
        }
        // The file header goes out lazily, just ahead of the first row.
        if (!this.headerWritten) {
            this.writeSection(Buffer.from(encoding_1.PARQUET_MAGIC));
            this.headerWritten = true;
        }
        const { shredRecord } = shred_1;
        shredRecord(this.schema, row, this.rowBuffer);
        this.rowCount++;
        // The pending buffer tracks its own row count; compare it to the limit.
        const isGroupFull = this.rowBuffer.rowCount >= this.rowGroupSize;
        if (isGroupFull) {
            this.flushRowGroup();
        }
    }

    /**
     * Finish the file and return it as a single Buffer.
     *
     * This is a terminal call: the writer is marked closed up front, so a
     * second call (or any later `appendRow`) throws.
     *
     * @returns {Buffer} Concatenation of every section written so far plus the footer.
     * @throws {Error} If the writer was already closed.
     */
    toBuffer() {
        if (this.closed) {
            throw new Error('writer was closed');
        }
        this.closed = true;
        // With zero rows appended the header has not been emitted yet.
        if (!this.headerWritten) {
            this.writeSection(Buffer.from(encoding_1.PARQUET_MAGIC));
        }
        // Rows still waiting in the buffer become the last row group.
        const hasPendingRows = this.rowBuffer.rowCount > 0;
        if (hasPendingRows) {
            this.flushRowGroup();
        }
        const { encodeFooter } = encoding_1;
        this.writeSection(encodeFooter(this.schema, this.rowCount, this.rowGroups, {}));
        return Buffer.concat(this.chunks);
    }

    /**
     * Record one section of output and advance the running byte offset.
     *
     * @param buf Section to append; its `length` is added to `this.offset`.
     */
    writeSection(buf) {
        this.chunks.push(buf);
        this.offset = this.offset + buf.length;
    }

    /**
     * Encode the pending rows as a row group, store its bytes and metadata,
     * and start a new empty row buffer.
     */
    flushRowGroup() {
        const { encodeRowGroup } = encoding_1;
        const encodeOptions = {
            // The current offset is captured before this group's bytes are added.
            baseOffset: this.offset,
            pageSize: this.pageSize,
            useDataPageV2: this.opts.useDataPageV2,
        };
        const { body: groupBytes, metadata: groupMeta } = encodeRowGroup(this.schema, this.rowBuffer, encodeOptions);
        this.writeSection(groupBytes);
        this.rowGroups.push(groupMeta);
        // Subsequent rows accumulate in a brand-new buffer.
        this.rowBuffer = new shred_1.ParquetWriteBuffer(this.schema);
    }
}
exports.ParquetBufferWriter = ParquetBufferWriter;
/**
 * One-shot helper: serialize an iterable of rows into a Parquet buffer.
 *
 * Builds a ParquetBufferWriter for `schema`/`opts`, appends each row in
 * iteration order and returns the result of the writer's `toBuffer()`.
 *
 * @param schema Schema forwarded to the writer.
 * @param rows Iterable of rows to append.
 * @param opts Optional writer options, forwarded unchanged.
 * @returns The value produced by `toBuffer()`.
 */
function generateParquetBuffer(schema, rows, opts) {
    const bufferWriter = new ParquetBufferWriter(schema, opts);
    // for...of keeps support for any iterable, not just arrays.
    for (const record of rows) {
        bufferWriter.appendRow(record);
    }
    const fileBytes = bufferWriter.toBuffer();
    return fileBytes;
}
exports.generateParquetBuffer = generateParquetBuffer;
//# sourceMappingURL=bufferWriter.js.map