// @dsnp/parquetjs
// Fully asynchronous, pure JavaScript implementation of the Parquet file format.
// Type declarations for the record shredding / materialization module.
import { ParquetSchema } from './schema';
import { Page, PageData } from './declare';
/**
 * 'Shred' a record into a list of <value, repetition_level, definition_level>
 * tuples per column using the Google Dremel Algorithm.
 *
 * The buffer argument must point to an object into which the shredded record
 * will be returned. You may re-use the buffer for repeated calls to this function
 * to append to an existing buffer, as long as the schema is unchanged.
 *
 * The format in which the shredded records will be stored in the buffer is as
 * follows:
 *
 *   buffer = {
 *     columnData: {
 *       'my_col': {
 *         dlevels: [d1, d2, .. dN],
 *         rlevels: [r1, r2, .. rN],
 *         values: [v1, v2, .. vN],
 *       }, ...
 *     },
 *     rowCount: X,
 *   }
 *
 */
/**
 * Accumulator for shredded records: filled by `shredRecord` and consumed by
 * `materializeRecords`. May be reused across calls as long as the schema is
 * unchanged.
 */
export interface RecordBuffer {
  /** Per-column shredded tuples (values + r/d-levels), keyed by column name (see 'my_col' in the module comment's example). */
  columnData?: Record<string, PageData>;
  /** Total number of rows shredded into this buffer. */
  rowCount?: number;
  /** NOTE(review): presumably the row count of the current in-progress page — confirm against the shred implementation. */
  pageRowCount?: number;
  /** NOTE(review): presumably completed pages per column — confirm against the shred implementation. */
  pages?: Record<string, Page[]>;
}
/**
 * Shreds a single record into per-column <value, r-level, d-level> tuples,
 * appending them to `buffer` (Dremel record shredding).
 *
 * @param schema - Parquet schema describing the record's columns.
 * @param record - The record (plain object) to shred.
 * @param buffer - Accumulator to append to; reusable across calls with the same schema.
 */
export declare const shredRecord: (schema: ParquetSchema, record: Record<string, unknown>, buffer: RecordBuffer) => void;
/**
 * 'Materialize' a list of <value, repetition_level, definition_level>
 * tuples back to nested records (objects/arrays) using the Google Dremel
 * Algorithm.
 *
 * The buffer argument must point to an object with the following structure (i.e.
 * the same structure that is produced by shredRecord):
 *
 *   buffer = {
 *     columnData: {
 *       'my_col': {
 *         dlevels: [d1, d2, .. dN],
 *         rlevels: [r1, r2, .. rN],
 *         values: [v1, v2, .. vN],
 *       }, ...
 *     },
 *     rowCount: X,
 *   }
 *
 */
/**
 * Reassembles nested records from the shredded column data in `buffer`
 * (Dremel record assembly — the inverse of `shredRecord`).
 *
 * @param schema - Parquet schema the buffer was shredded with.
 * @param buffer - Shredded column data to materialize.
 * @param records - Optional array to receive the materialized records; NOTE(review): presumably a fresh array is created when omitted — confirm in the implementation.
 * @returns The materialized records.
 */
export declare const materializeRecords: (schema: ParquetSchema, buffer: RecordBuffer, records?: Record<string, unknown>[]) => Record<string, unknown>[];