// @dsnp/parquetjs
// Fully asynchronous, pure JavaScript implementation of the Parquet file format.
// Type declarations for the record shredding / materialization module.
import { ParquetSchema } from './schema';
import { Page, PageData } from './declare';
/**
 * 'Shred' a record into a list of <value, repetition_level, definition_level>
 * tuples per column using the Google Dremel Algorithm.
 *
 * The buffer argument must point to an object into which the shredded record
 * will be returned. You may re-use the buffer for repeated calls to this function
 * to append to an existing buffer, as long as the schema is unchanged.
 *
 * The format in which the shredded records will be stored in the buffer is as
 * follows:
 *
 *   buffer = {
 *     columnData: {
 *       'my_col': {
 *         dlevels: [d1, d2, .. dN],
 *         rlevels: [r1, r2, .. rN],
 *         values: [v1, v2, .. vN],
 *       }, ...
 *     },
 *     rowCount: X,
 *   }
 *
 */
/**
 * Accumulator for shredded records: filled by `shredRecord` and consumed by
 * `materializeRecords`. May be reused across calls as long as the schema is
 * unchanged.
 */
export interface RecordBuffer {
  /** Per-column shredded tuples (values + r/d-levels), keyed by column name (see 'my_col' in the module comment's example). */
  columnData?: Record<string, PageData>;
  /** Total number of rows shredded into this buffer. */
  rowCount?: number;
  /** NOTE(review): presumably the row count of the current in-progress page — confirm against the shred implementation. */
  pageRowCount?: number;
  /** NOTE(review): presumably completed pages per column — confirm against the shred implementation. */
  pages?: Record<string, Page[]>;
}
/**
 * Shreds a single record into per-column <value, r-level, d-level> tuples,
 * appending them to `buffer` (Dremel record shredding).
 *
 * @param schema - Parquet schema describing the record's columns.
 * @param record - The record (plain object) to shred.
 * @param buffer - Accumulator to append to; reusable across calls with the same schema.
 */
export declare const shredRecord: (schema: ParquetSchema, record: Record<string, unknown>, buffer: RecordBuffer) => void;
/**
 * 'Materialize' a list of <value, repetition_level, definition_level>
 * tuples back to nested records (objects/arrays) using the Google Dremel
 * Algorithm.
 *
 * The buffer argument must point to an object with the following structure (i.e.
 * the same structure that is produced by shredRecord):
 *
 *   buffer = {
 *     columnData: {
 *       'my_col': {
 *         dlevels: [d1, d2, .. dN],
 *         rlevels: [r1, r2, .. rN],
 *         values: [v1, v2, .. vN],
 *       }, ...
 *     },
 *     rowCount: X,
 *   }
 *
 */
/**
 * Reassembles nested records from the shredded column data in `buffer`
 * (Dremel record assembly — the inverse of `shredRecord`).
 *
 * @param schema - Parquet schema the buffer was shredded with.
 * @param buffer - Shredded column data to materialize.
 * @param records - Optional array to receive the materialized records; NOTE(review): presumably a fresh array is created when omitted — confirm in the implementation.
 * @returns The materialized records.
 */
export declare const materializeRecords: (schema: ParquetSchema, buffer: RecordBuffer, records?: Record<string, unknown>[]) => Record<string, unknown>[];