parquets

Version:

TypeScript implementation of the Parquet file format, based on parquet.js

47 lines (46 loc) • 1.63 kB

TypeScript

import { ParquetBuffer, ParquetRecord } from './declare';
import { ParquetSchema } from './schema';

/**
 * Returns a ParquetBuffer for the given schema.
 *
 * NOTE(review): only the signature is visible here. Presumably this creates
 * an empty buffer that can then be passed to `shredRecord` -- confirm
 * against the implementation.
 *
 * @param schema - The schema the buffer is created for.
 * @returns A ParquetBuffer.
 */
export declare function shredBuffer(schema: ParquetSchema): ParquetBuffer;

/**
 * 'Shred' a record into a list of <value, repetition_level, definition_level>
 * tuples per column using the Google Dremel algorithm.
 *
 * The buffer argument must point to an object into which the shredded record
 * will be returned. You may re-use the buffer for repeated calls to this
 * function to append to an existing buffer, as long as the schema is unchanged.
 *
 * The format in which the shredded records will be stored in the buffer is as
 * follows:
 *
 *   buffer = {
 *     columnData: [
 *       'my_col': {
 *          dlevels: [d1, d2, .. dN],
 *          rlevels: [r1, r2, .. rN],
 *          values: [v1, v2, .. vN],
 *        }, ...
 *     ],
 *     rowCount: X,
 *   }
 *
 * NOTE(review): the sketch above writes `columnData` with `[...]` but uses
 * named keys; it is presumably a map keyed by column name -- confirm against
 * the ParquetBuffer declaration in './declare'.
 *
 * @param schema - The schema describing the record; must be the same schema
 *   for every call that re-uses a given buffer.
 * @param record - The record to shred.
 * @param buffer - The object that receives the shredded record. Nothing is
 *   returned; the result is written into this buffer.
 */
export declare function shredRecord(schema: ParquetSchema, record: any, buffer: ParquetBuffer): void;

/**
 * 'Materialize' a list of <value, repetition_level, definition_level>
 * tuples back to nested records (objects/arrays) using the Google Dremel
 * algorithm.
 *
 * The buffer argument must point to an object with the following structure
 * (i.e. the same structure that is produced by `shredRecord`):
 *
 *   buffer = {
 *     columnData: [
 *       'my_col': {
 *          dlevels: [d1, d2, .. dN],
 *          rlevels: [r1, r2, .. rN],
 *          values: [v1, v2, .. vN],
 *        }, ...
 *     ],
 *     rowCount: X,
 *   }
 *
 * NOTE(review): the sketch above writes `columnData` with `[...]` but uses
 * named keys; it is presumably a map keyed by column name -- confirm against
 * the ParquetBuffer declaration in './declare'.
 *
 * @param schema - The schema describing the shredded columns.
 * @param buffer - A buffer holding shredded column data in the structure
 *   shown above.
 * @returns The nested records rebuilt from the buffer.
 */
export declare function materializeRecords(schema: ParquetSchema, buffer: ParquetBuffer): ParquetRecord[];