@dobesv/parquets
Version:
TypeScript implementation of the Parquet file format, based on parquet.js
82 lines (81 loc) • 2.86 kB
TypeScript
import { ParquetBuffer, ParquetColumnData, ParquetRecord, ParquetValueArray } from './declare';
import { ParquetSchema } from './schema';
import { CustomError } from 'ts-custom-error';
/**
* Error class built on `CustomError` (from `ts-custom-error`) and constructed
* from a plain message string.
*
* NOTE(review): presumably the general-purpose error raised while shredding
* records — confirm against the implementation.
*/
export declare class ParquetShredError extends CustomError {
constructor(message: string);
}
/**
* Error class built on `CustomError` (from `ts-custom-error`) that carries the
* name of a field.
*
* NOTE(review): presumably raised when a record being shredded lacks a value
* for a required field — confirm against the implementation.
*/
export declare class MissingRequiredFieldShredError extends CustomError {
/** Field name passed to the constructor (presumably the offending schema field — confirm). */
fieldName: string;
constructor(fieldName: string);
}
/**
* Error class built on `CustomError` (from `ts-custom-error`) that carries the
* name of a field.
*
* NOTE(review): presumably raised when a record supplies more values for a
* field than the schema allows — confirm against the implementation.
*/
export declare class TooManyValuesShredError extends CustomError {
/** Field name passed to the constructor (presumably the offending schema field — confirm). */
fieldName: string;
constructor(fieldName: string);
}
/**
* Shredded data for a single column, as stored in
* `ParquetWriteBuffer.columnData`.
*
* `dLevels`, `rLevels` and `values` together hold the
* <value, repetition_level, definition_level> tuples described in the
* `shredRecord` documentation.
*/
export interface ParquetWriteColumnData {
/** Definition levels of the shredded tuples. */
dLevels: number[];
/** Repetition levels of the shredded tuples. */
rLevels: number[];
/** Values of the shredded tuples. */
values: ParquetValueArray;
/** NOTE(review): presumably the number of entries recorded for this column — confirm. */
count: number;
}
/**
* Buffer type accepted by `shredRecord`, which stores shredded records in it.
*
* It is constructed from a `ParquetSchema`.
* NOTE(review): the constructor presumably prepares an empty `columnData`
* entry for each column of the schema — confirm against the implementation.
*/
export declare class ParquetWriteBuffer {
/** Row counter (`rowCount: X` in the `shredRecord` documentation); presumably the number of records shredded so far — confirm. */
rowCount: number;
/** Per-column shredded data, keyed by a string (shown as the column name, e.g. 'my_col', in the `shredRecord` documentation). */
columnData: Record<string, ParquetWriteColumnData>;
constructor(schema: ParquetSchema);
}
/**
* 'Shred' a record into a list of <value, repetition_level, definition_level>
* tuples per column using the Google Dremel Algorithm.
*
* The buffer argument must point to an object into which the shredded record
* will be returned. You may re-use the buffer for repeated calls to this function
* to append to an existing buffer, as long as the schema is unchanged.
*
* The format in which the shredded records will be stored in the buffer is as
* follows (`columnData` is an object keyed by column name; each entry is a
* `ParquetWriteColumnData`, which also declares a `count` field):
*
* buffer = {
* columnData: {
* 'my_col': {
* dLevels: [d1, d2, .. dN],
* rLevels: [r1, r2, .. rN],
* values: [v1, v2, .. vN],
* }, ...
* },
* rowCount: X,
* }
*
* NOTE(review): the error classes declared in this file are presumably thrown
* from here on invalid input — confirm against the implementation.
*
* @param schema - Schema describing the record; must stay the same across
* calls that share a buffer.
* @param record - The record to shred.
* @param buffer - Destination buffer; the result is written into it rather
* than returned.
*/
export declare function shredRecord(schema: ParquetSchema, record: any, buffer: ParquetWriteBuffer): void;
/**
* 'Materialize' a list of <value, repetition_level, definition_level>
* tuples back to nested records (objects/arrays) using the Google Dremel
* Algorithm.
*
* The buffer argument must point to an object with the following structure (i.e.
* the same structure that is produced by shredRecord):
*
* buffer = {
* columnData: {
* 'my_col': {
* dlevels: [d1, d2, .. dN],
* rlevels: [r1, r2, .. rN],
* values: [v1, v2, .. vN],
* }, ...
* },
* rowCount: X,
* }
*
* NOTE(review): this function takes a `ParquetBuffer` (declared in
* './declare'), whereas shredRecord writes into a `ParquetWriteBuffer` whose
* column fields are spelled `dLevels`/`rLevels`. The lower-case field names
* and the layout shown above are unverified against `ParquetBuffer` — confirm
* before relying on them.
*
* @param schema - Schema used to rebuild the nested records.
* @param buffer - Column data to materialize.
* @returns The materialized records.
*/
export declare function materializeRecords(schema: ParquetSchema, buffer: ParquetBuffer): ParquetRecord[];
/**
* Support iteration over the values in a single column.
*
* For a simple column which is not repeated and not nested in a repeated
* field, this will give one value for each row in the input.
*
* If the column is repeated or nested in a repeated column, it will give an
* array for each row in the input.
*
* When there are multiple levels of repetition the iterator will yield
* nested arrays.
*
* @param schema - Schema that the column belongs to.
* @param data - Data of the column to iterate over.
* @param columnPath - Identifies the column as a list of strings (presumably
* the field names from the schema root down to the column — confirm).
* @returns A generator yielding one item per input row, as described above.
*/
export declare function materializeColumn(schema: ParquetSchema, data: ParquetColumnData, columnPath: string[]): Generator<any, void, unknown>;