UNPKG

@uwdata/flechette

Version:

Fast, lightweight access to Apache Arrow data.

369 lines (307 loc) 10.7 kB
import { Batch } from './batch.js'; import { Version, Endianness, MessageHeader, Precision, DateUnit, TimeUnit, IntervalUnit, UnionMode } from './constants.js'; /** A valid Arrow version number. */ export type Version_ = typeof Version[keyof typeof Version]; /** A valid endianness value. */ export type Endianness_ = typeof Endianness[keyof typeof Endianness]; /** A valid message header type value. */ export type MessageHeader_ = typeof MessageHeader[keyof typeof MessageHeader]; /** A valid floating point precision value. */ export type Precision_ = typeof Precision[keyof typeof Precision]; /** A valid date unit value. */ export type DateUnit_ = typeof DateUnit[keyof typeof DateUnit]; /** A valid time unit value. */ export type TimeUnit_ = typeof TimeUnit[keyof typeof TimeUnit]; /** A valid date/time interval unit value. */ export type IntervalUnit_ = typeof IntervalUnit[keyof typeof IntervalUnit]; /** A valid union type mode value. */ export type UnionMode_ = typeof UnionMode[keyof typeof UnionMode]; export type IntegerArray = | Uint8Array | Uint16Array | Uint32Array | Int8Array | Int16Array | Int32Array | BigUint64Array | BigInt64Array; export type TypedArray = | IntegerArray | Float32Array | Float64Array; export type OffsetArray = | Int32Array | BigInt64Array; export type IntArrayConstructor = | Uint8ArrayConstructor | Uint16ArrayConstructor | Uint32ArrayConstructor | Int8ArrayConstructor | Int16ArrayConstructor | Int32ArrayConstructor | Int64ArrayConstructor; export type Int64ArrayConstructor = | BigUint64ArrayConstructor | BigInt64ArrayConstructor; export type FloatArrayConstructor = | Uint16ArrayConstructor | Float32ArrayConstructor | Float64ArrayConstructor; export type DateTimeArrayConstructor = | Int32ArrayConstructor | BigInt64ArrayConstructor; export type DecimalArrayConstructor = | Int32ArrayConstructor | BigUint64ArrayConstructor; export type TypedArrayConstructor = | Uint8ArrayConstructor | Uint16ArrayConstructor | Uint32ArrayConstructor | BigUint64ArrayConstructor | Int8ArrayConstructor | Int16ArrayConstructor | Int32ArrayConstructor | BigInt64ArrayConstructor | Float32ArrayConstructor | Float64ArrayConstructor; /** An extracted array of column values. */ export interface ValueArray<T> extends ArrayLike<T>, Iterable<T> { slice(start?: number, end?: number): ValueArray<T>; } /** Struct/row object factory method. */ export type StructFactory = (names: string[], batches: Batch<any>[]) => (index: number) => Record<string, any>; /** Custom metadata. */ export type Metadata = Map<string, string>; /** * Arrow table schema. */ export interface Schema { version?: Version_; endianness?: Endianness_; fields: Field[]; metadata?: Metadata | null } /** * Arrow schema field definition. */ export interface Field { name: string, type: DataType; nullable: boolean; metadata: Metadata; } /** Valid integer bit widths. */ export type IntBitWidth = 8 | 16 | 32 | 64; /** Dictionary-encoded data type. */ export type DictionaryType = { typeId: -1, dictionary: DataType, id: number, indices: IntType, ordered: boolean }; /** None data type. */ export type NoneType = { typeId: 0 }; /** Null data type. */ export type NullType = { typeId: 1 }; /** Integer data type. */ export type IntType = { typeId: 2, bitWidth: IntBitWidth, signed: boolean, values: IntArrayConstructor }; /** Floating point number data type. */ export type FloatType = { typeId: 3, precision: Precision_, values: FloatArrayConstructor }; /** Opaque binary data type. */ export type BinaryType = { typeId: 4, offsets: Int32ArrayConstructor }; /** UTF-8 encoded string data type. */ export type Utf8Type = { typeId: 5, offsets: Int32ArrayConstructor }; /** Boolean data type. */ export type BoolType = { typeId: 6 }; /** Fixed decimal number data type. */ export type DecimalType = { typeId: 7, precision: number, scale: number, bitWidth: 32 | 64 | 128 | 256, values: DecimalArrayConstructor }; /** Date data type. */ export type DateType = { typeId: 8, unit: DateUnit_, values: DateTimeArrayConstructor }; /** Time data type. */ export type TimeType = { typeId: 9, unit: TimeUnit_, bitWidth: 32 | 64, values: DateTimeArrayConstructor }; /** Timestamp data type. */ export type TimestampType = { typeId: 10, unit: TimeUnit_, timezone: string | null, values: BigInt64ArrayConstructor }; /** Date/time interval data type. */ export type IntervalType = { typeId: 11, unit: IntervalUnit_, values?: Int32ArrayConstructor }; /** List data type. */ export type ListType = { typeId: 12, children: [Field], offsets: Int32ArrayConstructor }; /** Struct data type. */ export type StructType = { typeId: 13, children: Field[] }; /** Union data type. */ export type UnionType = { typeId: 14, mode: UnionMode_, typeIds: number[], typeMap: Record<number, number>, children: Field[], typeIdForValue?: (value: any, index: number) => number, offsets: Int32ArrayConstructor }; /** Fixed-size opaque binary data type. */ export type FixedSizeBinaryType = { typeId: 15, stride: number }; /** Fixed-size list data type. */ export type FixedSizeListType = { typeId: 16, stride: number, children: Field[] }; /** Key-value map data type. */ export type MapType = { typeId: 17, keysSorted: boolean, children: [Field], offsets: Int32ArrayConstructor }; /** Duration data type. */ export type DurationType = { typeId: 18, unit: TimeUnit_, values: BigInt64ArrayConstructor }; /** Opaque binary data type with 64-bit integer offsets for larger data. */ export type LargeBinaryType = { typeId: 19, offsets: BigInt64ArrayConstructor }; /** UTF-8 encoded string data type with 64-bit integer offsets for larger data. */ export type LargeUtf8Type = { typeId: 20, offsets: BigInt64ArrayConstructor }; /** List data type with 64-bit integer offsets for larger data. */ export type LargeListType = { typeId: 21, children: [Field], offsets: BigInt64ArrayConstructor }; /** RunEndEncoded data type. */ export type RunEndEncodedType = { typeId: 22, children: [Field, Field] }; /** Opaque binary data type with multi-buffer view layout. */ export type BinaryViewType = { typeId: 23 }; /** UTF-8 encoded string data type with multi-buffer view layout. */ export type Utf8ViewType = { typeId: 24 }; /** ListView data type. */ export type ListViewType = { typeId: 25, children: [Field], offsets: Int32ArrayConstructor }; /** ListView data type with 64-bit integer offsets for larger data. */ export type LargeListViewType = { typeId: 26, children: [Field], offsets: BigInt64ArrayConstructor }; /** * Arrow field data types. */ export type DataType = | NoneType | NullType | IntType | FloatType | BinaryType | Utf8Type | BoolType | DecimalType | DateType | TimeType | TimestampType | IntervalType | ListType | StructType | UnionType | FixedSizeBinaryType | FixedSizeListType | MapType | DurationType | LargeBinaryType | LargeUtf8Type | LargeListType | RunEndEncodedType | BinaryViewType | Utf8ViewType | ListViewType | LargeListViewType | DictionaryType; /** * Arrow IPC record batch message. */ export interface RecordBatch { length?: number; nodes: {length: number, nullCount: number}[]; regions: {offset: number, length: number}[]; variadic: number[]; body?: Uint8Array; buffers?: Uint8Array[]; byteLength?: number; } /** * Arrow IPC dictionary batch message. */ export interface DictionaryBatch { id: number; data: RecordBatch; isDelta: boolean; body?: Uint8Array; } /** * Parsed Arrow IPC data, prior to table construction. */ export interface ArrowData { schema?: Schema; dictionaries: DictionaryBatch[] | null; records: RecordBatch[] | null; metadata: Metadata | null; } /** * Parsed Arrow message data. */ export interface Message { /** The Arrow version. */ version: Version_; /** The message header type. */ type: MessageHeader_; /** The buffer integer index after the message. */ index: number; /** The message content. */ content?: Schema | RecordBatch | DictionaryBatch; } /** * A pointer block in the Arrow IPC 'file' format. */ export interface Block { /** The file byte offset to the message start. */ offset: number, /** The size of the message header metadata. */ metadataLength: number, /** The size of the message body. */ bodyLength: number } /** * Options for controlling how values are transformed when extracted * from an Arrow binary representation. */ export interface ExtractionOptions { /** * If true, extract dates and timestamps as JavaScript `Date` objects. * Otherwise, return numerical timestamp values (default). */ useDate?: boolean; /** * If true, extract decimal-type data as scaled integer values, where * fractional digits are scaled to integer positions. Returned integers * are `BigInt` values for decimal bit widths of 64 bits or higher and * 32-bit integers (as JavaScript `number`) otherwise. If false, decimals * are converted to floating-point numbers (default). */ useDecimalInt?: boolean; /** * If true, extract 64-bit integers as JavaScript `BigInt` values. * Otherwise, coerce long integers to JavaScript number values (default). */ useBigInt?: boolean; /** * If true, extract Arrow 'Map' values as JavaScript `Map` instances. * Otherwise, return an array of [key, value] pairs compatible with * both `Map` and `Object.fromEntries` (default). */ useMap?: boolean; /** * If true, extract Arrow 'Struct' values and table row objects using * zero-copy proxy objects that extract data from underlying Arrow batches. * The proxy objects can improve performance and reduce memory usage, but * do not support property enumeration (`Object.keys`, `Object.values`, * `Object.entries`) or spreading (`{ ...object }`). */ useProxy?: boolean; } /** * Options for building new columns and controlling how values are * transformed when extracted from an Arrow binary representation. */ export interface ColumnBuilderOptions extends ExtractionOptions { /** * The maximum number of rows to include in a single record batch. */ maxBatchRows?: number; } /** * Options for building new tables and controlling how values are * transformed when extracted from an Arrow binary representation. */ export interface TableBuilderOptions extends ColumnBuilderOptions { /** * A map from column names to Arrow data types. */ types?: Record<string, DataType>; } /** * A map of types for representing table columns. */ export type TypeMap = Record<string, any>;