icebird
Version:
Apache Iceberg client for JavaScript
268 lines (238 loc) • 5.96 kB
TypeScript
/**
 * Shape of an Iceberg table metadata document.
 *
 * Most keys keep a hyphenated spelling, so they have to be read with bracket
 * notation (e.g. `metadata['current-snapshot-id']`).
 */
export interface TableMetadata {
'format-version': number
'table-uuid': string
location: string
'last-sequence-number': number
'last-updated-ms': number
'last-column-id': number
// presumably selects an entry of `schemas` by its 'schema-id' — confirm
'current-schema-id': number
schemas: Schema[]
// presumably selects an entry of 'partition-specs' by its 'spec-id' — confirm
'default-spec-id': number
'partition-specs': PartitionSpec[]
'last-partition-id': number
properties?: Record<string, string>
// presumably selects an entry of `snapshots` by its 'snapshot-id' — confirm
'current-snapshot-id': number
snapshots?: Snapshot[]
'snapshot-log'?: SnapshotLog[]
'metadata-log'?: MetadataLog[]
'sort-orders': SortOrder[]
// presumably selects an entry of 'sort-orders' by its 'order-id' — confirm
'default-sort-order-id': number
// NOTE(review): typed only as `object`; the shape of a ref is not described in this file
refs?: object
// NOTE(review): required here although 'partition-statistics' below is optional — confirm the asymmetry is intended
statistics: TableStatistics[]
'partition-statistics'?: PartitionStatistics[]
'next-row-id'?: bigint
}
/**
 * A table schema: a struct identified by 'schema-id' that lists its fields.
 */
export interface Schema {
// always the literal 'struct'
type: 'struct'
'schema-id': number
// optional list of numeric ids; presumably refers to `id` values of entries in `fields` — confirm
'identifier-field-ids'?: number[]
fields: Field[]
}
/**
 * A single field (column) of a schema's `fields` list.
 *
 * 'initial-default' and 'write-default' are optional: the Iceberg spec marks
 * both as optional, so a field that declares no defaults simply omits the
 * keys. Reading either key still yields `any`, exactly as before.
 */
interface Field {
  id: number
  name: string
  required: boolean
  // TODO: only string type names are modelled so far
  type: string
  doc?: string
  // left as `any`: the value's shape depends on the field's `type`
  'initial-default'?: any
  'write-default'?: any
}
/**
 * A partition spec: a numeric 'spec-id' plus the list of partition fields
 * that belong to it.
 */
export interface PartitionSpec {
'spec-id': number
fields: PartitionField[]
}
/**
 * One entry of a partition spec's `fields` list: a named transform with its
 * own 'field-id'.
 */
interface PartitionField {
// Both source keys are optional; presumably a field carries one or the other
// ('source-ids' is marked V3 on SortField in this file) — confirm.
'source-id'?: number
'source-ids'?: number[]
'field-id': number
name: string
transform: PartitionTransform
}
/**
 * Name of a partition transform.
 *
 * `bucket` and `truncate` carry a numeric argument in square brackets
 * (for example `bucket[16]`); every other transform is a bare keyword.
 */
export type PartitionTransform =
  | 'identity'
  | `bucket[${number}]`
  | `truncate[${number}]`
  | 'year'
  | 'month'
  | 'day'
  | 'hour'
  | 'void'
/**
 * Entry of the table metadata 'partition-statistics' list: where a statistics
 * file lives, how large it is, and which snapshot it belongs to.
 */
interface PartitionStatistics {
// NOTE(review): bigint here, while TableStatistics and Snapshot type 'snapshot-id' as number — confirm
'snapshot-id': bigint
'statistics-path': string
'file-size-in-bytes': bigint
}
/**
 * A sort order: a numeric 'order-id' plus the list of sort fields that
 * belong to it.
 */
interface SortOrder {
'order-id': number
'fields': SortField[]
}
/**
 * One entry of a sort order's `fields` list: a transform plus the direction
 * and null placement to sort by.
 */
interface SortField {
// NOTE(review): plain string here, whereas PartitionField.transform uses PartitionTransform — confirm this is intentional
transform: string
'source-id'?: number
'source-ids'?: number[] // V3
'direction': 'asc' | 'desc'
'null-order': 'nulls-first' | 'nulls-last'
}
/**
 * Recursive name-mapping node: a list of names, an optional 'field-id', and
 * optional child mappings of the same shape.
 */
export interface NameMapping {
names: string[]
'field-id'?: number
// nested mappings; presumably used for the children of nested types — confirm
fields?: NameMapping[]
}
/**
 * A table snapshot as listed in the table metadata `snapshots` array.
 */
export interface Snapshot {
'snapshot-id': number
'parent-snapshot-id'?: number
'sequence-number': number
'timestamp-ms': number
// string locating the manifest list (presumably a path or URI — confirm)
'manifest-list': string
manifests?: Manifest[]
// Every summary value, including the counters, is typed as a string.
summary: {
operation: string
// 'spark.app.id'?: string
'added-data-files': string
'added-records': string
'added-files-size': string
'changed-partition-count': string
'total-records': string
'total-files-size': string
'total-data-files': string
'total-delete-files': string
'total-position-deletes': string
'total-equality-deletes': string
}
'schema-id'?: number
'first-row-id'?: bigint // V3
'added-rows'?: number // V3
}
/**
 * Entry of the table metadata `statistics` list: the location and sizes of a
 * statistics file and the snapshot it belongs to.
 */
interface TableStatistics {
'snapshot-id': number
'statistics-path': string
'file-size-in-bytes': bigint
'file-footer-size-in-bytes': bigint
}
/**
 * Entry of the table metadata 'snapshot-log' list: a timestamp paired with a
 * snapshot id.
 */
interface SnapshotLog {
'timestamp-ms': number
'snapshot-id': number
}
/**
 * Entry of the table metadata 'metadata-log' list: a timestamp paired with
 * the name of a metadata file.
 */
interface MetadataLog {
'timestamp-ms': number
'metadata-file': string
}
/**
 * Description of one manifest file. Keys are snake_case, unlike the
 * hyphenated keys of the table metadata types in this file (presumably
 * because these records are decoded from a manifest list file — confirm).
 */
export interface Manifest {
manifest_path: string
manifest_length: bigint
partition_spec_id: number
content: 0 | 1 // 0=data, 1=deletes
sequence_number?: bigint
min_sequence_number?: bigint
added_snapshot_id: bigint
// per-status file counts are numbers, the matching row counts below are bigints
added_files_count: number
existing_files_count: number
deleted_files_count: number
added_rows_count: bigint
existing_rows_count: bigint
deleted_rows_count: bigint
partitions?: FieldSummary[]
// key_metadata?: unknown
first_row_id?: bigint
}
/**
 * One entry of a manifest: a data file together with its status and the
 * optional snapshot / sequence numbers attached to it.
 */
export interface ManifestEntry {
status: 0 | 1 | 2 // 0=existing, 1=added, 2=deleted
snapshot_id?: bigint
sequence_number?: bigint
file_sequence_number?: bigint
data_file: DataFile
}
/**
 * Per-partition-field summary referenced by `Manifest.partitions`.
 *
 * NOTE(review): keys here are hyphenated while the enclosing Manifest uses
 * snake_case keys — confirm this matches the records actually produced.
 */
interface FieldSummary {
'contains-null': boolean
'contains-nan'?: boolean
// bounds are left as `unknown`; their concrete type is not fixed by this file
'lower-bound'?: unknown
'upper-bound'?: unknown
}
/**
 * Description of a single data or delete file, as carried by
 * `ManifestEntry.data_file`.
 */
export interface DataFile {
content: 0 | 1 | 2 // 0=data, 1=position_delete, 2=equality_delete
file_path: string
file_format: 'avro' | 'orc' | 'parquet' | 'puffin'
partition: Record<number, unknown> // indexed by field id
record_count: bigint
file_size_in_bytes: bigint
// The numeric-keyed maps below are presumably keyed by field id like `partition` — confirm.
column_sizes?: Record<number, bigint>
value_counts?: Record<number, bigint>
null_value_counts?: Record<number, bigint>
nan_value_counts?: Record<number, bigint>
lower_bounds?: Record<number, unknown>
upper_bounds?: Record<number, unknown>
// key_metadata?: string
split_offsets?: bigint[]
equality_ids?: number[]
sort_order_id?: number
first_row_id?: bigint
referenced_data_file?: string
content_offset?: bigint
content_size_in_bytes?: bigint
}
/**
 * A position delete: a file path paired with a bigint position
 * (presumably the row position within that file — confirm).
 */
export interface FilePositionDelete {
file_path: string
pos: bigint
}
/* Avro types */
/**
 * One field of an Avro record schema: a name, its Avro type, and optional
 * documentation and default value.
 */
interface AvroField {
name: string
type: AvroType
doc?: string
// left as `any`: the default's shape depends on `type`
default?: any
// 'field-id'?: number
}
/**
 * Any Avro schema node handled by this file: a primitive type name, a
 * complex type, or a logical type annotation.
 */
export type AvroType =
  | AvroPrimitiveType
  | AvroComplexType
  | AvroLogicalType
/** Names of the Avro primitive types. */
type AvroPrimitiveType =
  | 'null'
  | 'boolean'
  | 'int'
  | 'long'
  | 'float'
  | 'double'
  | 'bytes'
  | 'string'
/**
 * Avro record schema: a named list of fields with optional namespace,
 * documentation and aliases.
 */
interface AvroRecord {
type: 'record'
name: string
namespace?: string
doc?: string
aliases?: string[]
fields: AvroField[]
// optional numeric id attached to the record schema (same key name as Schema['schema-id'])
'schema-id'?: number
}
/**
 * Avro array schema: `items` gives the schema of each element.
 */
interface AvroArray {
type: 'array'
items: AvroType
default?: any[]
}
/** Avro union schema, represented as an array of the member schemas. */
type AvroUnion = AvroType[]
/** Avro `int` annotated with the `date` logical type. */
type AvroDate = {
type: 'int'
logicalType: 'date'
}
/**
 * Avro `bytes` annotated with the `decimal` logical type.
 * `precision` is mandatory; `scale` may be omitted.
 */
type AvroDecimal = {
type: 'bytes'
logicalType: 'decimal'
precision: number
scale?: number
}
/** Avro `long` annotated with the `timestamp-millis` logical type. */
type AvroTimestampMillis = {
type: 'long'
logicalType: 'timestamp-millis'
}
/** Avro `long` annotated with the `timestamp-micros` logical type. */
type AvroTimestampMicros = {
type: 'long'
logicalType: 'timestamp-micros'
}
/** Logical type names accepted in the `logicalType` key of a logical type node. */
type AvroLogicalTypeType =
  | 'date'
  | 'decimal'
  | 'duration'
  | 'local-timestamp-millis'
  | 'local-timestamp-micros'
  | 'time-millis'
  | 'time-micros'
  | 'timestamp-millis'
  | 'timestamp-micros'
  | 'uuid'
// catch-all: "implementations must ignore unknown logical types when reading"
// Fallback shape for logical types without a dedicated alias: any primitive
// base type combined with one of the names in AvroLogicalTypeType.
// NOTE(review): `logicalType` is limited to AvroLogicalTypeType, so a name
// outside that list is not representable here despite the "catch-all" intent.
type AvroGenericLogicalType = {
type: AvroPrimitiveType
logicalType: AvroLogicalTypeType
}
/**
 * Any logical type node: one of the specifically modelled shapes, or the
 * generic primitive-plus-name fallback.
 */
type AvroLogicalType =
  | AvroDate
  | AvroDecimal
  | AvroTimestampMillis
  | AvroTimestampMicros
  | AvroGenericLogicalType
// Avro complex types: records, enums, arrays, maps, unions, fixed
// Only records, arrays and unions are modelled by this alias.
type AvroComplexType =
  | AvroRecord
  | AvroArray
  | AvroUnion