apache-arrow
Version:
Apache Arrow columnar in-memory format
198 lines (189 loc) • 12.1 kB
text/typescript
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
import { Data, makeData } from '../data.js';
import * as type from '../type.js';
import { Field } from '../schema.js';
import { Vector } from '../vector.js';
import { DataType } from '../type.js';
import { Visitor } from '../visitor.js';
import { packBools } from '../util/bit.js';
import { encodeUtf8 } from '../util/utf8.js';
import { Int64, Int128 } from '../util/int.js';
import { UnionMode, DateUnit, MetadataVersion } from '../enum.js';
import { toArrayBufferView } from '../util/buffer.js';
import { BufferRegion, FieldNode } from '../ipc/metadata/message.js';
/**
 * Typed `visit` / `visitMany` signatures for `VectorLoader`.
 *
 * This interface shares its name with the `VectorLoader` class declared in
 * this file, so TypeScript merges the two declarations: the class picks up
 * these signatures, both of which are typed to produce `Data<T>`.
 * @ignore
 */
export interface VectorLoader extends Visitor {
// Accepts either a `Field` wrapping a type or a bare `DataType`.
visit<T extends DataType>(node: Field<T> | T): Data<T>;
// Array counterpart of `visit`; typed to return an array of `Data<T>`.
visitMany<T extends DataType>(nodes: (Field<T> | T)[]): Data<T>[];
}
/**
 * Visitor that assembles a `Data` instance for each visited type.
 *
 * The loader walks two parallel cursors: one over `nodes` (a `FieldNode`
 * supplies `length` and `nullCount`) and one over `buffers` (a `BufferRegion`
 * supplies the `offset`/`length` of a slice of `bytes`). Every `read*` helper
 * that is called without an explicit region advances the buffer cursor, so the
 * order in which the helpers are invoked inside each `visit*` method is
 * significant and must not be rearranged.
 * @ignore
 */
export class VectorLoader extends Visitor {
    private bytes: Uint8Array;
    private nodes: FieldNode[];
    private nodesIndex = -1;
    private buffers: BufferRegion[];
    private buffersIndex = -1;
    private dictionaries: Map<number, Vector<any>>;
    private readonly metadataVersion: MetadataVersion;

    /**
     * @param bytes Backing bytes that buffer regions are sliced from.
     * @param nodes Field nodes, consumed one per visited type.
     * @param buffers Buffer regions, consumed in order by the `read*` helpers.
     * @param dictionaries Dictionary vectors keyed by dictionary id.
     * @param metadataVersion Metadata version of the payload; defaults to V5.
     */
    constructor(bytes: Uint8Array, nodes: FieldNode[], buffers: BufferRegion[], dictionaries: Map<number, Vector<any>>, metadataVersion = MetadataVersion.V5) {
        super();
        this.bytes = bytes;
        this.nodes = nodes;
        this.buffers = buffers;
        this.dictionaries = dictionaries;
        this.metadataVersion = metadataVersion;
    }

    /** Unwraps a `Field` to its type (bare types pass through) and dispatches to the base visitor. */
    public visit<T extends DataType>(node: Field<T> | T): Data<T> {
        const target = node instanceof Field ? node.type : node;
        return super.visit(target);
    }

    /** Null: only the node length is used; no buffer region is consumed. */
    public visitNull<T extends type.Null>(type: T, { length } = this.nextFieldNode()) {
        return makeData({
            type,
            length,
        });
    }

    /** Bool: null bitmap, then data. */
    public visitBool<T extends type.Bool>(type: T, { length, nullCount } = this.nextFieldNode()) {
        const nullBitmap = this.readNullBitmap(type, nullCount);
        const data = this.readData(type);
        return makeData({ type, length, nullCount, nullBitmap, data });
    }

    /** Int: null bitmap, then data. */
    public visitInt<T extends type.Int>(type: T, { length, nullCount } = this.nextFieldNode()) {
        const nullBitmap = this.readNullBitmap(type, nullCount);
        const data = this.readData(type);
        return makeData({ type, length, nullCount, nullBitmap, data });
    }

    /** Float: null bitmap, then data. */
    public visitFloat<T extends type.Float>(type: T, { length, nullCount } = this.nextFieldNode()) {
        const nullBitmap = this.readNullBitmap(type, nullCount);
        const data = this.readData(type);
        return makeData({ type, length, nullCount, nullBitmap, data });
    }

    /** Utf8: null bitmap, then value offsets, then data. */
    public visitUtf8<T extends type.Utf8>(type: T, { length, nullCount } = this.nextFieldNode()) {
        const nullBitmap = this.readNullBitmap(type, nullCount);
        const valueOffsets = this.readOffsets(type);
        const data = this.readData(type);
        return makeData({ type, length, nullCount, nullBitmap, valueOffsets, data });
    }

    /** LargeUtf8: null bitmap, then value offsets, then data. */
    public visitLargeUtf8<T extends type.LargeUtf8>(type: T, { length, nullCount } = this.nextFieldNode()) {
        const nullBitmap = this.readNullBitmap(type, nullCount);
        const valueOffsets = this.readOffsets(type);
        const data = this.readData(type);
        return makeData({ type, length, nullCount, nullBitmap, valueOffsets, data });
    }

    /** Binary: null bitmap, then value offsets, then data. */
    public visitBinary<T extends type.Binary>(type: T, { length, nullCount } = this.nextFieldNode()) {
        const nullBitmap = this.readNullBitmap(type, nullCount);
        const valueOffsets = this.readOffsets(type);
        const data = this.readData(type);
        return makeData({ type, length, nullCount, nullBitmap, valueOffsets, data });
    }

    /** LargeBinary: null bitmap, then value offsets, then data. */
    public visitLargeBinary<T extends type.LargeBinary>(type: T, { length, nullCount } = this.nextFieldNode()) {
        const nullBitmap = this.readNullBitmap(type, nullCount);
        const valueOffsets = this.readOffsets(type);
        const data = this.readData(type);
        return makeData({ type, length, nullCount, nullBitmap, valueOffsets, data });
    }

    /** FixedSizeBinary: null bitmap, then data. */
    public visitFixedSizeBinary<T extends type.FixedSizeBinary>(type: T, { length, nullCount } = this.nextFieldNode()) {
        const nullBitmap = this.readNullBitmap(type, nullCount);
        const data = this.readData(type);
        return makeData({ type, length, nullCount, nullBitmap, data });
    }

    /** Date: null bitmap, then data. */
    public visitDate<T extends type.Date_>(type: T, { length, nullCount } = this.nextFieldNode()) {
        const nullBitmap = this.readNullBitmap(type, nullCount);
        const data = this.readData(type);
        return makeData({ type, length, nullCount, nullBitmap, data });
    }

    /** Timestamp: null bitmap, then data. */
    public visitTimestamp<T extends type.Timestamp>(type: T, { length, nullCount } = this.nextFieldNode()) {
        const nullBitmap = this.readNullBitmap(type, nullCount);
        const data = this.readData(type);
        return makeData({ type, length, nullCount, nullBitmap, data });
    }

    /** Time: null bitmap, then data. */
    public visitTime<T extends type.Time>(type: T, { length, nullCount } = this.nextFieldNode()) {
        const nullBitmap = this.readNullBitmap(type, nullCount);
        const data = this.readData(type);
        return makeData({ type, length, nullCount, nullBitmap, data });
    }

    /** Decimal: null bitmap, then data. */
    public visitDecimal<T extends type.Decimal>(type: T, { length, nullCount } = this.nextFieldNode()) {
        const nullBitmap = this.readNullBitmap(type, nullCount);
        const data = this.readData(type);
        return makeData({ type, length, nullCount, nullBitmap, data });
    }

    /** List: null bitmap, then value offsets, then the first child type is visited. */
    public visitList<T extends type.List>(type: T, { length, nullCount } = this.nextFieldNode()) {
        const nullBitmap = this.readNullBitmap(type, nullCount);
        const valueOffsets = this.readOffsets(type);
        const child = this.visit(type.children[0]);
        return makeData({ type, length, nullCount, nullBitmap, valueOffsets, child });
    }

    /** Struct: null bitmap, then every child type is visited. */
    public visitStruct<T extends type.Struct>(type: T, { length, nullCount } = this.nextFieldNode()) {
        const nullBitmap = this.readNullBitmap(type, nullCount);
        const children = this.visitMany(type.children);
        return makeData({ type, length, nullCount, nullBitmap, children });
    }

    /**
     * Union: dispatches on `type.mode` to the sparse or dense loader, reusing
     * the field node that was already consumed here.
     */
    public visitUnion<T extends type.Union>(type: T, { length, nullCount } = this.nextFieldNode()) {
        // For metadata versions before V5 one buffer region is consumed here and
        // its result discarded - presumably the validity buffer that older
        // union layouts still carried (confirm against the Arrow format spec).
        if (this.metadataVersion < MetadataVersion.V5) {
            this.readNullBitmap(type, nullCount);
        }
        if (type.mode === UnionMode.Sparse) {
            return this.visitSparseUnion(type as type.SparseUnion, { length, nullCount });
        }
        return this.visitDenseUnion(type as type.DenseUnion, { length, nullCount });
    }

    /** DenseUnion: type ids, then value offsets, then every child type is visited. */
    public visitDenseUnion<T extends type.DenseUnion>(type: T, { length, nullCount } = this.nextFieldNode()) {
        const typeIds = this.readTypeIds(type);
        const valueOffsets = this.readOffsets(type);
        const children = this.visitMany(type.children);
        return makeData({ type, length, nullCount, typeIds, valueOffsets, children });
    }

    /** SparseUnion: type ids, then every child type is visited. */
    public visitSparseUnion<T extends type.SparseUnion>(type: T, { length, nullCount } = this.nextFieldNode()) {
        const typeIds = this.readTypeIds(type);
        const children = this.visitMany(type.children);
        return makeData({ type, length, nullCount, typeIds, children });
    }

    /** Dictionary: null bitmap, then index data (read as `type.indices`), then the dictionary lookup. */
    public visitDictionary<T extends type.Dictionary>(type: T, { length, nullCount } = this.nextFieldNode()) {
        const nullBitmap = this.readNullBitmap(type, nullCount);
        const data = this.readData(type.indices);
        const dictionary = this.readDictionary(type);
        return makeData({ type, length, nullCount, nullBitmap, data, dictionary });
    }

    /** Interval: null bitmap, then data. */
    public visitInterval<T extends type.Interval>(type: T, { length, nullCount } = this.nextFieldNode()) {
        const nullBitmap = this.readNullBitmap(type, nullCount);
        const data = this.readData(type);
        return makeData({ type, length, nullCount, nullBitmap, data });
    }

    /** Duration: null bitmap, then data. */
    public visitDuration<T extends type.Duration>(type: T, { length, nullCount } = this.nextFieldNode()) {
        const nullBitmap = this.readNullBitmap(type, nullCount);
        const data = this.readData(type);
        return makeData({ type, length, nullCount, nullBitmap, data });
    }

    /** FixedSizeList: null bitmap, then the first child type is visited. */
    public visitFixedSizeList<T extends type.FixedSizeList>(type: T, { length, nullCount } = this.nextFieldNode()) {
        const nullBitmap = this.readNullBitmap(type, nullCount);
        const child = this.visit(type.children[0]);
        return makeData({ type, length, nullCount, nullBitmap, child });
    }

    /** Map: null bitmap, then value offsets, then the first child type is visited. */
    public visitMap<T extends type.Map_>(type: T, { length, nullCount } = this.nextFieldNode()) {
        const nullBitmap = this.readNullBitmap(type, nullCount);
        const valueOffsets = this.readOffsets(type);
        const child = this.visit(type.children[0]);
        return makeData({ type, length, nullCount, nullBitmap, valueOffsets, child });
    }

    /** Advances the node cursor and returns the node it now points at. */
    protected nextFieldNode() {
        this.nodesIndex += 1;
        return this.nodes[this.nodesIndex];
    }

    /** Advances the buffer cursor and returns the region it now points at. */
    protected nextBufferRange() {
        this.buffersIndex += 1;
        return this.buffers[this.buffersIndex];
    }

    /**
     * Returns the null bitmap, or an empty array when `nullCount` is not
     * positive. The default `buffer` argument consumes a buffer region even
     * when the bitmap bytes themselves are not read.
     */
    protected readNullBitmap<T extends DataType>(type: T, nullCount: number, buffer = this.nextBufferRange()) {
        if (nullCount > 0) {
            const bitmap = this.readData(type, buffer);
            if (bitmap) {
                return bitmap;
            }
        }
        return new Uint8Array(0);
    }

    /** Reads value offsets; in this class it is a plain `readData` call. */
    protected readOffsets<T extends DataType>(type: T, buffer?: BufferRegion) {
        return this.readData(type, buffer);
    }

    /** Reads union type ids; in this class it is a plain `readData` call. */
    protected readTypeIds<T extends DataType>(type: T, buffer?: BufferRegion) {
        return this.readData(type, buffer);
    }

    /** Returns the `subarray` view of `bytes` covered by the given (or next) buffer region. */
    protected readData<T extends DataType>(_type: T, { length, offset } = this.nextBufferRange()) {
        const end = offset + length;
        return this.bytes.subarray(offset, end);
    }

    /** Looks up the dictionary vector registered under `type.id`; the result is asserted non-null. */
    protected readDictionary<T extends type.Dictionary>(type: T): Vector<T['dictionary']> {
        const dictionary = this.dictionaries.get(type.id);
        return dictionary!;
    }
}
/**
 * `VectorLoader` variant whose buffers come from `sources` - an array of
 * plain value arrays - instead of a byte blob. In every override the
 * `offset` of a buffer region is used as an index into `sources`; the
 * region's `length` is not consulted.
 * @ignore
 */
export class JSONVectorLoader extends VectorLoader {
    private sources: any[][];

    /**
     * @param sources Value arrays, indexed by buffer-region `offset`.
     * @param nodes Field nodes, forwarded to the base loader.
     * @param buffers Buffer regions, forwarded to the base loader.
     * @param dictionaries Dictionary vectors keyed by dictionary id.
     * @param metadataVersion Metadata version, forwarded to the base loader.
     */
    constructor(sources: any[][], nodes: FieldNode[], buffers: BufferRegion[], dictionaries: Map<number, Vector<any>>, metadataVersion: MetadataVersion) {
        // The base class gets an empty byte array; all reads below go through `sources`.
        super(new Uint8Array(0), nodes, buffers, dictionaries, metadataVersion);
        this.sources = sources;
    }

    /** Packs the source entry into a bitmap, or returns an empty array when `nullCount <= 0`. */
    protected readNullBitmap<T extends DataType>(_type: T, nullCount: number, { offset } = this.nextBufferRange()) {
        if (nullCount <= 0) {
            return new Uint8Array(0);
        }
        return packBools(this.sources[offset]);
    }

    /** Converts the source entry to the type's offset array type, then views it as bytes. */
    protected readOffsets<T extends DataType>(_type: T, { offset } = this.nextBufferRange()) {
        const offsets = toArrayBufferView(_type.OffsetArrayType, this.sources[offset]);
        return toArrayBufferView(Uint8Array, offsets);
    }

    /** Converts the source entry to the type's array type, then views it as bytes. */
    protected readTypeIds<T extends DataType>(type: T, { offset } = this.nextBufferRange()) {
        const typeIds = toArrayBufferView(type.ArrayType, this.sources[offset]);
        return toArrayBufferView(Uint8Array, typeIds);
    }

    /**
     * Converts the source entry for the next buffer region into bytes, choosing
     * the conversion from the data type.
     */
    protected readData<T extends DataType>(type: T, { offset } = this.nextBufferRange()) {
        const values = this.sources[offset];

        // Timestamps, 64-bit ints/times, durations and millisecond dates all
        // go through the same Int64 string-array conversion.
        if (
            DataType.isTimestamp(type) ||
            ((DataType.isInt(type) || DataType.isTime(type)) && type.bitWidth === 64) ||
            DataType.isDuration(type) ||
            (DataType.isDate(type) && type.unit === DateUnit.MILLISECOND)
        ) {
            return toArrayBufferView(Uint8Array, Int64.convertArray(values as string[]));
        }
        if (DataType.isDecimal(type)) {
            return toArrayBufferView(Uint8Array, Int128.convertArray(values as string[]));
        }
        if (DataType.isBinary(type) || DataType.isLargeBinary(type) || DataType.isFixedSizeBinary(type)) {
            return binaryDataFromJSON(values as string[]);
        }
        if (DataType.isBool(type)) {
            return packBools(values as number[]);
        }
        if (DataType.isUtf8(type) || DataType.isLargeUtf8(type)) {
            const text = (values as string[]).join('');
            return encodeUtf8(text);
        }

        // Everything else: coerce each entry with unary plus, build the type's
        // native typed array, then view it as bytes.
        const numeric = values.map((value) => +value);
        return toArrayBufferView(Uint8Array, toArrayBufferView(type.ArrayType, numeric));
    }
}
/**
 * Concatenates hex strings and decodes each two-character pair into one byte.
 *
 * Example input: "DATA": ["49BC7D5B6C47D2","3F5FB6D9322026"]
 *
 * @param values Hex strings; they are joined with no separator before decoding.
 * @returns A `Uint8Array` sized to half the joined string's length.
 * @ignore
 */
function binaryDataFromJSON(values: string[]) {
    // Not the fastest possible approach, but simple and sufficient.
    const hex = values.join('');
    const bytes = new Uint8Array(hex.length / 2);
    let byteIndex = 0;
    for (let pos = 0; pos < hex.length; pos += 2) {
        bytes[byteIndex] = Number.parseInt(hex.substring(pos, pos + 2), 16);
        byteIndex += 1;
    }
    return bytes;
}