// apache-arrow
// Version:
// Apache Arrow columnar in-memory format
// 300 lines (298 loc) • 12.5 kB
// JavaScript
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
var _a;
import { Data, makeData } from './data.mjs';
import { Table } from './table.mjs';
import { Vector } from './vector.mjs';
import { Schema, Field } from './schema.mjs';
import { DataType, Struct, Null } from './type.mjs';
import { wrapIndex } from './util/vector.mjs';
import { instance as getVisitor } from './visitor/get.mjs';
import { instance as setVisitor } from './visitor/set.mjs';
import { instance as indexOfVisitor } from './visitor/indexof.mjs';
import { instance as iteratorVisitor } from './visitor/iterator.mjs';
/** @ignore */
export class RecordBatch {
    /**
     * Build a RecordBatch from either a `(schema, data)` pair or a single
     * object that maps column names to child data.
     *
     * In both forms the resulting `schema` and `data` are whatever
     * `ensureSameLengthData` returns for the supplied children.
     *
     * @param args Either `[Schema, Data]`, or one object whose values expose
     * `length` and `type`.
     * @throws {TypeError} If the number of arguments is neither 1 nor 2, or if
     * the two-argument form is not a `Schema` followed by a `Data`.
     */
    constructor(...args) {
        switch (args.length) {
            case 2: {
                const [schema, data] = args;
                this.schema = schema;
                if (!(schema instanceof Schema)) {
                    throw new TypeError('RecordBatch constructor expects a [Schema, Data] pair.');
                }
                // Only an explicit `undefined` falls back to the default struct
                // data (one child per schema field, each created without a length).
                this.data = data !== undefined ? data : makeData({
                    nullCount: 0,
                    type: new Struct(schema.fields),
                    children: schema.fields.map((f) => makeData({ type: f.type, nullCount: 0 }))
                });
                if (!(this.data instanceof Data)) {
                    throw new TypeError('RecordBatch constructor expects a [Schema, Data] pair.');
                }
                [this.schema, this.data] = ensureSameLengthData(this.schema, this.data.children);
                break;
            }
            case 1: {
                const [obj] = args;
                const fields = [];
                const children = [];
                let length = 0;
                // One nullable field per key; the batch length is the longest child.
                Object.keys(obj).forEach((name, i) => {
                    const child = obj[name];
                    children[i] = child;
                    length = Math.max(length, child.length);
                    fields[i] = Field.new({ name, type: child.type, nullable: true });
                });
                const schema = new Schema(fields);
                const data = makeData({ type: new Struct(fields), length, children, nullCount: 0 });
                [this.schema, this.data] = ensureSameLengthData(schema, data.children, length);
                break;
            }
            default: throw new TypeError('RecordBatch constructor expects an Object mapping names to child Data, or a [Schema, Data] pair.');
        }
    }
    /**
     * The dictionaries collected from this batch's fields and children.
     * Computed on first access and cached on `_dictionaries`.
     */
    get dictionaries() {
        return this._dictionaries || (this._dictionaries = collectDictionaries(this.schema.fields, this.data.children));
    }
    /**
     * The number of columns in this RecordBatch.
     */
    get numCols() { return this.schema.fields.length; }
    /**
     * The number of rows in this RecordBatch.
     */
    get numRows() { return this.data.length; }
    /**
     * The number of null rows in this RecordBatch.
     */
    get nullCount() {
        return this.data.nullCount;
    }
    /**
     * Check whether a row is valid (i.e. not null).
     * @param index The index at which to read the validity bitmap.
     * @returns The result of `getValid(index)` on the underlying data.
     */
    isValid(index) {
        return this.data.getValid(index);
    }
    /**
     * Get a row by position.
     * @param index The index of the row to read.
     */
    get(index) {
        return getVisitor.visit(this.data, index);
    }
    /**
     * Get a row value by position.
     * @param index The index of the row to read. A negative index will count back from the last row.
     */
    at(index) {
        return this.get(wrapIndex(index, this.numRows));
    }
    /**
     * Set a row by position.
     * @param index The index of the row to write.
     * @param value The value to set.
     */
    set(index, value) {
        return setVisitor.visit(this.data, index, value);
    }
    /**
     * Retrieve the index of the first occurrence of a row in a RecordBatch.
     * @param element The row to locate in the RecordBatch.
     * @param offset The index at which to begin the search. If offset is omitted, the search starts at index 0.
     */
    indexOf(element, offset) {
        return indexOfVisitor.visit(this.data, element, offset);
    }
    /**
     * Iterator for rows in this RecordBatch.
     */
    [Symbol.iterator]() {
        return iteratorVisitor.visit(new Vector([this.data]));
    }
    /**
     * Return a JavaScript Array of the RecordBatch rows.
     * @returns An Array of RecordBatch rows.
     */
    toArray() {
        return [...this];
    }
    /**
     * Combines two or more RecordBatch of the same schema.
     * @param others Additional RecordBatch to add to the end of this RecordBatch.
     * @returns A Table built from this batch's schema and `[this, ...others]`.
     */
    concat(...others) {
        return new Table(this.schema, [this, ...others]);
    }
    /**
     * Return a zero-copy sub-section of this RecordBatch.
     * @param begin The beginning of the specified portion of the RecordBatch.
     * @param end The end of the specified portion of the RecordBatch. This is exclusive of the row at the index 'end'.
     */
    slice(begin, end) {
        // Slicing is delegated to a single-chunk Vector wrapping this batch's data.
        const [slice] = new Vector([this.data]).slice(begin, end).data;
        return new RecordBatch(this.schema, slice);
    }
    /**
     * Returns a child Vector by name, or null if this RecordBatch has no child with the given name.
     * @param name The name of the child to retrieve.
     */
    getChild(name) {
        const fields = this.schema.fields;
        const index = fields == null ? undefined : fields.findIndex((f) => f.name === name);
        return this.getChildAt(index);
    }
    /**
     * Returns a child Vector by index, or null if this RecordBatch has no child at the supplied index.
     * @param index The index of the child to retrieve.
     */
    getChildAt(index) {
        if (index > -1 && index < this.schema.fields.length) {
            return new Vector([this.data.children[index]]);
        }
        return null;
    }
    /**
     * Sets a child Vector by name.
     * @param name The name of the child to overwrite.
     * @param child The replacement Vector.
     * @returns A new RecordBatch with the new child for the specified name.
     */
    setChild(name, child) {
        const fields = this.schema.fields;
        const index = fields == null ? undefined : fields.findIndex((f) => f.name === name);
        return this.setChildAt(index, child);
    }
    /**
     * Sets a child Vector by index.
     * @param index The index of the child to overwrite.
     * @param child The replacement Vector. A falsy value is replaced by a
     * `Null`-typed Vector of `numRows` length.
     * @returns A new RecordBatch. When `index` is out of range the new batch is
     * built from the current schema and data unchanged.
     */
    setChildAt(index, child) {
        let schema = this.schema;
        let data = this.data;
        if (index > -1 && index < this.numCols) {
            if (!child) {
                child = new Vector([makeData({ type: new Null(), length: this.numRows })]);
            }
            // Work on copies so the current batch's field and child arrays are untouched.
            const fields = schema.fields.slice();
            const children = data.children.slice();
            fields[index] = fields[index].clone({ type: child.type });
            // Only the first chunk of the replacement Vector is used.
            children[index] = child.data[0];
            schema = new Schema(fields, new Map(this.schema.metadata));
            data = makeData({ type: new Struct(fields), children });
        }
        return new RecordBatch(schema, data);
    }
    /**
     * Construct a new RecordBatch containing only specified columns.
     *
     * @param columnNames Names of columns to keep.
     * @returns A new RecordBatch of columns matching the specified names.
     */
    select(columnNames) {
        const schema = this.schema.select(columnNames);
        const type = new Struct(schema.fields);
        const sourceFields = this.schema.fields;
        const sourceChildren = this.data.children;
        const children = [];
        // Children must sit at the same position as their field in the
        // *selected* schema. Indexing them by their position in the original
        // schema leaves holes that are later backfilled with all-null columns.
        schema.fields.forEach((field, position) => {
            // Prefer identity so duplicate names resolve to the right column;
            // fall back to the name in case the selected fields are copies.
            let index = sourceFields.indexOf(field);
            if (index === -1) {
                index = sourceFields.findIndex((f) => f.name === field.name);
            }
            if (index > -1) {
                children[position] = sourceChildren[index];
            }
        });
        return new RecordBatch(schema, makeData({ type, length: this.numRows, children }));
    }
    /**
     * Construct a new RecordBatch containing only columns at the specified indices.
     *
     * @param columnIndices Indices of columns to keep.
     * @returns A new RecordBatch of columns matching at the specified indices.
     */
    selectAt(columnIndices) {
        const schema = this.schema.selectAt(columnIndices);
        // Indices with no corresponding child are dropped.
        const children = columnIndices.map((i) => this.data.children[i]).filter(Boolean);
        const subset = makeData({ type: new Struct(schema.fields), length: this.numRows, children });
        return new RecordBatch(schema, subset);
    }
}
_a = Symbol.toStringTag;
// The prototype defaults below are applied from inside an immediately-invoked
// function whose return value becomes the static `Symbol.toStringTag` entry.
// Keeping the work inside that expression stops bundlers from tree-shaking it
// away, while staying compliant with `"sideEffects": false`.
RecordBatch[_a] = (function initRecordBatchPrototype(prototype) {
    prototype._nullCount = -1;
    prototype[Symbol.isConcatSpreadable] = true;
    return 'RecordBatch';
}(RecordBatch.prototype));
/**
 * Produce a schema and struct data in which every child has the same length.
 *
 * Any child that is missing, or whose length differs from `maxLength`, has its
 * field re-cloned as nullable. An existing child is resized through its
 * `_changeLengthAndBackfillNullBitmap` method; a missing child is replaced by
 * an all-null placeholder of the field's type.
 *
 * @param schema The schema whose fields are walked positionally.
 * @param chunks Child data, positionally aligned with `schema.fields`. Entries
 * may be missing (`undefined`, `null`, or array holes).
 * @param maxLength Target length. Defaults to the longest present chunk.
 * @returns A `[schema, data]` pair: `schema.assign(fields)` and a new struct
 * data of `maxLength` rows.
 * @ignore
 */
function ensureSameLengthData(schema, chunks, maxLength = chunks.reduce(
    // Missing chunks contribute nothing to the length instead of throwing;
    // the loop below is what fills them in.
    (max, col) => (col == null ? max : Math.max(max, col.length)), 0)) {
    const fields = [...schema.fields];
    const children = [...chunks];
    // Bitmap byte size: row count rounded up to a multiple of 64, divided by 8.
    const nullBitmapSize = ((maxLength + 63) & ~63) >> 3;
    for (const [idx, field] of schema.fields.entries()) {
        const chunk = chunks[idx];
        if (chunk && chunk.length === maxLength) {
            continue;
        }
        fields[idx] = field.clone({ nullable: true });
        const resized = chunk == null ? undefined : chunk._changeLengthAndBackfillNullBitmap(maxLength);
        children[idx] = resized != null ? resized : makeData({
            type: field.type,
            length: maxLength,
            nullCount: maxLength,
            nullBitmap: new Uint8Array(nullBitmapSize)
        });
    }
    return [
        schema.assign(fields),
        makeData({ type: new Struct(fields), length: maxLength, children })
    ];
}
/**
 * Gather the dictionaries referenced by a list of fields and their data.
 *
 * Fields and children are walked in parallel. For each pair the function first
 * recurses into the type's children, once against the data's own children and
 * once per chunk of the data's dictionary. It then records the data's
 * dictionary under the type's `id` when the type is a dictionary type.
 *
 * Nothing is collected unless `fields` is non-empty and has the same length as
 * `children`.
 *
 * @param fields The fields to inspect.
 * @param children Data positionally aligned with `fields`; entries may be missing.
 * @param dictionaries Accumulator map from dictionary id to dictionary.
 * @returns The `dictionaries` map.
 * @throws {Error} If two different dictionaries are found for the same id.
 * @ignore
 */
function collectDictionaries(fields, children, dictionaries = new Map()) {
    const fieldCount = fields == null ? undefined : fields.length;
    const childCount = children == null ? undefined : children.length;
    if (!(fieldCount > 0) || fieldCount !== childCount) {
        return dictionaries;
    }
    for (let i = 0; i < fieldCount; ++i) {
        const { type } = fields[i];
        const data = children[i];
        const dictionary = data == null ? undefined : data.dictionary;
        const dictionaryChunks = (dictionary == null ? undefined : dictionary.data) || [];
        for (const next of [data, ...dictionaryChunks]) {
            collectDictionaries(type.children, next == null ? undefined : next.children, dictionaries);
        }
        if (!DataType.isDictionary(type)) {
            continue;
        }
        const { id } = type;
        if (!dictionaries.has(id)) {
            if (dictionary) {
                dictionaries.set(id, dictionary);
            }
        }
        // A missing child carries no dictionary, so it cannot conflict with the
        // one already registered; skip it rather than dereference `undefined`.
        else if (data != null && dictionaries.get(id) !== dictionary) {
            throw new Error(`Cannot create Schema containing two different dictionaries with the same Id`);
        }
    }
    return dictionaries;
}
/**
 * An internal class used by the `RecordBatchReader` and `RecordBatchWriter`
 * implementations to differentiate between a stream with valid zero-length
 * RecordBatches, and a stream with a Schema message, but no RecordBatches.
 * @see https://github.com/apache/arrow/pull/4373
 * @ignore
 * @private
 */
export class _InternalEmptyPlaceholderRecordBatch extends RecordBatch {
    /**
     * @param schema The schema whose fields define the placeholder's columns.
     */
    constructor(schema) {
        // One child per field, created from the field's type alone.
        const emptyColumns = [];
        for (const field of schema.fields) {
            emptyColumns.push(makeData({ type: field.type }));
        }
        const structType = new Struct(schema.fields);
        super(schema, makeData({ type: structType, nullCount: 0, children: emptyColumns }));
    }
}
//# sourceMappingURL=recordbatch.mjs.map