@uwdata/flechette
Version:
Fast, lightweight access to Apache Arrow data.
122 lines (109 loc) • 3.1 kB
JavaScript
import { Column } from '../../column.js';
import { keyString } from '../../util/strings.js';
import { batchType } from '../../batch-type.js';
import { buffer } from '../buffer.js';
import { ValidityBuilder } from './validity.js';
/**
* Create a context object for managing dictionary builders.
*/
export function dictionaryContext() {
const idMap = new Map;
const dicts = new Set;
return {
/**
* Get a dictionary values builder for the given dictionary type.
* @param {import('../../types.js').DictionaryType} type
* The dictionary type.
* @param {*} ctx The builder context.
* @returns {ReturnType<dictionaryValues>}
*/
get(type, ctx) {
// if a dictionary has a non-negative id, assume it was set
// intentionally and track it for potential reuse across columns
// otherwise the dictionary is used for a single column only
const id = type.id;
if (id >= 0 && idMap.has(id)) {
return idMap.get(id);
} else {
const dict = dictionaryValues(type, ctx);
if (id >= 0) idMap.set(id, dict);
dicts.add(dict);
return dict;
}
},
/**
* Finish building dictionary values columns and assign them to
* their corresponding dictionary batches.
* @param {import('../../types.js').ExtractionOptions} options
*/
finish(options) {
dicts.forEach(dict => dict.finish(options));
}
};
}
/**
* Builder helper for creating dictionary values.
* @param {import('../../types.js').DictionaryType} type
* The dictionary data type.
* @param {ReturnType<import('../builder.js').builderContext>} ctx
* The builder context.
*/
export function dictionaryValues(type, ctx) {
const keys = Object.create(null);
const values = ctx.builder(type.dictionary);
const batches = [];
values.init();
let index = -1;
return {
type,
values,
add(batch) {
batches.push(batch);
return batch;
},
key(value) {
const v = keyString(value);
let k = keys[v];
if (k === undefined) {
keys[v] = k = ++index;
values.set(value, k);
}
return k;
},
finish(options) {
const valueType = type.dictionary;
const batch = new (batchType(valueType, options))(values.done());
const dictionary = new Column([batch]);
batches.forEach(batch => batch.setDictionary(dictionary));
}
};
}
/**
* Builder for dictionary-typed data batches.
*/
export class DictionaryBuilder extends ValidityBuilder {
constructor(type, ctx) {
super(type, ctx);
this.dict = ctx.dictionary(type);
}
init() {
this.values = buffer(this.type.indices.values);
return super.init();
}
set(value, index) {
if (super.set(value, index)) {
this.values.set(this.dict.key(value), index);
}
}
done() {
return {
...super.done(),
values: this.values.array(this.index + 1)
};
}
batch() {
// register batch with dictionary
// batch will be updated when the dictionary is finished
return this.dict.add(super.batch());
}
}