arquero
Version:
Query processing and transformation of array-backed data tables.
66 lines (58 loc) • 1.62 kB
JavaScript
import { toArray } from '../../util/to-array.js';
import { Reducer } from './reducer.js';
/**
 * Factory for a CountPattern reducer.
 *
 * All three arguments are forwarded unchanged to the CountPattern
 * constructor.
 *
 * @param {*} fields Field specification forwarded to CountPattern.
 * @param {*} as Output names forwarded to CountPattern.
 * @param {*} pattern Split pattern forwarded to CountPattern.
 * @return {CountPattern} A newly constructed reducer instance.
 */
export function countPattern(fields, as, pattern) {
  const reducer = new CountPattern(fields, as, pattern);
  return reducer;
}
/**
 * Create an accessor for a single column.
 *
 * @param {string} column Key used to look up the column on the data object.
 * @return {function(number, object): *} Function of (row, data) that
 *  returns `data[column].at(row)`.
 */
function columnGetter(column) {
  return function (row, data) {
    const values = data[column];
    return values.at(row);
  };
}
/**
 * Reducer that splits field values on a pattern and tallies how often
 * each resulting token occurs.
 *
 * Reducer state is an object with three members:
 *  - `index`: dictionary mapping a token to its position in `words`/`count`
 *  - `words`: distinct tokens in first-seen order
 *  - `count`: running tally for the token at the same position in `words`
 */
export class CountPattern extends Reducer {
  /**
   * @param {*} fields Column key(s); normalized with `toArray` and each
   *  turned into a `(row, data)` accessor via `columnGetter`.
   * @param {string[]} [as] Names of the two output columns (token, tally).
   *  Defaults to `['word', 'count']`.
   * @param {string|RegExp} [pattern] Separator handed to
   *  `String.prototype.split`. Any falsy value (including the empty
   *  string) falls back to a single space.
   */
  constructor(fields, as, pattern) {
    super(as || ['word', 'count']);
    this._fields = toArray(fields).map(columnGetter);
    this._pattern = pattern || ' ';
  }

  /**
   * Create a fresh, empty reducer state.
   * @return {{index: object, words: string[], count: number[]}}
   */
  init() {
    // The dictionary has no prototype so that tokens such as
    // "constructor", "toString" or "__proto__" are treated as ordinary
    // keys instead of resolving to inherited Object.prototype members.
    return { index: Object.create(null), words: [], count: [] };
  }

  /**
   * Tokenize the values of one row and increment the matching tallies.
   * @param {object} state Reducer state as produced by `init`.
   * @param {number} row Row index passed to each field accessor.
   * @param {object} data Column data passed to each field accessor.
   */
  add({ index, words, count }, row, data) {
    const pattern = this._pattern;
    for (const get of this._fields) {
      // `+ ''` coerces non-string values to text before splitting.
      const text = get(row, data) + '';
      for (const token of text.split(pattern)) {
        const idx = index[token];
        if (idx === undefined) {
          // First sighting: register the token at the next free slot.
          index[token] = words.length;
          words.push(token);
          count.push(1);
        } else {
          count[idx] += 1;
        }
      }
    }
  }

  /**
   * Tokenize the values of one row and decrement the matching tallies.
   * Tokens that were never added are ignored.
   * @param {object} state Reducer state as produced by `init`.
   * @param {number} row Row index passed to each field accessor.
   * @param {object} data Column data passed to each field accessor.
   */
  rem({ index, count }, row, data) {
    const pattern = this._pattern;
    for (const get of this._fields) {
      const text = get(row, data) + '';
      for (const token of text.split(pattern)) {
        const idx = index[token];
        // Guard against unknown tokens; decrementing `count[undefined]`
        // would attach a stray NaN property to the tally array.
        if (idx !== undefined) {
          count[idx] -= 1;
        }
      }
    }
  }

  /**
   * Write every token whose tally is positive into the output columns.
   * @param {object} state Reducer state as produced by `init`.
   * @param {object} values Output columns keyed by name; the entries named
   *  by `this._outputs[0]` and `this._outputs[1]` receive tokens and
   *  tallies respectively. (`_outputs` is presumably set by the Reducer
   *  base constructor from the names passed to `super` - confirm there.)
   * @param {number} index Offset of the first output slot to fill.
   * @return {number} The number of entries written.
   */
  write({ words, count }, values, index) {
    const n = words.length;
    const v0 = values[this._outputs[0]];
    const v1 = values[this._outputs[1]];
    let offset = index;
    for (let i = 0; i < n; ++i) {
      // Tokens whose tally dropped to zero (via `rem`) are skipped.
      if (count[i] > 0) {
        v0[offset] = words[i];
        v1[offset] = count[i];
        ++offset;
      }
    }
    return offset - index;
  }
}