xdim
Version:
Multi-Dimensional Functions. Create, Query, and Transform Multi-Dimensional Data.
569 lines (502 loc) • 15.8 kB
JavaScript
// Memoized results of parse(), keyed by the raw layout string that was parsed.
const layoutCache = {};
const { wrapNextFunction } = require("iter-fun");
const preparedSelectFunctions = require("./prepared-select-funcs.js");
const preparedUpdateFunctions = require("./prepared-update-funcs.js");
// Lookup from an array-type name (the strings passed in `arrayTypes`) to the
// constructor used to allocate that kind of array.
const ARRAY_TYPES = {
  Array,
  Int8Array,
  Uint8Array,
  Uint8ClampedArray,
  Int16Array,
  Uint16Array,
  Int32Array,
  Uint32Array,
  Float32Array,
  Float64Array
};
// The BigInt typed arrays do not exist on older engines, so each one is
// registered only when the global is actually defined.
if (typeof BigInt64Array !== "undefined") ARRAY_TYPES.BigInt64Array = BigInt64Array;
if (typeof BigUint64Array !== "undefined") ARRAY_TYPES.BigUint64Array = BigUint64Array;
/**
 * Collects every run of ASCII letters in a layout string as a dimension entry.
 * @param {string} str - layout expression, e.g. "[row,column]"
 * @returns {Object} object mapping each dimension name to `{ name }`
 */
function parseDimensions(str) {
  const found = {};
  const pattern = /[A-Za-z]+/g;
  for (let hit = pattern.exec(str); hit !== null; hit = pattern.exec(str)) {
    const name = hit[0];
    found[name] = { name };
  }
  return found;
}
/**
 * Rewrites a layout string so that each dimension name is replaced by the
 * next lowercase letter, in order of appearance ("a", "b", "c", ...).
 * @param {string} str - layout expression
 * @returns {string} the layout with positional letter names
 */
function normalizeLayoutString(str) {
  const letters = "abcdefghijklmnopqrstuvwxyz";
  let next = 0;
  return str.replace(/[A-Za-z]+/g, function substitute() {
    const letter = letters[next];
    next += 1;
    return letter;
  });
}
// Returns every bracketed group in the string, brackets included
// (e.g. "[a][b,c]" gives ["[a]", "[b,c]"]), or null when there is none.
const parseVectors = str => {
  const bracketed = /\[[^\]]+\]/g;
  return str.match(bracketed);
};
// "[row]" to "row"
const removeBraces = str => {
  // only strip when the string is wrapped on both ends; otherwise hand it back untouched
  const wrapped = str.startsWith("[") && str.endsWith("]");
  return wrapped ? str.slice(1, -1) : str;
};
// "(row)" to "row"
const removeParentheses = str => {
  // only strip when the string is wrapped on both ends; otherwise hand it back untouched
  const wrapped = str.startsWith("(") && str.endsWith(")");
  return wrapped ? str.slice(1, -1) : str;
};
// sort of like parsing a CSV except instead of " for quotes use (
const matchSequences = str => {
  // a token is either "(" up to the next ")" or a run without "(", "," or whitespace,
  // and it must be followed by a comma or the end of the string; null when nothing matches
  const token = /(\(.*?\)|[^\(,\s]+)(?=\s*,|\s*$)/g;
  return str.match(token);
};
// Parses one layout group into a node: a single name becomes
// { type: "Vector", dim }, several names become { type: "Matrix", parts }
// where each part is parsed recursively.
const parseSequences = str => {
  // strip an outer [...] first, then an outer (...)
  const inner = removeParentheses(removeBraces(str));
  const tokens = matchSequences(inner);
  if (tokens.length !== 1) {
    return {
      type: "Matrix",
      parts: tokens.map(parseSequences)
    };
  }
  return {
    type: "Vector",
    dim: tokens[0]
  };
};
/**
 * Verifies that a layout string contains only spaces, ASCII letters, commas
 * and square brackets.
 * @param {string} str - layout expression to check
 * @returns {boolean} true when every character is allowed
 * @throws {Error} listing each offending character, quoted and comma-separated
 */
function checkValidity(str) {
  const offenders = str.match(/[^ A-Za-z,\[\]]/g);
  if (!offenders) return true;
  const quoted = offenders.map(ch => `"${ch}"`);
  throw new Error("The following invalid characters were used: " + quoted.join(", "));
}
/**
 * Parses a layout expression such as "[band][row,column]" into a layout object.
 * @param {string} str - layout expression
 * @param {Object} [options]
 * @param {boolean} [options.useLayoutCache=true] - reuse and store results in the module-level cache
 * @returns {{type: string, summary: number[], dims: Object[]}} layout object; `summary`
 *   holds, per bracket group, the number of parts (1 for a Vector)
 * @throws {Error} when the string contains invalid characters or no bracketed group
 */
function parse(str, { useLayoutCache = true } = { useLayoutCache: true }) {
  // Own-property check: a plain `in` test also matches names inherited from
  // Object.prototype (e.g. "constructor"), which would return a built-in
  // function instead of a layout.
  const cached = useLayoutCache && Object.prototype.hasOwnProperty.call(layoutCache, str);
  if (cached) return layoutCache[str];

  checkValidity(str);

  const vectors = parseVectors(str);
  if (vectors === null) {
    // fail with an explicit message rather than an opaque null dereference below
    throw new Error(`[xdim] invalid layout "${str}": no bracketed dimensions such as "[row]" were found`);
  }

  const dims = vectors.map(parseSequences);
  const result = {
    type: "Layout",
    summary: dims.map(it => (it.type === "Matrix" ? it.parts.length : 1)),
    dims
  };

  if (useLayoutCache) layoutCache[str] = result;

  return result;
}
/**
 * Writes `value` into `data` at the location described by `point`.
 * @param {Object} options
 * @param {boolean} [options.useLayoutCache=true] - forwarded to parse() when layout is a string
 * @param {Array} options.data - nested array structure that is modified in place
 * @param {string|Object} options.layout - layout expression or parsed layout object
 * @param {Object} options.point - index for each dimension name
 * @param {Object} [options.sizes={}] - length of each dimension, needed for multi-part groups
 * @param {*} options.value - value to store
 * @throws {Error} when a size needed to compute a multi-part offset is missing
 */
function update({ useLayoutCache = true, data, layout, point, sizes = {}, value }) {
  const parsed = typeof layout === "string" ? parse(layout, { useLayoutCache }) : layout;
  const levels = parsed.dims;
  const deepest = levels.length - 1;

  let cursor = data;
  for (const [depth, level] of levels.entries()) {
    let offset;
    if (level.type === "Vector") {
      offset = point[level.dim];
    } else {
      // anything that is not a Vector is treated as a Matrix
      const { parts } = level;
      offset = 0;
      let stride = 1;
      // walk from the last part to the first; the stride grows by each size passed
      let i = parts.length;
      while (i-- > 0) {
        const { dim } = parts[i];
        offset += stride * point[dim];
        if (i === 0) break;
        if (!(dim in sizes)) throw new Error(`you cannot calculate the location without knowing the size of the "${dim}" dimension.`);
        stride *= sizes[dim];
      }
    }

    if (depth === deepest) {
      cursor[offset] = value;
    } else {
      cursor = cursor[offset];
    }
  }
}
/**
 * Builds a reusable `({ point, value }) => void` writer for one data/layout pair.
 * When a pre-built function exists for the layout's summary it is returned bound to
 * a context object holding the data, dimension names and multipliers; otherwise a
 * generic closure is returned.
 * @param {Object} options
 * @param {boolean} [options.useLayoutCache=true] - forwarded to parse() and getMultipliers()
 * @param {Array} options.data - nested array structure the writer modifies
 * @param {string|Object} options.layout - layout expression or parsed layout object
 * @param {Object} [options.sizes={}] - length of each dimension
 * @returns {Function} writer taking `{ point, value }`
 */
function prepareUpdate({ useLayoutCache = true, data, layout, sizes = {} }) {
  const parsed = typeof layout === "string" ? parse(layout, { useLayoutCache }) : layout;
  const levels = parsed.dims;
  const depthCount = levels.length;
  const multipliers = getMultipliers({ useLayoutCache, layout: parsed, sizes });
  const key = parsed.summary.toString();

  if (key in preparedUpdateFunctions) {
    // context read by the pre-built function: d<depth>v<part> = name, m<depth>v<part> = multiplier
    const context = { data };
    levels.forEach((level, depth) => {
      if (level.type === "Vector") {
        context[`d${depth}v0`] = level.dim;
        return;
      }
      if (level.type !== "Matrix") return;
      level.parts.forEach((part, ipart) => {
        context[`d${depth}v${ipart}`] = part.dim;
        context[`m${depth}v${ipart}`] = multipliers[part.dim];
      });
    });
    return preparedUpdateFunctions[key].bind(context);
  }

  const deepest = depthCount - 1;
  return ({ point, value }) => {
    let cursor = data;
    for (let depth = 0; depth < depthCount; depth++) {
      const level = levels[depth];
      let offset;
      if (level.type === "Vector") {
        offset = point[level.dim];
      } else {
        // anything that is not a Vector is treated as a Matrix
        offset = 0;
        for (const { dim } of level.parts) {
          offset += multipliers[dim] * point[dim];
        }
      }
      if (depth < deepest) {
        cursor = cursor[offset];
      } else {
        cursor[offset] = value;
      }
    }
  };
}
/**
 * Lazily yields the values found at every point produced by iterPoints().
 * @param {Object} options
 * @param {Array} options.data - nested array structure to read from
 * @param {string|Object} options.layout - layout expression or parsed layout object
 * @param {string[]} [options.order] - forwarded to iterPoints()
 * @param {Object} [options.rect={}] - forwarded to iterPoints()
 * @param {Object} [options.sizes={}] - length of each dimension
 * @param {boolean} [options.useLayoutCache=true] - forwarded to select()
 * @returns {*} whatever wrapNextFunction() builds around the `next` function
 * @throws {Error} when data or layout is missing
 */
function iterClip({ data, layout, order, rect = {}, sizes = {}, useLayoutCache = true }) {
  if (!data) throw new Error("[xdim] must specify data");
  if (!layout) throw new Error("[xdim] must specify layout");

  const points = iterPoints({ order, sizes, rect });

  return wrapNextFunction(function next() {
    const step = points.next();
    if (step.done) return { done: true };
    const selected = select({ data, layout, point: step.value, sizes, useLayoutCache });
    return { done: false, value: selected.value };
  });
}
/**
 * Checks that every entry of a hyper-rectangle is a `[start, end]` pair with
 * `0 <= start <= end`.
 * @param {Object} options
 * @param {Object} [options.rect={}] - ranges keyed by dimension name; a falsy value skips validation
 * @throws {Error} on a range that does not have two items, is reversed, or starts below zero
 */
function validateRect({ rect = {} }) {
  if (!rect) return;
  for (const key in rect) {
    const range = rect[key];
    if (range.length !== 2) {
      throw new Error(`[xdim] uh oh. invalid hyper-rectangle`);
    }
    const [start, end] = range;
    if (start > end) {
      throw new Error(`[xdim] uh oh. invalid range for "${key}". Start of ${start} can't be greater than end of ${end}.`);
    }
    if (start < 0) {
      throw new Error(`[xdim] uh oh. invalid hyper-rectangle with start ${start}`);
    }
  }
}
/**
 * Extracts the values inside a hyper-rectangle.
 * @param {Object} options
 * @param {boolean} [options.useLayoutCache=true] - forwarded to parse() when layout is a string
 * @param {Array} options.data - nested array structure to read from
 * @param {string|Object} options.layout - layout expression or parsed layout object
 * @param {Object} options.rect - inclusive `[start, end]` range for each dimension name
 * @param {Object} [options.sizes={}] - length of each dimension, used for multi-part groups
 * @param {boolean} [options.flat=false] - when true, return the values as one flat array
 * @param {boolean} [options.validate=true] - run validateRect() on `rect` first
 * @returns {{data: Array}} the clipped values, flat or arranged according to `layout`
 */
function clip({ useLayoutCache = true, data, layout, rect, sizes = {}, flat = false, validate = true }) {
  if (validate) validateRect({ rect });

  if (typeof layout === "string") layout = parse(layout, { useLayoutCache });

  // `datas` holds the containers selected so far; each layout level narrows it further
  let datas = [data];

  layout.dims.forEach(arr => {
    const new_datas = [];
    datas.forEach(data => {
      if (arr.type === "Vector") {
        const [start, end] = rect[arr.dim];
        // Copy item by item: Array.prototype.concat does not spread typed arrays,
        // so it would append a typed-array slice as ONE element instead of its values.
        const picked = data.slice(start, end + 1);
        for (let i = 0; i < picked.length; i++) {
          new_datas.push(picked[i]);
        }
      } else {
        // only 2 types so must be arr.type === "Matrix"
        const { parts } = arr;
        let offsets = [0];
        let multiplier = 1;
        // build every flat offset, from the last part to the first
        for (let i = parts.length - 1; i >= 0; i--) {
          const part = parts[i];
          // assume part.type === "Vector"
          const { dim } = part;
          const [start, end] = rect[dim];
          const new_offsets = [];
          for (let n = start; n <= end; n++) {
            offsets.forEach(offset => {
              new_offsets.push(offset + multiplier * n);
            });
          }
          offsets = new_offsets;
          multiplier *= sizes[dim];
        }
        offsets.forEach(offset => {
          new_datas.push(data[offset]);
        });
      }
    });
    datas = new_datas;
  });

  if (flat) {
    return {
      data: datas
    };
  }

  // allocate an output structure sized to the rectangle
  const out_sizes = Object.fromEntries(Object.entries(rect).map(([dim, [start, end]]) => [dim, end - start + 1]));

  const { data: out_data } = prepareData({
    layout,
    sizes: out_sizes
  });

  const max_depth = layout.dims.length;

  // read position in `datas`; an index avoids the repeated O(n) cost of shift()
  let cursor = 0;
  const step = (arr, depth) => {
    if (depth === max_depth) {
      for (let i = 0; i < arr.length; i++) {
        arr[i] = datas[cursor++];
      }
    } else {
      arr.forEach(sub => step(sub, depth + 1));
    }
  };
  step(out_data, 1);

  return { data: out_data };
}
/**
 * Computes, for every dimension in a layout, the factor its index is multiplied
 * by when calculating a flat offset inside its group.
 * @param {Object} options
 * @param {boolean} [options.useLayoutCache=true] - forwarded to parse() when layout is a string
 * @param {string|Object} options.layout - layout expression or parsed layout object
 * @param {Object} options.sizes - length of each dimension
 * @returns {Object} multiplier for each dimension name (1 for a single-name group)
 */
function getMultipliers({ useLayoutCache = true, layout, sizes }) {
  const parsed = typeof layout === "string" ? parse(layout, { useLayoutCache }) : layout;
  const multipliers = {};
  for (const level of parsed.dims) {
    if (level.type === "Vector") {
      multipliers[level.dim] = 1;
      continue;
    }
    // anything that is not a Vector is treated as a Matrix:
    // the last part gets 1 and each earlier part the product of the sizes after it
    let stride = 1;
    for (const { dim } of [...level.parts].reverse()) {
      multipliers[dim] = stride;
      stride *= sizes[dim];
    }
  }
  return multipliers;
}
/**
 * Builds a reusable `({ point }) => { index, parent, value }` reader for one
 * data/layout pair. When a pre-built function exists for the layout's summary it
 * is returned bound to a context object holding the data, dimension names and
 * multipliers; otherwise a generic closure is returned.
 * @param {Object} options
 * @param {boolean} [options.useLayoutCache=true] - forwarded to parse() and getMultipliers()
 * @param {Array} options.data - nested array structure the reader looks into
 * @param {string|Object} options.layout - layout expression or parsed layout object
 * @param {Object} [options.sizes={}] - length of each dimension
 * @returns {Function} reader taking `{ point }`
 */
function prepareSelect({ useLayoutCache = true, data, layout, sizes = {} }) {
  const parsed = typeof layout === "string" ? parse(layout, { useLayoutCache }) : layout;
  const levels = parsed.dims;
  const depthCount = levels.length;
  const multipliers = getMultipliers({ useLayoutCache, layout: parsed, sizes });
  const key = parsed.summary.toString();

  if (key in preparedSelectFunctions) {
    // context read by the pre-built function: d<depth>v<part> = name, m<depth>v<part> = multiplier
    const context = { data };
    levels.forEach((level, depth) => {
      if (level.type === "Vector") {
        context[`d${depth}v0`] = level.dim;
        return;
      }
      if (level.type !== "Matrix") return;
      level.parts.forEach((part, ipart) => {
        context[`d${depth}v${ipart}`] = part.dim;
        context[`m${depth}v${ipart}`] = multipliers[part.dim];
      });
    });
    return preparedSelectFunctions[key].bind(context);
  }

  const deepest = depthCount - 1;
  return ({ point }) => {
    let cursor = data;
    for (let depth = 0; depth < depthCount; depth++) {
      const level = levels[depth];
      let offset;
      if (level.type === "Vector") {
        offset = point[level.dim];
      } else {
        // anything that is not a Vector is treated as a Matrix
        offset = 0;
        for (const { dim } of level.parts) {
          offset += multipliers[dim] * point[dim];
        }
      }
      if (depth === deepest) {
        return {
          index: offset,
          parent: cursor,
          value: cursor[offset]
        };
      }
      cursor = cursor[offset];
    }
  };
}
/**
 * Looks up the value stored at `point`.
 * @param {Object} options
 * @param {boolean} [options.useLayoutCache=true] - forwarded to parse() when layout is a string
 * @param {Array} options.data - nested array structure to read from
 * @param {string|Object} options.layout - layout expression or parsed layout object
 * @param {Object} options.point - index for each dimension name
 * @param {Object} [options.sizes={}] - length of each dimension, needed for multi-part groups
 * @returns {{index: number, value: *, parent: Array}} the value, the array that
 *   directly holds it, and its index inside that array
 * @throws {Error} when a size needed to compute a multi-part offset is missing
 */
function select({ useLayoutCache = true, data, layout, point, sizes = {} }) {
  // turn a layout expression into a layout object
  const parsed = typeof layout === "string" ? parse(layout, { useLayoutCache }) : layout;

  let parent;
  let index;
  let value = data;

  for (const level of parsed.dims) {
    let position;
    if (level.type === "Vector") {
      position = point[level.dim];
    } else {
      // only 2 types so must be a Matrix
      const { parts } = level;
      position = 0;
      let stride = 1;
      for (let i = parts.length - 1; i >= 0; i--) {
        const part = parts[i];
        // parts that are not Vectors do not contribute to the offset
        if (part.type !== "Vector") continue;
        const { dim } = part;
        position += stride * point[dim];
        if (i > 0) {
          if (!(dim in sizes)) throw new Error(`you cannot calculate the location without knowing the size of the "${dim}" dimension.`);
          stride *= sizes[dim];
        }
      }
    }
    // descend one level, remembering where we came from
    parent = value;
    index = position;
    value = value[position];
  }

  return { index, value, parent };
}
/**
 * Nests further dimensions inside every slot of `arr`, one level per entry of `lens`.
 * The innermost level is built with the constructor named by the last entry of
 * `arrayTypes` ("Array" when `arrayTypes` is not given); outer levels are plain arrays.
 * @param {Object} options
 * @param {Array} options.arr - array whose slots are overwritten in place
 * @param {*} [options.fill=undefined] - value passed to fill() on each created array
 * @param {number[]} options.lens - lengths of the dimensions still to add
 * @param {string[]} [options.arrayTypes] - array-type names
 * @returns {Array} the same `arr`
 */
function addDims({ arr, fill = undefined, lens, arrayTypes }) {
  // nothing left to add
  if (lens.length === 0) return arr;

  const len = lens[0];
  const slots = arr.length;

  if (lens.length === 1) {
    const leafType = arrayTypes ? arrayTypes[arrayTypes.length - 1] : "Array";
    for (let slot = 0; slot < slots; slot++) {
      arr[slot] = new ARRAY_TYPES[leafType](len).fill(fill);
    }
    return arr;
  }

  for (let slot = 0; slot < slots; slot++) {
    const nested = new Array(len).fill(fill);
    arr[slot] = nested;
    addDims({ arr: nested, fill, lens: lens.slice(1), arrayTypes });
  }
  return arr;
}
// to-do: maybe only call fill if not undefined or default typed array value?
/**
 * Allocates a nested array structure with the given shape.
 * @param {Object} options
 * @param {*} [options.fill=undefined] - value passed to fill() on each created array
 * @param {number[]} options.shape - length of each nesting level, outermost first
 * @param {string[]} [options.arrayTypes] - array-type names
 * @returns {Array} the allocated structure
 * @throws {Error} when shape has one entry but arrayTypes is an array of another length
 */
function createMatrix({ fill = undefined, shape, arrayTypes }) {
  const len = shape[0];

  if (shape.length !== 1) {
    const outer = new Array(len).fill(fill);
    return addDims({ arr: outer, fill, lens: shape.slice(1), arrayTypes });
  }

  const typesGiven = Array.isArray(arrayTypes);
  if (typesGiven && arrayTypes.length !== 1) {
    throw new Error("[xdim] shape and arrayTypes have different lengths");
  }
  const typeName = typesGiven ? arrayTypes[0] : "Array";
  return new ARRAY_TYPES[typeName](len).fill(fill);
}
/**
 * Generates an in-memory data structure able to hold data in the given layout.
 * @param {Object} options
 * @param {*} [options.fill=undefined] - forwarded to createMatrix()
 * @param {string|Object} options.layout - layout expression or parsed layout object
 * @param {boolean} [options.useLayoutCache=true] - forwarded to parse() when layout is a string
 * @param {Object} options.sizes - length of each dimension
 * @param {string[]} [options.arrayTypes] - forwarded to createMatrix()
 * @returns {{data: Array, shape: number[], arrayTypes: (string[]|undefined)}}
 * @throws {Error} when a dimension of a multi-part group is missing from sizes
 */
function prepareData({ fill = undefined, layout, useLayoutCache = true, sizes, arrayTypes }) {
  const parsed = typeof layout === "string" ? parse(layout, { useLayoutCache }) : layout;

  // length of one nesting level: a single size, or the product of a group's sizes
  const lengthOf = level => {
    if (level.type === "Vector") return sizes[level.dim];
    if (level.type !== "Matrix") return undefined;
    let total = 1;
    for (const part of level.parts) {
      if (!(part.dim in sizes)) throw new Error(`[xdim] could not find "${part.dim}" in sizes: { ${Object.keys(sizes).join(", ")} }`);
      total *= sizes[part.dim];
    }
    return total;
  };

  const shape = parsed.dims.map(level => lengthOf(level));
  const data = createMatrix({ fill, shape, arrayTypes });
  return { data, shape, arrayTypes };
}
/**
 * Iterates over the integers from `start` to `end`, both inclusive.
 * Assumes a positive step.
 * @param {Object} options
 * @param {number} [options.start=0] - first value produced
 * @param {number} [options.end=100] - last value produced
 * @returns {*} whatever wrapNextFunction() builds around the `next` function
 */
function iterRange({ start = 0, end = 100 }) {
  // one past the last value; reaching it signals completion
  const stop = end + 1;
  let current = start - 1;
  return wrapNextFunction(function next() {
    current += 1;
    return current === stop ? { done: true } : { done: false, value: current };
  });
}
// iterate over all the points, saving memory vs array
// `order` (when an array) fixes the dimension order; the last name changes fastest.
// `rect` optionally restricts a dimension to an inclusive [start, end] range;
// dimensions without an entry run from 0 to sizes[name] - 1.
// NOTE: every step yields the SAME `current` object, mutated in place, so a
// caller that wants to keep a point has to copy it.
function iterPoints({ order, sizes, rect = {} }) {
// names sorted by shortest dimension to longest dimension
const names = Array.isArray(order) ? order : Object.keys(sizes).sort((a, b) => sizes[a] - sizes[b]);
const iters = new Array(names.length);
const current = {};
// every dimension but the last starts out AT its first value, so its
// iterator only has to produce the values after that (start + 1 onwards)
for (let i = 0; i < names.length - 1; i++) {
const name = names[i];
const [start, end] = rect[name] || [0, sizes[name] - 1];
iters[i] = iterRange({ start: start + 1, end });
current[name] = start;
}
// the last dimension is primed one step before its first value, so the
// first call to next() produces the very first point
const lastName = names[names.length - 1];
const [start, end] = rect[lastName] || [0, sizes[lastName] - 1];
iters[iters.length - 1] = iterRange({ start: start, end });
current[lastName] = start - 1;
// permutate
return wrapNextFunction(function next() {
// try to advance the last dimension first; when one is exhausted, fall
// through to the dimension before it
for (let i = iters.length - 1; i >= 0; i--) {
const { value, done } = iters[i].next();
if (done) {
if (i === 0) {
// we have exhausted all of the permutations
return { done: true };
}
} else {
// add iters for the remaining dims
// (each dimension after i restarts at its first value)
for (let ii = i + 1; ii < iters.length; ii++) {
const nameii = names[ii];
const [start, end] = rect[nameii] || [0, sizes[nameii] - 1];
iters[ii] = iterRange({ start: start + 1, end });
current[nameii] = start;
}
current[names[i]] = value;
return { value: current, done: false };
}
}
});
}
/**
 * Copies data from one layout into a newly allocated structure in another layout.
 * @param {Object} options
 * @param {Array} options.data - source data arranged according to `from`
 * @param {*} [options.fill=undefined] - initial value for the output structure
 * @param {string|Object} options.from - source layout expression or parsed layout object
 * @param {string|Object} options.to - target layout expression or parsed layout object
 * @param {Object} options.sizes - length of each dimension
 * @param {boolean} [options.useLayoutCache=true] - forwarded to parse() and prepareUpdate()
 * @returns {{data: Array}} the data arranged according to `to`
 */
function transform({ data, fill = undefined, from, to, sizes, useLayoutCache = true }) {
  if (typeof from === "string") from = parse(from, { useLayoutCache });
  if (typeof to === "string") to = parse(to, { useLayoutCache });

  const { data: out_data } = prepareData({ fill, layout: to, sizes });

  // named `write` so it does not shadow the module-level update()
  const write = prepareUpdate({
    useLayoutCache,
    data: out_data,
    layout: to,
    sizes
  });

  const points = iterPoints({ sizes });

  // `const` keeps the loop variable local; without a declaration the loop
  // assigns an implicit global (and throws under strict mode)
  for (const point of points) {
    const { value } = select({
      data,
      layout: from,
      point,
      sizes
    });

    // insert into new frame
    write({
      point,
      value
    });
  }

  return { data: out_data };
}
// Public API of the module.
module.exports = {
addDims,
checkValidity,
createMatrix,
iterClip,
iterRange,
iterPoints,
matchSequences,
parse,
parseDimensions,
parseSequences,
parseVectors,
prepareData,
prepareSelect,
prepareUpdate,
removeBraces,
removeParentheses,
select,
transform,
update,
clip,
validateRect
};