vitessce
Version:
Vitessce app and React component library
199 lines (188 loc) • 6.41 kB
JavaScript
/* eslint-disable no-underscore-dangle */
import { openArray } from 'zarr';
import range from 'lodash/range';
import ZarrDataSource from './ZarrDataSource';
/**
 * Read a little-endian signed 32-bit integer from a 4-byte typed-array view.
 * Despite the historical name, the value is decoded as an integer, not a float:
 * it is used for the item count and the per-item byte lengths of vlen-utf8 data.
 * @param {Uint8Array} bytes A typed-array view that is exactly 4 bytes long.
 * @returns {number} The decoded integer.
 * @throws {Error} If `bytes` is not exactly 4 bytes long.
 */
const readFloat32FromUint8 = (bytes) => {
  if (bytes.length !== 4) {
    throw new Error('readFloat32 only takes in length 4 byte buffers');
  }
  // Go through a DataView anchored at the view's own offset: `bytes.buffer` alone
  // would start at byte 0 of the backing buffer, which is wrong for views created
  // with `subarray` (or Node `Buffer` slices). Little-endian is requested explicitly
  // instead of relying on the platform byte order.
  return new DataView(bytes.buffer, bytes.byteOffset, 4).getInt32(0, true);
};

// Size in bytes of every length field (the leading item count and each item length).
const HEADER_LENGTH = 4;

/**
 * Method for decoding text arrays from zarr.
 * Largely a port of https://github.com/zarr-developers/numcodecs/blob/2c1aff98e965c3c4747d9881d8b8d4aad91adb3a/numcodecs/vlen.pyx#L135-L178
 *
 * Layout read by this function: a 4-byte item count, followed by one
 * (4-byte byte length, UTF-8 bytes) pair per item.
 * @param {Uint8Array} buffer The decompressed chunk bytes.
 * @returns {string[]} An array of strings.
 * @throws {Error} If the header or any item is missing, truncated or has a negative length.
 */
function parseVlenUtf8(buffer) {
  const dataEnd = buffer.length;
  // Validate before touching the header so a short buffer reports the real problem.
  if (dataEnd < HEADER_LENGTH) {
    throw new Error('corrupt buffer, missing or truncated header');
  }
  const length = readFloat32FromUint8(buffer.subarray(0, HEADER_LENGTH));
  // Every item needs at least its own length field, so a count that cannot fit
  // in the remaining bytes is rejected before allocating the output array.
  if (length < 0 || length > (dataEnd - HEADER_LENGTH) / HEADER_LENGTH) {
    throw new Error('corrupt buffer, data seem truncated');
  }
  const decoder = new TextDecoder();
  const output = new Array(length);
  let cursor = HEADER_LENGTH;
  for (let i = 0; i < length; i += 1) {
    if (cursor + HEADER_LENGTH > dataEnd) {
      throw new Error('corrupt buffer, data seem truncated');
    }
    const itemLength = readFloat32FromUint8(buffer.subarray(cursor, cursor + HEADER_LENGTH));
    cursor += HEADER_LENGTH;
    // A negative length would move the cursor backwards and silently yield garbage.
    if (itemLength < 0) {
      throw new Error('corrupt buffer, invalid negative length');
    }
    if (cursor + itemLength > dataEnd) {
      throw new Error('corrupt buffer, data seem truncated');
    }
    // `subarray` creates a view, so no bytes are copied before decoding.
    output[i] = decoder.decode(buffer.subarray(cursor, cursor + itemLength));
    cursor += itemLength;
  }
  return output;
}
/**
* A base AnnData loader which has all shared methods for more complex loaders,
* like loading cell names and ids. It inherits from ZarrDataSource.
*/
export default class AnnDataSource extends ZarrDataSource {
  /**
   * Forwards all arguments to ZarrDataSource and sets up the cache of
   * per-column obs promises.
   */
  constructor(...args) {
    super(...args);
    /** @type {Map<string, Promise<string[]>>} */
    this.obsPromises = new Map();
  }

  /**
   * Class method for loading obs variables.
   * Takes the location as an argument because this is shared across objects,
   * which have different ways of specifying location.
   * Each column is loaded at most once: its promise is cached in `this.obsPromises`
   * and evicted again if it rejects, so a later call can retry.
   * @param {(string|string[])[]} obsPaths An array of strings like "obs/leiden" or
   * "obs/bulk_labels." An entry may itself be an array of such strings, in which case
   * the corresponding result entry is an array with one result per path.
   * @returns {Promise} A promise for an array of ids with one per cell.
   */
  loadObsVariables(obsPaths) {
    const getObsCol = (obsCol) => {
      if (!this.obsPromises.has(obsCol)) {
        const obsPromise = this._loadObsVariable(obsCol).catch((err) => {
          // clear from cache if promise rejects
          this.obsPromises.delete(obsCol);
          // propagate error
          throw err;
        });
        this.obsPromises.set(obsCol, obsPromise);
      }
      return this.obsPromises.get(obsCol);
    };
    return Promise.all(obsPaths.map(obsPath => (
      Array.isArray(obsPath)
        ? Promise.all(obsPath.map(obsCol => getObsCol(obsCol)))
        : getObsCol(obsPath)
    )));
  }

  /**
   * Load a single obs column as strings.
   * If the column's `.zattrs` names a `categories` array whose dtype is `|O`,
   * the stored values are used as indices into those categories; otherwise each
   * stored value is converted with `String`.
   * @param {string} obs A path like "obs/leiden".
   * @returns {Promise<string[]>} A promise for the column values.
   */
  async _loadObsVariable(obs) {
    const { store } = this;
    const { categories } = await this.getJson(`${obs}/.zattrs`);
    let categoriesValues;
    if (categories) {
      const { dtype } = await this.getJson(`/obs/${categories}/.zarray`);
      if (dtype === '|O') {
        categoriesValues = await this.getFlatArrDecompressed(`/obs/${categories}`);
      }
    }
    const obsArr = await openArray({ store, path: obs, mode: 'r' });
    const { data } = await obsArr.get();
    return Array.from(
      data,
      i => (categoriesValues ? categoriesValues[i] : String(i)),
    );
  }

  /**
   * Class method for loading general numeric arrays.
   * @param {string} path A string like obsm.X_pca.
   * @returns {Promise} A promise for a zarr array containing the data.
   */
  loadNumeric(path) {
    const { store } = this;
    return openArray({
      store,
      path,
      mode: 'r',
    }).then(arr => arr.get());
  }

  /**
   * A common method for loading flattened data
   * i.e that which has shape [n] where n is a natural number.
   * All chunks are fetched from the store and decoded in parallel, merged in
   * chunk order and truncated to the first `shape[0]` entries.
   * When the first filter is `vlen-utf8` the chunks are decoded to strings;
   * otherwise the decoded chunks are concatenated as raw bytes.
   * @param {string} path A path to a flat array location, like obs/_index
   * @returns {Promise<string[]|Uint8Array>} A promise for the data from the zarr array.
   */
  getFlatArrDecompressed(path) {
    const { store } = this;
    return openArray({
      store,
      path,
      mode: 'r',
    }).then(async (z) => {
      const {
        meta: {
          shape: [length],
          chunks: [chunkLength],
          filters,
        },
      } = z;
      // `filters` may be missing or an empty list, in which case there is no
      // first filter to inspect.
      const isVlenUtf8 = Array.isArray(filters)
        && filters.length > 0
        && filters[0].id === 'vlen-utf8';
      const numRequests = Math.ceil(length / chunkLength);
      const requests = range(numRequests).map(item => store
        .getItem(`${z.keyPrefix}${String(item)}`)
        .then(buf => z.compressor.then(compressor => compressor.decode(buf))));
      const dbytesArr = await Promise.all(requests);

      if (isVlenUtf8) {
        // Use vlenutf-8 decoding and merge as a normal array. Items are appended
        // to one output array rather than re-concatenating per chunk.
        const text = [];
        dbytesArr.forEach((dbytes) => {
          parseVlenUtf8(dbytes).forEach((item) => {
            text.push(item);
          });
        });
        // truncate the filled in values
        return text.slice(0, length);
      }

      // Otherwise just merge the bytes as a typed array.
      // NOTE(review): the truncation below counts entries of the merged byte array;
      // for dtypes wider than one byte that is fewer bytes than `length` elements.
      // Confirm callers only rely on this path for single-byte data.
      if (dbytesArr.length === 1) {
        // A single chunk needs no merging.
        return dbytesArr[0].slice(0, length);
      }
      // Respect each view's own offset and length: a chunk may be a view into a
      // larger backing buffer, so `chunk.buffer` alone can contain unrelated bytes.
      const byteChunks = dbytesArr.map(dbytes => (
        ArrayBuffer.isView(dbytes)
          ? new Uint8Array(dbytes.buffer, dbytes.byteOffset, dbytes.byteLength)
          : new Uint8Array(dbytes)
      ));
      const totalBytes = byteChunks.reduce((sum, chunk) => sum + chunk.length, 0);
      const merged = new Uint8Array(totalBytes);
      let offset = 0;
      byteChunks.forEach((chunk) => {
        merged.set(chunk, offset);
        offset += chunk.length;
      });
      // truncate the filled in values
      return merged.slice(0, length);
    });
  }

  /**
   * Class method for loading the obs index.
   * The promise is cached on `this.obsIndex`; a rejected promise is evicted so
   * that a later call can retry.
   * @returns {Promise} An promise for a zarr array containing the indices.
   */
  loadObsIndex() {
    if (!this.obsIndex) {
      this.obsIndex = this.getJson('obs/.zattrs')
        .then(({ _index }) => this.getFlatArrDecompressed(`/obs/${_index}`))
        .catch((err) => {
          // clear from cache if promise rejects
          this.obsIndex = undefined;
          // propagate error
          throw err;
        });
    }
    return this.obsIndex;
  }

  /**
   * Class method for loading the var index.
   * The promise is cached on `this.varIndex`; a rejected promise is evicted so
   * that a later call can retry.
   * @returns {Promise} An promise for a zarr array containing the indices.
   */
  loadVarIndex() {
    if (!this.varIndex) {
      this.varIndex = this.getJson('var/.zattrs')
        .then(({ _index }) => this.getFlatArrDecompressed(`/var/${_index}`))
        .catch((err) => {
          // clear from cache if promise rejects
          this.varIndex = undefined;
          // propagate error
          throw err;
        });
    }
    return this.varIndex;
  }
}