UNPKG

vitessce

Version:

Vitessce app and React component library

358 lines (340 loc) 13.3 kB
/* eslint-disable no-underscore-dangle */
import { openArray, slice } from 'zarr';
import { extent } from 'd3-array';
import LoaderResult from '../LoaderResult';
import AbstractTwoStepLoader from '../AbstractTwoStepLoader';

/**
 * Linearly rescale the values of an array so that its minimum maps to 0
 * and its maximum maps to 255, and store the result as unsigned bytes.
 * @param {Array|TypedArray} arr The values to rescale.
 * @returns {Object} { data } where data is a Uint8Array of the same length as arr.
 */
const normalize = (arr) => {
  const [min, max] = extent(arr);
  // When max === min the ratio is Infinity and every product is NaN,
  // which the Uint8Array conversion stores as 0.
  const ratio = 255 / (max - min);
  const data = new Uint8Array(
    arr.map(i => Math.floor((i - min) * ratio)),
  );
  return { data };
};

/**
 * Interleave equally long column vectors into a single row-major matrix,
 * i.e. element j of column i ends up at position (j * numCols + i).
 * Elements are written directly into the output array, so the result
 * does not depend on the byte order of the host.
 * @param {TypedArray[]} arr A non-empty list of columns; the first column
 * determines the number of rows and the type of the returned array.
 * @returns {TypedArray} The flattened matrix, of the same type as arr[0].
 */
const concatenateColumnVectors = (arr) => {
  const numCols = arr.length;
  const numRows = arr[0].length;
  const TypedArray = arr[0].constructor;
  const matrix = new TypedArray(numCols * numRows);
  for (let i = 0; i < numCols; i += 1) {
    const column = arr[i];
    for (let j = 0; j < numRows; j += 1) {
      matrix[j * numCols + i] = column[j];
    }
  }
  return matrix;
};

/**
 * Expand a compressed sparse matrix (CSR or CSC) into a dense, row-major Float32Array.
 * @param {TypedArray} indptr Offsets into indices/values; entries k and k + 1
 * delimit the stored elements of major-axis slice k.
 * @param {TypedArray} indices The minor-axis index of every stored element.
 * @param {TypedArray} values The value of every stored element.
 * @param {Number[]} shape The [numRows, numCols] shape of the dense matrix.
 * @param {Boolean} isRowMajor True for CSR (major axis is the row),
 * false for CSC (major axis is the column).
 * @returns {Float32Array} The dense matrix with shape[0] * shape[1] elements.
 */
const expandSparseToDense = (indptr, indices, values, shape, isRowMajor) => {
  const numCols = shape[1];
  // Typed arrays are zero-initialized, so elements that are not stored stay 0.
  const dense = new Float32Array(shape[0] * numCols);
  for (let major = 0; major + 1 < indptr.length; major += 1) {
    const end = indptr[major + 1];
    for (let k = indptr[major]; k < end; k += 1) {
      const minor = indices[k];
      const row = isRowMajor ? major : minor;
      const col = isRowMajor ? minor : major;
      dense[row * numCols + col] = values[k];
    }
  }
  return dense;
};

/**
 * Loader for converting zarr into a cell x gene matrix for use in Genes/Heatmap components.
 */
export default class MatrixZarrLoader extends AbstractTwoStepLoader {
  /**
   * Class method for loading the genes list from the data source's var index,
   * filtered if there is a `geneFilter` option present.
   * The resulting promise is cached on the instance.
   * @returns {Promise} A promise for the (possibly filtered) list of gene names.
   */
  async loadFilteredGeneNames() {
    if (this.filteredGeneNames) {
      return this.filteredGeneNames;
    }
    const { geneFilter: geneFilterZarr } = this.options;
    const getFilterFn = async () => {
      // Without a filter every gene name is kept.
      if (!geneFilterZarr) return data => data;
      const geneFilter = await this.dataSource.getFlatArrDecompressed(geneFilterZarr);
      return data => data.filter((_, j) => geneFilter[j]);
    };
    this.filteredGeneNames = Promise
      .all([this.dataSource.loadVarIndex(), getFilterFn()])
      .then(([data, filter]) => filter(data));
    return this.filteredGeneNames;
  }

  /**
   * Class method for loading a filtered subset of the genes list.
   * @param {String} filterZarr A location in the zarr store to fetch a boolean array from.
   * @returns {Promise<Array>} The gene names (from loadFilteredGeneNames) whose
   * entry in the fetched array is truthy.
   */
  async _getFilteredGenes(filterZarr) {
    // The two fetches are independent, so issue them together.
    const [filter, geneNames] = await Promise.all([
      this.dataSource.getFlatArrDecompressed(filterZarr),
      this.loadFilteredGeneNames(),
    ]);
    return geneNames.filter((_, i) => filter[i]);
  }

  /**
   * Class method for getting the integer indices of a selection of genes within
   * the list returned by loadFilteredGeneNames.
   * @param {Array} selection A list of gene names.
   * @returns {Promise<Array>} A list of integer indices; -1 for a name that is not in the list.
   */
  async _getGeneIndices(selection) {
    const geneNames = await this.loadFilteredGeneNames();
    return selection.map(gene => geneNames.indexOf(gene));
  }

  /**
   * Class method for getting the number of cells i.e. entries in the obs index.
   * @returns {Promise<Number>} The number of cells.
   */
  async _getNumCells() {
    const cells = await this.dataSource.loadObsIndex();
    return cells.length;
  }

  /**
   * Class method for getting the number of genes i.e. entries in the var index,
   * potentially filtered by the `geneFilter` option.
   * @returns {Promise<Number>} The number of genes.
   */
  async _getNumGenes() {
    const genes = await this.loadFilteredGeneNames();
    return genes.length;
  }

  /**
   * Class method for opening the sparse matrix arrays in zarr.
   * The resulting promise is cached on the instance.
   * @returns {Promise<Array>} A promise for the opened [indptr, indices, data] arrays of the matrix.
   */
  async _openSparseArrays() {
    const { options: { matrix } } = this;
    const { store } = this.dataSource;
    if (this.sparseArrays) {
      return this.sparseArrays;
    }
    this.sparseArrays = Promise.all(
      ['indptr', 'indices', 'data'].map(name => openArray({ store, path: `${matrix}/${name}`, mode: 'r' })),
    );
    return this.sparseArrays;
  }

  /**
   * Class method for loading a gene selection from a CSC matrix.
   * Only the stored elements of the selected columns are fetched.
   * @param {Array} selection A list of gene names whose data should be fetched.
   * @returns {Promise<Float32Array[]>} One Float32Array of length numCells per selected gene.
   */
  async _loadCSCGeneSelection(selection) {
    // NOTE(review): a name that is not in the gene list yields index -1,
    // which is used below without a check - confirm the intended behavior.
    const indices = await this._getGeneIndices(selection);
    const [indptrArr, indexArr, cellXGeneArr] = await this._openSparseArrays();
    const numCells = await this._getNumCells();
    const { data: cols } = await indptrArr.getRaw(null);
    return Promise.all(
      indices.map(async (index) => {
        const startRowIndex = cols[index];
        const endRowIndex = cols[index + 1];
        const geneData = new Float32Array(numCells);
        // If there is no change in the column indexer, then the data is all zeros.
        if (startRowIndex === endRowIndex) {
          return geneData;
        }
        const selectionSlice = [slice(startRowIndex, endRowIndex)];
        const [{ data: rowIndices }, { data: cellXGeneData }] = await Promise.all([
          indexArr.get(selectionSlice),
          cellXGeneArr.get(selectionSlice),
        ]);
        for (let rowIndex = 0; rowIndex < rowIndices.length; rowIndex += 1) {
          geneData[rowIndices[rowIndex]] = cellXGeneData[rowIndex];
        }
        return geneData;
      }),
    );
  }

  /**
   * Class method for loading a gene selection from a CSR matrix.
   * The whole matrix is expanded to its dense form and the selected columns are copied out.
   * @param {Array} selection A list of gene names whose data should be fetched.
   * @returns {Promise<Float32Array[]>} One Float32Array of length numCells per selected gene.
   */
  async _loadCSRGeneSelection(selection) {
    // NOTE(review): a name that is not in the gene list yields index -1,
    // which is used below without a check - confirm the intended behavior.
    const indices = await this._getGeneIndices(selection);
    const numGenes = await this._getNumGenes();
    const numCells = await this._getNumCells();
    const cellXGene = await this._loadCSRSparseCellXGene();
    return indices.map((index) => {
      const geneData = new Float32Array(numCells);
      for (let i = 0; i < numCells; i += 1) {
        geneData[i] = cellXGene[i * numGenes + index];
      }
      return geneData;
    });
  }

  /**
   * Class method for loading row oriented (CSR) sparse data from zarr
   * and expanding it to a dense matrix. The resulting promise is cached on the instance.
   * @returns {Promise<Float32Array>} A promise for the dense, row-major CellXGene matrix.
   */
  async _loadCSRSparseCellXGene() {
    if (this._sparseMatrix) {
      return this._sparseMatrix;
    }
    this._sparseMatrix = this._openSparseArrays().then(async (sparseArrays) => {
      const { options: { matrix } } = this;
      const { shape } = await this.dataSource.getJson(`${matrix}/.zattrs`);
      const [indptr, indices, values] = await Promise.all(
        sparseArrays.map(async (arr) => {
          const { data } = await arr.getRaw(null);
          return data;
        }),
      );
      return expandSparseToDense(indptr, indices, values, shape, true);
    });
    return this._sparseMatrix;
  }

  /**
   * Class method for loading column oriented (CSC) sparse data from zarr
   * and expanding it to a dense matrix. The resulting promise is cached on the instance
   * (in the same property as the CSR variant).
   * @returns {Promise<Float32Array>} A promise for the dense, row-major CellXGene matrix.
   */
  async _loadCSCSparseCellXGene() {
    if (this._sparseMatrix) {
      return this._sparseMatrix;
    }
    this._sparseMatrix = this._openSparseArrays().then(async (sparseArrays) => {
      const { options: { matrix } } = this;
      const { shape } = await this.dataSource.getJson(`${matrix}/.zattrs`);
      const [indptr, indices, values] = await Promise.all(
        sparseArrays.map(async (arr) => {
          const { data } = await arr.getRaw(null);
          return data;
        }),
      );
      return expandSparseToDense(indptr, indices, values, shape, false);
    });
    return this._sparseMatrix;
  }

  /**
   * Class method for loading the cell x gene matrix, normalized to unsigned bytes.
   * The matrix is restricted to the genes selected by the `matrixGeneFilter` option
   * when that option is present. The resulting promise is cached on the instance.
   * @returns {Promise} A promise for { data } where data is a Uint8Array
   * containing the cell x gene data.
   */
  async loadCellXGene() {
    const { store } = this.dataSource;
    if (this.cellXGene) {
      return this.cellXGene;
    }
    const { options: { matrix, matrixGeneFilter } } = this;
    if (!this._matrixZattrs) {
      this._matrixZattrs = await this.dataSource.getJson(`${matrix}/.zattrs`);
    }
    const encodingType = this._matrixZattrs['encoding-type'];
    if (!matrixGeneFilter) {
      if (encodingType === 'csr_matrix') {
        this.cellXGene = this._loadCSRSparseCellXGene().then(data => normalize(data));
      } else if (encodingType === 'csc_matrix') {
        this.cellXGene = this._loadCSCSparseCellXGene().then(data => normalize(data));
      } else {
        // Any other encoding is read as a dense zarr array.
        if (!this.arr) {
          this.arr = openArray({ store, path: matrix, mode: 'r' });
        }
        this.cellXGene = this.arr.then(z => z.getRaw(null).then(({ data }) => normalize(data)));
      }
    } else if (encodingType === 'csr_matrix') {
      this.cellXGene = this._loadCSRSparseCellXGene().then(
        async (cellXGene) => {
          const filteredGenes = await this._getFilteredGenes(matrixGeneFilter);
          const numGenesFiltered = filteredGenes.length;
          const geneNames = await this.loadFilteredGeneNames();
          const numGenes = geneNames.length;
          const numCells = await this._getNumCells();
          // Look up each name's first position once instead of scanning
          // the gene list again for every filtered gene.
          const firstIndexByName = new Map();
          for (let k = 0; k < numGenes; k += 1) {
            if (!firstIndexByName.has(geneNames[k])) {
              firstIndexByName.set(geneNames[k], k);
            }
          }
          const cellXGeneMatrixFiltered = new Float32Array(numCells * numGenesFiltered);
          for (let i = 0; i < numGenesFiltered; i += 1) {
            const index = firstIndexByName.get(filteredGenes[i]);
            for (let j = 0; j < numCells; j += 1) {
              cellXGeneMatrixFiltered[j * numGenesFiltered + i] = cellXGene[j * numGenes + index];
            }
          }
          return normalize(cellXGeneMatrixFiltered);
        },
      );
    } else {
      // Load the filtered genes column by column, then normalize the
      // assembled matrix as a whole rather than each column on its own.
      const genes = await this._getFilteredGenes(matrixGeneFilter);
      this.cellXGene = this.loadGeneSelection({ selection: genes, shouldNormalize: false })
        .then(({ data }) => (normalize(concatenateColumnVectors(data))));
    }
    return this.cellXGene;
  }

  /**
   * Class method for loading a gene selection.
   * @param {Object} args
   * @param {Array} args.selection A list of gene names whose data should be fetched.
   * @param {Boolean} args.shouldNormalize Whether each gene's values should be
   * rescaled to unsigned bytes via normalize (default true).
   * @returns {Object} { data, url } where data is an array with one array of
   * gene expression data per selected gene and url is null.
   */
  async loadGeneSelection({ selection, shouldNormalize = true }) {
    const { options: { matrix } } = this;
    const { store } = this.dataSource;
    if (!this._matrixZattrs) {
      this._matrixZattrs = await this.dataSource.getJson(`${matrix}/.zattrs`);
    }
    const encodingType = this._matrixZattrs['encoding-type'];
    let genes;
    if (encodingType === 'csc_matrix') {
      genes = await this._loadCSCGeneSelection(selection);
    } else if (encodingType === 'csr_matrix') {
      genes = await this._loadCSRGeneSelection(selection);
    } else {
      if (!this.arr) {
        this.arr = openArray({ store, path: matrix, mode: 'r' });
      }
      const indices = await this._getGeneIndices(selection);
      // We can index directly into a normal dense array zarr store via `get`.
      genes = await Promise.all(
        indices.map(index => this.arr.then(z => z.get([null, index])).then(({ data }) => data)),
      );
    }
    return { data: genes.map(i => (shouldNormalize ? normalize(i).data : i)), url: null };
  }

  /**
   * Class method for loading only attributes i.e. rows and columns.
   * @returns {Promise} A promise for { data: { rows, cols }, url } containing
   * the row (cell) and column (gene) labels for the matrix; url is null.
   */
  loadAttrs() {
    return Promise.all([this.dataSource.loadObsIndex(), this.loadFilteredGeneNames()])
      .then(([cellNames, geneNames]) => ({
        data: { rows: cellNames, cols: geneNames },
        url: null,
      }));
  }

  /**
   * Load the matrix attributes together with the cell x gene matrix.
   * @returns {Promise} A promise for a LoaderResult wrapping [attrs, cellXGene].
   */
  load() {
    return Promise.all([this.loadAttrs(), this.loadCellXGene()]).then(
      async ([{ data: attrs }, cellXGene]) => {
        const {
          options: { matrixGeneFilter: matrixGeneFilterZarr },
        } = this;
        // In order to return the correct gene list with the heatmap data,
        // we need to filter the columns of attrs so it matches the cellXGene data.
        if (matrixGeneFilterZarr) {
          const matrixGeneFilter = await this.dataSource.getFlatArrDecompressed(
            matrixGeneFilterZarr,
          );
          attrs.cols = attrs.cols.filter((_, i) => matrixGeneFilter[i]);
        }
        return new LoaderResult([attrs, cellXGene], null);
      },
    );
  }
}