UNPKG

scran.js

Version:

Single-cell RNA-seq analysis in JavaScript

200 lines (178 loc) 8.44 kB
import * as gc from "./gc.js";
import * as utils from "./utils.js";

/**
 * Wrapper for the PCA results on the Wasm heap, typically created by {@linkcode runPca}.
 * @hideconstructor
 */
export class RunPcaResults {
    #id;
    #results;

    // 'filled' is not used by this class; it is retained so that the constructor
    // signature stays unchanged for whatever code instantiates it.
    constructor(id, raw, filled = true) {
        this.#id = id;
        this.#results = raw;
    }

    /**
     * @param {object} [options={}] - Optional parameters.
     * @param {boolean} [options.copy=true] - Whether to copy the results from the Wasm heap, see {@linkcode possibleCopy}.
     * @return {Float64Array|Float64WasmArray} Array containing the principal components for all cells.
     * This should be treated as a column-major array where the rows are the PCs and columns are the cells.
     */
    principalComponents(options = {}) {
        const { copy = true, ...others } = options;
        utils.checkOtherOptions(others);
        return utils.possibleCopy(this.#results.components(), copy);
    }

    /**
     * @param {object} [options={}] - Optional parameters.
     * @param {boolean} [options.copy=true] - Whether to copy the results from the Wasm heap, see {@linkcode possibleCopy}.
     * @return {Float64Array|Float64WasmArray} Array containing the rotation matrix.
     * This should be treated as a column-major array where the rows are the genes and the columns are the PCs.
     */
    rotation(options = {}) {
        const { copy = true, ...others } = options;
        utils.checkOtherOptions(others);
        return utils.possibleCopy(this.#results.rotation(), copy);
    }

    /**
     * @param {object} [options={}] - Optional parameters.
     * @param {boolean} [options.copy=true] - Whether to copy the results from the Wasm heap, see {@linkcode possibleCopy}.
     * @return {Float64Array|Float64WasmArray} Array containing the variance explained for each requested PC.
     */
    varianceExplained(options = {}) {
        const { copy = true, ...others } = options;
        utils.checkOtherOptions(others);
        return utils.possibleCopy(this.#results.variance_explained(), copy);
    }

    /**
     * @return {number} The total variance in the dataset,
     * typically used with {@linkcode RunPcaResults#varianceExplained varianceExplained} to compute the proportion of variance explained.
     */
    totalVariance() {
        return this.#results.total_variance();
    }

    /**
     * @return {number} Number of PCs available in these results.
     */
    numberOfPCs() {
        return this.#results.num_pcs();
    }

    /**
     * @return {number} Number of cells used to compute these results.
     */
    numberOfCells() {
        return this.#results.num_cells();
    }

    /**
     * Frees the memory allocated on the Wasm heap for this object.
     * This invalidates this object and all references to it.
     * Calling this method more than once is safe; later calls do nothing.
     */
    free() {
        if (this.#results !== null) {
            gc.release(this.#id);
            this.#results = null;
        }
    }
}

/**
 * Run a principal components analysis on the log-expression matrix.
 * This is usually done on a subset of features, and possibly with some kind of blocking on a per-cell batch factor.
 *
 * @param {ScranMatrix} x - The log-normalized expression matrix.
 * @param {object} [options={}] - Optional parameters.
 * @param {?(Uint8WasmArray|Array|TypedArray)} [options.features=null] - Array specifying which features should be retained (e.g., HVGs).
 * This should be of length equal to the number of rows in `x`; elements should be `true` to retain each row.
 * If `null`, all features are retained.
 * @param {number} [options.numberOfPCs=25] - Number of top principal components to compute.
 * This is capped at one less than the smaller dimension of `x`.
 * @param {boolean} [options.scale=false] - Whether to scale each feature to unit variance.
 * @param {?(Int32WasmArray|Array|TypedArray)} [options.block=null] - Array containing the block assignment for each cell.
 * This should have length equal to the number of cells and contain all values from 0 to `n - 1` at least once, where `n` is the number of blocks.
 * This is used to adjust the PCA for the blocking factor, see `blockMethod`.
 * Alternatively, this may be `null`, in which case all cells are assumed to be in the same block.
 * @param {string} [options.blockMethod="regress"] - How to adjust the PCA for the blocking factor.
 *
 * - `"regress"` will regress out the factor, effectively performing a PCA on the residuals.
 *   This only makes sense in limited cases, e.g., inter-block differences are linear and the composition of each block is the same.
 * - `"project"` will compute the rotation vectors from the residuals but will project the cells onto the PC space.
 *   This focuses the PCA on within-block variance while avoiding any assumptions about the nature of the inter-block differences.
 * - `"none"` will ignore any blocking factor, i.e., as if `block = null`.
 *   Any inter-block differences will both contribute to the determination of the rotation vectors and also be preserved in the PC space.
 *
 * This option is only used if `block` is not `null`.
 * @param {string} [options.blockWeightPolicy="variable"] - The policy for weighting each block so that it contributes the same number of effective observations to the covariance matrix.
 *
 * - `"variable"` ensures that, past a certain size (default 1000 cells), larger blocks do not dominate the definition of the PC space.
 *   Below the threshold size, blocks are weighted in proportion to their size to reduce the influence of very small blocks.
 * - `"equal"` uses the same weight for each block, regardless of size.
 * - `"none"` does not apply any extra weighting, i.e., the contribution of each block is proportional to its size.
 *
 * This option is only used if `block` is not `null`.
 * @param {?boolean} [options.realizeMatrix=null] - Whether to realize the submatrix into its own memory.
 * This is more efficient but consumes more memory.
 * Defaults to true if `features` is supplied, otherwise it is false.
 * @param {?number} [options.numberOfThreads=null] - Number of threads to use.
 * If `null`, defaults to {@linkcode maximumThreads}.
 *
 * @return {RunPcaResults} Object containing the computed PCs.
 * @throws {Error} If `features` does not have one entry per row of `x`,
 * or if `block` (when used) does not have one entry per column of `x`.
 */
export function runPca(x, options = {}) {
    const {
        features = null,
        numberOfPCs = 25,
        scale = false,
        block = null,
        blockMethod = "regress",
        blockWeightPolicy = "variable",
        realizeMatrix = null,
        numberOfThreads = null,
        ...others
    } = options;
    utils.checkOtherOptions(others);

    // Declared outside the 'try' so that the 'catch'/'finally' clauses can release them.
    let feat_data;
    let block_data;
    let output;

    utils.matchOptions("blockMethod", blockMethod, ["none", "regress", "project"]);
    const nthreads = utils.chooseNumberOfThreads(numberOfThreads);

    try {
        let use_feat = false;
        let fptr = 0;
        if (features !== null) {
            feat_data = utils.wasmifyArray(features, "Uint8WasmArray");
            if (feat_data.length !== x.numberOfRows()) {
                throw new Error("length of 'features' should be equal to number of rows in 'x'");
            }
            use_feat = true;
            fptr = feat_data.offset;
        }

        // If the caller expressed no preference, only realize when a feature subset was requested.
        const realize = (realizeMatrix === null ? use_feat : realizeMatrix);

        // Avoid asking for more PCs than is possible.
        // Remember that centering removes one df, so we subtract 1 from the dimensions.
        const num_pcs = Math.min(numberOfPCs, x.numberOfRows() - 1, x.numberOfColumns() - 1);

        let use_block = false;
        let bptr = 0;
        let comp_as_resid = false;
        if (block !== null && blockMethod !== "none") {
            block_data = utils.wasmifyArray(block, "Int32WasmArray");
            if (block_data.length !== x.numberOfColumns()) {
                throw new Error("length of 'block' should be equal to the number of columns in 'x'");
            }
            use_block = true;
            bptr = block_data.offset;
            // "regress" reports the residuals as the components; "project" does not.
            comp_as_resid = (blockMethod === "regress");
        }

        output = gc.call(
            module => module.run_pca(
                x.matrix,
                num_pcs,
                use_feat,
                fptr,
                scale,
                use_block,
                bptr,
                blockWeightPolicy,
                comp_as_resid,
                realize,
                nthreads
            ),
            RunPcaResults
        );

    } catch (e) {
        utils.free(output);
        throw e;

    } finally {
        // The temporary Wasm-side copies of the inputs are released regardless of the outcome.
        utils.free(feat_data);
        utils.free(block_data);
    }

    return output;
}