UNPKG

scran.js

Version:

Single cell RNA-seq analysis in Javascript

301 lines (276 loc) 12.6 kB
import * as wa from "wasmarrays.js";
import * as utils from "./utils.js";

/**
 * Create the callback that is invoked every time an entry has to be replaced by the placeholder index.
 * This is shared by {@linkcode convertToFactor} and {@linkcode resetLevels} so that both handle `action` identically.
 *
 * @param {string} action - One of `"none"`, `"warn"` or `"error"`.
 * @param {number} placeholder - Placeholder index, only used to format the warning message.
 * @param {object} messages - Message fragments specific to the caller.
 * @param {string} messages.replaced - Description of what is being replaced, used in the warning.
 * @param {string} messages.errorMessage - Full message of the error thrown when `action = "error"`.
 * @param {string} messages.context - Description of what is being handled, used when `action` is not recognized.
 *
 * @return {function} Function taking no arguments, to be called on each problematic entry.
 * For `"warn"`, only the first call emits a warning.
 * @private
 */
function createFailureHandler(action, placeholder, { replaced, errorMessage, context }) {
    if (action === "warn") {
        let warned = false;
        return () => {
            if (!warned) {
                console.warn("replacing " + replaced + " with the placeholder index '" + String(placeholder) + "'");
                warned = true;
            }
        };
    }

    if (action === "none") {
        return () => {};
    }

    if (action === "error") {
        return () => {
            throw new Error(errorMessage);
        };
    }

    throw new Error("unknown action '" + action + "' for handling " + context);
}

/**
 * Collect the unique valid values of `x` in order of first appearance.
 * Null, undefined and non-finite numbers are skipped.
 * The result is sorted if it is all-string (default string ordering) or all-number (numeric ordering).
 *
 * @param {Array|TypedArray} x - Array of values.
 * @return {Array} Array of unique levels.
 * @private
 */
function inferLevels(x) {
    const unique = new Set();
    for (let i = 0; i < x.length; i++) {
        const y = x[i];
        if (y == null || (typeof y === "number" && !Number.isFinite(y))) {
            continue;
        }
        unique.add(y);
    }

    const levels = Array.from(unique);

    // Sorting them by default, to make life nicer.
    if (levels.every(v => typeof v === "string")) {
        levels.sort();
    } else if (levels.every(v => typeof v === "number")) {
        levels.sort((a, b) => a - b);
    }

    return levels;
}

/**
 * Convert an arbitrary array into a R-style factor, with integer indices into an array of levels.
 * This is useful for formatting grouping or blocking vectors for {@linkcode scoreMarkers}, {@linkcode modelGeneVar}, etc.
 *
 * @param {Array|TypedArray} x - Array of values to be converted into a factor.
 *
 * Note that TypedArray views on Wasm-allocated buffers should only be provided if `buffer` is also provided;
 * otherwise, a Wasm memory allocation may invalidate the view.
 * @param {object} [options={}] - Optional parameters.
 * @param {boolean} [options.asWasmArray=true] - Whether to return an Int32WasmArray instance for the indices.
 * If `false`, an Int32Array is returned instead.
 * Only used if `buffer` is not supplied.
 * @param {?(Int32WasmArray|Int32Array)} [options.buffer=null] - Array in which the output is to be stored.
 * If provided, this should be of length equal to that of `x`.
 * @param {?Array} [options.levels=null] - An existing array of known levels to be matched against `x`.
 * Values in `x` that are not in `levels` are considered to be invalid.
 * If `null`, the levels are automatically inferred from `x`; these will be sorted if all-numeric or all-string.
 * @param {string} [options.action="error"] - Action to take when invalid values (i.e., null, NaNs) are detected in `x`.
 *
 * - `"none"`: the index is silently set to `placeholder`.
 * - `"warn"`: a warning is raised on the first occurrence of an invalid value, and the index is set to `placeholder`.
 * - `"error"`: an error is raised.
 *
 * @param {number} [options.placeholder=-1] - Placeholder index to use upon detecting invalid values in `x`.
 *
 * @return {object} Object containing:
 *
 * - `ids`: an Int32WasmArray or Int32Array of length equal to `x`, containing the index into `levels` for each cell.
 * - `levels`: an array of unique levels, such that `Array.from(ids).map(i => levels[i])` returns the same contents as `x` (aside from invalid values).
 *   If an input `levels` is supplied, this is returned directly.
 *
 * If `buffer` was supplied, it is used as the value of the `ids` property.
 */
export function convertToFactor(x, options = {}) {
    let { asWasmArray = true, buffer = null, levels = null, action = "error", placeholder = -1, ...others } = options;
    utils.checkOtherOptions(others);

    const failure = createFailureHandler(action, placeholder, {
        replaced: "invalid values",
        errorMessage: "detected invalid value (e.g., null, NaN) in 'x'",
        context: "invalid entries"
    });

    let local_buffer;
    try {
        if (buffer == null) {
            local_buffer = (asWasmArray ? utils.createInt32WasmArray(x.length) : new Int32Array(x.length));
            buffer = local_buffer;
        } else {
            if (buffer.length !== x.length) {
                throw new Error("'buffer' should have length equal to that of 'x'");
            }
            asWasmArray = buffer instanceof wa.Int32WasmArray;
        }

        // The levels are finalized (and sorted) BEFORE any index is written.
        // This way the placeholder is stored exactly once and never needs to be
        // remapped, regardless of the value chosen by the caller.
        if (levels == null) {
            levels = inferLevels(x);
        }

        const mapping = new Map();
        for (let l = 0; l < levels.length; l++) {
            mapping.set(levels[l], l);
        }

        const barr = (asWasmArray ? buffer.array() : buffer); // no Wasm allocations from this point onwards!
        for (let i = 0; i < x.length; i++) {
            const existing = mapping.get(x[i]);
            if (typeof existing == "undefined") {
                // Either not in the supplied levels, or null/non-finite and thus never inferred as a level.
                failure();
                barr[i] = placeholder;
            } else {
                barr[i] = existing;
            }
        }
    } catch (e) {
        // Only release memory that was allocated by this function.
        if (local_buffer instanceof wa.WasmArray) {
            utils.free(local_buffer);
        }
        throw e;
    }

    return { ids: buffer, levels: levels };
}

/**
 * Reindex the factor indices to remove unused levels.
 * This is done by adjusting the indices such that every index from `[0, N)` is represented at least once, where `N` is the number of (used) levels.
 * The relative (numeric) order of the used indices is preserved.
 *
 * @param {Int32WasmArray|TypedArray|Array} x - Array of factor indices such as that produced by {@linkcode convertToFactor}.
 * Every distinct value in `x` is treated as a used index, including any negative placeholder values.
 *
 * @return {Array} `x` is modified in place to remove unused levels.
 *
 * An array (denoted here as `y`) is returned that represents the mapping between the original and modified IDs,
 * i.e., running `x.map(i => y[i])` will recover the input `x`.
 * This is most commonly used to create a new array of levels, i.e., `y.map(i => old_levels[i])` will drop the unused levels.
 */
export function dropUnusedLevels(x) {
    if (x instanceof wa.WasmArray) {
        // No more wasm allocations past this point!
        x = x.array();
    }

    // A numeric comparator is required: the default sort compares string
    // representations, which would place 10 before 2.
    const used = Array.from(new Set(x)).sort((a, b) => a - b);

    const mapping = new Map();
    used.forEach((old, i) => {
        mapping.set(old, i);
    });

    for (let i = 0; i < x.length; i++) {
        x[i] = mapping.get(x[i]);
    }

    return used;
}

/**
 * Change the levels of a factor, updating the indices appropriately.
 *
 * @param {object} x - Factor object produced by {@linkcode convertToFactor}.
 * @param {Array} newLevels - Array of new levels.
 * This should be a superset of `x.levels`.
 * @param {object} [options={}] - Optional parameters.
 * @param {string} [options.action="error"] - Action to take when `newLevels` is not a superset of `x.levels`.
 * This can be `"error"`, `"warn"` or `"none"`.
 * @param {number} [options.placeholder=-1] - Placeholder index corresponding to invalid values of `x.ids`.
 * Any placeholders in `x.ids` will be preserved on function return.
 * Additionally, if entries of `x.ids` refer to entries of `x.levels` that are missing in `newLevels`, they will be set to the placeholder value on function return;
 * this is only relevant if `action = "warn"` or `"none"`.
 *
 * @return `x` is modified by reference such that `x.levels` is set to `newLevels`.
 * `x.ids` is updated so that the indices now refer to the appropriate value in `newLevels`.
 */
export function resetLevels(x, newLevels, options = {}) {
    const { action = "error", placeholder = -1, ...others } = options;
    utils.checkOtherOptions(others);

    const mapping = new Map();
    for (let i = 0; i < newLevels.length; i++) {
        mapping.set(newLevels[i], i);
    }

    const failure = createFailureHandler(action, placeholder, {
        replaced: "missing levels",
        errorMessage: "detected level in 'x.levels' that is missing from 'newLevels'",
        context: "missing levels"
    });

    // conversion[old index] = new index, or the placeholder if the old level is gone.
    const oldLevels = x.levels;
    const conversion = new Array(oldLevels.length);
    for (let i = 0; i < oldLevels.length; i++) {
        const found = mapping.get(oldLevels[i]);
        if (typeof found == "undefined") {
            failure();
            conversion[i] = placeholder;
        } else {
            conversion[i] = found;
        }
    }

    x.levels = newLevels;

    let target = x.ids;
    if (target instanceof wa.WasmArray) {
        // No more wasm allocations past this point!
        target = target.array();
    }

    // Existing placeholders are left untouched; everything else is translated.
    for (let i = 0; i < target.length; i++) {
        const y = target[i];
        if (y !== placeholder) {
            target[i] = conversion[y];
        }
    }
}

/**
 * Subset a factor, possibly also dropping its unused levels.
 * This is typically based on the same filtering vector as {@linkcode filterCells}.
 *
 * @param {object} x - An object representing a factor, containing the following properties:
 *
 * - `ids`: An Int32Array or Int32WasmArray of integer indices.
 * - `levels`: An array of levels that can be indexed by entries of `ids`.
 *
 * This is typically produced by {@linkcode convertToFactor}.
 * @param {(Array|TypedArray|WasmArray)} subset - Array specifying the subset to retain or filter out, depending on `filter`.
 *
 * If `filter = null`, the array is expected to contain integer indices specifying the entries in `x` to retain.
 * The ordering of indices in `subset` will be respected in the subsetted array.
 *
 * If `filter = true`, the array should be of length equal to that of `x.ids`.
 * Each value is interpreted as a boolean and, if truthy, indicates that the corresponding entry of `x` should be filtered out.
 *
 * If `filter = false`, the array should be of length equal to that of `x.ids`.
 * Each value is interpreted as a boolean and, if truthy, indicates that the corresponding entry of `x` should be retained.
 *
 * Note that TypedArray views on Wasm-allocated buffers should only be provided if `buffer` is also provided;
 * otherwise, a Wasm memory allocation may invalidate the view.
 * @param {object} [options={}] - Optional parameters.
 * @param {boolean} [options.drop=true] - Whether to drop unused levels in the output, see {@linkcode dropUnusedLevels}.
 * @param {?boolean} [options.filter=null] - Whether to retain truthy or falsey values in a `subset` boolean filter.
 * If `null`, `subset` should instead contain the indices of elements to retain.
 * @param {?(Int32Array|Int32WasmArray)} [options.buffer=null] - Array in which the output is to be stored, of the same type as `x.ids`.
 * If provided, this should be of length equal to `subset`, if `filter = null`;
 * the number of truthy elements in `subset`, if `filter = false`;
 * or the number of falsey elements in `subset`, if `filter = true`.
 *
 * @return {object} An object like `x`, containing:
 *
 * - `ids`: An Int32Array or Int32WasmArray of integer indices, subsetted from those in `x.ids`.
 * - `levels`: Array of levels that can be indexed by entries of the output `ids`.
 *   If `drop = true`, this may be a subset of `x.levels` where every entry is represented at least once in the output `ids`.
 *
 * If `buffer` is supplied, the returned `ids` will be set to `buffer`.
 */
export function subsetFactor(x, subset, options = {}) {
    let { drop = true, filter = null, buffer = null, ...others } = options;
    utils.checkOtherOptions(others);

    const output = { ids: null, levels: x.levels };

    if (x.ids instanceof wa.WasmArray) {
        output.ids = wa.subsetWasmArray(x.ids, subset, { filter, buffer });
    } else {
        // The length to validate against is that of the indices; `x` itself is
        // a plain object and has no `length` property.
        const n = wa.checkSubsetLength(subset, filter, x.ids.length, "x");
        if (buffer == null) {
            buffer = new x.ids.constructor(n);
        }
        wa.fillSubset(subset, filter, x.ids, buffer);
        output.ids = buffer;
    }

    if (drop) {
        const remapping = dropUnusedLevels(output.ids);
        output.levels = remapping.map(i => x.levels[i]);
    }

    return output;
}