UNPKG

auspice

Version:

Web app for visualizing pathogen evolution

496 lines (464 loc) 21.5 kB
import { freqScale, NODE_NOT_VISIBLE, NODE_VISIBLE_TO_MAP_ONLY, NODE_VISIBLE, genotypeSymbol } from "./globals";
import { calcTipCounts } from "./treeCountingHelpers";
import { getTraitFromNode } from "./treeMiscHelpers";
import { warningNotification } from "../actions/notifications";
import { getFocusedNodes } from "../actions/tree";

/**
 * Compute the [min, max] numeric date over the visible terminal nodes.
 * Nodes without a (truthy) `num_date` trait are ignored.
 *
 * @param {Array} nodes tree nodes (flat)
 * @param {Array<int>} visibility visibility array (1-1 with nodes)
 * @returns {Array<number>} `[minDate, maxDate]`; `[100000, -100000]` if no tip contributed a date
 */
export const getVisibleDateRange = (nodes, visibility) => nodes
  .filter((node, idx) => (visibility[idx] === NODE_VISIBLE && !node.hasChildren))
  .reduce((acc, node) => {
    const nodeDate = getTraitFromNode(node, "num_date");
    return nodeDate ? [Math.min(nodeDate, acc[0]), Math.max(nodeDate, acc[1])] : acc;
  }, [100000, -100000]);

/**
 * Find the index of the first node whose `name` equals `name`.
 *
 * @param {Array} nodes tree nodes (flat)
 * @param {string} name strain name
 * @returns {int} index of the matching node. If there is no match an error is
 * logged and `0` is returned.
 */
export const strainNameToIdx = (nodes, name) => {
  const idx = nodes.findIndex((node) => node.name === name);
  if (idx !== -1) {
    return idx;
  }
  console.error("strainNameToIdx couldn't find strain");
  return 0;
};

/**
 * Find the node with the given label name & value.
 * NOTE: if there are zero, or multiple, nodes with the label then an error is
 * logged, a warning notification is dispatched and `null` is returned.
 *
 * @param {Array} nodes tree nodes (flat)
 * @param {string} labelName label name
 * @param {string} labelValue label value
 * @param {function} dispatch called with the warning notification action
 * @returns {int|null} the index of the (unique) matching node, else `null`
 */
export const getIdxMatchingLabel = (nodes, labelName, labelValue, dispatch) => {
  /* `null` (rather than `0`) marks "nothing found yet", so that a label on the
  node at index 0 is a valid match and a later duplicate is still detected */
  let found = null;
  for (let i = 0; i < nodes.length; i++) {
    const labels = nodes[i].branch_attrs && nodes[i].branch_attrs.labels;
    if (!labels || labels[labelName] !== labelValue) {
      continue;
    }
    if (found !== null) {
      console.error(`getIdxMatchingLabel found multiple labels ${labelName}===${labelValue}`);
      dispatch(warningNotification({
        message: "Specified Zoom Label Found Multiple Times!",
        details: `Multiple nodes in the tree are labelled '${labelName} ${labelValue}' - no zoom performed`
      }));
      return null;
    }
    found = i;
  }
  if (found === null) {
    console.error(`getIdxMatchingLabel couldn't find label ${labelName}===${labelValue}`);
    dispatch(warningNotification({
      message: "Specified Zoom Label Value Not Found!",
      details: `The label '${labelName}' value '${labelValue}' was not found in the tree - no zoom performed`
    }));
    return null;
  }
  return found;
};

/**
 * Scan the branch labels associated with the node *n* and if an appropriate one
 * exists then we want to set this as the branch label query. Branches with
 * multiple labels will be used in the order specified by *availableBranchLabels*
 * (i.e. the order of the drop-down on the menu)
 *
 * @param {Object} n tree node
 * @param {Array<string>} availableBranchLabels
 * @returns {string|undefined} `"<labelName>:<labelValue>"`, or `undefined` if
 * the node has no usable label
 */
export function urlQueryLabel(n, availableBranchLabels) {
  if (n.branch_attrs && n.branch_attrs.labels !== undefined) {
    const legalBranchLabels = Object.keys(n.branch_attrs.labels)
      // don't use AA mutations as zoom labels currently (the URL is ugly and there will be too many non-unique labels)
      .filter((label) => label !== "aa")
      // sort the possible branch labels by the order of those available on the tree
      .sort((a, b) => availableBranchLabels.indexOf(a) - availableBranchLabels.indexOf(b));
    if (legalBranchLabels.length) {
      const key = legalBranchLabels[0]; // use the first one (if multiple)
      return `${key}:${n.branch_attrs.labels[key]}`;
    }
  }
  return undefined;
}

/** calcBranchThickness **
 * returns an array of node (branch) thicknesses based on the tipCount at each node
 * If the node isn't visible, the thickness is 0.5.
 * Relies on the `tipCount` property of the nodes having been updated.
 * Pure.
 * @param {Array<Node>} nodes - JSON nodes
 * @param {Array<Int>} visibility - visibility array (1-1 with nodes)
 * @returns array of thicknesses (numeric)
 */
const calcBranchThickness = (nodes, visibility) => {
  /* edge case: no tips selected => avoid scaling against a falsy tip count */
  const maxTipCount = nodes[0].tipCount || 1;
  return nodes.map((d, idx) => (
    visibility[idx] === NODE_VISIBLE ?
      freqScale((d.tipCount + 5) / (maxTipCount + 5)) :
      0.5
  ));
};

/* recursively mark the parents of a given node active by setting the node idx
 * to true in the param visArray
 */
const makeParentVisible = (visArray, node) => {
  if (node.arrayIdx === 0 || visArray[node.parent.arrayIdx]) {
    return; // this is the root of the tree or the parent was already visible
  }
  visArray[node.parent.arrayIdx] = true;
  makeParentVisible(visArray, node.parent);
};

/* Recursively hide nodes that do not have more than one visible child node by
 * updating the boolean values in the param visArray.
 * Relies on visArray having been updated by `makeParentVisible`
 * Returns the index of the visible common ancestor (or null if there is none).
 */
const hideNodesAboveVisibleCommonAncestor = (visArray, node) => {
  if (!node.hasChildren) {
    return node.arrayIdx; // Terminal node without children
  }
  const visibleChildren = node.children.filter((child) => visArray[child.arrayIdx]);
  if (visibleChildren.length > 1) {
    return node.arrayIdx; // This is the common ancestor of visible children
  }
  visArray[node.arrayIdx] = false;
  /* at most one child is visible at this point: descend into it, if present */
  if (!visibleChildren.length) {
    return null; // there is no visible common ancestor
  }
  return hideNodesAboveVisibleCommonAncestor(visArray, visibleChildren[0]) || null;
};

/* Gets the inView attribute of phyloTree.nodes, accessed through
 * redux.tree.nodes[idx].shell.inView Bool. The inView attribute is set by
 * phyloTree and determines if the tip is within the view.
 * Returns the array of inView booleans (or null if there are no tree.nodes).
 */
const getInView = (tree) => {
  if (!tree.nodes) {
    console.error("getInView() ran without tree.nodes");
    return null;
  }
  /* inView represents nodes that are within the current view window (i.e. not off the screen) */
  try {
    return tree.nodes.map((d) => d.shell.inView);
  } catch (e) {
    /* edge case: this fn may be called before the shell structure of the nodes
     * has been created (i.e. phyloTree's not run yet). In this case, it's
     * safe to assume that everything's in view */
    return tree.nodes.map((d) => (d.inView !== undefined ? d.inView : true));
  }
};

/**
 * Gets all active filters and checks if each tree.node matches the filters.
 * FILTERS:
 * - controls.filters (redux) is a dict of trait name -> values
 * - filters (in this code) is a list of filters to apply
 *   e.g. [{trait: "country", values: [...]}, ...]
 * Genotype filters (keyed by `genotypeSymbol`) are handled separately by
 * `performGenotypeFilterMatch`.
 *
 * @param {Object} tree
 * @param {Object} controls
 * @param {Array<bool>} inView 1-1 with tree.nodes
 * @returns {Object|null} `{filtered, idxOfFilteredRoot}` where `filtered` is an
 * array of booleans (1-1 with tree.nodes, `undefined` if no filters are active)
 * and `idxOfFilteredRoot` is the index of the common ancestor of the filtered
 * nodes. `null` is returned if there are no tree.nodes.
 */
export const getFilteredAndIdxOfFilteredRoot = (tree, controls, inView) => {
  if (!tree.nodes) {
    console.error("getFiltered() ran without tree.nodes");
    return null;
  }
  let filtered; // array of bools, same length as tree.nodes. true -> that node should be visible
  let idxOfFilteredRoot; // index of last common ancestor of filtered nodes.

  const filters = [];
  Reflect.ownKeys(controls.filters).forEach((filterName) => {
    if (filterName === genotypeSymbol) return; // see `performGenotypeFilterMatch` call below
    const activeFilterItems = controls.filters[filterName]
      .filter((item) => item.active)
      .map((item) => item.value);
    if (activeFilterItems.length) {
      filters.push({trait: filterName, values: activeFilterItems});
    }
  });

  if (filters.length) {
    /* find the terminal nodes that were (a) already visible and (b) match the filters */
    filtered = tree.nodes.map((d, idx) => (
      !d.hasChildren &&
      inView[idx] &&
      filters.every((f) => f.values.includes(getTraitFromNode(d, f.trait)))
    ));
    /* collect the tip indexes *before* `filtered` is modified below */
    const idxsOfFilteredTips = filtered.reduce((a, e, i) => {
      if (e) { a.push(i); }
      return a;
    }, []);
    /* for each visible tip, make the parent nodes visible (recursively) */
    idxsOfFilteredTips.forEach((tipIdx) => makeParentVisible(filtered, tree.nodes[tipIdx]));
    /* Recursively hide ancestor nodes that are not the last common
     * ancestor of selected nodes, starting from the root of the tree */
    idxOfFilteredRoot = hideNodesAboveVisibleCommonAncestor(filtered, tree.nodes[0]);
  }

  /* genotype filters (if any are active) replace both values */
  const genotypeMatch = performGenotypeFilterMatch(filtered, controls.filters, tree.nodes);
  if (genotypeMatch) {
    [filtered, idxOfFilteredRoot] = genotypeMatch;
  }

  return {filtered, idxOfFilteredRoot};
};

/** calcVisibility
USES:
- use dates NOT controls.dateMin & controls.dateMax
- uses inView array returned by getInView()
- uses filtered array returned by getFilteredAndIdxOfFilteredRoot()

RETURNS:
visibility: array of the constants NODE_NOT_VISIBLE, NODE_VISIBLE_TO_MAP_ONLY
and NODE_VISIBLE, which the original documentation describes as integers in {0, 1, 2}:
 - 0: not displayed by map. Potentially displayed by tree as a thin branch.
 - 1: available for display by the map. Displayed by tree as a thin branch.
 - 2: Displayed by both the map and the tree.
NOTE: if there are no tree.nodes an error is logged and the scalar NODE_VISIBLE
is returned instead of an array.

ROUGH DESCRIPTION OF HOW FILTERING IS APPLIED:
 - inView filtering (reflects tree zooming): Nodes which are not inView always have visibility=0
 - time filtering is simple - all nodes (internal + terminal) not within (tmin, tmax) are excluded.
 - filters are a bit more tricky - the visible tips are calculated, and the parent
    branches back to the MRCA are considered visible. This is then intersected with
    the time & inView visible stuff
*/
export const calcVisibility = (tree, controls, dates, inView, filtered) => {
  if (!tree.nodes) {
    console.error("calcVisibility ran without tree.nodes");
    return NODE_VISIBLE;
  }
  /* intersect the various arrays contributing to visibility */
  return tree.nodes.map((node, idx) => {
    const passesFilters = filtered ? filtered[idx] : true;
    if (!inView[idx] || !passesFilters) {
      return NODE_NOT_VISIBLE;
    }
    const nodeDate = getTraitFromNode(node, "num_date");
    const parentNodeDate = getTraitFromNode(node.parent, "num_date");
    /* without dates we cannot apply the time slice, so the node is displayed */
    if (!nodeDate || !parentNodeDate) {
      return NODE_VISIBLE;
    }
    /* if branchLengthsToDisplay is "divOnly", then ensure node displayed */
    if (controls.branchLengthsToDisplay === "divOnly") {
      return NODE_VISIBLE;
    }
    /* is the actual node date (the "end" of the branch) in the time slice? */
    if (nodeDate >= dates.dateMinNumeric && nodeDate <= dates.dateMaxNumeric) {
      return NODE_VISIBLE;
    }
    /* is any part of the (parent date -> node date) in the time slice? */
    if (!(nodeDate < dates.dateMinNumeric || parentNodeDate > dates.dateMaxNumeric)) {
      return NODE_VISIBLE_TO_MAP_ONLY;
    }
    return NODE_NOT_VISIBLE;
  });
};

/**
 * Recompute visibility, tip counts and branch thicknesses for the tree.
 * NOTE: `calcTipCounts` is called on `tree.nodes[0]` (modifies the redux tree
 * nodes in place).
 *
 * @param {Object} tree
 * @param {Object} controls
 * @param {Object} dates object with `dateMinNumeric` & `dateMaxNumeric`
 * @returns {Object} `{visibility, visibilityVersion, branchThickness,
 * branchThicknessVersion, idxOfFilteredRoot, focusNodes}`
 */
export const calculateVisiblityAndBranchThickness = (tree, controls, dates) => {
  const inView = getInView(tree);
  const {filtered, idxOfFilteredRoot} = getFilteredAndIdxOfFilteredRoot(tree, controls, inView) || {};
  const visibility = calcVisibility(tree, controls, dates, inView, filtered);
  /* recalculate tipCounts over the tree - modifies redux tree nodes in place (yeah, I know) */
  calcTipCounts(tree.nodes[0], visibility);
  /* focus nodes are only computed when the focus control is on */
  const focusNodes = controls.focus ? getFocusedNodes(tree.nodes, visibility) : undefined;
  /* re-calculate branchThickness (inline) */
  return {
    visibility,
    visibilityVersion: tree.visibilityVersion + 1,
    branchThickness: calcBranchThickness(tree.nodes, visibility),
    branchThicknessVersion: tree.branchThicknessVersion + 1,
    idxOfFilteredRoot,
    focusNodes
  };
};

/**
 * Compute whether each node is filtered (visible) by any defined genotype filters.
 *
 * Idea behind how we check genotype filter matches:
 * A "constellation" is a set of mutations -- for instance, the filters define such a set (see `filterConstellationLong`)
 * We define `constellationMatchesPerNode` which, for each node, defines an array of values corresponding to that node's
 * membership of the constellation.
 * We recursively traverse the tree and use mutations (defined per node) to modulate this data.
 * Note that we don't know the basal genotype for a given position until we have traversed the tree, thus we cannot
 * test a nodes membership (of a constellation) until after traversal.
 * Example:
 *    genotype filter "S 484K"
 *        the filter specifies Spike residue 484 to be Lysine (K). Note that this may include E484K but also others.
 *    constellationMatchesPerNode[nodeIdx][i]: false|true|undefined.
 *        false means an observed mutation means this node has a residue that is _not_ K.
 *        true means that an observed mutation informs us that this node _is_ K.
 *        undefined means that no muts were observed during the traversal to this node, so we must rely on the
 *        basal state, which may not yet be known.
 *
 * Pseudo-typescript type declarations are added as comments, the intention of which is to help readability & understanding.
 * @param {Array<bool>|undefined} filtered length nodes.length & in 1-1 correspondence. Not modified.
 * @param {Object} filters
 * @param {Array<TreeNode>} nodes
 * @returns {Array|undefined} `[newFiltered: Array<bool>, idxOfFilteredRoot: int]`, or
 * `undefined` if there are no active genotype filters
 */
function performGenotypeFilterMatch(filtered, filters, nodes) {
  // type genotypeFilters: Array<string> // active genotype filters, in the "<gene> <position><state>" form
  // which `createFilterConstellation` parses. Examples: "nuc 123A", "S 484K" etc
  const genotypeFilters = Reflect.ownKeys(filters).includes(genotypeSymbol) ?
    filters[genotypeSymbol].filter((item) => item.active).map((item) => item.value) :
    false;
  if (!genotypeFilters || !genotypeFilters.length) {
    return undefined;
  }

  // todo: this has the potential to be rather slow. Timing / optimisation needed.
  // note: rather similar (in spirit) to how we calculate entropy - can we refactor / combine / speed up?
  // todo: the entropy view is sometimes broken after filtering by genotype, but this shouldn't be the case
  // (we can filter by other traits which are homoplasic and it works)

  /* if there are no other filters in play then every node starts off as "filtered in" */
  const previouslyFiltered = filtered || Array.from({length: nodes.length}, () => true);

  const filterConstellationLong = createFilterConstellation(genotypeFilters);
  const nGt = filterConstellationLong.length; // Note: may not be the same as genotypeFilters.length

  // type basalGt: Array<string> // entries at index `i` are the basal nt / aa at filterConstellationLong[i]
  const basalGt = new Array(nGt); // stores the basal nt / aa of the position

  // type constellationEntry: undefined | false | true
  // type constellationMatch: Array<constellationEntry>
  // type constellationMatchesPerNode: Array<constellationMatch>
  const constellationMatchesPerNode = new Array(nodes.length);

  const recurse = (node, constellationMatch) => {
    const bmuts = node.branch_attrs && node.branch_attrs.mutations;
    if (bmuts && Object.keys(bmuts).length) {
      for (let i = 0; i < nGt; i++) {
        const [gene, position, states] = filterConstellationLong[i];
        const geneMuts = bmuts[gene];
        // does this branch encode a mutation which means it matches the ith filter, or reverts away from it?
        if (!geneMuts) continue;
        /* mutations are strings of the form <from><position><to>, e.g. "E484K" */
        const posIdx = geneMuts.findIndex((m) => m.slice(1, -1) === position);
        if (posIdx === -1) continue;
        const mutation = geneMuts[posIdx];
        /* part I: does the mutation mean the node (at this idx) matches the ith entry in the constellation?
         * true: branch mutation leading to the constellation mutation
         * false: branch mutation meaning the inherited state does not match the constellation */
        constellationMatch[i] = states.has(mutation.slice(-1));
        /* part II: store the basal state of this position (if not already defined) */
        if (!basalGt[i]) {
          basalGt[i] = mutation.slice(0, 1);
        }
      }
    }
    constellationMatchesPerNode[node.arrayIdx] = constellationMatch;
    // recurse to children & pass down (copy of) `constellationMatch` which can then be modified by descendants
    if (node.hasChildren) {
      node.children.forEach((c) => recurse(c, [...constellationMatch]));
    }
  };
  recurse(nodes[0], Array.from({length: nGt}, () => undefined));

  /* We can now compute whether the basal positions match the relevant filter.
   * Positions where no mutation was observed anywhere have an unknown basal state and are treated as non-matching */
  const basalConstellationMatch = Array.from(
    {length: nGt},
    (_, i) => filterConstellationLong[i][2].has(basalGt[i])
  );

  // filtered state is determined by checking if each node has the "correct" constellation of mutations
  const newFiltered = previouslyFiltered.map((prevFilterValue, idx) => {
    if (!prevFilterValue) return false; // means that another filter (non-gt) excluded it
    return constellationMatchesPerNode[idx]
      .map((match, i) => (match === undefined ? basalConstellationMatch[i] : match)) // See docstring for defn of `undefined` here
      .every((el) => el);
  });

  /* Find the MRCA of the filtered nodes, which we use for `zoom to selected` */
  const newIdxOfFilteredRoot = findFilteredMRCA(nodes, newFiltered);

  return [newFiltered, newIdxOfFilteredRoot];
}

/**
 * Given genotype filters, such as `["HA1 186D", "HA1 186S", "nuc 100T"]`
 * Produce an array of arrays whereby genotypes at the same position are grouped
 * e.g. `[["HA1", "186", Set("D", "S")], ["nuc", "100", Set("T")]]`.
 * The returned array will be sorted to improve readability.
 * An empty array of filters produces an empty array.
 * @param {Array<string>} filters genotype filters
 * @returns {Array<Array>} entries are `[gene: string, position: string, states: Set<string>]`
 */
export function createFilterConstellation(filters) {
  return filters
    .map((x) => {
      const [gene, state] = x.split(' ');
      return [gene, state.slice(0, -1), state.slice(-1)]; // e.g. ["HA1", "186", "D"]
    })
    .sort(sortConstellationLongFn)
    /* the entries are sorted, so those sharing a {gene, position} are adjacent */
    .reduce((constellation, [gene, position, residue]) => {
      const lastEntry = constellation[constellation.length - 1];
      if (lastEntry && lastEntry[0] === gene && lastEntry[1] === position) {
        lastEntry[2].add(residue);
      } else {
        constellation.push([gene, position, new Set([residue])]);
      }
      return constellation;
    }, []);
}

/**
 * Comparator for `[gene, position, residue]` entries: genes alphabetically
 * ("nuc" goes last), then numeric position, then residue alphabetically.
 * @param {Array<string>} a
 * @param {Array<string>} b
 * @returns {int} -1, 0 or 1
 */
export function sortConstellationLongFn(a, b) {
  if (a[0] !== b[0]) {
    // alphabetically sort genes, nuc goes last.
    if (a[0] === "nuc") return 1;
    if (b[0] === "nuc") return -1;
    return a[0] < b[0] ? -1 : 1;
  }
  // sort according to codon / nt position
  const posA = parseInt(a[1], 10);
  const posB = parseInt(b[1], 10);
  if (posA > posB) return 1;
  if (posB > posA) return -1;
  // codon / nt position is the same => sort alphabetically by residue
  if (a[2] > b[2]) return 1;
  if (a[2] < b[2]) return -1;
  return 0;
}

/**
 * Count the terminal nodes whose visibility is NODE_VISIBLE.
 * @param {Array} nodes tree nodes (flat)
 * @param {Array<int>} visibility visibility array (1-1 with nodes)
 * @returns {int}
 */
export const getNumSelectedTips = (nodes, visibility) => {
  let count = 0;
  nodes.forEach((d, idx) => {
    // nodes which are not inView have a visibility of NODE_NOT_VISIBLE
    // so this check accounts for them as well
    if (!d.hasChildren && visibility[idx] === NODE_VISIBLE) count += 1;
  });
  return count;
};

/**
 * Given filtered: Array<bool> find the MRCA node of the filtered nodes
 * Note that this node may not be part of the filtered selection.
 * @param {Array<TreeNode>} nodes
 * @param {Array<bool>} filtered 1-1 with nodes
 * @returns {int} the `arrayIdx` of the MRCA (0 if nothing is filtered)
 */
function findFilteredMRCA(nodes, filtered) {
  const basalIdxsOfFilteredClades = []; // the `arrayIdx`s of the first (preorder) visible nodes
  const rootPathToBasalFiltered = new Set(); // the `arrayIdx`s of paths from the root -> each of the nodes from `basalIdxsOfFilteredClades`
  let mrcaIdx = 0;

  /* step 1 does a shortened preorder traversal to find the set of basal visible nodes */
  function findBasalFilteredNodes(n) {
    if (filtered[n.arrayIdx]) {
      basalIdxsOfFilteredClades.push(n.arrayIdx);
      return;
    }
    if (n.hasChildren) {
      n.children.forEach((child) => findBasalFilteredNodes(child));
    }
  }

  /* step 2 recursively visit parents to store the node indexes of the path to the root in `rootPathToBasalFiltered` */
  function constructPathToRoot(nIdx) {
    rootPathToBasalFiltered.add(nIdx);
    const pIdx = nodes[nIdx].parent.arrayIdx;
    if (nIdx === 0 || rootPathToBasalFiltered.has(pIdx)) {
      return; // this is the root of the tree or the parent was already in the path
    }
    constructPathToRoot(pIdx);
  }

  /* step 3 - preorder confined to nodes in `rootPathToBasalFiltered` to find first node with multiple children in the path */
  function findMrca(n) {
    const nIdx = n.arrayIdx;
    if (!rootPathToBasalFiltered.has(nIdx)) return;
    if (!n.hasChildren) { // occurs when {filtered nodes} is a single terminal node
      mrcaIdx = nIdx;
      return;
    }
    const childrenInPath = n.children.filter((c) => rootPathToBasalFiltered.has(c.arrayIdx));
    if (childrenInPath.length !== 1) {
      mrcaIdx = nIdx;
      return;
    }
    findMrca(childrenInPath[0]);
  }

  findBasalFilteredNodes(nodes[0]);
  basalIdxsOfFilteredClades.forEach((idx) => constructPathToRoot(idx));
  findMrca(nodes[0]);
  return mrcaIdx;
}