UNPKG

phylojs

Version:

A simple TypeScript library for phylogenetic trees

611 lines (610 loc) 23.5 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.Tree = void 0; // Tree constructor const node_1 = require("./node"); class Tree { /** * The constructor of the `Tree` class. * * @param {Node} root Root node */ constructor(root) { this.root = root; } // Tree methods /** * Convenience function applied function post-order to all nodes in the tree. * Is a wrapper for `Node.applyPostOrder()` * @param {function} func Function to apply to each node */ applyPostOrder(func) { this.root.applyPostOrder(func); } /** * Convenience function applied function pre-order to all nodes in the tree. * Is a wrapper for `Node.applyPreOrder()` * @param {function} func Function to apply to each node */ applyPreOrder(func) { this.root.applyPreOrder(func); } /** * Computes height of each node above the root. Automatically done if rerooting. * NaN if any undefined branch lengths ancestral to a particular node. Contrasts * `Tree.getRTTD()` in not converting undefined branch lengths to 0.0. */ computeNodeHeights() { this.root.applyPreOrder((node) => { if (node.parent === undefined) node.height = 0.0; // root case else { if (node.branchLength !== undefined && node.parent.height !== undefined) node.height = node.parent.height + node.branchLength; } return node.height; }); } /** * Reflects whether all tips are the same age (0), equivalently if they are * all the same height about the root. Only makes sense for time trees. * Sets property on tree object and returns boolean. The tolerance for differences * in tip heights is 1e-6 as a default, but can be adjusted. 
* @param {number} tol * @returns {boolean} */ isUltrametric(tol = 1e-6) { this.computeNodeHeights(); const tipHeights = this.leafList.map(e => e.height); let definedHeights; if (tipHeights.some(e => e == undefined)) { this.ultrametric = false; return false; } else { definedHeights = tipHeights; if (definedHeights.every((e, i, a) => Math.abs(e - a[0]) < tol)) { this.ultrametric = true; return true; } else { this.ultrametric = false; return false; } } } /** * Ladderises the tree. * Applies a pre-order search. For each node, child nodes are ordered by increasing number of descending tips */ ladderise() { this.root.applyPreOrder((node) => { node.children.sort((a, b) => { const lenA = this.getClade(a).getTipLabels().length; const lenB = this.getClade(b).getTipLabels().length; if (lenA < lenB) { return -1; } else if (lenA > lenB) { return 1; } else { return 0; } }); }); } /** Return branch lengths in order matching .nodeList */ getBranchLengths() { return this.nodeList.map(e => e.branchLength); } /** Returns root to tip distances. Counts undefined branch lengths as zero */ getRTTDist() { const rttDist = this.root.applyPreOrder((node) => { if (node.parent === undefined) { node.rttDist = 0.0; // root case } else if (node.branchLength !== undefined && node.parent.rttDist !== undefined) { node.rttDist = node.branchLength + node.parent.rttDist; } else { node.rttDist = node.parent.rttDist; } if (node.isLeaf()) return node.rttDist; return undefined; }); // TODO: have to loop over array to remove undefined values // do it in the applyPreOrder function return rttDist.filter(e => e !== undefined); } /** Assign new node IDs (use with care!) 
*/ reassignNodeIDs() { let nodeID = 0; for (let i = 0; i < this.nodeList.length; i++) this.nodeList[i].id = nodeID++; } /** Clear various node caches */ clearCaches() { this._nodeList = undefined; this.nodeIDMap = undefined; this.labelNodeMap = undefined; this._leafList = undefined; this.recombEdgeMap = undefined; this.ultrametric = undefined; } /** A getter that returns an array of nodes (`Node[]`) from private `_nodeList` property in order determined by a pre-order search*/ get nodeList() { if (this._nodeList === undefined && this.root !== undefined) { this._nodeList = this.root.applyPreOrder((node) => { return node; }); } if (!this._nodeList) { return []; } return this._nodeList; } /** * Get node given its numerical `id` * @param {number} nodeID Numerical id of node */ getNode(nodeID) { if (this.nodeIDMap === undefined && this.root !== undefined) { this.nodeIDMap = {}; for (let i = 0; i < this.nodeList.length; i++) { const node = this.nodeList[i]; this.nodeIDMap[node.id] = node; } } return this.nodeIDMap == undefined ? null : this.nodeIDMap[nodeID]; } /** A getter that returns an array of nodes (`Node[]`) from private `_nodeList` property in order determined by a pre-order search*/ get leafList() { if (this._leafList === undefined && this.root !== undefined) { this._leafList = this.nodeList.filter(e => e.isLeaf()); } return this._leafList == undefined ? [] : this._leafList; } /** * Retrieve node having given label * @param {string} label Node's label */ getNodeByLabel(label) { if (this.labelNodeMap === undefined && this.root !== undefined) { this.labelNodeMap = {}; for (let i = 0; i < this.leafList.length; i++) { const node = this.leafList[i]; if (node.label !== undefined) { this.labelNodeMap[node.label] = node; // Assume Node has 'label' property } } } return this.labelNodeMap == undefined || this.labelNodeMap[label] === undefined ? 
null : this.labelNodeMap[label]; } /** Retrieve map from recomb edge IDs to src/dest node pairs */ getRecombEdgeMap() { if (this.recombEdgeMap === undefined) { let node; let i; let hybridNodeList; if (this.root !== undefined) { hybridNodeList = this.nodeList.filter(e => e.isHybrid()); } else { hybridNodeList = []; } const srcHybridIDMap = {}; const destHybridIDMap = {}; for (i = 0; i < hybridNodeList.length; i++) { if (hybridNodeList[i] === null) continue; node = hybridNodeList[i]; if (node.hybridID === undefined) { continue; } if (node.isLeaf()) { if (node.hybridID in destHybridIDMap) destHybridIDMap[node.hybridID].push(node); else destHybridIDMap[node.hybridID] = [node]; } else srcHybridIDMap[node.hybridID] = node; } let hybridID; this.recombEdgeMap = {}; for (hybridID in srcHybridIDMap) { if (hybridID in destHybridIDMap) this.recombEdgeMap[hybridID] = [srcHybridIDMap[hybridID]].concat(destHybridIDMap[hybridID]); else throw 'Extended Newick error: hybrid nodes must come in groups of 2 or more.'; } // Edge case: leaf recombinations for (hybridID in destHybridIDMap) { if (!(hybridID in this.recombEdgeMap)) this.recombEdgeMap[hybridID] = destHybridIDMap[hybridID]; } } return this.recombEdgeMap; } /** * Check if node is a source node for a hybrid edge in the tree. * @param {Node} node * @returns {boolean} */ isRecombSrcNode(node) { if (node.hybridID !== undefined) { return (node.isHybrid() && this.getRecombEdgeMap()[node.hybridID][0] == node); } else { return false; } } /** * Check if node is a destination node for a hybrid edge in the tree. * @param {Node} node * @returns {boolean} */ isRecombDestNode(node) { if (node.hybridID !== undefined) { return (node.isHybrid() && this.getRecombEdgeMap()[node.hybridID][0] != node); } else { return false; } } /** * Check if the tree is a phylogenetic network. 
* @param {Node} node * @returns {boolean} */ isNetwork() { return Object.keys(this.getRecombEdgeMap()).length > 0; } /** * Return sub-stree descending from a given `node` * @parm {Node} root node root of desired subtree */ getClade(node) { return new Tree(node); } /** * Get the most recent common ancestor of a set of nodes * @param {Node[]} nodes Nodes for which the MRCA is sought */ getMRCA(nodes) { const leafCount = nodes.length; if (leafCount === 0) return null; if (leafCount === 1) return nodes[0].parent || nodes[0]; const visitCounts = new Map(); let nodesToCheck = nodes.slice(); while (nodesToCheck.length > 0) { const nextNodes = []; for (const node of nodesToCheck) { const count = (visitCounts.get(node) || 0) + 1; if (count === leafCount) { // This is the MRCA. return node; } visitCounts.set(node, count); if (node.parent) { nextNodes.push(node.parent); } } nodesToCheck = nextNodes; } return null; // return null if no common ancestor is found } /** * Get all tip names from tree or descending from a `node` * @param {Node | undefined} node Optional node whose descending tips are returned. Defaults to root */ getTipLabels(node) { let tips; if (node !== undefined) { tips = this.getClade(node).leafList.map(e => { var _a; return (_a = e.label) !== null && _a !== void 0 ? _a : e.id.toString(); }); } else { tips = this.leafList.map(e => { var _a; return (_a = e.label) !== null && _a !== void 0 ? _a : e.id.toString(); }); } return tips; } /** Sum of all defined branch lengths. Elsewhere referred to tree "length" if all baranch lengths are defined */ getTotalBranchLength() { let totalLength = 0.0; const nodeList = this.nodeList; for (const node of nodeList) { if (node.branchLength !== undefined) { totalLength += node.branchLength; } } return totalLength; } /** * Reroot a tree at a given node. * @param {Node} edgeBaseNode `Node` to reroot at * @param {number|undefined} prop Proportion of the branch descending from `edgeBaseNode` at which to cut and place the root. 
Defaults ot 0.5 */ reroot(edgeBaseNode, prop = 0.5) { var _a, _b, _c; // --- Prior check if nework --- if (this.isNetwork()) { throw new Error('Cannot reroot a network.'); } // --- 0. Prep old root and recomb map --- const oldRoot = this.root; this.recombEdgeMap = undefined; const recombMap = this.getRecombEdgeMap(); // --- 1. Cut A—B at the desired proportion --- const A = edgeBaseNode; const B = A.parent; if (!B) throw new Error('Cannot reroot at the current root'); const origBL = (_a = A.branchLength) !== null && _a !== void 0 ? _a : 0; const frac = Math.min(1, Math.max(0, prop)); const BL_A = origBL * frac; const BL_B = origBL - BL_A; B.removeChild(A); A.parent = undefined; // --- 2. Build undirected adjacency including the two halves and recomb edges --- const R = new node_1.Node(0); const adj = new Map(); function addEdge(u, v, w) { var _a, _b; if (!adj.has(u)) adj.set(u, []); if (!adj.has(v)) adj.set(v, []); (_a = adj.get(u)) === null || _a === void 0 ? void 0 : _a.push({ node: v, weight: w }); (_b = adj.get(v)) === null || _b === void 0 ? void 0 : _b.push({ node: u, weight: w }); } // a) original tree edges (walk old nodes before we wipe parent/children) const allOld = [...this.nodeList]; for (const n of allOld) { for (const c of n.children) { // skip the cut edge A→B if ((n === B && c === A) || (n === A && c === B)) continue; addEdge(n, c, (_b = c.branchLength) !== null && _b !== void 0 ? _b : 0); } } // b) the two new cuts addEdge(R, A, BL_A); addEdge(R, B, BL_B); // c) recombination (hybrid) edges for (const hidStr in recombMap) { const group = recombMap[+hidStr]; const src = group[0]; for (let i = 1; i < group.length; i++) { const dst = group[i]; addEdge(src, dst, (_c = dst.branchLength) !== null && _c !== void 0 ? _c : 0); } } // --- 3. 
Clear all old orientation and rebuild from R via BFS --- for (const n of allOld) { n.parent = undefined; n.children = []; } this.root = R; const visited = new Set([R]); const queue = [R]; while (queue.length) { const u = queue.shift(); if (!u) { throw new Error('Queue is unexpectedly empty'); } for (const { node: v, weight } of adj.get(u) || []) { if (visited.has(v)) continue; v.parent = u; u.children.push(v); v.branchLength = weight; visited.add(v); queue.push(v); } } // --- 4. Clean up old singleton root if it lost all but one child --- if (oldRoot.children.length === 1 && !oldRoot.isHybrid()) { const [only] = oldRoot.children; if (!oldRoot.parent) { throw new Error('Old root does not have a parent.'); } const p = oldRoot.parent; p.removeChild(oldRoot); oldRoot.removeChild(only); p.addChild(only); if (oldRoot.branchLength !== undefined && only.branchLength !== undefined) { only.branchLength += oldRoot.branchLength; } else { throw new Error('Branch length is undefined for either the old root or the only child.'); } } // --- 5. Final bookkeeping --- this.clearCaches(); this.computeNodeHeights(); this.reassignNodeIDs(); // fix recombination edge lengths const newMap = this.getRecombEdgeMap(); for (const hid in newMap) { const [src, ...dests] = newMap[hid]; for (const dest of dests) { if (dest.branchLength !== undefined && dest.height !== undefined && src.height !== undefined) { dest.branchLength += dest.height - src.height; } else { throw new Error('Branch length or height is undefined for the destination or source node.'); } } } } /** * Returns height above the root for each internal node. Root height is assumed to be 0 * and undefined branch lengths are assumed to be zero. 
* @returns {number[]} */ getInternalNodeHeights() { const heights = this.root.applyPreOrder((node) => { if (node.parent === undefined) { node.height = 0.0; // root case } else if (node.branchLength !== undefined && node.parent.height !== undefined) { node.height = node.branchLength + node.parent.height; } else { node.height = node.parent.height; } if (!node.isLeaf()) return node.height; return undefined; }); // TODO: have to loop over array to remove undefined values from leaves // do it in the applyPreOrder function return heights.filter(e => e !== undefined); } /** * Calculates Gamma statistic from Pybus and Harvey 2000 (10.1098/rspb.2000.1278 ). * The Gamma statistic measures deviation from a constant rate pure-birth process * in the underlying population (also known as a Yule process). Values above 0 indicate * longer external branches, while values corresond to internal branches. Calculating the Gamma statistic involves * manipulations that mean the statistic should follow a standard normal distribution, * such that one can perform a frequentist test to measure consistency with the pure birth * process. * * Since the Gamma statistic applies to a pure birth process, the tree is expected to be * ultrametric and the method throws an error if it is not. It also returns NaN if any node * node heights are undefined, due to undefined branch lengths. 
* * @param {number} tol Tolerance for ultrametricity passed to `.isUltrametric()` * @returns {number} * */ gammaStatistic(tol = 1e-6) { let g; // inter-node times (Fig 1 Pybys & Harvey 2004) const n = this.leafList.length; // Num Tips let A = 0; if (n <= 2) { return NaN; } else if (!this.isUltrametric(tol)) { console.warn('Gamma Statistic requires an untrametric tree!'); return NaN; } this.computeNodeHeights(); const nodes = this.nodeList; const heights = nodes.map(e => e.height).slice(1); if (heights.slice(1).some(e => e == undefined)) return NaN; // if any heights undefined if (nodes.some(node => node.isHybrid())) { console.warn('This is a netowrk! Gamma statistic ignoring hybrid nodes.'); } g = nodes .slice(1) .filter(node => !node.isLeaf()) .map(node => node.height) .filter(e => e !== undefined); // Need to assert type here const maxHeight = this.leafList.map(e => e.height)[0]; g.push(maxHeight); g.sort((a, b) => a - b); // in place const minHeight = g[0]; g = g.map((e, i, a) => e - a[i - 1]); g[0] = minHeight; // NaN from first diff --> frist branch time const T = g .map((e, i) => (i + 2) * e) .reduce((partialSum, e) => partialSum + e, 0); let sum; for (let i = 0; i <= g.length - 2; i++) { sum = 0; for (let k = 0; k <= i; k++) { sum += (k + 2) * g[k]; } A += sum; } A /= n - 2; const gamma = (A - T / 2) / (T * Math.sqrt(1 / (12 * (n - 2)))); return gamma; } /** * Returns the Sackin Index, which measures imbalance as the sum of the number of tips * descending from each internal node. The sum increases for more imbalances (ladder-like) trees. 
* See Chapter 5 in "Tree balance indices: a comprehensive survey" (https://doi.org/10.1007/978-3-031-39800-1_5) * for further details, a reference we point to from the treebalance R package * @returns {number} */ sackinIndex() { const internalNodes = this.nodeList.filter(e => !e.isLeaf()); const sackinIindex = internalNodes .map(e => this.getClade(e).leafList.length) .reduce((partialSum, e) => partialSum + e, 0); return sackinIindex; } /** * Check whether the tree is binary (each internal node has <= 2 descendents) * @returns {any} */ isBinary() { const internalNodes = this.nodeList.filter(e => !e.isLeaf()); const nChildren = internalNodes.map(e => e.children.length); if (nChildren.every(e => e <= 2)) { return true; } else { return false; } } /** * Returns the Colless Imbalance (CI) index, with an option to normalise or use the standard method. * The CI is defined for binary trees, so NaN is returned if the tree is not binary. * In essence, the CI adds the difference between the number of tips descending from each internal node. * The square of each difference is taking if the method selected is quadratic. If the method is set to * "corrected", then the overall sum is scaled by the number of tips. If method is "standard", then * sum is returned. * The value should be smaller for more balanced trees (less ladder-like). As for the Sacking Index, * see "Tree balance indices: a comprehensive survey * (https://doi.org/10.1007/978-3-031-39800-1_12, https://doi.org/10.1007/978-3-031-39800-1_13, https://doi.org/10.1007/978-3-031-39800-1_15) * for further details. 
* @param {"standard" | "normalised" | "quadratic"} method * @returns {number} */ collessIndex(method = 'standard') { if (!this.isBinary()) { console.warn('Coless Imbalance not defined for non-binary trees'); return NaN; } const internalNodes = this.nodeList.filter(e => !e.isLeaf()); // Number of left and right escending tips for each internal node const collessIndex = internalNodes .map(e => { let nLeft = this.getClade(e.children[0]).leafList.length; let nRight = this.getClade(e.children[1]).leafList.length; if (!Number.isInteger(nLeft)) nLeft = 0; if (!Number.isInteger(nRight)) nRight = 0; return method === 'quadratic' ? Math.pow(nLeft - nRight, 2) : Math.abs(nLeft - nRight); }) .reduce((partialSum, e) => partialSum + e, 0); if (method === 'corrected') { const n = this.leafList.length; if (n <= 2) return 0; return (2 * collessIndex) / ((n - 1) * (n - 2)); } return collessIndex; } } exports.Tree = Tree;