@veg/hyphy-eye
Version:
Observable Framework application for building, testing, and exporting visualization components for Datamonkey.org
1,259 lines (1,137 loc) • 325 kB
JavaScript
import * as _ from 'lodash-es';
import * as d3 from 'd3';
import * as parse_svg from 'parse-svg-path';
import * as phylotree from 'phylotree';
import { html } from 'htl';
import { log } from 'gamma';
import * as Plot from '@observablehq/plot';
/**
 * Estimates how many synonymous and non-synonymous substitutions separate
 * two codons, averaging over every order in which the differing positions
 * could have changed.
 *
 * @param {string} from - The starting codon.
 * @param {string} to - The ending codon.
 *
 * @returns {Array<number>|undefined} `[synonymous, nonSynonymous]`.
 *   `[0, 0]` when either codon is 'NNN' or the codons are identical.
 *   `undefined` when more than three positions differ.
 */
function subsForPair(from, to) {
  if (from == 'NNN' || to == 'NNN') {
    return [0, 0];
  }

  // Positions at which the two codons disagree.
  const changed = _.reduce(from, (positions, base, pos) => {
    if (base != to[pos]) {
      positions.push(pos);
    }
    return positions;
  }, []);

  // Sum the [syn, nonsyn] tallies of each mutation order, then average.
  const averageOverOrderings = (orderings) => {
    const totals = [0, 0];
    for (const ordering of orderings) {
      const [syn, nonSyn] = pathDiff(from, to, ordering);
      totals[0] += syn;
      totals[1] += nonSyn;
    }
    return totals.map((total) => total / orderings.length);
  };

  const [a, b, c] = changed;

  if (changed.length === 0) {
    return [0, 0];
  }
  if (changed.length === 1) {
    return translateAmbiguousCodon(from) == translateAmbiguousCodon(to) ? [1, 0] : [0, 1];
  }
  if (changed.length === 2) {
    return averageOverOrderings([[a, b], [b, a]]);
  }
  if (changed.length === 3) {
    return averageOverOrderings([
      [a, b, c],
      [a, c, b],
      [b, a, c],
      [b, c, a],
      [c, a, b],
      [c, b, a],
    ]);
  }
  // More than three differing positions is not handled.
  return undefined;
}
/**
 * Lookup from a nucleotide symbol to the concrete bases (A/C/G/T) it may
 * stand for. 'U' resolves to 'T'; 'N' and '?' resolve to any base.
 */
const ambiguousCodes = Object.fromEntries(
  [
    ['A', 'A'],
    ['C', 'C'],
    ['G', 'G'],
    ['T', 'T'],
    ['U', 'T'],
    ['R', 'AG'],
    ['Y', 'CT'],
    ['K', 'GT'],
    ['M', 'AC'],
    ['S', 'CG'],
    ['W', 'AT'],
    ['B', 'CGT'],
    ['D', 'AGT'],
    ['H', 'ACT'],
    ['V', 'ACG'],
    ['N', 'ACGT'],
    ['?', 'ACGT'],
  ].map(([symbol, bases]) => [symbol, [...bases]])
);
/**
 * Translates a codon into amino acid(s), resolving ambiguous nucleotides.
 *
 * A codon found directly in the translation table is returned as that
 * table entry. Otherwise every concrete codon compatible with the
 * ambiguity symbols is translated, and the distinct residues are returned
 * as one sorted string. When a position holds a symbol with no known
 * expansion (so nothing can be enumerated), "?" is returned.
 *
 * @param {string} codon - a three-nucleotide codon
 * @return {string} the amino acid(s) corresponding to the codon
 */
function translateAmbiguousCodon(codon) {
  const table = getTranslationTable();
  if (codon in table) {
    return table[codon];
  }

  const residues = new Set();
  for (const first of ambiguousCodes[codon[0]] ?? []) {
    for (const second of ambiguousCodes[codon[1]] ?? []) {
      for (const third of ambiguousCodes[codon[2]] ?? []) {
        residues.add(table[first + second + third]);
      }
    }
  }

  if (residues.size === 0) {
    return "?";
  }
  return [...residues].sort().join("");
}
/**
 * Walks from one codon to another by changing one position at a time, in
 * the order given by `path`, and tallies whether each single step keeps or
 * changes the translation.
 *
 * @param {string|Array} from - The starting codon (string or array of
 *   single-character strings).
 * @param {string|Array} to - The ending codon, indexable by position.
 * @param {Array} path - Positions to change, in order.
 *
 * @returns {Array} `[synonymousSteps, nonSynonymousSteps]`.
 */
function pathDiff(from,to,path) {
  const tally = [0,0];
  const working = _.map (from);
  let step = 0;
  // The first step is always taken, matching callers that pass >= 1 position.
  do {
    const position = path[step];
    const before = translateAmbiguousCodon (working.join (""));
    working[position] = to[position];
    const after = translateAmbiguousCodon (working.join (""));
    tally[before == after ? 0 : 1] += 1;
    step += 1;
  } while (step < path.length);
  return tally;
}
// Codon/amino-acid pairs of the translation table, parsed once at module
// load. translateAmbiguousCodon asks for the table on every call, so
// re-parsing the CSV text each time was pure overhead.
const STANDARD_CODON_ENTRIES = "Codon,AA\nTTT,F\nTCT,S\nTAT,Y\nTGT,C\nTTC,F\nTCC,S\nTAC,Y\nTGC,C\nTTA,L\nTCA,S\nTAA,*\nTGA,*\nTTG,L\nTCG,S\nTAG,*\nTGG,W\nCTT,L\nCCT,P\nCAT,H\nCGT,R\nCTC,L\nCCC,P\nCAC,H\nCGC,R\nCTA,L\nCCA,P\nCAA,Q\nCGA,R\nCTG,L\nCCG,P\nCAG,Q\nCGG,R\nATT,I\nACT,T\nAAT,N\nAGT,S\nATC,I\nACC,T\nAAC,N\nAGC,S\nATA,I\nACA,T\nAAA,K\nAGA,R\nATG,M\nACG,T\nAAG,K\nAGG,R\nGTT,V\nGCT,A\nGAT,D\nGGT,G\nGTC,V\nGCC,A\nGAC,D\nGGC,G\nGTA,V\nGCA,A\nGAA,E\nGGA,G\nGTG,V\nGCG,A\nGAG,E\nGGG,G\n"
  .trim()
  .split("\n")
  .slice(1) // drop the "Codon,AA" header row
  .map((row) => row.split(","));

/**
 * Builds a dictionary mapping codons to amino acids.
 *
 * The 64 codon entries come from an embedded codon/amino-acid table, with
 * stop codons mapped to '*'. Two extra entries are added: 'NNN' maps to
 * '?' and '---' maps to '-'.
 *
 * A new object is returned on every call, so callers may modify their copy
 * without affecting later calls.
 *
 * @return {Object} a dictionary mapping codons to amino acids
 */
function getTranslationTable() {
  const mappedCode = Object.fromEntries(STANDARD_CODON_ENTRIES);
  mappedCode["---"] = "-";
  mappedCode["NNN"] = "?";
  return mappedCode;
}
var count = 0;
/**
 * Produces an identifier of the form "name-N", where N comes from a
 * module-wide counter that is incremented on every call.
 *
 * @param {string} name - Prefix for the identifier. `null` or `undefined`
 *   is treated as an empty string.
 *
 * @returns {string} The prefix, a dash, and the new counter value.
 */
function uid(name) {
  const prefix = name ?? "";
  const serial = ++count;
  return prefix + "-" + serial;
}
/**
 * Extracts attributes that several HyPhy methods share from a results JSON.
 *
 * Copied from `input` when the key exists:
 *  - numberOfSequences  <- "number of sequences"
 *  - numberOfSites      <- "number of sites"
 *  - numberOfPartitions <- "partition count"
 *
 * Derived from `tested` (one entry per partition, mapping branch name to a
 * label):
 *  - partitionSizes: number of branches labelled "test" in each partition
 *    (an empty array when `tested` is missing or not an object)
 *  - testedBranchCount: median of those per-partition counts (only set when
 *    `tested` is an object)
 *
 * @param {Object} resultsJson - The results JSON object from a HyPhy analysis
 * @returns {Object} Common attributes extracted from the results
 */
function extractCommonAttributes(resultsJson) {
  const attributes = {};

  // Basic sequence and site information
  const inputFields = [
    ["numberOfSequences", "number of sequences"],
    ["numberOfSites", "number of sites"],
    ["numberOfPartitions", "partition count"],
  ];
  for (const [attribute, key] of inputFields) {
    if (_.has(resultsJson, ["input", key])) {
      attributes[attribute] = resultsJson.input[key];
    }
  }

  // Count the branches labelled "test" once per partition and reuse the
  // result for both derived attributes. A present-but-null `tested` is
  // treated like a missing one instead of throwing.
  const tested = resultsJson?.tested;
  if (tested && typeof tested === "object") {
    const testCounts = Object.values(tested).map((partition) =>
      partition && typeof partition === "object"
        ? Object.values(partition).filter((label) => label === "test").length
        : 0
    );
    attributes.partitionSizes = testCounts;
    attributes.testedBranchCount = d3.median(testCounts);
  } else {
    attributes.partitionSizes = [];
  }

  return attributes;
}
/**
 * Reads a rate distribution from the results JSON and keeps only the
 * requested fields of each rate class.
 *
 * @param {Object} resultsJson - The results JSON object from a HyPhy analysis
 * @param {Array<string>} path - Path to the rate distribution in the results JSON
 * @param {Array<string>} fields - Fields to copy from each rate class; a
 *   field absent from a class is copied as `undefined`
 * @returns {Array<Object>|null} One object per rate class, or null when
 *   nothing (or a falsy value) is stored at `path`
 */
function extractRateDistribution(resultsJson, path, fields) {
  const distribution = _.get(resultsJson, path);
  if (!distribution) {
    return null;
  }

  // Copy just the requested fields of one rate class.
  const project = (rateClass) => {
    const picked = {};
    _.each(fields, (field) => {
      picked[field] = rateClass[field];
    });
    return picked;
  };

  return _.map(distribution, project);
}
/**
 * Reports whether the results JSON carries a background rate distribution
 * under fits -> "Unconstrained model" -> "Rate Distributions" -> "Background".
 *
 * @param {Object} resultsJson - The results JSON object from a HyPhy analysis
 * @returns {boolean} true when a truthy value is stored at that path
 */
function hasBackground(resultsJson) {
  const background =
    resultsJson?.fits?.["Unconstrained model"]?.["Rate Distributions"]?.Background;
  return Boolean(background);
}
/**
 * Reports whether the analysis settings in the results JSON have a truthy
 * "error-sink" entry (analysis -> settings -> "error-sink").
 *
 * @param {Object} resultsJson - The results JSON object from a HyPhy analysis
 * @returns {boolean} Whether error sink settings are available
 */
function hasErrorSink(resultsJson) {
  const errorSink = resultsJson.analysis?.settings?.["error-sink"];
  return Boolean(errorSink);
}
/**
 * Looks up a rate distribution in the results JSON and returns it as a
 * list of `{value, weight}` pairs ordered by value.
 *
 * @param {Object} resultsJson - The JSON object containing the results
 * @param {boolean} [hasErrorSink] - When exactly `true` and the rate field
 *   is "omega", the rate class stored under key "0" is left out
 * @param {Array} keys - The path to the rate distribution data
 * @param {Array} [tags=["omega", "proportion"]] - Field names holding the
 *   rate and its weight, in that order
 *
 * @returns {Array|null} Objects with `value` and `weight`, sorted by
 *   `value`; null when nothing (or a falsy value) is stored at `keys`.
 */
function getRateDistribution(resultsJson, hasErrorSink, keys, tags = ["omega", "proportion"]) {
  const rateInfo = _.get(resultsJson, keys);
  if (!rateInfo) {
    return null;
  }

  // The error-sink class is only removed on an explicit boolean `true`.
  const dropErrorSink = hasErrorSink === true && tags[0] === 'omega';
  const rateClasses = dropErrorSink
    ? Object.fromEntries(
        Object.entries(rateInfo).filter(([rateIndex]) => rateIndex !== '0')
      )
    : rateInfo;

  const toWeightedRate = (rateClass) => ({
    value: rateClass[tags[0]],
    weight: rateClass[tags[1]]
  });

  return _.sortBy(_.map(rateClasses, toWeightedRate), (entry) => entry.value);
}
/**
 * Fetches the rate distribution stored for one branch by delegating to
 * getRateDistribution with the path `[...keys, branch, "Rate Distributions"]`.
 *
 * @param {Object} resultsJson - The JSON object containing the results
 * @param {string} branch - The name of the branch
 * @param {Array} [keys=["branch attributes", "0"]] - Path to the branch attributes
 * @param {Array} [tags=["0", "1"]] - The field names for rate and weight
 * @param {boolean} [hasErrorSink] - Forwarded to getRateDistribution
 *
 * @returns {Array|null} Whatever getRateDistribution returns for that path.
 */
function getRateDistributionByBranch(resultsJson, branch, keys = ["branch attributes", "0"], tags = ["0", "1"], hasErrorSink) {
  const branchPath = [...keys];
  branchPath.push(branch, "Rate Distributions");
  return getRateDistribution(resultsJson, hasErrorSink, branchPath, tags);
}
/**
 * Reads the p-value stored for one branch at the path
 * `[...keys, branch, pvalueKey]` of the results JSON.
 *
 * @param {Object} resultsJson - The JSON object containing the results
 * @param {string} branch - The name of the branch
 * @param {Array} [keys=["branch attributes", "0"]] - Path to the branch attributes
 * @param {string} [pvalueKey="Corrected P-value"] - Key of the p-value
 *   within the branch's attributes
 *
 * @returns {number|undefined} The stored value, or `undefined` when the
 *   path does not exist.
 */
function getBranchPvalue(resultsJson, branch, keys = ["branch attributes", "0"], pvalueKey = "Corrected P-value") {
  const pvaluePath = [...keys];
  pvaluePath.push(branch, pvalueKey);
  return _.get(resultsJson, pvaluePath);
}
// TODO: if we need these so often, do we need to do work in phylotree??
/**
 * Computes a label tuple for every node of a tree, visiting nodes in
 * pre-order so that a parent's tuple exists before its children are
 * processed.
 *
 * Each tuple is
 * `[codon, translation, parentCodon, substitutionCount, isInternal]`:
 *  - a node present in `labels` uses its own codon; substitutionCount is
 *    the number of positions that differ from the parent's codon, ignoring
 *    positions where either side is '-' or 'N';
 *  - a node absent from `labels` inherits its parent's codon and
 *    translation, with substitutionCount 0;
 *  - a root absent from `labels` takes `labels["root"]`. Its tuple is
 *    stored under 'root' and also under the root node's own name, so that
 *    children can find it through their parent's name.
 *  - isInternal is true when the node has a `children` property.
 *
 * @param {PhyloTree} T - The tree.
 * @param {Object.<string,string>} labels - A mapping of node names to codons.
 * @return {Object.<string,array>} - A mapping of node names to label tuples.
 */
function generateNodeLabels(T, labels) {
  const L = {};
  T.traverse_and_compute(function (n) {
    const name = n.data.name;
    if (name in labels) {
      const codon = labels[name];
      const entry = [codon, translateAmbiguousCodon(codon), '', 0];
      L[name] = entry;
      if (n.parent) {
        const parentCodon = L[n.parent.data.name][0];
        entry[2] = parentCodon;
        for (let i = 0; i < codon.length; i++) {
          const c = codon[i];
          const c2 = parentCodon[i];
          // Gaps and fully ambiguous bases are not counted as changes.
          if (c2 !== c && c !== '-' && c2 !== '-' && c !== 'N' && c2 !== 'N') {
            entry[3]++;
          }
        }
      }
    } else if (n.parent) {
      // Unlabelled node: carry the parent's state forward unchanged.
      const inherited = L[n.parent.data.name].slice();
      inherited[2] = inherited[0];
      inherited[3] = 0;
      L[name] = inherited;
    } else {
      // Unlabelled root: fall back to the "root" label. The tuple is also
      // registered under the node's own name; previously it was stored
      // only under 'root', so the lookup below threw for any other name.
      const rootEntry = [labels["root"], translateAmbiguousCodon(labels["root"]), "", 0];
      L['root'] = rootEntry;
      L[name] = rootEntry;
    }
    L[name][4] = n.children !== undefined;
  }, "pre-order");
  return L;
}
/**
 * Appends a filter with id "tree_branchlabel_bgfill" to every `defs`
 * element selected from the given SVG. The filter floods its region with
 * lightgray and composites the source graphic on top, which gives branch
 * labels a lightgray background.
 *
 * @param {d3.selection} svg - The SVG selection that contains `defs`.
 *
 * @returns {void}
 */
function addSvgDefs(svg) {
  const backgroundFilter = svg.selectAll("defs").append("filter");

  const filterAttributes = [
    ["x", 0],
    ["y", 0],
    ["width", 1],
    ["height", 1],
    ["id", "tree_branchlabel_bgfill"],
  ];
  for (const [attribute, value] of filterAttributes) {
    backgroundFilter.attr(attribute, value);
  }

  backgroundFilter.append("feFlood").attr("flood-color", "lightgray");
  backgroundFilter
    .append("feComposite")
    .attr("in", "SourceGraphic")
    .attr("operator", "atop");
}
/**
 * Draws a text label next to a branch of a rendered tree.
 *
 * The label is anchored at the coordinates of the second command in the
 * branch's SVG path (`d` attribute). Nothing is drawn when the path has no
 * second command, or when `text` is neither a number nor something with a
 * non-zero `length`. A branch without an `id` is given one so the label
 * can be tied to it through the `label-for` attribute; an existing label
 * for the same branch is reused.
 *
 * @param {d3.selection} e - The D3 selection of the branch element.
 * @param {string|number} text - The text content to be displayed as the label.
 * @param {number} font_size - Base font size; the label uses 80% of it.
 * @param {d3.selection} container - The SVG container where the label will be added.
 */
function addBranchLabel(e, text, font_size, container) {
  const where2 = _.get(parse_svg.default(e.attr("d")), ["1"]);
  if (!where2) {
    return;
  }
  const hasText = text.length || _.isNumber(text);
  if (!hasText) {
    return;
  }

  let my_id = e.attr("id");
  if (!my_id) {
    my_id = uid("absrel_tree");
    e.attr("id", my_id);
  }

  const branch_label = container
    .selectAll("text[label-for='" + my_id + "']")
    .data([text])
    .join("text");

  branch_label
    .attr("label-for", my_id)
    .text((d) => d)
    .classed("absrel-branch-labels", true);

  branch_label
    .attr("x", where2[1])
    .attr("y", where2[2])
    .attr("font-size", font_size * 0.8)
    .attr("dx", "0.5em")
    .attr("dy", "-0.4em");

  branch_label.style("font-family", "ui-monospace");
  branch_label.attr("filter", "url(#tree_branchlabel_bgfill)");
}
/**
 * Builds, for every node in `node_labels`, a pair of pipe-separated
 * strings showing the codon and amino-acid states at the selected site and
 * at the neighbouring sites up to 4 positions on either side that fall
 * inside `[0, site_count)`. The selected site's own states are wrapped in
 * "·" on both sides.
 *
 * Neighbouring sites get their labels from generateNodeLabels, fed with
 * `results["substitutions"][index][siteIndex]`. When
 * `options["neighbors"]` is falsy an empty object is returned.
 *
 * @param {number} index - the index of the tree in the tree array
 * @param {number|string} s - the 1-based site number
 * @param {object} node_labels - node name -> array whose first two elements
 * are the codon state and the amino acid state at the selected site
 * @param {object} T - the tree object
 * @param {object} options - an object with options
 * @param {object} results - the results object
 * @param {number} site_count - the number of sites
 * @return {object} - node name -> `[codonStates, aminoAcidStates]`
 */
function displayTreeHandleNeighbors(index, s, node_labels, T, options, results, site_count) {
  const extended_labels = {};
  if (!options["neighbors"]) {
    return extended_labels;
  }

  const si = (+s) - 1;
  const markFocal = (d) => ["·" + d[0] + "·", "·" + d[1] + "·"];

  // One label map per site inside the window, in left-to-right order.
  const siteColumns = [];
  for (let idx = si - 4; idx <= si + 4; idx++) {
    if (!(idx >= 0 && idx < site_count)) {
      continue;
    }
    siteColumns.push(
      idx != si
        ? generateNodeLabels(T, results["substitutions"][index][idx])
        : _.mapValues(node_labels, markFocal)
    );
  }

  const joinColumn = (node, column) =>
    _.map(siteColumns, (site) => site[node][column]).join("|");

  _.each(node_labels, (unused, node) => {
    extended_labels[node] = [joinColumn(node, 0), joinColumn(node, 1)];
  });

  return extended_labels;
}
/**
 * Counts, for each branch other than "root", the number of sites in
 * `results_json.substitutions[i]` at which that branch has a recorded
 * state different from '---'.
 *
 * @param {Object} results_json - hyphy results json
 * @param {number} i - The index of the substitutions data in the results JSON.
 *
 * @returns {Object} Branch name -> number of counted sites. Branches with
 *   no counted site are absent.
 */
function subsByBranch(results_json, i) {
  const tally = {};
  for (const states of _.values(results_json.substitutions[i])) {
    for (const [branch, state] of _.toPairs(states)) {
      if (branch == "root" || state == '---') {
        continue;
      }
      tally[branch] = (tally[branch] || 0) + 1;
    }
  }
  return tally;
}
/**
 * Orders the tested nodes of a tree.
 *
 * The tree is first re-sorted in place: every node gets a `count_depth`
 * (1 for a node without children, otherwise 1 + the largest `count_depth`
 * among its children) and siblings are ordered by ascending `count_depth`.
 * The tree is then traversed again and the names of nodes whose entry in
 * `tested` is "test" are collected in visiting order.
 *
 * @param {Object} rawTree - The raw phylotree object (modified: nodes gain
 *   `count_depth` and children are re-sorted)
 * @param {Object} tested - Node name -> label; only "test" nodes are listed
 * @param {boolean} root - Whether to put the literal name 'root' first
 * @param {boolean} only_leaves - Whether to keep only nodes without a
 *   `children` property
 * @returns {string[]} An array of node names in the computed ordering
 */
function treeNodeOrdering(rawTree, tested, root, only_leaves) {
  // Pass 1: annotate each node with the height of its subtree.
  rawTree.traverse_and_compute((node) => {
    const hasChildren = node.children && node.children.length;
    node["count_depth"] = hasChildren
      ? 1 + d3.max(node.children, (child) => child["count_depth"])
      : 1;
  });

  // Shallow subtrees first.
  rawTree.resortChildren((left, right) => left["count_depth"] - right["count_depth"]);

  // Pass 2: collect the tested nodes in the new traversal order.
  const order = root ? ['root'] : [];
  rawTree.traverse_and_compute((node) => {
    const isTested = tested[node.data.name] === "test";
    if (isTested && (!only_leaves || _.isUndefined(node.children))) {
      order.push(node.data.name);
    }
  });
  return order;
}
/**
 * Sums the branch lengths reported by `tree.branch_length` over all nodes
 * visited by `tree.traverse_and_compute`. Nodes with a falsy branch length
 * contribute nothing; other lengths are converted to numbers before adding.
 *
 * @param {Object} tree - a phylotree object
 * @return {Number} total length of the tree
 */
function totalTreeLength(tree) {
  let total = 0;
  tree.traverse_and_compute((node) => {
    const length = tree.branch_length(node);
    if (length) {
      total += +length;
    }
  });
  return total;
}
/**
 * Collects the names of the tips of a tree, i.e. of every visited node
 * that has no children (a missing or empty `children` list).
 *
 * @param {Object} tree - A phylotree object which represents a phylogenetic tree.
 * @return {Array<String>} Tip names in traversal order.
 */
function seqNames(tree) {
  const tips = [];
  tree.traverse_and_compute((node) => {
    const isInternal = node.children && node.children.length;
    if (!isInternal) {
      tips.push(node.data.name);
    }
  });
  return tips;
}
/**
 * Collects the names of the nodes attached directly to the root, i.e. the
 * nodes whose parent has no parent of its own.
 *
 * @param {Object} tree - A phylotree object which represents a phylogenetic tree.
 * @return {Set<String>} Names of the root's direct children.
 */
function rootChildren(tree) {
  const names = new Set();
  tree.traverse_and_compute((node) => {
    const parent = node.parent;
    if (!parent || parent.parent) {
      return;
    }
    names.add(node.data.name);
  });
  return names;
}
/**
 * Lists the tree view options available for a results JSON object.
 *
 * Partition options come first: "Alignment-wide tree" when
 * `resultsJson.input.trees` holds exactly one tree, otherwise
 * "Partition 1" ... "Partition N". The trees are only counted, not parsed.
 *
 * Codon options follow when `resultsJson.substitutions` exists and
 * `includeCodons` is not false:
 *  - by default "Codon 1" ... "Codon <number of sites>";
 *  - with `onlyWithSubstitutions`, one "Codon K" per site that has a truthy
 *    entry in `substitutions`, where K is the 1-based site index within the
 *    partition plus the summed coverage lengths of the preceding partitions.
 *
 * @param {Object} resultsJson - The results JSON object containing tree data.
 * @param {Object} options - Configuration options.
 * @param {boolean} [options.onlyWithSubstitutions=false] - If true, only include codons with substitutions.
 * @param {boolean} [options.includeMapping=false] - If true, also return the codon-to-partition mapping.
 * @param {boolean} [options.includeCodons=true] - If false, only include partition options, no codons.
 *
 * @returns {Array|Array[]} The option strings; or, with `includeMapping`,
 * `[options, mapping]` where `mapping[K] = [partitionKey, siteWithinPartition]`
 * (filled only in `onlyWithSubstitutions` mode).
 */
function getTreeViewOptions(resultsJson, options = {}) {
  const onlyWithSubstitutions = options.onlyWithSubstitutions || false;
  const includeMapping = options.includeMapping || false;
  const includeCodons = options.includeCodons !== false; // Default to true

  // Only the number of trees matters here, so count them instead of
  // building a phylotree object for each one.
  const partitionCount = _.size(resultsJson.input.trees);

  const opts = [];
  const codonIdxToPartIdx = {};

  // Add partition options based on number of partitions
  if (partitionCount === 1) {
    opts.push("Alignment-wide tree");
  } else {
    for (let partition = 1; partition <= partitionCount; partition++) {
      opts.push("Partition " + partition);
    }
  }

  // Add codon options if substitutions data exists and includeCodons is true
  if (resultsJson.substitutions && includeCodons) {
    if (onlyWithSubstitutions) {
      // Sites are numbered within their partition; `offset` shifts them to
      // alignment-wide codon numbers.
      let offset = 0;
      _.each(resultsJson.substitutions, (sites, partition) => {
        _.each(sites, (subs, site) => {
          if (subs) {
            const siteInPartition = (+site) + 1;
            const idx = siteInPartition + offset;
            codonIdxToPartIdx[idx] = [partition, siteInPartition];
            opts.push("Codon " + idx);
          }
        });
        // Advance by this partition's coverage length, when it is known.
        const coverage = resultsJson["data partitions"]?.[partition]?.coverage;
        if (coverage) {
          offset += coverage[0].length;
        }
      });
    } else {
      // Include all codons
      const siteCount = resultsJson.input["number of sites"];
      for (let codon = 1; codon <= siteCount; codon++) {
        opts.push("Codon " + codon);
      }
    }
  }

  return includeMapping ? [opts, codonIdxToPartIdx] : opts;
}
/**
 * Builds one phylotree object per entry of `results_json.input.trees` and
 * gives each a `branch_length_accessor` produced by setBranchLengthAccessor
 * for that tree's index and the chosen model.
 *
 * @param {Object} results_json - The JSON object containing input trees and
 * branch attributes for each tree.
 * @param {string} modelForTree - The model name used to look up branch
 * lengths. Defaults to "Global MG94xREV".
 *
 * @returns {Array<phylotree.phylotree>} The phylotree objects, in the
 * order of the input trees.
 */
function getTreeObjects(results_json, modelForTree = "Global MG94xREV") {
  const buildTree = (newick, partition) => {
    const tree = new phylotree.phylotree(newick);
    tree.branch_length_accessor = setBranchLengthAccessor(tree, results_json, partition, modelForTree);
    return tree;
  };
  return _.map(results_json.input.trees, buildTree);
}
/**
 * Creates a branch length accessor for one tree of the results JSON.
 *
 * The returned function looks the node's name up in
 * `resultsJson["branch attributes"][index]` each time it is called and
 * returns the value stored under `branchLengthKey`. It returns 0 when the
 * node has no entry or the stored value is falsy.
 *
 * @param {Object} tree - The phylotree object (not used by the accessor)
 * @param {Object} resultsJson - The results JSON object containing branch attributes
 * @param {number} index - The index of the tree in the results
 * @param {string} branchLengthKey - The key to use for branch lengths
 * @returns {Function} - The branch length accessor function
 */
function setBranchLengthAccessor(tree, resultsJson, index, branchLengthKey) {
  return (n) => {
    const branchAttributes = resultsJson["branch attributes"][index];
    const name = n.data.name;
    if (!(name in branchAttributes)) {
      return 0;
    }
    return branchAttributes[name][branchLengthKey] || 0;
  };
}
/**
 * Converts a tree view option label into a 0-based index.
 *
 * 'Alignment-wide tree' maps to 0. For any other label the first run of
 * digits is read as a 1-based number and decremented, so 'Partition 2'
 * gives 1 and 'Codon 10' gives 9. A label without digits maps to 0.
 *
 * @param {string} option - The selected tree view option
 * @returns {number} The 0-based index
 */
function getTreeId(option) {
  if (option === 'Alignment-wide tree') {
    return 0;
  }
  const digits = option.match(/\d+/);
  if (!digits) {
    return 0;
  }
  return Number.parseInt(digits[0], 10) - 1;
}
/**
 * Creates a function that configures node display and labeling for a
 * phylogenetic tree.
 *
 * The returned function `(rawTree, renderedTree)`:
 *  1. turns leaf and internal name display on or off (`showSeqNames`);
 *  2. calls treeNodeOrdering, which re-sorts the tree (its result is unused);
 *  3. when `nodeLabels` is given, registers a node styler that writes a
 *     label built from the codon (`showCodons`), the amino acid (`showAA`)
 *     and the node name (`showSeqNames`), and sets a `title` with the name;
 *  4. sets `_display_me` on every node. Without filters every node is
 *     displayed. With `showOnlyMH`, nodes whose label reports more than one
 *     substitution are displayed; with `showOnlyNS` alone, nodes whose
 *     amino acid differs from their parent's (neither being '-') are
 *     displayed. A displayed node also marks its direct parent as displayed.
 *     Nodes without a label (including when `nodeLabels` is null) are not
 *     displayed while a filter is active;
 *  5. re-sorts siblings by ascending subtree depth.
 *
 * @param {Object} tested - The object containing branch testing information. Subset of resultsJson.
 * @param {Object|null} nodeLabels - Node name -> label array
 *   `[codon, aminoAcid, parentCodon, substitutionCount, ...]`
 * @param {Object} [options] - Configuration options
 * @param {boolean} [options.showAA=false] - Put the amino acid in the label
 * @param {boolean} [options.showCodons=false] - Put the codon in the label
 * @param {boolean} [options.showSeqNames=false] - Show node names
 * @param {boolean} [options.showOnlyMH=false] - Display only multi-substitution nodes
 * @param {boolean} [options.showOnlyNS=false] - Display only nodes with an amino-acid change
 * @returns {Function} - A function that configures the tree
 */
function getConfigureNodesFn(tested, nodeLabels, options) {
  const {
    showAA = false,
    showCodons = false,
    showSeqNames = false,
    showOnlyMH = false,
    showOnlyNS = false
  } = options ?? {};

  // Annotate every node with its subtree depth and order siblings
  // shallow-first.
  const sortByDepth = (rawTree) => {
    rawTree.traverse_and_compute((n) => {
      let depth = 1;
      if (n.children && n.children.length) {
        depth += d3.max(n.children, (child) => child["count_depth"]);
      }
      n["count_depth"] = depth;
    });
    rawTree.resortChildren((a, b) => a["count_depth"] - b["count_depth"]);
  };

  return (rawTree, renderedTree) => {
    // Set up node display
    renderedTree.show_internal_names = showSeqNames;
    renderedTree.show_leaf_names = showSeqNames;

    // Called for its effect on the tree; the returned ordering is not used.
    treeNodeOrdering(rawTree, tested, false, false);

    // Set up node labels
    if (nodeLabels) {
      renderedTree.style_nodes((e, n) => {
        // Skip nodes without labels
        const ext = nodeLabels[n.data.name];
        if (!ext) return;

        let label = "";
        n.data.color_on = "";
        if (showCodons) {
          label = ext[0];
          n.data.color_on = ext[0];
          if (showAA) label += "/";
        }
        if (showAA) {
          label += ext[1];
        }
        if (showSeqNames) {
          label += ":" + n.data.name;
        }
        e.selectAll("text").text(label);
        e.selectAll("title").data([n.data.name]).join("title").text((d) => d);
      });
    }

    // Configure node display based on options
    rawTree.traverse_and_compute((n) => {
      // nodeLabels may be null; treat that as "no label for any node".
      const own = nodeLabels ? nodeLabels[n.data.name] : undefined;
      n._display_me = !(showOnlyMH || showOnlyNS);
      if (!n._display_me && own) {
        if (showOnlyMH && own[3] > 1) {
          n._display_me = true;
        }
        if (!n._display_me && showOnlyNS && n.parent) {
          const my_aa = own[1];
          const parent_aa = nodeLabels[n.parent.data.name][1];
          if (my_aa != parent_aa && my_aa != '-' && parent_aa != '-') {
            // With both filters on, the multi-substitution test decides.
            n._display_me = showOnlyMH ? own[3] > 1 : true;
          } else {
            n._display_me = false;
          }
        }
      }
      // NOTE(review): only the direct parent is marked; ancestors further
      // up were already visited in pre-order — confirm this is intended.
      if (n._display_me && n.parent) {
        n.parent._display_me = true;
      }
    }, "pre-order");

    // Sort nodes by depth
    sortByDepth(rawTree);
  };
}
/**
 * Creates a function `(rawTree, renderedTree)` that applies one of the
 * branch colouring schemes, selected by `params.color_branches`:
 *  - "Tested" -> configureTestedBranches
 *  - "Support for selection", or "Error-sink support" when
 *    `params.use_error_sink` is set -> configureSupportBranches
 *  - "Substitutions" -> configureSubstitutionBranches
 * Any other value leaves the tree untouched.
 *
 * @param {Object} results - The results object containing branch attributes
 * @param {Object} params - Parameters specific to the method
 * @param {string} params.color_branches - The type of branch coloring to apply
 * @param {Object} params.index - The index of the current partition/site
 * @param {Object} params.s - The site index (for site-specific visualizations)
 * @param {Object} params.has_error_sink - Whether error sink is present; adds 1 to the rate class passed on
 * @param {boolean} [params.use_error_sink=false] - Whether to support error sink
 * @param {boolean} [params.use_site_specific_support=false] - Whether to use site-specific support calculation
 * @param {boolean} [params.use_turbo_color=false] - Whether to use turbo color scale instead of PuOr
 * @param {Object} [params.node_labels=null] - Node labels forwarded for "Substitutions"
 * @param {boolean} [params.add_branch_labels=false] - Whether to add branch labels
 * @param {boolean} [params.use_omega_support=false] - Whether to use omega values as support
 * @param {Array} params.test_omega - Omega rate classes with weights; the last
 *   class supplies the rate class index and prior passed on
 * @param {Object} options - Not used by this function
 * @returns {Function} - A function that configures the tree
 */
function getConfigureBranchesFn(results, params, options) {
  const {
    color_branches,
    index,
    s,
    has_error_sink,
    use_error_sink = false,
    use_site_specific_support = false,
    use_turbo_color = false,
    node_labels = null,
    add_branch_labels = false,
    use_omega_support = false,
    test_omega
  } = params;

  // TODO: decide if the test_* utils should be defined here or in the methods
  const wantsErrorSink = color_branches === "Error-sink support";
  const wantsSupport = color_branches === "Support for selection" || (wantsErrorSink && use_error_sink);

  return (rawTree, renderedTree) => {
    if (color_branches === "Tested") {
      configureTestedBranches(results, index, renderedTree);
      return;
    }

    if (wantsSupport) {
      // Index of the last omega class; NaN when no classes were supplied.
      const topRateClass = test_omega?.length - 1;
      configureSupportBranches(results, index, rawTree, renderedTree, {
        es: wantsErrorSink,
        rate_class: topRateClass + (has_error_sink ? 1 : 0),
        prior: test_omega?.[topRateClass]?.weight,
        use_turbo_color,
        use_site_specific_support,
        s,
        add_branch_labels,
        use_omega_support
      });
      return;
    }

    if (color_branches === "Substitutions") {
      configureSubstitutionBranches(results, index, renderedTree, {
        use_site_specific_support,
        node_labels,
        add_branch_labels,
        s
      });
    }
  };
}
/**
 * Registers an edge styler that highlights tested branches: a branch whose
 * target node is marked "test" in `results["tested"][index]` is drawn as a
 * thick, fully opaque firebrick stroke; every other branch has its stroke
 * colour cleared and is drawn at 25% opacity.
 *
 * @param {Object} results - The results object containing the `tested` map
 * @param {number} index - The index of the current partition/site
 * @param {Object} renderedTree - The rendered tree object
 */
function configureTestedBranches(results, index, renderedTree) {
  renderedTree.style_edges((e, n) => {
    const testedMap = results["tested"][index];
    const highlighted = testedMap[n.target.data.name] === "test";
    if (!highlighted) {
      e.style("stroke", null).style("opacity", 0.25);
      return;
    }
    e.style("stroke", "firebrick")
      .style("stroke-width", "5")
      .style("opacity", 1.0);
  });
}
/**
* Configures branch colors for support calculations.
* @param {Object} results - The results object containing branch attributes
* @param {number} index - The index of the current partition/site
* @param {Object} rawTree - The raw tree object
* @param {Object} renderedTree - The rendered tree object
* @param {Object} params - Configuration parameters
* @param {boolean} params.es - Whether to use error sink support
* @param {number} params.rate_class - The rate class to use
* @param {number} params.prior - The prior value
* @param {boolean} params.use_turbo_color - Whether to use turbo color scale
* @param {boolean} params.use_site_specific_support - Whether to use site-specific support
* @param {number} params.s - The site index
* @param {boolean} params.use_omega_support - Whether to use omega values as support
* @param {boolean} params.add_branch_labels - Whether to add branch labels
*/
function configureSupportBranches(results, index, rawTree, renderedTree, params) {
    const {
        rate_class,
        prior,
        use_turbo_color,
        use_site_specific_support,
        s,
        add_branch_labels,
        use_omega_support
    } = params;
    // Scalar shown per branch (largest-class omega, or empirical Bayes factor), keyed by branch name.
    const branch_values = {};
    let color_scale;

    if (use_omega_support) {
        // Omega-based support: each branch is stroked with a gradient built from its rate distribution.
        const branch_gradients = {};
        const max_omega_by_branch = {};
        // [branch name, distribution] pairs in traversal order.
        const distributions = [];

        // Pass 1: collect every distribution BEFORE building the color scale, so that all
        // gradient stops and the legend share one domain (the overall maximum omega).
        rawTree.traverse_and_compute((n) => {
            const name = n.data.name;
            const test_omega = getRateDistributionByBranch(results, name);
            // Skip branches with no distribution (or an empty one, which has no last class).
            if (test_omega && test_omega.length) {
                const top_class = test_omega[test_omega.length - 1];
                branch_values[name] = top_class.value;
                max_omega_by_branch[name] = top_class;
                distributions.push([name, test_omega]);
            }
        });

        // Pass 2: build the scale once, then one <linearGradient> per branch.
        if (distributions.length > 0) {
            // Diverging log scale centred at omega = 1; upper bound capped at 1000.
            color_scale = d3.scaleDivergingLog(
                [1e-4, 1, Math.min(1000, d3.max(Object.values(branch_values)))],
                ["rgb(0,0,255)", "rgb(128,128,128)", "rgb(255,0,0)"]
            );
            const defs = renderedTree.svg.selectAll("defs");
            distributions.forEach(([name, test_omega], bID) => {
                branch_gradients[name] = "hyphy_phylo_branch_gradient_" + bID;
                const gradient_def = defs
                    .append("linearGradient")
                    .attr("id", branch_gradients[name]);
                // Two stops per rate class (start and end at the same color) produce
                // hard-edged bands whose widths follow the class weights.
                let current_frac = 0;
                test_omega.forEach((t) => {
                    const stop_color = color_scale(t.value);
                    gradient_def.append("stop")
                        .attr("offset", current_frac * 100 + "%")
                        .style("stop-color", stop_color);
                    current_frac += t.weight;
                    gradient_def.append("stop")
                        .attr("offset", current_frac * 100 + "%")
                        .style("stop-color", stop_color);
                });
            });
        }

        renderedTree.color_scale = color_scale;
        renderedTree.color_scale_title = "ω";

        renderedTree.style_edges((e, n) => {
            const name = n.target.data.name;
            const is_tested = results["tested"][index][name] === "test";
            const gradient_stroke = "url('#" + branch_gradients[name] + "')";
            let t_string = name + " ";
            let b_string = "";
            if (is_tested) {
                const test_pv = getBranchPvalue(results, name);
                const pv_l = test_pv ? test_pv : 0;
                t_string += "(p = " + pv_l.toFixed(3) + ")";
                if (pv_l > 0) {
                    // Thicker strokes for smaller p-values: one extra unit per order of magnitude (p floored at 1e-6).
                    const log_p = -Math.floor(Math.log10(Math.max(pv_l, 1e-6)));
                    const mxo = max_omega_by_branch[name]?.value;
                    if (mxo && mxo > 1) {
                        const formatted_mxo = mxo > 1000 ? ">1000" : mxo.toFixed(2);
                        b_string = formatted_mxo + "/" +
                            (max_omega_by_branch[name].weight * 100).toFixed(2) + "%";
                    }
                    e.style("stroke", gradient_stroke).style("stroke-width", 2 + log_p);
                }
            } else {
                t_string += "(not tested)";
                e.style("stroke", gradient_stroke)
                    .style("stroke-width", "2")
                    .style("opacity", "0.5");
            }
            t_string += " max ω = " + (branch_values[name] || 0).toFixed(2);
            e.style("stroke-linejoin", "round").style("stroke-linecap", "round");
            e.selectAll("title").data([t_string]).join("title").text((d) => d);
            if (add_branch_labels) {
                addBranchLabel(e, b_string, renderedTree.font_size,
                    renderedTree.svg.selectAll(".phylotree-container"));
            }
        });
    } else {
        // Empirical Bayes Factor support: posterior odds of the chosen rate class divided by `prior`.
        rawTree.traverse_and_compute((n) => {
            const posteriors = results["branch attributes"][index][n.data.name];
            // NOTE(review): the truthiness test also skips rate_class === 0 — confirm that is intended.
            if (posteriors && rate_class) {
                const support = use_site_specific_support
                    ? posteriors["Posterior prob omega class by site"][rate_class][s - 1]
                    : posteriors["Posterior prob omega class"][rate_class];
                const factor = support / (1 - support) / prior;
                // Factors below 1 are stored as null so they do not influence the scale's extent.
                branch_values[n.data.name] = factor < 1 ? null : factor;
            }
        });
        color_scale = d3.scaleSequentialLog(
            d3.extent(Object.values(branch_values)),
            use_turbo_color ? d3.interpolateTurbo : d3.interpolatePuOr
        );
        renderedTree.color_scale = color_scale;
        renderedTree.color_scale_title = "Empirical Bayes Factor";
    }
}
/**
 * Colors tree branches by a per-branch numeric value and installs the matching
 * color scale (titled "Min # of nucleotide substitutions") on the rendered tree.
 *
 * The per-branch values are taken from, in order of precedence:
 *   1. `params.node_labels` when it is anything other than `null`;
 *   2. `results["substitutions"][index][params.s - 1]` when
 *      `params.use_site_specific_support` is truthy;
 *   3. `subsByBranch(results, index)` otherwise.
 *
 * Branches whose value is missing or falsy (including 0) are left unstyled.
 *
 * @param {Object} results - The results object containing substitution data
 * @param {number} index - The index of the current partition
 * @param {Object} renderedTree - The rendered tree object (mutated: color scale, edge styler)
 * @param {Object} params - Configuration parameters
 * @param {boolean} params.use_site_specific_support - Whether to read the values for site `params.s`
 * @param {?Object} params.node_labels - Branch name -> value map, or null to derive it from `results`
 * @param {boolean} params.add_branch_labels - Whether to add a text label to each styled branch
 * @param {number} params.s - The 1-based site index (only used for site-specific values)
 */
function configureSubstitutionBranches(results, index, renderedTree, params) {
    // Resolve the branch name -> value map.
    let branchCounts;
    if (params.node_labels !== null) {
        branchCounts = params.node_labels;
    } else if (params.use_site_specific_support) {
        branchCounts = results["substitutions"][index][params.s - 1];
    } else {
        branchCounts = subsByBranch(results, index);
    }

    // Sequential Turbo palette spanning the observed [min, max] of the values.
    const valueRange = d3.extent(_.map(branchCounts, (count) => count));
    const palette = d3.scaleSequential(valueRange, d3.interpolateTurbo);

    renderedTree.color_scale = palette;
    renderedTree.color_scale_title = "Min # of nucleotide substitutions";

    renderedTree.style_edges((edge, link) => {
        const count = branchCounts[link.target.data.name];
        if (!count) {
            return;
        }
        edge.style("stroke", palette(count))
            .style("stroke-width", "5")
            .style("opacity", 1.0);
        // Tooltip shows the raw value.
        edge.selectAll("title").data([count]).join("title").text((d) => d);
        if (params.add_branch_labels) {
            addBranchLabel(
                edge,
                count,
                renderedTree.font_size,
                renderedTree.svg.selectAll(".phylotree-container")
            );
        }
    });
}
/**
 * Configures and renders a phylogenetic tree with customizable options.
 *
 * Renders `rawTree` as a non-radial, non-selectable tree with a scale bar and
 * zero-size node circles, adds the shared SVG definitions, runs the optional
 * branch and node configuration hooks (branches first), and finally re-lays
 * out and updates the tree.
 *
 * @param {Object} rawTree - The raw phylotree object (must provide `render`)
 * @param {string} [treeDim] - Tree dimensions as "height x width" (e.g. "600 x 800").
 *   The first number is passed to the renderer as `height`, the second as `width`.
 *   An empty, missing, or non-string value passes `null` for both.
 * @param {Object} [options] - Optional configuration options
 * @param {Function} [options.configureNodes] - Called as `(rawTree, renderedTree)` to configure node display and colors
 * @param {Function} [options.configureBranches] - Called as `(rawTree, renderedTree)` to configure branch colors and shading
 * @param {boolean} [options.align-tips] - Forwarded to the renderer (defaults to false)
 * @param {boolean} [options.internal-names] - Forwarded to the renderer (defaults to false)
 * @returns {Object} The configured and rendered tree object returned by `rawTree.render`
 */
function configureTree(rawTree, treeDim, options = {}) {
    // Parse "H x W"; tolerate a missing / non-string value instead of throwing on `.length`.
    const hasDim = typeof treeDim === "string" && treeDim.length > 0;
    const dim = hasDim ? treeDim.split("x").map((d) => +d) : null;

    const renderedTree = rawTree.render({
        height: dim && dim[0],
        width: dim && dim[1],
        'align-tips': options['align-tips'] || false,
        'show-scale': true,
        'is-radial': false,
        'left-right-spacing': 'fit-to-size',
        'top-bottom-spacing': 'fit-to-size',
        'node_circle_size': () => 0,
        'internal-names': options['internal-names'] || false,
        'selectable': false
    });

    // Add SVG definitions for branch labels
    addSvgDefs(renderedTree.svg);

    // Hooks run in a fixed order: branches first, then nodes.
    if (options.configureBranches) {
        options.configureBranches(rawTree, renderedTree);
    }
    if (options.configureNodes) {
        options.configureNodes(rawTree, renderedTree);
    }

    // Update tree layout
    renderedTree.placenodes();
    renderedTree.update();
    return renderedTree;
}
// TODO: need to refactor to reduce code duplication. this module may disappear then.
// Number formatter using d3-format ".2g": decimal or exponent notation, rounded to 2 significant digits.
const floatFmt$1 = d3.format (".2g");
// Percentage formatter using d3-format ".2p": multiplies by 100, rounds to 2 significant digits, appends "%".
const percentageFormat = d3.format (".2p");
/**
 * Collects the attributes of a BUSTED results JSON that the visualizations need.
 *
 * Most values are delegated to helpers defined elsewhere in this file; this
 * function only wires them together and reads a few fields directly from the
 * "Unconstrained model" fit.
 *
 * @param {Object} resultsJson - The JSON object containing the BUSTED results
 * @returns {Object} An object with the following attributes:
 *   - testedBranchCount - As reported by `extractCommonAttributes`
 *   - srvRateClasses {number} - Number of entries in the "Synonymous site-to-site rates" distribution
 *   - srvDistribution - Result of `getRateDistribution` for the synonymous site-to-site rates
 *     (requested with the "rate" and "proportion" fields)
 *   - partitionSizes - As reported by `extractCommonAttributes`
 *   - hasBackground - Result of `hasBackground(resultsJson)`
 *   - hasSrvHmm {boolean} - Whether the key "Viterbi synonymous rate path" is present
 *   - hasErrorSink - Result of `hasErrorSink(resultsJson)`
 *   - hasErrorSinkNt - Result of `getBustedHasErrorSinkNt`
 *   - mhRates {Object} - `DH` / `TH`: the 2- and 3-nucleotide instantaneous change rates
 *     from the unconstrained fit (undefined when absent)
 */
function getBustedAttributes(resultsJson) {
    // Shared location of the unconstrained model's rate distributions.
    const rateDistributionsPath = ["fits", "Unconstrained model", "Rate Distributions"];
    const srvKey = "Synonymous site-to-site rates";

    // Attributes common to several methods.
    const shared = extractCommonAttributes(resultsJson);

    // BUSTED-specific attributes.
    const srvRateClasses = _.size(
        resultsJson.fits["Unconstrained model"]["Rate Distributions"][srvKey]
    );
    const backgroundPresent = hasBackground(resultsJson);
    const hasSrvHmm = "Viterbi synonymous rate path" in resultsJson;
    const errorSinkPresent = hasErrorSink(resultsJson);
    const hasErrorSinkNt = getBustedHasErrorSinkNt(resultsJson, errorSinkPresent, backgroundPresent);
    const srvDistribution = getRateDistribution(
        resultsJson,
        errorSinkPresent,
        [...rateDistributionsPath, srvKey],
        ["rate", "proportion"]
    );

    // Multiple-hit rates; _.get yields undefined when the model did not estimate them.
    const doubleHitRate = _.get(resultsJson, [
        ...rateDistributionsPath,
        'rate at which 2 nucleotides are changed instantly within a single codon'
    ]);
    const tripleHitRate = _.get(resultsJson, [
        ...rateDistributionsPath,
        'rate at which 3 nucleotides are changed instantly within a single codon'
    ]);

    return {
        testedBranchCount: shared.testedBranchCount,
        srvRateClasses,
        srvDistribution,
        partitionSizes: shared.partitionSizes,
        hasBackground: backgroundPresent,
        hasSrvHmm,
        hasErrorSink: errorSinkPresent,
        hasErrorSinkNt,
        mhRates: { 'DH': doubleHitRate, 'TH': tripleHitRate }
    };
}
function getBustedTileSpecs(resultsJson, evThreshold, bsPositiveSelection, contributingSites) {
const attrs = getBustedAttributes(resultsJson);
const subFractions = _.map (
[
"Fraction of subs rate at which 2 nucleotides are changed instantly within a single codon",
"Fraction of subs rate at which 3 nucleotides are changed instantly within a single codon"
],
(d)=>resultsJson["fits"]["Unconstrained model"]["Rate Distributions"][d]
);
const omegaRateClasses = _.size(getBustedTestOmega(resultsJson, attrs.hasErrorSink));
const tileTableInputs = [
{
number: resultsJson.input["number of sequences"],
description: "sequences in the alignment",
icon: "icon-options-vertical icons",
color: "asbestos"
},
{
number: resultsJson.input["number of sites"],
description: "codon sites in the alignment",
icon: "icon-options icons",
color: "asbestos"
},
{
number: resultsJson.input["partition count"],
description: "partitions",
icon: "icon-arrow-up icons",
color: "asbestos"
},
{
number: attrs.testedBranchCount,
description: "median branches/partition used for testing",
icon: "icon-share icons",
color: "asbestos"
},
{
number: omegaRateClasses + " classes",
description: "non-synonymous rate variation",
icon: "icon-grid icons",
color: "asbestos"
},
{
number: attrs.srvRateClasses ? attrs.srvRateClasses + " classes" + (attrs.hasSrvHmm ? " HMM" : "") : "None",
description: "synonymous rate variation",
icon: "icon-layers icons",
color: "asbestos"
},
{
number: floatFmt$1(resultsJson["test results"]["p-value"]),
description: "p-value for episodic diversifying selection",
icon: "icon-plus icons",
color: "midnight_blue"
},
{
number: resultsJson["Evidence Ratios"]["constrained"] ? _.filter(resultsJson["Evidence Ratios"]["constrained"][0], (d) => d >= evThreshold).length : 0,
description: `Sites with ER≥${evThreshold} for positive selection`,
icon: "icon-energy icons",
color: "midnight_blue"
},
{
number: !_.isUndefined(attrs.mhRates['DH']) ? floatFmt$1(attrs.mhRates['DH']) : "N/A" + ":" + !_.isUndefined(attrs.mhRates['TH']) ? floatFmt$1(attrs.mhRates['TH']) : "N/A",
description: "Multiple hit rates (2H:3H)",
icon: "icon-target icons",
color: "midnight_blue"
},
{
number: resultsJson["Evidence Ratios"]["constrained"] ? _.filter(bsPositiveSelection, (d) => d.ER >= 100).length : "N/A",
description: "(branch, site) pairs with EBF ≥ 100",
icon: "icon-bulb icons",
color: "midnight_blue"
},
{
number: contributingSites ? contributingSites.length : "N/A",
description: "Sites contributing most signal to EDS detection",
icon: "icon-tag icons",
color: "midnight_blue"
},
{
number: !_.isUndefined(subFractions[0]) ? percentageFormat(subFractions[0]) : "N/A"