UNPKG

phylotree

Version:

A JavaScript library for developing applications and interactive visualizations involving [phylogenetic trees](https://en.wikipedia.org/wiki/Phylogenetic_tree), written as an extension of the [D3](http://d3js.org) [hierarchy layout](https://github.com/d3/d3-hierarchy).

282 lines (243 loc) 7.61 kB
import { isLeafNode } from "../nodes";

/**
 * Parses a Newick string into an equivalent JSON representation that is
 * suitable for consumption by ``hierarchy``.
 *
 * Supports Newick strings with or without branch lengths, names quoted with
 * single or double quotes (a doubled quote character inside a quoted name
 * yields one literal quote), and tagged trees such as
 * ``(a,(b{TAG},(c{TAG},d{ANOTHERTAG})))``.
 *
 * Every finished node receives ``name``, ``attribute`` (the raw text read
 * after ``:``) and ``annotation`` (the raw text between the delimiters).
 * Nodes that have children additionally receive ``bootstrap_values``, set to
 * the same text as their name. When ``left_delimiter`` is ``[`` and the
 * annotation contains ``&&NHX``, the colon-separated ``key=value`` pairs are
 * copied onto the node instead of ``annotation``; values that look like
 * integers or decimals are converted to numbers.
 *
 * @param {String} nwk_str - A string representing a phylogenetic tree in Newick format.
 * @param {Object} [options] - Parser options.
 * @param {String} [options.left_delimiter='{'] - Character that opens an annotation.
 * @param {String} [options.right_delimiter='}'] - Character that closes an annotation.
 * @returns {Object} An object with keys ``json`` and ``error``. On success
 *   ``error`` is ``null``; on failure ``json`` is ``null`` and ``error`` is a
 *   message quoting the input around the offending position.
 */
function newickParser(nwk_str, options = {}) {
  // Labels of nodes with children are always recorded as bootstrap values too.
  const bootstrap_values = true;
  const int_or_float = /^-?\d+(\.\d+)?$/;
  const left_delimiter = options.left_delimiter || "{";
  const right_delimiter = options.right_delimiter || "}";
  const name_quotes = { "'": 1, '"': 1 };
  const space = /\s/;

  // The synthetic root is the bottom of the stack; a well-formed tree leaves
  // exactly this one entry behind when parsing ends.
  const tree_json = { name: "root" };
  const clade_stack = [tree_json];

  // Automaton states:
  //   0 - looking for the first "("
  //   1 - reading a node name
  //   2 - inside a quoted name
  //   3 - reading the text after ":" (branch length)
  //   4 - inside an annotation (between the delimiters)
  let automaton_state = 0;
  let current_node_name = "";
  let current_node_attribute = "";
  let current_node_annotation = "";
  let quote_delimiter = null;

  // Opens a new child under the node currently on top of the stack.
  function addNewTreeLevel() {
    const new_level = { name: null };
    const the_parent = clade_stack[clade_stack.length - 1];
    // Throws when the stack is empty (unbalanced input); the main loop turns
    // that into a parse error.
    if (!("children" in the_parent)) {
      the_parent["children"] = [];
    }
    clade_stack.push(new_level);
    the_parent["children"].push(new_level);
    // 1-based position of the node among its siblings, in input order.
    new_level["original_child_order"] = the_parent["children"].length;
  }

  // Closes the node on top of the stack, storing the text accumulated for it,
  // and resets the accumulators for the next node.
  function finishNodeDefinition() {
    const this_node = clade_stack.pop();

    this_node["name"] = current_node_name;
    if (bootstrap_values && "children" in this_node) {
      this_node["bootstrap_values"] = current_node_name;
    }

    this_node["attribute"] = current_node_attribute;

    if (left_delimiter == "[" && current_node_annotation.includes("&&NHX")) {
      // The first segment is the "&&NHX" marker itself, hence slice(1).
      current_node_annotation
        .split(":")
        .slice(1)
        .forEach((annotation) => {
          const [key, value] = annotation.split("=");
          this_node[key] = int_or_float.test(value) ? +value : value;
        });
    } else {
      this_node["annotation"] = current_node_annotation;
    }

    current_node_name = "";
    current_node_attribute = "";
    current_node_annotation = "";
  }

  // Builds the failure result, quoting up to 20 characters on either side of
  // the offending position.
  function generateError(location) {
    return {
      json: null,
      error:
        "Unexpected '" +
        nwk_str[location] +
        "' in '" +
        nwk_str.substring(location - 20, location + 1) +
        "[ERROR HERE]" +
        nwk_str.substring(location + 1, location + 20) +
        "'"
    };
  }

  for (let char_index = 0; char_index < nwk_str.length; char_index++) {
    try {
      const current_char = nwk_str[char_index];

      switch (automaton_state) {
        case 0: {
          // look for the first opening parenthesis
          if (current_char === "(") {
            addNewTreeLevel();
            automaton_state = 1; // expecting node name
          }
          break;
        }

        case 1: // name
        case 3: {
          // branch length
          if (current_char === ":") {
            automaton_state = 3;
          } else if (current_char === "," || current_char === ")") {
            // Any exception raised here (e.g. unbalanced parentheses emptying
            // the stack) is reported by the enclosing catch.
            finishNodeDefinition();
            automaton_state = 1;
            if (current_char === ",") {
              addNewTreeLevel();
            }
          } else if (current_char === "(") {
            // A clade cannot open in the middle of a name.
            if (current_node_name.length > 0) {
              return generateError(char_index);
            }
            addNewTreeLevel();
          } else if (current_char in name_quotes) {
            // A quote is only legal at the very start of a node definition.
            if (
              automaton_state === 1 &&
              current_node_name.length === 0 &&
              current_node_attribute.length === 0 &&
              current_node_annotation.length === 0
            ) {
              automaton_state = 2;
              quote_delimiter = current_char;
              continue;
            }
            return generateError(char_index);
          } else if (current_char == left_delimiter) {
            // Only one annotation is allowed per node.
            if (current_node_annotation.length) {
              return generateError(char_index);
            }
            automaton_state = 4;
          } else if (automaton_state === 3) {
            current_node_attribute += current_char;
          } else {
            if (space.test(current_char)) {
              continue;
            }
            if (current_char === ";") {
              // semicolon terminates tree definition
              char_index = nwk_str.length;
              break;
            }
            current_node_name += current_char;
          }
          break;
        }

        case 2: {
          // inside a quoted expression
          if (current_char == quote_delimiter) {
            // A doubled delimiter is an escaped quote, not the end of the name.
            if (
              char_index < nwk_str.length - 1 &&
              nwk_str[char_index + 1] == quote_delimiter
            ) {
              char_index++;
              current_node_name += quote_delimiter;
              continue;
            }
            quote_delimiter = 0;
            automaton_state = 1;
            continue;
          }
          current_node_name += current_char;
          break;
        }

        case 4: {
          // inside a comment / attribute
          if (current_char == right_delimiter) {
            automaton_state = 3;
          } else {
            // Nested annotations are not supported.
            if (current_char == left_delimiter) {
              return generateError(char_index);
            }
            current_node_annotation += current_char;
          }
          break;
        }
      }
    } catch (e) {
      return generateError(char_index);
    }
  }

  // Anything other than the lone root on the stack means unbalanced input.
  if (clade_stack.length != 1) {
    return generateError(nwk_str.length - 1);
  }

  return {
    json: tree_json,
    error: null
  };
}

/**
 * Return Newick string representation of a phylotree.
 *
 * Must be invoked with ``this`` bound to an object exposing
 * ``branch_length_accessor(node)`` and, when ``root`` is omitted, ``nodes``.
 *
 * Nodes flagged ``notshown`` are left out entirely: they contribute neither a
 * label nor a separator, and a node whose children are all hidden is written
 * without parentheses. A node named ``root`` is written without a label, and
 * a missing name is written as an empty label.
 *
 * @param {Function} annotator - Function to apply to each node, determining
 * what text is written after the node label (optional).
 * @param {Node} root - start at this node (default == ``this.nodes``)
 * @returns {String} newick - Phylogenetic tree serialized as a Newick string.
 */
export function getNewick(annotator, root) {
  const self = this;
  const annotate = annotator || (() => "");
  const element_array = [];

  function nodeDisplay(n) {
    // Skip the node if it is hidden
    if (n.notshown) return;

    if (!isLeafNode(n)) {
      // Drop hidden children before placing separators; otherwise a hidden
      // child leaves a stray "," behind, which reads back as an unnamed leaf.
      const visible_children = n.children.filter((child) => !child.notshown);
      if (visible_children.length > 0) {
        element_array.push("(");
        visible_children.forEach((child, i) => {
          if (i > 0) {
            element_array.push(",");
          }
          nodeDisplay(child);
        });
        element_array.push(")");
      }
    }

    if (n.data.name !== "root") {
      // Single quotes inside a label are escaped by doubling them.
      const node_label = String(n.data.name ?? "").replaceAll("'", "''");
      // Surround the entire string with single quotes if it contains any
      // non-alphanumeric characters.
      if (/\W/.test(node_label)) {
        element_array.push("'" + node_label + "'");
      } else {
        element_array.push(node_label);
      }
    }

    element_array.push(annotate(n));

    const bl = self.branch_length_accessor(n);
    if (bl !== undefined) {
      element_array.push(":" + bl);
    }
  }

  nodeDisplay(root || this.nodes);
  return element_array.join("") + ";";
}

export default newickParser;