UNPKG

phylotree

Version:

A JavaScript library for developing applications and interactive visualizations involving [phylogenetic trees](https://en.wikipedia.org/wiki/Phylogenetic_tree), written as an extension of the [D3](http://d3js.org) [hierarchy layout](https://github.com/d3/

209 lines (173 loc) 5.25 kB
#!/usr/bin/env node const fs = require("fs"), phylotree = require("../dist/phylotree.js"), commander = require("commander"), _ = require("underscore"), moment = require("moment"), winston = require("winston"), stringify = require("csv-stringify"); const logger = winston.createLogger({ level: "warn", transports: [ new winston.transports.Console({ format: winston.format.simple() }) ] }); /* * Computes root-to-tip distance and fits linear regression * Please see the following notebook for more details * https://observablehq.com/@stevenweaver/computing-root-to-tip-distances-with-phylotree-js * * Usage: * root-to-tip -n test/data/MERS.txt * */ const default_regexp = /([0-9]{4})-?([0-9]{2})-?([0-9]{2})$/g; const default_date_format = "YYYY-MM-DD"; const default_pos = "last"; const default_log = "warn"; var regexp = [default_regexp]; function collect(value, previous) { return previous.concat([value]); } commander .requiredOption("-n --newick <newick>", "Input newick file") .option("-r --regex <regex>", "Regular expression to search date for", collect, []) .option( "-s --split-on-char <delimiter>", "Splits tip name based on delimiter" ) .option( "-i --index <index>", "Used with -s argument. Can be first, last, or <index>", default_pos ) .option( "-f --date-format <format>", "Specifies date format in tip.", default_date_format ) .option("-l --log-level <level>", "Specify log level", default_log); commander .on("--help", function() { console.log(""); console.log("Examples:"); console.log( 'tip-date-extractor -n test/data/MERS.txt -s "_" -f YYYY-MM-DD' ); console.log( "tip-date-extractor -n test/data/MERS.txt -r [0-9]{4}-[0-9]{2}-[0-9]{2} -f YYYY-MM-DD" ); }) .parse(process.argv); if (commander.regex.length && commander.splitOnChar) { logger.warn("-r and -s options are mutually exclusive"); process.exit(1); } if (commander.regex.length) { regexp = _.map(commander.regex, r => new RegExp(r)); } if (commander.logLevel) { logger.level = commander.logLevel; } var date_format = undefined; if (commander.dateFormat) { date_format = commander.dateFormat; } // Assumes date formatted like 1984-09-20 let default_date_parser = function(tree, node) { const default_regexp = /([0-9]{4})-?([0-9]{2})-?([0-9]{2})$/g; var location = ""; if (tree.isLeafNode(node)) { if ("name" in node.data) { location = default_regexp.exec(node.data.name); if (location) { return location[1] + location[2] + location[3]; } else { const default_regexp = /([0-9]{4})-?([0-9]{2})$/g; location = default_regexp.exec(node.data.name); if (location) { return location[1] + location[2] + "1"; } } } } return null; }; // Example - [0-9]{4}-[0-9]{2}-[0-9]{2} let regex_date_parser = function(tree, regex, format, node) { var location = ""; if (tree.isLeafNode(node) && "name" in node.data) { // map to each regex, filter, and return first result location = _.filter(_.map(regex, r => r.exec(node.data.name))); if (location) { // cast to date and format. let parsed_date = moment(location[0][0], format); // log if debugger is turned on let t = parsed_date.format("YYYYMMDD"); return t; } logger.debug("Unable to find date for " + node.data.name); } return null; }; let split_date_parser = function(tree, delimiter, pos, format, node) { // pos can be first, last, or index if (tree.isLeafNode(node) && "name" in node.data) { try { let nsp = node.data.name.split(delimiter); let itm = ""; // cast to date and format. if (pos == "last") { itm = nsp.pop(); } else if (pos == "first") { itm = nsp[0]; } else { itm = nsp[pos]; } let parsed_date = moment(itm, format); let t = parsed_date.format("YYYYMMDD"); return t; } catch (e) { logger.debug("Unable to find date for " + node.data.name); } } return null; }; fs.readFile(commander.newick, (err, newick_data) => { const tree = new phylotree.phylotree(newick_data.toString()); let computed_tree = phylotree.rootToTip(tree); let date_parser = _.partial(default_date_parser, computed_tree); // Set appropriate date parser if (commander.splitOnChar) { date_parser = _.partial( split_date_parser, computed_tree, commander.splitOnChar, "last", date_format ); } else if (commander.regex.length) { date_parser = _.partial( regex_date_parser, computed_tree, regexp, date_format ); } let tree_with_dates = phylotree.extract_dates(computed_tree, date_parser); // Filter just in case the date extractor did not always find a date from the header const mapped = _.map(tree_with_dates.getTips(), d => [ d.data.name, d.data.decimal_date_value ]); let date_and_distances = _.filter(mapped, d => { return !_.isNull(d.decimal_date_value); }); stringify(date_and_distances, function(err, output) { // Pretty print table console.log("name, date"); console.log(output); }); });