UNPKG

escher-vis

Version:

Escher: A Web Application for Building, Sharing, and Embedding Data-Rich Visualizations of Biological Pathways

633 lines (570 loc) 18.4 kB
/** * data_styles */ var utils = require('./utils') var _ = require('underscore') var d3_format = require('d3-format').format module.exports = { import_and_check: import_and_check, text_for_data: text_for_data, float_for_data: float_for_data, reverse_flux_for_data: reverse_flux_for_data, gene_string_for_data: gene_string_for_data, csv_converter: csv_converter, genes_for_gene_reaction_rule: genes_for_gene_reaction_rule, evaluate_gene_reaction_rule: evaluate_gene_reaction_rule, replace_gene_in_rule: replace_gene_in_rule, apply_reaction_data_to_reactions: apply_reaction_data_to_reactions, apply_metabolite_data_to_nodes: apply_metabolite_data_to_nodes, apply_gene_data_to_reactions: apply_gene_data_to_reactions } // globals var RETURN_ARG = function(x) { return x; } var ESCAPE_REG = /([.*+?^=!:${}()|\[\]\/\\])/g var EMPTY_LINES = /\n\s*\n/g var TRAILING_NEWLINE = /\n\s*(\)*)\s*$/ var AND_OR = /([\(\) ])(?:and|or)([\)\( ])/ig var ALL_PARENS = /[\(\)]/g // capture an expression surrounded by whitespace and a set of parentheses var EXCESS_PARENS = /\(\s*(\S+)\s*\)/g var OR = /\s+or\s+/i var AND = /\s+and\s+/i // find ORs var OR_EXPRESSION = /(^|\()(\s*-?[0-9.]+\s+(?:or\s+-?[0-9.]+\s*)+)(\)|$)/ig // find ANDS, respecting order of operations (and before or) var AND_EXPRESSION = /(^|\(|or\s)(\s*-?[0-9.]+\s+(?:and\s+-?[0-9.]+\s*)+)(\sor|\)|$)/ig function _align_gene_data_to_reactions (data, reactions) { var aligned = {} var null_val = [ null ] // make an array of nulls as the default for (var first_gene_id in data) { null_val = data[first_gene_id].map(function () { return null }) break } for (var reaction_id in reactions) { var reaction = reactions[reaction_id] var bigg_id = reaction.bigg_id var this_gene_data = {} reaction.genes.forEach(function (gene) { // check both gene id and gene name ;[ 'bigg_id', 'name' ].forEach(function (kind) { var d = data[gene[kind]] || utils.clone(null_val) // merger with existing data if present var existing_d = this_gene_data[gene.bigg_id] if (typeof existing_d === 'undefined') { this_gene_data[gene.bigg_id] = d } else { for (var i = 0; i < d.length; i++) { var pnt = d[i] if (pnt !== null) { existing_d[i] = pnt } } } }) }) aligned[bigg_id] = this_gene_data } return aligned } /** * Convert imported data to a style that can be applied to reactions and nodes. * data: The data object. * name: Either 'reaction_data', 'metabolite_data', or 'gene_data' * all_reactions: Required for name == 'gene_data'. Must include all GPRs for * the map and model. */ function import_and_check (data, name, all_reactions) { // check arguments if (data === null) { return null } if ([ 'reaction_data', 'metabolite_data', 'gene_data' ].indexOf(name) === -1) { throw new Error('Invalid name argument: ' + name) } // make array if (!(data instanceof Array)) { data = [ data ] } // check data var check = function () { if (data === null) { return null } if (data.length === 1) { return null } if (data.length === 2) { return null } return console.warn('Bad data style: ' + name) } check() data = utils.array_to_object(data) if (name === 'gene_data') { if (all_reactions === undefined) { throw new Error('Must pass all_reactions argument for gene_data') } data = _align_gene_data_to_reactions(data, all_reactions) } return data } function float_for_data(d, styles, compare_style) { // all null if (d === null) return null // absolute value var take_abs = (styles.indexOf('abs') != -1) if (d.length==1) { // 1 set // 1 null var f = _parse_float_or_null(d[0]) if (f === null) return null return abs(f, take_abs) } else if (d.length==2) { // 2 sets // 2 null var fs = d.map(_parse_float_or_null) if (fs[0] === null || fs[1] === null) return null if (compare_style == 'diff') { return diff(fs[0], fs[1], take_abs) } else if (compare_style == 'fold') { return check_finite(fold(fs[0], fs[1], take_abs)) } else if (compare_style == 'log2_fold') { return check_finite(log2_fold(fs[0], fs[1], take_abs)) } } else { throw new Error('Data array must be of length 1 or 2') } throw new Error('Bad data compare_style: ' + compare_style) // definitions function check_finite(x) { return isFinite(x) ? x : null } function abs(x, take_abs) { return take_abs ? Math.abs(x) : x } function diff(x, y, take_abs) { if (take_abs) return Math.abs(y - x) else return y - x } function fold(x, y, take_abs) { if (x == 0 || y == 0) return null var fold = (y >= x ? y / x : - x / y) return take_abs ? Math.abs(fold) : fold } function log2_fold(x, y, take_abs) { if (x == 0) return null if (y / x < 0) return null var log = Math.log(y / x) / Math.log(2) return take_abs ? Math.abs(log) : log } } function reverse_flux_for_data(d) { if (d === null || d[0] === null) return false return (d[0] < 0) } /** * Add gene values to the gene_reaction_rule string. * @param {String} rule - The gene reaction rule. * @param {} gene_values - The values. * @param {} genes - An array of objects specifying the gene bigg_id and name. * @param {} styles - The reaction styles. * @param {String} identifiers_on_map - The type of identifiers ('bigg_id' or 'name'). * @param {} compare_style - The comparison style. * * @return {Array} A list of objects with: * * { * bigg_id: The bigg ID. * name: The name. * text: The new string with formatted data values. * } * * The text elements should each appear on a new line. */ function gene_string_for_data (rule, gene_values, genes, styles, identifiers_on_map, compare_style) { var out_text = rule var no_data = (gene_values === null) // keep track of bigg_ids to remove repeats var genes_found = {} genes.forEach(function(g_obj) { var bigg_id = g_obj.bigg_id // ignore repeats that may have found their way into the genes object if (bigg_id in genes_found) return genes_found[bigg_id] = true // generate the string if (no_data) { out_text = replace_gene_in_rule(out_text, bigg_id, bigg_id + '\n') } else { if (!(bigg_id in gene_values)) return var d = gene_values[bigg_id] var f = float_for_data(d, styles, compare_style) var format = (f === null ? RETURN_ARG : d3_format('.3g')) if (d.length === 1) { out_text = replace_gene_in_rule(out_text, bigg_id, bigg_id + ' (' + null_or_d(d[0], format) + ')\n') } else if (d.length === 2) { var new_str // check if they are all text var any_num = _.any(d, function (x) { return _parse_float_or_null(x) !== null }) if (any_num) { new_str = (bigg_id + ' (' + null_or_d(d[0], format) + ', ' + null_or_d(d[1], format) + ': ' + null_or_d(f, format) + ')\n') } else { new_str = (bigg_id + ' (' + null_or_d(d[0], format) + ', ' + null_or_d(d[1], format) + ')\n') } out_text = replace_gene_in_rule(out_text, bigg_id, new_str) } } }) out_text = (out_text // remove empty lines .replace(EMPTY_LINES, '\n') // remove trailing newline (with or without parens) .replace(TRAILING_NEWLINE, '$1')) // split by newlines, and switch to names if necessary var result = out_text.split('\n').map(function (text) { for (var i = 0, l = genes.length; i < l; i++) { var gene = genes[i] if (text.indexOf(gene.bigg_id) !== -1) { // replace with names if (identifiers_on_map === 'name') text = replace_gene_in_rule(text, gene.bigg_id, gene.name) return { bigg_id: gene.bigg_id, name: gene.name, text: text } } } // not found, then none return { bigg_id: null, name: null, text: text } }) return result // definitions function null_or_d (d, format) { return d === null ? 'nd' : format(d) } } function text_for_data (d, f) { if (d === null) { return null_or_d(null) } if (d.length === 1) { var format = (f === null ? RETURN_ARG : d3_format('.3g')) return null_or_d(d[0], format) } if (d.length === 2) { var format = (f === null ? RETURN_ARG : d3_format('.3g')), t = null_or_d(d[0], format) t += ', ' + null_or_d(d[1], format) t += ': ' + null_or_d(f, format) return t } return '' // definitions function null_or_d (d, format) { return d === null ? '(nd)' : format(d) } } function csv_converter(csv_rows) { /** Convert data from a csv file to json-style data. File must include a header row. */ // count rows var c = csv_rows[0].length, converted = [] if (c < 2 || c > 3) throw new Error('CSV file must have 2 or 3 columns') // set up rows for (var i = 1; i < c; i++) { converted[i - 1] = {} } // fill csv_rows.slice(1).forEach(function(row) { for (var i = 1, l = row.length; i < l; i++) { converted[i - 1][row[0]] = row[i] } }) return converted } function genes_for_gene_reaction_rule(rule) { /** Find unique genes in gene_reaction_rule string. Arguments --------- rule: A boolean string containing gene names, parentheses, AND's and OR's. Returns ------- An array of gene strings. */ var genes = rule // remove ANDs and ORs, surrounded by space or parentheses .replace(AND_OR, '$1$2') // remove parentheses .replace(ALL_PARENS, '') // split on whitespace .split(' ') .filter(function(x) { return x != ''; }) // unique strings return utils.unique_strings_array(genes) } function evaluate_gene_reaction_rule(rule, gene_values, and_method_in_gene_reaction_rule) { /** Return a value given the rule and gene_values object. Arguments --------- rule: A boolean string containing gene names, parentheses, AND's and OR's. gene_values: Object with gene_ids for keys and numbers for values. and_method_in_gene_reaction_rule: Either 'mean' or 'min'. */ var null_val = [null], l = 1 // make an array of nulls as the default for (var gene_id in gene_values) { null_val = gene_values[gene_id].map(function() { return null; }) l = null_val.length break } if (rule == '') return utils.clone(null_val) // for each element in the arrays var out = [] for (var i = 0; i < l; i++) { // get the rule var curr_val = rule // put all the numbers into the expression var all_null = true for (var gene_id in gene_values) { var f = _parse_float_or_null(gene_values[gene_id][i]) if (f === null) { f = 0 } else { all_null = false } curr_val = replace_gene_in_rule(curr_val, gene_id, f) } if (all_null) { out.push(null) continue } // recursively evaluate while (true) { // arithemtic expressions var new_curr_val = curr_val // take out excessive parentheses new_curr_val = new_curr_val.replace(EXCESS_PARENS, ' $1 ') // or's new_curr_val = new_curr_val.replace(OR_EXPRESSION, function(match, p1, p2, p3) { // sum var nums = p2.split(OR).map(parseFloat), sum = nums.reduce(function(a, b) { return a + b;}) return p1 + sum + p3 }) // and's new_curr_val = new_curr_val.replace(AND_EXPRESSION, function(match, p1, p2, p3) { // find min var nums = p2.split(AND).map(parseFloat), val = (and_method_in_gene_reaction_rule == 'min' ? Math.min.apply(null, nums) : nums.reduce(function(a, b) { return a + b; }) / nums.length) return p1 + val + p3 }) // break if there is no change if (new_curr_val == curr_val) break curr_val = new_curr_val } // strict test for number var num = Number(curr_val) if (isNaN(num)) { console.warn('Could not evaluate ' + rule) out.push(null) } else { out.push(num) } } return out } function replace_gene_in_rule (rule, gene_id, val) { // get the escaped string, with surrounding space or parentheses var space_or_par_start = '(^|[\\\s\\\(\\\)])' var space_or_par_finish = '([\\\s\\\(\\\)]|$)' var escaped = space_or_par_start + escape_reg_exp(gene_id) + space_or_par_finish return rule.replace(new RegExp(escaped, 'g'), '$1' + val + '$2') // definitions function escape_reg_exp(string) { return string.replace(ESCAPE_REG, "\\$1") } } /** * Returns True if the scale has changed. * @param {Object} reactions - * @param {} data - * @param {} styles - * @param {String} compare_style - * @param {Array} keys - (Optional) The keys in reactions to apply data to. */ function apply_reaction_data_to_reactions (reactions, data, styles, compare_style, keys) { if (_.isUndefined(keys)) keys = Object.keys(reactions) var reaction_id var reaction var segment_id var segment if (data === null) { keys.map(function (reaction_id) { reaction = reactions[reaction_id] reaction.data = null reaction.data_string = '' for (segment_id in reaction.segments) { segment = reaction.segments[segment_id] segment.data = null } reaction.gene_string = null }) return false } // apply the datasets to the reactions keys.map(function (reaction_id) { reaction = reactions[reaction_id] // check bigg_id and name var d = data[reaction.bigg_id] || data[reaction.name] || null var f = float_for_data(d, styles, compare_style) var r = reverse_flux_for_data(d) var s = text_for_data(d, f) reaction.data = f reaction.data_string = s reaction.reverse_flux = r reaction.gene_string = null // apply to the segments for (segment_id in reaction.segments) { segment = reaction.segments[segment_id] segment.data = reaction.data segment.reverse_flux = reaction.reverse_flux } }) return true } /** * Returns True if the scale has changed. * @param {Object} nodes - * @param {} data - * @param {} styles - * @param {String} compare_style - * @param {Array} keys - (Optional) The keys in nodes to apply data to. */ function apply_metabolite_data_to_nodes (nodes, data, styles, compare_style, keys) { if (_.isUndefined(keys)) keys = Object.keys(nodes) var node_id if (data === null) { keys.map(function (node_id) { nodes[node_id].data = null nodes[node_id].data_string = '' }) return false } // grab the data keys.map(function (node_id) { var node = nodes[node_id] // check bigg_id and name var d = data[node.bigg_id] || data[node.name] || null, f = float_for_data(d, styles, compare_style), s = text_for_data(d, f) node.data = f node.data_string = s }) return true } /** * Returns true if data is present * reactions: The reactions to update. * gene_data_obj: The gene data object, with the following style: * { reaction_id: { gene_id: value } } * styles: Gene styles array. * identifiers_on_map: * compare_style: * and_method_in_gene_reaction_rule: * @param {Array} keys - (Optional) The keys in reactions to apply data to. */ function apply_gene_data_to_reactions (reactions, gene_data_obj, styles, identifiers_on_map, compare_style, and_method_in_gene_reaction_rule, keys) { if (_.isUndefined(keys)) keys = Object.keys(reactions) if (gene_data_obj === null) { keys.map(function (reaction_id) { var reaction = reactions[reaction_id] reaction.data = null reaction.data_string = '' reaction.reverse_flux = false for (var segment_id in reaction.segments) { var segment = reaction.segments[segment_id] segment.data = null } reaction.gene_string = null }) return false } // Get the null val var null_val = [ null ] // Make an array of nulls as the default for (var reaction_id in gene_data_obj) { for (var gene_id in gene_data_obj[reaction_id]) { null_val = gene_data_obj[reaction_id][gene_id] .map(function () { return null }) break } break } // Apply the datasets to the reactions keys.map(function (reaction_id) { var reaction = reactions[reaction_id] var rule = reaction.gene_reaction_rule // find the data var d, gene_values var r_data = gene_data_obj[reaction.bigg_id] if (!_.isUndefined(r_data)) { gene_values = r_data d = evaluate_gene_reaction_rule(rule, gene_values, and_method_in_gene_reaction_rule) } else { gene_values = {} d = utils.clone(null_val) } var f = float_for_data(d, styles, compare_style) var r = reverse_flux_for_data(d) var s = text_for_data(d, f) reaction.data = f reaction.data_string = s reaction.reverse_flux = r // apply to the segments for (var segment_id in reaction.segments) { var segment = reaction.segments[segment_id] segment.data = reaction.data segment.reverse_flux = reaction.reverse_flux } // always update the gene string reaction.gene_string = gene_string_for_data(rule, gene_values, reaction.genes, styles, identifiers_on_map, compare_style) }) return true } function _parse_float_or_null(x) { // strict number casting var f = Number(x) // check for null and '', which haven't been caught yet return (isNaN(f) || parseFloat(x) != f) ? null : f }