UNPKG

aletorecxml

Version:

JavaScript library for converting the ALE format to the recXML format

328 lines (245 loc) 12.3 kB
/*
 * @Author: Guillaume GENCE
 * @Date:   2015-11-23 15:09:17
 * @Last Modified by:   Guillaume GENCE
 * @Last Modified time: 2016-01-07 10:34:49
 */

var parser = require("biojs-io-newick");
var xml = require('xml');
var pp_xml = require('pretty-data').pd;

// Module-level buffer that every xml stream created below appends its chunks to.
var xmlstring = "";

/**
 * 'data' listener shared by all xml streams: appends the chunk to the buffer.
 * @param {string} chunk Piece of serialized XML emitted by a stream.
 */
function append_chunk(chunk) {
    xmlstring = xmlstring + chunk;
}

/**
 * Writes one self-closed element with the given attributes into the buffer.
 * @param {string} tag_name   Name of the XML element.
 * @param {Object} attributes Attribute name/value pairs (passed as `_attr`).
 */
function emit_event(tag_name, attributes) {
    var event_el = xml.element({ _attr: attributes });
    var root = {};
    root[tag_name] = event_el;
    var event_stream = xml(root, { stream: true });
    event_stream.on('data', append_chunk);
    event_el.close();
}

/**
 * Converts ALE/Newick data to a pretty-printed recXML document.
 *
 * The input is parsed with `parser.parse_nhx`; the tree is then serialized as
 * phyloxml > phylogeny > clade elements. When the parsed tree has no keys the
 * callback receives an error message string instead of XML.
 *
 * NOTE(review): the output is assembled by concatenating chunks coming from
 * several independent xml streams, so it relies on the `xml` package
 * delivering 'data' events in the order the elements were written — confirm
 * before reordering any statement here.
 *
 * @param {string}   ale_data ALE/Newick text.
 * @param {Function} callback Called with the resulting string once the
 *                            closing `</phyloxml>` chunk has been received.
 * @throws {string} Propagated from get_speciesLocation / get_timeSlice when a
 *                  node name cannot be interpreted.
 */
function ALEtorecXML (ale_data, callback) {
    // The buffer is only cleared when "</phyloxml>" is seen, so a previous
    // conversion that threw half-way would otherwise leak its partial output
    // into this one.
    xmlstring = "";

    var data_json = parser.parse_nhx(ale_data);

    var phyloxml_el = xml.element({
        _attr: {
            "xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance",
            "xmlns": "http://www.phyloxml.org",
            "xsi:schemaLocation": "recxml.xsd"
        }
    });
    var phyloxml_stream = xml({ phyloxml: phyloxml_el }, { stream: true });
    // Kept as a function expression: `this` is forwarded to the callback.
    phyloxml_stream.on('data', function (chunk) {
        xmlstring = xmlstring + chunk;
        // The closing root tag marks the end of the document.
        if (chunk === "</phyloxml>") {
            var data = pp_xml.xml(xmlstring);
            if (!Object.keys(data_json).length) {
                data = "Invalide ALE/Newick Data ! Sorry no error help can be seen ."
            }
            callback.call(this, data);
            xmlstring = "";
        }
    });

    var phylogeny_el = xml.element({ _attr: { "rooted": true } });
    var phylogeny_stream = xml({ phylogeny: phylogeny_el }, { stream: true });
    phylogeny_stream.on('data', append_chunk);

    format_for_xml_translation(data_json);

    phylogeny_el.close();
    phyloxml_el.close();
}

/**
 * Recursively writes a tree node as a <clade> element: its name, its branch
 * length, the events decoded from its name, then every child clade.
 * @param {Object} data_json Tree node; `name`, `branch_length` and `children`
 *                           are the properties read here.
 */
function format_for_xml_translation (data_json) {
    var clade_el = xml.element();
    var clade_stream = xml({ clade: clade_el }, { stream: true });
    clade_stream.on('data', append_chunk);

    clade_el.push({ name: data_json.name });
    clade_el.push({ branch_length: data_json.branch_length });
    parse_event_string(data_json.name);

    // Visit every child rather than assuming exactly two of them.
    var children = data_json.children;
    if (children) {
        for (var i = 0; i < children.length; i++) {
            format_for_xml_translation(children[i]);
        }
    }

    clade_el.close();
}

/**
 * Decodes the event string carried by a node name and writes the matching
 * <eventsRec> element (with one child element per recognised event).
 *
 * The string is consumed from the left; branches that recognise a complete
 * event stop the loop, branches that recognise a prefix strip it and continue.
 * An unrecognised remainder silently ends the loop.
 *
 * @param {string} event_string Node name to decode (may be undefined/empty).
 * @throws {string} When a species location or time slice cannot be extracted.
 */
function parse_event_string (event_string) {
    var eventsRec_el = xml.element();
    var eventsRec_stream = xml({ eventsRec: eventsRec_el }, { stream: true });
    eventsRec_stream.on('data', append_chunk);

    var remaining = event_string;
    var is_leaf = false;
    var leaf_speciesLocation = "";
    var gene_name = "";
    var match;
    var token;

    while (remaining) {
        if (/^\.EOS/.test(remaining)) {
            // End-of-string sentinel appended by the leaf branch.
            remaining = null;
        }
        // Leaf
        else if (/^(.*_[^\nT@\.]+)/.test(remaining)) {
            match = /^(.*_[^\nT@\.]+)/.exec(remaining);
            token = match[1];
            remaining = remaining.slice(token.length);
            // Sentinel so that whatever follows the leaf name is still parsed
            // by the "followed by another event" branches.
            remaining = remaining + "\.EOS";
            is_leaf = true;
            leaf_speciesLocation = get_speciesLocation(token);
            gene_name = token;
        }
        // Speciation
        else if (/^\.\d*[^\.TD]$/.test(remaining)) {
            emit_event('speciation', {
                "speciesLocation": get_speciesLocation(remaining)
            });
            remaining = null;
        }
        // Duplication
        else if (/^D@\d*\|.*/.test(remaining)) {
            emit_event('duplication', {
                "speciesLocation": get_speciesLocation(remaining),
                "timeSlice": get_timeSlice(remaining)
            });
            remaining = null;
        }
        // Transfer
        else if (/^T@\d*\|.*/.test(remaining)) {
            emit_event('speciationOut', {
                "speciesLocation": get_speciesLocation(remaining),
                "timeSlice": get_timeSlice(remaining)
            });
            remaining = null;
        }
        // Transfer back
        else if (/^Tb@\d*\|.*/.test(remaining)) {
            emit_event('speciationOut', {
                "speciesLocation": get_speciesLocation(remaining),
                "timeSlice": get_timeSlice(remaining)
            });
            remaining = null;
        }
        // Transferred (nothing else follows)
        else if (/^@\d*\|(?!(.*T@)|(.*D@)|(.*\.)).*$/.test(remaining)) {
            emit_event('speciationOut', {
                "speciesLocation": get_speciesLocation(remaining),
                "timeSlice": get_timeSlice(remaining)
            });
            remaining = null;
        }
        // Transfer then loss (nothing else follows)
        else if (/^\.T(?!(.*T@)|(.*D@)|(.*\.)).*$/.test(remaining)) {
            // The attributes must be read before the string is discarded;
            // reading them afterwards made this branch throw every time.
            emit_event('speciationOutLoss', {
                "speciesLocation": get_speciesLocation(remaining),
                "timeSlice": get_timeSlice(remaining)
            });
            remaining = null;
            // NOTE(review): this makes an empty <leaf> follow the event
            // (speciesLocation and geneName are both "") — confirm intent.
            is_leaf = true;
        }
        // Loss followed by another event still to be determined
        else if (/^(\.{1}\d?[^\nTD\.]+)/.test(remaining)) {
            match = /^(\.{1}\d?[^\nTD\.]+)/.exec(remaining);
            token = match[1];
            remaining = remaining.slice(token.length);
            emit_event('speciationLoss', {
                "speciesLocation": get_speciesLocation(token)
            });
        }
        // Transferred, followed by another event still to be determined
        else if (/^(@\d+\|.+?)(D@|T@|\.)/.test(remaining)) {
            match = /^(@\d+\|.+?)(D@|T@|\.)/.exec(remaining);
            token = match[1];
            remaining = remaining.slice(token.length);
            emit_event('transferLoss', {
                "destinationSpecies": get_speciesLocation(token),
                "timeSlice": get_timeSlice(token)
            });
        }
        // Transfer then loss, followed by another event still to be determined
        else if (/^(\.T@.*?)(D@|T@|\.)/.test(remaining)) {
            // This pattern is stricter than the test above (it needs a second
            // "@" part), so it can fail even though the test passed.
            match = /^((.T@.*?)(@.*))(D@|T@|\.)/.exec(remaining);
            if (!match) {
                console.log("speciesLocation not found in module ALEtorecXML, the event string of one of your ALE format it's not expected" + remaining)
                throw "speciesLocation not found in module ALEtorecXML, the event string of one of your ALE format it's not expected" + remaining;
            }
            token = match[1];
            var part_one = match[2];
            var part_two = match[3];
            remaining = remaining.slice(token.length);
            emit_event('speciationOutLoss', {
                "speciesLocation": get_speciesLocation(part_one),
                "timeSlice": get_timeSlice(part_one)
            });
            emit_event('transferLoss', {
                "destinationSpecies": get_speciesLocation(part_two),
                "timeSlice": get_timeSlice(part_two)
            });
        }
        // Otherwise the string is not valid: stop parsing.
        else {
            remaining = null;
        }
    }

    if (is_leaf) {
        emit_event('leaf', {
            "speciesLocation": leaf_speciesLocation,
            geneName: gene_name
        });
    }

    eventsRec_el.close();
}

// Ordered rules for get_speciesLocation: the first `when` that matches selects
// the `extract` pattern whose first capture group is the species location.
var SPECIES_LOCATION_RULES = [
    { when: /^.*_.*_.*/, extract: /^(.*)_.*_.*/ },
    { when: /^D@.*/,     extract: /D@.*\|(.*)/ },
    { when: /^\.T@.*/,   extract: /\.T@.*\|(.*)/ },
    { when: /^\./,       extract: /\.(.*)/ },
    { when: /^T@.*/,     extract: /T@.*\|(.*)/ },
    { when: /^Tb@.*/,    extract: /Tb@.*\|(.*)/ },
    { when: /^@.*/,      extract: /@.*\|(.*)/ }
];

/**
 * Extracts the species location from an event token.
 * @param {string} event_string Event token.
 * @return {string} The non-empty species location.
 * @throws {string} When no rule applies or the extracted value is empty
 *                  (the message is also written to the console).
 */
function get_speciesLocation(event_string) {
    var speciesLocation = "";

    for (var i = 0; i < SPECIES_LOCATION_RULES.length; i++) {
        var rule = SPECIES_LOCATION_RULES[i];
        if (rule.when.test(event_string)) {
            var match = rule.extract.exec(event_string);
            speciesLocation = match ? match[1] : "";
            break;
        }
    }

    if (speciesLocation === "") {
        console.log("speciesLocation not found in module ALEtorecXML, the event string of one of your ALE format it's not expected" + event_string)
        throw "speciesLocation not found in module ALEtorecXML, the event string of one of your ALE format it's not expected" + event_string;
    }
    return speciesLocation;
}

/**
 * Extracts the time slice — the text between "@" and the last "|" — from an
 * event token.
 * @param {string} event_string Event token.
 * @return {string} The non-empty time slice.
 * @throws {string} When the token holds no "@…|" part or the value is empty
 *                  (the message is also written to the console).
 */
function get_timeSlice (event_string) {
    var match = /@(.*)\|.*/.exec(event_string);
    var timeSlice = match ? match[1] : "";

    if (timeSlice === "") {
        console.log("timeSlice not found in module ALEtorecXML, the event string of one of your ALE format it's not expected" + event_string)
        throw "timeSlice not found in module ALEtorecXML, the event string of one of your ALE format it's not expected" + event_string;
    }
    return timeSlice;
}

module.exports.ALEtorecXML = ALEtorecXML;