aletorecxml
Version:
JavaScript library for converting the ALE format to the recXML format
328 lines (245 loc) • 12.3 kB
JavaScript
/*
* @Author: Guillaume GENCE
* @Date: 2015-11-23 15:09:17
* @Last Modified by: Guillaume GENCE
* @Last Modified time: 2016-01-07 10:34:49
*/
var parser = require("biojs-io-newick");
var xml = require('xml');
var pp_xml = require('pretty-data').pd;
// Module-level accumulator: the stream 'data' handlers in this file append
// their chunks to it, and ALEtorecXML clears it after passing the result to
// its callback.
var xmlstring = "";
/**
 * Convert ALE / Newick (NHX) text to a recXML string and pass it to `callback`.
 *
 * The text is parsed with parser.parse_nhx, the resulting tree is walked by
 * format_for_xml_translation, and every stream chunk is accumulated in the
 * module-level `xmlstring`. When the chunk "</phyloxml>" arrives, the
 * accumulated text is pretty-printed and handed to `callback`; if the parsed
 * object has no own keys, an error message string is handed over instead.
 *
 * @param {String} ale_data ALE / Newick text to convert.
 * @param {Function} callback Receives the pretty-printed XML, or the error
 *     message string when nothing could be parsed.
 */
function ALEtorecXML (ale_data,callback) {
    // Start from an empty buffer. `xmlstring` is otherwise only cleared once
    // "</phyloxml>" has been seen, so a previous conversion that threw midway
    // (get_speciesLocation / get_timeSlice throw on unexpected event strings)
    // would leave fragments that end up at the front of this result.
    xmlstring = "";

    var data_json = parser.parse_nhx(ale_data);

    var phyloxml_el = xml.element({ _attr: { "xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance", "xmlns": "http://www.phyloxml.org", "xsi:schemaLocation":"recxml.xsd" } });
    var phyloxml_stream = xml({ phyloxml: phyloxml_el }, { stream: true });
    phyloxml_stream.on('data', function (chunk) {
        xmlstring = xmlstring + chunk;
        // The closing root tag marks the end of the document: deliver the
        // result and clear the buffer for the next conversion.
        if(chunk === "</phyloxml>")
        {
            var data = pp_xml.xml(xmlstring);
            if(!Object.keys(data_json).length)
            {
                data = "Invalide ALE/Newick Data ! Sorry no error help can be seen ."
            }
            callback.call(this,data);
            xmlstring = "";
        }
    });

    var phylogeny_el = xml.element({ _attr: { "rooted": true } });
    var phylogeny_stream = xml({ phylogeny: phylogeny_el }, { stream: true });
    phylogeny_stream.on('data', function (chunk) {xmlstring = xmlstring + chunk;});

    format_for_xml_translation(data_json);

    // Close the inner element before the root so the root closing tag is the
    // last thing requested.
    phylogeny_el.close();
    phyloxml_el.close();
}
/**
 * Recursively emit one <clade> element for a tree node and all of its
 * descendants, appending the generated chunks to the module-level `xmlstring`.
 *
 * For each node the name and branch length are pushed into the clade, the
 * node name is handed to parse_event_string, and every child is then
 * processed in order before the clade is closed.
 *
 * @param {Object} data_json Tree node with `name`, `branch_length` and an
 *     optional `children` list.
 */
function format_for_xml_translation (data_json) {
    var clade_el = xml.element();
    var clade_stream = xml({ clade: clade_el }, { stream: true });
    clade_stream.on('data', function (chunk) {xmlstring = xmlstring + chunk;});

    clade_el.push({name:data_json.name});
    clade_el.push({branch_length:data_json.branch_length});
    parse_event_string(data_json.name);

    // Visit every child rather than only the first two: this gives the same
    // result for binary nodes, and no longer fails on nodes with fewer than
    // two children or drops children beyond the second.
    var children = data_json.children;
    if (children) {
        for (var child_index = 0; child_index < children.length; child_index += 1) {
            format_for_xml_translation(children[child_index]);
        }
    }

    clade_el.close();
}
/**
 * Translate the event string carried by one tree node into an <eventsRec>
 * element and append it to the module-level `xmlstring` buffer.
 *
 * The string is consumed from left to right. Each loop iteration recognises
 * one event at the start of the remaining text and emits the matching child
 * element. It then either strips the recognised prefix (events that may be
 * followed by others) or stops (terminal events and unrecognised text).
 * A leaf element, when one was detected, is emitted last.
 *
 * @param {String} event_string Event string to parse (the node name). For a
 *     falsy value only the eventsRec element itself is created and closed.
 * @return {undefined} Nothing is returned; output goes to `xmlstring`.
 * @throws {String} Propagated from get_speciesLocation / get_timeSlice when
 *     they cannot extract a value from the recognised event.
 */
function parse_event_string (event_string) {
    var eventsRec_el = xml.element();
    var eventsRec_stream = xml({ eventsRec: eventsRec_el }, { stream: true });
    eventsRec_stream.on('data', function (chunk) {xmlstring = xmlstring + chunk;});

    // Part of the event string that still has to be parsed.
    var event_string_tmp = event_string;
    // Leaf information is only emitted after the loop, once all other events
    // of the string have been written.
    var feuille_bool = false;
    var feuille_speciesLocation = "";
    var gene_name = "";
    // Captures of the most recent prefix match.
    // NOTE(review): they are read from the legacy RegExp.$N statics, which
    // hold the captures of the last *successful* match.
    var replace = "";
    var part_one = "";
    var part_two = "";

    while (event_string_tmp)
    {
        // End-of-string sentinel appended by the leaf branch below.
        if(/^\.EOS/.test(event_string_tmp))
        {
            event_string_tmp = null;
        }
        // Leaf: remember its data and keep parsing what follows.
        else if(/^(.*_[^\nT@\.]+)/.test(event_string_tmp))
        {
            /^(.*_[^\nT@\.]+)/.exec(event_string_tmp);
            replace = RegExp.$1;
            event_string_tmp = event_string_tmp.substr(replace.length, event_string_tmp.length);
            event_string_tmp = event_string_tmp + "\.EOS";
            feuille_bool = true;
            feuille_speciesLocation = get_speciesLocation(replace);
            gene_name = replace;
        }
        // Speciation (terminal).
        else if (/^\.\d*[^\.TD]$/.test(event_string_tmp)) {
            emit_event_element("speciation", {
                "speciesLocation": get_speciesLocation(event_string_tmp)
            });
            event_string_tmp = null;
        }
        // Duplication (terminal).
        else if (/^D@\d*\|.*/.test(event_string_tmp)) {
            emit_event_element("duplication", {
                "speciesLocation": get_speciesLocation(event_string_tmp),
                "timeSlice": get_timeSlice(event_string_tmp)
            });
            event_string_tmp = null;
        }
        // Transfer (terminal).
        else if (/^T@\d*\|.*/.test(event_string_tmp)) {
            emit_event_element("speciationOut", {
                "speciesLocation": get_speciesLocation(event_string_tmp),
                "timeSlice": get_timeSlice(event_string_tmp)
            });
            event_string_tmp = null;
        }
        // Transfer back (terminal).
        else if (/^Tb@\d*\|.*/.test(event_string_tmp)) {
            emit_event_element("speciationOut", {
                "speciesLocation": get_speciesLocation(event_string_tmp),
                "timeSlice": get_timeSlice(event_string_tmp)
            });
            event_string_tmp = null;
        }
        // Transferred (terminal).
        else if (/^@\d*\|(?!(.*T@)|(.*D@)|(.*\.)).*$/.test(event_string_tmp)) {
            emit_event_element("speciationOut", {
                "speciesLocation": get_speciesLocation(event_string_tmp),
                "timeSlice": get_timeSlice(event_string_tmp)
            });
            event_string_tmp = null;
        }
        // Transfer then loss (terminal).
        else if (/^\.T(?!(.*T@)|(.*D@)|(.*\.)).*$/.test(event_string_tmp)) {
            // The attributes must be read before the remaining text is
            // cleared: clearing first handed null to get_speciesLocation,
            // which throws for it, so this branch could never succeed.
            emit_event_element("speciationOutLoss", {
                "speciesLocation": get_speciesLocation(event_string_tmp),
                "timeSlice": get_timeSlice(event_string_tmp)
            });
            event_string_tmp = null;
            // NOTE(review): this makes the leaf element below get emitted with
            // empty attributes -- confirm that is the intended output.
            feuille_bool = true;
        }
        // Loss followed by another event still to be determined.
        else if (/^(\.{1}\d?[^\nTD\.]+)/.test(event_string_tmp) ) {
            /^(\.{1}\d?[^\nTD\.]+)/.exec(event_string_tmp);
            replace = RegExp.$1;
            event_string_tmp = event_string_tmp.substr(replace.length, event_string_tmp.length);
            emit_event_element("speciationLoss", {
                "speciesLocation": get_speciesLocation(replace)
            });
        }
        // Transferred, followed by another event still to be determined.
        else if (/^(@\d+\|.+?)(D@|T@|\.)/.test(event_string_tmp)) {
            /^(@\d+\|.+?)(D@|T@|\.)/.exec(event_string_tmp);
            replace = RegExp.$1;
            event_string_tmp = event_string_tmp.substr(replace.length, event_string_tmp.length);
            emit_event_element("transferLoss", {
                "destinationSpecies": get_speciesLocation(replace),
                "timeSlice": get_timeSlice(replace)
            });
        }
        // Transfer then loss, followed by another event still to be determined.
        else if (/^(\.T@.*?)(D@|T@|\.)/.test(event_string_tmp)) {
            // NOTE(review): this pattern is stricter than the one tested
            // above (it needs a second "@" part). When it does not match,
            // RegExp.$1..$3 still hold the captures of the test above.
            /^((.T@.*?)(@.*))(D@|T@|\.)/.exec(event_string_tmp);
            replace = RegExp.$1;
            part_one = RegExp.$2;
            part_two = RegExp.$3;
            event_string_tmp = event_string_tmp.substr(replace.length, event_string_tmp.length);
            emit_event_element("speciationOutLoss", {
                "speciesLocation": get_speciesLocation(part_one),
                "timeSlice": get_timeSlice(part_one)
            });
            emit_event_element("transferLoss", {
                "destinationSpecies": get_speciesLocation(part_two),
                "timeSlice": get_timeSlice(part_two)
            });
        }
        // Anything else is an invalid event string: stop parsing.
        else {
            event_string_tmp = null;
        }
    }

    if(feuille_bool)
    {
        emit_event_element("leaf", {
            "speciesLocation": feuille_speciesLocation,
            geneName: gene_name
        });
    }
    eventsRec_el.close();
}

/**
 * Create one XML element named `tag_name` with the given attributes, route its
 * stream chunks into the module-level `xmlstring`, and close it.
 *
 * @param {String} tag_name Name of the XML element to emit.
 * @param {Object} attributes Attribute name/value pairs for the element.
 */
function emit_event_element (tag_name, attributes) {
    var event_el = xml.element({ _attr: attributes });
    var event_description = {};
    event_description[tag_name] = event_el;
    var event_stream = xml(event_description, { stream: true });
    event_stream.on('data', function (chunk) {xmlstring = xmlstring + chunk;});
    event_el.close();
}
/**
 * Extract the species location from an event string.
 *
 * The rules below are tried in order; the first rule whose guard matches
 * decides the result through its capture pattern:
 *   - text containing two underscores: the part before the second-to-last
 *     underscore
 *   - "D@...", ".T@...", "T@...", "Tb@...", "@...": the part after the last "|"
 *   - any other text starting with ".": the part after that dot
 *
 * @param {String} event_string Event string (or event fragment) to inspect.
 * @return {String} The non-empty species location.
 * @throws {String} When no rule yields a non-empty value; the message is also
 *     written to the console.
 */
function get_speciesLocation(event_string) {
    var extraction_rules = [
        { guard: /^.*_.*_.*/, capture: /^(.*)_.*_.*/ },
        { guard: /^D@.*/,     capture: /D@.*\|(.*)/ },
        { guard: /^\.T@.*/,   capture: /\.T@.*\|(.*)/ },
        { guard: /^\./,       capture: /\.(.*)/ },
        { guard: /^T@.*/,     capture: /T@.*\|(.*)/ },
        { guard: /^Tb@.*/,    capture: /Tb@.*\|(.*)/ },
        { guard: /^@.*/,      capture: /@.*\|(.*)/ }
    ];

    var speciesLocation = "";
    for (var rule_index = 0; rule_index < extraction_rules.length; rule_index += 1) {
        var rule = extraction_rules[rule_index];
        if (!rule.guard.test(event_string)) {
            continue;
        }
        // Only the first matching rule is consulted, even when its capture
        // pattern then fails to match.
        var found = rule.capture.exec(event_string);
        speciesLocation = found ? found[1] : "";
        break;
    }

    if (speciesLocation !== "") {
        return speciesLocation;
    }

    console.log("speciesLocation not found in module ALEtorecXML, the event string of one of your ALE format it's not expected" + event_string)
    throw "speciesLocation not found in module ALEtorecXML, the event string of one of your ALE format it's not expected" + event_string;
}
/**
 * Extract the time slice from an event string: the text between the first
 * "@" and the last "|" that follows it on the same line.
 *
 * @param {String} event_string Event string (or event fragment) to inspect.
 * @return {String} The non-empty time slice.
 * @throws {String} When no non-empty time slice can be extracted; the message
 *     is also written to the console.
 */
function get_timeSlice (event_string) {
    var slice_match = /@(.*)\|.*/.exec(event_string);
    var timeSlice = slice_match ? slice_match[1] : "";

    if (timeSlice !== "") {
        return timeSlice;
    }

    console.log("timeSlice not found in module ALEtorecXML, the event string of one of your ALE format it's not expected" + event_string)
    throw "timeSlice not found in module ALEtorecXML, the event string of one of your ALE format it's not expected" + event_string;
}
// Expose ALEtorecXML as a named export of this module.
module.exports.ALEtorecXML = ALEtorecXML;