@gmod/jbrowse
Version:
JBrowse - client-side genome browser
143 lines (123 loc) • 4.7 kB
JavaScript
/* Parser for BED files. The standard six-column BED format (BED-6) is
   "chr\tstart (0-based)\tend (1-based)\tname\tscore\tstrand".
   BED-3 (only the first three fields) is the minimal line this parser accepts.
   Optional header lines start with '#'.
*/
define( [
            'dojo/_base/declare',
            'dojo/_base/array',
            'dojo/_base/lang',
            'JBrowse/Util/TextIterator',
        ],
        function (
            declare,
            array,
            lang,
            TextIterator,
        ) {

// Names given to the first six tab-separated columns of a BED line, in file order.
var bed_feature_names = 'seq_id start end name score strand'.split(" ");

/**
 * Line-oriented BED parser. Lines are fed in one at a time through
 * addLine(); parsed features and comments are handed to the callbacks
 * supplied to the constructor.
 */
return declare( null, {

    /**
     * @param {Object} [args]
     * @param {Function} [args.featureCallback] called with a one-element
     *   array holding the parsed feature for every feature line
     * @param {Function} [args.endCallback] called by finish()
     * @param {Function} [args.commentCallback] called with
     *   ({ comment: text }, store) for every non-empty '#' line
     * @param {Function} [args.errorCallback] stored on the instance;
     *   defaults to logging with console.error
     * @param {Object} [args.store] passed through to commentCallback
     */
    constructor: function( args ) {
        // tolerate construction without an argument object
        args = args || {};
        lang.mixin( this, {
            featureCallback:   args.featureCallback || function() {},
            endCallback:       args.endCallback || function() {},
            commentCallback:   args.commentCallback || function() {},
            errorCallback:     args.errorCallback || function(e) { console.error(e); },
            store:             args.store,
            // if this is true, the parser ignores the
            // rest of the lines in the file.  currently
            // set when the file switches over to FASTA
            eof: false
        });
    },

    /**
     * Parse the bytes that contain the BED header, storing the parsed
     * data in this.header.
     *
     * Each line starting with '#' is passed to this.config.parseHeader,
     * which is expected to return an object with `key` and `value`
     * properties. The constructor does not set this.config, so when it
     * (or its parseHeader callback) is absent, or the callback returns
     * nothing for a line, that line is skipped instead of throwing.
     *
     * @param headerBytes data handed to TextIterator.FromBytes
     * @returns {Object} map of header key to value (also stored in this.header)
     */
    parseHeader: function( headerBytes ) {
        var headData = {};
        var parseLine = this.config && this.config.parseHeader;
        var lineIterator = new TextIterator.FromBytes({ bytes: headerBytes });
        var line;
        while(( line = lineIterator.getline() )) {
            // only interested in meta and header lines
            if( line[0] != '#' )
                continue;

            // parse meta line using the parseHeader configuration callback function
            var metaData = parseLine ? parseLine( line ) : null;
            if( metaData )
                headData[ metaData.key ] = metaData.value;
        }

        this.header = headData;
        return headData;
    },

    /**
     * Signal that no more lines will be added; invokes endCallback.
     */
    finish: function() {
        this.endCallback();
    },

    /**
     * Consume one line of BED text.
     *
     * Feature lines are parsed and passed (as a one-element array) to
     * featureCallback exactly once; '#' lines go to commentCallback;
     * blank lines are ignored; a line starting with '>' marks the start
     * of a FASTA section and makes the parser ignore all further lines.
     *
     * @param {String} line a single line, with or without its line terminator
     */
    addLine: function( line ) {
        var match;
        if( this.eof ) {
            // do nothing
        } else if( /^\s*[^#\s>]/.test( line ) ) { //< feature line, most common case
            var feature = this.parse_feature( line.replace( /\r?\n?$/, '' ) );
            // _return_item invokes featureCallback itself, so it must not
            // be wrapped in a second featureCallback call
            this._return_item([ feature ]);
        }
        // directive or comment
        else if(( match = /^\s*#+(.*)/.exec( line ) )) {
            // strip the whitespace that follows the hash signs
            this._return_item({ comment: match[1].replace( /^\s*/, '' ) });
        }
        else if( /^\s*$/.test( line ) ) {
            // blank line, do nothing
        }
        else if( /^\s*>/.test( line ) ) {
            // implicit beginning of a FASTA section.  just stop
            // parsing, since we don't currently handle sequences.
            // Features are emitted as soon as they are parsed, so
            // there is nothing buffered to flush here.
            this.eof = true;
        }
        else { // defensive: the branches above cover every string input
            line = line.replace( /\r?\n?$/, '' );
            throw "BED parse error.  Cannot parse '"+line+"'.";
        }
    },

    /**
     * Decode %XX escapes in a string.
     *
     * @param {String|null} s
     * @returns {String|null} the decoded string, or null when s is null or undefined
     */
    unescape: function( s ) {
        if( s == null )
            return null;

        return s.replace( /%([0-9A-Fa-f]{2})/g, function( match, seq ) {
            return String.fromCharCode( parseInt( seq, 16 ) );
        });
    },

    /**
     * Parse one tab-separated BED feature line (already stripped of its
     * line terminator).
     *
     * Columns that are missing, empty, or '.' are left out of the result.
     * start and end are converted to integers and score to a float; strand
     * is always present and is 1 for '+', -1 for '-', and 0 otherwise.
     *
     * @param {String} line
     * @returns {Object} object with some of seq_id, start, end, name, score, plus strand
     */
    parse_feature: function( line ) {
        var f = array.map( line.split("\t"), function(a) {
            return a === '.' ? null : a;
        });

        // only the reference sequence column is unescaped
        f[0] = this.unescape( f[0] );

        var parsed = {};
        for( var i = 0; i < bed_feature_names.length; i++ ) {
            if( f[i] ) {
                parsed[ bed_feature_names[i] ] = f[i];
            }
        }

        // "!= null" also skips fields that were never set (undefined), so a
        // missing column stays absent instead of turning into NaN
        if( parsed.start != null )
            parsed.start = parseInt( parsed.start, 10 );
        if( parsed.end != null )
            parsed.end = parseInt( parsed.end, 10 );
        if( parsed.score != null )
            parsed.score = parseFloat( parsed.score );

        // explicit comparisons rather than an object lookup, so that strand
        // text such as "toString" cannot resolve to an inherited property
        if( parsed.strand === '+' )
            parsed.strand = 1;
        else if( parsed.strand === '-' )
            parsed.strand = -1;
        else
            parsed.strand = 0;

        return parsed;
    },

    /**
     * Dispatch a parsed item: an array with a truthy first element goes to
     * featureCallback, an object with a truthy `comment` goes to
     * commentCallback (together with this.store). Anything else is dropped.
     */
    _return_item: function(i) {
        if( i[0] )
            this.featureCallback( i );
        else if( i.comment )
            this.commentCallback( i, this.store );
    }
});
});