UNPKG

@gmod/jbrowse

Version:

JBrowse - client-side genome browser

282 lines (252 loc) 9.42 kB
define([
           'dojo/_base/declare',
           'JBrowse/has',
           'JBrowse/Util',
           'JBrowse/Model/DataView',
           'JBrowse/Model/TabixIndex',
           'JBrowse/Model/BGZip/VirtualOffset',
           'JBrowse/Store/SeqFeature/BAM/Util'
       ],
       function( declare, has, Util, jDataView, TabixIndex, VirtualOffset, BAMUtil ) {

// Diagnostics go to the console's error channel.
var dlog = function(){ console.error.apply(console, arguments); };

var readInt = BAMUtil.readInt;
var readVirtualOffset = BAMUtil.readVirtualOffset;

// Value expected from readInt() at byte 0 of a BAI file
// (0x01494142, i.e. the bytes "BAI\1" when read little-endian).
var BAI_MAGIC = 21578050;

/**
 * Inner class representing a chunk: a span of the data file delimited by
 * two virtual offsets, tagged with the bin it was read from.
 */
var Chunk = Util.fastDeclare({
    /**
     * @param minv virtual offset where the chunk starts
     * @param maxv virtual offset where the chunk ends
     * @param bin  bin number the chunk came from (or 'merged' for chunks
     *             produced by blocksForRange's merge step)
     */
    constructor: function(minv,maxv,bin) {
        this.minv = minv;
        this.maxv = maxv;
        this.bin = bin;
    },
    /** @returns {String} "<minv>..<maxv> (bin <bin>)" */
    toUniqueString: function() {
        return this.minv+'..'+this.maxv+' (bin '+this.bin+')';
    },
    toString: function() {
        return this.toUniqueString();
    },
    /**
     * Order chunks by start offset, then end offset, then bin number.
     * NOTE(review): relies on the virtual offset objects providing
     * compareTo(); confirm against the VirtualOffset implementation.
     */
    compareTo: function( b ) {
        return this.minv.compareTo(b.minv) || this.maxv.compareTo(b.maxv) || this.bin - b.bin;
    },
    compare: function( b ) {
        return this.compareTo( b );
    },
    /**
     * @returns {Number} size estimate computed from the block numbers of
     *   the two offsets: the block span plus one extra 64 KiB (1<<16).
     */
    fetchedSize: function() {
        return this.maxv.block + (1<<16) - this.minv.block + 1;
    }
});

return declare( TabixIndex, {

    /**
     * Parse the raw bytes of a BAI index.
     *
     * Populates this.indices (one Uint8Array view per reference sequence
     * that has any bins or linear-index entries), this.empty, and, via
     * _findMinAlignment, this.minAlignmentVO.
     *
     * @param {ArrayBuffer} header  the bytes of the index file
     * @param deferred  object whose resolve() is called on success and
     *   whose reject(message) is called on failure
     */
    _parseIndex: function( header, deferred ) {
        if (!header) {
            dlog("No data read from BAM index (BAI) file");
            deferred.reject("No data read from BAM index (BAI) file");
            return;
        }

        if( ! has('typed-arrays') ) {
            dlog('Web browser does not support typed arrays');
            deferred.reject('Web browser does not support typed arrays');
            return;
        }

        var uncba = new Uint8Array(header);
        if( readInt(uncba, 0) != BAI_MAGIC) {
            dlog('Not a BAI file');
            deferred.reject('Not a BAI file');
            return;
        }

        var nref = readInt(uncba, 4);

        this.indices = [];

        var p = 8;
        for (var ref = 0; ref < nref; ++ref) {
            var blockStart = p;

            // binning index: nbin records of
            // (bin number, chunk count, chunk count * 16 bytes)
            var nbin = readInt(uncba, p); p += 4;
            for (var b = 0; b < nbin; ++b) {
                // the 4-byte bin number at p is not needed while scanning
                var nchnk = readInt(uncba, p+4);
                p += 8;
                for( var chunkNum = 0; chunkNum < nchnk; chunkNum++ ) {
                    var vo = readVirtualOffset( uncba, p );
                    this._findMinAlignment( vo );
                    p += 16;
                }
            }

            // linear index: nintv virtual offsets of 8 bytes each
            var nintv = readInt(uncba, p); p += 4;
            // as we're going through the linear index, figure out
            // the smallest virtual offset in the indexes, which
            // tells us where the BAM header ends
            this._findMinAlignment( nintv ? readVirtualOffset(uncba,p) : null );
            p += nintv * 8;

            // only keep a view for reference sequences that have index data
            if( nbin > 0 || nintv > 0 ) {
                this.indices[ref] = new Uint8Array(header, blockStart, p - blockStart);
            }
        }

        this.empty = ! this.indices.length;

        deferred.resolve();
    },

    /**
     * Look up the count stored in the index's metadata bin
     * (bin number _bin_limit()+1) for one reference sequence.
     *
     * The value returned is the `offset` part of the virtual offset read
     * at the start of that bin's second chunk.
     * NOTE(review): presumably this is the mapped-read count from the
     * BAI metadata pseudo-bin; confirm against the SAM specification and
     * readVirtualOffset's decoding.
     *
     * @param {Number} tid  reference sequence index
     * @returns {Number} -1 if there is no index for tid; 0 if the index
     *   has no metadata bin or the stored value decodes to nothing;
     *   otherwise the stored value
     */
    featureCount: function(tid) {
        var index = this.indices[tid];
        if (!index) {
            return -1;
        }

        var metaBin = this._bin_limit() + 1;
        var nbin = readInt(index, 0);
        var p = 4;
        for (var b = 0; b < nbin; ++b) {
            var bin = readInt(index, p );
            var nchnk = readInt(index, p+4);
            p += 8;
            if( bin === metaBin ) {
                // skip the bin's first chunk; the count is at the start
                // of the second one
                p += 16;
                var counts = readVirtualOffset( index, p );
                // readVirtualOffset can yield nothing (the linear-index
                // scan in blocksForRange guards for that too), so do not
                // dereference blindly
                return counts ? counts.offset : 0;
            }
            p += nchnk * 16;
        }

        return 0;
    },

    /**
     * Get an array of Chunk objects for the given ref seq id and range.
     *
     * @param {Number} refId  reference sequence index
     * @param {Number} min    start of the range
     * @param {Number} max    end of the range
     * @returns {Array[Chunk]} chunks sorted ascending by start offset,
     *   with chunks that begin in the block where the previous one ends
     *   merged together; empty if there is no index for refId
     */
    blocksForRange: function(refId, min, max) {
        var index = this.indices[refId];
        if (!index) {
            return [];
        }

        // object as { <binNum>: true, ... } containing the bin numbers
        // that overlap this range
        var overlappingBins = function() {
            var intBins = {};
            var intBinsL = this._reg2bins(min, max);
            for (var i = 0; i < intBinsL.length; ++i) {
                intBins[intBinsL[i]] = true;
            }
            return intBins;
        }.call(this);

        // parse the chunks for the overlapping bins out of the index
        // for this ref seq, keeping a distinction between chunks from
        // leaf (lowest-level, smallest) bins, and chunks from other,
        // larger bins
        var leafChunks  = [];
        var otherChunks = [];
        var nbin = readInt(index, 0);
        var p = 4;
        for (var b = 0; b < nbin; ++b) {
            var bin   = readInt(index, p  );
            var nchnk = readInt(index, p+4);
            p += 8;
            if( overlappingBins[bin] ) {
                for (var c = 0; c < nchnk; ++c) {
                    var cs = readVirtualOffset( index, p     );
                    var ce = readVirtualOffset( index, p + 8 );
                    ( bin < 4681 ? otherChunks : leafChunks ).push( new Chunk(cs, ce, bin) );
                    p += 16;
                }
            } else {
                p += nchnk * 16;
            }
        }

        // p now points at the linear index (count, then 8 bytes per
        // 16 kbp window). Find the lowest virtual offset recorded for
        // the windows that the range touches.
        var lowest = function() {
            var lowest = null;
            var nintv  = readInt(index, p);
            // Clamp the first window to 0: when the linear index is empty
            // (nintv == 0) or min is negative, an unclamped start of -1
            // would read the 8 bytes *before* the linear index and treat
            // them as a virtual offset.
            var minLin = Math.max( 0, Math.min(min>>14, nintv - 1) );
            var maxLin = Math.min(max>>14, nintv - 1);
            for (var i = minLin; i <= maxLin; ++i) {
                var lb =  readVirtualOffset(index, p + 4 + (i * 8));
                if( !lb )
                    continue;

                // NOTE(review): ordering comes from VirtualOffset.cmp,
                // defined elsewhere; this keeps whichever offset it ranks
                // as the lowest.
                if ( ! lowest || lb.cmp( lowest ) > 0 )
                    lowest = lb;
            }
            return lowest;
        }();

        // discard any chunks that come before the lowest
        // virtualOffset that we got from the linear index
        if( lowest ) {
            otherChunks = function( otherChunks ) {
                var relevantOtherChunks = [];
                for (var i = 0; i < otherChunks.length; ++i) {
                    var chnk = otherChunks[i];
                    if( chnk.maxv.block >= lowest.block ) {
                        relevantOtherChunks.push(chnk);
                    }
                }
                return relevantOtherChunks;
            }(otherChunks);
        }

        // add the leaf chunks in, and sort the chunks ascending by virtual offset
        var allChunks = otherChunks
            .concat( leafChunks )
            .sort( function(c0, c1) {
                      return c0.minv.block - c1.minv.block || c0.minv.offset - c1.minv.offset;
                   });

        // merge chunks from the same block together
        var mergedChunks = [];
        if( allChunks.length ) {
            var cur = allChunks[0];
            for (var i = 1; i < allChunks.length; ++i) {
                var nc = allChunks[i];
                if (nc.minv.block == cur.maxv.block /* && nc.minv.offset == cur.maxv.offset */) { // no point splitting mid-block
                    cur = new Chunk(cur.minv, nc.maxv, 'merged');
                } else {
                    mergedChunks.push(cur);
                    cur = nc;
                }
            }
            mergedChunks.push(cur);
        }

        return mergedChunks;
    },

    /**
     * Record `candidate` in this.minAlignmentVO if nothing is recorded
     * yet, or if the recorded offset's cmp() against it is negative.
     * Falsy candidates are ignored.
     */
    _findMinAlignment: function( candidate ) {
        if( candidate && ( ! this.minAlignmentVO || this.minAlignmentVO.cmp( candidate ) < 0 ) )
            this.minAlignmentVO = candidate;
    },

    /* calculate bin given an alignment covering [beg,end) (zero-based, half-close-half-open) */
    _reg2bin: function( beg, end ) {
        --end;
        if (beg>>14 == end>>14) return ((1<<15)-1)/7 + (beg>>14);
        if (beg>>17 == end>>17) return ((1<<12)-1)/7 + (beg>>17);
        if (beg>>20 == end>>20) return ((1<<9)-1)/7 + (beg>>20);
        if (beg>>23 == end>>23) return ((1<<6)-1)/7 + (beg>>23);
        if (beg>>26 == end>>26) return ((1<<3)-1)/7 + (beg>>26);
        return 0;
    },

    /* calculate the list of bins that may overlap with region [beg,end) (zero-based) */
    MAX_BIN: (((1<<18)-1)/7),
    _reg2bins: function( beg, end ) {
        var k, list = [ 0 ];
        --end;
        for (k = 1    + (beg>>26); k <= 1    + (end>>26); ++k) list.push(k);
        for (k = 9    + (beg>>23); k <= 9    + (end>>23); ++k) list.push(k);
        for (k = 73   + (beg>>20); k <= 73   + (end>>20); ++k) list.push(k);
        for (k = 585  + (beg>>17); k <= 585  + (end>>17); ++k) list.push(k);
        for (k = 4681 + (beg>>14); k <= 4681 + (end>>14); ++k) list.push(k);
        return list;
    },

    /**
     * Number of bins in a binning scheme with `depth` levels below the
     * root: (8^(depth+1) - 1) / 7, which is 37449 for the default depth.
     * `min_shift` is accepted but not used here.
     */
    _bin_limit: function(min_shift, depth=5) {
        return ((1 << (depth+1)*3) - 1) / 7;
    }

});
});