/*
 * Package: @gmod/jbrowse (version not recorded)
 * JBrowse - client-side genome browser
 * Listing: 469 lines (415 loc), 17.4 kB, JavaScript
 */
const snakeCase = cjsRequire('snake-case')
define( [
'dojo/_base/declare',
'dojo/_base/lang',
'dojo/_base/array',
'JBrowse/Util',
'JBrowse/Util/RejectableFastPromise',
'dojo/promise/all',
'JBrowse/Model/Range',
'JBrowse/Model/SimpleFeature',
'jszlib/inflate',
'jszlib/arrayCopy'
],
function(
declare,
dlang,
array,
Util,
RejectableFastPromise,
all,
Range,
SimpleFeature,
inflate,
arrayCopy
) {
// Debug logger: hands every argument it receives straight to console.log.
var dlog = function(...messages) { console.log(...messages); };
// Fallback autoSql schema, used by parseBedText when the window carries no
// autoSql of its own. Each row below is [type, name, description]; the rows
// are expanded into plain (mutable) field objects in the listed order.
const defaultAutoSql = {
    name: 'BigBED file',
    description: 'this file has no associated autoSQL',
    fields: [
        ['string', 'chrom', 'Name of chromosome '],
        ['uint', 'chromStart', 'Start position (first base is 0).'],
        ['uint', 'chromEnd', 'End position plus one (chromEnd – chromStart = size).'],
        ['string', 'name', 'Name of feature.'],
        ['float', 'score', 'A number between 0 and 1000 that controls shading of item (0 if unused).'],
        ['string', 'strand', '+ or – (or . for unknown).'],
        ['uint', 'thickStart', 'Start position where feature is drawn as thicker line; used for CDS start for genes.'],
        ['uint', 'thickEnd', 'Position where thicker part of feature ends.'],
        ['string', 'itemRgb', 'Comma-separated list of red, green, blue values from 0-255 (0 if unused).'],
        ['uint', 'blockCount', 'For multipart items, the number of blocks; corresponds to exons for genes.'],
        ['string', 'blockSizes', 'Comma-separated list of block sizes.'],
        ['string', 'chromStarts', 'Comma-separated list of block starts relative to chromStart.'],
    ].map(([type, name, description]) => ({ type, name, description })),
};
var RequestWorker = declare( null,
/**
* @lends JBrowse.Store.BigWig.Window.RequestWorker.prototype
*/
{
// Type codes that parseBigWigBlock compares against the type byte it reads
// from each data block header.
// GRAPH: every item is read as start, end and score.
BIG_WIG_TYPE_GRAPH: 1,
// VSTEP: every item is read as start and score; the end is start + the block's item span.
BIG_WIG_TYPE_VSTEP: 2,
// FSTEP: every item is read as a score only; positions come from the block's start, step and span.
BIG_WIG_TYPE_FSTEP: 3,
/**
* Worker object for reading data from a bigwig or bigbed file.
* Manages the state necessary for traversing the index trees and
* so forth.
*
* Adapted by Robert Buels from bigwig.js in the Dalliance Genome
* Explorer by Thomas Down.
* @constructs
*/
constructor: function( window, chr, min, max, callback, errorCallback ) {
this.window = window;
this.source = window.bwg.name || undefined;
this.blocksToFetch = [];
this.outstanding = 0;
this.chr = chr;
this.min = min;
this.max = max;
this.callback = callback;
this.errorCallback = errorCallback || function(e) { console.error( e, e.stack, arguments.caller ); };
},
cirFobRecur: function(offset, level) {
this.outstanding += offset.length;
var maxCirBlockSpan = 4 + (this.window.cirBlockSize * 32); // Upper bound on size, based on a completely full leaf node.
var spans;
for (var i = 0; i < offset.length; ++i) {
var blockSpan = new Range(offset[i], Math.min(offset[i] + maxCirBlockSpan, this.window.cirTreeOffset + this.window.cirTreeLength));
spans = spans ? spans.union( blockSpan ) : blockSpan;
}
var fetchRanges = spans.ranges();
//dlog('fetchRanges: ' + fetchRanges);
for (var r = 0; r < fetchRanges.length; ++r) {
var fr = fetchRanges[r];
this.cirFobStartFetch(offset, fr, level);
}
},
cirFobStartFetch: function(offset, fr, level, attempts) {
var length = fr.max() - fr.min();
// dlog('fetching ' + fr.min() + '-' + fr.max() + ' (' + Util.humanReadableNumber(length) + ')');
//console.log('cirfobstartfetch');
this.window.bwg._read( fr.min(), length, dlang.hitch( this,function(resultBuffer) {
for (var i = 0; i < offset.length; ++i) {
if (fr.contains(offset[i])) {
this.cirFobRecur2(resultBuffer, offset[i] - fr.min(), level);
--this.outstanding;
if (this.outstanding == 0) {
this.cirCompleted();
}
}
}
}), this.errorCallback );
},
cirFobRecur2: function(cirBlockData, offset, level) {
var data = this.window.bwg.newDataView( cirBlockData, offset );
var isLeaf = data.getUint8();
var cnt = data.getUint16( 2 );
//dlog('cir level=' + level + '; cnt=' + cnt);
if (isLeaf != 0) {
for (var i = 0; i < cnt; ++i) {
var startChrom = data.getUint32();
var startBase = data.getUint32();
var endChrom = data.getUint32();
var endBase = data.getUint32();
var blockOffset = data.getUint64();
var blockSize = data.getUint64();
if ((startChrom < this.chr || (startChrom == this.chr && startBase <= this.max)) &&
(endChrom > this.chr || (endChrom == this.chr && endBase >= this.min)))
{
// dlog('Got an interesting block: startBase=' + startBase + '; endBase=' + endBase + '; offset=' + blockOffset + '; size=' + blockSize);
this.blocksToFetch.push({offset: blockOffset, size: blockSize});
}
}
} else {
var recurOffsets = [];
for (var i = 0; i < cnt; ++i) {
var startChrom = data.getUint32();
var startBase = data.getUint32();
var endChrom = data.getUint32();
var endBase = data.getUint32();
var blockOffset = data.getUint64();
if ((startChrom < this.chr || (startChrom == this.chr && startBase <= this.max)) &&
(endChrom > this.chr || (endChrom == this.chr && endBase >= this.min)))
{
recurOffsets.push(blockOffset);
}
}
if (recurOffsets.length > 0) {
this.cirFobRecur(recurOffsets, level + 1);
}
}
},
cirCompleted: function() {
// merge contiguous blocks
this.blockGroupsToFetch = this.groupBlocks( this.blocksToFetch );
if (this.blockGroupsToFetch.length == 0) {
this.callback([]);
} else {
this.features = [];
this.readFeatures();
}
},
groupBlocks: function( blocks ) {
// sort the blocks by file offset
blocks.sort(function(b0, b1) {
return (b0.offset|0) - (b1.offset|0);
});
// group blocks that are within 2KB of eachother
var blockGroups = [];
var lastBlock;
var lastBlockEnd;
for( var i = 0; i<blocks.length; i++ ) {
if( lastBlock && (blocks[i].offset-lastBlockEnd) <= 2000 ) {
lastBlock.size += blocks[i].size - lastBlockEnd + blocks[i].offset;
lastBlock.blocks.push( blocks[i] );
}
else {
blockGroups.push( lastBlock = { blocks: [blocks[i]], size: blocks[i].size, offset: blocks[i].offset } );
}
lastBlockEnd = lastBlock.offset + lastBlock.size;
}
return blockGroups;
},
createFeature: function(fmin, fmax, opts) {
// dlog('createFeature(' + fmin +', ' + fmax + ', '+opts.score+')');
var data = {
start: fmin,
end: fmax
};
for( var k in opts )
data[k] = opts[k];
var id = data.id;
delete data.id;
var f = new SimpleFeature({
data: data,
id: id ? id : data.start + '_' + data.end + '_' + data.score
});
this.features.push(f);
},
maybeCreateFeature: function(fmin, fmax, opts) {
if (fmin <= this.max && fmax >= this.min) {
this.createFeature( fmin, fmax, opts );
}
},
parseSummaryBlock: function( bytes, startOffset ) {
var data = this.window.bwg.newDataView( bytes, startOffset );
var itemCount = bytes.byteLength/32;
for (var i = 0; i < itemCount; ++i) {
var chromId = data.getInt32();
var start = data.getInt32();
var end = data.getInt32();
var validCnt = data.getInt32()||1;
var minVal = data.getFloat32();
var maxVal = data.getFloat32();
var sumData = data.getFloat32();
var sumSqData = data.getFloat32();
if (chromId == this.chr) {
var summaryOpts = {score: sumData/validCnt,maxScore: maxVal,minScore:minVal};
if (this.window.bwg.type == 'bigbed') {
summaryOpts.type = 'density';
}
this.maybeCreateFeature( start, end, summaryOpts);
}
}
},
parseBigWigBlock: function( bytes, startOffset ) {
var data = this.window.bwg.newDataView( bytes, startOffset );
var itemSpan = data.getUint32( 16 );
var blockType = data.getUint8( 20 );
var itemCount = data.getUint16( 22 );
// dlog('processing bigwig block, type=' + blockType + '; count=' + itemCount);
if (blockType == this.BIG_WIG_TYPE_FSTEP) {
var blockStart = data.getInt32( 4 );
var itemStep = data.getUint32( 12 );
for (var i = 0; i < itemCount; ++i) {
var score = data.getFloat32( 4*i+24 );
this.maybeCreateFeature( blockStart + (i*itemStep), blockStart + (i*itemStep) + itemSpan, {score: score});
}
} else if (blockType == this.BIG_WIG_TYPE_VSTEP) {
for (var i = 0; i < itemCount; ++i) {
var start = data.getInt32( 8*i+24 );
var score = data.getFloat32();
this.maybeCreateFeature( start, start + itemSpan, {score: score});
}
} else if (blockType == this.BIG_WIG_TYPE_GRAPH) {
for (var i = 0; i < itemCount; ++i) {
var start = data.getInt32( 12*i + 24 );
var end = data.getInt32();
var score = data.getFloat32();
if (start > end) {
start = end;
}
this.maybeCreateFeature( start, end, {score: score});
}
} else {
dlog('Currently not handling bwgType=' + blockType);
}
},
parseBigBedBlock: function( bytes, startOffset ) {
var data = this.window.bwg.newDataView( bytes, startOffset );
var offset = 0;
while (offset < bytes.byteLength) {
const chromId = data.getUint32(offset);
const start = data.getInt32(offset+4);
const end = data.getInt32(offset+8);
offset += 12;
if (chromId !== this.chr) {
console.warn('BigBed block is out of current range')
return
}
let rest = '';
while (offset < bytes.byteLength) {
let ch = data.getUint8( offset++ );
if (ch !== 0) {
rest += String.fromCharCode(ch);
} else {
break;
}
}
const featureData = this.parseBedText(start, end, rest)
featureData.id = `bb-${startOffset + offset}`
this.maybeCreateFeature(start,end,featureData)
}
},
/**
* parse the `rest` field of a binary bed data section, using
* the autosql schema defined for this file
*
* @returns {Object} feature data with native BED field names
*/
parseBedText: function(start, end, rest) {
// include ucsc-style names as well as jbrowse-style names
const featureData = {
start: start,
end: end,
}
const bedColumns = rest.split('\t')
const asql = this.window.autoSql || defaultAutoSql
const numericTypes = ['uint', 'int', 'float', 'long']
// first three columns (chrom,start,end) are not included in bigBed
for (let i = 3; i < asql.fields.length; i++) {
if (bedColumns[i-3] !== '.' && bedColumns[i-3] !== '') {
const autoField = asql.fields[i]
let columnVal = bedColumns[i-3]
// for speed, cache some of the tests we need inside the autofield definition
if (!autoField._requestWorkerCache) {
const match = /^(\w+)\[/.exec(autoField.type)
autoField._requestWorkerCache = {
isNumeric: numericTypes.includes(autoField.type),
isArray: !!match,
arrayIsNumeric: match && numericTypes.includes(match[1])
}
}
if (autoField._requestWorkerCache.isNumeric) {
let num = Number(columnVal)
// if the number parse results in NaN, somebody probably
// listed the type erroneously as numeric, so don't use
// the parsed number
columnVal = isNaN(num) ? columnVal : num
} else if(autoField._requestWorkerCache.isArray) {
// parse array values
columnVal = columnVal.split(',')
if (columnVal[columnVal.length-1] === '') columnVal.pop()
if (autoField._requestWorkerCache.arrayIsNumeric)
columnVal = columnVal.map(str => Number(str))
}
featureData[snakeCase(autoField.name)] = columnVal
}
}
if (featureData.strand) {
featureData.strand = {'-': -1, '+': 1}[featureData.strand]
}
return featureData
},
readFeatures: function() {
var thisB = this;
var blockFetches = array.map( thisB.blockGroupsToFetch, function( blockGroup ) {
//console.log( 'fetching blockgroup with '+blockGroup.blocks.length+' blocks: '+blockGroup );
var d = new RejectableFastPromise();
thisB.window.bwg._read( blockGroup.offset, blockGroup.size, function( data ) {
blockGroup.data = data;
d.resolve( blockGroup );
}, dlang.hitch( d, 'reject' ) );
return d;
}, thisB );
all( blockFetches ).then( function( blockGroups ) {
array.forEach( blockGroups, function( blockGroup ) {
array.forEach( blockGroup.blocks, function( block ) {
var data;
var offset = block.offset - blockGroup.offset;
if( thisB.window.bwg.uncompressBufSize > 0 ) {
// var beforeInf = new Date();
data = inflate( blockGroup.data, offset+2, block.size - 2);
offset = 0;
//console.log( 'inflate', 2, block.size - 2);
// var afterInf = new Date();
// dlog('inflate: ' + (afterInf - beforeInf) + 'ms');
} else {
data = blockGroup.data;
}
if( thisB.window.isSummary ) {
thisB.parseSummaryBlock( data, offset );
} else if (thisB.window.bwg.type == 'bigwig') {
thisB.parseBigWigBlock( data, offset );
} else if (thisB.window.bwg.type == 'bigbed') {
thisB.parseBigBedBlock( data, offset );
} else {
dlog("Don't know what to do with " + thisB.window.bwg.type);
}
});
});
thisB.callback( thisB.features );
}, thisB.errorCallback );
}
});
return RequestWorker;
});