UNPKG

@gmod/jbrowse

Version:

JBrowse - client-side genome browser

310 lines (269 loc) 11.5 kB
const LRU = cjsRequire('lru-cache')
const { IndexedCramFile, CraiIndex } = cjsRequire('@gmod/cram')
const { CramSizeLimitError } = cjsRequire('@gmod/cram/errors')
const { Buffer } = cjsRequire('buffer')

// module-level cache of IndexedCramFile objects, shared by every store
// instance created from this module (see the store constructor below)
const cramIndexedFilesCache = LRU(5)

const BlobFilehandleWrapper = cjsRequire('../../Model/BlobFilehandleWrapper')

/**
 * Feature object that wraps a single CRAM record. Attribute values are
 * not copied out of the record up front; each `_get_<name>` method
 * computes its value from the record when `get('<name>')` is called.
 */
class CramSlightlyLazyFeature {
    _get_name() { return this.record.readName }

    // the record's alignmentStart is shifted down by one to produce the feature start
    _get_start() { return this.record.alignmentStart - 1 }

    _get_end() { return this.record.alignmentStart + this.record.lengthOnRef - 1 }

    _get_cram_read_features() { return this.record.readFeatures }

    _get_type() { return 'match' }

    _get_mapping_quality() { return this.record.mappingQuality }

    // flags are rendered as hexadecimal strings
    _get_flags() { return `0x${this.record.flags.toString(16)}` }

    _get_cramFlags() { return `0x${this.record.cramFlags.toString(16)}` }

    _get_strand() { return this.record.isReverseComplemented() ? -1 : 1 }

    _get_read_group_id() { return this.record.readGroupId }

    // each quality score is offset by 33 and the results are space-separated
    _get_qual() { return (this.record.qualityScores || []).map(q => q + 33).join(' ') }

    // NOTE: the original class declared `_get_seq` twice; only the later
    // declaration (this one, using getReadBases()) ever took effect. It is
    // kept in the position of the first declaration so that the ordering
    // of tags() is unchanged.
    _get_seq() { return this.record.getReadBases() }

    _get_seq_id() { return this._store._refIdToName(this.record.sequenceId) }

    _get_qc_failed() { return this.record.isFailedQc() }

    _get_secondary_alignment() { return this.record.isSecondary() }

    _get_supplementary_alignment() { return this.record.isSupplementary() }

    _get_multi_segment_template() { return this.record.isPaired() }

    _get_multi_segment_all_correctly_aligned() { return this.record.isProperlyPaired() }

    _get_multi_segment_next_segment_unmapped() { return this.record.isMateUnmapped() }

    _get_multi_segment_first() { return this.record.isRead1() }

    _get_multi_segment_last() { return this.record.isRead2() }

    _get_multi_segment_next_segment_reversed() { return this.record.isMateReverseComplemented() }

    _get_unmapped() { return this.record.isSegmentUnmapped() }

    _get_next_seq_id() {
        return this.record.mate
            ? this._store._refIdToName(this.record.mate.sequenceId)
            : undefined
    }

    // "<mate ref name>:<mate alignmentStart>", or undefined when the record has no mate
    _get_next_segment_position() {
        return this.record.mate
            ? (this._store._refIdToName(this.record.mate.sequenceId) + ':' + this.record.mate.alignmentStart)
            : undefined
    }

    _get_tags() { return this.record.tags }

    /**
     * @param {object} record the CRAM record to wrap
     * @param {object} store the store that produced the record; its
     *   `_refIdToName` method is used to translate sequence IDs to names
     */
    constructor(record, store) {
        this.record = record
        this._store = store
    }

    /**
     * @returns {string[]} the attribute names available from this feature,
     *   derived from the `_get_*` methods declared on the class prototype
     */
    tags() {
        return Object.getOwnPropertyNames(CramSlightlyLazyFeature.prototype)
            .filter(prop => /^_get_/.test(prop))
            .map(methodName => methodName.replace('_get_', ''))
    }

    /**
     * @returns {*} the record's uniqueId plus one
     */
    id() {
        return this.record.uniqueId + 1
    }

    /**
     * Fetch the value of one attribute.
     *
     * The exact-case getter is tried first so that mixed-case names reported
     * by tags() (e.g. `cramFlags`) can be read back; the lowercased name is
     * then tried so lookups that previously worked (e.g. `Seq`) still do.
     *
     * @param {string} field attribute name
     * @returns {*} the attribute value, or undefined if there is no getter for it
     */
    get(field) {
        const exactName = `_get_${field}`
        if (this[exactName]) return this[exactName]()
        const lowerName = `_get_${field.toLowerCase()}`
        if (this[lowerName]) return this[lowerName]()
        return undefined
    }

    parent() {}

    children() {}
}

define([
    'dojo/_base/declare',
    'JBrowse/Errors',
    'JBrowse/Store/SeqFeature',
    'JBrowse/Store/DeferredStatsMixin',
    'JBrowse/Store/DeferredFeaturesMixin',
    'JBrowse/Store/SeqFeature/GlobalStatsEstimationMixin',
    'JBrowse/Model/XHRBlob',
    'JBrowse/Model/SimpleFeature',
],
function(
    declare,
    Errors,
    SeqFeatureStore,
    DeferredStatsMixin,
    DeferredFeaturesMixin,
    GlobalStatsEstimationMixin,
    XHRBlob,
    SimpleFeature,
) {

return declare([ SeqFeatureStore, DeferredStatsMixin, DeferredFeaturesMixin, GlobalStatsEstimationMixin ],

/**
 * @lends JBrowse.Store.SeqFeature.CRAM
 */
{
    /**
     * Data backend for reading feature data directly from a
     * web-accessible CRAM file.
     *
     * Accepts `args.cram` or `args.urlTemplate` for the data, and
     * `args.crai`, `args.craiUrlTemplate`, or `args.urlTemplate` (with
     * `.crai` appended) for the index. Throws if no data source or no
     * index can be determined.
     *
     * @constructs
     */
    constructor: function( args ) {
        let dataBlob
        if (args.cram)
            dataBlob = new BlobFilehandleWrapper(args.cram)
        else if (args.urlTemplate)
            dataBlob = new BlobFilehandleWrapper(
                new XHRBlob(this.resolveUrl(args.urlTemplate), { expectRanges: true })
            )
        else
            throw new Error('must provide either `cram` or `urlTemplate`')

        let indexBlob
        if (args.crai)
            indexBlob = new BlobFilehandleWrapper(args.crai)
        else if (args.craiUrlTemplate)
            indexBlob = new BlobFilehandleWrapper(new XHRBlob(this.resolveUrl(args.craiUrlTemplate)))
        else if (args.urlTemplate)
            indexBlob = new BlobFilehandleWrapper(new XHRBlob(this.resolveUrl(args.urlTemplate + '.crai')))
        else
            throw new Error('no index provided, must provide a CRAM index')

        this.source = dataBlob.toString()

        // LRU-cache the CRAM object so we don't have to re-download the
        // index when we switch chromosomes
        const cacheKey = `data: ${dataBlob}, index: ${indexBlob}`
        this.cram = cramIndexedFilesCache.get(cacheKey)
        if (!this.cram) {
            this.cram = new IndexedCramFile({
                cramFilehandle: dataBlob,
                index: new CraiIndex({ filehandle: indexBlob }),
                seqFetch: this._seqFetch.bind(this),
                checkSequenceMD5: false,
            })
            cramIndexedFilesCache.set(cacheKey, this.cram)
        }

        // pre-download the index before running the statistics estimation so that the stats
        // estimation doesn't time out
        this.cram.hasDataForReferenceSequence(0)
            .then(() => this.cram.cram.getSamHeader())
            .then(samHeader => {
                this._setSamHeader(samHeader)
            })
            .then(() => {
                this._deferred.features.resolve({ success: true })
            })
            .then(() => this._estimateGlobalStats())
            .then(stats => {
                this.globalStats = stats
                this._deferred.stats.resolve({ success: true })
            })
            .catch(err => {
                // any failure in the chain rejects both deferreds
                this._deferred.features.reject(err)
                this._deferred.stats.reject(err)
            })

        this.storeTimeout = args.storeTimeout || 3000
    },

    // process the parsed SAM header from the cram file
    _setSamHeader(samHeader) {
        this._samHeader = {}

        // use the @SQ lines in the header to figure out the
        // mapping between ref seq ID numbers and names
        const refSeqIdToName = []
        const refSeqNameToId = {}
        const sqLines = samHeader.filter(l => l.tag === 'SQ')
        sqLines.forEach((sqLine, seqId) => {
            sqLine.data.forEach(item => {
                if (item.tag === 'SN') {
                    // this is the seq name
                    const seqName = item.value
                    refSeqNameToId[seqName] = seqId
                    refSeqIdToName[seqId] = seqName
                }
            })
        })
        // the maps are only attached when at least one name was found, so
        // the lookups below can fall back to the browser's refseq order
        if (refSeqIdToName.length) {
            this._samHeader.refSeqIdToName = refSeqIdToName
            this._samHeader.refSeqNameToId = refSeqNameToId
        }
    },

    _refNameToId(refName) {
        // use info from the SAM header if possible, but fall back to using
        // the ref seq order from when the browser's refseqs were loaded
        if (this._samHeader.refSeqNameToId)
            return this._samHeader.refSeqNameToId[refName]
        else
            return this.browser.getRefSeqNumber(refName)
    },

    _refIdToName(refId) {
        // use info from the SAM header if possible, but fall back to using
        // the ref seq order from when the browser's refseqs were loaded
        if (this._samHeader.refSeqIdToName) {
            return this._samHeader.refSeqIdToName[refId]
        } else {
            let ref = this.browser.getRefSeqById(refId)
            return ref ? ref.name : undefined
        }
    },

    // promise-returning wrapper around the callback-style browser.getStore
    _getRefSeqStore() {
        return new Promise((resolve, reject) => {
            this.browser.getStore('refseqs', resolve, reject)
        })
    },

    // used by the CRAM backend to fetch a region of the underlying reference
    // sequence. needed for some of its calculations
    //
    // resolves to the sequence string, or undefined when there is no refseq
    // store or the sequence ID cannot be mapped to a name; throws when the
    // assembled sequence is not exactly (end - start) long
    async _seqFetch(seqId, start, end) {
        start -= 1 // convert from 1-based closed to interbase

        const refSeqStore = await this._getRefSeqStore()
        if (!refSeqStore) return undefined
        const refName = this._refIdToName(seqId)
        if (!refName) return undefined

        const seqChunks = await new Promise((resolve, reject) => {
            let features = []
            refSeqStore.getFeatures(
                { ref: refName, start: start - 1, end },
                f => features.push(f),
                () => resolve(features),
                reject
            )
        })

        // order the chunks by start and keep only the part of each one that
        // lies inside [start, end)
        const trimmed = []
        seqChunks
            .sort((a, b) => a.get('start') - b.get('start'))
            .forEach(chunk => {
                let chunkStart = chunk.get('start')
                let chunkEnd = chunk.get('end')
                let trimStart = Math.max(start - chunkStart, 0)
                let trimEnd = Math.min(end - chunkStart, chunkEnd - chunkStart)
                let trimLength = trimEnd - trimStart
                let chunkSeq = chunk.get('seq') || chunk.get('residues')
                trimmed.push(chunkSeq.substr(trimStart, trimLength))
            })

        const sequence = trimmed.join('')
        if (sequence.length !== (end - start))
            throw new Error(`sequence fetch failed: fetching ${(start - 1).toLocaleString()}-${end.toLocaleString()} only returned ${sequence.length.toLocaleString()} bases, but should have returned ${(end - start).toLocaleString()}`)
        return sequence
    },

    /**
     * Interrogate whether a store has data for a given reference
     * sequence.  Calls the given callback with either true or false.
     *
     * The name-to-ID lookup is performed only after the stats deferred has
     * resolved, because it reads the SAM header that the constructor loads
     * asynchronously. The callback is invoked exactly once: with false when
     * the name is unknown, otherwise with the CRAM file's answer.
     */
    hasRefSeq: function( seqName, callback, errorCallback ) {
        seqName = this.browser.regularizeReferenceName( seqName )

        this._deferred.stats
            .then(() => {
                const refSeqNumber = this._refNameToId(seqName)
                if (refSeqNumber === undefined) return false
                return this.cram.hasDataForReferenceSequence(refSeqNumber)
            })
            .then(callback, errorCallback)
    },

    // called by getFeatures from the DeferredFeaturesMixin
    _getFeatures: function( query, featCallback, endCallback, errorCallback ) {
        const seqName = query.ref || this.refSeq.name
        const refSeqNumber = this._refNameToId(seqName)
        if (refSeqNumber === undefined) {
            // unknown reference sequence: finish with no features
            endCallback()
            return
        }

        this.cram.getRecordsForRange(refSeqNumber, query.start + 1, query.end)
            .then(records => {
                for (let i = 0; i < records.length; i += 1) {
                    featCallback(this._cramRecordToFeature(records[i]))
                }

                endCallback()
            })
            .catch(err => {
                // map the CramSizeLimitError to JBrowse Errors.DataOverflow
                if (err instanceof CramSizeLimitError) {
                    err = new Errors.DataOverflow(err)
                }

                errorCallback(err)
            })
    },

    _cramRecordToFeature(record) {
        return new CramSlightlyLazyFeature(record, this)
    },

    // returns the URL configuration of this store, read from this.config
    saveStore: function() {
        return {
            urlTemplate: this.config.cram.url,
            craiUrlTemplate: this.config.crai.url
        }
    }

})
})