UNPKG

apr144-bam

Version:

Parser for BAM and BAM index (bai) files

980 lines 39.8 kB
import { Buffer } from 'buffer';
import crc32 from 'buffer-crc32';
import { unzip, unzipChunkSlice } from '@gmod/bgzf-filehandle';
import { LocalFile, RemoteFile } from 'generic-filehandle';
import AbortablePromiseCache from 'abortable-promise-cache';
import QuickLRU from 'quick-lru';
// locals
import BAI from './bai';
import CSI from './csi';
import BAMFeature from './record';
import { parseHeaderText } from './sam';
import { checkAbortSignal, timeout, makeOpts } from './util';

export const BAM_MAGIC = 21840194;
const blockLen = 1 << 16;

/**
 * Built-in reference sequence table used when `opts.assemblyName === 'hg38'`.
 * Each entry is `[refName, length]`; the position of an entry in this list is
 * the reference id assigned to it.
 */
const HG38_REF_SEQS = [
  ['chr1', 248956422], ['chr2', 242193529], ['chr3', 198295559],
  ['chr4', 190214555], ['chr5', 181538259], ['chr6', 170805979],
  ['chr7', 159345973], ['chr8', 145138636], ['chr9', 138394717],
  ['chr10', 133797422], ['chr11', 135086622], ['chr12', 133275309],
  ['chr13', 114364328], ['chr14', 107043718], ['chr15', 101991189],
  ['chr16', 90338345], ['chr17', 83257441], ['chr18', 80373285],
  ['chr19', 58617616], ['chr20', 64444167], ['chr21', 46709983],
  ['chr22', 50818468], ['chrX', 156040895], ['chrY', 57227415],
  ['chrM', 16569],
  ['GL000008.2', 209709], ['GL000009.2', 201709], ['GL000194.1', 191469],
  ['GL000195.1', 182896], ['GL000205.2', 185591], ['GL000208.1', 92689],
  ['GL000213.1', 164239], ['GL000214.1', 137718], ['GL000216.2', 176608],
  ['GL000218.1', 161147], ['GL000219.1', 179198], ['GL000220.1', 161802],
  ['GL000221.1', 155397], ['GL000224.1', 179693], ['GL000225.1', 211173],
  ['GL000226.1', 15008],
  ['KI270302.1', 2274], ['KI270303.1', 1942], ['KI270304.1', 2165],
  ['KI270305.1', 1472], ['KI270310.1', 1201], ['KI270311.1', 12399],
  ['KI270312.1', 998], ['KI270315.1', 2276], ['KI270316.1', 1444],
  ['KI270317.1', 37690], ['KI270320.1', 4416], ['KI270322.1', 21476],
  ['KI270329.1', 1040], ['KI270330.1', 1652], ['KI270333.1', 2699],
  ['KI270334.1', 1368], ['KI270335.1', 1048], ['KI270336.1', 1026],
  ['KI270337.1', 1121], ['KI270338.1', 1428], ['KI270340.1', 1428],
  ['KI270362.1', 3530], ['KI270363.1', 1803], ['KI270364.1', 2855],
  ['KI270366.1', 8320], ['KI270371.1', 2805], ['KI270372.1', 1650],
  ['KI270373.1', 1451], ['KI270374.1', 2656], ['KI270375.1', 2378],
  ['KI270376.1', 1136], ['KI270378.1', 1048], ['KI270379.1', 1045],
  ['KI270381.1', 1930], ['KI270382.1', 4215], ['KI270383.1', 1750],
  ['KI270384.1', 1658], ['KI270385.1', 990], ['KI270386.1', 1788],
  ['KI270387.1', 1537], ['KI270388.1', 1216], ['KI270389.1', 1298],
  ['KI270390.1', 2387], ['KI270391.1', 1484], ['KI270392.1', 971],
  ['KI270393.1', 1308], ['KI270394.1', 970], ['KI270395.1', 1143],
  ['KI270396.1', 1880], ['KI270411.1', 2646], ['KI270412.1', 1179],
  ['KI270414.1', 2489], ['KI270417.1', 2043], ['KI270418.1', 2145],
  ['KI270419.1', 1029], ['KI270420.1', 2321], ['KI270422.1', 1445],
  ['KI270423.1', 981], ['KI270424.1', 2140], ['KI270425.1', 1884],
  ['KI270429.1', 1361], ['KI270435.1', 92983], ['KI270438.1', 112505],
  ['KI270442.1', 392061], ['KI270448.1', 7992], ['KI270465.1', 1774],
  ['KI270466.1', 1233], ['KI270467.1', 3920], ['KI270468.1', 4055],
  ['KI270507.1', 5353], ['KI270508.1', 1951], ['KI270509.1', 2318],
  ['KI270510.1', 2415], ['KI270511.1', 8127], ['KI270512.1', 22689],
  ['KI270515.1', 6361], ['KI270516.1', 1300], ['KI270517.1', 3253],
  ['KI270518.1', 2186], ['KI270519.1', 138126], ['KI270521.1', 7642],
  ['KI270522.1', 5674], ['KI270528.1', 2983], ['KI270529.1', 1899],
  ['KI270530.1', 2168], ['KI270538.1', 91309], ['KI270539.1', 993],
  ['KI270544.1', 1202], ['KI270548.1', 1599], ['KI270579.1', 31033],
  ['KI270580.1', 1553], ['KI270581.1', 7046], ['KI270582.1', 6504],
  ['KI270583.1', 1400], ['KI270584.1', 4513], ['KI270587.1', 2969],
  ['KI270588.1', 6158], ['KI270589.1', 44474], ['KI270590.1', 4685],
  ['KI270591.1', 5796], ['KI270593.1', 3041], ['KI270706.1', 175055],
  ['KI270707.1', 32032], ['KI270708.1', 127682], ['KI270709.1', 66860],
  ['KI270710.1', 40176], ['KI270711.1', 42210], ['KI270712.1', 176043],
  ['KI270713.1', 40745], ['KI270714.1', 41717], ['KI270715.1', 161471],
  ['KI270716.1', 153799], ['KI270717.1', 40062], ['KI270718.1', 38054],
  ['KI270719.1', 176845], ['KI270720.1', 39050], ['KI270721.1', 100316],
  ['KI270722.1', 194050], ['KI270723.1', 38115], ['KI270724.1', 39555],
  ['KI270725.1', 172810], ['KI270726.1', 43739], ['KI270727.1', 448248],
  ['KI270728.1', 1872759], ['KI270729.1', 280839], ['KI270730.1', 112551],
  ['KI270731.1', 150754], ['KI270732.1', 41543], ['KI270733.1', 179772],
  ['KI270734.1', 165050], ['KI270735.1', 42811], ['KI270736.1', 181920],
  ['KI270737.1', 103838], ['KI270738.1', 99375], ['KI270739.1', 73985],
  ['KI270740.1', 37240], ['KI270741.1', 157432], ['KI270742.1', 186739],
  ['KI270743.1', 210658], ['KI270744.1', 168472], ['KI270745.1', 41891],
  ['KI270746.1', 66486], ['KI270747.1', 198735], ['KI270748.1', 93321],
  ['KI270749.1', 158759], ['KI270750.1', 148850], ['KI270751.1', 150742],
  ['KI270752.1', 27745], ['KI270753.1', 62944], ['KI270754.1', 40191],
  ['KI270755.1', 36723], ['KI270756.1', 79590], ['KI270757.1', 71251],
];

/**
 * Drain an async iterable into a single flat array.
 *
 * Array items are appended element by element (one level of flattening, as
 * `Array.prototype.concat` would do); any other item is appended as-is.
 * Pushing in place avoids recopying the accumulated array for every chunk.
 *
 * @param {AsyncIterable<*>} gen - source of arrays (or single values)
 * @returns {Promise<Array>} every yielded element, in order
 */
async function gen2array(gen) {
  const out = [];
  for await (const x of gen) {
    if (Array.isArray(x)) {
      for (const item of x) {
        out.push(item);
      }
    } else {
      out.push(x);
    }
  }
  return out;
}

/**
 * Open a remote file, turning `user:password@` URL credentials into an HTTP
 * Basic `Authorization` header.
 *
 * When both a username and a password are present the URL is rebuilt from
 * protocol, host, pathname and query string only (credentials removed) and
 * `pathSuffix` is inserted right after the pathname. Otherwise the URL is
 * passed through unchanged with `pathSuffix` appended at the very end.
 *
 * @param {string} url - absolute URL; `new URL(url)` throws if it is invalid
 * @param {string} [pathSuffix=''] - extension to add, e.g. `'.bai'`
 * @returns {RemoteFile}
 */
function openRemoteFile(url, pathSuffix = '') {
  const parsed = new URL(url);
  const { username, password } = parsed;
  if (username && password) {
    const bareUrl = `${parsed.protocol}//${parsed.host}${parsed.pathname}${pathSuffix}${parsed.search}`;
    return new RemoteFile(bareUrl, {
      overrides: {
        credentials: 'include',
        headers: {
          Authorization: 'Basic ' + btoa(username + ':' + password),
        },
      },
    });
  }
  return new RemoteFile(`${url}${pathSuffix}`);
}

/**
 * Placeholder filehandle installed in htsget mode; every method throws
 * because no code path is expected to touch the BAM file directly then.
 */
class NullFilehandle {
  read() {
    throw new Error('never called');
  }
  stat() {
    throw new Error('never called');
  }
  readFile() {
    throw new Error('never called');
  }
  close() {
    throw new Error('never called');
  }
}

/**
 * Reader for a BAM file together with its BAI or CSI index.
 */
export default class BamFile {
  /**
   * The BAM source is taken from the first of `bamFilehandle`, `bamPath`,
   * `bamUrl`, `htsget` that is set. The index is taken from the first of
   * `csiFilehandle`, `csiPath`, `csiUrl`, `baiFilehandle`, `baiPath`,
   * `baiUrl`; failing those it is inferred as `<bamPath>.bai` or
   * `<bamUrl>.bai`; in htsget mode no index is created.
   *
   * @param {object} args
   * @param {object} [args.bamFilehandle] - pre-opened filehandle for the BAM
   * @param {string} [args.bamPath] - local path of the BAM
   * @param {string} [args.bamUrl] - URL of the BAM; may embed `user:pass@`
   * @param {string} [args.baiPath]
   * @param {object} [args.baiFilehandle]
   * @param {string} [args.baiUrl] - URL of the BAI; may embed `user:pass@`
   * @param {string} [args.csiPath]
   * @param {object} [args.csiFilehandle]
   * @param {string} [args.csiUrl]
   * @param {*} [args.htsget] - truthy to run without a BAM filehandle
   * @param {number} [args.yieldThreadTime=100] - milliseconds of record
   *   parsing after which `readBamFeatures` pauses via `timeout(1)`
   * @param {function(string): string} [args.renameRefSeqs] - applied to
   *   every reference name read from the BAM header
   * @throws {Error} when no BAM source or no index can be determined
   */
  constructor({
    bamFilehandle,
    bamPath,
    bamUrl,
    baiPath,
    baiFilehandle,
    baiUrl,
    csiPath,
    csiFilehandle,
    csiUrl,
    htsget,
    yieldThreadTime = 100,
    renameRefSeqs = n => n,
  }) {
    this.htsget = false;
    // Parsed records are cached per chunk (keyed by chunk.toString()).
    this.featureCache = new AbortablePromiseCache({
      cache: new QuickLRU({
        maxSize: 50,
      }),
      fill: async (args, signal) => {
        const { chunk, opts } = args;
        const { data, cpositions, dpositions } = await this._readChunk({
          chunk,
          opts: { ...opts, signal },
        });
        return this.readBamFeatures(data, cpositions, dpositions, chunk);
      },
    });
    this.renameRefSeq = renameRefSeqs;

    if (bamFilehandle) {
      this.bam = bamFilehandle;
    } else if (bamPath) {
      this.bam = new LocalFile(bamPath);
    } else if (bamUrl) {
      // bamUrl itself is left untouched so that the index inference below
      // still sees any credentials embedded in it.
      this.bam = openRemoteFile(bamUrl);
    } else if (htsget) {
      this.htsget = true;
      this.bam = new NullFilehandle();
    } else {
      throw new Error('unable to initialize bam');
    }

    if (csiFilehandle) {
      this.index = new CSI({ filehandle: csiFilehandle });
    } else if (csiPath) {
      this.index = new CSI({ filehandle: new LocalFile(csiPath) });
    } else if (csiUrl) {
      this.index = new CSI({ filehandle: new RemoteFile(csiUrl) });
    } else if (baiFilehandle) {
      this.index = new BAI({ filehandle: baiFilehandle });
    } else if (baiPath) {
      this.index = new BAI({ filehandle: new LocalFile(baiPath) });
    } else if (baiUrl) {
      this.index = new BAI({ filehandle: openRemoteFile(baiUrl) });
    } else if (bamPath) {
      this.index = new BAI({ filehandle: new LocalFile(`${bamPath}.bai`) });
    } else if (bamUrl) {
      // Inferred index: same location and same credentials as the BAM.
      this.index = new BAI({ filehandle: openRemoteFile(bamUrl, '.bai') });
    } else if (htsget) {
      this.htsget = true;
    } else {
      throw new Error('unable to infer index format');
    }
    this.yieldThreadTime = yieldThreadTime;
  }

  /**
   * Read and parse the BAM header (uncached; use `getHeader`).
   *
   * Sets `this.header`, `this.chrToIndex` and `this.indexToChr`.
   *
   * @param {object} [origOpts] - normalized with `makeOpts`; forwarded to
   *   the index and file reads
   * @returns {Promise<*>} result of `parseHeaderText`, or `undefined` when
   *   there is no index
   * @throws {Error} when nothing can be read or the BAM magic is wrong
   */
  async getHeaderPre(origOpts) {
    const opts = makeOpts(origOpts);
    if (opts.assemblyName === 'hg38') {
      // Installed synchronously, before the first await. When an index is
      // present these are replaced further down by the references read from
      // the file itself.
      this.chrToIndex = Object.fromEntries(
        HG38_REF_SEQS.map(([refName], i) => [refName, i]),
      );
      this.indexToChr = HG38_REF_SEQS.map(([refName, length]) => ({
        refName,
        length,
      }));
    }
    if (!this.index) {
      return;
    }
    const indexData = await this.index.parse(opts);
    // Upper bound for the header: everything before the first data line's
    // block, plus 65535 bytes.
    const ret = indexData.firstDataLine
      ? indexData.firstDataLine.blockPosition + 65535
      : undefined;
    let buffer;
    if (ret) {
      const s = ret + blockLen;
      const res = await this.bam.read(Buffer.alloc(s), 0, s, 0, opts);
      if (!res.bytesRead) {
        throw new Error('Error reading header');
      }
      buffer = res.buffer.subarray(0, Math.min(res.bytesRead, ret));
    } else {
      // No first data line known from the index: read the whole file.
      buffer = await this.bam.readFile(opts);
    }
    const uncba = await unzip(buffer);
    if (uncba.readInt32LE(0) !== BAM_MAGIC) {
      throw new Error('Not a BAM file');
    }
    const headLen = uncba.readInt32LE(4);
    this.header = uncba.toString('utf8', 8, 8 + headLen);
    const { chrToIndex, indexToChr } = await this._readRefSeqs(
      headLen + 8,
      65535,
      opts,
    );
    this.chrToIndex = chrToIndex;
    this.indexToChr = indexToChr;
    return parseHeaderText(this.header);
  }

  /**
   * Cached wrapper around `getHeaderPre`. A failed attempt clears the cache
   * so that a later call retries.
   *
   * @param {object} [opts]
   * @returns {Promise<*>}
   */
  getHeader(opts) {
    if (!this.headerP) {
      this.headerP = this.getHeaderPre(opts).catch(e => {
        this.headerP = undefined;
        throw e;
      });
    }
    return this.headerP;
  }

  /**
   * @param {object} [opts]
   * @returns {Promise<string|undefined>} the raw header text
   */
  async getHeaderText(opts = {}) {
    await this.getHeader(opts);
    return this.header;
  }

  /**
   * Parse the reference sequence list that follows the header text.
   *
   * The full length of the refseq block is not given in advance so this
   * grabs a chunk and doubles it if all refseqs haven't been processed.
   *
   * @param {number} start - offset of the reference count in the
   *   uncompressed data
   * @param {number} refSeqBytes - number of compressed bytes to fetch
   * @param {object} [opts]
   * @returns {Promise<{chrToIndex: Object<string, number>,
   *   indexToChr: Array<{refName: string, length: number}>}>}
   * @throws {Error} when nothing could be read
   */
  async _readRefSeqs(start, refSeqBytes, opts) {
    if (start > refSeqBytes) {
      return this._readRefSeqs(start, refSeqBytes * 2, opts);
    }
    const size = refSeqBytes + blockLen;
    const { bytesRead, buffer } = await this.bam.read(
      Buffer.alloc(size),
      0,
      refSeqBytes,
      0,
      opts,
    );
    if (!bytesRead) {
      throw new Error('Error reading refseqs from header');
    }
    const uncba = await unzip(
      buffer.subarray(0, Math.min(bytesRead, refSeqBytes)),
    );
    const nRef = uncba.readInt32LE(start);
    let p = start + 4;
    const chrToIndex = {};
    const indexToChr = [];
    for (let i = 0; i < nRef; i += 1) {
      const lName = uncba.readInt32LE(p);
      // lName - 1 leaves out the last byte of the name field
      // (presumably the NUL terminator - confirm against the BAM spec).
      const refName = this.renameRefSeq(
        uncba.toString('utf8', p + 4, p + 4 + lName - 1),
      );
      const lRef = uncba.readInt32LE(p + lName + 4);
      chrToIndex[refName] = i;
      indexToChr.push({ refName, length: lRef });
      p = p + 8 + lName;
      if (p > uncba.length) {
        console.warn(`BAM header is very big. Re-fetching ${refSeqBytes} bytes.`);
        return this._readRefSeqs(start, refSeqBytes * 2, opts);
      }
    }
    return { chrToIndex, indexToChr };
  }

  /**
   * Collect every record of `streamRecordsForRange` into one array.
   *
   * @param {string} chr - reference sequence name
   * @param {number} min
   * @param {number} max
   * @param {object} [opts]
   * @returns {Promise<Array>}
   */
  async getRecordsForRange(chr, min, max, opts) {
    return gen2array(this.streamRecordsForRange(chr, min, max, opts));
  }

  /**
   * Stream the records overlapping a range, one array per index chunk.
   *
   * The header is awaited first unless the caller asked for `hg38` and a
   * reference table is already in place. Yields a single empty array when
   * `chr` is unknown or there is no index.
   *
   * @param {string} chr - reference sequence name
   * @param {number} min
   * @param {number} max
   * @param {object} [opts] - `assemblyName`, `signal`, `viewAsPairs`, ...
   */
  async *streamRecordsForRange(chr, min, max, opts) {
    const assemblyName = opts ? opts.assemblyName : undefined;
    // Only skip the header fetch when the hg38 fast path is actually usable;
    // otherwise chrToIndex would be missing and every query would come back
    // empty.
    if (assemblyName !== 'hg38' || !this.chrToIndex) {
      await this.getHeader(opts);
    }
    const chrId = this.chrToIndex ? this.chrToIndex[chr] : undefined;
    if (chrId === undefined || !this.index) {
      yield [];
    } else {
      const chunks = await this.index.blocksForRange(chrId, min - 1, max, opts);
      yield* this._fetchChunkFeatures(chunks, chrId, min, max, opts);
    }
  }

  /**
   * Read the given chunks in order and yield, per chunk, the records on
   * `chrId` whose `end` is >= `min`; stops at the first record on `chrId`
   * whose `start` is >= `max`. With `opts.viewAsPairs` a final array of
   * mate records from `fetchPairs` is yielded.
   *
   * @param {Array} chunks - index chunks to read
   * @param {number} chrId - reference id
   * @param {number} min
   * @param {number} max
   * @param {object} [opts]
   */
  async *_fetchChunkFeatures(chunks, chrId, min, max, opts = {}) {
    const { viewAsPairs } = opts;
    const feats = [];
    let done = false;
    for (const chunk of chunks) {
      const records = await this.featureCache.get(
        chunk.toString(),
        { chunk, opts },
        opts.signal,
      );
      const recs = [];
      for (const feature of records) {
        if (feature.seq_id() === chrId) {
          if (feature.get('start') >= max) {
            // past end of range, can stop iterating
            done = true;
            break;
          } else if (feature.get('end') >= min) {
            // must be in range
            recs.push(feature);
          }
        }
      }
      feats.push(recs);
      yield recs;
      if (done) {
        break;
      }
    }
    checkAbortSignal(opts.signal);
    if (viewAsPairs) {
      yield this.fetchPairs(chrId, feats, opts);
    }
  }

  /**
   * Fetch mates for reads whose name occurs exactly once within a chunk.
   *
   * @param {number} chrId - reference id of the queried region
   * @param {Array<Array>} feats - records already fetched, one array per chunk
   * @param {object} opts - `pairAcrossChr` allows mates on any reference;
   *   `maxInsertSize` (default 200000) bounds the start-to-mate distance
   *   otherwise
   * @returns {Promise<Array>} mate records not already present in `feats`
   */
  async fetchPairs(chrId, feats, opts) {
    const { pairAcrossChr, maxInsertSize = 200000 } = opts;
    // Map/Set rather than plain objects: read names are arbitrary strings
    // and must not collide with Object.prototype members.
    const unmatedPairs = new Set();
    const readIds = new Set();
    for (const chunkFeats of feats) {
      const nameCounts = new Map();
      for (const element of chunkFeats) {
        const name = element.name();
        nameCounts.set(name, (nameCounts.get(name) || 0) + 1);
        readIds.add(element.id());
      }
      for (const [name, count] of nameCounts) {
        if (count === 1) {
          unmatedPairs.add(name);
        }
      }
    }

    const matePromises = [];
    for (const chunkFeats of feats) {
      for (const f of chunkFeats) {
        const name = f.name();
        const start = f.get('start');
        const pnext = f._next_pos();
        const rnext = f._next_refid();
        if (
          this.index &&
          unmatedPairs.has(name) &&
          (pairAcrossChr ||
            (rnext === chrId && Math.abs(start - pnext) < maxInsertSize))
        ) {
          matePromises.push(
            this.index.blocksForRange(rnext, pnext, pnext + 1, opts),
          );
        }
      }
    }

    // filter out duplicate chunks (the blocks are lists of chunks, blocks are
    // concatenated, then filter dup chunks)
    const uniqueChunks = new Map();
    const blocks = await Promise.all(matePromises);
    for (const m of blocks.flat()) {
      const key = m.toString();
      if (!uniqueChunks.has(key)) {
        uniqueChunks.set(key, m);
      }
    }

    const mateFeats = await Promise.all(
      [...uniqueChunks.values()].map(async c => {
        const { data, cpositions, dpositions, chunk } = await this._readChunk({
          chunk: c,
          opts,
        });
        const mateRecs = [];
        const records = await this.readBamFeatures(
          data,
          cpositions,
          dpositions,
          chunk,
        );
        for (const feature of records) {
          if (
            unmatedPairs.has(feature.get('name')) &&
            !readIds.has(feature.id())
          ) {
            mateRecs.push(feature);
          }
        }
        return mateRecs;
      }),
    );
    return mateFeats.flat();
  }

  /**
   * Read `size` bytes of the BAM starting at `position`.
   *
   * @param {number} position
   * @param {number} size
   * @param {object} [opts]
   * @returns {Promise<Buffer>} the bytes actually read (at most `size`)
   */
  async _readRegion(position, size, opts = {}) {
    const { bytesRead, buffer } = await this.bam.read(
      Buffer.alloc(size),
      0,
      size,
      position,
      opts,
    );
    return buffer.subarray(0, Math.min(bytesRead, size));
  }

  /**
   * Read one index chunk and decompress it with `unzipChunkSlice`.
   *
   * @param {{chunk: object, opts: object}} args
   * @returns {Promise<{data: Buffer, cpositions: Array<number>,
   *   dpositions: Array<number>, chunk: object}>}
   */
  async _readChunk({ chunk, opts }) {
    const buffer = await this._readRegion(
      chunk.minv.blockPosition,
      chunk.fetchedSize(),
      opts,
    );
    const {
      buffer: data,
      cpositions,
      dpositions,
    } = await unzipChunkSlice(buffer, chunk);
    return { data, cpositions, dpositions, chunk };
  }

  /**
   * Split decompressed chunk data into `BAMFeature` records.
   *
   * @param {Buffer} ba - decompressed data
   * @param {Array<number>} cpositions - offsets paired with `dpositions`
   *   (see the fileOffset comment below)
   * @param {Array<number>} dpositions - offsets into the decompressed data
   * @param {object} chunk - the chunk `ba` was read from
   * @returns {Promise<Array>} the records that are completely contained in `ba`
   */
  async readBamFeatures(ba, cpositions, dpositions, chunk) {
    let blockStart = 0;
    const sink = [];
    let pos = 0;
    let last = +Date.now();
    while (blockStart + 4 < ba.length) {
      const blockSize = ba.readInt32LE(blockStart);
      const blockEnd = blockStart + 4 + blockSize - 1;
      // increment position to the current decompressed status
      if (dpositions) {
        while (blockStart + chunk.minv.dataPosition >= dpositions[pos++]) { }
        pos--;
      }
      // only try to read the feature if we have all the bytes for it
      if (blockEnd < ba.length) {
        const feature = new BAMFeature({
          bytes: {
            byteArray: ba,
            start: blockStart,
            end: blockEnd,
          },
          // the below results in an automatically calculated file-offset
          // based ID if the info for that is available, otherwise crc32 of
          // the features
          //
          // cpositions[pos] refers to actual file offset of a bgzip block
          // boundaries
          //
          // we multiply by (1 << 8) in order to make sure each block has a
          // "unique" address space so that data in that block could never
          // overlap
          //
          // then the blockStart-dpositions is an uncompressed file offset
          // from that bgzip block boundary, and since the cpositions are
          // multiplied by (1 << 8) these uncompressed offsets get a unique
          // space
          //
          // this has an extra chunk.minv.dataPosition added on because
          // blockStart starts at 0 instead of chunk.minv.dataPosition
          //
          // the +1 is just to avoid any possible uniqueId 0 but this does
          // not realistically happen
          fileOffset:
            cpositions.length > 0
              ? cpositions[pos] * (1 << 8) +
                (blockStart - dpositions[pos]) +
                chunk.minv.dataPosition +
                1
              : // must be slice, not subarray for buffer polyfill on web
                crc32.signed(ba.slice(blockStart, blockEnd)),
        });
        sink.push(feature);
        // Pause briefly once more than yieldThreadTime ms have gone by since
        // the last pause.
        if (this.yieldThreadTime && +Date.now() - last > this.yieldThreadTime) {
          await timeout(1);
          last = +Date.now();
        }
      }
      blockStart = blockEnd + 1;
    }
    return sink;
  }

  /**
   * @param {string} seqName - reference sequence name
   * @returns {Promise<*>} `false` when the name is not in `chrToIndex`,
   *   otherwise what the index's `hasRefSeq` reports (`undefined` without an
   *   index)
   */
  async hasRefSeq(seqName) {
    const seqId = this.chrToIndex ? this.chrToIndex[seqName] : undefined;
    if (seqId === undefined) {
      return false;
    }
    return this.index ? this.index.hasRefSeq(seqId) : undefined;
  }

  /**
   * @param {string} seqName - reference sequence name
   * @returns {Promise<*>} `0` when the name is unknown or there is no index,
   *   otherwise what the index's `lineCount` reports
   */
  async lineCount(seqName) {
    const seqId = this.chrToIndex ? this.chrToIndex[seqName] : undefined;
    return seqId === undefined || !this.index
      ? 0
      : this.index.lineCount(seqId);
  }

  /**
   * @param {string} seqName - reference sequence name
   * @param {number} [start]
   * @param {number} [end]
   * @returns {Promise<*>} `[]` when there is no index or the name is
   *   unknown, otherwise what the index's `indexCov` reports
   */
  async indexCov(seqName, start, end) {
    if (!this.index) {
      return [];
    }
    await this.index.parse();
    const seqId = this.chrToIndex ? this.chrToIndex[seqName] : undefined;
    return seqId === undefined ? [] : this.index.indexCov(seqId, start, end);
  }

  /**
   * @param {string} seqName - reference sequence name
   * @param {number} start
   * @param {number} end
   * @param {object} [opts]
   * @returns {Promise<*>} `[]` when there is no index or the name is
   *   unknown, otherwise what the index's `blocksForRange` reports
   */
  async blocksForRange(seqName, start, end, opts) {
    if (!this.index) {
      return [];
    }
    await this.index.parse();
    const seqId = this.chrToIndex ? this.chrToIndex[seqName] : undefined;
    return seqId === undefined
      ? []
      : this.index.blocksForRange(seqId, start, end, opts);
  }
}
//# sourceMappingURL=bamFile.js.map