// apr144-bam
// Version:
// Parser for BAM and BAM index (bai) files
// 980 lines • 39.8 kB
// JavaScript
import { Buffer } from 'buffer';
import crc32 from 'buffer-crc32';
import { unzip, unzipChunkSlice } from '@gmod/bgzf-filehandle';
import { LocalFile, RemoteFile } from 'generic-filehandle';
import AbortablePromiseCache from 'abortable-promise-cache';
import QuickLRU from 'quick-lru';
// locals
import BAI from './bai';
import CSI from './csi';
import BAMFeature from './record';
import { parseHeaderText } from './sam';
import { checkAbortSignal, timeout, makeOpts } from './util';
// Magic number expected at offset 0 of the decompressed BAM header: the bytes
// 'B', 'A', 'M', 0x01 read as a little-endian 32-bit integer.
export const BAM_MAGIC = 0x014d4142;
// 64 KiB; used as extra slack when sizing the buffers for header reads.
const blockLen = 65536;
/**
 * Drains an async iterable into a single flat array.
 *
 * Each yielded value is appended the way `Array.prototype.concat` would do
 * it: an array contributes its elements (one level deep), anything else is
 * appended as a single element. Appending in place keeps the cost linear in
 * the total number of items, instead of re-copying the accumulated array for
 * every yielded batch.
 *
 * @param {AsyncIterable<any>} gen - source of batches (or single values)
 * @returns {Promise<any[]>} all yielded items, in order
 */
async function gen2array(gen) {
  const out = [];
  for await (const batch of gen) {
    if (Array.isArray(batch)) {
      // a loop rather than push(...batch): spreading a very large batch can
      // exceed the engine's argument-count limit
      for (const item of batch) {
        out.push(item);
      }
    } else {
      out.push(batch);
    }
  }
  return out;
}
// Shared failure path for every NullFilehandle method.
function failNeverCalled() {
  throw new Error('never called');
}
/**
 * Placeholder file handle whose every method throws. BamFile installs it as
 * `this.bam` in htsget mode, where no local or remote BAM file is opened.
 */
class NullFilehandle {
  read() {
    return failNeverCalled();
  }
  stat() {
    return failNeverCalled();
  }
  readFile() {
    return failNeverCalled();
  }
  close() {
    return failNeverCalled();
  }
}
/**
 * Reader for a BAM alignment file together with its BAI or CSI index.
 *
 * The BAM data and the index can each be supplied as an already-open file
 * handle, a local path, or a URL. When no index is named explicitly, a BAI
 * index is looked for next to the BAM file (`<bam>.bai`).
 */
export default class BamFile {
  /**
   * @param {object} args
   * @param {object} [args.bamFilehandle] - open handle for the BAM data
   * @param {string} [args.bamPath] - local path of the BAM file
   * @param {string} [args.bamUrl] - absolute URL of the BAM file; may embed
   *   `user:password@` credentials, which are sent as HTTP Basic auth
   * @param {string} [args.baiPath] - local path of a BAI index
   * @param {object} [args.baiFilehandle] - open handle for a BAI index
   * @param {string} [args.baiUrl] - absolute URL of a BAI index (credentials
   *   handled like `bamUrl`)
   * @param {string} [args.csiPath] - local path of a CSI index
   * @param {object} [args.csiFilehandle] - open handle for a CSI index
   * @param {string} [args.csiUrl] - URL of a CSI index
   * @param {boolean} [args.htsget] - htsget mode: no BAM file or index is
   *   opened by this class
   * @param {number} [args.yieldThreadTime=100] - while decoding records, pause
   *   briefly whenever this many milliseconds have elapsed; 0 disables it
   * @param {function(string): string} [args.renameRefSeqs] - maps reference
   *   names read from the BAM header to the names exposed to callers
   * @throws {Error} when no BAM source is given, or no index can be inferred
   */
  constructor({ bamFilehandle, bamPath, bamUrl, baiPath, baiFilehandle, baiUrl, csiPath, csiFilehandle, csiUrl, htsget, yieldThreadTime = 100, renameRefSeqs = n => n, }) {
    this.htsget = false;
    // decoded records per index chunk, keyed by chunk.toString(); at most 50
    // chunks are kept
    this.featureCache = new AbortablePromiseCache({
      cache: new QuickLRU({
        maxSize: 50,
      }),
      fill: async (args, signal) => {
        const { chunk, opts } = args;
        const { data, cpositions, dpositions } = await this._readChunk({
          chunk,
          opts: { ...opts, signal },
        });
        return this.readBamFeatures(data, cpositions, dpositions, chunk);
      },
    });
    this.renameRefSeq = renameRefSeqs;

    // --- BAM data source ---
    if (bamFilehandle) {
      this.bam = bamFilehandle;
    }
    else if (bamPath) {
      this.bam = new LocalFile(bamPath);
    }
    else if (bamUrl) {
      // bamUrl itself is left untouched: the index inference below needs the
      // original URL, including any embedded credentials
      this.bam = BamFile._openRemote(bamUrl);
    }
    else if (htsget) {
      this.htsget = true;
      this.bam = new NullFilehandle();
    }
    else {
      throw new Error('unable to initialize bam');
    }

    // --- index: explicit CSI, then explicit BAI, then "<bam>.bai" ---
    if (csiFilehandle) {
      this.index = new CSI({ filehandle: csiFilehandle });
    }
    else if (csiPath) {
      this.index = new CSI({ filehandle: new LocalFile(csiPath) });
    }
    else if (csiUrl) {
      this.index = new CSI({ filehandle: new RemoteFile(csiUrl) });
    }
    else if (baiFilehandle) {
      this.index = new BAI({ filehandle: baiFilehandle });
    }
    else if (baiPath) {
      this.index = new BAI({ filehandle: new LocalFile(baiPath) });
    }
    else if (baiUrl) {
      this.index = new BAI({ filehandle: BamFile._openRemote(baiUrl) });
    }
    else if (bamPath) {
      this.index = new BAI({ filehandle: new LocalFile(`${bamPath}.bai`) });
    }
    else if (bamUrl) {
      this.index = new BAI({ filehandle: BamFile._openRemote(bamUrl, '.bai') });
    }
    else if (htsget) {
      this.htsget = true;
    }
    else {
      throw new Error('unable to infer index format');
    }
    this.yieldThreadTime = yieldThreadTime;
  }

  /**
   * Percent-decodes one userinfo component of a URL. `URL#username` and
   * `URL#password` are returned percent-encoded, while Basic auth needs the
   * literal text. A malformed escape sequence is passed through unchanged.
   */
  static _decodeUserinfo(value) {
    try {
      return decodeURIComponent(value);
    }
    catch (e) {
      return value;
    }
  }

  /**
   * Opens a RemoteFile for `url`, optionally with `pathSuffix` (e.g. ".bai")
   * appended.
   *
   * If the URL embeds both a username and a password, they are removed from
   * the requested URL and sent as an HTTP Basic Authorization header instead;
   * the suffix then goes at the end of the path, before any query string.
   * Otherwise the URL is used as given and the suffix is appended to the
   * whole string.
   *
   * @throws {TypeError} when `url` is not a valid absolute URL
   */
  static _openRemote(url, pathSuffix = '') {
    const parsed = new URL(url);
    const { username, password } = parsed;
    if (!(username && password)) {
      return new RemoteFile(pathSuffix ? `${url}${pathSuffix}` : url);
    }
    const bareUrl = `${parsed.protocol}//${parsed.host}${parsed.pathname}${pathSuffix}${parsed.search}`;
    const userinfo = `${BamFile._decodeUserinfo(username)}:${BamFile._decodeUserinfo(password)}`;
    // URL escapes encode UTF-8 bytes, so UTF-8 + base64 reproduces the
    // credential bytes; for ASCII credentials this is the same as btoa()
    const token = Buffer.from(userinfo, 'utf8').toString('base64');
    return new RemoteFile(bareUrl, {
      overrides: {
        credentials: 'include',
        headers: {
          Authorization: `Basic ${token}`,
        },
      },
    });
  }

  /**
   * Built-in hg38 reference list as [name, length] pairs; a pair's position
   * in the list is its reference index.
   */
  static _hg38RefSeqs() {
    return [
      ['chr1', 248956422], ['chr2', 242193529], ['chr3', 198295559],
      ['chr4', 190214555], ['chr5', 181538259], ['chr6', 170805979],
      ['chr7', 159345973], ['chr8', 145138636], ['chr9', 138394717],
      ['chr10', 133797422], ['chr11', 135086622], ['chr12', 133275309],
      ['chr13', 114364328], ['chr14', 107043718], ['chr15', 101991189],
      ['chr16', 90338345], ['chr17', 83257441], ['chr18', 80373285],
      ['chr19', 58617616], ['chr20', 64444167], ['chr21', 46709983],
      ['chr22', 50818468], ['chrX', 156040895], ['chrY', 57227415],
      ['chrM', 16569],
      ['GL000008.2', 209709], ['GL000009.2', 201709], ['GL000194.1', 191469],
      ['GL000195.1', 182896], ['GL000205.2', 185591], ['GL000208.1', 92689],
      ['GL000213.1', 164239], ['GL000214.1', 137718], ['GL000216.2', 176608],
      ['GL000218.1', 161147], ['GL000219.1', 179198], ['GL000220.1', 161802],
      ['GL000221.1', 155397], ['GL000224.1', 179693], ['GL000225.1', 211173],
      ['GL000226.1', 15008],
      ['KI270302.1', 2274], ['KI270303.1', 1942], ['KI270304.1', 2165],
      ['KI270305.1', 1472], ['KI270310.1', 1201], ['KI270311.1', 12399],
      ['KI270312.1', 998], ['KI270315.1', 2276], ['KI270316.1', 1444],
      ['KI270317.1', 37690], ['KI270320.1', 4416], ['KI270322.1', 21476],
      ['KI270329.1', 1040], ['KI270330.1', 1652], ['KI270333.1', 2699],
      ['KI270334.1', 1368], ['KI270335.1', 1048], ['KI270336.1', 1026],
      ['KI270337.1', 1121], ['KI270338.1', 1428], ['KI270340.1', 1428],
      ['KI270362.1', 3530], ['KI270363.1', 1803], ['KI270364.1', 2855],
      ['KI270366.1', 8320], ['KI270371.1', 2805], ['KI270372.1', 1650],
      ['KI270373.1', 1451], ['KI270374.1', 2656], ['KI270375.1', 2378],
      ['KI270376.1', 1136], ['KI270378.1', 1048], ['KI270379.1', 1045],
      ['KI270381.1', 1930], ['KI270382.1', 4215], ['KI270383.1', 1750],
      ['KI270384.1', 1658], ['KI270385.1', 990], ['KI270386.1', 1788],
      ['KI270387.1', 1537], ['KI270388.1', 1216], ['KI270389.1', 1298],
      ['KI270390.1', 2387], ['KI270391.1', 1484], ['KI270392.1', 971],
      ['KI270393.1', 1308], ['KI270394.1', 970], ['KI270395.1', 1143],
      ['KI270396.1', 1880], ['KI270411.1', 2646], ['KI270412.1', 1179],
      ['KI270414.1', 2489], ['KI270417.1', 2043], ['KI270418.1', 2145],
      ['KI270419.1', 1029], ['KI270420.1', 2321], ['KI270422.1', 1445],
      ['KI270423.1', 981], ['KI270424.1', 2140], ['KI270425.1', 1884],
      ['KI270429.1', 1361], ['KI270435.1', 92983], ['KI270438.1', 112505],
      ['KI270442.1', 392061], ['KI270448.1', 7992], ['KI270465.1', 1774],
      ['KI270466.1', 1233], ['KI270467.1', 3920], ['KI270468.1', 4055],
      ['KI270507.1', 5353], ['KI270508.1', 1951], ['KI270509.1', 2318],
      ['KI270510.1', 2415], ['KI270511.1', 8127], ['KI270512.1', 22689],
      ['KI270515.1', 6361], ['KI270516.1', 1300], ['KI270517.1', 3253],
      ['KI270518.1', 2186], ['KI270519.1', 138126], ['KI270521.1', 7642],
      ['KI270522.1', 5674], ['KI270528.1', 2983], ['KI270529.1', 1899],
      ['KI270530.1', 2168], ['KI270538.1', 91309], ['KI270539.1', 993],
      ['KI270544.1', 1202], ['KI270548.1', 1599], ['KI270579.1', 31033],
      ['KI270580.1', 1553], ['KI270581.1', 7046], ['KI270582.1', 6504],
      ['KI270583.1', 1400], ['KI270584.1', 4513], ['KI270587.1', 2969],
      ['KI270588.1', 6158], ['KI270589.1', 44474], ['KI270590.1', 4685],
      ['KI270591.1', 5796], ['KI270593.1', 3041], ['KI270706.1', 175055],
      ['KI270707.1', 32032], ['KI270708.1', 127682], ['KI270709.1', 66860],
      ['KI270710.1', 40176], ['KI270711.1', 42210], ['KI270712.1', 176043],
      ['KI270713.1', 40745], ['KI270714.1', 41717], ['KI270715.1', 161471],
      ['KI270716.1', 153799], ['KI270717.1', 40062], ['KI270718.1', 38054],
      ['KI270719.1', 176845], ['KI270720.1', 39050], ['KI270721.1', 100316],
      ['KI270722.1', 194050], ['KI270723.1', 38115], ['KI270724.1', 39555],
      ['KI270725.1', 172810], ['KI270726.1', 43739], ['KI270727.1', 448248],
      ['KI270728.1', 1872759], ['KI270729.1', 280839], ['KI270730.1', 112551],
      ['KI270731.1', 150754], ['KI270732.1', 41543], ['KI270733.1', 179772],
      ['KI270734.1', 165050], ['KI270735.1', 42811], ['KI270736.1', 181920],
      ['KI270737.1', 103838], ['KI270738.1', 99375], ['KI270739.1', 73985],
      ['KI270740.1', 37240], ['KI270741.1', 157432], ['KI270742.1', 186739],
      ['KI270743.1', 210658], ['KI270744.1', 168472], ['KI270745.1', 41891],
      ['KI270746.1', 66486], ['KI270747.1', 198735], ['KI270748.1', 93321],
      ['KI270749.1', 158759], ['KI270750.1', 148850], ['KI270751.1', 150742],
      ['KI270752.1', 27745], ['KI270753.1', 62944], ['KI270754.1', 40191],
      ['KI270755.1', 36723], ['KI270756.1', 79590], ['KI270757.1', 71251],
    ];
  }

  /**
   * Reads and parses the BAM header (uncached; callers normally use
   * getHeader()).
   *
   * When `opts.assemblyName` is 'hg38', `chrToIndex`/`indexToChr` are first
   * seeded from the built-in hg38 list, before any I/O. If the header is then
   * read successfully, the tables are replaced by the file's own reference
   * list.
   *
   * @param {object} [origOpts] - options (or an abort signal, as accepted by
   *   makeOpts); forwarded to the index and file reads
   * @returns {Promise<*>} the result of parseHeaderText on the header text,
   *   or undefined when there is no index (htsget mode)
   * @throws {Error} 'Error reading header' when nothing could be read, or
   *   'Not a BAM file' when the magic number does not match
   */
  async getHeaderPre(origOpts) {
    const opts = makeOpts(origOpts);
    if (opts.assemblyName === 'hg38') {
      const chrToIndex = {};
      const indexToChr = [];
      BamFile._hg38RefSeqs().forEach(([refName, length], i) => {
        chrToIndex[refName] = i;
        indexToChr.push({ refName, length });
      });
      this.chrToIndex = chrToIndex;
      this.indexToChr = indexToChr;
    }
    if (!this.index) {
      return undefined;
    }
    const indexData = await this.index.parse(opts);
    // the header ends no later than the block holding the first alignment, so
    // when the index knows that position only a prefix of the file is needed
    const ret = indexData.firstDataLine
      ? indexData.firstDataLine.blockPosition + 65535
      : undefined;
    let buffer;
    if (ret) {
      const s = ret + blockLen;
      const res = await this.bam.read(Buffer.alloc(s), 0, s, 0, opts);
      if (!res.bytesRead) {
        throw new Error('Error reading header');
      }
      buffer = res.buffer.subarray(0, Math.min(res.bytesRead, ret));
    }
    else {
      buffer = await this.bam.readFile(opts);
    }
    const uncba = await unzip(buffer);
    if (uncba.readInt32LE(0) !== BAM_MAGIC) {
      throw new Error('Not a BAM file');
    }
    const headLen = uncba.readInt32LE(4);
    this.header = uncba.toString('utf8', 8, 8 + headLen);
    const { chrToIndex, indexToChr } = await this._readRefSeqs(headLen + 8, 65535, opts);
    this.chrToIndex = chrToIndex;
    this.indexToChr = indexToChr;
    return parseHeaderText(this.header);
  }

  /**
   * Cached wrapper around getHeaderPre(): the first call starts the read and
   * later calls share the same promise. A failed read clears the cache so the
   * next call retries.
   */
  getHeader(opts) {
    if (!this.headerP) {
      this.headerP = this.getHeaderPre(opts).catch(e => {
        this.headerP = undefined;
        throw e;
      });
    }
    return this.headerP;
  }

  /** Resolves to the raw header text, loading the header first if needed. */
  async getHeaderText(opts = {}) {
    await this.getHeader(opts);
    return this.header;
  }

  /**
   * Parses the reference-sequence list that follows the header text.
   *
   * The full length of the refseq block is not given in advance, so this
   * reads `refSeqBytes` bytes from the start of the file and retries with
   * twice as many whenever the list runs past the end of what was
   * decompressed.
   *
   * @param {number} start - offset of the reference count within the
   *   decompressed header
   * @param {number} refSeqBytes - number of compressed bytes to fetch
   * @param {object} opts - forwarded to the file read
   * @returns {Promise<{chrToIndex: Object<string, number>,
   *   indexToChr: Array<{refName: string, length: number}>}>}
   * @throws {Error} when nothing can be read, or when the whole file has been
   *   read and the list is still incomplete
   */
  async _readRefSeqs(start, refSeqBytes, opts) {
    if (start > refSeqBytes) {
      return this._readRefSeqs(start, refSeqBytes * 2, opts);
    }
    const size = refSeqBytes + blockLen;
    const { bytesRead, buffer } = await this.bam.read(Buffer.alloc(size), 0, refSeqBytes, 0, opts);
    if (!bytesRead) {
      throw new Error('Error reading refseqs from header');
    }
    const uncba = await unzip(buffer.subarray(0, Math.min(bytesRead, refSeqBytes)));
    const refetch = () => {
      // a short read means the end of the file was reached; asking for more
      // bytes cannot produce more data
      if (bytesRead < refSeqBytes) {
        throw new Error('BAM header is truncated: reference sequence list extends past the end of the file');
      }
      console.warn(`BAM header is very big. Re-fetching ${refSeqBytes} bytes.`);
      return this._readRefSeqs(start, refSeqBytes * 2, opts);
    };
    // Bounds are checked before every read: Buffer#readInt32LE throws a
    // RangeError past the end, which would bypass the re-fetch entirely.
    if (start + 4 > uncba.length) {
      return refetch();
    }
    const nRef = uncba.readInt32LE(start);
    let p = start + 4;
    const chrToIndex = {};
    const indexToChr = [];
    for (let i = 0; i < nRef; i += 1) {
      if (p + 4 > uncba.length) {
        return refetch();
      }
      const lName = uncba.readInt32LE(p);
      // one entry = int32 name length, name (NUL-terminated), int32 ref length
      if (p + 8 + lName > uncba.length) {
        return refetch();
      }
      const refName = this.renameRefSeq(uncba.toString('utf8', p + 4, p + 4 + lName - 1));
      const lRef = uncba.readInt32LE(p + lName + 4);
      chrToIndex[refName] = i;
      indexToChr.push({ refName, length: lRef });
      p = p + 8 + lName;
    }
    return { chrToIndex, indexToChr };
  }

  /**
   * Collects everything streamRecordsForRange() yields into one flat array.
   */
  async getRecordsForRange(chr, min, max, opts) {
    return gen2array(this.streamRecordsForRange(chr, min, max, opts));
  }

  /**
   * Streams the records overlapping [min, max] on reference `chr`, one array
   * of records per index chunk.
   *
   * The header is loaded first (cached after the first call) so reference
   * names can be resolved. The exception is `opts.assemblyName === 'hg38'`,
   * where the header read is skipped and whatever reference tables are
   * already set on this instance are used.
   *
   * Yields a single empty array when `chr` is unknown or there is no index.
   */
  async *streamRecordsForRange(chr, min, max, opts) {
    const assemblyName = opts ? opts.assemblyName : undefined;
    if (assemblyName !== 'hg38') {
      await this.getHeader(opts || undefined);
    }
    const chrId = this.chrToIndex ? this.chrToIndex[chr] : undefined;
    if (chrId === undefined || !this.index) {
      yield [];
    }
    else {
      const chunks = await this.index.blocksForRange(chrId, min - 1, max, opts);
      yield* this._fetchChunkFeatures(chunks, chrId, min, max, opts);
    }
  }

  /**
   * Decodes each index chunk (through the feature cache) and yields the
   * records on `chrId` whose start is below `max` and whose end is at least
   * `min`. Stops at the first record on `chrId` that starts at or past `max`.
   * With `opts.viewAsPairs`, a final batch of mate records found by
   * fetchPairs() is yielded as well.
   */
  async *_fetchChunkFeatures(chunks, chrId, min, max, opts = {}) {
    const { viewAsPairs } = opts;
    const feats = [];
    let done = false;
    for (const chunk of chunks) {
      const records = await this.featureCache.get(chunk.toString(), { chunk, opts }, opts.signal);
      const recs = [];
      for (const feature of records) {
        if (feature.seq_id() === chrId) {
          if (feature.get('start') >= max) {
            // past end of range, can stop iterating
            done = true;
            break;
          }
          else if (feature.get('end') >= min) {
            // must be in range
            recs.push(feature);
          }
        }
      }
      feats.push(recs);
      yield recs;
      if (done) {
        break;
      }
    }
    checkAbortSignal(opts.signal);
    if (viewAsPairs) {
      yield this.fetchPairs(chrId, feats, opts);
    }
  }

  /**
   * Finds mates for reads whose name occurs exactly once within one of the
   * already-fetched batches.
   *
   * @param {number} chrId - reference index of the queried region
   * @param {Array<Array<object>>} feats - batches of records already fetched
   * @param {object} opts - `pairAcrossChr` allows mates on other references;
   *   otherwise the mate must be on `chrId` and start less than
   *   `maxInsertSize` (default 200000) from the read's start
   * @returns {Promise<Array<object>>} mate records that were not already in
   *   `feats`
   */
  async fetchPairs(chrId, feats, opts) {
    const { pairAcrossChr, maxInsertSize = 200000 } = opts;
    // prototype-less objects: read names are arbitrary strings and must not
    // collide with Object.prototype members such as "constructor"
    const unmatedPairs = Object.create(null);
    const readIds = Object.create(null);
    for (const ret of feats) {
      const readNames = Object.create(null);
      for (const element of ret) {
        const name = element.name();
        const id = element.id();
        readNames[name] = (readNames[name] || 0) + 1;
        readIds[id] = 1;
      }
      for (const [k, v] of Object.entries(readNames)) {
        if (v === 1) {
          unmatedPairs[k] = true;
        }
      }
    }
    const matePromises = [];
    for (const ret of feats) {
      for (const f of ret) {
        const name = f.name();
        const start = f.get('start');
        const pnext = f._next_pos();
        const rnext = f._next_refid();
        if (this.index &&
          unmatedPairs[name] &&
          (pairAcrossChr ||
            (rnext === chrId && Math.abs(start - pnext) < maxInsertSize))) {
          matePromises.push(this.index.blocksForRange(rnext, pnext, pnext + 1, opts));
        }
      }
    }
    // filter out duplicate chunks (the blocks are lists of chunks, blocks are
    // concatenated, then filter dup chunks)
    const map = new Map();
    const res = await Promise.all(matePromises);
    for (const m of res.flat()) {
      if (!map.has(m.toString())) {
        map.set(m.toString(), m);
      }
    }
    const mateFeatPromises = await Promise.all([...map.values()].map(async (c) => {
      const { data, cpositions, dpositions, chunk } = await this._readChunk({
        chunk: c,
        opts,
      });
      const mateRecs = [];
      for (const feature of await this.readBamFeatures(data, cpositions, dpositions, chunk)) {
        if (unmatedPairs[feature.get('name')] && !readIds[feature.id()]) {
          mateRecs.push(feature);
        }
      }
      return mateRecs;
    }));
    return mateFeatPromises.flat();
  }

  /**
   * Reads up to `size` bytes of the BAM file starting at `position`; the
   * returned buffer is trimmed to the number of bytes actually read.
   */
  async _readRegion(position, size, opts = {}) {
    const { bytesRead, buffer } = await this.bam.read(Buffer.alloc(size), 0, size, position, opts);
    return buffer.subarray(0, Math.min(bytesRead, size));
  }

  /**
   * Fetches the compressed bytes of one index chunk and decompresses them
   * with unzipChunkSlice.
   *
   * @returns {Promise<{data: Buffer, cpositions: number[],
   *   dpositions: number[], chunk: object}>}
   */
  async _readChunk({ chunk, opts }) {
    const buffer = await this._readRegion(chunk.minv.blockPosition, chunk.fetchedSize(), opts);
    const { buffer: data, cpositions, dpositions, } = await unzipChunkSlice(buffer, chunk);
    return { data, cpositions, dpositions, chunk };
  }

  /**
   * Splits the decompressed bytes of a chunk into BAMFeature records.
   *
   * @param {Buffer} ba - decompressed chunk data
   * @param {number[]} cpositions - compressed (file) offsets of the bgzip
   *   blocks in the chunk
   * @param {number[]} dpositions - decompressed offsets of those blocks
   * @param {object} chunk - the index chunk `ba` was read from
   * @returns {Promise<Array<object>>} the records fully contained in `ba`
   */
  async readBamFeatures(ba, cpositions, dpositions, chunk) {
    let blockStart = 0;
    const sink = [];
    let pos = 0;
    let last = +Date.now();
    while (blockStart + 4 < ba.length) {
      // each record is prefixed with its own length as an int32
      const blockSize = ba.readInt32LE(blockStart);
      const blockEnd = blockStart + 4 + blockSize - 1;
      // increment position to the current decompressed status
      if (dpositions) {
        while (blockStart + chunk.minv.dataPosition >= dpositions[pos++]) { }
        pos--;
      }
      // only try to read the feature if we have all the bytes for it
      if (blockEnd < ba.length) {
        const feature = new BAMFeature({
          bytes: {
            byteArray: ba,
            start: blockStart,
            end: blockEnd,
          },
          // the below results in an automatically calculated file-offset based
          // ID if the info for that is available, otherwise crc32 of the
          // features
          //
          // cpositions[pos] refers to actual file offset of a bgzip block
          // boundaries
          //
          // we multiply by (1 <<8) in order to make sure each block has a
          // "unique" address space so that data in that block could never
          // overlap
          //
          // then the blockStart-dpositions is an uncompressed file offset from
          // that bgzip block boundary, and since the cpositions are multiplied
          // by (1 << 8) these uncompressed offsets get a unique space
          //
          // this has an extra chunk.minv.dataPosition added on because it
          // blockStart starts at 0 instead of chunk.minv.dataPosition
          //
          // the +1 is just to avoid any possible uniqueId 0 but this does not
          // realistically happen
          fileOffset: cpositions.length > 0
            ? cpositions[pos] * (1 << 8) +
              (blockStart - dpositions[pos]) +
              chunk.minv.dataPosition +
              1
            : // must be slice, not subarray for buffer polyfill on web
              crc32.signed(ba.slice(blockStart, blockEnd)),
        });
        sink.push(feature);
        // NOTE(review): timeout(1) presumably yields to the event loop so a
        // long decode does not block it; confirm against ./util
        if (this.yieldThreadTime && +Date.now() - last > this.yieldThreadTime) {
          await timeout(1);
          last = +Date.now();
        }
      }
      blockStart = blockEnd + 1;
    }
    return sink;
  }

  /**
   * Whether the index reports data for `seqName`. Resolves to false for a
   * name missing from the reference table, and to undefined when there is no
   * index.
   */
  async hasRefSeq(seqName) {
    const seqId = this.chrToIndex ? this.chrToIndex[seqName] : undefined;
    if (seqId === undefined) {
      return false;
    }
    return this.index ? this.index.hasRefSeq(seqId) : undefined;
  }

  /**
   * The index's line count for `seqName`; 0 for an unknown name or when
   * there is no index.
   */
  async lineCount(seqName) {
    const seqId = this.chrToIndex ? this.chrToIndex[seqName] : undefined;
    if (seqId === undefined || !this.index) {
      return 0;
    }
    return this.index.lineCount(seqId);
  }

  /**
   * Coverage estimate from the index for `seqName` between `start` and
   * `end`; an empty array for an unknown name or when there is no index.
   */
  async indexCov(seqName, start, end) {
    if (!this.index) {
      return [];
    }
    await this.index.parse();
    const seqId = this.chrToIndex ? this.chrToIndex[seqName] : undefined;
    return seqId === undefined ? [] : this.index.indexCov(seqId, start, end);
  }

  /**
   * The index chunks overlapping the given range on `seqName`; an empty
   * array for an unknown name or when there is no index.
   */
  async blocksForRange(seqName, start, end, opts) {
    if (!this.index) {
      return [];
    }
    await this.index.parse();
    const seqId = this.chrToIndex ? this.chrToIndex[seqName] : undefined;
    return seqId === undefined
      ? []
      : this.index.blocksForRange(seqId, start, end, opts);
  }
}
//# sourceMappingURL=bamFile.js.map