// apr144-bam
// Version:
// Parser for BAM and BAM index (bai) files
// 1,065 lines • 48.7 kB
// JavaScript
"use strict";
// Helper (appears to be compiler-emitted) that runs a generator function as an
// asynchronous routine and returns a promise for its result.
//
// thisArg / _arguments: `this` value and argument list the generator is applied with.
// P: promise constructor to use; falls back to the global Promise when falsy.
// generator: generator function whose `yield`ed values are awaited.
//
// The returned promise resolves with the generator's return value and rejects
// with anything the generator throws.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
// Wrap a plain value in a P so `.then` can always be called on it.
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
// Feed a settled value (or rejection) back into the generator and keep stepping.
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
// Finished generators resolve the outer promise; otherwise wait on the yielded value.
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
// Helper (appears to be compiler-emitted) that returns an async iterator for `o`.
//
// When `o` has a Symbol.asyncIterator method, that method's result is returned.
// Otherwise a synchronous iterator is obtained (through a `__values` function
// if one is defined, else o[Symbol.iterator]()) and its next/throw/return
// methods are wrapped so that each returns a promise of `{ value, done }`.
// Throws TypeError when the runtime has no Symbol.asyncIterator.
var __asyncValues = (this && this.__asyncValues) || function (o) {
if (!Symbol.asyncIterator) throw new TypeError("Symbol.asyncIterator is not defined.");
var m = o[Symbol.asyncIterator], i;
return m ? m.call(o) : (o = typeof __values === "function" ? __values(o) : o[Symbol.iterator](), i = {}, verb("next"), verb("throw"), verb("return"), i[Symbol.asyncIterator] = function () { return this; }, i);
// Install a promise-returning wrapper for method `n`, only if the sync iterator has it.
function verb(n) { i[n] = o[n] && function (v) { return new Promise(function (resolve, reject) { v = o[n](v), settle(resolve, reject, v.done, v.value); }); }; }
// Wait for the (possibly promised) value before resolving with the iterator result.
function settle(resolve, reject, d, v) { Promise.resolve(v).then(function(v) { resolve({ value: v, done: d }); }, reject); }
};
// Marker wrapper used by the async-generator helpers in this file: a value
// wrapped in `__await` is recognised via `instanceof __await` and its payload
// is read back from `.v`. Works both with and without `new`.
var __await = (this && this.__await) || function (v) {
    if (!(this instanceof __await)) {
        // Called as a plain function: construct the wrapper on the caller's behalf.
        return new __await(v);
    }
    this.v = v;
    return this;
};
// Helper (appears to be compiler-emitted) used together with `yield*` inside a
// down-levelled async generator (see streamRecordsForRange).
//
// Returns a synchronous iterator over `o`. Its methods alternate, via the
// flag `p`, between two steps: first they produce `{ value: __await(o[n](v)),
// done: false }` so the surrounding driver can await the inner call, then on
// the following call they pass the resumed value straight through (or hand it
// to `f`, which for "throw" rethrows it).
var __asyncDelegator = (this && this.__asyncDelegator) || function (o) {
var i, p;
return i = {}, verb("next"), verb("throw", function (e) { throw e; }), verb("return"), i[Symbol.iterator] = function () { return this; }, i;
// When `o` lacks method `n`, the fallback `f` (possibly undefined) is installed instead.
function verb(n, f) { i[n] = o[n] ? function (v) { return (p = !p) ? { value: __await(o[n](v)), done: false } : f ? f(v) : v; } : f; }
};
// Helper (appears to be compiler-emitted) that exposes a plain generator as an
// async iterator.
//
// thisArg / _arguments: `this` value and argument list the generator is applied with.
// generator: generator function; values it yields wrapped in `__await` are
//   awaited internally, every other yielded value is delivered to the consumer.
//
// Calls to next/throw/return are queued in `q` as [method, value, resolve,
// reject] and processed one at a time. Throws TypeError when the runtime has
// no Symbol.asyncIterator.
var __asyncGenerator = (this && this.__asyncGenerator) || function (thisArg, _arguments, generator) {
if (!Symbol.asyncIterator) throw new TypeError("Symbol.asyncIterator is not defined.");
var g = generator.apply(thisArg, _arguments || []), i, q = [];
return i = {}, verb("next"), verb("throw"), verb("return", awaitReturn), i[Symbol.asyncIterator] = function () { return this; }, i;
// `return(v)` waits for `v` to settle before forwarding it to the generator.
function awaitReturn(f) { return function (v) { return Promise.resolve(v).then(f, reject); }; }
// Enqueue the request; start processing right away only if the queue was empty.
function verb(n, f) { if (g[n]) { i[n] = function (v) { return new Promise(function (a, b) { q.push([n, v, a, b]) > 1 || resume(n, v); }); }; if (f) i[n] = f(i[n]); } }
function resume(n, v) { try { step(g[n](v)); } catch (e) { settle(q[0][3], e); } }
// An `__await` marker means "await and continue"; anything else answers the queued request.
function step(r) { r.value instanceof __await ? Promise.resolve(r.value.v).then(fulfill, reject) : settle(q[0][2], r); }
function fulfill(value) { resume("next", value); }
function reject(value) { resume("throw", value); }
// Answer the request at the head of the queue, then start the next one if any.
function settle(f, v) { if (f(v), q.shift(), q.length) resume(q[0][0], q[0][1]); }
};
// Normalises the result of `require`: objects flagged with `__esModule` are
// returned untouched, anything else is wrapped as `{ default: mod }` so the
// rest of the file can always read `.default`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.BAM_MAGIC = void 0;
const buffer_1 = require("buffer");
const buffer_crc32_1 = __importDefault(require("buffer-crc32"));
const bgzf_filehandle_1 = require("@gmod/bgzf-filehandle");
const generic_filehandle_1 = require("generic-filehandle");
const abortable_promise_cache_1 = __importDefault(require("abortable-promise-cache"));
const quick_lru_1 = __importDefault(require("quick-lru"));
// locals
const bai_1 = __importDefault(require("./bai"));
const csi_1 = __importDefault(require("./csi"));
const record_1 = __importDefault(require("./record"));
const sam_1 = require("./sam");
const util_1 = require("./util");
// Value expected when the first four bytes of the decompressed file are read
// as a little-endian int32 (see getHeaderPre): 21840194 === 0x014d4142, i.e.
// the byte sequence "BAM\x01".
exports.BAM_MAGIC = 21840194;
// 65536 bytes; added as slack to the buffer sizes used when reading the header.
// NOTE(review): presumably the maximum size of one BGZF block — confirm.
const blockLen = 1 << 16;
/**
 * Drains an (async) iterable and concatenates everything it yields into one
 * array. Yielded arrays are flattened one level by `Array.prototype.concat`.
 *
 * @param gen iterable or async iterable to consume
 * @returns promise for the concatenated array
 */
function gen2array(gen) {
    return __awaiter(this, void 0, void 0, function* () {
        const source = __asyncValues(gen);
        let merged = [];
        // True only while a fetched item is being processed; the iterator is
        // closed on failure only in that window, not when `next()` itself fails.
        let inBody = false;
        let finished;
        let failure;
        try {
            for (;;) {
                const step = yield source.next();
                finished = step.done;
                if (finished) {
                    break;
                }
                const item = step.value;
                inBody = true;
                merged = merged.concat(item);
                inBody = false;
            }
        }
        catch (err) {
            failure = { error: err };
        }
        finally {
            try {
                let closer;
                if (inBody && !finished && (closer = source.return)) {
                    yield closer.call(source);
                }
            }
            finally {
                // The original failure takes precedence over any error from closing.
                if (failure) {
                    throw failure.error;
                }
            }
        }
        return merged;
    });
}
// Shared failure path for every NullFilehandle method.
function throwNeverCalled() {
    throw new Error('never called');
}
/**
 * Placeholder filehandle assigned to `BamFile#bam` in htsget mode. Every
 * method throws, because none of them is expected to be invoked.
 */
class NullFilehandle {
    read() {
        return throwNeverCalled();
    }
    stat() {
        return throwNeverCalled();
    }
    readFile() {
        return throwNeverCalled();
    }
    close() {
        return throwNeverCalled();
    }
}
class BamFile {
constructor({ bamFilehandle, bamPath, bamUrl, baiPath, baiFilehandle, baiUrl, csiPath, csiFilehandle, csiUrl, htsget, yieldThreadTime = 100, renameRefSeqs = n => n, }) {
this.htsget = false;
this.featureCache = new abortable_promise_cache_1.default({
cache: new quick_lru_1.default({
maxSize: 50,
}),
fill: (args, signal) => __awaiter(this, void 0, void 0, function* () {
const { chunk, opts } = args;
const { data, cpositions, dpositions } = yield this._readChunk({
chunk,
opts: Object.assign(Object.assign({}, opts), { signal }),
});
return this.readBamFeatures(data, cpositions, dpositions, chunk);
}),
});
this.renameRefSeq = renameRefSeqs;
if (bamFilehandle) {
this.bam = bamFilehandle;
}
else if (bamPath) {
this.bam = new generic_filehandle_1.LocalFile(bamPath);
}
else if (bamUrl) {
const bamUrlObj = new URL(bamUrl);
const bamUrlUsername = bamUrlObj.username;
const bamUrlPassword = bamUrlObj.password;
if (bamUrlUsername && bamUrlPassword) {
bamUrl = `${bamUrlObj.protocol}//${bamUrlObj.host}${bamUrlObj.pathname}${bamUrlObj.search}`;
this.bam = new generic_filehandle_1.RemoteFile(bamUrl, {
overrides: {
credentials: 'include',
headers: {
Authorization: 'Basic ' + btoa(bamUrlUsername + ':' + bamUrlPassword),
},
},
});
}
else {
this.bam = new generic_filehandle_1.RemoteFile(bamUrl);
}
}
else if (htsget) {
this.htsget = true;
this.bam = new NullFilehandle();
}
else {
throw new Error('unable to initialize bam');
}
if (csiFilehandle) {
this.index = new csi_1.default({ filehandle: csiFilehandle });
}
else if (csiPath) {
this.index = new csi_1.default({ filehandle: new generic_filehandle_1.LocalFile(csiPath) });
}
else if (csiUrl) {
this.index = new csi_1.default({ filehandle: new generic_filehandle_1.RemoteFile(csiUrl) });
}
else if (baiFilehandle) {
this.index = new bai_1.default({ filehandle: baiFilehandle });
}
else if (baiPath) {
this.index = new bai_1.default({ filehandle: new generic_filehandle_1.LocalFile(baiPath) });
}
else if (baiUrl) {
const baiUrlObj = new URL(baiUrl);
const baiUrlUsername = baiUrlObj.username;
const baiUrlPassword = baiUrlObj.password;
if (baiUrlUsername && baiUrlPassword) {
baiUrl = `${baiUrlObj.protocol}//${baiUrlObj.host}${baiUrlObj.pathname}${baiUrlObj.search}`;
// console.log(
// `baiUrl | ${baiUrl} | ${baiUrlUsername} | ${baiUrlPassword}`,
// )
this.index = new bai_1.default({
filehandle: new generic_filehandle_1.RemoteFile(baiUrl, {
overrides: {
credentials: 'include',
headers: {
Authorization: 'Basic ' + btoa(baiUrlUsername + ':' + baiUrlPassword),
},
},
}),
});
}
else {
this.index = new bai_1.default({ filehandle: new generic_filehandle_1.RemoteFile(baiUrl) });
}
}
else if (bamPath) {
this.index = new bai_1.default({ filehandle: new generic_filehandle_1.LocalFile(`${bamPath}.bai`) });
}
else if (bamUrl) {
const baiUrlObj = new URL(bamUrl);
const baiUrlUsername = baiUrlObj.username;
const baiUrlPassword = baiUrlObj.password;
if (baiUrlUsername && baiUrlPassword) {
const baiUrl = `${baiUrlObj.protocol}//${baiUrlObj.host}${baiUrlObj.pathname}.bai${baiUrlObj.search}`;
// console.log(
// `baiUrl | ${baiUrl} | ${baiUrlUsername} | ${baiUrlPassword}`,
// )
this.index = new bai_1.default({
filehandle: new generic_filehandle_1.RemoteFile(baiUrl, {
overrides: {
credentials: 'include',
headers: {
Authorization: 'Basic ' + btoa(baiUrlUsername + ':' + baiUrlPassword),
},
},
}),
});
}
else {
this.index = new bai_1.default({ filehandle: new generic_filehandle_1.RemoteFile(`${bamUrl}.bai`) });
}
}
else if (htsget) {
this.htsget = true;
}
else {
throw new Error('unable to infer index format');
}
this.yieldThreadTime = yieldThreadTime;
}
getHeaderPre(origOpts) {
return __awaiter(this, void 0, void 0, function* () {
const opts = (0, util_1.makeOpts)(origOpts);
// console.log(`[bam-js] getHeaderPre: ${JSON.stringify(opts)}`)
// if (opts.assemblyName && opts.assemblyName === 'hg38') {
// this.chrToIndex = {
// chr1: 0,
// chr10: 1,
// chr11: 2,
// chr12: 3,
// chr13: 4,
// chr14: 5,
// chr15: 6,
// chr16: 7,
// chr17: 8,
// chr18: 9,
// chr19: 10,
// chr2: 11,
// chr20: 12,
// chr21: 13,
// chr22: 14,
// chr3: 15,
// chr4: 16,
// chr5: 17,
// chr6: 18,
// chr7: 19,
// chr8: 20,
// chr9: 21,
// chrM: 22,
// chrX: 23,
// chrY: 24,
// }
// this.indexToChr = [
// {
// refName: 'chr1',
// length: 248956422,
// },
// {
// refName: 'chr10',
// length: 133797422,
// },
// {
// refName: 'chr11',
// length: 135086622,
// },
// {
// refName: 'chr12',
// length: 133275309,
// },
// {
// refName: 'chr13',
// length: 114364328,
// },
// {
// refName: 'chr14',
// length: 107043718,
// },
// {
// refName: 'chr15',
// length: 101991189,
// },
// {
// refName: 'chr16',
// length: 90338345,
// },
// {
// refName: 'chr17',
// length: 83257441,
// },
// {
// refName: 'chr18',
// length: 80373285,
// },
// {
// refName: 'chr19',
// length: 58617616,
// },
// {
// refName: 'chr2',
// length: 242193529,
// },
// {
// refName: 'chr20',
// length: 64444167,
// },
// {
// refName: 'chr21',
// length: 46709983,
// },
// {
// refName: 'chr22',
// length: 50818468,
// },
// {
// refName: 'chr3',
// length: 198295559,
// },
// {
// refName: 'chr4',
// length: 190214555,
// },
// {
// refName: 'chr5',
// length: 181538259,
// },
// {
// refName: 'chr6',
// length: 170805979,
// },
// {
// refName: 'chr7',
// length: 159345973,
// },
// {
// refName: 'chr8',
// length: 145138636,
// },
// {
// refName: 'chr9',
// length: 138394717,
// },
// {
// refName: 'chrM',
// length: 16569,
// },
// {
// refName: 'chrX',
// length: 156040895,
// },
// {
// refName: 'chrY',
// length: 57227415,
// },
// ]
// return
// }
if (opts.assemblyName && opts.assemblyName === 'hg38') {
this.chrToIndex = {
chr1: 0,
chr2: 1,
chr3: 2,
chr4: 3,
chr5: 4,
chr6: 5,
chr7: 6,
chr8: 7,
chr9: 8,
chr10: 9,
chr11: 10,
chr12: 11,
chr13: 12,
chr14: 13,
chr15: 14,
chr16: 15,
chr17: 16,
chr18: 17,
chr19: 18,
chr20: 19,
chr21: 20,
chr22: 21,
chrX: 22,
chrY: 23,
chrM: 24,
'GL000008.2': 25,
'GL000009.2': 26,
'GL000194.1': 27,
'GL000195.1': 28,
'GL000205.2': 29,
'GL000208.1': 30,
'GL000213.1': 31,
'GL000214.1': 32,
'GL000216.2': 33,
'GL000218.1': 34,
'GL000219.1': 35,
'GL000220.1': 36,
'GL000221.1': 37,
'GL000224.1': 38,
'GL000225.1': 39,
'GL000226.1': 40,
'KI270302.1': 41,
'KI270303.1': 42,
'KI270304.1': 43,
'KI270305.1': 44,
'KI270310.1': 45,
'KI270311.1': 46,
'KI270312.1': 47,
'KI270315.1': 48,
'KI270316.1': 49,
'KI270317.1': 50,
'KI270320.1': 51,
'KI270322.1': 52,
'KI270329.1': 53,
'KI270330.1': 54,
'KI270333.1': 55,
'KI270334.1': 56,
'KI270335.1': 57,
'KI270336.1': 58,
'KI270337.1': 59,
'KI270338.1': 60,
'KI270340.1': 61,
'KI270362.1': 62,
'KI270363.1': 63,
'KI270364.1': 64,
'KI270366.1': 65,
'KI270371.1': 66,
'KI270372.1': 67,
'KI270373.1': 68,
'KI270374.1': 69,
'KI270375.1': 70,
'KI270376.1': 71,
'KI270378.1': 72,
'KI270379.1': 73,
'KI270381.1': 74,
'KI270382.1': 75,
'KI270383.1': 76,
'KI270384.1': 77,
'KI270385.1': 78,
'KI270386.1': 79,
'KI270387.1': 80,
'KI270388.1': 81,
'KI270389.1': 82,
'KI270390.1': 83,
'KI270391.1': 84,
'KI270392.1': 85,
'KI270393.1': 86,
'KI270394.1': 87,
'KI270395.1': 88,
'KI270396.1': 89,
'KI270411.1': 90,
'KI270412.1': 91,
'KI270414.1': 92,
'KI270417.1': 93,
'KI270418.1': 94,
'KI270419.1': 95,
'KI270420.1': 96,
'KI270422.1': 97,
'KI270423.1': 98,
'KI270424.1': 99,
'KI270425.1': 100,
'KI270429.1': 101,
'KI270435.1': 102,
'KI270438.1': 103,
'KI270442.1': 104,
'KI270448.1': 105,
'KI270465.1': 106,
'KI270466.1': 107,
'KI270467.1': 108,
'KI270468.1': 109,
'KI270507.1': 110,
'KI270508.1': 111,
'KI270509.1': 112,
'KI270510.1': 113,
'KI270511.1': 114,
'KI270512.1': 115,
'KI270515.1': 116,
'KI270516.1': 117,
'KI270517.1': 118,
'KI270518.1': 119,
'KI270519.1': 120,
'KI270521.1': 121,
'KI270522.1': 122,
'KI270528.1': 123,
'KI270529.1': 124,
'KI270530.1': 125,
'KI270538.1': 126,
'KI270539.1': 127,
'KI270544.1': 128,
'KI270548.1': 129,
'KI270579.1': 130,
'KI270580.1': 131,
'KI270581.1': 132,
'KI270582.1': 133,
'KI270583.1': 134,
'KI270584.1': 135,
'KI270587.1': 136,
'KI270588.1': 137,
'KI270589.1': 138,
'KI270590.1': 139,
'KI270591.1': 140,
'KI270593.1': 141,
'KI270706.1': 142,
'KI270707.1': 143,
'KI270708.1': 144,
'KI270709.1': 145,
'KI270710.1': 146,
'KI270711.1': 147,
'KI270712.1': 148,
'KI270713.1': 149,
'KI270714.1': 150,
'KI270715.1': 151,
'KI270716.1': 152,
'KI270717.1': 153,
'KI270718.1': 154,
'KI270719.1': 155,
'KI270720.1': 156,
'KI270721.1': 157,
'KI270722.1': 158,
'KI270723.1': 159,
'KI270724.1': 160,
'KI270725.1': 161,
'KI270726.1': 162,
'KI270727.1': 163,
'KI270728.1': 164,
'KI270729.1': 165,
'KI270730.1': 166,
'KI270731.1': 167,
'KI270732.1': 168,
'KI270733.1': 169,
'KI270734.1': 170,
'KI270735.1': 171,
'KI270736.1': 172,
'KI270737.1': 173,
'KI270738.1': 174,
'KI270739.1': 175,
'KI270740.1': 176,
'KI270741.1': 177,
'KI270742.1': 178,
'KI270743.1': 179,
'KI270744.1': 180,
'KI270745.1': 181,
'KI270746.1': 182,
'KI270747.1': 183,
'KI270748.1': 184,
'KI270749.1': 185,
'KI270750.1': 186,
'KI270751.1': 187,
'KI270752.1': 188,
'KI270753.1': 189,
'KI270754.1': 190,
'KI270755.1': 191,
'KI270756.1': 192,
'KI270757.1': 193,
};
this.indexToChr = [
{ refName: 'chr1', length: 248956422 },
{ refName: 'chr2', length: 242193529 },
{ refName: 'chr3', length: 198295559 },
{ refName: 'chr4', length: 190214555 },
{ refName: 'chr5', length: 181538259 },
{ refName: 'chr6', length: 170805979 },
{ refName: 'chr7', length: 159345973 },
{ refName: 'chr8', length: 145138636 },
{ refName: 'chr9', length: 138394717 },
{ refName: 'chr10', length: 133797422 },
{ refName: 'chr11', length: 135086622 },
{ refName: 'chr12', length: 133275309 },
{ refName: 'chr13', length: 114364328 },
{ refName: 'chr14', length: 107043718 },
{ refName: 'chr15', length: 101991189 },
{ refName: 'chr16', length: 90338345 },
{ refName: 'chr17', length: 83257441 },
{ refName: 'chr18', length: 80373285 },
{ refName: 'chr19', length: 58617616 },
{ refName: 'chr20', length: 64444167 },
{ refName: 'chr21', length: 46709983 },
{ refName: 'chr22', length: 50818468 },
{ refName: 'chrX', length: 156040895 },
{ refName: 'chrY', length: 57227415 },
{ refName: 'chrM', length: 16569 },
{ refName: 'GL000008.2', length: 209709 },
{ refName: 'GL000009.2', length: 201709 },
{ refName: 'GL000194.1', length: 191469 },
{ refName: 'GL000195.1', length: 182896 },
{ refName: 'GL000205.2', length: 185591 },
{ refName: 'GL000208.1', length: 92689 },
{ refName: 'GL000213.1', length: 164239 },
{ refName: 'GL000214.1', length: 137718 },
{ refName: 'GL000216.2', length: 176608 },
{ refName: 'GL000218.1', length: 161147 },
{ refName: 'GL000219.1', length: 179198 },
{ refName: 'GL000220.1', length: 161802 },
{ refName: 'GL000221.1', length: 155397 },
{ refName: 'GL000224.1', length: 179693 },
{ refName: 'GL000225.1', length: 211173 },
{ refName: 'GL000226.1', length: 15008 },
{ refName: 'KI270302.1', length: 2274 },
{ refName: 'KI270303.1', length: 1942 },
{ refName: 'KI270304.1', length: 2165 },
{ refName: 'KI270305.1', length: 1472 },
{ refName: 'KI270310.1', length: 1201 },
{ refName: 'KI270311.1', length: 12399 },
{ refName: 'KI270312.1', length: 998 },
{ refName: 'KI270315.1', length: 2276 },
{ refName: 'KI270316.1', length: 1444 },
{ refName: 'KI270317.1', length: 37690 },
{ refName: 'KI270320.1', length: 4416 },
{ refName: 'KI270322.1', length: 21476 },
{ refName: 'KI270329.1', length: 1040 },
{ refName: 'KI270330.1', length: 1652 },
{ refName: 'KI270333.1', length: 2699 },
{ refName: 'KI270334.1', length: 1368 },
{ refName: 'KI270335.1', length: 1048 },
{ refName: 'KI270336.1', length: 1026 },
{ refName: 'KI270337.1', length: 1121 },
{ refName: 'KI270338.1', length: 1428 },
{ refName: 'KI270340.1', length: 1428 },
{ refName: 'KI270362.1', length: 3530 },
{ refName: 'KI270363.1', length: 1803 },
{ refName: 'KI270364.1', length: 2855 },
{ refName: 'KI270366.1', length: 8320 },
{ refName: 'KI270371.1', length: 2805 },
{ refName: 'KI270372.1', length: 1650 },
{ refName: 'KI270373.1', length: 1451 },
{ refName: 'KI270374.1', length: 2656 },
{ refName: 'KI270375.1', length: 2378 },
{ refName: 'KI270376.1', length: 1136 },
{ refName: 'KI270378.1', length: 1048 },
{ refName: 'KI270379.1', length: 1045 },
{ refName: 'KI270381.1', length: 1930 },
{ refName: 'KI270382.1', length: 4215 },
{ refName: 'KI270383.1', length: 1750 },
{ refName: 'KI270384.1', length: 1658 },
{ refName: 'KI270385.1', length: 990 },
{ refName: 'KI270386.1', length: 1788 },
{ refName: 'KI270387.1', length: 1537 },
{ refName: 'KI270388.1', length: 1216 },
{ refName: 'KI270389.1', length: 1298 },
{ refName: 'KI270390.1', length: 2387 },
{ refName: 'KI270391.1', length: 1484 },
{ refName: 'KI270392.1', length: 971 },
{ refName: 'KI270393.1', length: 1308 },
{ refName: 'KI270394.1', length: 970 },
{ refName: 'KI270395.1', length: 1143 },
{ refName: 'KI270396.1', length: 1880 },
{ refName: 'KI270411.1', length: 2646 },
{ refName: 'KI270412.1', length: 1179 },
{ refName: 'KI270414.1', length: 2489 },
{ refName: 'KI270417.1', length: 2043 },
{ refName: 'KI270418.1', length: 2145 },
{ refName: 'KI270419.1', length: 1029 },
{ refName: 'KI270420.1', length: 2321 },
{ refName: 'KI270422.1', length: 1445 },
{ refName: 'KI270423.1', length: 981 },
{ refName: 'KI270424.1', length: 2140 },
{ refName: 'KI270425.1', length: 1884 },
{ refName: 'KI270429.1', length: 1361 },
{ refName: 'KI270435.1', length: 92983 },
{ refName: 'KI270438.1', length: 112505 },
{ refName: 'KI270442.1', length: 392061 },
{ refName: 'KI270448.1', length: 7992 },
{ refName: 'KI270465.1', length: 1774 },
{ refName: 'KI270466.1', length: 1233 },
{ refName: 'KI270467.1', length: 3920 },
{ refName: 'KI270468.1', length: 4055 },
{ refName: 'KI270507.1', length: 5353 },
{ refName: 'KI270508.1', length: 1951 },
{ refName: 'KI270509.1', length: 2318 },
{ refName: 'KI270510.1', length: 2415 },
{ refName: 'KI270511.1', length: 8127 },
{ refName: 'KI270512.1', length: 22689 },
{ refName: 'KI270515.1', length: 6361 },
{ refName: 'KI270516.1', length: 1300 },
{ refName: 'KI270517.1', length: 3253 },
{ refName: 'KI270518.1', length: 2186 },
{ refName: 'KI270519.1', length: 138126 },
{ refName: 'KI270521.1', length: 7642 },
{ refName: 'KI270522.1', length: 5674 },
{ refName: 'KI270528.1', length: 2983 },
{ refName: 'KI270529.1', length: 1899 },
{ refName: 'KI270530.1', length: 2168 },
{ refName: 'KI270538.1', length: 91309 },
{ refName: 'KI270539.1', length: 993 },
{ refName: 'KI270544.1', length: 1202 },
{ refName: 'KI270548.1', length: 1599 },
{ refName: 'KI270579.1', length: 31033 },
{ refName: 'KI270580.1', length: 1553 },
{ refName: 'KI270581.1', length: 7046 },
{ refName: 'KI270582.1', length: 6504 },
{ refName: 'KI270583.1', length: 1400 },
{ refName: 'KI270584.1', length: 4513 },
{ refName: 'KI270587.1', length: 2969 },
{ refName: 'KI270588.1', length: 6158 },
{ refName: 'KI270589.1', length: 44474 },
{ refName: 'KI270590.1', length: 4685 },
{ refName: 'KI270591.1', length: 5796 },
{ refName: 'KI270593.1', length: 3041 },
{ refName: 'KI270706.1', length: 175055 },
{ refName: 'KI270707.1', length: 32032 },
{ refName: 'KI270708.1', length: 127682 },
{ refName: 'KI270709.1', length: 66860 },
{ refName: 'KI270710.1', length: 40176 },
{ refName: 'KI270711.1', length: 42210 },
{ refName: 'KI270712.1', length: 176043 },
{ refName: 'KI270713.1', length: 40745 },
{ refName: 'KI270714.1', length: 41717 },
{ refName: 'KI270715.1', length: 161471 },
{ refName: 'KI270716.1', length: 153799 },
{ refName: 'KI270717.1', length: 40062 },
{ refName: 'KI270718.1', length: 38054 },
{ refName: 'KI270719.1', length: 176845 },
{ refName: 'KI270720.1', length: 39050 },
{ refName: 'KI270721.1', length: 100316 },
{ refName: 'KI270722.1', length: 194050 },
{ refName: 'KI270723.1', length: 38115 },
{ refName: 'KI270724.1', length: 39555 },
{ refName: 'KI270725.1', length: 172810 },
{ refName: 'KI270726.1', length: 43739 },
{ refName: 'KI270727.1', length: 448248 },
{ refName: 'KI270728.1', length: 1872759 },
{ refName: 'KI270729.1', length: 280839 },
{ refName: 'KI270730.1', length: 112551 },
{ refName: 'KI270731.1', length: 150754 },
{ refName: 'KI270732.1', length: 41543 },
{ refName: 'KI270733.1', length: 179772 },
{ refName: 'KI270734.1', length: 165050 },
{ refName: 'KI270735.1', length: 42811 },
{ refName: 'KI270736.1', length: 181920 },
{ refName: 'KI270737.1', length: 103838 },
{ refName: 'KI270738.1', length: 99375 },
{ refName: 'KI270739.1', length: 73985 },
{ refName: 'KI270740.1', length: 37240 },
{ refName: 'KI270741.1', length: 157432 },
{ refName: 'KI270742.1', length: 186739 },
{ refName: 'KI270743.1', length: 210658 },
{ refName: 'KI270744.1', length: 168472 },
{ refName: 'KI270745.1', length: 41891 },
{ refName: 'KI270746.1', length: 66486 },
{ refName: 'KI270747.1', length: 198735 },
{ refName: 'KI270748.1', length: 93321 },
{ refName: 'KI270749.1', length: 158759 },
{ refName: 'KI270750.1', length: 148850 },
{ refName: 'KI270751.1', length: 150742 },
{ refName: 'KI270752.1', length: 27745 },
{ refName: 'KI270753.1', length: 62944 },
{ refName: 'KI270754.1', length: 40191 },
{ refName: 'KI270755.1', length: 36723 },
{ refName: 'KI270756.1', length: 79590 },
{ refName: 'KI270757.1', length: 71251 },
];
}
if (!this.index) {
return;
}
const indexData = yield this.index.parse(opts);
const ret = indexData.firstDataLine
? indexData.firstDataLine.blockPosition + 65535
: undefined;
let buffer;
if (ret) {
const s = ret + blockLen;
// console.log(`[bam-js] reading header [ ret ${ret} | s ${s} ]`)
const res = yield this.bam.read(buffer_1.Buffer.alloc(s), 0, s, 0, opts);
if (!res.bytesRead) {
throw new Error('Error reading header');
}
buffer = res.buffer.subarray(0, Math.min(res.bytesRead, ret));
// console.log(`[bam-js] reading header [ res.bytesRead ${res.bytesRead} ]`)
}
else {
// console.log(`[bam-js] reading all of header`)
buffer = yield this.bam.readFile(opts);
}
const uncba = yield (0, bgzf_filehandle_1.unzip)(buffer);
if (uncba.readInt32LE(0) !== exports.BAM_MAGIC) {
throw new Error('Not a BAM file');
}
const headLen = uncba.readInt32LE(4);
// console.log(`[bam-js] headLen ${headLen}`);
this.header = uncba.toString('utf8', 8, 8 + headLen);
const { chrToIndex, indexToChr } = yield this._readRefSeqs(headLen + 8, 65535, opts);
this.chrToIndex = chrToIndex;
this.indexToChr = indexToChr;
// console.log(`this.chrToIndex ${JSON.stringify(this.chrToIndex)}`)
// console.log(`this.indexToChr ${JSON.stringify(this.indexToChr)}`)
return (0, sam_1.parseHeaderText)(this.header);
});
}
getHeader(opts) {
if (!this.headerP) {
this.headerP = this.getHeaderPre(opts).catch(e => {
this.headerP = undefined;
throw e;
});
}
return this.headerP;
}
getHeaderText() {
return __awaiter(this, arguments, void 0, function* (opts = {}) {
yield this.getHeader(opts);
return this.header;
});
}
// the full length of the refseq block is not given in advance so this grabs
// a chunk and doubles it if all refseqs haven't been processed
_readRefSeqs(start, refSeqBytes, opts) {
return __awaiter(this, void 0, void 0, function* () {
if (start > refSeqBytes) {
return this._readRefSeqs(start, refSeqBytes * 2, opts);
}
const size = refSeqBytes + blockLen;
const { bytesRead, buffer } = yield this.bam.read(buffer_1.Buffer.alloc(size), 0, refSeqBytes, 0, opts);
if (!bytesRead) {
throw new Error('Error reading refseqs from header');
}
const uncba = yield (0, bgzf_filehandle_1.unzip)(buffer.subarray(0, Math.min(bytesRead, refSeqBytes)));
const nRef = uncba.readInt32LE(start);
let p = start + 4;
const chrToIndex = {};
const indexToChr = [];
for (let i = 0; i < nRef; i += 1) {
const lName = uncba.readInt32LE(p);
const refName = this.renameRefSeq(uncba.toString('utf8', p + 4, p + 4 + lName - 1));
const lRef = uncba.readInt32LE(p + lName + 4);
chrToIndex[refName] = i;
indexToChr.push({ refName, length: lRef });
p = p + 8 + lName;
if (p > uncba.length) {
console.warn(`BAM header is very big. Re-fetching ${refSeqBytes} bytes.`);
return this._readRefSeqs(start, refSeqBytes * 2, opts);
}
}
// console.log(`[bam-js] chrToIndex: ${JSON.stringify(chrToIndex)}`)
// console.log(`[bam-js] indexToChr: ${JSON.stringify(indexToChr)}`)
return { chrToIndex, indexToChr };
});
}
getRecordsForRange(chr, min, max, opts) {
return __awaiter(this, void 0, void 0, function* () {
return gen2array(this.streamRecordsForRange(chr, min, max, opts));
});
}
streamRecordsForRange(chr, min, max, opts) {
return __asyncGenerator(this, arguments, function* streamRecordsForRange_1() {
var _a;
// console.log(
// `[bam-js] streamRecordsForRange | ${chr} | ${min} | ${max} | ${JSON.stringify(opts)}`,
// )
// console.log(`[bam-js] opts?.assemblyName ${opts?.assemblyName}`)
if ((opts === null || opts === void 0 ? void 0 : opts.assemblyName) && (opts === null || opts === void 0 ? void 0 : opts.assemblyName) !== 'hg38') {
yield __await(this.getHeader(opts));
}
const chrId = (_a = this.chrToIndex) === null || _a === void 0 ? void 0 : _a[chr];
if (chrId === undefined || !this.index) {
yield yield __await([]);
}
else {
const chunks = yield __await(this.index.blocksForRange(chrId, min - 1, max, opts));
yield __await(yield* __asyncDelegator(__asyncValues(this._fetchChunkFeatures(chunks, chrId, min, max, opts))));
}
});
}
_fetchChunkFeatures(chunks_1, chrId_1, min_1, max_1) {
return __asyncGenerator(this, arguments, function* _fetchChunkFeatures_1(chunks, chrId, min, max, opts = {}) {
const { viewAsPairs } = opts;
const feats = [];
let done = false;
for (const chunk of chunks) {
const records = yield __await(this.featureCache.get(chunk.toString(), { chunk, opts }, opts.signal));
const recs = [];
for (const feature of records) {
if (feature.seq_id() === chrId) {
if (feature.get('start') >= max) {
// past end of range, can stop iterating
done = true;
break;
}
else if (feature.get('end') >= min) {
// must be in range
recs.push(feature);
}
}
}
feats.push(recs);
yield yield __await(recs);
if (done) {
break;
}
}
(0, util_1.checkAbortSignal)(opts.signal);
if (viewAsPairs) {
yield yield __await(this.fetchPairs(chrId, feats, opts));
}
});
}
fetchPairs(chrId, feats, opts) {
return __awaiter(this, void 0, void 0, function* () {
const { pairAcrossChr, maxInsertSize = 200000 } = opts;
const unmatedPairs = {};
const readIds = {};
feats.map(ret => {
const readNames = {};
for (const element of ret) {
const name = element.name();
const id = element.id();
if (!readNames[name]) {
readNames[name] = 0;
}
readNames[name]++;
readIds[id] = 1;
}
for (const [k, v] of Object.entries(readNames)) {
if (v === 1) {
unmatedPairs[k] = true;
}
}
});
const matePromises = [];
feats.map(ret => {
for (const f of ret) {
const name = f.name();
const start = f.get('start');
const pnext = f._next_pos();
const rnext = f._next_refid();
if (this.index &&
unmatedPairs[name] &&
(pairAcrossChr ||
(rnext === chrId && Math.abs(start - pnext) < maxInsertSize))) {
matePromises.push(this.index.blocksForRange(rnext, pnext, pnext + 1, opts));
}
}
});
// filter out duplicate chunks (the blocks are lists of chunks, blocks are
// concatenated, then filter dup chunks)
const map = new Map();
const res = yield Promise.all(matePromises);
for (const m of res.flat()) {
if (!map.has(m.toString())) {
map.set(m.toString(), m);
}
}
const mateFeatPromises = yield Promise.all([...map.values()].map((c) => __awaiter(this, void 0, void 0, function* () {
const { data, cpositions, dpositions, chunk } = yield this._readChunk({
chunk: c,
opts,
});
const mateRecs = [];
for (const feature of yield this.readBamFeatures(data, cpositions, dpositions, chunk)) {
if (unmatedPairs[feature.get('name')] && !readIds[feature.id()]) {
mateRecs.push(feature);
}
}
return mateRecs;
})));
return mateFeatPromises.flat();
});
}
_readRegion(position_1, size_1) {
return __awaiter(this, arguments, void 0, function* (position, size, opts = {}) {
const { bytesRead, buffer } = yield this.bam.read(buffer_1.Buffer.alloc(size), 0, size, position, opts);
return buffer.subarray(0, Math.min(bytesRead, size));
});
}
_readChunk(_a) {
return __awaiter(this, arguments, void 0, function* ({ chunk, opts }) {
const buffer = yield this._readRegion(chunk.minv.blockPosition, chunk.fetchedSize(), opts);
const { buffer: data, cpositions, dpositions, } = yield (0, bgzf_filehandle_1.unzipChunkSlice)(buffer, chunk);
return { data, cpositions, dpositions, chunk };
});
}
/**
 * Splits decompressed chunk data into records.
 *
 * Each record is prefixed by a little-endian int32 giving its size; a record
 * object is created for every one that lies completely inside `ba`.
 *
 * @param ba buffer of decompressed data
 * @param cpositions positions used for the compressed side of the record ID
 *   (see the fileOffset comment below)
 * @param dpositions matching positions on the decompressed side
 * @param chunk chunk the data came from; `chunk.minv.dataPosition` is read
 * @returns promise for the array of records
 */
readBamFeatures(ba, cpositions, dpositions, chunk) {
return __awaiter(this, void 0, void 0, function* () {
let blockStart = 0;
const sink = [];
let pos = 0;
// timestamp of the last pause, used for the periodic yielding further down
let last = +Date.now();
while (blockStart + 4 < ba.length) {
const blockSize = ba.readInt32LE(blockStart);
// index of the record's last byte (inclusive)
const blockEnd = blockStart + 4 + blockSize - 1;
// advance `pos` to the dpositions entry that covers the current offset;
// the loop overshoots by one, hence the decrement afterwards
if (dpositions) {
while (blockStart + chunk.minv.dataPosition >= dpositions[pos++]) { }
pos--;
}
// only try to read the feature if we have all the bytes for it
if (blockEnd < ba.length) {
const feature = new record_1.default({
bytes: {
byteArray: ba,
start: blockStart,
end: blockEnd,
},
// the below results in an automatically calculated file-offset based
// ID if the info for that is available, otherwise crc32 of the
// feature's bytes
//
// cpositions[pos] refers to the actual file offset of a bgzip block
// boundary
//
// we multiply by (1 << 8) in order to make sure each block has a
// "unique" address space so that data in that block could never
// overlap
//
// then blockStart - dpositions[pos] is an uncompressed offset from
// that bgzip block boundary, and since the cpositions are multiplied
// by (1 << 8) these uncompressed offsets get a unique space
//
// this has an extra chunk.minv.dataPosition added on because
// blockStart starts at 0 instead of chunk.minv.dataPosition
//
// the +1 is just to avoid any possible uniqueId 0 but this does not
// realistically happen
fileOffset: cpositions.length > 0
? cpositions[pos] * (1 << 8) +
(blockStart - dpositions[pos]) +
chunk.minv.dataPosition +
1
: // must be slice, not subarray for buffer polyfill on web
buffer_crc32_1.default.signed(ba.slice(blockStart, blockEnd)),
});
sink.push(feature);
// pause for a moment once more than yieldThreadTime ms have passed since
// the last pause; a falsy yieldThreadTime disables this
if (this.yieldThreadTime && +Date.now() - last > this.yieldThreadTime) {
yield (0, util_1.timeout)(1);
last = +Date.now();
}
}
blockStart = blockEnd + 1;
}
return sink;
});
}
hasRefSeq(seqName) {
return __awaiter(this, void 0, void 0, function* () {
var _a, _b;
const seqId = (_a = this.chrToIndex) === null || _a === void 0 ? void 0 : _a[seqName];
return seqId === undefined ? false : (_b = this.index) === null || _b === void 0 ? void 0 : _b.hasRefSeq(seqId);
});
}
lineCount(seqName) {
return __awaiter(this, void 0, void 0, function* () {
var _a;
const seqId = (_a = this.chrToIndex) === null || _a === void 0 ? void 0 : _a[seqName];
return seqId === undefined || !this.index ? 0 : this.index.lineCount(seqId);
});
}
indexCov(seqName, start, end) {
return __awaiter(this, void 0, void 0, function* () {
var _a;
if (!this.index) {
return [];
}
yield this.index.parse();
const seqId = (_a = this.chrToIndex) === null || _a === void 0 ? void 0 : _a[seqName];
return seqId === undefined ? [] : this.index.indexCov(seqId, start, end);
});
}
blocksForRange(seqName, start, end, opts) {
return __awaiter(this, void 0, void 0, function* () {
var _a;
if (!this.index) {
return [];
}
yield this.index.parse();
const seqId = (_a = this.chrToIndex) === null || _a === void 0 ? void 0 : _a[seqName];
return seqId === undefined
? []
: this.index.blocksForRange(seqId, start, end, opts);
});
}
}
exports.default = BamFile;
//# sourceMappingURL=bamFile.js.map