/*
 * @gmod/bam
 * Version:
 * Parser for BAM and BAM index (bai) files
 * 249 lines • 9.49 kB
 * JavaScript
 */
;
// Interop helper for default imports: reuses an `__importDefault` already
// present on `this` when there is one, otherwise falls back to the local
// implementation below.
var __importDefault = (this && this.__importDefault) || function (mod) {
    // Modules flagged with `__esModule` are handed back untouched.
    if (mod && mod.__esModule) {
        return mod;
    }
    // Anything else is wrapped so that it can be read through `.default`.
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const quick_lru_1 = __importDefault(require("quick-lru"));
const chunk_1 = __importDefault(require("./chunk"));
const indexFile_1 = __importDefault(require("./indexFile"));
const util_1 = require("./util");
const virtualOffset_1 = require("./virtualOffset");
// Magic number expected in the first four bytes of a BAI file: the bytes
// 'B', 'A', 'I', 0x01 read as a little-endian uint32 (0x01494142).
const BAI_MAGIC = 21578050; // BAI\1
/**
 * Strip the remainder of `n` modulo `multiple`, yielding the multiple of
 * `multiple` that lies at or before `n` (towards zero, since `%` keeps the
 * sign of `n`).
 *
 * @param {number} n - value to round
 * @param {number} multiple - step size
 * @returns {number} `n` with its remainder removed
 */
function roundDown(n, multiple) {
    const remainder = n % multiple;
    return n - remainder;
}
/**
 * Strip the remainder of `n` modulo `multiple` and then advance by one full
 * step. Note that a value that is already an exact multiple is still moved
 * to the NEXT multiple (e.g. 16384 -> 32768 for a step of 16384).
 *
 * @param {number} n - value to round
 * @param {number} multiple - step size
 * @returns {number} the multiple of `multiple` strictly after the truncated `n`
 */
function roundUp(n, multiple) {
    const remainder = n % multiple;
    return n - remainder + multiple;
}
/**
 * Compute, for every level of the binning scheme, the inclusive range of bin
 * numbers that the region [beg, end) touches.
 *
 * `end` is treated as exclusive: it is decremented by one before the bin of
 * the last covered position is computed.
 *
 * @param {number} beg - start coordinate of the region
 * @param {number} end - end coordinate of the region (exclusive)
 * @returns {number[][]} six `[firstBin, lastBin]` pairs, one per level,
 *   starting with the single root bin `[0, 0]`
 */
function reg2bins(beg, end) {
    const last = end - 1;
    // [number of the first bin on the level, shift mapping a coordinate to
    // its bin position within that level]
    const levels = [
        [1, 26],
        [9, 23],
        [73, 20],
        [585, 17],
        [4681, 14],
    ];
    const ranges = [[0, 0]];
    for (const [firstBin, shift] of levels) {
        ranges.push([firstBin + (beg >> shift), firstBin + (last >> shift)]);
    }
    return ranges;
}
/**
 * Reader for BAM index (.bai) files.
 *
 * The whole index file is read once and scanned to record where each
 * reference sequence's section starts; the per-reference bin and linear
 * indexes are decoded lazily and kept in a small LRU cache.
 */
class BAI extends indexFile_1.default {
    // Memoised promise of the parsed index. Reset to undefined when parsing
    // fails so that a later call to parse() can retry.
    setupP;
    /**
     * Number of lines recorded in the index statistics for a reference.
     *
     * @param {number} refId - numeric id of the reference sequence
     * @param {object} [opts] - options forwarded to parse()
     * @returns {Promise<number>} the recorded line count, or 0 when the
     *   reference or its statistics are missing
     */
    async lineCount(refId, opts) {
        const indexData = await this.parse(opts);
        return indexData.indices(refId)?.stats?.lineCount || 0;
    }
    /**
     * Read and scan the index file.
     *
     * @param {object} [_opts] - currently unused
     * @returns {Promise<object>} `{ bai, firstDataLine, maxBlockSize,
     *   indices(refId), refCount }`, where `indices(refId)` lazily decodes
     *   `{ binIndex, linearIndex, stats }` for one reference (or returns
     *   undefined for an unknown reference)
     * @throws {Error} if the magic number is wrong or a bin number exceeds
     *   what the BAI binning scheme allows
     */
    async _parse(_opts) {
        const bytes = await this.filehandle.readFile();
        // `bytes` may be a view into a larger ArrayBuffer (e.g. a pooled
        // Node.js Buffer or a subarray). Honour its byteOffset/byteLength so
        // the DataView uses the same coordinates as `fromBytes(bytes, ...)`.
        const dataView = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
        // check BAI magic numbers
        if (dataView.getUint32(0, true) !== BAI_MAGIC) {
            throw new Error('Not a BAI file');
        }
        const refCount = dataView.getInt32(4, true);
        const depth = 5;
        // Highest regular bin number for the given depth; the bin numbered
        // binLimit + 1 is handled as a fixed-size metadata record below.
        const binLimit = ((1 << ((depth + 1) * 3)) - 1) / 7;
        // First pass: walk every reference's section only to record its
        // starting byte offset and to track the smallest virtual offset seen
        // in the linear indexes. Nothing is materialised here.
        let curr = 8;
        let firstDataLine;
        const offsets = [];
        for (let i = 0; i < refCount; i++) {
            offsets.push(curr);
            const binCount = dataView.getInt32(curr, true);
            curr += 4;
            for (let j = 0; j < binCount; j += 1) {
                const bin = dataView.getUint32(curr, true);
                curr += 4;
                if (bin === binLimit + 1) {
                    // metadata record: 4-byte count followed by 32 bytes of payload
                    curr += 4;
                    curr += 32;
                }
                else if (bin > binLimit + 1) {
                    throw new Error('bai index contains too many bins, please use CSI');
                }
                else {
                    const chunkCount = dataView.getInt32(curr, true);
                    curr += 4;
                    // each chunk is a pair of 8-byte virtual offsets
                    for (let k = 0; k < chunkCount; k++) {
                        curr += 16;
                    }
                }
            }
            const linearCount = dataView.getInt32(curr, true);
            curr += 4;
            // as we're going through the linear index, figure out the smallest
            // virtual offset in the indexes, which tells us where the BAM header
            // ends
            for (let j = 0; j < linearCount; j++) {
                const offset = (0, virtualOffset_1.fromBytes)(bytes, curr);
                curr += 8;
                firstDataLine = (0, util_1.findFirstData)(firstDataLine, offset);
            }
        }
        const indicesCache = new quick_lru_1.default({
            maxSize: 5,
        });
        // Decode the bin index, linear index and statistics of one reference,
        // starting from the byte offset recorded during the first pass.
        function getIndices(refId) {
            let curr = offsets[refId];
            if (curr === undefined) {
                return undefined;
            }
            const binCount = dataView.getInt32(curr, true);
            let stats;
            curr += 4;
            const binIndex = {};
            for (let j = 0; j < binCount; j += 1) {
                const bin = dataView.getUint32(curr, true);
                curr += 4;
                if (bin === binLimit + 1) {
                    curr += 4;
                    // statistics are parsed from the second half of the 32-byte payload
                    stats = (0, util_1.parsePseudoBin)(bytes, curr + 16);
                    curr += 32;
                }
                else if (bin > binLimit + 1) {
                    throw new Error('bai index contains too many bins, please use CSI');
                }
                else {
                    const chunkCount = dataView.getInt32(curr, true);
                    curr += 4;
                    const chunks = new Array(chunkCount);
                    for (let k = 0; k < chunkCount; k++) {
                        const u = (0, virtualOffset_1.fromBytes)(bytes, curr);
                        curr += 8;
                        const v = (0, virtualOffset_1.fromBytes)(bytes, curr);
                        curr += 8;
                        firstDataLine = (0, util_1.findFirstData)(firstDataLine, u);
                        chunks[k] = new chunk_1.default(u, v, bin);
                    }
                    binIndex[bin] = chunks;
                }
            }
            const linearCount = dataView.getInt32(curr, true);
            curr += 4;
            const linearIndex = new Array(linearCount);
            for (let j = 0; j < linearCount; j++) {
                const offset = (0, virtualOffset_1.fromBytes)(bytes, curr);
                curr += 8;
                firstDataLine = (0, util_1.findFirstData)(firstDataLine, offset);
                linearIndex[j] = offset;
            }
            return {
                binIndex,
                linearIndex,
                stats,
            };
        }
        return {
            bai: true,
            firstDataLine,
            maxBlockSize: 1 << 16,
            // Lazily decoded, LRU-cached per-reference indexes. Unknown
            // references yield undefined and are not cached.
            indices: (refId) => {
                if (!indicesCache.has(refId)) {
                    const result = getIndices(refId);
                    if (result) {
                        indicesCache.set(refId, result);
                    }
                    return result;
                }
                return indicesCache.get(refId);
            },
            refCount,
        };
    }
    /**
     * Estimate coverage from the linear index, one entry per 16384-base window.
     *
     * Each window's raw score is the difference in `blockPosition` between
     * consecutive linear index entries; scores are then scaled by
     * `lineCount / totalSize`, where `totalSize` is the `blockPosition` of
     * the last linear index entry.
     *
     * @param {number} seqId - numeric id of the reference sequence
     * @param {number} [start] - start of the range; rounded down to a window
     *   boundary, defaults to 0
     * @param {number} [end] - end of the range; rounded up with roundUp(),
     *   defaults to the extent of the linear index
     * @param {object} [opts] - options forwarded to parse()
     * @returns {Promise<Array<{score: number, start: number, end: number}>>}
     *   one entry per window, or [] when the reference has no linear index
     * @throws {Error} if the (rounded) end lies beyond the linear index
     */
    async indexCov(seqId, start, end, opts) {
        const v = 16384;
        const indexData = await this.parse(opts);
        const seqIdx = indexData.indices(seqId);
        if (!seqIdx) {
            return [];
        }
        const { linearIndex = [], stats } = seqIdx;
        if (linearIndex.length === 0) {
            return [];
        }
        const e = end === undefined ? (linearIndex.length - 1) * v : roundUp(end, v);
        const s = start === undefined ? 0 : roundDown(start, v);
        // One slot per window actually visited by the loop below. Sizing by
        // (e - s) / v in every case avoids trailing holes when only `end` is
        // supplied.
        const depths = new Array((e - s) / v);
        const totalSize = linearIndex[linearIndex.length - 1].blockPosition;
        if (e > (linearIndex.length - 1) * v) {
            throw new Error('query outside of range of linear index');
        }
        let currentPos = linearIndex[s / v].blockPosition;
        for (let i = s / v, j = 0; i < e / v; i++, j++) {
            depths[j] = {
                score: linearIndex[i + 1].blockPosition - currentPos,
                start: i * v,
                end: i * v + v,
            };
            currentPos = linearIndex[i + 1].blockPosition;
        }
        return depths.map(d => ({
            ...d,
            score: (d.score * (stats?.lineCount || 0)) / totalSize,
        }));
    }
    /**
     * Find the file chunks that may contain alignments overlapping a region.
     *
     * @param {number} refId - numeric id of the reference sequence
     * @param {number} min - start of the region (negative values are clamped to 0)
     * @param {number} max - end of the region
     * @param {object} [opts] - options forwarded to parse()
     * @returns {Promise<Array>} result of `optimizeChunks` applied to the
     *   candidate chunks and the lowest linear-index offset of the region;
     *   [] when the reference is not in the index
     */
    async blocksForRange(refId, min, max, opts = {}) {
        if (min < 0) {
            min = 0;
        }
        const indexData = await this.parse(opts);
        // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
        if (!indexData) {
            return [];
        }
        const ba = indexData.indices(refId);
        if (!ba) {
            return [];
        }
        // List of bin #s that overlap min, max
        const overlappingBins = reg2bins(min, max);
        const chunks = [];
        // Collect the chunks of every overlapping bin on every level. Bins
        // numbered 4681 and above form the finest level produced by reg2bins.
        for (const [start, end] of overlappingBins) {
            for (let bin = start; bin <= end; bin++) {
                if (ba.binIndex[bin]) {
                    const binChunks = ba.binIndex[bin];
                    for (const binChunk of binChunks) {
                        chunks.push(new chunk_1.default(binChunk.minv, binChunk.maxv, bin));
                    }
                }
            }
        }
        // Use the linear index to find minimum file position of chunks that could
        // contain alignments in the region
        const nintv = ba.linearIndex.length;
        let lowest;
        // 16384-base (1 << 14) windows, clamped to the last linear index entry
        const minLin = Math.min(min >> 14, nintv - 1);
        const maxLin = Math.min(max >> 14, nintv - 1);
        for (let i = minLin; i <= maxLin; ++i) {
            const vp = ba.linearIndex[i];
            if (vp && (!lowest || vp.compareTo(lowest) < 0)) {
                lowest = vp;
            }
        }
        return (0, util_1.optimizeChunks)(chunks, lowest);
    }
    /**
     * Parse the index once and share the result between callers.
     *
     * @param {object} [opts] - options forwarded to _parse() on the first call
     * @returns {Promise<object>} the parsed index data (see _parse)
     */
    async parse(opts = {}) {
        if (!this.setupP) {
            this.setupP = this._parse(opts).catch((e) => {
                // forget the failed attempt so the next call retries
                this.setupP = undefined;
                throw e;
            });
        }
        return this.setupP;
    }
    /**
     * Whether the index has a section for the given reference sequence.
     *
     * @param {number} seqId - numeric id of the reference sequence
     * @param {object} [opts] - options forwarded to parse()
     * @returns {Promise<boolean>} true when a bin index exists for `seqId`
     */
    async hasRefSeq(seqId, opts = {}) {
        const header = await this.parse(opts);
        return !!header.indices(seqId)?.binIndex;
    }
}
exports.default = BAI;
//# sourceMappingURL=bai.js.map