@gmod/bam
Version:
Parser for BAM and BAM index (bai) files
218 lines • 8.7 kB
JavaScript
;
// TypeScript interop helper: exposes property `key` of `source` on `target`
// under the name `alias` (defaults to `key`). Reuses a helper already present
// on `this` when there is one.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function (target, source, key, alias) {
    if (alias === undefined) {
        alias = key;
    }
    var descriptor = Object.getOwnPropertyDescriptor(source, key);
    // Decide whether the source descriptor can be copied verbatim or must be
    // replaced by a forwarding getter that reads source[key] on each access.
    var needsForwardingGetter;
    if (!descriptor) {
        needsForwardingGetter = true;
    }
    else if ("get" in descriptor) {
        // Accessor properties are only copied as-is from ES-module namespaces.
        needsForwardingGetter = !source.__esModule;
    }
    else {
        // Data properties that can still change need a live view.
        needsForwardingGetter = descriptor.writable || descriptor.configurable;
    }
    if (needsForwardingGetter) {
        descriptor = {
            enumerable: true,
            get: function () {
                return source[key];
            }
        };
    }
    Object.defineProperty(target, alias, descriptor);
}) : (function (target, source, key, alias) {
    // Fallback when Object.create is unavailable: plain value copy.
    if (alias === undefined) {
        alias = key;
    }
    target[alias] = source[key];
}));
// TypeScript interop helper: attaches `value` to `target` as its "default"
// export. Reuses a helper already present on `this` when there is one.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function (target, value) {
    // Enumerable, but neither writable nor configurable (defineProperty defaults).
    var defaultDescriptor = { enumerable: true, value: value };
    Object.defineProperty(target, "default", defaultDescriptor);
}) : function (target, value) {
    // Fallback when Object.create is unavailable: plain assignment.
    target["default"] = value;
});
// TypeScript interop helper for `import * as ns from ...`: returns ES modules
// untouched, and wraps anything else in a fresh namespace object whose
// "default" is the module itself.
var __importStar = (this && this.__importStar) || (function () {
    // Own-key lister, resolved on first use and cached afterwards.
    var cachedOwnKeys;
    var listOwnKeys = function (obj) {
        if (!cachedOwnKeys) {
            cachedOwnKeys = Object.getOwnPropertyNames || function (o) {
                var names = [];
                for (var name in o) {
                    if (Object.prototype.hasOwnProperty.call(o, name)) {
                        names[names.length] = name;
                    }
                }
                return names;
            };
        }
        return cachedOwnKeys(obj);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var keys = listOwnKeys(mod);
            for (var idx = 0; idx < keys.length; idx++) {
                var key = keys[idx];
                // "default" is installed separately below.
                if (key !== "default") {
                    __createBinding(namespace, mod, key);
                }
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
// TypeScript interop helper for default imports: ES modules pass through
// unchanged; any other value is wrapped as `{ default: value }`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const chunk_ts_1 = __importDefault(require("./chunk.js"));
const indexFile_ts_1 = __importStar(require("./indexFile.js"));
const util_ts_1 = require("./util.js");
const virtualOffset_ts_1 = require("./virtualOffset.js");
// Magic number expected in the first four bytes of a BAI file: the bytes
// 'B', 'A', 'I', 0x01 read as a little-endian uint32 (0x01494142).
const BAI_MAGIC = 21578050; // BAI\1
// BAI uses a fixed 5-level binning scheme. Its linear index has a 14-bit
// resolution, i.e. one entry per 16384-position (16 kbp) window of the
// reference. See SAMv1.pdf §5.1.3 (hts-specs).
// https://github.com/samtools/hts-specs/blob/master/SAMv1.pdf
// log2 of the linear-index window size; a position shifted right by this
// many bits gives the index of the window (and of the finest-level bin,
// relative to that level's first bin) containing it.
const BAI_LINEAR_SHIFT = 14;
// Width of one linear-index window, in reference positions.
const BAI_LINEAR_INTERVAL = 1 << BAI_LINEAR_SHIFT; // 16384
/**
 * Strip the remainder of `n` modulo `multiple`, giving the multiple of
 * `multiple` at or toward zero from `n`.
 *
 * @param {number} n - value to round
 * @param {number} multiple - step size
 * @returns {number} `n` minus its remainder
 */
function roundDown(n, multiple) {
    const remainder = n % multiple;
    return n - remainder;
}
/**
 * Advance `n` to the next multiple of `multiple` strictly above its
 * rounded-down value. Note that a value which is already an exact multiple
 * is still moved up by one full step (e.g. 16384 -> 32768 for a step of
 * 16384).
 *
 * @param {number} n - value to round
 * @param {number} multiple - step size
 * @returns {number} rounded-down `n` plus one step
 */
function roundUp(n, multiple) {
    const remainder = n % multiple;
    const floor = n - remainder;
    return floor + multiple;
}
// List, for every level of the binning scheme, the inclusive [firstBin,
// lastBin] range of bins overlapping the half-open region [beg, end). Level 0
// is the single root bin 0; each deeper level L starts at bin (8^L - 1) / 7
// and uses a window three bits narrower than the previous one. See
// SAMv1.pdf §5.1.1 for the binning derivation.
function reg2bins(beg, end) {
    // Work with the last covered position, since `end` is exclusive.
    const last = end - 1;
    // [first bin number of the level, right-shift giving the window index]
    const levels = [
        [1, 26],
        [9, 23],
        [73, 20],
        [585, 17],
        [4681, BAI_LINEAR_SHIFT],
    ];
    const ranges = [[0, 0]];
    for (const [firstBin, shift] of levels) {
        ranges.push([firstBin + (beg >> shift), firstBin + (last >> shift)]);
    }
    return ranges;
}
/**
 * Reader for BAI (BAM index) files, layered on the shared index-file base
 * class.
 */
class BAI extends indexFile_ts_1.default {
    /**
     * Read the whole index file, validate its magic number, and record where
     * each reference sequence's index starts so it can be decoded on demand.
     *
     * @param opts - passed through to `filehandle.readFile`
     * @returns an object with:
     *   - `bai`: always `true`
     *   - `firstDataLine`: result of folding every linear-index entry through
     *     `findFirstData` (undefined when there are no entries)
     *   - `maxBlockSize`: `1 << 16`
     *   - `indices`: memoized function `refId -> { binIndex, linearIndex,
     *     stats } | undefined`
     *   - `refCount`: number of reference sequences declared in the header
     * @throws {Error} if the magic number is wrong or a bin number exceeds the
     *   range BAI can represent
     */
    async _parse(opts) {
        const bytes = await this.filehandle.readFile(opts);
        // Build the DataView over exactly the bytes of the returned view.
        // `bytes` may be a window into a larger ArrayBuffer (e.g. a pooled
        // Node Buffer or a subarray); without byteOffset/byteLength the
        // DataView would start at the beginning of that backing buffer and
        // disagree with the helpers below, which index `bytes` directly.
        const dataView = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
        // check BAI magic numbers
        if (dataView.getUint32(0, true) !== BAI_MAGIC) {
            throw new Error('Not a BAI file');
        }
        const refCount = dataView.getInt32(4, true);
        const depth = 5;
        // Highest regular bin number for a 5-level scheme: (8^6 - 1) / 7 = 37449.
        const binLimit = ((1 << ((depth + 1) * 3)) - 1) / 7;
        // The bin numbered one past the limit is a metadata pseudo-bin, not a
        // real bin of chunks.
        const pseudoBin = binLimit + 1;
        // read the indexes for each reference sequence
        let curr = 8;
        let firstDataLine;
        // Byte offset of each reference's index section, by reference id.
        const offsets = [];
        for (let i = 0; i < refCount; i++) {
            offsets.push(curr);
            const binCount = dataView.getInt32(curr, true);
            curr += 4;
            // First pass only skips over the bins; they are decoded lazily in
            // getIndices().
            for (let j = 0; j < binCount; j += 1) {
                const bin = dataView.getUint32(curr, true);
                curr += 4;
                if (bin === pseudoBin) {
                    // 4-byte chunk count followed by a 32-byte payload
                    curr += 4;
                    curr += 32;
                }
                else if (bin > pseudoBin) {
                    throw new Error('bai index contains too many bins, please use CSI');
                }
                else {
                    const chunkCount = dataView.getInt32(curr, true);
                    curr += 4;
                    for (let k = 0; k < chunkCount; k++) {
                        // each chunk is a pair of 8-byte virtual offsets
                        curr += 8;
                        curr += 8;
                    }
                }
            }
            // walk the linear index to find the smallest virtual offset, which
            // marks where the BAM header ends and data begins
            const linearCount = dataView.getInt32(curr, true);
            curr += 4;
            for (let j = 0; j < linearCount; j++) {
                firstDataLine = (0, util_ts_1.findFirstData)(firstDataLine, (0, virtualOffset_ts_1.fromBytes)(bytes, curr));
                curr += 8;
            }
        }
        /**
         * Decode the bin index, linear index and pseudo-bin stats of one
         * reference sequence. Returns undefined for a reference id that has
         * no recorded offset.
         */
        function getIndices(refId) {
            let curr = offsets[refId];
            if (curr === undefined) {
                return undefined;
            }
            const binCount = dataView.getInt32(curr, true);
            let stats;
            curr += 4;
            // bin number -> array of chunks
            const binIndex = {};
            for (let j = 0; j < binCount; j += 1) {
                const bin = dataView.getUint32(curr, true);
                curr += 4;
                if (bin === pseudoBin) {
                    curr += 4;
                    // stats are parsed from the second 16 bytes of the
                    // pseudo-bin's 32-byte payload
                    stats = (0, util_ts_1.parsePseudoBin)(bytes, curr + 16);
                    curr += 32;
                }
                else if (bin > pseudoBin) {
                    throw new Error('bai index contains too many bins, please use CSI');
                }
                else {
                    const chunkCount = dataView.getInt32(curr, true);
                    curr += 4;
                    const chunks = new Array(chunkCount);
                    for (let k = 0; k < chunkCount; k++) {
                        const u = (0, virtualOffset_ts_1.fromBytes)(bytes, curr);
                        curr += 8;
                        const v = (0, virtualOffset_ts_1.fromBytes)(bytes, curr);
                        curr += 8;
                        chunks[k] = new chunk_ts_1.default(u, v, bin);
                    }
                    binIndex[bin] = chunks;
                }
            }
            const linearCount = dataView.getInt32(curr, true);
            curr += 4;
            const linearIndex = new Array(linearCount);
            for (let j = 0; j < linearCount; j++) {
                linearIndex[j] = (0, virtualOffset_ts_1.fromBytes)(bytes, curr);
                curr += 8;
            }
            return {
                binIndex,
                linearIndex,
                stats,
            };
        }
        return {
            bai: true,
            firstDataLine,
            maxBlockSize: 1 << 16,
            indices: (0, indexFile_ts_1.memoizeByRefId)(getIndices),
            refCount,
        };
    }
    /**
     * Produce one score per linear-index window for a reference sequence.
     *
     * Each window's score is the difference in `blockPosition` between its
     * linear-index entry and the next one, multiplied by `stats.lineCount`
     * (0 when no stats were found) and divided by the `blockPosition` of the
     * last linear-index entry.
     *
     * @param seqId - reference sequence id, as accepted by `indices()`
     * @param start - optional region start; rounded down to a window
     *   boundary. Defaults to 0.
     * @param end - optional region end; rounded with `roundUp`, so a value
     *   already on a window boundary still extends one more window. Defaults
     *   to the end of the linear index.
     * @param opts - passed through to `parse`
     * @returns array of `{ score, start, end }`, one per window in
     *   [start, end); empty when the reference has no index or an empty
     *   linear index
     * @throws {Error} if the rounded end lies beyond the linear index
     */
    async indexCov(seqId, start, end, opts) {
        const v = BAI_LINEAR_INTERVAL;
        const indexData = await this.parse(opts);
        const seqIdx = indexData.indices(seqId);
        if (!seqIdx) {
            return [];
        }
        const { linearIndex, stats } = seqIdx;
        if (linearIndex.length === 0) {
            return [];
        }
        const lastWindowEnd = (linearIndex.length - 1) * v;
        const e = end === undefined ? lastWindowEnd : roundUp(end, v);
        const s = start === undefined ? 0 : roundDown(start, v);
        // Validate before allocating so an oversized query reports this
        // error rather than failing on the array size.
        if (e > lastWindowEnd) {
            throw new Error('query outside of range of linear index');
        }
        const totalSize = linearIndex[linearIndex.length - 1].blockPosition;
        const lineCount = stats?.lineCount ?? 0;
        // Always size the result by the window count actually covered. Sizing
        // it by the whole linear index when only `end` was given left holes at
        // the tail of the returned array.
        const depths = new Array((e - s) / v);
        let currentPos = linearIndex[s / v].blockPosition;
        for (let i = s / v, j = 0; i < e / v; i++, j++) {
            const nextPos = linearIndex[i + 1].blockPosition;
            depths[j] = {
                score: ((nextPos - currentPos) * lineCount) / totalSize,
                start: i * v,
                end: i * v + v,
            };
            currentPos = nextPos;
        }
        return depths;
    }
    /**
     * Bin ranges overlapping [min, max); delegates to the module-level
     * `reg2bins`.
     */
    reg2bins(min, max) {
        return reg2bins(min, max);
    }
    // Use the linear index to find minimum file position of chunks that could
    // contain alignments in the region. Linear index entries are monotonically
    // non-decreasing, so the first entry at minLin is the minimum.
    // The window index is clamped to the last linear-index entry; with an
    // empty linear index the lookup yields undefined.
    getLowestChunk(refIndex, min) {
        const { linearIndex } = refIndex;
        const nintv = linearIndex.length;
        return linearIndex[Math.min(min >> BAI_LINEAR_SHIFT, nintv - 1)];
    }
}
exports.default = BAI;
//# sourceMappingURL=bai.js.map