UNPKG

@gmod/bam

Version:

Parser for BAM and BAM index (bai) files

218 lines 8.7 kB
// Module preamble for the BAI (BAM index) parser, CommonJS build.
//
// The four `__*` helpers on the next line are module-interop shims (they look
// compiler-emitted; each one reuses a copy already present on `this`):
//   __createBinding(o, m, k, k2)  - exposes m[k] on o as property k2 (k2
//                                   defaults to k); uses an enumerable getter
//                                   unless m[k]'s own descriptor can be reused.
//   __setModuleDefault(o, v)      - sets o.default = v.
//   __importStar(mod)             - returns mod unchanged when mod.__esModule is
//                                   set; otherwise builds a new object that
//                                   re-binds every own property except
//                                   "default" and stores mod itself as .default.
//   __importDefault(mod)          - returns mod unchanged when mod.__esModule is
//                                   set; otherwise wraps it as { default: mod }.
//
// After the shims the line marks `exports` as an ES-module namespace, loads the
// sibling modules ./chunk.js, ./indexFile.js, ./util.js and ./virtualOffset.js,
// and defines BAI_MAGIC, the value compared against the first four bytes of an
// index file read as a little-endian uint32 (bytes 'B', 'A', 'I', 0x01).
"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || (function () { var ownKeys = function(o) { ownKeys = Object.getOwnPropertyNames || function (o) { var ar = []; for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k; return ar; }; return ownKeys(o); }; return function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); __setModuleDefault(result, mod); return result; }; })(); var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); const chunk_ts_1 = __importDefault(require("./chunk.js")); const indexFile_ts_1 = __importStar(require("./indexFile.js")); const util_ts_1 = require("./util.js"); const virtualOffset_ts_1 = require("./virtualOffset.js"); const BAI_MAGIC = 21578050; // BAI\1
// BAI uses a fixed 5-level binning scheme with a 14-bit (16KB) linear index
// resolution. See SAMv1.pdf §5.1.3 (hts-specs).
// https://github.com/samtools/hts-specs/blob/master/SAMv1.pdf

/** Right-shift that maps a reference coordinate to its linear-index window. */
const BAI_LINEAR_SHIFT = 14;
/** Number of bases covered by one linear-index window. */
const BAI_LINEAR_INTERVAL = 1 << BAI_LINEAR_SHIFT; // 16384

/**
 * Round `n` down to a multiple of `multiple`.
 *
 * @param {number} n
 * @param {number} multiple
 * @returns {number}
 */
function roundDown(n, multiple) {
    return n - (n % multiple);
}

/**
 * Advance `n` to the next multiple of `multiple`.
 *
 * NOTE: a value that already sits on a multiple is still moved up by one full
 * `multiple` (roundUp(16384, 16384) === 32768). This is kept as-is because
 * `indexCov` results depend on it.
 *
 * @param {number} n
 * @param {number} multiple
 * @returns {number}
 */
function roundUp(n, multiple) {
    return n - (n % multiple) + multiple;
}

/**
 * Compute the bin ranges that overlap the half-open interval [beg, end).
 * Each level's first-bin offset is (8^L - 1) / 7. See SAMv1.pdf §5.1.1 for
 * the binning derivation.
 *
 * @param {number} beg - interval start (inclusive)
 * @param {number} end - interval end (exclusive)
 * @returns {number[][]} six [firstBin, lastBin] pairs, coarsest level first
 */
function reg2bins(beg, end) {
    // Work with the last covered base so the shifts give inclusive bin bounds.
    const last = end - 1;
    return [
        [0, 0],
        [1 + (beg >> 26), 1 + (last >> 26)],
        [9 + (beg >> 23), 9 + (last >> 23)],
        [73 + (beg >> 20), 73 + (last >> 20)],
        [585 + (beg >> 17), 585 + (last >> 17)],
        [4681 + (beg >> BAI_LINEAR_SHIFT), 4681 + (last >> BAI_LINEAR_SHIFT)],
    ];
}

/**
 * Reader for BAI index files, built on the shared index-file base class.
 */
class BAI extends indexFile_ts_1.default {
    /**
     * Read the whole index file, validate its magic number and record where
     * each reference sequence's section starts. Per-reference bin and linear
     * indexes are decoded lazily through the returned `indices` function.
     *
     * @param {object} [opts] - passed through to `filehandle.readFile`
     * @returns {Promise<object>} `{ bai, firstDataLine, maxBlockSize, indices, refCount }`
     * @throws {Error} when the magic number does not match, or a bin number
     *   exceeds what the 5-level BAI scheme allows
     */
    async _parse(opts) {
        const bytes = await this.filehandle.readFile(opts);
        // `bytes` may be a view into a larger ArrayBuffer (for example a pooled
        // Buffer or a subarray). Anchor the DataView to the same window so its
        // offsets agree with the fromBytes(bytes, curr) calls below.
        const dataView = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
        // check BAI magic numbers
        if (dataView.getUint32(0, true) !== BAI_MAGIC) {
            throw new Error('Not a BAI file');
        }
        const refCount = dataView.getInt32(4, true);
        const depth = 5;
        // Largest regular bin number for a 5-level scheme; binLimit + 1 is the
        // pseudo-bin that carries per-reference statistics.
        const binLimit = ((1 << ((depth + 1) * 3)) - 1) / 7;
        // read the indexes for each reference sequence
        let curr = 8;
        let firstDataLine;
        const offsets = [];
        for (let i = 0; i < refCount; i++) {
            offsets.push(curr);
            const binCount = dataView.getInt32(curr, true);
            curr += 4;
            for (let j = 0; j < binCount; j += 1) {
                const bin = dataView.getUint32(curr, true);
                curr += 4;
                if (bin === binLimit + 1) {
                    // pseudo-bin: 4-byte chunk count followed by 32 bytes of payload
                    curr += 4;
                    curr += 32;
                }
                else if (bin > binLimit + 1) {
                    throw new Error('bai index contains too many bins, please use CSI');
                }
                else {
                    const chunkCount = dataView.getInt32(curr, true);
                    curr += 4;
                    // each chunk is two 8-byte virtual offsets
                    curr += chunkCount > 0 ? chunkCount * 16 : 0;
                }
            }
            // walk the linear index to find the smallest virtual offset, which
            // marks where the BAM header ends and data begins
            const linearCount = dataView.getInt32(curr, true);
            curr += 4;
            for (let j = 0; j < linearCount; j++) {
                firstDataLine = (0, util_ts_1.findFirstData)(firstDataLine, (0, virtualOffset_ts_1.fromBytes)(bytes, curr));
                curr += 8;
            }
        }
        /**
         * Decode the bin index, linear index and pseudo-bin stats of one
         * reference sequence.
         *
         * @param {number} refId - index of the reference sequence
         * @returns {object|undefined} `{ binIndex, linearIndex, stats }`, or
         *   undefined when no section was recorded for `refId`
         */
        function getIndices(refId) {
            let curr = offsets[refId];
            if (curr === undefined) {
                return undefined;
            }
            const binCount = dataView.getInt32(curr, true);
            let stats;
            curr += 4;
            const binIndex = {};
            for (let j = 0; j < binCount; j += 1) {
                const bin = dataView.getUint32(curr, true);
                curr += 4;
                if (bin === binLimit + 1) {
                    curr += 4;
                    // stats are read from the second 16-byte half of the payload
                    stats = (0, util_ts_1.parsePseudoBin)(bytes, curr + 16);
                    curr += 32;
                }
                else if (bin > binLimit + 1) {
                    throw new Error('bai index contains too many bins, please use CSI');
                }
                else {
                    const chunkCount = dataView.getInt32(curr, true);
                    curr += 4;
                    const chunks = new Array(chunkCount);
                    for (let k = 0; k < chunkCount; k++) {
                        const u = (0, virtualOffset_ts_1.fromBytes)(bytes, curr);
                        curr += 8;
                        const v = (0, virtualOffset_ts_1.fromBytes)(bytes, curr);
                        curr += 8;
                        chunks[k] = new chunk_ts_1.default(u, v, bin);
                    }
                    binIndex[bin] = chunks;
                }
            }
            const linearCount = dataView.getInt32(curr, true);
            curr += 4;
            const linearIndex = new Array(linearCount);
            for (let j = 0; j < linearCount; j++) {
                linearIndex[j] = (0, virtualOffset_ts_1.fromBytes)(bytes, curr);
                curr += 8;
            }
            return {
                binIndex,
                linearIndex,
                stats,
            };
        }
        return {
            bai: true,
            firstDataLine,
            maxBlockSize: 1 << 16,
            indices: (0, indexFile_ts_1.memoizeByRefId)(getIndices),
            refCount,
        };
    }

    /**
     * Estimate coverage per linear-index window from the index alone.
     *
     * For every 16384-base window in the requested range, the score is the
     * difference between consecutive linear-index block positions, scaled by
     * `stats.lineCount / totalSize`, where `totalSize` is the block position
     * of the last linear-index entry. `lineCount` is treated as 0 when the
     * reference has no pseudo-bin stats.
     *
     * @param {number} seqId - reference sequence index
     * @param {number} [start] - range start; defaults to 0, rounded down to a window
     * @param {number} [end] - range end; defaults to the last window boundary,
     *   otherwise advanced to the next window boundary
     * @param {object} [opts] - passed through to `parse`
     * @returns {Promise<Array<{score: number, start: number, end: number}>>}
     *   one dense entry per window; empty when the reference has no index
     *   data or the window is empty
     * @throws {Error} when the requested window lies outside the linear index
     */
    async indexCov(seqId, start, end, opts) {
        const v = BAI_LINEAR_INTERVAL;
        const indexData = await this.parse(opts);
        const seqIdx = indexData.indices(seqId);
        if (!seqIdx) {
            return [];
        }
        const { linearIndex, stats } = seqIdx;
        if (linearIndex.length === 0) {
            return [];
        }
        // Each window needs entries i and i + 1, so the last usable boundary
        // is the one belonging to the final linear-index entry.
        const lastBoundary = (linearIndex.length - 1) * v;
        const e = end === undefined ? lastBoundary : roundUp(end, v);
        const s = start === undefined ? 0 : roundDown(start, v);
        // Validate both ends before touching linearIndex, so an out-of-range
        // start reports the same error as an out-of-range end.
        if (s < 0 || s > lastBoundary || e > lastBoundary) {
            throw new Error('query outside of range of linear index');
        }
        const totalSize = linearIndex[linearIndex.length - 1].blockPosition;
        const lineCount = stats?.lineCount ?? 0;
        // Built with push so the result is always dense and exactly as long
        // as the number of windows visited.
        const depths = [];
        let currentPos = linearIndex[s / v].blockPosition;
        for (let i = s / v; i < e / v; i++) {
            const nextPos = linearIndex[i + 1].blockPosition;
            depths.push({
                score: ((nextPos - currentPos) * lineCount) / totalSize,
                start: i * v,
                end: i * v + v,
            });
            currentPos = nextPos;
        }
        return depths;
    }

    /**
     * Bin ranges overlapping [min, max); delegates to the module-level
     * `reg2bins`.
     *
     * @param {number} min
     * @param {number} max
     * @returns {number[][]}
     */
    reg2bins(min, max) {
        return reg2bins(min, max);
    }

    // Use the linear index to find minimum file position of chunks that could
    // contain alignments in the region. Linear index entries are monotonically
    // non-decreasing, so the first entry at minLin is the minimum.
    /**
     * @param {{linearIndex: Array}} refIndex - per-reference index data
     * @param {number} min - region start
     * @returns {*} the linear-index entry for `min`'s window, clamped to the
     *   last entry; undefined when the linear index is empty
     */
    getLowestChunk(refIndex, min) {
        const { linearIndex } = refIndex;
        const nintv = linearIndex.length;
        return linearIndex[Math.min(min >> BAI_LINEAR_SHIFT, nintv - 1)];
    }
}
exports.default = BAI;
//# sourceMappingURL=bai.js.map