UNPKG

@gmod/bam

Version:

Parser for BAM and BAM index (bai) files

223 lines 8.98 kB
"use strict";
// ---------------------------------------------------------------------------
// TypeScript-emitted CommonJS interop helpers (unchanged in behavior).
// ---------------------------------------------------------------------------
var __createBinding = (this && this.__createBinding) || (Object.create ? (function (o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function () { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function (o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function (o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function (o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function (o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const bgzf_filehandle_1 = require("@gmod/bgzf-filehandle");
const chunk_ts_1 = __importDefault(require("./chunk.js"));
const indexFile_ts_1 = __importStar(require("./indexFile.js"));
const util_ts_1 = require("./util.js");
const virtualOffset_ts_1 = require("./virtualOffset.js");

const CSI1_MAGIC = 21582659; // CSI\1
const CSI2_MAGIC = 38359875; // CSI\2
const ZERO_OFFSET = new virtualOffset_ts_1.VirtualOffset(0, 0);

/**
 * Left shift implemented with floating-point arithmetic so that results
 * larger than 32 bits do not wrap the way the `<<` operator does.
 *
 * @param {number} num - value to shift
 * @param {number} bits - number of bit positions
 * @returns {number} num * 2^bits
 */
function lshift(num, bits) {
    return num * 2 ** bits;
}

/**
 * Right shift implemented with floating-point arithmetic (floor division),
 * the counterpart of {@link lshift}.
 *
 * @param {number} num - value to shift
 * @param {number} bits - number of bit positions
 * @returns {number} floor(num / 2^bits)
 */
function rshift(num, bits) {
    return Math.floor(num / 2 ** bits);
}

/**
 * Builds a DataView that covers exactly the bytes of `bytes`.
 *
 * `new DataView(bytes.buffer)` alone would start at byte 0 of the backing
 * ArrayBuffer, which is wrong whenever `bytes` is a view with a non-zero
 * byteOffset (e.g. a subarray or a pooled Node Buffer).
 *
 * @param {Uint8Array} bytes
 * @returns {DataView}
 */
function dataViewOf(bytes) {
    return new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
}

/** Parser for CSI index files. */
class CSI extends indexFile_ts_1.default {
    maxBinNumber = 0;
    depth = 0;
    minShift = 0;

    // CSI omits the linear index that BAI's indexCov derives coverage from
    // (CSIv1.tex §3, hts-specs), so there's no equivalent to return.
    async indexCov() {
        return [];
    }

    /**
     * Parses the auxiliary (Tabix-style) header found in the CSI aux section.
     *
     * @param {Uint8Array} bytes - the decompressed index bytes
     * @param {number} offset - byte offset (relative to `bytes`) of the aux data
     * @returns {object} column numbers, coordinate type, meta character,
     *   skip-line count, format and raw flags, merged with whatever
     *   `parseNameBytes` returns for the name section
     * @throws {Error} if the low four bits of the format flags do not select
     *   a known preset
     */
    parseAuxData(bytes, offset) {
        const dataView = dataViewOf(bytes);
        const formatFlags = dataView.getUint32(offset, true);
        const coordinateType = formatFlags & 0x10000 ? 'zero-based-half-open' : '1-based-closed';
        const format = ['generic', 'SAM', 'VCF'][formatFlags & 0xf];
        if (!format) {
            throw new Error(`invalid Tabix preset format flags ${formatFlags}`);
        }
        const columnNumbers = {
            ref: dataView.getInt32(offset + 4, true),
            start: dataView.getInt32(offset + 8, true),
            end: dataView.getInt32(offset + 12, true),
        };
        const metaValue = dataView.getInt32(offset + 16, true);
        const metaChar = metaValue ? String.fromCharCode(metaValue) : '';
        const skipLines = dataView.getInt32(offset + 20, true);
        const nameSectionLength = dataView.getInt32(offset + 24, true);
        return {
            columnNumbers,
            coordinateType,
            metaValue,
            metaChar,
            skipLines,
            format,
            formatFlags,
            ...(0, util_ts_1.parseNameBytes)(bytes.subarray(offset + 28, offset + 28 + nameSectionLength), this.renameRefSeq),
        };
    }

    /**
     * Fetches and parses the index.
     *
     * Reads the whole index file, decompresses it, validates the magic number
     * and records `minShift`, `depth` and `maxBinNumber` on the instance.
     * Parsing is two-pass: the first pass records where each reference's
     * section starts and computes `firstDataLine`; the per-reference bin
     * index is built later, on demand, by `getIndices`.
     *
     * @param {object} [opts] - passed through to `filehandle.readFile`
     * @returns {Promise<object>} parsed index data
     * @throws {Error} if the magic number is neither CSI\1 nor CSI\2
     */
    async _parse(opts) {
        const buffer = await this.filehandle.readFile(opts);
        const bytes = await (0, bgzf_filehandle_1.unzip)(buffer);
        const dataView = dataViewOf(bytes);
        let csiVersion;
        const magic = dataView.getUint32(0, true);
        if (magic === CSI1_MAGIC) {
            csiVersion = 1;
        }
        else if (magic === CSI2_MAGIC) {
            csiVersion = 2;
        }
        else {
            throw new Error(`Not a CSI file ${magic}`);
            // TODO: do we need to support big-endian CSI files?
        }
        this.minShift = dataView.getInt32(4, true);
        this.depth = dataView.getInt32(8, true);
        // (8^(depth+1) - 1) / 7. Computed with lshift rather than `1 << n`:
        // the shift count reaches 33 at depth 10, where `<<` wraps to 32 bits
        // and would yield a bogus (tiny) maximum bin number.
        this.maxBinNumber = (lshift(1, (this.depth + 1) * 3) - 1) / 7;
        const maxBinNumber = this.maxBinNumber;
        const auxLength = dataView.getInt32(12, true);
        const aux = auxLength >= 30 ? this.parseAuxData(bytes, 16) : undefined;
        const refCount = dataView.getInt32(16 + auxLength, true);

        // SYNC: ~/src/gmod/tabix-js/src/csi.ts _parse — two-pass structure
        // read the indexes for each reference sequence
        let curr = 16 + auxLength + 4;
        let firstDataLine;
        const offsets = [];
        for (let i = 0; i < refCount; i++) {
            offsets.push(curr);
            const binCount = dataView.getInt32(curr, true);
            curr += 4;
            for (let j = 0; j < binCount; j++) {
                const bin = dataView.getUint32(curr, true);
                curr += 4;
                if (bin > maxBinNumber) {
                    // pseudo-bin: skip its fixed-size payload
                    curr += 28 + 16;
                }
                else {
                    curr += 8; // skip loffset
                    const chunkCount = dataView.getInt32(curr, true);
                    curr += 4;
                    for (let k = 0; k < chunkCount; k += 1) {
                        const u = (0, virtualOffset_ts_1.fromBytes)(bytes, curr);
                        curr += 8; // chunk start
                        curr += 8; // chunk end (not needed in this pass)
                        firstDataLine = (0, util_ts_1.findFirstData)(firstDataLine, u);
                    }
                }
            }
        }

        /**
         * Builds the bin index for one reference sequence.
         *
         * @param {number} refId
         * @returns {{binIndex: object, stats: *}|undefined} undefined when
         *   the index has no section for `refId`
         */
        function getIndices(refId) {
            let curr = offsets[refId];
            if (curr === undefined) {
                return undefined;
            }
            // the binning index
            const binCount = dataView.getInt32(curr, true);
            curr += 4;
            const binIndex = {};
            let pseudoBinStats;
            for (let j = 0; j < binCount; j++) {
                const bin = dataView.getUint32(curr, true);
                curr += 4;
                if (bin > maxBinNumber) {
                    pseudoBinStats = (0, util_ts_1.parsePseudoBin)(bytes, curr + 28);
                    curr += 28 + 16;
                }
                else {
                    curr += 8; // skip loffset; firstDataLine was computed in the first pass
                    const chunkCount = dataView.getInt32(curr, true);
                    curr += 4;
                    const chunks = new Array(chunkCount);
                    for (let k = 0; k < chunkCount; k += 1) {
                        const u = (0, virtualOffset_ts_1.fromBytes)(bytes, curr);
                        curr += 8;
                        const v = (0, virtualOffset_ts_1.fromBytes)(bytes, curr);
                        curr += 8;
                        chunks[k] = new chunk_ts_1.default(u, v, bin);
                    }
                    binIndex[bin] = chunks;
                }
            }
            return {
                binIndex,
                stats: pseudoBinStats,
            };
        }

        return {
            csiVersion,
            firstDataLine,
            indices: (0, indexFile_ts_1.memoizeByRefId)(getIndices),
            refCount,
            csi: true,
            maxBlockSize: 1 << 16,
            ...aux,
        };
    }

    // CSI has no linear index — every refId starts from the beginning of file.
    getLowestChunk() {
        return ZERO_OFFSET;
    }

    /**
     * calculate the list of bins that may overlap with region [beg,end)
     * (zero-based half-open). Follows the reference implementation in hts-specs
     * CSIv1.tex.
     *
     * @param {number} beg - region start
     * @param {number} end - region end (exclusive)
     * @returns {Array<[number, number]>} one inclusive [firstBin, lastBin]
     *   pair per level, from level 0 to `depth`
     * @throws {Error} if the query spans more bins than the binning scheme has
     */
    // SYNC: ~/src/gmod/tabix-js/src/csi.ts reg2bins
    reg2bins(beg, end) {
        // Clamp end to the maximum coordinate the index can address. With minShift
        // and depth, the index covers positions in [0, 2^(minShift + depth*3)).
        const maxPos = 2 ** (this.minShift + this.depth * 3);
        if (end > maxPos) {
            end = maxPos;
        }
        end -= 1;
        let l = 0;
        let t = 0;
        let s = this.minShift + this.depth * 3;
        const bins = [];
        for (; l <= this.depth; s -= 3, t += lshift(1, l * 3), l += 1) {
            const b = t + rshift(beg, s);
            const e = t + rshift(end, s);
            if (e - b + bins.length > this.maxBinNumber) {
                throw new Error(`query ${beg}-${end} is too large for current binning scheme (shift ${this.minShift}, depth ${this.depth}), try a smaller query or a coarser index binning scheme`);
            }
            bins.push([b, e]);
        }
        return bins;
    }
}
exports.default = CSI;
//# sourceMappingURL=csi.js.map