/*
 * @gmod/bam
 * Version:
 * Parser for BAM and BAM index (bai) files
 * 223 lines • 8.98 kB
 * JavaScript
 */
;
// Compiler-emitted interop helper: re-exports property `key` of `source` on
// `target` (optionally under the name `alias`). Reuses an already-installed
// helper on `this` when one exists.
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        const exportedName = alias === undefined ? key : alias;
        let descriptor = Object.getOwnPropertyDescriptor(source, key);
        // Install a forwarding getter unless the source property is already
        // an accessor on an ES module, or a frozen (non-writable,
        // non-configurable) data property that can be copied verbatim.
        const needsGetter = !descriptor ||
            ("get" in descriptor ? !source.__esModule : descriptor.writable || descriptor.configurable);
        if (needsGetter) {
            descriptor = { enumerable: true, get: () => source[key] };
        }
        Object.defineProperty(target, exportedName, descriptor);
    }
    : function (target, source, key, alias) {
        // Fallback for environments without Object.create: plain value copy.
        target[alias === undefined ? key : alias] = source[key];
    });
// Compiler-emitted interop helper: stores `value` as the enumerable
// `default` property of `target`. Reuses an already-installed helper on
// `this` when one exists.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (target, value) {
        Object.defineProperty(target, "default", { value, enumerable: true });
    }
    : function (target, value) {
        // Fallback for environments without Object.create.
        target["default"] = value;
    });
// Compiler-emitted interop helper for `import * as ns from ...`: ES modules
// are returned untouched; anything else is wrapped in a fresh namespace
// object that re-exports every own property except "default" and exposes the
// original value as `default`.
var __importStar = (this && this.__importStar) || (function () {
    // Chooses the key-listing strategy on first use, then replaces itself
    // with the chosen implementation.
    let listOwnKeys = function (obj) {
        listOwnKeys = Object.getOwnPropertyNames || function (target) {
            const names = [];
            for (const name in target) {
                if (Object.prototype.hasOwnProperty.call(target, name)) {
                    names.push(name);
                }
            }
            return names;
        };
        return listOwnKeys(obj);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        const namespace = {};
        if (mod != null) {
            for (const name of listOwnKeys(mod)) {
                if (name !== "default") {
                    __createBinding(namespace, mod, name);
                }
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
// Compiler-emitted interop helper for default imports: ES modules pass
// through unchanged, any other value is wrapped as `{ default: value }`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const bgzf_filehandle_1 = require("@gmod/bgzf-filehandle");
const chunk_ts_1 = __importDefault(require("./chunk.js"));
const indexFile_ts_1 = __importStar(require("./indexFile.js"));
const util_ts_1 = require("./util.js");
const virtualOffset_ts_1 = require("./virtualOffset.js");
// Magic numbers identifying a CSI index: the first four bytes of the file
// interpreted as a little-endian uint32.
const CSI1_MAGIC = 0x01495343; // bytes 'C' 'S' 'I' 0x01
const CSI2_MAGIC = 0x02495343; // bytes 'C' 'S' 'I' 0x02
// Shared VirtualOffset built with (0, 0); CSI.getLowestChunk() returns it for
// every reference.
// NOTE(review): presumably this denotes the very start of the data file —
// confirm the constructor's argument meaning in virtualOffset.js.
const ZERO_OFFSET = new virtualOffset_ts_1.VirtualOffset(0, 0);
/**
 * Arithmetic left shift done with multiplication rather than `<<`, so the
 * result is not truncated to a 32-bit integer.
 *
 * @param {number} num - value to shift
 * @param {number} bits - number of bit positions to shift by
 * @returns {number} `num` multiplied by 2 to the power `bits`
 */
function lshift(num, bits) {
    const factor = 2 ** bits;
    return num * factor;
}
/**
 * Arithmetic right shift done with division and flooring rather than `>>`,
 * so inputs wider than 32 bits are handled.
 *
 * @param {number} num - value to shift
 * @param {number} bits - number of bit positions to shift by
 * @returns {number} floor of `num` divided by 2 to the power `bits`
 */
function rshift(num, bits) {
    const divisor = 2 ** bits;
    return Math.floor(num / divisor);
}
/**
 * Reader for CSI index files. Parses the decompressed index bytes into
 * per-reference bin indexes and answers bin-range queries for a region.
 */
class CSI extends indexFile_ts_1.default {
    // Largest regular bin number for the parsed depth; set by _parse().
    maxBinNumber = 0;
    // Number of levels below the root in the binning scheme; set by _parse().
    depth = 0;
    // Bit width of the smallest bin; set by _parse().
    minShift = 0;
    // CSI omits the linear index that BAI's indexCov derives coverage from
    // (CSIv1.tex §3, hts-specs), so there's no equivalent to return.
    async indexCov() {
        return [];
    }
    /**
     * Decode the Tabix-style auxiliary header block.
     *
     * @param {Uint8Array} bytes - decompressed index bytes
     * @param {number} offset - position within `bytes` where the aux block starts
     * @returns {object} column numbers, coordinate type, meta character,
     *   skip-line count, format name and raw flags, plus whatever
     *   `parseNameBytes` yields for the reference-name section
     * @throws {Error} if the low 4 bits of the format flags are not 0, 1 or 2
     */
    parseAuxData(bytes, offset) {
        // Respect the view's own offset/length so DataView positions agree
        // with the `bytes.subarray` call below even when `bytes` is a window
        // into a larger ArrayBuffer.
        const dataView = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
        const formatFlags = dataView.getUint32(offset, true);
        const coordinateType = formatFlags & 0x10000 ? 'zero-based-half-open' : '1-based-closed';
        const format = ['generic', 'SAM', 'VCF'][formatFlags & 0xf];
        if (!format) {
            throw new Error(`invalid Tabix preset format flags ${formatFlags}`);
        }
        const columnNumbers = {
            ref: dataView.getInt32(offset + 4, true),
            start: dataView.getInt32(offset + 8, true),
            end: dataView.getInt32(offset + 12, true),
        };
        const metaValue = dataView.getInt32(offset + 16, true);
        const metaChar = metaValue ? String.fromCharCode(metaValue) : '';
        const skipLines = dataView.getInt32(offset + 20, true);
        const nameSectionLength = dataView.getInt32(offset + 24, true);
        return {
            columnNumbers,
            coordinateType,
            metaValue,
            metaChar,
            skipLines,
            format,
            formatFlags,
            ...(0, util_ts_1.parseNameBytes)(bytes.subarray(offset + 28, offset + 28 + nameSectionLength), this.renameRefSeq),
        };
    }
    /**
     * Fetch and parse the index.
     *
     * Works in two passes: the first walks every reference's section to
     * record where it starts and to compute `firstDataLine`; the second
     * (`getIndices`) decodes a single reference's bins on demand.
     *
     * @param {object} opts - passed through to `filehandle.readFile`
     * @returns {Promise<object>} parsed index: `csiVersion`, `firstDataLine`,
     *   `indices` accessor, `refCount`, `csi`, `maxBlockSize`, and any aux fields
     * @throws {Error} if the magic number is neither CSI v1 nor CSI v2
     */
    async _parse(opts) {
        const buffer = await this.filehandle.readFile(opts);
        const bytes = await (0, bgzf_filehandle_1.unzip)(buffer);
        // Respect the view's own offset/length: fromBytes/parsePseudoBin are
        // handed `bytes` itself, so DataView positions must be relative to
        // the same origin.
        const dataView = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
        let csiVersion;
        const magic = dataView.getUint32(0, true);
        if (magic === CSI1_MAGIC) {
            csiVersion = 1;
        }
        else if (magic === CSI2_MAGIC) {
            csiVersion = 2;
        }
        else {
            throw new Error(`Not a CSI file ${magic}`);
            // TODO: do we need to support big-endian CSI files?
        }
        this.minShift = dataView.getInt32(4, true);
        this.depth = dataView.getInt32(8, true);
        // (8^(depth+1) - 1) / 7. Computed with lshift rather than `1 << n`
        // because `<<` takes its count modulo 32 and would wrap once
        // (depth + 1) * 3 reaches 32, i.e. for depth >= 10.
        this.maxBinNumber = (lshift(1, (this.depth + 1) * 3) - 1) / 7;
        const maxBinNumber = this.maxBinNumber;
        const auxLength = dataView.getInt32(12, true);
        const aux = auxLength >= 30 ? this.parseAuxData(bytes, 16) : undefined;
        const refCount = dataView.getInt32(16 + auxLength, true);
        // SYNC: ~/src/gmod/tabix-js/src/csi.ts _parse — two-pass structure
        // read the indexes for each reference sequence
        let curr = 16 + auxLength + 4;
        let firstDataLine;
        // offsets[refId] is the position of that reference's bin count.
        const offsets = [];
        for (let i = 0; i < refCount; i++) {
            offsets.push(curr);
            const binCount = dataView.getInt32(curr, true);
            curr += 4;
            for (let j = 0; j < binCount; j++) {
                const bin = dataView.getUint32(curr, true);
                curr += 4;
                if (bin > this.maxBinNumber) {
                    // Bin number beyond the regular range: skipped here as a
                    // fixed 44 bytes; its stats are decoded in getIndices.
                    curr += 28 + 16;
                }
                else {
                    curr += 8; // skip loffset
                    const chunkCount = dataView.getInt32(curr, true);
                    curr += 4;
                    for (let k = 0; k < chunkCount; k += 1) {
                        const u = (0, virtualOffset_ts_1.fromBytes)(bytes, curr);
                        curr += 8; // chunk start
                        curr += 8; // chunk end (not needed in this pass)
                        firstDataLine = (0, util_ts_1.findFirstData)(firstDataLine, u);
                    }
                }
            }
        }
        // Decode the bins of one reference; returns undefined for a refId
        // that has no recorded offset.
        function getIndices(refId) {
            let curr = offsets[refId];
            if (curr === undefined) {
                return undefined;
            }
            // the binning index
            const binCount = dataView.getInt32(curr, true);
            curr += 4;
            const binIndex = {};
            let pseudoBinStats;
            for (let j = 0; j < binCount; j++) {
                const bin = dataView.getUint32(curr, true);
                curr += 4;
                if (bin > maxBinNumber) {
                    pseudoBinStats = (0, util_ts_1.parsePseudoBin)(bytes, curr + 28);
                    curr += 28 + 16;
                }
                else {
                    curr += 8; // skip loffset; firstDataLine was computed in the first pass
                    const chunkCount = dataView.getInt32(curr, true);
                    curr += 4;
                    const chunks = new Array(chunkCount);
                    for (let k = 0; k < chunkCount; k += 1) {
                        const u = (0, virtualOffset_ts_1.fromBytes)(bytes, curr);
                        curr += 8;
                        const v = (0, virtualOffset_ts_1.fromBytes)(bytes, curr);
                        curr += 8;
                        chunks[k] = new chunk_ts_1.default(u, v, bin);
                    }
                    binIndex[bin] = chunks;
                }
            }
            return {
                binIndex,
                stats: pseudoBinStats,
            };
        }
        return {
            csiVersion,
            firstDataLine,
            indices: (0, indexFile_ts_1.memoizeByRefId)(getIndices),
            refCount,
            csi: true,
            maxBlockSize: 1 << 16,
            ...aux,
        };
    }
    // CSI has no linear index — every refId starts from the beginning of file.
    getLowestChunk() {
        return ZERO_OFFSET;
    }
    /**
     * calculate the list of bins that may overlap with region [beg,end)
     * (zero-based half-open). Follows the reference implementation in hts-specs
     * CSIv1.tex.
     *
     * @param {number} beg - region start, zero-based inclusive
     * @param {number} end - region end, zero-based exclusive
     * @returns {Array<[number, number]>} one [firstBin, lastBin] pair per
     *   level, from level 0 through `this.depth`
     * @throws {Error} if the spanned bin count exceeds `this.maxBinNumber`
     */
    // SYNC: ~/src/gmod/tabix-js/src/csi.ts reg2bins
    reg2bins(beg, end) {
        // Clamp end to the maximum coordinate the index can address. With minShift
        // and depth, the index covers positions in [0, 2^(minShift + depth*3)).
        const maxPos = 2 ** (this.minShift + this.depth * 3);
        if (end > maxPos) {
            end = maxPos;
        }
        // Switch to an inclusive end so the shift below lands on the last
        // bin actually touched.
        end -= 1;
        // l: current level, t: first bin number of that level,
        // s: bit width of a bin at that level.
        let l = 0;
        let t = 0;
        let s = this.minShift + this.depth * 3;
        const bins = [];
        for (; l <= this.depth; s -= 3, t += lshift(1, l * 3), l += 1) {
            const b = t + rshift(beg, s);
            const e = t + rshift(end, s);
            if (e - b + bins.length > this.maxBinNumber) {
                throw new Error(`query ${beg}-${end} is too large for current binning scheme (shift ${this.minShift}, depth ${this.depth}), try a smaller query or a coarser index binning scheme`);
            }
            bins.push([b, e]);
        }
        return bins;
    }
}
exports.default = CSI;
//# sourceMappingURL=csi.js.map