genomic-reader
Version:
A TypeScript library for reading BigWig, BigBed, 2bit, and BAM files. Capable of streaming. For use in the browser or in Node.js.
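For example, the BAM index helpers in BamIndexReader.js (shown below) take a data loader object exposing a load(start, size?) method that resolves to a binary buffer (presumably an ArrayBuffer). The following is a minimal sketch, not taken from the package documentation: the package entry point, the loader implementation, the file path, and the coordinates are assumptions for illustration only.

// Sketch only: entry point, loader, and path are assumed, not documented.
const { readBamIndex, blocksForRange } = require("genomic-reader"); // assumed entry point
const fs = require("fs/promises");

// A loader serving byte ranges of a local .bai file; any object with a
// load(start, size?) => Promise<ArrayBuffer> method fits the interface used below.
const indexLoader = {
    async load(start, size) {
        const file = await fs.readFile("reads.bam.bai"); // hypothetical path
        const end = size === undefined ? file.length : start + size;
        const slice = file.subarray(start, end);
        return slice.buffer.slice(slice.byteOffset, slice.byteOffset + slice.byteLength);
    }
};

(async () => {
    // Parse the whole index, then find BGZF chunks that may contain alignments
    // overlapping positions 10,000-20,000 on the first reference sequence.
    const index = await readBamIndex(indexLoader);
    const chunks = blocksForRange(index.refData[0], 10000, 20000);
    console.log(chunks);
})();

The same sketch would work over HTTP with a loader that issues Range requests instead of reading the whole file from disk.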
BamIndexReader.js • JavaScript • 247 lines • 9.55 kB
"use strict";
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.inflateChunk = exports.parseRawIndexRefData = exports.streamRawBamIndex = exports.readBamIndexRef = exports.readBamIndex = exports.blocksForRange = void 0;
const DataLoader_1 = require("../loader/DataLoader");
const BinaryParser_1 = require("../util/BinaryParser");
const stream_1 = require("stream");
const BAI_MAGIC = 21578050;
const PSEUDO_BIN_MAGIC = 37450;
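/**
 * Returns the merged BGZF chunks from one reference's index data that may contain
 * alignments overlapping [start, end). Candidate chunks come from the overlapping
 * bins; the linear index (16 kb windows) supplies a lower-bound virtual offset used
 * to discard chunks that end before the region could begin.
 */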
function blocksForRange(indexData, start, end) {
const overlappingBins = reg2bins(start, end);
const binIndex = indexData.binIndex;
const linearIndex = indexData.linearIndex;
let allChunks = [];
for (let bin in binIndex) {
if (!overlappingBins.includes(Number(bin)))
continue;
const inflatedChunks = binIndex[bin].map((rawChunk) => inflateChunk(rawChunk));
allChunks = allChunks.concat(inflatedChunks);
}
let lowest = undefined;
// Smallest linear-index virtual offset among the 16 kb windows spanning [start, end),
// used as a lower bound when filtering chunks below.
const minLin = Math.min(start >> 14, linearIndex.length - 1);
const maxLin = Math.min(end >> 14, linearIndex.length - 1);
for (let i = minLin; i <= maxLin; i++) {
let offset = inflateVirtualOffset(linearIndex[i]);
if (offset === undefined)
continue;
if (lowest === undefined || isVOLessThan(offset, lowest)) {
lowest = offset;
}
}
return optimizeChunks(allChunks, lowest);
}
exports.blocksForRange = blocksForRange;
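// True if virtual offset `first` precedes `second`: compare compressed block
// positions first, then the data position within the uncompressed block.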
function isVOLessThan(first, second) {
return first.blockPosition < second.blockPosition ||
(first.blockPosition === second.blockPosition && first.dataPosition < second.dataPosition);
}
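/**
 * Sorts chunks by start offset, drops chunks that end before `lowest` (the
 * linear-index lower bound), and merges chunks whose compressed blocks lie
 * within ~65 kB of one another to reduce the number of reads issued later.
 */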
function optimizeChunks(chunks, lowest) {
if (chunks.length === 0)
return [];
let mergedChunks = [];
chunks.sort(function (c0, c1) {
let dif = c0.start.blockPosition - c1.start.blockPosition;
if (dif != 0) {
return dif;
}
else {
return c0.start.dataPosition - c1.start.dataPosition;
}
});
let currentMergedChunk = undefined;
for (let chunk of chunks) {
if (lowest !== undefined && isVOLessThan(chunk.end, lowest)) {
continue;
}
if (currentMergedChunk === undefined) {
currentMergedChunk = chunk;
mergedChunks.push(currentMergedChunk);
continue;
}
if ((chunk.start.blockPosition - currentMergedChunk.end.blockPosition) < 65000) {
if (isVOLessThan(currentMergedChunk.end, chunk.end)) {
currentMergedChunk.end = chunk.end;
}
}
else {
currentMergedChunk = chunk;
mergedChunks.push(currentMergedChunk);
}
}
return mergedChunks;
}
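/**
 * Reads a complete .bai index, parsing bin and linear index data for every
 * reference sequence.
 */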
function readBamIndex(indexDataLoader) {
return __awaiter(this, void 0, void 0, function* () {
return readBamIndexData(indexDataLoader);
});
}
exports.readBamIndex = readBamIndex;
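/**
 * Reads a .bai index but parses only the reference with the given id; the data
 * for all other references is skipped.
 */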
function readBamIndexRef(indexDataLoader, refId) {
return __awaiter(this, void 0, void 0, function* () {
return (yield readBamIndexData(indexDataLoader, refId)).refData[refId];
});
}
exports.readBamIndexRef = readBamIndexRef;
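// Shared implementation for the two readers above: verifies the BAI magic number,
// then parses or skips each reference's index entry.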
function readBamIndexData(indexDataLoader, refId) {
return __awaiter(this, void 0, void 0, function* () {
const indexData = yield indexDataLoader.load(0);
const parser = new BinaryParser_1.BinaryParser(indexData);
const magic = parser.getInt();
if (magic !== BAI_MAGIC) {
throw new Error('Not a BAI file');
}
const refData = [];
const numRefs = parser.getInt();
for (let ref = 0; ref < numRefs; ref++) {
if (refId === undefined || refId === ref) {
const refIdData = parseRefIdData(parser);
// Store by reference id so the result is addressable by refId even when only one reference was parsed.
refData[ref] = refIdData;
}
else {
skipRefIdData(parser);
}
}
return { refData };
});
}
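/**
 * Parses one reference's index entry: a bin index mapping bin number to its chunks
 * (pairs of start/end virtual offsets) and a linear index of per-16 kb-window
 * virtual offsets. Bin number 37450 is the samtools pseudo-bin holding metadata;
 * its 36 bytes are skipped.
 */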
function parseRefIdData(parser) {
const binIndex = {};
const linearIndex = [];
const numBins = parser.getInt();
for (let bin = 0; bin < numBins; bin++) {
const binNumber = parser.getUInt();
if (binNumber == PSEUDO_BIN_MAGIC) {
parser.position += 36;
continue;
}
const binChunks = [];
const numChunks = parser.getInt();
for (let chunk = 0; chunk < numChunks; chunk++) {
const chunkStart = readVirtualOffset(parser);
const chunkEnd = readVirtualOffset(parser);
binChunks.push({ start: chunkStart, end: chunkEnd });
}
binIndex[binNumber] = binChunks;
}
const numIntervals = parser.getInt();
for (let interval = 0; interval < numIntervals; interval++) {
linearIndex.push(readVirtualOffset(parser));
}
return { binIndex, linearIndex };
}
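// Advances the parser past one reference's index entry without parsing it.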
function skipRefIdData(parser) {
const numBins = parser.getInt();
for (let bin = 0; bin < numBins; bin++) {
const binNumber = parser.getUInt();
if (binNumber == PSEUDO_BIN_MAGIC) {
parser.position += 36;
continue;
}
const numChunks = parser.getInt();
for (let chunk = 0; chunk < numChunks; chunk++) {
parser.position += 16;
}
}
const numIntervals = parser.getInt();
for (let interval = 0; interval < numIntervals; interval++) {
parser.position += 8;
}
}
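/**
 * Returns a Node.js Readable carrying the raw, unparsed bytes of a single
 * reference's index entry. The index is walked sequentially through a
 * BufferedDataLoader; only the bytes belonging to `refId` are pushed to the
 * stream, and the walk stops once that reference has been emitted.
 */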
function streamRawBamIndex(indexDataLoader, refId) {
return __awaiter(this, void 0, void 0, function* () {
const bufferedLoader = new DataLoader_1.BufferedDataLoader(indexDataLoader, undefined, true);
let pos = 0;
const stream = new stream_1.Readable({ objectMode: true, read() { } });
const load = (bytes, streamData = false) => __awaiter(this, void 0, void 0, function* () {
const data = yield bufferedLoader.load(pos, bytes);
if (streamData)
stream.push(data);
pos += bytes;
return data;
});
const loadParser = (bytes, streamData = false) => __awaiter(this, void 0, void 0, function* () { return new BinaryParser_1.BinaryParser(yield load(bytes, streamData)); });
let parser = yield loadParser(8);
const magic = parser.getInt();
if (magic !== BAI_MAGIC) {
throw new Error('Not a BAI file');
}
const numRefs = parser.getInt();
for (let ref = 0; ref < numRefs; ref++) {
const streamData = refId === ref;
const numBins = (yield loadParser(4, streamData)).getInt();
for (let bin = 0; bin < numBins; bin++) {
const binNumber = (yield loadParser(4, streamData)).getUInt();
if (binNumber == PSEUDO_BIN_MAGIC) {
yield load(36, streamData);
continue;
}
const numChunks = (yield loadParser(4, streamData)).getInt();
for (let chunk = 0; chunk < numChunks; chunk++) {
yield load(16, streamData);
}
}
const numIntervals = (yield loadParser(4, streamData)).getInt();
for (let interval = 0; interval < numIntervals; interval++) {
yield load(8, streamData);
}
if (streamData)
break;
}
stream.push(null);
return stream;
});
}
exports.streamRawBamIndex = streamRawBamIndex;
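// Parses a reference's raw index bytes (such as those emitted by streamRawBamIndex,
// concatenated) into bin and linear index data.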
function parseRawIndexRefData(data) {
return parseRefIdData(new BinaryParser_1.BinaryParser(data));
}
exports.parseRawIndexRefData = parseRawIndexRefData;
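// Reads the 8 bytes of a little-endian BGZF virtual offset without decoding them.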
function readVirtualOffset(parser) {
const rawVO = new Uint8Array(8);
for (let i = 0; i < 8; i++)
rawVO[i] = parser.getByte();
return rawVO;
}
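// Decodes a raw virtual offset: the low 16 bits are the offset within the
// uncompressed BGZF block (dataPosition); the high 48 bits are the compressed
// file offset of that block (blockPosition).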
function inflateVirtualOffset(raw) {
const dataPosition = raw[1] << 8 | raw[0];
const blockPosition = raw[7] * 0x10000000000 +
raw[6] * 0x100000000 +
raw[5] * 0x1000000 +
raw[4] * 0x10000 +
raw[3] * 0x100 +
raw[2];
return { blockPosition, dataPosition };
}
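// Decodes the start and end virtual offsets of a raw chunk.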
function inflateChunk(raw) {
return {
start: inflateVirtualOffset(raw.start),
end: inflateVirtualOffset(raw.end)
};
}
exports.inflateChunk = inflateChunk;
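// Standard BAI reg2bins: collects bin 0 plus the bins at each of the five finer
// levels of the binning scheme (64 Mb, 8 Mb, 1 Mb, 128 kb, and 16 kb windows)
// whose intervals overlap [start, end).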
function reg2bins(start, end) {
const list = [0];
if (end >= 1 << 29)
end = 1 << 29;
end--;
for (let k = 1 + (start >> 26); k <= 1 + (end >> 26); k++)
list.push(k);
for (let k = 9 + (start >> 23); k <= 9 + (end >> 23); k++)
list.push(k);
for (let k = 73 + (start >> 20); k <= 73 + (end >> 20); k++)
list.push(k);
for (let k = 585 + (start >> 17); k <= 585 + (end >> 17); k++)
list.push(k);
for (let k = 4681 + (start >> 14); k <= 4681 + (end >> 14); k++)
list.push(k);
return list;
}
//# sourceMappingURL=BamIndexReader.js.map