
genomic-reader

A TypeScript library for reading BigWig, BigBed, 2bit, and BAM files, with support for streaming. For use in the browser or in Node.js.
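The compiled module shown below reads BAM index (.bai) files through the package's data loader abstraction, which is what lets the same code run in the browser or in Node.js. The loader type itself is not shown on this page; judging only from how the code calls it, a loader exposes an async load(start, size?) method that resolves to an ArrayBuffer. A minimal Node-only sketch of such a loader, with the FileDataLoader name and the exact signature assumed rather than taken from the library, might look like this:

// Hypothetical loader for local files, inferred from the calls made by the
// compiled BamIndexReader code below (load(0) for the whole file,
// load(position, bytes) for a byte range). Not the library's actual DataLoader.
const fs = require("fs").promises;

class FileDataLoader {
    constructor(path) {
        this.path = path;
    }

    // Resolves to an ArrayBuffer with `size` bytes starting at `start`;
    // if `size` is omitted, reads from `start` to the end of the file.
    // (ArrayBuffer is assumed here; it is what BinaryParser appears to consume.)
    async load(start, size) {
        const handle = await fs.open(this.path, "r");
        try {
            const stat = await handle.stat();
            const length = size === undefined ? stat.size - start : size;
            const buffer = Buffer.alloc(length);
            await handle.read(buffer, 0, length, start);
            // Return a standalone ArrayBuffer, not a view into Node's Buffer pool.
            return buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);
        } finally {
            await handle.close();
        }
    }
}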

BamIndexReader.js (247 lines, 9.55 kB)
"use strict"; var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } return new (P || (P = Promise))(function (resolve, reject) { function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } step((generator = generator.apply(thisArg, _arguments || [])).next()); }); }; Object.defineProperty(exports, "__esModule", { value: true }); exports.inflateChunk = exports.parseRawIndexRefData = exports.streamRawBamIndex = exports.readBamIndexRef = exports.readBamIndex = exports.blocksForRange = void 0; const DataLoader_1 = require("../loader/DataLoader"); const BinaryParser_1 = require("../util/BinaryParser"); const stream_1 = require("stream"); const BAI_MAGIC = 21578050; const PSEUDO_BIN_MAGIC = 37450; function blocksForRange(indexData, start, end) { const overlappingBins = reg2bins(start, end); const binIndex = indexData.binIndex; const linearIndex = indexData.linearIndex; let allChunks = []; for (let bin in binIndex) { if (!overlappingBins.includes(Number(bin))) continue; const inflatedChunks = binIndex[bin].map((rawChunk) => inflateChunk(rawChunk)); allChunks = allChunks.concat(inflatedChunks); } let lowest = undefined; const minLin = Math.min(start >> 14, linearIndex.length - 1); const maxLin = Math.max(end >> 14, linearIndex.length - 1); for (let i = minLin; i <= maxLin; i++) { let offset = inflateVirtualOffset(linearIndex[i]); if (offset === undefined) continue; if (lowest === undefined || isVOLessThan(offset, lowest)) { lowest = offset; } } return optimizeChunks(allChunks, lowest); } exports.blocksForRange = blocksForRange; function isVOLessThan(first, second) { return first.blockPosition < second.blockPosition || (first.blockPosition === second.blockPosition && first.dataPosition < second.dataPosition); } function optimizeChunks(chunks, lowest) { if (chunks.length === 0) return []; let mergedChunks = []; chunks.sort(function (c0, c1) { let dif = c0.start.blockPosition - c1.start.blockPosition; if (dif != 0) { return dif; } else { return c0.start.dataPosition - c1.start.dataPosition; } }); let currentMergedChunk = undefined; for (let chunk of chunks) { if (lowest !== undefined && isVOLessThan(chunk.end, lowest)) { continue; } if (currentMergedChunk === undefined) { currentMergedChunk = chunk; mergedChunks.push(currentMergedChunk); } if ((chunk.start.blockPosition - currentMergedChunk.end.blockPosition) < 65000) { if (isVOLessThan(currentMergedChunk.end, chunk.end)) { currentMergedChunk.end = chunk.end; } } else { currentMergedChunk = chunk; mergedChunks.push(currentMergedChunk); } } return mergedChunks; } function readBamIndex(indexDataLoader) { return __awaiter(this, void 0, void 0, function* () { return readBamIndexData(indexDataLoader); }); } exports.readBamIndex = readBamIndex; function readBamIndexRef(indexDataLoader, refId) { return __awaiter(this, void 0, void 0, function* () { return (yield readBamIndexData(indexDataLoader, refId)).refData[refId]; }); } exports.readBamIndexRef = readBamIndexRef; function readBamIndexData(indexDataLoader, refId) { return __awaiter(this, void 0, void 0, function* () { const indexData = yield indexDataLoader.load(0); const parser = new BinaryParser_1.BinaryParser(indexData); 
const magic = parser.getInt(); if (magic !== BAI_MAGIC) { throw new Error('Not a BAI file'); } const refData = []; const numRefs = parser.getInt(); for (let ref = 0; ref < numRefs; ref++) { if (refId === undefined || refId === ref) { const refIdData = parseRefIdData(parser); refData.push(refIdData); } else { skipRefIdData(parser); } } return { refData }; }); } function parseRefIdData(parser) { const binIndex = {}; const linearIndex = Array(); const numBins = parser.getInt(); for (let bin = 0; bin < numBins; bin++) { const binNumber = parser.getUInt(); if (binNumber == PSEUDO_BIN_MAGIC) { parser.position += 36; continue; } const binChunks = []; const numChunks = parser.getInt(); for (let chunk = 0; chunk < numChunks; chunk++) { const chunkStart = readVirtualOffset(parser); const chunkEnd = readVirtualOffset(parser); binChunks.push({ start: chunkStart, end: chunkEnd }); } binIndex[binNumber] = binChunks; } const numIntervals = parser.getInt(); for (let interval = 0; interval < numIntervals; interval++) { linearIndex.push(readVirtualOffset(parser)); } return { binIndex, linearIndex }; } function skipRefIdData(parser) { const numBins = parser.getInt(); for (let bin = 0; bin < numBins; bin++) { const binNumber = parser.getUInt(); if (binNumber == PSEUDO_BIN_MAGIC) { parser.position += 36; continue; } const numChunks = parser.getInt(); for (let chunk = 0; chunk < numChunks; chunk++) { parser.position += 16; } } const numIntervals = parser.getInt(); for (let interval = 0; interval < numIntervals; interval++) { parser.position += 8; } } function streamRawBamIndex(indexDataLoader, refId) { return __awaiter(this, void 0, void 0, function* () { const bufferedLoader = new DataLoader_1.BufferedDataLoader(indexDataLoader, undefined, true); let pos = 0; const stream = new stream_1.Readable({ objectMode: true, read() { } }); const load = (bytes, streamData = false) => __awaiter(this, void 0, void 0, function* () { const data = yield bufferedLoader.load(pos, bytes); if (streamData) stream.push(data); pos += bytes; return data; }); const loadParser = (bytes, streamData = false) => __awaiter(this, void 0, void 0, function* () { return new BinaryParser_1.BinaryParser(yield load(bytes, streamData)); }); let parser = yield loadParser(8); const magic = parser.getInt(); if (magic !== BAI_MAGIC) { throw new Error('Not a BAI file'); } const numRefs = parser.getInt(); for (let ref = 0; ref < numRefs; ref++) { const streamData = refId === ref; const numBins = (yield loadParser(4, streamData)).getInt(); for (let bin = 0; bin < numBins; bin++) { const binNumber = (yield loadParser(4, streamData)).getUInt(); if (binNumber == PSEUDO_BIN_MAGIC) { yield load(36, streamData); continue; } const numChunks = (yield loadParser(4, streamData)).getInt(); for (let chunk = 0; chunk < numChunks; chunk++) { yield load(16, streamData); } } const numIntervals = (yield loadParser(4, streamData)).getInt(); for (let interval = 0; interval < numIntervals; interval++) { yield load(8, streamData); } if (streamData) break; } stream.push(null); return stream; }); } exports.streamRawBamIndex = streamRawBamIndex; function parseRawIndexRefData(data) { return parseRefIdData(new BinaryParser_1.BinaryParser(data)); } exports.parseRawIndexRefData = parseRawIndexRefData; function readVirtualOffset(parser) { const rawVO = new Uint8Array(8); for (let i = 0; i < 8; i++) rawVO[i] = parser.getByte(); return rawVO; } function inflateVirtualOffset(raw) { const dataPosition = raw[1] << 8 | raw[0]; const blockPosition = raw[7] * 0x10000000000 + raw[6] * 
0x100000000 + raw[5] * 0x1000000 + raw[4] * 0x10000 + raw[3] * 0x100 + raw[2]; return { blockPosition, dataPosition }; } function inflateChunk(raw) { return { start: inflateVirtualOffset(raw.start), end: inflateVirtualOffset(raw.end) }; } exports.inflateChunk = inflateChunk; function reg2bins(start, end) { const list = [0]; if (end >= 1 << 29) end = 1 << 29; end--; for (let k = 1 + (start >> 26); k <= 1 + (end >> 26); k++) list.push(k); for (let k = 9 + (start >> 23); k <= 9 + (end >> 23); k++) list.push(k); for (let k = 73 + (start >> 20); k <= 73 + (end >> 20); k++) list.push(k); for (let k = 585 + (start >> 17); k <= 585 + (end >> 17); k++) list.push(k); for (let k = 4681 + (start >> 14); k <= 4681 + (end >> 14); k++) list.push(k); return list; } //# sourceMappingURL=BamIndexReader.js.map
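As a rough usage sketch of the exports above: the module path, the index file name, the reference ID, and the coordinates are placeholders, and the inline loader assumes load() should resolve to an ArrayBuffer, as the BinaryParser usage above suggests.

// Hypothetical example; "./BamIndexReader" and "sample.bam.bai" are placeholders.
const fs = require("fs").promises;
const { readBamIndexRef, blocksForRange } = require("./BamIndexReader");

async function main() {
    // Minimal loader: readBamIndexRef only calls load(0), i.e. "give me the whole file".
    const indexLoader = {
        load: async (start) => {
            const buf = await fs.readFile("sample.bam.bai");
            return buf.buffer.slice(buf.byteOffset + start, buf.byteOffset + buf.byteLength);
        }
    };

    // Parse the bin and linear indices for reference 0 (the first reference in the
    // BAM header), then ask which BGZF chunks may contain alignments overlapping
    // positions 1,000,000-1,100,000.
    const refIndex = await readBamIndexRef(indexLoader, 0);
    const chunks = blocksForRange(refIndex, 1000000, 1100000);
    for (const chunk of chunks) {
        console.log(
            `block ${chunk.start.blockPosition}:${chunk.start.dataPosition}` +
            ` -> ${chunk.end.blockPosition}:${chunk.end.dataPosition}`
        );
    }
}

main().catch(console.error);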