@gmod/bbi

Parser for BigWig/BigBed files

"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.BigBed = void 0; exports.filterUndef = filterUndef; const abortable_promise_cache_1 = __importDefault(require("@gmod/abortable-promise-cache")); const quick_lru_1 = __importDefault(require("@jbrowse/quick-lru")); const rxjs_1 = require("rxjs"); const operators_1 = require("rxjs/operators"); const bbi_ts_1 = require("./bbi.js"); const decoder = new TextDecoder('utf8'); function filterUndef(ts) { return ts.filter((t) => !!t); } function getTabField(str, fieldIndex) { if (fieldIndex < 0) { return undefined; } let start = 0; for (let i = 0; i < fieldIndex; i++) { start = str.indexOf('\t', start); if (start === -1) { return undefined; } start++; } const end = str.indexOf('\t', start); return end === -1 ? str.slice(start) : str.slice(start, end); } // Parses a null-terminated string key from a B+ tree node function parseKey(buffer, offset, keySize) { const keyEnd = buffer.indexOf(0, offset); const effectiveKeyEnd = keyEnd !== -1 && keyEnd < offset + keySize ? keyEnd : offset + keySize; return decoder.decode(buffer.subarray(offset, effectiveKeyEnd)); } // Recursively traverses a B+ tree to search for a specific name in the BigBed extraIndex // B+ trees are balanced tree structures optimized for disk-based searches async function readBPlusTreeNode(bbi, nodeOffset, blockSize, keySize, valSize, name, field, opts) { const len = 4 + blockSize * (keySize + valSize); const buffer = await bbi.read(len, nodeOffset, opts); const dataView = new DataView(buffer.buffer, buffer.byteOffset, buffer.length); let offset = 0; const nodeType = dataView.getInt8(offset); offset += 2; // skip nodeType byte + 1 reserved byte const cnt = dataView.getInt16(offset, true); offset += 2; // Non-leaf node (nodeType === 0): contains keys and child node pointers for navigation if (nodeType === 0) { const leafkeys = []; for (let i = 0; i < cnt; i++) { const key = parseKey(buffer, offset, keySize); offset += keySize; const dataOffset = Number(dataView.getBigUint64(offset, true)); offset += 8; leafkeys.push({ key, offset: dataOffset, }); } // Binary search to find the appropriate child node let left = 0; let right = leafkeys.length - 1; let targetIndex = leafkeys.length - 1; while (left <= right) { const mid = Math.floor((left + right) / 2); const cmp = name.localeCompare(leafkeys[mid].key); if (cmp < 0) { targetIndex = mid - 1; right = mid - 1; } else { left = mid + 1; } } const childOffset = targetIndex >= 0 ? 
leafkeys[targetIndex].offset : leafkeys[0].offset; return readBPlusTreeNode(bbi, childOffset, blockSize, keySize, valSize, name, field, opts); } else if (nodeType === 1) { // Leaf node (nodeType === 1): contains actual key-value data const keys = []; for (let i = 0; i < cnt; i++) { const key = parseKey(buffer, offset, keySize); offset += keySize; const dataOffset = Number(dataView.getBigUint64(offset, true)); offset += 8; const length = dataView.getUint32(offset, true); offset += 4; offset += 4; // skip reserved keys.push({ key, offset: dataOffset, length, }); } // Binary search for exact key match in sorted leaf node let left = 0; let right = keys.length - 1; while (left <= right) { const mid = Math.floor((left + right) / 2); const cmp = name.localeCompare(keys[mid].key); if (cmp === 0) { return { ...keys[mid], field }; } else if (cmp < 0) { right = mid - 1; } else { left = mid + 1; } } return undefined; } } class BigBed extends bbi_ts_1.BBI { readIndicesCache = new abortable_promise_cache_1.default({ cache: new quick_lru_1.default({ maxSize: 1 }), fill: (args, signal) => this._readIndices({ ...args, signal }), }); readIndices(opts = {}) { const { signal, ...rest } = opts; return this.readIndicesCache.get(JSON.stringify(rest), opts, signal); } /* * retrieve unzoomed view for any scale */ async getView(_scale, opts) { return this.getUnzoomedView(opts); } /* * parse the bigbed extraIndex fields * * * @return a Promise for an array of Index data structure since there can be * multiple extraIndexes in a bigbed, see bedToBigBed documentation */ async _readIndices(opts) { const { extHeaderOffset } = await this.getHeader(opts); const b = await this.bbi.read(64, extHeaderOffset); const dataView = new DataView(b.buffer, b.byteOffset, b.length); let offset = 0; // const _size = dataView.getUint16(offset, true) offset += 2; const count = dataView.getUint16(offset, true); offset += 2; const dataOffset = Number(dataView.getBigUint64(offset, true)); offset += 8; // no extra index is defined if count==0 if (count === 0) { return []; } const blocklen = 20; const len = blocklen * count; const buffer = await this.bbi.read(len, dataOffset); const indices = []; for (let i = 0; i < count; i += 1) { const b = buffer.subarray(i * blocklen); const dataView = new DataView(b.buffer, b.byteOffset, b.length); let offset = 0; const type = dataView.getInt16(offset, true); offset += 2; const fieldcount = dataView.getInt16(offset, true); offset += 2; const dataOffset = Number(dataView.getBigUint64(offset, true)); offset += 8 + 4; // skip 8-byte offset + 4 reserved bytes const field = dataView.getInt16(offset, true); indices.push({ type, fieldcount, offset: dataOffset, field, }); } return indices; } /* * perform a search in the bigbed extraIndex to find which blocks in the * bigbed data to look for the actual feature data * * @param name - the name to search for * * @param opts - a SearchOptions argument with optional signal * * @return a Promise for an array of bigbed block Loc entries */ async searchExtraIndexBlocks(name, opts = {}) { const indices = await this.readIndices(opts); if (indices.length === 0) { return []; } const locs = indices.map(async (index) => { const { offset: offset2, field } = index; const b = await this.bbi.read(32, offset2, opts); const dataView = new DataView(b.buffer, b.byteOffset, b.length); let offset = 0; // const _magic = dataView.getInt32(offset, true) offset += 4; const blockSize = dataView.getInt32(offset, true); offset += 4; const keySize = dataView.getInt32(offset, true); offset += 4; 
const valSize = dataView.getInt32(offset, true); offset += 4; // const _itemCount = Number(dataView.getBigUint64(offset, true)) offset += 8; return readBPlusTreeNode(this.bbi, offset2 + 32, blockSize, keySize, valSize, name, field, opts); }); return filterUndef(await Promise.all(locs)); } /* * retrieve the features from the bigbed data that were found through the * lookup of the extraIndex note that there can be multiple extraIndex, see * the BigBed specification and the -extraIndex argument to bedToBigBed * * @param name - the name to search for * * @param opts - options object with optional AbortSignal * * @return array of Feature */ async searchExtraIndex(name, opts = {}) { const blocks = await this.searchExtraIndexBlocks(name, opts); if (blocks.length === 0) { return []; } const view = await this.getUnzoomedView(opts); const res = blocks.map(block => { return new rxjs_1.Observable(observer => { view.readFeatures(observer, [block], opts).catch((e) => { observer.error(e); }); }).pipe((0, operators_1.reduce)((acc, curr) => { acc.push(...curr); return acc; }, []), (0, operators_1.map)(features => features.map(f => ({ ...f, field: block.field })))); }); const ret = await (0, rxjs_1.firstValueFrom)((0, rxjs_1.merge)(...res)); // Filter to features where the indexed field matches the search name // field offset is adjusted by -3 to account for chrom, chromStart, chromEnd columns return ret.filter(f => { if (!f.rest) { return false; } const fieldIndex = (f.field || 0) - 3; return getTabField(f.rest, fieldIndex) === name; }); } } exports.BigBed = BigBed; //# sourceMappingURL=bigbed.js.map
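
For context, here is a minimal usage sketch (not part of the file above) of how the searchExtraIndex method exported by this module might be called from Node.js. It assumes the BigBed constructor accepts a path option for a local .bb file, as described in the @gmod/bbi README; the file name example.bb and the query string EDEN.1 are hypothetical placeholders.

const { BigBed } = require('@gmod/bbi')

async function main() {
  // hypothetical local BigBed file; a url or filehandle option could be
  // used instead, depending on the environment
  const bigbed = new BigBed({ path: 'example.bb' })

  // searchExtraIndex walks the extraIndex B+ tree(s) for blocks whose key
  // matches the query, reads those blocks, and returns only the features
  // whose indexed field is an exact match for the query string
  const features = await bigbed.searchExtraIndex('EDEN.1')
  console.log(features)
}

main().catch(console.error)

Because readIndices goes through an abortable promise cache (maxSize 1), repeated searches against the same BigBed instance parse the extraIndex header only once.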