UNPKG

@gmod/bbi

Version:

Parser for BigWig/BigBed files

201 lines 8.15 kB
import AbortablePromiseCache from '@gmod/abortable-promise-cache';
import QuickLRU from 'quick-lru';
import { Observable, firstValueFrom, merge } from 'rxjs';
import { map, reduce } from 'rxjs/operators';
import { BBI } from "./bbi.js";

/**
 * Drop every falsy entry (undefined, null, 0, '', false, NaN) from an array.
 *
 * @param ts - array possibly containing falsy entries
 * @return a new array holding only the truthy entries, in original order
 */
export function filterUndef(ts) {
  return ts.filter((t) => !!t);
}

/**
 * BigBed reader built on the shared BBI base class. Adds parsing of the
 * extraIndex section and name lookups through those indexes.
 */
export class BigBed extends BBI {
  constructor(...args) {
    super(...args);
    // Single-entry cache: the parsed extraIndex list is reused across calls
    // that share the same (signal-less) options.
    this.readIndicesCache = new AbortablePromiseCache({
      cache: new QuickLRU({ maxSize: 1 }),
      fill: (fillArgs, signal) => this._readIndices({ ...fillArgs, signal }),
    });
  }

  /**
   * Cached front-end for _readIndices. The cache key is the JSON form of the
   * options with the signal removed.
   *
   * @param opts - request options, may carry an AbortSignal in `signal`
   * @return a Promise for the array of extraIndex descriptors
   */
  readIndices(opts = {}) {
    const { signal, ...rest } = opts;
    return this.readIndicesCache.get(JSON.stringify(rest), opts, signal);
  }

  /**
   * Retrieve the unzoomed view regardless of the requested scale.
   *
   * @param _scale - ignored
   * @param opts - request options forwarded to getUnzoomedView
   */
  async getView(_scale, opts) {
    return this.getUnzoomedView(opts);
  }

  /**
   * Parse the bigbed extraIndex fields.
   *
   * @param opts - request options (forwarded to every read so an abort
   * signal is honoured)
   *
   * @return a Promise for an array of Index data structures, since there can
   * be multiple extraIndexes in a bigbed; see the bedToBigBed documentation
   */
  async _readIndices(opts) {
    const { extHeaderOffset } = await this.getHeader(opts);
    const header = await this.bbi.read(64, Number(extHeaderOffset), opts);
    const headerView = new DataView(
      header.buffer,
      header.byteOffset,
      header.length,
    );
    // Bytes read here: u16 at 0 (skipped), u16 count at 2, u64 offset of
    // the index list at 4.
    const count = headerView.getUint16(2, true);
    const dataOffset = Number(headerView.getBigUint64(4, true));

    // no extra index is defined if count==0
    if (count === 0) {
      return [];
    }

    const blocklen = 20;
    const buffer = await this.bbi.read(blocklen * count, dataOffset, opts);
    const view = new DataView(
      buffer.buffer,
      buffer.byteOffset,
      buffer.length,
    );
    const indices = [];
    for (let i = 0; i < count; i += 1) {
      // Each 20-byte record: i16 type, i16 fieldcount, u64 offset,
      // 4 skipped bytes, i16 field (at byte 16).
      const base = i * blocklen;
      indices.push({
        type: view.getInt16(base, true),
        fieldcount: view.getInt16(base + 2, true),
        offset: Number(view.getBigUint64(base + 4, true)),
        field: view.getInt16(base + 16, true),
      });
    }
    return indices;
  }

  /**
   * Perform a search in the bigbed extraIndex to find which blocks in the
   * bigbed data to look for the actual feature data.
   *
   * @param name - the name to search for
   *
   * @param opts - a SearchOptions argument with optional signal
   *
   * @return a Promise for an array of bigbed block Loc entries
   */
  async searchExtraIndexBlocks(name, opts = {}) {
    const indices = await this.readIndices(opts);
    if (indices.length === 0) {
      return [];
    }
    const decoder = new TextDecoder('utf8');
    const locs = indices.map(async ({ offset: indexOffset, field }) => {
      const head = await this.bbi.read(32, indexOffset, opts);
      const headView = new DataView(head.buffer, head.byteOffset, head.length);
      // 32-byte header: i32 at 0 (skipped), i32 blockSize at 4,
      // i32 keySize at 8, i32 valSize at 12, 8 skipped bytes at 16.
      const blockSize = headView.getInt32(4, true);
      const keySize = headView.getInt32(8, true);
      const valSize = headView.getInt32(12, true);

      // Keys are fixed-width and NUL-padded; strip the padding.
      const readKey = (bytes, pos) =>
        decoder.decode(bytes.subarray(pos, pos + keySize)).replaceAll('\0', '');

      const bptReadNode = async (nodeOffset) => {
        const len = 4 + blockSize * (keySize + valSize);
        const node = await this.bbi.read(len, Number(nodeOffset), opts);
        const nodeView = new DataView(
          node.buffer,
          node.byteOffset,
          node.length,
        );
        // Node header: i8 node type at 0, 1 skipped byte, i16 count at 2.
        const nodeType = nodeView.getInt8(0);
        const cnt = nodeView.getInt16(2, true);
        let pos = 4;

        if (nodeType === 0) {
          // Internal node: entries are (key, u64 child offset).
          if (cnt <= 0) {
            // Nothing to descend into; avoid a bogus read at offset 0.
            return undefined;
          }
          let childOffset = 0;
          for (let i = 0; i < cnt; i++) {
            const key = readKey(node, pos);
            pos += keySize;
            const offset = Number(nodeView.getBigUint64(pos, true));
            pos += 8;
            // Keys on disk are ordered by raw byte value, so compare with
            // the plain `<` operator (code-unit order) rather than
            // locale-aware collation, which orders mixed case and
            // punctuation differently and can pick the wrong child.
            if (i > 0 && name < key) {
              break;
            }
            childOffset = offset;
          }
          return bptReadNode(childOffset);
        }

        if (nodeType === 1) {
          // Leaf node: entries are (key, u64 offset, u32 length, u32 reserved).
          for (let i = 0; i < cnt; i++) {
            const key = readKey(node, pos);
            pos += keySize;
            const offset = Number(nodeView.getBigUint64(pos, true));
            pos += 8;
            const length = nodeView.getUint32(pos, true);
            pos += 4;
            const reserved = nodeView.getUint32(pos, true);
            pos += 4;
            if (key === name) {
              return { key, offset, length, reserved, field };
            }
          }
        }
        return undefined;
      };

      return bptReadNode(indexOffset + 32);
    });
    return filterUndef(await Promise.all(locs));
  }

  /**
   * Retrieve the features from the bigbed data that were found through the
   * lookup of the extraIndex. Note that there can be multiple extraIndex, see
   * the BigBed specification and the -extraIndex argument to bedToBigBed.
   *
   * @param name - the name to search for
   *
   * @param opts - options object with optional AbortSignal
   *
   * @return array of Feature
   */
  async searchExtraIndex(name, opts = {}) {
    const blocks = await this.searchExtraIndexBlocks(name, opts);
    if (blocks.length === 0) {
      return [];
    }
    const view = await this.getUnzoomedView(opts);
    const perBlock = blocks.map((block) =>
      new Observable((observer) => {
        view.readFeatures(observer, [block], opts).catch((e) => {
          observer.error(e);
        });
      }).pipe(
        // Seeded so a block that yields nothing still emits an empty array.
        reduce((acc, curr) => acc.concat(curr), []),
        map((features) => {
          // Tag each feature with the index field it was found through.
          for (const element of features) {
            element.field = block.field;
          }
          return features;
        }),
      ),
    );
    // Fold the results of ALL blocks together; taking only the first
    // emission of the merged stream would drop hits from other indexes.
    const ret = await firstValueFrom(
      merge(...perBlock).pipe(reduce((acc, curr) => acc.concat(curr), [])),
    );
    // Keep only features whose indexed column actually equals the name;
    // `rest` is indexed with (field - 3).
    return ret.filter((f) => f.rest?.split('\t')[(f.field || 0) - 3] === name);
  }
}
//# sourceMappingURL=bigbed.js.map