UNPKG

hic-straw

Version:

Utilities for reading .hic files (contact matrix files)

231 lines (192 loc) 8.9 kB
import BinaryParser from "./binary.js";

// Bytes per block-index record as read below: int block number, long file position, int size.
const INDEX_ENTRY_BYTES = 16;

/**
 * Block numbers for the row-major block grid (used for format versions below 9 and for
 * matrices whose two chromosomes differ).
 *
 * @param {{start: number, end: number}} regionX - region supplying the column range
 * @param {{start: number, end: number}} regionY - region supplying the row range
 * @param {number} binSize - bin size of the zoom level
 * @param {number} blockBinCount - divisor converting a bin coordinate to a block coordinate
 * @param {number} blockColumnCount - multiplier applied to the major block coordinate
 * @param {boolean} foldAcrossDiagonal - when true, cells with row < column are mapped to the
 *        transposed block number
 * @returns {number[]} distinct block numbers in first-encountered order
 */
function rowMajorBlockNumbers(regionX, regionY, binSize, blockBinCount, blockColumnCount, foldAcrossDiagonal) {
    const firstColumn = Math.floor(regionX.start / binSize / blockBinCount);
    const lastColumn = Math.floor((regionX.end / binSize - 1) / blockBinCount);
    const firstRow = Math.floor(regionY.start / binSize / blockBinCount);
    const lastRow = Math.floor((regionY.end / binSize - 1) / blockBinCount);

    // Transposition can map two different cells to the same block, so duplicates are dropped.
    const alreadyListed = new Set();
    const numbers = [];
    for (let row = firstRow; row <= lastRow; row++) {
        for (let column = firstColumn; column <= lastColumn; column++) {
            const transposed = foldAcrossDiagonal && row < column;
            const candidate = transposed
                ? column * blockColumnCount + row
                : row * blockColumnCount + column;
            if (!alreadyListed.has(candidate)) {
                alreadyListed.add(candidate);
                numbers.push(candidate);
            }
        }
    }
    return numbers;
}

/**
 * Block numbers for the diagonal-oriented grid (used for format version 9 and later when both
 * chromosomes are the same).
 *
 * PAD = positionAlongDiagonal (~projected). Depth is the axis perpendicular to the diagonal;
 * "nearer" means closer to the diagonal.
 *
 * @param {{start: number, end: number}} regionX - first region
 * @param {{start: number, end: number}} regionY - second region
 * @param {number} binSize - bin size of the zoom level
 * @param {number} blockBinCount - divisor converting a bin coordinate to a block coordinate
 * @param {number} blockColumnCount - multiplier applied to the depth coordinate
 * @returns {number[]} block numbers, ordered by depth and then by PAD
 */
function diagonalBlockNumbers(regionX, regionY, binSize, blockBinCount, blockColumnCount) {
    const xLow = regionX.start / binSize;
    const xHigh = regionX.end / binSize;
    const yLow = regionY.start / binSize;
    const yHigh = regionY.end / binSize;

    const padOf = (x, y) => Math.floor((x + y) / 2 / blockBinCount);
    const depthOf = (x, y) => Math.floor(Math.log2(1 + Math.abs(x - y) / Math.sqrt(2) / blockBinCount));

    const firstPad = padOf(xLow, yLow);
    const lastPad = padOf(xHigh, yHigh);

    // The two corner depths are computed as if the query sat above the diagonal; it may sit
    // below it, so min/max is taken afterwards instead of trusting which one is nearer.
    const cornerDepthA = depthOf(xLow, yHigh);
    const cornerDepthB = depthOf(xHigh, yLow);

    // The sign of (x - y) differs between the two corners when the query crosses the diagonal.
    const crossesDiagonal = (xHigh - yLow) * (xLow - yHigh) < 0;
    const firstDepth = crossesDiagonal ? 0 : Math.min(cornerDepthA, cornerDepthB);
    const lastDepth = Math.max(cornerDepthA, cornerDepthB);

    const numbers = [];
    for (let depth = firstDepth; depth <= lastDepth; depth++) {
        for (let pad = firstPad; pad <= lastPad; pad++) {
            numbers.push(depth * blockColumnCount + pad);
        }
    }
    return numbers;
}

/**
 * Per-zoom-level metadata for one chromosome pair, plus the index used to locate its blocks.
 *
 * Instances are normally created by {@link MatrixZoomData.parseMatrixZoomData}, which fills in
 * `zoom`, `blockBinCount`, `blockColumnCount`, `blockIndex` and the summary statistics.
 */
class MatrixZoomData {

    /**
     * @param chr1 - first chromosome; `getKey` reads its `name` and `parseMatrixZoomData` its `size`
     * @param chr2 - second chromosome, same shape as chr1
     */
    constructor(chr1, chr2) {
        this.chr1 = chr1;
        this.chr2 = chr2;
    }

    /**
     * @returns {string} "<chr1 name>_<chr2 name>_<unit>_<bin size>" for this zoom level
     */
    getKey() {
        return `${this.chr1.name}_${this.chr2.name}_${this.zoom.unit}_${this.zoom.binSize}`;
    }

    /**
     * Lists the numbers of the blocks needed to cover a pair of regions.
     *
     * @param {{chr: *, start: number, end: number}} region1 - first region
     * @param {{chr: *, start: number, end: number}} region2 - second region
     * @param {number} version - file format version; selects the block-grid layout
     * @returns {number[]} block numbers
     */
    getBlockNumbers(region1, region2, version) {
        // Swap the regions when they arrive in the opposite chromosome order.
        // The loose `==` on the first comparison is deliberately left as it was.
        if (region1.chr == this.chr2 && region2.chr === this.chr1) {
            [region1, region2] = [region2, region1];
        }

        const isIntraChromosomal = this.chr1 === this.chr2;
        const binSize = this.zoom.binSize;
        const {blockBinCount, blockColumnCount} = this;

        if (version < 9 || !isIntraChromosomal) {
            return rowMajorBlockNumbers(region1, region2, binSize, blockBinCount, blockColumnCount, isIntraChromosomal);
        }
        return diagonalBlockNumbers(region1, region2, binSize, blockBinCount, blockColumnCount);
    }

    /**
     * Reads one zoom-level record from a binary parser and builds the matching instance.
     *
     * Fields are consumed in this order: unit string, zoom index, sum of counts, occupied cell
     * count, standard deviation, 95th percentile, bin size, block bin count, block column count,
     * number of blocks, followed by that many block-index entries.
     *
     * @param chr1 - first chromosome (its `size` is read)
     * @param chr2 - second chromosome (its `size` is read)
     * @param dis - parser exposing getString(), getInt(), getFloat() and getLong()
     * @returns {MatrixZoomData} the populated instance
     */
    static parseMatrixZoomData(chr1, chr2, dis) {
        const unit = dis.getString();
        const index = dis.getInt();
        const sumCounts = dis.getFloat();
        const occupiedCellCount = dis.getFloat();
        const stdDev = dis.getFloat();
        const percent95 = dis.getFloat();
        const binSize = dis.getInt();
        const blockBinCount = dis.getInt();
        const blockColumnCount = dis.getInt();
        const blockTotal = dis.getInt();
        const blockIndex = new StaticBlockIndex(blockTotal, dis);

        // Two successive divisions rather than one by (nBins1 * nBins2), to avoid overflow.
        const averageCount = (sumCounts / (chr1.size / binSize)) / (chr2.size / binSize);

        return Object.assign(new MatrixZoomData(chr1, chr2), {
            blockBinCount,
            blockColumnCount,
            zoom: {index, unit, binSize},
            blockIndex,
            averageCount,
            sumCounts,
            stdDev,
            occupiedCellCount,
            percent95
        });
    }
}

/**
 * Block index that is read completely up front and kept as a plain lookup table.
 */
class StaticBlockIndex {

    /**
     * @param {number} nBlocks - number of index entries to read
     * @param dis - parser positioned at the first entry; each entry is read as
     *        getInt() (block number), getLong() (file position), getInt() (size)
     */
    constructor(nBlocks, dis) {
        this.blockIndex = {};
        for (let remaining = nBlocks; remaining > 0; remaining--) {
            const blockNumber = dis.getInt();
            const filePosition = dis.getLong();
            const size = dis.getInt();
            this.blockIndex[blockNumber] = {filePosition, size};
        }
    }

    /**
     * @param {number} blockNumber
     * @returns {{filePosition: *, size: number}|undefined} the entry, or undefined if absent
     */
    getBlockIndexEntry(blockNumber) {
        return this.blockIndex[blockNumber];
    }
}

/**
 * The dynamic block index looks up entries on-the-fly using a bisecting algorithm. It was
 * developed for 1bp max resolution files when block grids were square for intra-chromosomes
 * (format v8). The rotated non-square grid used for v9 makes this obsolete.
 */
class DynamicBlockIndex {

    /**
     * @param {Object} options
     * @param options.file - object whose read(position, length) resolves to data accepted by DataView
     * @param {number} options.position - file position of the first index entry
     * @param {number} options.nBlocks - number of entries in the index
     * @param {number} options.maxBlock - block numbers above this are reported as absent
     */
    constructor({file, position, nBlocks, maxBlock}) {
        this.file = file;
        this.minPosition = position;
        this.maxPosition = position + nBlocks * INDEX_ENTRY_BYTES;
        this.nBlocks = nBlocks;
        this.maxBlock = maxBlock;
    }

    /**
     * Resolves the index entry for a block, using the most recently loaded chunk when it covers
     * the requested block number and otherwise searching the part of the file that chunk rules out.
     *
     * @param {number} blockNumber
     * @returns {Promise<{filePosition: *, size: number}|undefined>}
     */
    async getBlockIndexEntry(blockNumber) {
        if (blockNumber > this.maxBlock) {
            return undefined;
        }

        const loadedRange = this.blockNumberRange;
        if (loadedRange && blockNumber >= loadedRange.first && blockNumber <= loadedRange.last) {
            return this.blockIndexMap[blockNumber];
        }

        let searchFrom = this.minPosition;
        let searchTo = this.maxPosition;
        if (loadedRange && this.mapFileBounds) {
            // The loaded chunk tells us on which side of it the wanted entry must lie.
            if (blockNumber < loadedRange.first) {
                searchTo = this.mapFileBounds.min;
            } else if (blockNumber > loadedRange.last) {
                searchFrom = this.mapFileBounds.max;
            }
        }

        if (searchTo - searchFrom < INDEX_ENTRY_BYTES) {
            return undefined;
        }
        return this.searchForBlockIndexEntry(blockNumber, searchFrom, searchTo);
    }

    /**
     * Searches for the entry of blockNumber between two file positions.
     * boundsMin is guaranteed to start at the beginning of an entry, boundsMax at the end of one.
     *
     * The range is halved by probing single entries until it is smaller than the chunk size;
     * that remainder is then read in one request and remembered on the instance
     * (`mapFileBounds`, `blockNumberRange`, `blockIndexMap`).
     *
     * @param {number} blockNumber
     * @param {number} boundsMin - inclusive start position
     * @param {number} boundsMax - exclusive end position
     * @returns {Promise<{filePosition: *, size: number}|undefined>}
     */
    async searchForBlockIndexEntry(blockNumber, boundsMin, boundsMax) {
        const chunkSize = INDEX_ENTRY_BYTES * 100000;
        let low = boundsMin;
        let high = boundsMax;

        while (!(high - low < chunkSize)) {
            // Midpoint in units of whole entries.
            const entryCount = (high - low) / INDEX_ENTRY_BYTES;
            const probePosition = low + Math.floor(entryCount / 2) * INDEX_ENTRY_BYTES;
            const probeData = await this.file.read(probePosition, INDEX_ENTRY_BYTES);
            const probe = new BinaryParser(new DataView(probeData));
            const probedBlockNumber = probe.getInt();

            if (probedBlockNumber === blockNumber) {
                // Extraordinarily lucky: the probe landed exactly on the wanted entry.
                const filePosition = probe.getLong();
                const size = probe.getInt();
                return {filePosition, size};
            }
            if (blockNumber > probedBlockNumber) {
                low = probePosition + INDEX_ENTRY_BYTES;
            } else {
                high = probePosition;
            }
        }

        const chunkData = await this.file.read(low, high - low);
        const chunk = new BinaryParser(new DataView(chunkData));
        const entries = {};
        let first;
        let last;
        for (let offset = low; offset < high; offset += INDEX_ENTRY_BYTES) {
            const entryBlockNumber = chunk.getInt();
            const filePosition = chunk.getLong();
            const size = chunk.getInt();
            entries[entryBlockNumber] = {filePosition, size};
            if (first === undefined) {
                first = entryBlockNumber;
            }
            last = entryBlockNumber;
        }

        this.mapFileBounds = {min: low, max: high};
        this.blockNumberRange = {first, last};
        this.blockIndexMap = entries;
        return entries[blockNumber];
    }
}

export default MatrixZoomData;