UNPKG

@gmod/bam

Version:

Parser for BAM and BAM index (bai) files

182 lines 6.65 kB
import Chunk from "./chunk.js";
import { longFromBytesToUnsigned } from "./long.js";

/**
 * Sort chunks by their starting virtual offset and coalesce neighbours that
 * lie close together, optionally dropping chunks that end at or before
 * `lowest`.
 *
 * @param {Array} chunks - chunks exposing `minv` / `maxv` (each with
 *   `blockPosition` and `dataPosition`) and `bin`. Neither the array nor its
 *   elements are modified.
 * @param {object} [lowest] - virtual offset; only chunks whose `maxv` lies
 *   strictly after it are kept.
 * @returns {Array} the merged chunks (the input array itself when it is empty).
 */
export function optimizeChunks(chunks, lowest) {
  const n = chunks.length;
  if (n === 0) {
    return chunks;
  }

  // Pre-filter chunks below lowest threshold before sorting
  let filtered;
  if (lowest) {
    const lowestBlock = lowest.blockPosition;
    const lowestData = lowest.dataPosition;
    filtered = [];
    for (let i = 0; i < n; i++) {
      const chunk = chunks[i];
      const maxv = chunk.maxv;
      // `||` is deliberate: fall through to dataPosition only on a block tie.
      const cmp =
        maxv.blockPosition - lowestBlock || maxv.dataPosition - lowestData;
      if (cmp > 0) {
        filtered.push(chunk);
      }
    }
    if (filtered.length === 0) {
      return filtered;
    }
  } else {
    // Copy so the sort below does not reorder the caller's array.
    filtered = chunks.slice();
  }

  filtered.sort((c0, c1) => {
    const dif = c0.minv.blockPosition - c1.minv.blockPosition;
    return dif !== 0 ? dif : c0.minv.dataPosition - c1.minv.dataPosition;
  });

  // Source chunks are shared with the index's per-refId cache, so we never
  // mutate them — extending a merged span produces a new Chunk instance.
  const mergedChunks = [filtered[0]];
  let lastMinBlock = filtered[0].minv.blockPosition;
  let lastMaxBlock = filtered[0].maxv.blockPosition;

  for (let i = 1; i < filtered.length; i++) {
    const chunk = filtered[i];
    const chunkMinBlock = chunk.minv.blockPosition;
    const chunkMaxBlock = chunk.maxv.blockPosition;

    // Merge if chunks are close enough: small gap between them, and the
    // combined span is bounded so we don't grow a single chunk indefinitely.
    if (
      chunkMinBlock - lastMaxBlock < 65000 &&
      chunkMaxBlock - lastMinBlock < 5000000
    ) {
      const lastChunk = mergedChunks[mergedChunks.length - 1];
      const cmp =
        chunkMaxBlock - lastMaxBlock ||
        chunk.maxv.dataPosition - lastChunk.maxv.dataPosition;
      // Only extend when this chunk ends after the current merged span.
      if (cmp > 0) {
        mergedChunks[mergedChunks.length - 1] = new Chunk(
          lastChunk.minv,
          chunk.maxv,
          lastChunk.bin,
        );
        lastMaxBlock = chunkMaxBlock;
      }
    } else {
      mergedChunks.push(chunk);
      lastMinBlock = chunkMinBlock;
      lastMaxBlock = chunkMaxBlock;
    }
  }

  return mergedChunks;
}

/**
 * Read the `lineCount` field of a pseudo-bin.
 *
 * @param {Uint8Array} bytes - index bytes.
 * @param {number} offset - position passed through to `longFromBytesToUnsigned`.
 * @returns {{ lineCount: * }} the value returned by `longFromBytesToUnsigned`.
 */
export function parsePseudoBin(bytes, offset) {
  return {
    lineCount: longFromBytesToUnsigned(bytes, offset),
  };
}

// Parse the BAM reference-sequence table (SAMv1.pdf §4.2). Returns undefined
// if `uncba` doesn't yet contain the full table — caller fetches more bytes
// and retries.
/**
 * @param {Uint8Array} uncba - bytes holding the reference table.
 * @param {number} start - offset within `uncba` of the little-endian int32
 *   reference count.
 * @param {(name: string) => string} renameRefSeq - applied to every decoded
 *   reference name.
 * @returns {{ chrToIndex: Object<string, number>,
 *   indexToChr: Array<{ refName: string, length: number }> } | undefined}
 */
export function parseRefSeqs(uncba, start, renameRefSeq) {
  if (start + 4 > uncba.length) {
    return undefined;
  }

  // Anchor the view to uncba's own window. `uncba.buffer` alone starts at the
  // beginning of the backing ArrayBuffer, which is the wrong origin whenever
  // uncba is a subarray or a pooled Node Buffer (non-zero byteOffset).
  const dataView = new DataView(
    uncba.buffer,
    uncba.byteOffset,
    uncba.byteLength,
  );
  const nRef = dataView.getInt32(start, true);
  const chrToIndex = {};
  const indexToChr = [];
  const decoder = new TextDecoder('utf8');

  let p = start + 4;
  for (let i = 0; i < nRef; i++) {
    // Need the 4-byte name length plus the 4-byte sequence length at minimum.
    if (p + 8 > uncba.length) {
      return undefined;
    }
    const lName = dataView.getInt32(p, true);
    if (p + 8 + lName > uncba.length) {
      return undefined;
    }
    // The final byte of the name field is excluded from the decoded string.
    const refName = renameRefSeq(
      decoder.decode(uncba.subarray(p + 4, p + 4 + lName - 1)),
    );
    const lRef = dataView.getInt32(p + lName + 4, true);

    chrToIndex[refName] = i;
    indexToChr.push({ refName, length: lRef });

    p += 8 + lName;
  }

  return { chrToIndex, indexToChr };
}

/**
 * Pick the smaller of two virtual offsets.
 *
 * @param {object} [firstDataLine] - current value, exposing `compareTo`;
 *   when falsy, `virtualOffset` is returned.
 * @param {object} virtualOffset - candidate offset.
 * @returns {object} `virtualOffset` when `firstDataLine` is falsy or compares
 *   greater than it, otherwise `firstDataLine`.
 */
export function findFirstData(firstDataLine, virtualOffset) {
  return firstDataLine
    ? firstDataLine.compareTo(virtualOffset) > 0
      ? virtualOffset
      : firstDataLine
    : virtualOffset;
}

// SYNC: ~/src/gmod/tabix-js/src/util.ts parseNameBytes uses indexOf(0) instead of byte scan
/**
 * Split a block of zero-terminated names into id/name lookup tables.
 *
 * Every zero byte advances the reference id, including after an empty name
 * (which is not recorded). Bytes after the last zero byte are ignored.
 *
 * @param {Uint8Array} namesBytes - concatenated zero-terminated names.
 * @param {(name: string) => string} [renameRefSeq] - applied to each name.
 * @returns {{ refNameToId: Object<string, number>, refIdToName: string[] }}
 */
export function parseNameBytes(namesBytes, renameRefSeq = s => s) {
  const decoder = new TextDecoder();
  let currRefId = 0;
  let currNameStart = 0;
  const refIdToName = [];
  const refNameToId = {};

  for (let i = 0; i < namesBytes.length; i++) {
    if (!namesBytes[i]) {
      if (currNameStart < i) {
        const refName = renameRefSeq(
          decoder.decode(namesBytes.subarray(currNameStart, i)),
        );
        refIdToName[currRefId] = refName;
        refNameToId[refName] = currRefId;
      }
      currNameStart = i + 1;
      currRefId++;
    }
  }

  return { refNameToId, refIdToName };
}

/**
 * Concatenate byte arrays into one freshly allocated `Uint8Array`.
 *
 * @param {Iterable<Uint8Array>} args - arrays to join, in order. Iterated
 *   twice (once to size the result, once to copy).
 * @returns {Uint8Array} the joined bytes.
 */
export function concatUint8Array(args) {
  let totalLength = 0;
  for (const entry of args) {
    totalLength += entry.length;
  }

  const mergedArray = new Uint8Array(totalLength);
  let offset = 0;
  for (const entry of args) {
    mergedArray.set(entry, offset);
    offset += entry.length;
  }

  return mergedArray;
}

/**
 * Decide whether a read's flags fail the include/exclude masks.
 *
 * @param {number} flags - the read's flag bits.
 * @param {number} flagInclude - bits that must all be set.
 * @param {number} flagExclude - bits that must all be clear.
 * @returns {boolean} true when a required bit is missing or an excluded bit
 *   is set.
 */
export function filterReadFlag(flags, flagInclude, flagExclude) {
  return (flags & flagInclude) !== flagInclude || (flags & flagExclude) !== 0;
}

/**
 * Decide whether a read's tag value fails a tag filter.
 *
 * @param {*} readVal - the tag value on the read (undefined when absent).
 * @param {*} filterVal - the wanted value; `'*'` is a wildcard.
 * @returns {boolean} for `'*'`, true when `readVal` is undefined; otherwise
 *   true when the string forms of the two values differ.
 */
export function filterTagValue(readVal, filterVal) {
  return filterVal === '*'
    ? readVal === undefined
    : `${readVal}` !== `${filterVal}`;
}

/**
 * Build a string key describing a filter.
 *
 * @param {object} [filterBy] - `{ flagInclude, flagExclude, tagFilter }`.
 * @returns {string} `''` when `filterBy` is falsy, otherwise
 *   `:f<flagInclude>x<flagExclude>` followed by `:<tag>=<value>` when a
 *   `tagFilter` is present (a nullish value is written as `*`).
 */
export function filterCacheKey(filterBy) {
  if (!filterBy) {
    return '';
  }
  const { flagInclude = 0, flagExclude = 0, tagFilter } = filterBy;
  const tagPart = tagFilter
    ? `:${tagFilter.tag}=${tagFilter.value ?? '*'}`
    : '';
  return `:f${flagInclude}x${flagExclude}${tagPart}`;
}

// Apply flagInclude/flagExclude/tagFilter to a list of records.
/**
 * Return the records that pass the flag and tag filters.
 *
 * @param {Array} records - records exposing `flags` and `tags`.
 * @param {object} [filterBy] - `{ flagInclude, flagExclude, tagFilter }`.
 *   A nullish value applies the defaults (no flag masks, no tag filter).
 * @returns {Array} a new array holding the surviving records in input order.
 */
export function applyFilters(records, filterBy) {
  // Tolerate a missing filter instead of throwing while destructuring.
  const { flagInclude = 0, flagExclude = 0, tagFilter } = filterBy ?? {};
  const out = [];
  for (let i = 0, l = records.length; i < l; i++) {
    const r = records[i];
    if (
      !filterReadFlag(r.flags, flagInclude, flagExclude) &&
      !(tagFilter && filterTagValue(r.tags[tagFilter.tag], tagFilter.value))
    ) {
      out.push(r);
    }
  }
  return out;
}

// Append records overlapping [min, max) on `chrId` into `out` (or a fresh
// array if omitted). Records are assumed sorted by start, so we stop scanning
// at the first record past `max`. Returns the populated array.
/**
 * @param {Array} records - records exposing `ref_id`, `start` and `end`.
 * @param {number} chrId - only records with this `ref_id` are considered.
 * @param {number} min - a record is kept when its `end >= min`.
 * @param {number} max - scanning stops at the first matching record whose
 *   `start >= max`.
 * @param {Array} [out] - array to append to.
 * @returns {Array} `out`.
 */
export function appendInRange(records, chrId, min, max, out = []) {
  for (let i = 0, l = records.length; i < l; i++) {
    const r = records[i];
    if (r.ref_id === chrId) {
      if (r.start >= max) {
        break;
      } else if (r.end >= min) {
        out.push(r);
      }
    }
  }
  return out;
}
//# sourceMappingURL=util.js.map