@gmod/bam
Version:
Parser for BAM and BAM index (bai) files
182 lines • 6.65 kB
JavaScript
import Chunk from "./chunk.js";
import { longFromBytesToUnsigned } from "./long.js";
/**
 * Drop chunks that end at or before `lowest`, order the rest by their start
 * offset, and coalesce neighbouring chunks into fewer, larger ones.
 *
 * Neither the input array nor any Chunk in it is modified: sorting happens on
 * a private copy, and extending a merged span creates a new Chunk.
 *
 * @param chunks - array of chunks, each exposing `minv` / `maxv` offsets with
 *   `blockPosition` and `dataPosition` fields, plus a `bin`
 * @param lowest - optional offset; chunks whose `maxv` is not strictly greater
 *   than it are discarded
 * @returns a new array of chunks sorted by `minv` (the input array itself is
 *   returned only when it is empty)
 */
export function optimizeChunks(chunks, lowest) {
  if (chunks.length === 0) {
    return chunks;
  }

  // Largest blockPosition gap between two chunks that still get merged.
  const maxMergeGap = 65000;
  // Upper bound on the blockPosition span of a single merged chunk.
  const maxMergedSpan = 5000000;

  // Build a private working array so the sort below can never reorder the
  // caller's array (the chunks themselves may be shared with an index cache).
  let candidates;
  if (lowest) {
    const lowestBlock = lowest.blockPosition;
    const lowestData = lowest.dataPosition;
    candidates = chunks.filter(({ maxv }) => {
      const cmp =
        maxv.blockPosition - lowestBlock || maxv.dataPosition - lowestData;
      return cmp > 0;
    });
    if (candidates.length === 0) {
      return candidates;
    }
  } else {
    candidates = chunks.slice();
  }

  // Order by start offset: blockPosition first, dataPosition as tie-breaker.
  candidates.sort(
    (c0, c1) =>
      c0.minv.blockPosition - c1.minv.blockPosition ||
      c0.minv.dataPosition - c1.minv.dataPosition,
  );

  const mergedChunks = [candidates[0]];
  let lastMinBlock = candidates[0].minv.blockPosition;
  let lastMaxBlock = candidates[0].maxv.blockPosition;
  for (let i = 1; i < candidates.length; i++) {
    const chunk = candidates[i];
    const chunkMinBlock = chunk.minv.blockPosition;
    const chunkMaxBlock = chunk.maxv.blockPosition;
    // Merge if chunks are close enough: small gap between them, and the
    // combined span is bounded so we don't grow a single chunk indefinitely.
    if (
      chunkMinBlock - lastMaxBlock < maxMergeGap &&
      chunkMaxBlock - lastMinBlock < maxMergedSpan
    ) {
      const lastChunk = mergedChunks[mergedChunks.length - 1];
      const cmp =
        chunkMaxBlock - lastMaxBlock ||
        chunk.maxv.dataPosition - lastChunk.maxv.dataPosition;
      // Only extend when this chunk ends after the current merged span; a
      // chunk fully contained in the span is simply absorbed.
      if (cmp > 0) {
        mergedChunks[mergedChunks.length - 1] = new Chunk(
          lastChunk.minv,
          chunk.maxv,
          lastChunk.bin,
        );
        lastMaxBlock = chunkMaxBlock;
      }
    } else {
      mergedChunks.push(chunk);
      lastMinBlock = chunkMinBlock;
      lastMaxBlock = chunkMaxBlock;
    }
  }
  return mergedChunks;
}
// Read the value at `offset` in `bytes` via longFromBytesToUnsigned and
// expose it as the pseudo-bin's `lineCount`.
export function parsePseudoBin(bytes, offset) {
  const lineCount = longFromBytesToUnsigned(bytes, offset);
  return { lineCount };
}
/**
 * Parse the BAM reference-sequence table (SAMv1.pdf §4.2).
 *
 * Layout read here, all integers little-endian int32: a reference count at
 * `start`, then per reference a name length, the name bytes (the last byte of
 * the name field is dropped before decoding), and the reference length.
 *
 * @param uncba - Uint8Array holding the bytes read so far; may be a view into
 *   a larger ArrayBuffer
 * @param start - offset within `uncba` where the table begins
 * @param renameRefSeq - callback applied to every decoded reference name
 * @returns `{ chrToIndex, indexToChr }`, or undefined if `uncba` doesn't yet
 *   contain the full table — caller fetches more bytes and retries
 */
export function parseRefSeqs(uncba, start, renameRefSeq) {
  if (start + 4 > uncba.length) {
    return undefined;
  }
  // Scope the DataView to the window `uncba` covers. Without byteOffset and
  // byteLength the view would start at the beginning of the backing buffer
  // and read the wrong bytes whenever `uncba` is a subarray.
  const dataView = new DataView(
    uncba.buffer,
    uncba.byteOffset,
    uncba.byteLength,
  );
  const nRef = dataView.getInt32(start, true);
  const chrToIndex = {};
  const indexToChr = [];
  const decoder = new TextDecoder('utf8');
  let p = start + 4;
  for (let i = 0; i < nRef; i++) {
    // Need at least the two int32 fields of this entry.
    if (p + 8 > uncba.length) {
      return undefined;
    }
    const lName = dataView.getInt32(p, true);
    // Need the whole name plus the trailing reference length.
    if (p + 8 + lName > uncba.length) {
      return undefined;
    }
    const nameStart = p + 4;
    const refName = renameRefSeq(
      decoder.decode(uncba.subarray(nameStart, nameStart + lName - 1)),
    );
    const lRef = dataView.getInt32(nameStart + lName, true);
    chrToIndex[refName] = i;
    indexToChr.push({ refName, length: lRef });
    p += 8 + lName;
  }
  return { chrToIndex, indexToChr };
}
// Pick between the current `firstDataLine` and a candidate `virtualOffset`:
// the candidate wins when there is no current value or when the current value
// compares greater than it; otherwise the current value is kept.
export function findFirstData(firstDataLine, virtualOffset) {
  if (!firstDataLine) {
    return virtualOffset;
  }
  const currentIsLater = firstDataLine.compareTo(virtualOffset) > 0;
  if (currentIsLater) {
    return virtualOffset;
  }
  return firstDataLine;
}
// SYNC: ~/src/gmod/tabix-js/src/util.ts parseNameBytes uses indexOf(0) instead of byte scan
//
// Split a buffer of zero-byte-terminated names into lookup tables. Every zero
// byte advances the reference id, even when the name before it is empty (such
// ids are left unset in `refIdToName`). Bytes after the final zero byte are
// ignored. `renameRefSeq` is applied to each decoded name.
export function parseNameBytes(namesBytes, renameRefSeq = s => s) {
  const refIdToName = [];
  const refNameToId = {};
  const textDecoder = new TextDecoder();
  let refId = 0;
  let nameStart = 0;
  for (let pos = 0; pos < namesBytes.length; pos++) {
    if (namesBytes[pos]) {
      // Still inside a name; keep scanning for its terminator.
      continue;
    }
    if (pos > nameStart) {
      const name = renameRefSeq(
        textDecoder.decode(namesBytes.subarray(nameStart, pos)),
      );
      refIdToName[refId] = name;
      refNameToId[name] = refId;
    }
    nameStart = pos + 1;
    refId++;
  }
  return { refNameToId, refIdToName };
}
// Join the byte arrays in `args` end-to-end into one freshly allocated
// Uint8Array. `args` is walked twice: once to size the result, once to copy.
export function concatUint8Array(args) {
  let size = 0;
  for (const part of args) {
    size += part.length;
  }

  const joined = new Uint8Array(size);
  let cursor = 0;
  for (const part of args) {
    joined.set(part, cursor);
    cursor += part.length;
  }
  return joined;
}
// Returns true when a read should be rejected: it lacks at least one bit of
// `flagInclude`, or it carries at least one bit of `flagExclude`.
export function filterReadFlag(flags, flagInclude, flagExclude) {
  const hasAllIncluded = (flags & flagInclude) === flagInclude;
  const hasAnyExcluded = (flags & flagExclude) !== 0;
  return !hasAllIncluded || hasAnyExcluded;
}
// Returns true when a read should be rejected by a tag filter. A filter value
// of '*' rejects reads where the tag is undefined; any other filter value
// rejects reads whose stringified tag value differs from the stringified
// filter value.
export function filterTagValue(readVal, filterVal) {
  if (filterVal === '*') {
    return readVal === undefined;
  }
  const actual = `${readVal}`;
  const wanted = `${filterVal}`;
  return actual !== wanted;
}
// Build a string key describing a filter. A missing filter yields ''.
// Otherwise the key is `:f<flagInclude>x<flagExclude>`, followed by
// `:<tag>=<value>` when a tag filter is present ('*' stands in for a
// null/undefined tag value).
export function filterCacheKey(filterBy) {
  if (!filterBy) {
    return '';
  }
  const { flagInclude = 0, flagExclude = 0, tagFilter } = filterBy;
  let key = `:f${flagInclude}x${flagExclude}`;
  if (tagFilter) {
    key += `:${tagFilter.tag}=${tagFilter.value ?? '*'}`;
  }
  return key;
}
/**
 * Apply flagInclude/flagExclude/tagFilter to a list of records.
 *
 * A record is kept when filterReadFlag does not reject its `flags` and, if a
 * tag filter is given, filterTagValue does not reject `tags[tagFilter.tag]`.
 *
 * @param records - records exposing `flags` and `tags`
 * @param filterBy - optional `{ flagInclude, flagExclude, tagFilter }`; when
 *   null or undefined no filtering is applied, matching how filterCacheKey
 *   treats a missing filter
 * @returns a new array with the records that passed; the input is not modified
 */
export function applyFilters(records, filterBy) {
  // Fall back to an empty filter instead of throwing on a missing one.
  const { flagInclude = 0, flagExclude = 0, tagFilter } = filterBy ?? {};
  const out = [];
  for (let i = 0, l = records.length; i < l; i++) {
    const r = records[i];
    if (filterReadFlag(r.flags, flagInclude, flagExclude)) {
      continue;
    }
    if (tagFilter && filterTagValue(r.tags[tagFilter.tag], tagFilter.value)) {
      continue;
    }
    out.push(r);
  }
  return out;
}
// Push onto `out` (a fresh array when omitted) every record on `chrId` whose
// `start` is below `max` and whose `end` is at or above `min`. Records on
// other references are skipped. Records are assumed sorted by start, so the
// scan stops at the first `chrId` record whose `start` reaches `max`.
// Returns `out`.
export function appendInRange(records, chrId, min, max, out = []) {
  const total = records.length;
  for (let idx = 0; idx < total; idx++) {
    const record = records[idx];
    if (record.ref_id !== chrId) {
      continue;
    }
    if (record.start >= max) {
      break;
    }
    if (record.end >= min) {
      out.push(record);
    }
  }
  return out;
}
//# sourceMappingURL=util.js.map