@gmod/bam
Version:
Parser for BAM and BAM index (bai) files
91 lines • 3.74 kB
JavaScript
import { unzip } from '@gmod/bgzf-filehandle';
import BamFile, { BAM_MAGIC } from "./bamFile.js";
import Chunk from "./chunk.js";
import { parseHeaderText } from "./sam.js";
import { appendInRange, concatUint8Array } from "./util.js";
import { VirtualOffset } from "./virtualOffset.js";
/**
 * Thin wrapper around `fetch` that turns a non-2xx response into a rejection.
 *
 * @param {string} url - resource to request.
 * @param {object} [opts] - passed to `fetch` untouched.
 * @returns {Promise<Response>} the response, guaranteed to have `ok === true`.
 * @throws {Error} when the response is not ok; the message carries the status
 *   code, the URL and the response body text.
 */
async function fetchOk(url, opts) {
  const response = await fetch(url, opts);
  if (response.ok) {
    return response;
  }
  // Include the body in the message: servers usually explain the failure there.
  const body = await response.text();
  throw new Error(`HTTP ${response.status} fetching ${url}: ${body}`);
}
/**
 * Download one block described by an htsget ticket entry and return its bytes.
 *
 * `data:` URLs are handed to `fetchOk` with no options at all. Any other URL
 * is fetched with the caller's options plus the entry's headers, except
 * `referer`, which is not a header a client is permitted to set.
 * https://stackoverflow.com/a/54123275/2129219
 *
 * @param {{url: string, headers?: object}} entry - ticket entry to download.
 * @param {object} [opts] - extra fetch options (e.g. `signal`).
 * @returns {Promise<Uint8Array>} the raw response body.
 */
async function fetchChunk({ url, headers }, opts) {
  const { referer: _dropped, ...forwardedHeaders } = headers ?? {};
  let response;
  if (url.startsWith('data:')) {
    response = await fetchOk(url);
  } else {
    response = await fetchOk(url, { ...opts, headers: forwardedHeaders });
  }
  const payload = await response.arrayBuffer();
  return new Uint8Array(payload);
}
/**
 * Fetch every ticket entry, decompress each one, and join the results in the
 * original order.
 *
 * Each entry's `unzip` is chained directly onto its own fetch, so
 * decompression of early blocks can overlap with downloads still in flight.
 *
 * @param {Array<{url: string, headers?: object}>} arr - ticket entries.
 * @param {object} [opts] - options passed to every `fetchChunk` call.
 * @returns {Promise<Uint8Array>} the concatenated decompressed bytes.
 */
async function fetchAndConcat(arr, opts) {
  const pending = arr.map(async (entry) => {
    const compressed = await fetchChunk(entry, opts);
    return unzip(compressed);
  });
  const inflated = await Promise.all(pending);
  return concatUint8Array(inflated);
}
/**
 * BAM reader that obtains its data from an htsget endpoint.
 *
 * Requests go to `${baseUrl}/${trackId}`; the JSON ticket returned by the
 * server lists the URLs of the blocks that are then downloaded, decompressed
 * and parsed.
 */
export default class HtsgetFile extends BamFile {
  baseUrl;
  trackId;

  /**
   * @param {object} args
   * @param {string} args.baseUrl - endpoint prefix the track id is appended to.
   * @param {string} args.trackId - identifier of the dataset on the endpoint.
   * @param {*} [args.recordClass] - forwarded unchanged to the BamFile constructor.
   */
  constructor(args) {
    super({ htsget: true, recordClass: args.recordClass });
    this.baseUrl = args.baseUrl;
    this.trackId = args.trackId;
  }

  /**
   * Fetch and parse the records overlapping a region.
   *
   * @param {string} chr - reference sequence name.
   * @param {number} min - region start, sent as the `start` query parameter.
   * @param {number} max - region end, sent as the `end` query parameter.
   * @param {object} [opts] - passed to the ticket request; only `opts.signal`
   *   is forwarded to the block downloads.
   * @returns {Promise<Array>} records filtered by `appendInRange`, or an empty
   *   array when `chr` is not in `chrToIndex` (which `getHeaderPre` fills in).
   * @throws {Error} when a request returns a non-ok HTTP status.
   */
  async getRecordsForRange(chr, min, max, opts) {
    const chrId = this.chrToIndex?.[chr];
    if (chrId === undefined) {
      return [];
    }
    // Reference names may contain characters that are significant in a query
    // string (&, #, spaces, ...), so the name must be percent-encoded.
    const query = `referenceName=${encodeURIComponent(chr)}&start=${min}&end=${max}&format=BAM`;
    const url = `${this.baseUrl}/${this.trackId}?${query}`;
    const result = await fetchOk(url, opts);
    const data = await result.json();
    // NOTE(review): the first ticket URL is skipped, presumably because it is
    // the BAM header block -- confirm against the server's ticket layout.
    const uncba = await fetchAndConcat(data.htsget.urls.slice(1), {
      signal: opts?.signal,
    });
    const zero = new VirtualOffset(0, 0);
    const allRecords = await this.readBamFeatures(
      uncba,
      [],
      [],
      new Chunk(zero, zero, 0),
    );
    return appendInRange(allRecords, chrId, min, max);
  }

  /**
   * Download the BAM header, parse its SAM text, and record the mapping
   * between reference names and numeric reference ids on this instance
   * (`chrToIndex` and `indexToChr`).
   *
   * @param {object} [opts] - passed to the ticket request; only `opts.signal`
   *   is forwarded to the block downloads.
   * @returns {Promise<Array>} the parsed SAM header lines.
   * @throws {Error} 'Not a BAM file' when the magic number does not match, or
   *   when a request returns a non-ok HTTP status.
   */
  async getHeaderPre(opts = {}) {
    const url = `${this.baseUrl}/${this.trackId}?referenceName=na&class=header`;
    const result = await fetchOk(url, opts);
    const data = await result.json();
    const uncba = await fetchAndConcat(data.htsget.urls, {
      signal: opts.signal,
    });
    // Honour the view's own offset and length: the bytes may be a window into
    // a larger ArrayBuffer, in which case offset 0 of the buffer is not the
    // start of the data.
    const dataView = new DataView(
      uncba.buffer,
      uncba.byteOffset,
      uncba.byteLength,
    );
    if (dataView.getInt32(0, true) !== BAM_MAGIC) {
      throw new Error('Not a BAM file');
    }
    const headLen = dataView.getInt32(4, true);
    const decoder = new TextDecoder();
    const headerText = decoder.decode(uncba.subarray(8, 8 + headLen));
    const samHeader = parseHeaderText(headerText);
    // use the @SQ lines in the header to figure out the
    // mapping between ref ID numbers and names; the position of an @SQ line
    // among the @SQ lines is its ref ID
    const idToName = [];
    const nameToId = {};
    const sqLines = samHeader.filter(l => l.tag === 'SQ');
    for (const [refId, sqLine] of sqLines.entries()) {
      let refName = '';
      let length = 0;
      for (const item of sqLine.data) {
        if (item.tag === 'SN') {
          refName = item.value;
        }
        else if (item.tag === 'LN') {
          length = +item.value;
        }
      }
      nameToId[refName] = refId;
      idToName[refId] = { refName, length };
    }
    this.chrToIndex = nameToId;
    this.indexToChr = idToName;
    return samHeader;
  }
}
//# sourceMappingURL=htsget.js.map