UNPKG

@gmod/bam

Version:

Parser for BAM and BAM index (bai) files

91 lines 3.74 kB
import { unzip } from '@gmod/bgzf-filehandle';
import BamFile, { BAM_MAGIC } from "./bamFile.js";
import Chunk from "./chunk.js";
import { parseHeaderText } from "./sam.js";
import { appendInRange, concatUint8Array } from "./util.js";
import { VirtualOffset } from "./virtualOffset.js";

/**
 * Thin wrapper around `fetch` that rejects on non-2xx responses.
 *
 * @param {string} url - Resource to request.
 * @param {RequestInit} [opts] - Options forwarded unchanged to `fetch`.
 * @returns {Promise<Response>} The response, guaranteed to have `ok === true`.
 * @throws {Error} When `res.ok` is false; the message carries the HTTP status,
 *   the URL and the response body text.
 */
async function fetchOk(url, opts) {
  const res = await fetch(url, opts);
  if (!res.ok) {
    throw new Error(`HTTP ${res.status} fetching ${url}: ${await res.text()}`);
  }
  return res;
}

/**
 * Download one `{ url, headers }` entry and return its raw bytes.
 *
 * @param {{ url: string, headers?: Record<string, string> }} chunk - Entry to
 *   download.
 * @param {RequestInit} [opts] - Extra fetch options (e.g. `signal`); not
 *   applied to `data:` URLs.
 * @returns {Promise<Uint8Array>} The response body.
 */
async function fetchChunk({ url, headers }, opts) {
  // pass base64 data URLs straight to fetch; otherwise apply headers (minus
  // referer, which isn't a permitted client-set header).
  // https://stackoverflow.com/a/54123275/2129219
  const { referer: _referer, ...rest } = headers ?? {};
  const res = url.startsWith('data:')
    ? await fetchOk(url)
    : await fetchOk(url, { ...opts, headers: rest });
  return new Uint8Array(await res.arrayBuffer());
}

/**
 * Fetch every entry of `arr`, unzip each payload, and join the results in the
 * order the entries were given.
 *
 * @param {Array<{ url: string, headers?: Record<string, string> }>} arr -
 *   Entries to download.
 * @param {RequestInit} [opts] - Options forwarded to each fetch.
 * @returns {Promise<Uint8Array>} Concatenation of the unzipped payloads.
 */
async function fetchAndConcat(arr, opts) {
  // Pipeline unzip after each fetch so decompression overlaps later fetches.
  const parts = await Promise.all(
    arr.map(async (c) => unzip(await fetchChunk(c, opts))),
  );
  return concatUint8Array(parts);
}

/**
 * BamFile variant that obtains its header and records from an htsget endpoint
 * (`${baseUrl}/${trackId}`) instead of a local BAM plus index.
 */
export default class HtsgetFile extends BamFile {
  baseUrl;
  trackId;

  /**
   * @param {{ baseUrl: string, trackId: string, recordClass?: unknown }} args
   *   `baseUrl` and `trackId` are joined with `/` to form the endpoint;
   *   `recordClass` is handed to the BamFile constructor as-is.
   */
  constructor(args) {
    super({ htsget: true, recordClass: args.recordClass });
    this.baseUrl = args.baseUrl;
    this.trackId = args.trackId;
  }

  /**
   * Fetch the records for a region through the htsget ticket.
   *
   * @param {string} chr - Reference name; must be a key of `this.chrToIndex`
   *   (populated by `getHeaderPre`).
   * @param {number} min - Region start, sent as the `start` query parameter.
   * @param {number} max - Region end, sent as the `end` query parameter.
   * @param {RequestInit} [opts] - Fetch options for the ticket request; only
   *   `signal` is forwarded to the data-block requests.
   * @returns {Promise<Array>} Records kept by `appendInRange`, or `[]` when
   *   `chr` is not a known reference name.
   * @throws {Error} When any HTTP request returns a non-OK status.
   */
  async getRecordsForRange(chr, min, max, opts) {
    const chrId = this.chrToIndex?.[chr];
    if (chrId === undefined) {
      return [];
    }

    const base = `${this.baseUrl}/${this.trackId}`;
    // Escape the reference name: characters such as '&', '#' or '+' would
    // otherwise change the meaning of the query string.
    const url = `${base}?referenceName=${encodeURIComponent(chr)}&start=${min}&end=${max}&format=BAM`;

    const result = await fetchOk(url, opts);
    const data = await result.json();

    // The first ticket URL is skipped here.
    // NOTE(review): presumably it is the BAM header block, which
    // getHeaderPre fetches separately - confirm against the htsget server.
    const uncba = await fetchAndConcat(data.htsget.urls.slice(1), {
      signal: opts?.signal,
    });

    const zero = new VirtualOffset(0, 0);
    const allRecords = await this.readBamFeatures(
      uncba,
      [],
      [],
      new Chunk(zero, zero, 0),
    );
    return appendInRange(allRecords, chrId, min, max);
  }

  /**
   * Fetch and parse the BAM header through the htsget `class=header` request,
   * then populate `this.chrToIndex` (name -> ref ID) and `this.indexToChr`
   * (ref ID -> `{ refName, length }`).
   *
   * @param {RequestInit} [opts={}] - Fetch options for the ticket request;
   *   only `signal` is forwarded to the data-block requests.
   * @returns {Promise<Array>} The parsed SAM header lines.
   * @throws {Error} 'Not a BAM file' when the magic number does not match, or
   *   an HTTP error from any of the requests.
   */
  async getHeaderPre(opts = {}) {
    const url = `${this.baseUrl}/${this.trackId}?referenceName=na&class=header`;
    const result = await fetchOk(url, opts);
    const data = await result.json();
    const uncba = await fetchAndConcat(data.htsget.urls, {
      signal: opts.signal,
    });

    // Honour byteOffset/byteLength so the header is read correctly even when
    // `uncba` is a view into a larger ArrayBuffer.
    const dataView = new DataView(
      uncba.buffer,
      uncba.byteOffset,
      uncba.byteLength,
    );
    if (dataView.getInt32(0, true) !== BAM_MAGIC) {
      throw new Error('Not a BAM file');
    }
    const headLen = dataView.getInt32(4, true);

    const decoder = new TextDecoder();
    const headerText = decoder.decode(uncba.subarray(8, 8 + headLen));
    const samHeader = parseHeaderText(headerText);

    // use the @SQ lines in the header to figure out the
    // mapping between ref ID numbers and names
    const idToName = [];
    const nameToId = {};
    const sqLines = samHeader.filter(l => l.tag === 'SQ');
    for (const [refId, sqLine] of sqLines.entries()) {
      let refName = '';
      let length = 0;
      for (const item of sqLine.data) {
        if (item.tag === 'SN') {
          refName = item.value;
        } else if (item.tag === 'LN') {
          length = +item.value;
        }
      }
      nameToId[refName] = refId;
      idToName[refId] = { refName, length };
    }

    this.chrToIndex = nameToId;
    this.indexToChr = idToName;
    return samHeader;
  }
}
//# sourceMappingURL=htsget.js.map