UNPKG

taxonium-component

Version:

React component for exploring large phylogenetic trees in the browser

661 lines (660 loc) 20.7 kB
import { A as AbortablePromiseCache } from "./AbortablePromiseCache-CcuMrnn7.js";
import { u as unzip, a as unzipChunkSlice } from "./unzip-NIBF0hze.js";
import { L as LocalFile } from "./browser-BpRiKmO-.js";
import { R as RemoteFile, Q as QuickLRU } from "./remoteFile-H_6BTCFF.js";

// NOTE(review): the local binding names above are inferred from the chunk
// file names and from how each binding is used below; confirm against the
// exporting modules.

/**
 * Block size used throughout this module: reported as `maxBlockSize` in the
 * parsed index metadata, added as slack when sizing a chunk fetch, and used
 * to convert the chunk cache byte budget into an entry count.
 */
const MAX_BLOCK_SIZE = 65536;

/**
 * A contiguous region of the data file delimited by two virtual offsets,
 * tagged with the index bin it was listed under.
 */
class Chunk {
  /**
   * @param {VirtualOffset} minv virtual offset where the chunk starts
   * @param {VirtualOffset} maxv virtual offset where the chunk ends
   * @param {number} bin bin number the chunk belongs to
   * @param {number} [fetchedSize] explicit fetch size; derived from the
   *   offsets when omitted
   */
  constructor(minv, maxv, bin, fetchedSize = undefined) {
    this.minv = minv;
    this.maxv = maxv;
    this.bin = bin;
    this._fetchedSize = fetchedSize;
  }

  /** @returns {string} description of the chunk; also used as its cache key */
  toUniqueString() {
    return `${this.minv}..${this.maxv} (bin ${this.bin}, fetchedSize ${this.fetchedSize()})`;
  }

  toString() {
    return this.toUniqueString();
  }

  /**
   * Order chunks by start offset, then end offset, then bin number.
   *
   * @param {Chunk} other
   * @returns {number} negative, zero or positive
   */
  compareTo(other) {
    return (
      this.minv.compareTo(other.minv) ||
      this.maxv.compareTo(other.maxv) ||
      this.bin - other.bin
    );
  }

  /**
   * @returns {number} number of compressed bytes to read for this chunk: the
   *   explicit size if one was given, otherwise the span between the two
   *   block positions plus one whole block of slack
   */
  fetchedSize() {
    if (this._fetchedSize !== undefined) {
      return this._fetchedSize;
    }
    return this.maxv.blockPosition + MAX_BLOCK_SIZE - this.minv.blockPosition;
  }
}

/**
 * Behaviour shared by the CSI and TBI index readers: memoised parsing,
 * metadata access and reference-name table decoding. Subclasses supply
 * `_parse`.
 */
class IndexFile {
  /**
   * @param {object} args
   * @param {object} args.filehandle handle the index bytes are read from
   * @param {function} [args.renameRefSeqs] `string => string` applied to every
   *   reference sequence name read from the index
   */
  constructor({ filehandle, renameRefSeqs = (name) => name }) {
    this.filehandle = filehandle;
    this.renameRefSeq = renameRefSeqs;
  }

  /**
   * @param {object} [opts] forwarded to `parse`
   * @returns {Promise<object>} the parsed index without its `indices` array
   */
  async getMetadata(opts = {}) {
    const { indices, ...metadata } = await this.parse(opts);
    return metadata;
  }

  /**
   * @returns whichever of the two virtual offsets is smaller; `candidate`
   *   when no offset has been recorded yet
   */
  _findFirstData(current, candidate) {
    if (!current) {
      return candidate;
    }
    return current.compareTo(candidate) > 0 ? candidate : current;
  }

  /**
   * Parse the index once and reuse the promise. A failed parse clears the
   * memo so that a later call can retry.
   *
   * @param {object} [opts] forwarded to `_parse`
   * @returns {Promise<object>} parsed index data
   */
  async parse(opts = {}) {
    if (!this.parseP) {
      this.parseP = this._parse(opts).catch((error) => {
        this.parseP = undefined;
        throw error;
      });
    }
    return this.parseP;
  }

  /**
   * @param {number} seqId numeric reference sequence id
   * @param {object} [opts] forwarded to `parse`
   * @returns {Promise<boolean>} true if the index has a bin index for `seqId`
   */
  async hasRefSeq(seqId, opts = {}) {
    const refIndex = (await this.parse(opts)).indices[seqId];
    return !!(refIndex != null && refIndex.binIndex);
  }

  /**
   * Decode a block of NUL-terminated reference names. Ids are assigned by
   * position; an empty name still consumes an id but is not recorded.
   *
   * @param {Uint8Array} namesBytes concatenated NUL-terminated names
   * @returns {{refNameToId: object, refIdToName: string[]}}
   */
  _parseNameBytes(namesBytes) {
    const refIdToName = [];
    const refNameToId = {};
    const decoder = new TextDecoder("utf8");
    let refId = 0;
    let nameStart = 0;
    for (let i = 0; i < namesBytes.length; i += 1) {
      if (!namesBytes[i]) {
        if (nameStart < i) {
          const refName = this.renameRefSeq(
            decoder.decode(namesBytes.subarray(nameStart, i))
          );
          refIdToName[refId] = refName;
          refNameToId[refName] = refId;
        }
        nameStart = i + 1;
        refId += 1;
      }
    }
    return { refNameToId, refIdToName };
  }
}

const TWO_POW_32 = 65536 * 65536;

/**
 * Read an unsigned little-endian 64-bit integer as a JS number.
 *
 * @param {Uint8Array} bytes
 * @param {number} [offset=0]
 * @returns {number}
 */
function readUint64LE(bytes, offset = 0) {
  const low =
    bytes[offset] |
    (bytes[offset + 1] << 8) |
    (bytes[offset + 2] << 16) |
    (bytes[offset + 3] << 24);
  const high =
    bytes[offset + 4] |
    (bytes[offset + 5] << 8) |
    (bytes[offset + 6] << 16) |
    (bytes[offset + 7] << 24);
  // `>>> 0` reinterprets each 32-bit half as unsigned before combining
  return (high >>> 0) * TWO_POW_32 + (low >>> 0);
}

/** Fallback abort error for environments without `DOMException`. */
class AbortError extends Error {}

/**
 * Throw if the given signal has been aborted.
 *
 * @param {AbortSignal} [signal]
 * @throws {DOMException|AbortError} `AbortError` DOMException where available,
 *   otherwise an `AbortError` carrying `code = "ERR_ABORTED"`
 */
function checkAbortSignal(signal) {
  if (!signal || !signal.aborted) {
    return;
  }
  if (typeof DOMException !== "undefined") {
    throw new DOMException("aborted", "AbortError");
  }
  const error = new AbortError("aborted");
  error.code = "ERR_ABORTED";
  throw error;
}

/**
 * @returns {boolean} true when `next` starts less than 65000 bytes after
 *   `prev` ends and the merged span stays under 5000000 bytes
 */
function canMergeChunks(prev, next) {
  return (
    next.minv.blockPosition - prev.maxv.blockPosition < 65000 &&
    next.maxv.blockPosition - prev.minv.blockPosition < 5000000
  );
}

/**
 * Sort chunks by start offset (in place), drop those ending at or before
 * `lowest`, and merge neighbours that are close together.
 *
 * @param {Chunk[]} chunks sorted in place; merged chunks have `maxv` extended
 * @param {VirtualOffset|null} lowest chunks that do not end after this offset
 *   are discarded; falsy disables the filter
 * @returns {Chunk[]} the reduced chunk list (the input array itself if empty)
 */
function optimizeChunks(chunks, lowest) {
  if (chunks.length === 0) {
    return chunks;
  }
  chunks.sort((a, b) => {
    const blockDiff = a.minv.blockPosition - b.minv.blockPosition;
    return blockDiff !== 0 ? blockDiff : a.minv.dataPosition - b.minv.dataPosition;
  });

  const merged = [];
  let last = null;
  for (const chunk of chunks) {
    if (lowest && !(chunk.maxv.compareTo(lowest) > 0)) {
      continue;
    }
    if (last !== null && canMergeChunks(last, chunk)) {
      if (chunk.maxv.compareTo(last.maxv) > 0) {
        last.maxv = chunk.maxv;
      }
    } else {
      merged.push(chunk);
      last = chunk;
    }
  }
  return merged;
}

/**
 * A virtual file offset: position of a compressed block in the file plus a
 * position inside that block's uncompressed data.
 */
class VirtualOffset {
  constructor(blockPosition, dataPosition) {
    this.blockPosition = blockPosition;
    this.dataPosition = dataPosition;
  }

  toString() {
    return `${this.blockPosition}:${this.dataPosition}`;
  }

  /** Order by block position first, then by position inside the block. */
  compareTo(other) {
    return (
      this.blockPosition - other.blockPosition ||
      this.dataPosition - other.dataPosition
    );
  }
}

/**
 * Decode an 8-byte little-endian virtual offset: the low two bytes are the
 * data position, the upper six bytes the block position.
 *
 * @param {Uint8Array} bytes
 * @param {number} [offset=0]
 * @returns {VirtualOffset}
 */
function readVirtualOffset(bytes, offset = 0) {
  // multiplication instead of shifts: the block position can exceed 32 bits
  const blockPosition =
    bytes[offset + 7] * 2 ** 40 +
    bytes[offset + 6] * 2 ** 32 +
    bytes[offset + 5] * 2 ** 24 +
    bytes[offset + 4] * 2 ** 16 +
    bytes[offset + 3] * 2 ** 8 +
    bytes[offset + 2];
  const dataPosition = (bytes[offset + 1] << 8) | bytes[offset];
  return new VirtualOffset(blockPosition, dataPosition);
}

// first four bytes of an index file, read as a little-endian uint32
const CSI1_MAGIC = 21582659;
const CSI2_MAGIC = 38359875;
const TBI_MAGIC = 21578324;

/** Preset names selected by the low four bits of the format flags. */
const FORMATS = { 0: "generic", 1: "SAM", 2: "VCF" };

/** Bit of the format flags that selects zero-based half-open coordinates. */
const ZERO_BASED_FLAG = 65536;

/** Shift that maps a coordinate to its slot in the TBI linear index. */
const TBI_LINEAR_SHIFT = 14;

/** Left shift that does not truncate to 32 bits. */
function lshift(num, bits) {
  return num * 2 ** bits;
}

/** Right shift that does not truncate to 32 bits. */
function rshift(num, bits) {
  return Math.floor(num / 2 ** bits);
}

/** Reader for CSI indexes (both magic numbers accepted by `_parse`). */
class CSI extends IndexFile {
  constructor(args) {
    super(args);
    // populated from the file header by _parse
    this.maxBinNumber = 0;
    this.depth = 0;
    this.minShift = 0;
  }

  /**
   * @param {string} refName reference sequence name
   * @param {object} [opts] forwarded to `parse`
   * @returns {Promise<number>} line count stored for the reference, or -1 if
   *   the reference or its statistics are missing
   */
  async lineCount(refName, opts = {}) {
    const indexData = await this.parse(opts);
    const refId = indexData.refNameToId[refName];
    if (refId === undefined || !indexData.indices[refId]) {
      return -1;
    }
    const { stats } = indexData.indices[refId];
    return stats ? stats.lineCount : -1;
  }

  indexCov() {
    throw new Error("CSI indexes do not support indexcov");
  }

  /**
   * Parse the auxiliary header section that carries the tabix settings and
   * the reference name table.
   *
   * @param {Uint8Array} bytes uncompressed index bytes
   * @param {number} offset byte offset of the auxiliary section in `bytes`
   * @returns {object} refIdToName, refNameToId, skipLines, metaChar,
   *   columnNumbers, format and coordinateType
   * @throws {Error} if the format flags select an unknown preset
   */
  parseAuxData(bytes, offset) {
    // honour byteOffset so that a sub-view of a larger buffer reads correctly
    const dataView = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
    const formatFlags = dataView.getInt32(offset, true);
    const coordinateType =
      formatFlags & ZERO_BASED_FLAG ? "zero-based-half-open" : "1-based-closed";
    const format = FORMATS[formatFlags & 15];
    if (!format) {
      throw new Error(`invalid Tabix preset format flags ${formatFlags}`);
    }
    const columnNumbers = {
      ref: dataView.getInt32(offset + 4, true),
      start: dataView.getInt32(offset + 8, true),
      end: dataView.getInt32(offset + 12, true),
    };
    const metaValue = dataView.getInt32(offset + 16, true);
    const metaChar = metaValue ? String.fromCharCode(metaValue) : null;
    const skipLines = dataView.getInt32(offset + 20, true);
    const nameSectionLength = dataView.getInt32(offset + 24, true);
    const { refIdToName, refNameToId } = this._parseNameBytes(
      bytes.subarray(offset + 28, offset + 28 + nameSectionLength)
    );
    return {
      refIdToName,
      refNameToId,
      skipLines,
      metaChar,
      columnNumbers,
      format,
      coordinateType,
    };
  }

  /**
   * Fetch and parse the index.
   *
   * @param {object} [opts] forwarded to `filehandle.readFile`
   * @returns {Promise<object>} parsed index data
   * @throws {Error} if the file does not start with a CSI magic number
   */
  async _parse(opts = {}) {
    const bytes = await unzip(await this.filehandle.readFile(opts));
    const dataView = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);

    const magic = dataView.getUint32(0, true);
    let csiVersion;
    if (magic === CSI1_MAGIC) {
      csiVersion = 1;
    } else if (magic === CSI2_MAGIC) {
      csiVersion = 2;
    } else {
      throw new Error("Not a CSI file");
    }

    this.minShift = dataView.getInt32(4, true);
    this.depth = dataView.getInt32(8, true);
    this.maxBinNumber = ((1 << ((this.depth + 1) * 3)) - 1) / 7;
    const maxRefLength = 2 ** (this.minShift + this.depth * 3);

    const auxLength = dataView.getInt32(12, true);
    const aux =
      auxLength && auxLength >= 30
        ? this.parseAuxData(bytes, 16)
        : {
            refIdToName: [],
            refNameToId: {},
            metaChar: null,
            columnNumbers: { ref: 0, start: 1, end: 2 },
            coordinateType: "zero-based-half-open",
            format: "generic",
          };

    const refCount = dataView.getInt32(16 + auxLength, true);
    let firstDataLine;
    let cursor = 16 + auxLength + 4;

    // per-reference records are variable-length, so they are read in order
    const indices = new Array(refCount);
    for (let refId = 0; refId < refCount; refId += 1) {
      const binCount = dataView.getInt32(cursor, true);
      cursor += 4;
      const binIndex = {};
      let stats;
      for (let b = 0; b < binCount; b += 1) {
        const bin = dataView.getUint32(cursor, true);
        if (bin > this.maxBinNumber) {
          // a bin number beyond the scheme's range carries statistics; the
          // whole record is skipped as a fixed 48 bytes
          stats = this.parsePseudoBin(bytes, cursor + 4);
          cursor += 48;
        } else {
          const binStart = readVirtualOffset(bytes, cursor + 4);
          firstDataLine = this._findFirstData(firstDataLine, binStart);
          const chunkCount = dataView.getInt32(cursor + 12, true);
          cursor += 16;
          const chunks = new Array(chunkCount);
          for (let k = 0; k < chunkCount; k += 1) {
            const chunkStart = readVirtualOffset(bytes, cursor);
            const chunkEnd = readVirtualOffset(bytes, cursor + 8);
            cursor += 16;
            chunks[k] = new Chunk(chunkStart, chunkEnd, bin);
          }
          binIndex[bin] = chunks;
        }
      }
      indices[refId] = { binIndex, stats };
    }

    return {
      ...aux,
      csi: true,
      refCount,
      maxBlockSize: MAX_BLOCK_SIZE,
      firstDataLine,
      csiVersion,
      indices,
      depth: this.depth,
      maxBinNumber: this.maxBinNumber,
      maxRefLength,
    };
  }

  /**
   * @param {Uint8Array} bytes
   * @param {number} offset offset just past the pseudo-bin's bin number
   * @returns {{lineCount: number}}
   */
  parsePseudoBin(bytes, offset) {
    return { lineCount: readUint64LE(bytes, offset + 28) };
  }

  /**
   * @param {string} refName reference sequence name
   * @param {number} min region start; negative values are clamped to 0
   * @param {number} max region end
   * @param {object} [opts] forwarded to `parse`
   * @returns {Promise<Chunk[]>} chunks that may hold lines in the region;
   *   empty when the reference is not in the index
   */
  async blocksForRange(refName, min, max, opts = {}) {
    const start = min < 0 ? 0 : min;
    const indexData = await this.parse(opts);
    const refId = indexData.refNameToId[refName];
    if (refId === undefined) {
      return [];
    }
    const refIndex = indexData.indices[refId];
    if (!refIndex) {
      return [];
    }

    const chunks = [];
    for (const [firstBin, lastBin] of this.reg2bins(start, max)) {
      for (let bin = firstBin; bin <= lastBin; bin++) {
        const binChunks = refIndex.binIndex[bin];
        if (binChunks) {
          for (const chunk of binChunks) {
            chunks.push(new Chunk(chunk.minv, chunk.maxv, bin));
          }
        }
      }
    }
    return optimizeChunks(chunks, new VirtualOffset(0, 0));
  }

  /**
   * calculate the list of bins that may overlap with region [beg,end)
   * (zero-based half-open)
   *
   * @returns {Array<[number, number]>} one inclusive bin range per level
   * @throws {Error} if the region spans more bins than the scheme allows
   */
  reg2bins(beg, end) {
    let lo = beg - 1;
    if (lo < 1) {
      lo = 1;
    }
    let hi = end;
    if (hi > 2 ** 50) {
      hi = 2 ** 34;
    }
    hi -= 1;

    const bins = [];
    let level = 0;
    let levelOffset = 0;
    let shift = this.minShift + this.depth * 3;
    while (level <= this.depth) {
      const firstBin = levelOffset + rshift(lo, shift);
      const lastBin = levelOffset + rshift(hi, shift);
      if (lastBin - firstBin + bins.length > this.maxBinNumber) {
        throw new Error(
          `query ${lo}-${hi} is too large for current binning scheme (shift ${this.minShift}, depth ${this.depth}), try a smaller query or a coarser index binning scheme`
        );
      }
      bins.push([firstBin, lastBin]);
      shift -= 3;
      levelOffset += lshift(1, level * 3);
      level += 1;
    }
    return bins;
  }
}

/**
 * Bin ranges, one per level of the fixed TBI binning scheme, that may
 * overlap the region.
 *
 * @returns {Array<[number, number]>} inclusive bin ranges
 */
function tbiReg2bins(beg, end) {
  const lo = beg + 1;
  const hi = end - 1;
  return [
    [0, 0],
    [1 + (lo >> 26), 1 + (hi >> 26)],
    [9 + (lo >> 23), 9 + (hi >> 23)],
    [73 + (lo >> 20), 73 + (hi >> 20)],
    [585 + (lo >> 17), 585 + (hi >> 17)],
    [4681 + (lo >> 14), 4681 + (hi >> 14)],
  ];
}

/** Reader for TBI indexes. */
class TabixIndex extends IndexFile {
  /**
   * @param {string} refName reference sequence name
   * @param {object} [opts] forwarded to `parse`
   * @returns {Promise<number>} line count stored for the reference, or -1 if
   *   the reference or its statistics are missing
   */
  async lineCount(refName, opts = {}) {
    const indexData = await this.parse(opts);
    const refId = indexData.refNameToId[refName];
    if (refId === undefined || !indexData.indices[refId]) {
      return -1;
    }
    const { stats } = indexData.indices[refId];
    const count = stats == null ? undefined : stats.lineCount;
    return count ?? -1;
  }

  /**
   * Fetch and parse the index.
   *
   * @param {object} [opts] forwarded to `filehandle.readFile`; `opts.signal`
   *   is checked once the bytes are unzipped
   * @returns {Promise<object>} parsed index data
   * @throws {Error} on a wrong magic number, unknown format preset, or a bin
   *   number beyond the scheme's range
   */
  async _parse(opts = {}) {
    const compressed = await this.filehandle.readFile(opts);
    const bytes = await unzip(compressed);
    checkAbortSignal(opts.signal);

    // honour byteOffset so that a sub-view of a larger buffer reads correctly
    const dataView = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
    if (dataView.getUint32(0, true) !== TBI_MAGIC) {
      throw new Error("Not a TBI file");
    }

    const refCount = dataView.getUint32(4, true);
    const formatFlags = dataView.getUint32(8, true);
    const coordinateType =
      formatFlags & ZERO_BASED_FLAG ? "zero-based-half-open" : "1-based-closed";
    const format = FORMATS[formatFlags & 15];
    if (!format) {
      throw new Error(`invalid Tabix preset format flags ${formatFlags}`);
    }
    const columnNumbers = {
      ref: dataView.getInt32(12, true),
      start: dataView.getInt32(16, true),
      end: dataView.getInt32(20, true),
    };
    const metaValue = dataView.getInt32(24, true);
    const depth = 5;
    const maxBinNumber = ((1 << ((depth + 1) * 3)) - 1) / 7;
    const maxRefLength = 2 ** (14 + depth * 3);
    const metaChar = metaValue ? String.fromCharCode(metaValue) : null;
    const skipLines = dataView.getInt32(28, true);

    const nameSectionLength = dataView.getInt32(32, true);
    const { refNameToId, refIdToName } = this._parseNameBytes(
      bytes.slice(36, 36 + nameSectionLength)
    );

    let cursor = 36 + nameSectionLength;
    let firstDataLine;

    // per-reference records are variable-length, so they are read in order
    const indices = new Array(refCount);
    for (let refId = 0; refId < refCount; refId += 1) {
      const binCount = dataView.getInt32(cursor, true);
      cursor += 4;
      const binIndex = {};
      let stats;
      for (let b = 0; b < binCount; b += 1) {
        const bin = dataView.getUint32(cursor, true);
        cursor += 4;
        if (bin > maxBinNumber + 1) {
          throw new Error(
            "tabix index contains too many bins, please use a CSI index"
          );
        }
        const chunkCount = dataView.getInt32(cursor, true);
        cursor += 4;
        if (bin === maxBinNumber + 1) {
          // the bin just past the valid range carries statistics, not chunks
          if (chunkCount === 2) {
            stats = this.parsePseudoBin(bytes, cursor);
          }
          cursor += 16 * chunkCount;
        } else {
          const chunks = new Array(chunkCount);
          for (let k = 0; k < chunkCount; k += 1) {
            const chunkStart = readVirtualOffset(bytes, cursor);
            const chunkEnd = readVirtualOffset(bytes, cursor + 8);
            cursor += 16;
            firstDataLine = this._findFirstData(firstDataLine, chunkStart);
            chunks[k] = new Chunk(chunkStart, chunkEnd, bin);
          }
          binIndex[bin] = chunks;
        }
      }

      const linearCount = dataView.getInt32(cursor, true);
      cursor += 4;
      const linearIndex = new Array(linearCount);
      for (let k = 0; k < linearCount; k += 1) {
        linearIndex[k] = readVirtualOffset(bytes, cursor);
        cursor += 8;
        firstDataLine = this._findFirstData(firstDataLine, linearIndex[k]);
      }

      indices[refId] = { binIndex, linearIndex, stats };
    }

    return {
      indices,
      metaChar,
      maxBinNumber,
      maxRefLength,
      skipLines,
      firstDataLine,
      columnNumbers,
      coordinateType,
      format,
      refIdToName,
      refNameToId,
      maxBlockSize: MAX_BLOCK_SIZE,
    };
  }

  /**
   * @param {Uint8Array} bytes
   * @param {number} offset offset of the pseudo-bin's first chunk
   * @returns {{lineCount: number}}
   */
  parsePseudoBin(bytes, offset) {
    return { lineCount: readUint64LE(bytes, offset + 16) };
  }

  /**
   * @param {string} refName reference sequence name
   * @param {number} min region start; negative values are clamped to 0
   * @param {number} max region end
   * @param {object} [opts] forwarded to `parse`
   * @returns {Promise<Chunk[]>} chunks that may hold lines in the region;
   *   empty when the reference is not in the index
   */
  async blocksForRange(refName, min, max, opts = {}) {
    const start = min < 0 ? 0 : min;
    const indexData = await this.parse(opts);
    const refId = indexData.refNameToId[refName];
    if (refId === undefined) {
      return [];
    }
    const refIndex = indexData.indices[refId];
    if (!refIndex) {
      return [];
    }

    const { linearIndex } = refIndex;
    const linearCount = linearIndex.length;
    const startSlot = start >> TBI_LINEAR_SHIFT;
    const minOffset = linearCount
      ? linearIndex[startSlot >= linearCount ? linearCount - 1 : startSlot]
      : new VirtualOffset(0, 0);
    if (!minOffset) {
      console.warn("querying outside of possible tabix range");
    }

    const chunks = [];
    for (const [firstBin, lastBin] of tbiReg2bins(start, max)) {
      for (let bin = firstBin; bin <= lastBin; bin++) {
        const binChunks = refIndex.binIndex[bin];
        if (binChunks) {
          for (const chunk of binChunks) {
            chunks.push(new Chunk(chunk.minv, chunk.maxv, bin));
          }
        }
      }
    }

    // smallest linear-index offset over the queried slots; chunks that end
    // at or before it are dropped by optimizeChunks
    let lowest = null;
    const firstSlot = Math.min(startSlot, linearCount - 1);
    const lastSlot = Math.min(max >> TBI_LINEAR_SHIFT, linearCount - 1);
    for (let slot = firstSlot; slot <= lastSlot; ++slot) {
      const offset = linearIndex[slot];
      if (offset && (!lowest || offset.compareTo(lowest) < 0)) {
        lowest = offset;
      }
    }
    return optimizeChunks(chunks, lowest);
  }
}

/** @returns {boolean} true if every UTF-16 unit of `str` is at most U+007F */
function isASCII(str) {
  return /^[\u0000-\u007F]*$/.test(str);
}

class TabixIndexedFile {
  /**
   * @param {object} args
   *
   * @param {string} [args.path]
   *
   * @param {filehandle} [args.filehandle]
   *
   * @param {string} [args.tbiPath]
   *
   * @param {filehandle} [args.tbiFilehandle]
   *
   * @param {string} [args.csiPath]
   *
   * @param {filehandle} [args.csiFilehandle]
   *
   * @param {string} [args.url]
   *
   * @param {string} [args.csiUrl]
   *
   * @param {string} [args.tbiUrl]
   *
   * @param {function} [args.renameRefSeqs] optional function with sig `string
   * => string` to transform reference sequence names for the purpose of
   * indexing and querying. note that the data that is returned is not altered,
   * just the names of the reference sequences that are used for querying.
   *
   * @param {number} [args.chunkCacheSize] byte budget for the chunk cache;
   * divided by 65536 to get the maximum number of cached chunks
   *
   * @throws {TypeError} if no data source or no index source can be derived
   * from the arguments
   */
  constructor({
    path,
    filehandle,
    url,
    tbiPath,
    tbiUrl,
    tbiFilehandle,
    csiPath,
    csiUrl,
    csiFilehandle,
    renameRefSeqs = (name) => name,
    chunkCacheSize = 5 * 2 ** 20,
  }) {
    if (filehandle) {
      this.filehandle = filehandle;
    } else if (path) {
      this.filehandle = new LocalFile(path);
    } else if (url) {
      this.filehandle = new RemoteFile(url);
    } else {
      throw new TypeError("must provide either filehandle or path");
    }

    // every branch passes renameRefSeqs so that the index's name table agrees
    // with the names this class compares against in checkLine
    if (tbiFilehandle) {
      this.index = new TabixIndex({ filehandle: tbiFilehandle, renameRefSeqs });
    } else if (csiFilehandle) {
      this.index = new CSI({ filehandle: csiFilehandle, renameRefSeqs });
    } else if (tbiPath) {
      this.index = new TabixIndex({
        filehandle: new LocalFile(tbiPath),
        renameRefSeqs,
      });
    } else if (csiPath) {
      this.index = new CSI({ filehandle: new LocalFile(csiPath), renameRefSeqs });
    } else if (path) {
      this.index = new TabixIndex({
        filehandle: new LocalFile(`${path}.tbi`),
        renameRefSeqs,
      });
    } else if (csiUrl) {
      this.index = new CSI({ filehandle: new RemoteFile(csiUrl), renameRefSeqs });
    } else if (tbiUrl) {
      this.index = new TabixIndex({
        filehandle: new RemoteFile(tbiUrl),
        renameRefSeqs,
      });
    } else if (url) {
      this.index = new TabixIndex({
        filehandle: new RemoteFile(`${url}.tbi`),
        renameRefSeqs,
      });
    } else {
      throw new TypeError(
        "must provide one of tbiFilehandle, tbiPath, csiFilehandle, csiPath, tbiUrl, csiUrl"
      );
    }

    this.renameRefSeq = renameRefSeqs;
    this.chunkCache = new AbortablePromiseCache({
      cache: new QuickLRU({ maxSize: Math.floor(chunkCacheSize / MAX_BLOCK_SIZE) }),
      fill: (chunk, signal) => this.readChunk(chunk, { signal }),
    });
  }

  /**
   * @param refName name of the reference sequence
   *
   * @param start start of the region (in 0-based half-open coordinates)
   *
   * @param end end of the region (in 0-based half-open coordinates)
   *
   * @param opts callback called for each line in the region. can also pass a
   * object param containing obj.lineCallback, obj.signal, etc
   *
   * @returns promise that is resolved when the whole read is finished,
   * rejected on error
   *
   * @throws {TypeError} if no line callback is supplied, or if start is
   * greater than end
   */
  async getLines(refName, start, end, opts) {
    let signal;
    let options = {};
    let callback;
    if (typeof opts === "function") {
      callback = opts;
    } else if (opts != null) {
      options = opts;
      callback = opts.lineCallback;
      signal = opts.signal;
    }
    if (typeof callback !== "function") {
      // fail before any I/O instead of at the first matching line
      throw new TypeError("line callback must be provided");
    }

    const metadata = await this.index.getMetadata(options);
    checkAbortSignal(signal);

    const regionStart = start ?? 0;
    const regionEnd = end ?? metadata.maxRefLength;
    if (!(regionStart <= regionEnd)) {
      throw new TypeError(
        "invalid start and end coordinates. start must be less than or equal to end"
      );
    }
    if (regionStart === regionEnd) {
      return;
    }

    const chunks = await this.index.blocksForRange(
      refName,
      regionStart,
      regionEnd,
      options
    );
    checkAbortSignal(signal);

    const decoder = new TextDecoder("utf8");
    for (const chunk of chunks) {
      const { buffer, cpositions, dpositions } = await this.chunkCache.get(
        chunk.toString(),
        chunk,
        signal
      );
      checkAbortSignal(signal);

      const str = decoder.decode(buffer);
      const strIsASCII = isASCII(str);
      // blockStart indexes `str` for pure-ASCII chunks and `buffer` (bytes)
      // otherwise, so the loop bound has to follow the same unit
      const limit = strIsASCII ? str.length : buffer.length;
      let blockStart = 0;
      let pos = 0;

      while (blockStart < limit) {
        let line;
        let lineEnd;
        if (strIsASCII) {
          lineEnd = str.indexOf("\n", blockStart);
          if (lineEnd === -1) {
            break;
          }
          line = str.slice(blockStart, lineEnd);
        } else {
          lineEnd = buffer.indexOf(10, blockStart);
          if (lineEnd === -1) {
            break;
          }
          line = decoder.decode(buffer.slice(blockStart, lineEnd));
        }

        if (dpositions) {
          // advance to the first entry of dpositions that lies beyond the
          // current line start
          while (blockStart + chunk.minv.dataPosition >= dpositions[pos]) {
            pos += 1;
          }
        }

        const { startCoordinate, overlaps } = this.checkLine(
          metadata,
          refName,
          regionStart,
          regionEnd,
          line
        );
        if (overlaps) {
          // cpositions[pos] refers to actual file offset of a bgzip block
          // boundaries
          //
          // we multiply by (1 << 8) in order to make sure each block has a
          // "unique" address space so that data in that block could never
          // overlap
          //
          // then the blockStart-dpositions is an uncompressed file offset
          // from that bgzip block boundary, and since the cpositions are
          // multiplied by (1 << 8) these uncompressed offsets get a unique
          // space
          callback(
            line,
            cpositions[pos] * 256 +
              (blockStart - dpositions[pos]) +
              chunk.minv.dataPosition +
              1
          );
        } else if (startCoordinate !== undefined && startCoordinate >= regionEnd) {
          // a line starting at or past the region end stops the whole scan
          return;
        }
        blockStart = lineEnd + 1;
      }
    }
  }

  /** @returns {Promise<object>} the index metadata (see `index.getMetadata`) */
  async getMetadata(opts = {}) {
    return this.index.getMetadata(opts);
  }

  /**
   * get a buffer containing the "header" region of the file, which are the
   * bytes up to the first non-meta line
   *
   * @param {object} [opts] forwarded to the index and to `filehandle.read`
   * @returns {Promise<Uint8Array>} when the index defines no meta character,
   *   everything that was unzipped is returned
   */
  async getHeaderBuffer(opts = {}) {
    const { firstDataLine, metaChar, maxBlockSize } = await this.getMetadata(opts);
    checkAbortSignal(opts.signal);

    const firstDataBlock =
      (firstDataLine == null ? undefined : firstDataLine.blockPosition) || 0;
    const compressed = await this.filehandle.read(
      firstDataBlock + maxBlockSize,
      0,
      opts
    );
    const bytes = await unzip(compressed);
    if (!metaChar) {
      return bytes;
    }

    // find the end of the last line that begins with the meta character
    const NEWLINE = 10;
    const metaByte = metaChar.charCodeAt(0);
    let lastNewline = -1;
    for (let i = 0; i < bytes.length; i += 1) {
      if (i === lastNewline + 1 && bytes[i] !== metaByte) {
        break;
      }
      if (bytes[i] === NEWLINE) {
        lastNewline = i;
      }
    }
    return bytes.subarray(0, lastNewline + 1);
  }

  /**
   * get a string containing the "header" region of the file, is the portion up
   * to the first non-meta line
   *
   * @returns {Promise} for a string
   */
  async getHeader(opts = {}) {
    const decoder = new TextDecoder("utf8");
    const bytes = await this.getHeaderBuffer(opts);
    return decoder.decode(bytes);
  }

  /**
   * get an array of reference sequence names, as listed by the index
   * metadata (`refIdToName`).
   */
  async getReferenceSequenceNames(opts = {}) {
    const metadata = await this.getMetadata(opts);
    return metadata.refIdToName;
  }

  /**
   * @param {object} metadata metadata object from the parsed index, containing
   * columnNumbers, metaChar, and format
   *
   * @param {string} regionRefName
   *
   * @param {number} regionStart region start coordinate (0-based-half-open)
   *
   * @param {number} regionEnd region end coordinate (0-based-half-open)
   *
   * @param {string} line one tab-delimited line of the file
   *
   * @returns {object} like `{startCoordinate, overlaps}`. overlaps is boolean,
   * true if line is a data line that overlaps the given region
   */
  checkLine(metadata, regionRefName, regionStart, regionEnd, line) {
    const { columnNumbers, metaChar, coordinateType, format } = metadata;
    if (metaChar && line.startsWith(metaChar)) {
      return { overlaps: false };
    }

    const refColumn = columnNumbers.ref || 0;
    const startColumn = columnNumbers.start || 0;
    // for VCF the end is derived from column 8 rather than a configured column
    const endColumn = format === "VCF" ? 8 : columnNumbers.end || 0;
    const lastColumnNeeded = Math.max(refColumn, startColumn, endColumn);

    let column = 1;
    let columnStart = 0;
    let refSeq = "";
    let startCoordinate = -Infinity;
    const lineLength = line.length;

    // i === lineLength acts as the terminator of the final column
    for (let i = 0; i < lineLength + 1; i++) {
      if (line[i] !== "\t" && i !== lineLength) {
        continue;
      }
      const field = line.slice(columnStart, i);

      if (column === refColumn) {
        if (this.renameRefSeq(field) !== regionRefName) {
          return { overlaps: false };
        }
      } else if (column === startColumn) {
        startCoordinate = parseInt(field, 10);
        if (coordinateType === "1-based-closed") {
          startCoordinate -= 1;
        }
        if (startCoordinate >= regionEnd) {
          return { startCoordinate, overlaps: false };
        }
        // no separate end column: the feature is treated as one base long
        if (
          (endColumn === 0 || endColumn === startColumn) &&
          startCoordinate + 1 <= regionStart
        ) {
          return { startCoordinate, overlaps: false };
        }
      } else if (format === "VCF" && column === 4) {
        refSeq = field;
      } else if (column === endColumn) {
        const endCoordinate =
          format === "VCF"
            ? this._getVcfEnd(startCoordinate, refSeq, field)
            : Number.parseInt(field, 10);
        if (endCoordinate <= regionStart) {
          return { overlaps: false };
        }
      }

      columnStart = i + 1;
      column += 1;
      if (column > lastColumnNeeded) {
        break;
      }
    }
    return { startCoordinate, overlaps: true };
  }

  /**
   * Work out the end coordinate of a VCF line.
   *
   * @param {number} startCoordinate start coordinate of the line
   * @param {string} refSeq contents of column 4
   * @param {string} info contents of column 8
   * @returns {number} `startCoordinate + 1` when `info` contains
   *   `SVTYPE=TRA`; otherwise the value of an `END=` entry if `info` has one
   *   and does not start with "."; otherwise start plus the length of
   *   `refSeq`
   */
  _getVcfEnd(startCoordinate, refSeq, info) {
    if (info.includes("SVTYPE=TRA")) {
      return startCoordinate + 1;
    }
    let endCoordinate = startCoordinate + refSeq.length;
    if (info[0] === ".") {
      return endCoordinate;
    }
    // only accept "END=" at the start of a ";"-separated entry
    let previousChar = ";";
    for (let i = 0; i < info.length; i += 1) {
      if (previousChar === ";" && info.slice(i, i + 4) === "END=") {
        let valueEnd = info.indexOf(";", i);
        if (valueEnd === -1) {
          valueEnd = info.length;
        }
        endCoordinate = parseInt(info.slice(i + 4, valueEnd), 10);
        break;
      }
      previousChar = info[i];
    }
    return endCoordinate;
  }

  /**
   * return the approximate number of data lines in the given reference
   * sequence
   *
   * @param refSeq reference sequence name
   *
   * @returns number of data lines present on that reference sequence
   */
  async lineCount(refSeq, opts = {}) {
    return this.index.lineCount(refSeq, opts);
  }

  /**
   * read and uncompress the data in a chunk (composed of one or more
   * contiguous bgzip blocks) of the file
   *
   * @param {Chunk} chunk
   * @param {object} [opts] forwarded to `filehandle.read`
   */
  async readChunk(chunk, opts = {}) {
    const compressed = await this.filehandle.read(
      chunk.fetchedSize(),
      chunk.minv.blockPosition,
      opts
    );
    return unzipChunkSlice(compressed, chunk);
  }
}

export { TabixIndexedFile as T };
//# sourceMappingURL=tabixIndexedFile-WXUhSlHL.js.map